diff --git a/Documentation/ABI/testing/sysfs-ata b/Documentation/ABI/testing/sysfs-ata index 9ab0ef1dd1c72..299e0d1dc1619 100644 --- a/Documentation/ABI/testing/sysfs-ata +++ b/Documentation/ABI/testing/sysfs-ata @@ -107,13 +107,14 @@ Description: described in ATA8 7.16 and 7.17. Only valid if the device is not a PM. - pio_mode: (RO) Transfer modes supported by the device when - in PIO mode. Mostly used by PATA device. + pio_mode: (RO) PIO transfer mode used by the device. + Mostly used by PATA devices. - xfer_mode: (RO) Current transfer mode + xfer_mode: (RO) Current transfer mode. Mostly used by + PATA devices. - dma_mode: (RO) Transfer modes supported by the device when - in DMA mode. Mostly used by PATA device. + dma_mode: (RO) DMA transfer mode used by the device. + Mostly used by PATA devices. class: (RO) Device class. Can be "ata" for disk, "atapi" for packet device, "pmp" for PM, or diff --git a/Documentation/ABI/testing/sysfs-bus-iio-vf610 b/Documentation/ABI/testing/sysfs-bus-iio-vf610 index 308a6756d3bf3..491ead8044888 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio-vf610 +++ b/Documentation/ABI/testing/sysfs-bus-iio-vf610 @@ -1,4 +1,4 @@ -What: /sys/bus/iio/devices/iio:deviceX/conversion_mode +What: /sys/bus/iio/devices/iio:deviceX/in_conversion_mode KernelVersion: 4.2 Contact: linux-iio@vger.kernel.org Description: diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index b39531a3c5bc7..fe6bb70f84f8f 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -495,6 +495,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/srbds /sys/devices/system/cpu/vulnerabilities/tsx_async_abort /sys/devices/system/cpu/vulnerabilities/itlb_multihit + /sys/devices/system/cpu/vulnerabilities/mmio_stale_data Date: January 2018 Contact: Linux kernel mailing list Description: Information about CPU vulnerabilities diff --git a/Documentation/PCI/pci-error-recovery.rst b/Documentation/PCI/pci-error-recovery.rst index 13beee23cb04f..ccd7134231338 100644 --- a/Documentation/PCI/pci-error-recovery.rst +++ b/Documentation/PCI/pci-error-recovery.rst @@ -79,7 +79,7 @@ This structure has the form:: struct pci_error_handlers { - int (*error_detected)(struct pci_dev *dev, enum pci_channel_state); + int (*error_detected)(struct pci_dev *dev, pci_channel_state_t); int (*mmio_enabled)(struct pci_dev *dev); int (*slot_reset)(struct pci_dev *dev); void (*resume)(struct pci_dev *dev); @@ -87,11 +87,11 @@ This structure has the form:: The possible channel states are:: - enum pci_channel_state { + typedef enum { pci_channel_io_normal, /* I/O channel is in normal state */ pci_channel_io_frozen, /* I/O to channel is blocked */ pci_channel_io_perm_failure, /* PCI card is dead */ - }; + } pci_channel_state_t; Possible return values are:: @@ -348,7 +348,7 @@ STEP 6: Permanent Failure ------------------------- A "permanent failure" has occurred, and the platform cannot recover the device. The platform will call error_detected() with a -pci_channel_state value of pci_channel_io_perm_failure. +pci_channel_state_t value of pci_channel_io_perm_failure. The device driver should, at this point, assume the worst. It should cancel all pending I/O, refuse all new I/O, returning -EIO to diff --git a/Documentation/accounting/psi.rst b/Documentation/accounting/psi.rst index 621111ce57401..ad31ebae0af8a 100644 --- a/Documentation/accounting/psi.rst +++ b/Documentation/accounting/psi.rst @@ -35,11 +35,7 @@ Pressure interface Pressure information for each resource is exported through the respective file in /proc/pressure/ -- cpu, memory, and io. -The format for CPU is as such:: - - some avg10=0.00 avg60=0.00 avg300=0.00 total=0 - -and for memory and IO:: +The format is as such:: some avg10=0.00 avg60=0.00 avg300=0.00 total=0 full avg10=0.00 avg60=0.00 avg300=0.00 total=0 @@ -56,6 +52,9 @@ situation from a state where some tasks are stalled but the CPU is still doing productive work. As such, time spent in this subset of the stall state is tracked separately and exported in the "full" averages. +CPU full is undefined at the system level, but has been reported +since 5.13, so it is set to zero for backward compatibility. + The ratios (in %) are tracked as recent trends over ten, sixty, and three hundred second windows, which gives insight into short term events as well as medium and long term trends. The total absolute stall time @@ -90,7 +89,8 @@ Triggers can be set on more than one psi metric and more than one trigger for the same psi metric can be specified. However for each trigger a separate file descriptor is required to be able to poll it separately from others, therefore for each trigger a separate open() syscall should be made even -when opening the same psi interface file. +when opening the same psi interface file. Write operations to a file descriptor +with an already existing psi trigger will fail with EBUSY. Monitors activate only when system enters stall state for the monitored psi metric and deactivates upon exit from the stall state. While system is diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 37925c07a3d2e..abd564ba8cf56 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -986,6 +986,8 @@ All time durations are in microseconds. - nr_periods - nr_throttled - throttled_usec + - nr_bursts + - burst_usec cpu.weight A read-write single value file which exists on non-root @@ -1017,6 +1019,12 @@ All time durations are in microseconds. $PERIOD duration. "max" for $MAX indicates no limit. If only one number is written, $MAX is updated. + cpu.max.burst + A read-write single value file which exists on non-root + cgroups. The default is "0". + + The burst in the range [0, $MAX]. + cpu.pressure A read-only nested-key file which exists on non-root cgroups. @@ -1174,6 +1182,27 @@ PAGE_SIZE multiple when read back. It will broke low boundary. Because it tries to reclaim the memory many times, until the memory drops to a certain level. + memory.reclaim + A write-only nested-keyed file which exists for all cgroups. + + This is a simple interface to trigger memory reclaim in the + target cgroup. + + This file accepts a single key, the number of bytes to reclaim. + No nested keys are currently supported. + + Example:: + + echo "1G" > memory.reclaim + + The interface can be later extended with nested keys to + configure the reclaim behavior. For example, specify the + type of memory to reclaim from (anon, file, ..). + + Please note that the kernel can over or under reclaim from + the target cgroup. If less bytes are reclaimed than the + specified amount, -EAGAIN is returned. + memory.oom.group A read-write single value file which exists on non-root cgroups. The default value is "0". @@ -1344,8 +1373,19 @@ PAGE_SIZE multiple when read back. Amount of reclaimed pages - pgactivate(npn) + pgscan_kswapd (npn) + Amount of scanned pages by kswapd (in an inactive LRU list) + + pgscan_direct (npn) + Amount of scanned pages directly (in an inactive LRU list) + pgsteal_kswapd (npn) + Amount of reclaimed pages by kswapd + + pgsteal_direct (npn) + Amount of reclaimed pages directly + + pgactivate(npn) Amount of pages moved to the active LRU list pgdeactivate(npn) @@ -1496,8 +1536,7 @@ IO Interface Files ~~~~~~~~~~~~~~~~~~ io.stat - A read-only nested-keyed file which exists on non-root - cgroups. + A read-only nested-keyed file. Lines are keyed by $MAJ:$MIN device numbers and not ordered. The following nested keys are defined. diff --git a/Documentation/admin-guide/devices.txt b/Documentation/admin-guide/devices.txt index 1c5d2281efc97..771d9e7ae082b 100644 --- a/Documentation/admin-guide/devices.txt +++ b/Documentation/admin-guide/devices.txt @@ -3002,10 +3002,10 @@ 65 = /dev/infiniband/issm1 Second InfiniBand IsSM device ... 127 = /dev/infiniband/issm63 63rd InfiniBand IsSM device - 128 = /dev/infiniband/uverbs0 First InfiniBand verbs device - 129 = /dev/infiniband/uverbs1 Second InfiniBand verbs device + 192 = /dev/infiniband/uverbs0 First InfiniBand verbs device + 193 = /dev/infiniband/uverbs1 Second InfiniBand verbs device ... - 159 = /dev/infiniband/uverbs31 31st InfiniBand verbs device + 223 = /dev/infiniband/uverbs31 31st InfiniBand verbs device 232 char Biometric Devices 0 = /dev/biometric/sensor0/fingerprint first fingerprint sensor on first device diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst index ca4dbdd9016d5..2adec1e6520a6 100644 --- a/Documentation/admin-guide/hw-vuln/index.rst +++ b/Documentation/admin-guide/hw-vuln/index.rst @@ -15,3 +15,4 @@ are configurable at compile, boot or run time. tsx_async_abort multihit.rst special-register-buffer-data-sampling.rst + processor_mmio_stale_data.rst diff --git a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst new file mode 100644 index 0000000000000..9393c50b5afc9 --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst @@ -0,0 +1,246 @@ +========================================= +Processor MMIO Stale Data Vulnerabilities +========================================= + +Processor MMIO Stale Data Vulnerabilities are a class of memory-mapped I/O +(MMIO) vulnerabilities that can expose data. The sequences of operations for +exposing data range from simple to very complex. Because most of the +vulnerabilities require the attacker to have access to MMIO, many environments +are not affected. System environments using virtualization where MMIO access is +provided to untrusted guests may need mitigation. These vulnerabilities are +not transient execution attacks. However, these vulnerabilities may propagate +stale data into core fill buffers where the data can subsequently be inferred +by an unmitigated transient execution attack. Mitigation for these +vulnerabilities includes a combination of microcode update and software +changes, depending on the platform and usage model. Some of these mitigations +are similar to those used to mitigate Microarchitectural Data Sampling (MDS) or +those used to mitigate Special Register Buffer Data Sampling (SRBDS). + +Data Propagators +================ +Propagators are operations that result in stale data being copied or moved from +one microarchitectural buffer or register to another. Processor MMIO Stale Data +Vulnerabilities are operations that may result in stale data being directly +read into an architectural, software-visible state or sampled from a buffer or +register. + +Fill Buffer Stale Data Propagator (FBSDP) +----------------------------------------- +Stale data may propagate from fill buffers (FB) into the non-coherent portion +of the uncore on some non-coherent writes. Fill buffer propagation by itself +does not make stale data architecturally visible. Stale data must be propagated +to a location where it is subject to reading or sampling. + +Sideband Stale Data Propagator (SSDP) +------------------------------------- +The sideband stale data propagator (SSDP) is limited to the client (including +Intel Xeon server E3) uncore implementation. The sideband response buffer is +shared by all client cores. For non-coherent reads that go to sideband +destinations, the uncore logic returns 64 bytes of data to the core, including +both requested data and unrequested stale data, from a transaction buffer and +the sideband response buffer. As a result, stale data from the sideband +response and transaction buffers may now reside in a core fill buffer. + +Primary Stale Data Propagator (PSDP) +------------------------------------ +The primary stale data propagator (PSDP) is limited to the client (including +Intel Xeon server E3) uncore implementation. Similar to the sideband response +buffer, the primary response buffer is shared by all client cores. For some +processors, MMIO primary reads will return 64 bytes of data to the core fill +buffer including both requested data and unrequested stale data. This is +similar to the sideband stale data propagator. + +Vulnerabilities +=============== +Device Register Partial Write (DRPW) (CVE-2022-21166) +----------------------------------------------------- +Some endpoint MMIO registers incorrectly handle writes that are smaller than +the register size. Instead of aborting the write or only copying the correct +subset of bytes (for example, 2 bytes for a 2-byte write), more bytes than +specified by the write transaction may be written to the register. On +processors affected by FBSDP, this may expose stale data from the fill buffers +of the core that created the write transaction. + +Shared Buffers Data Sampling (SBDS) (CVE-2022-21125) +---------------------------------------------------- +After propagators may have moved data around the uncore and copied stale data +into client core fill buffers, processors affected by MFBDS can leak data from +the fill buffer. It is limited to the client (including Intel Xeon server E3) +uncore implementation. + +Shared Buffers Data Read (SBDR) (CVE-2022-21123) +------------------------------------------------ +It is similar to Shared Buffer Data Sampling (SBDS) except that the data is +directly read into the architectural software-visible state. It is limited to +the client (including Intel Xeon server E3) uncore implementation. + +Affected Processors +=================== +Not all the CPUs are affected by all the variants. For instance, most +processors for the server market (excluding Intel Xeon E3 processors) are +impacted by only Device Register Partial Write (DRPW). + +Below is the list of affected Intel processors [#f1]_: + + =================== ============ ========= + Common name Family_Model Steppings + =================== ============ ========= + HASWELL_X 06_3FH 2,4 + SKYLAKE_L 06_4EH 3 + BROADWELL_X 06_4FH All + SKYLAKE_X 06_55H 3,4,6,7,11 + BROADWELL_D 06_56H 3,4,5 + SKYLAKE 06_5EH 3 + ICELAKE_X 06_6AH 4,5,6 + ICELAKE_D 06_6CH 1 + ICELAKE_L 06_7EH 5 + ATOM_TREMONT_D 06_86H All + LAKEFIELD 06_8AH 1 + KABYLAKE_L 06_8EH 9 to 12 + ATOM_TREMONT 06_96H 1 + ATOM_TREMONT_L 06_9CH 0 + KABYLAKE 06_9EH 9 to 13 + COMETLAKE 06_A5H 2,3,5 + COMETLAKE_L 06_A6H 0,1 + ROCKETLAKE 06_A7H 1 + =================== ============ ========= + +If a CPU is in the affected processor list, but not affected by a variant, it +is indicated by new bits in MSR IA32_ARCH_CAPABILITIES. As described in a later +section, mitigation largely remains the same for all the variants, i.e. to +clear the CPU fill buffers via VERW instruction. + +New bits in MSRs +================ +Newer processors and microcode update on existing affected processors added new +bits to IA32_ARCH_CAPABILITIES MSR. These bits can be used to enumerate +specific variants of Processor MMIO Stale Data vulnerabilities and mitigation +capability. + +MSR IA32_ARCH_CAPABILITIES +-------------------------- +Bit 13 - SBDR_SSDP_NO - When set, processor is not affected by either the + Shared Buffers Data Read (SBDR) vulnerability or the sideband stale + data propagator (SSDP). +Bit 14 - FBSDP_NO - When set, processor is not affected by the Fill Buffer + Stale Data Propagator (FBSDP). +Bit 15 - PSDP_NO - When set, processor is not affected by Primary Stale Data + Propagator (PSDP). +Bit 17 - FB_CLEAR - When set, VERW instruction will overwrite CPU fill buffer + values as part of MD_CLEAR operations. Processors that do not + enumerate MDS_NO (meaning they are affected by MDS) but that do + enumerate support for both L1D_FLUSH and MD_CLEAR implicitly enumerate + FB_CLEAR as part of their MD_CLEAR support. +Bit 18 - FB_CLEAR_CTRL - Processor supports read and write to MSR + IA32_MCU_OPT_CTRL[FB_CLEAR_DIS]. On such processors, the FB_CLEAR_DIS + bit can be set to cause the VERW instruction to not perform the + FB_CLEAR action. Not all processors that support FB_CLEAR will support + FB_CLEAR_CTRL. + +MSR IA32_MCU_OPT_CTRL +--------------------- +Bit 3 - FB_CLEAR_DIS - When set, VERW instruction does not perform the FB_CLEAR +action. This may be useful to reduce the performance impact of FB_CLEAR in +cases where system software deems it warranted (for example, when performance +is more critical, or the untrusted software has no MMIO access). Note that +FB_CLEAR_DIS has no impact on enumeration (for example, it does not change +FB_CLEAR or MD_CLEAR enumeration) and it may not be supported on all processors +that enumerate FB_CLEAR. + +Mitigation +========== +Like MDS, all variants of Processor MMIO Stale Data vulnerabilities have the +same mitigation strategy to force the CPU to clear the affected buffers before +an attacker can extract the secrets. + +This is achieved by using the otherwise unused and obsolete VERW instruction in +combination with a microcode update. The microcode clears the affected CPU +buffers when the VERW instruction is executed. + +Kernel reuses the MDS function to invoke the buffer clearing: + + mds_clear_cpu_buffers() + +On MDS affected CPUs, the kernel already invokes CPU buffer clear on +kernel/userspace, hypervisor/guest and C-state (idle) transitions. No +additional mitigation is needed on such CPUs. + +For CPUs not affected by MDS or TAA, mitigation is needed only for the attacker +with MMIO capability. Therefore, VERW is not required for kernel/userspace. For +virtualization case, VERW is only needed at VMENTER for a guest with MMIO +capability. + +Mitigation points +----------------- +Return to user space +^^^^^^^^^^^^^^^^^^^^ +Same mitigation as MDS when affected by MDS/TAA, otherwise no mitigation +needed. + +C-State transition +^^^^^^^^^^^^^^^^^^ +Control register writes by CPU during C-state transition can propagate data +from fill buffer to uncore buffers. Execute VERW before C-state transition to +clear CPU fill buffers. + +Guest entry point +^^^^^^^^^^^^^^^^^ +Same mitigation as MDS when processor is also affected by MDS/TAA, otherwise +execute VERW at VMENTER only for MMIO capable guests. On CPUs not affected by +MDS/TAA, guest without MMIO access cannot extract secrets using Processor MMIO +Stale Data vulnerabilities, so there is no need to execute VERW for such guests. + +Mitigation control on the kernel command line +--------------------------------------------- +The kernel command line allows to control the Processor MMIO Stale Data +mitigations at boot time with the option "mmio_stale_data=". The valid +arguments for this option are: + + ========== ================================================================= + full If the CPU is vulnerable, enable mitigation; CPU buffer clearing + on exit to userspace and when entering a VM. Idle transitions are + protected as well. It does not automatically disable SMT. + full,nosmt Same as full, with SMT disabled on vulnerable CPUs. This is the + complete mitigation. + off Disables mitigation completely. + ========== ================================================================= + +If the CPU is affected and mmio_stale_data=off is not supplied on the kernel +command line, then the kernel selects the appropriate mitigation. + +Mitigation status information +----------------------------- +The Linux kernel provides a sysfs interface to enumerate the current +vulnerability status of the system: whether the system is vulnerable, and +which mitigations are active. The relevant sysfs file is: + + /sys/devices/system/cpu/vulnerabilities/mmio_stale_data + +The possible values in this file are: + + .. list-table:: + + * - 'Not affected' + - The processor is not vulnerable + * - 'Vulnerable' + - The processor is vulnerable, but no mitigation enabled + * - 'Vulnerable: Clear CPU buffers attempted, no microcode' + - The processor is vulnerable, but microcode is not updated. The + mitigation is enabled on a best effort basis. + * - 'Mitigation: Clear CPU buffers' + - The processor is vulnerable and the CPU buffer clearing mitigation is + enabled. + +If the processor is vulnerable then the following information is appended to +the above information: + + ======================== =========================================== + 'SMT vulnerable' SMT is enabled + 'SMT disabled' SMT is disabled + 'SMT Host state unknown' Kernel runs in a VM, Host SMT state unknown + ======================== =========================================== + +References +---------- +.. [#f1] Affected Processors + https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index 3a6e1fcad3c02..7e061ed449aaa 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -422,6 +422,14 @@ The possible values in this file are: 'RSB filling' Protection of RSB on context switch enabled ============= =========================================== + - EIBRS Post-barrier Return Stack Buffer (PBRSB) protection status: + + =========================== ======================================================= + 'PBRSB-eIBRS: SW sequence' CPU is affected and protection of RSB on VMEXIT enabled + 'PBRSB-eIBRS: Vulnerable' CPU is vulnerable + 'PBRSB-eIBRS: Not affected' CPU is not affected by PBRSB + =========================== ======================================================= + Full mitigation might require a microcode update from the CPU vendor. When the necessary microcode is not available, the kernel will report vulnerability. @@ -483,7 +491,7 @@ Spectre variant 2 before invoking any firmware code to prevent Spectre variant 2 exploits using the firmware. - Using kernel address space randomization (CONFIG_RANDOMIZE_SLAB=y + Using kernel address space randomization (CONFIG_RANDOMIZE_BASE=y and CONFIG_SLAB_FREELIST_RANDOM=y in the kernel configuration) makes attacks on the kernel generally more difficult. diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index ddbc95bf4917a..8931ddbc17a31 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -484,16 +484,21 @@ ccw_timeout_log [S390] See Documentation/s390/common_io.rst for details. - cgroup_disable= [KNL] Disable a particular controller - Format: {name of the controller(s) to disable} + cgroup_disable= [KNL] Disable a particular controller or optional feature + Format: {name of the controller(s) or feature(s) to disable} The effects of cgroup_disable=foo are: - foo isn't auto-mounted if you mount all cgroups in a single hierarchy - foo isn't visible as an individually mountable subsystem + - if foo is an optional feature then the feature is + disabled and corresponding cgroup files are not + created {Currently only "memory" controller deal with this and cut the overhead, others just disable the usage. So only cgroup_disable=memory is actually worthy} + Specifying "pressure" disables per-cgroup pressure + stall information accounting feature cgroup_no_v1= [KNL] Disable cgroup controllers and named hierarchies in v1 Format: { { controller | "all" | "named" } @@ -885,12 +890,6 @@ causing system reset or hang due to sending INIT from AP to BSP. - perf_v4_pmi= [X86,INTEL] - Format: - Disable Intel PMU counter freezing feature. - The feature only exists starting from - Arch Perfmon v4 (Skylake and newer). - disable_ddw [PPC/PSERIES] Disable Dynamic DMA Window support. Use this if to workaround buggy firmware. @@ -1428,16 +1427,16 @@ (when the CPU supports the "pdpe1gb" cpuinfo flag). hugetlb_free_vmemmap= - [KNL] Reguires CONFIG_HUGETLB_PAGE_FREE_VMEMMAP + [KNL] Reguires CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP enabled. Allows heavy hugetlb users to free up some more memory (7 * PAGE_SIZE for each 2MB hugetlb page). - Format: { on | off (default) } + Format: { [oO][Nn]/Y/y/1 | [oO][Ff]/N/n/0 (default) } - on: enable the feature - off: disable the feature + [oO][Nn]/Y/y/1: enable the feature + [oO][Ff]/N/n/0: disable the feature - Built with CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON=y, + Built with CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON=y, the default is on. hung_task_panic= @@ -1500,6 +1499,8 @@ architectures force reset to be always executed i8042.unlock [HW] Unlock (ignore) the keylock i8042.kbdreset [HW] Reset device connected to KBD port + i8042.probe_defer + [HW] Allow deferred probing upon i8042 probe errors i810= [HW,DRM] @@ -2125,8 +2126,12 @@ Default is 1 (enabled) kvm-intel.emulate_invalid_guest_state= - [KVM,Intel] Enable emulation of invalid guest states - Default is 0 (disabled) + [KVM,Intel] Disable emulation of invalid guest state. + Ignored if kvm-intel.enable_unrestricted_guest=1, as + guest state is never invalid for unrestricted guests. + This param doesn't apply to nested guests (L2), as KVM + never emulates invalid L2 guest state. + Default is 1 (enabled) kvm-intel.flexpriority= [KVM,Intel] Disable FlexPriority feature (TPR shadow). @@ -2688,6 +2693,7 @@ kvm.nx_huge_pages=off [X86] no_entry_flush [PPC] no_uaccess_flush [PPC] + mmio_stale_data=off [X86] Exceptions: This does not have any effect on @@ -2709,6 +2715,7 @@ Equivalent to: l1tf=flush,nosmt [X86] mds=full,nosmt [X86] tsx_async_abort=full,nosmt [X86] + mmio_stale_data=full,nosmt [X86] mminit_loglevel= [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this @@ -2718,6 +2725,40 @@ log everything. Information is printed at KERN_DEBUG so loglevel=8 may also need to be specified. + mmio_stale_data= + [X86,INTEL] Control mitigation for the Processor + MMIO Stale Data vulnerabilities. + + Processor MMIO Stale Data is a class of + vulnerabilities that may expose data after an MMIO + operation. Exposed data could originate or end in + the same CPU buffers as affected by MDS and TAA. + Therefore, similar to MDS and TAA, the mitigation + is to clear the affected CPU buffers. + + This parameter controls the mitigation. The + options are: + + full - Enable mitigation on vulnerable CPUs + + full,nosmt - Enable mitigation and disable SMT on + vulnerable CPUs. + + off - Unconditionally disable mitigation + + On MDS or TAA affected machines, + mmio_stale_data=off can be prevented by an active + MDS or TAA mitigation as these vulnerabilities are + mitigated with the same mechanism so in order to + disable this mitigation, you need to specify + mds=off and tsx_async_abort=off too. + + Not specifying this option is equivalent to + mmio_stale_data=full. + + For details see: + Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst + module.sig_enforce [KNL] When CONFIG_MODULE_SIG is set, this means that modules without (valid) signatures will fail to load. @@ -3815,6 +3856,12 @@ fully seed the kernel's CRNG. Default is controlled by CONFIG_RANDOM_TRUST_CPU. + random.trust_bootloader={on,off} + [KNL] Enable or disable trusting the use of a + seed passed by the bootloader (if available) to + fully seed the kernel's CRNG. Default is controlled + by CONFIG_RANDOM_TRUST_BOOTLOADER. + ras=option[,option,...] [KNL] RAS-specific options cec_disable [X86] @@ -5499,6 +5546,13 @@ as generic guest with no PV drivers. Currently support XEN HVM, KVM, HYPER_V and VMWARE guest. + xen.balloon_boot_timeout= [XEN] + The time (in seconds) to wait before giving up to boot + in case initial ballooning fails to free enough memory. + Applies only when running as HVM or PVH guest and + started with less memory configured than allowed at + max. Default is 180. + xen.event_eoi_delay= [XEN] How long to delay EOI handling in case of event storms (jiffies). Default is 10. diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 7b477fa19534d..9715685be6e3b 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -862,9 +862,40 @@ The kernel command line parameter printk.devkmsg= overrides this and is a one-time setting until next reboot: once set, it cannot be changed by this sysctl interface anymore. +pty +=== -randomize_va_space: -=================== +See Documentation/filesystems/devpts.rst. + + +random +====== + +This is a directory, with the following entries: + +* ``boot_id``: a UUID generated the first time this is retrieved, and + unvarying after that; + +* ``uuid``: a UUID generated every time this is retrieved (this can + thus be used to generate UUIDs at will); + +* ``entropy_avail``: the pool's entropy count, in bits; + +* ``poolsize``: the entropy pool size, in bits; + +* ``urandom_min_reseed_secs``: obsolete (used to determine the minimum + number of seconds between urandom pool reseeding). This file is + writable for compatibility purposes, but writing to it has no effect + on any RNG behavior; + +* ``write_wakeup_threshold``: when the entropy count drops below this + (as a number of bits), processes waiting to write to ``/dev/random`` + are woken up. This file is writable for compatibility purposes, but + writing to it has no effect on any RNG behavior. + + +randomize_va_space +================== This option can be used to select the type of process address space randomization that is used in the system, for architectures diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst index c4b6f1a03b826..47985671f6931 100644 --- a/Documentation/admin-guide/sysctl/vm.rst +++ b/Documentation/admin-guide/sysctl/vm.rst @@ -548,17 +548,43 @@ Change the minimum size of the hugepage pool. See Documentation/admin-guide/mm/hugetlbpage.rst -hugetlb_free_vmemmap -==================== - -A toggle value indicating if vmemmap pages are allowed to be optimized. -If it is off (0), then it can be set true (1). Once true, the vmemmap -pages associated with each HugeTLB page will be optimized, and the toggle -cannot be set back to false. It only optimizes the subsequent allocation -of HugeTLB pages from buddy system, while already allocated HugeTLB pages -will not be optimized. +hugetlb_optimize_vmemmap +======================== -See Documentation/admin-guide/mm/hugetlbpage.rst +This knob is not available when memory_hotplug.memmap_on_memory (kernel parameter) +is configured or the size of 'struct page' (a structure defined in +include/linux/mm_types.h) is not power of two (an unusual system config could +result in this). + +Enable (set to 1) or disable (set to 0) the feature of optimizing vmemmap pages +associated with each HugeTLB page. + +Once enabled, the vmemmap pages of subsequent allocation of HugeTLB pages from +buddy allocator will be optimized (7 pages per 2MB HugeTLB page and 4095 pages +per 1GB HugeTLB page), whereas already allocated HugeTLB pages will not be +optimized. When those optimized HugeTLB pages are freed from the HugeTLB pool +to the buddy allocator, the vmemmap pages representing that range needs to be +remapped again and the vmemmap pages discarded earlier need to be rellocated +again. If your use case is that HugeTLB pages are allocated 'on the fly' (e.g. +never explicitly allocating HugeTLB pages with 'nr_hugepages' but only set +'nr_overcommit_hugepages', those overcommitted HugeTLB pages are allocated 'on +the fly') instead of being pulled from the HugeTLB pool, you should weigh the +benefits of memory savings against the more overhead (~2x slower than before) +of allocation or freeing HugeTLB pages between the HugeTLB pool and the buddy +allocator. Another behavior to note is that if the system is under heavy memory +pressure, it could prevent the user from freeing HugeTLB pages from the HugeTLB +pool to the buddy allocator since the allocation of vmemmap pages could be +failed, you have to retry later if your system encounter this situation. + +Once disabled, the vmemmap pages of subsequent allocation of HugeTLB pages from +buddy allocator will not be optimized meaning the extra overhead at allocation +time from buddy allocator disappears, whereas already optimized HugeTLB pages +will not be affected. If you want to make sure there are no optimized HugeTLB +pages, you can set "nr_hugepages" to 0 first and then disable this. Note that +writing 0 to nr_hugepages will make any "in use" HugeTLB pages become surplus +pages. So, those surplus pages are still optimized until they are no longer +in use. You would need to wait for those surplus pages to be released before +there are no optimized pages in the system. nr_hugepages_mempolicy diff --git a/Documentation/conf.py b/Documentation/conf.py index a8fe845832bce..38c1f7618b5e8 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -98,7 +98,7 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = 'en' # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: diff --git a/Documentation/devicetree/bindings/arm/tegra.yaml b/Documentation/devicetree/bindings/arm/tegra.yaml index 60b38eb5c61ab..56e1945911f1e 100644 --- a/Documentation/devicetree/bindings/arm/tegra.yaml +++ b/Documentation/devicetree/bindings/arm/tegra.yaml @@ -49,7 +49,7 @@ properties: - const: toradex,apalis_t30 - const: nvidia,tegra30 - items: - - const: toradex,apalis_t30-eval-v1.1 + - const: toradex,apalis_t30-v1.1-eval - const: toradex,apalis_t30-eval - const: toradex,apalis_t30-v1.1 - const: toradex,apalis_t30 diff --git a/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml b/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml index fb747682006d9..92dc08b78a176 100644 --- a/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml +++ b/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml @@ -10,6 +10,9 @@ title: Amlogic specific extensions to the Synopsys Designware HDMI Controller maintainers: - Neil Armstrong +allOf: + - $ref: /schemas/sound/name-prefix.yaml# + description: | The Amlogic Meson Synopsys Designware Integration is composed of - A Synopsys DesignWare HDMI Controller IP @@ -101,6 +104,8 @@ properties: "#sound-dai-cells": const: 0 + sound-name-prefix: true + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/display/amlogic,meson-vpu.yaml b/Documentation/devicetree/bindings/display/amlogic,meson-vpu.yaml index d1205a6697a09..766b04501cb96 100644 --- a/Documentation/devicetree/bindings/display/amlogic,meson-vpu.yaml +++ b/Documentation/devicetree/bindings/display/amlogic,meson-vpu.yaml @@ -78,6 +78,10 @@ properties: interrupts: maxItems: 1 + amlogic,canvas: + description: should point to a canvas provider node + $ref: /schemas/types.yaml#/definitions/phandle + power-domains: maxItems: 1 description: phandle to the associated power domain @@ -106,6 +110,7 @@ required: - port@1 - "#address-cells" - "#size-cells" + - amlogic,canvas examples: - | @@ -116,6 +121,7 @@ examples: interrupts = <3>; #address-cells = <1>; #size-cells = <0>; + amlogic,canvas = <&canvas>; /* CVBS VDAC output port */ port@0 { diff --git a/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml b/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml index 4cb9d6b931389..c61c4a6b57f10 100644 --- a/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml +++ b/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml @@ -58,7 +58,7 @@ if: then: properties: clocks: - maxItems: 2 + minItems: 2 required: - clock-names diff --git a/Documentation/devicetree/bindings/gpio/gpio-altera.txt b/Documentation/devicetree/bindings/gpio/gpio-altera.txt index 146e554b3c676..2a80e272cd666 100644 --- a/Documentation/devicetree/bindings/gpio/gpio-altera.txt +++ b/Documentation/devicetree/bindings/gpio/gpio-altera.txt @@ -9,8 +9,9 @@ Required properties: - The second cell is reserved and is currently unused. - gpio-controller : Marks the device node as a GPIO controller. - interrupt-controller: Mark the device node as an interrupt controller -- #interrupt-cells : Should be 1. The interrupt type is fixed in the hardware. +- #interrupt-cells : Should be 2. The interrupt type is fixed in the hardware. - The first cell is the GPIO offset number within the GPIO controller. + - The second cell is the interrupt trigger type and level flags. - interrupts: Specify the interrupt. - altr,interrupt-type: Specifies the interrupt trigger type the GPIO hardware is synthesized. This field is required if the Altera GPIO controller @@ -38,6 +39,6 @@ gpio_altr: gpio@ff200000 { altr,interrupt-type = ; #gpio-cells = <2>; gpio-controller; - #interrupt-cells = <1>; + #interrupt-cells = <2>; interrupt-controller; }; diff --git a/Documentation/devicetree/bindings/mtd/gpmc-nand.txt b/Documentation/devicetree/bindings/mtd/gpmc-nand.txt index 44919d48d2415..c459f169a9044 100644 --- a/Documentation/devicetree/bindings/mtd/gpmc-nand.txt +++ b/Documentation/devicetree/bindings/mtd/gpmc-nand.txt @@ -122,7 +122,7 @@ on various other factors also like; so the device should have enough free bytes available its OOB/Spare area to accommodate ECC for entire page. In general following expression helps in determining if given device can accommodate ECC syndrome: - "2 + (PAGESIZE / 512) * ECC_BYTES" >= OOBSIZE" + "2 + (PAGESIZE / 512) * ECC_BYTES" <= OOBSIZE" where OOBSIZE number of bytes in OOB/spare area PAGESIZE number of bytes in main-area of device page diff --git a/Documentation/devicetree/bindings/mtd/nand-controller.yaml b/Documentation/devicetree/bindings/mtd/nand-controller.yaml index d261b7096c696..2767f182fd3c5 100644 --- a/Documentation/devicetree/bindings/mtd/nand-controller.yaml +++ b/Documentation/devicetree/bindings/mtd/nand-controller.yaml @@ -44,7 +44,7 @@ patternProperties: properties: reg: description: - Contains the native Ready/Busy IDs. + Contains the chip-select IDs. nand-ecc-mode: allOf: @@ -139,6 +139,6 @@ examples: nand-ecc-mode = "soft"; nand-ecc-algo = "bch"; - /* controller specific properties */ + /* NAND chip specific properties */ }; }; diff --git a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt index 9cb3560756d00..53c26ffd020a3 100644 --- a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt +++ b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt @@ -31,7 +31,7 @@ tcan4x5x: tcan4x5x@0 { #address-cells = <1>; #size-cells = <1>; spi-max-frequency = <10000000>; - bosch,mram-cfg = <0x0 0 0 32 0 0 1 1>; + bosch,mram-cfg = <0x0 0 0 16 0 0 1 1>; interrupt-parent = <&gpio1>; interrupts = <14 IRQ_TYPE_LEVEL_LOW>; device-state-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>; diff --git a/Documentation/devicetree/bindings/net/ethernet-phy.yaml b/Documentation/devicetree/bindings/net/ethernet-phy.yaml index f70f18ff821f5..8f3e9c774b74a 100644 --- a/Documentation/devicetree/bindings/net/ethernet-phy.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-phy.yaml @@ -87,6 +87,14 @@ properties: compensate for the board being designed with the lanes swapped. + enet-phy-lane-no-swap: + $ref: /schemas/types.yaml#/definitions/flag + description: + If set, indicates that PHY will disable swap of the + TX/RX lanes. This property allows the PHY to work correcly after + e.g. wrong bootstrap configuration caused by issues in PCB + layout design. + eee-broken-100tx: $ref: /schemas/types.yaml#definitions/flag description: diff --git a/Documentation/devicetree/bindings/pinctrl/marvell,armada-37xx-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/marvell,armada-37xx-pinctrl.txt index 38dc56a577604..ecec514b31550 100644 --- a/Documentation/devicetree/bindings/pinctrl/marvell,armada-37xx-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/marvell,armada-37xx-pinctrl.txt @@ -43,19 +43,19 @@ group emmc_nb group pwm0 - pin 11 (GPIO1-11) - - functions pwm, gpio + - functions pwm, led, gpio group pwm1 - pin 12 - - functions pwm, gpio + - functions pwm, led, gpio group pwm2 - pin 13 - - functions pwm, gpio + - functions pwm, led, gpio group pwm3 - pin 14 - - functions pwm, gpio + - functions pwm, led, gpio group pmic1 - pin 7 diff --git a/Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt b/Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt index 093edda0c8dfc..6cd83d920155f 100644 --- a/Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt +++ b/Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt @@ -13,6 +13,14 @@ common regulator binding documented in: Required properties of the main device node (the parent!): + - s5m8767,pmic-buck-ds-gpios: GPIO specifiers for three host gpio's used + for selecting GPIO DVS lines. It is one-to-one mapped to dvs gpio lines. + + [1] If either of the 's5m8767,pmic-buck[2/3/4]-uses-gpio-dvs' optional + property is specified, then all the eight voltage values for the + 's5m8767,pmic-buck[2/3/4]-dvs-voltage' should be specified. + +Optional properties of the main device node (the parent!): - s5m8767,pmic-buck2-dvs-voltage: A set of 8 voltage values in micro-volt (uV) units for buck2 when changing voltage using gpio dvs. Refer to [1] below for additional information. @@ -25,26 +33,13 @@ Required properties of the main device node (the parent!): units for buck4 when changing voltage using gpio dvs. Refer to [1] below for additional information. - - s5m8767,pmic-buck-ds-gpios: GPIO specifiers for three host gpio's used - for selecting GPIO DVS lines. It is one-to-one mapped to dvs gpio lines. - - [1] If none of the 's5m8767,pmic-buck[2/3/4]-uses-gpio-dvs' optional - property is specified, the 's5m8767,pmic-buck[2/3/4]-dvs-voltage' - property should specify atleast one voltage level (which would be a - safe operating voltage). - - If either of the 's5m8767,pmic-buck[2/3/4]-uses-gpio-dvs' optional - property is specified, then all the eight voltage values for the - 's5m8767,pmic-buck[2/3/4]-dvs-voltage' should be specified. - -Optional properties of the main device node (the parent!): - s5m8767,pmic-buck2-uses-gpio-dvs: 'buck2' can be controlled by gpio dvs. - s5m8767,pmic-buck3-uses-gpio-dvs: 'buck3' can be controlled by gpio dvs. - s5m8767,pmic-buck4-uses-gpio-dvs: 'buck4' can be controlled by gpio dvs. Additional properties required if either of the optional properties are used: - - s5m8767,pmic-buck234-default-dvs-idx: Default voltage setting selected from + - s5m8767,pmic-buck-default-dvs-idx: Default voltage setting selected from the possible 8 options selectable by the dvs gpios. The value of this property should be between 0 and 7. If not specified or if out of range, the default value of this property is set to 0. diff --git a/Documentation/devicetree/bindings/spi/spi-mxic.txt b/Documentation/devicetree/bindings/spi/spi-mxic.txt index 529f2dab2648a..7bcbb229b78bb 100644 --- a/Documentation/devicetree/bindings/spi/spi-mxic.txt +++ b/Documentation/devicetree/bindings/spi/spi-mxic.txt @@ -8,11 +8,13 @@ Required properties: - reg: should contain 2 entries, one for the registers and one for the direct mapping area - reg-names: should contain "regs" and "dirmap" -- interrupts: interrupt line connected to the SPI controller - clock-names: should contain "ps_clk", "send_clk" and "send_dly_clk" - clocks: should contain 3 entries for the "ps_clk", "send_clk" and "send_dly_clk" clocks +Optional properties: +- interrupts: interrupt line connected to the SPI controller + Example: spi@43c30000 { diff --git a/Documentation/driver-api/dmaengine/dmatest.rst b/Documentation/driver-api/dmaengine/dmatest.rst index ee268d445d38b..d2e1d8b58e7dc 100644 --- a/Documentation/driver-api/dmaengine/dmatest.rst +++ b/Documentation/driver-api/dmaengine/dmatest.rst @@ -143,13 +143,14 @@ Part 5 - Handling channel allocation Allocating Channels ------------------- -Channels are required to be configured prior to starting the test run. -Attempting to run the test without configuring the channels will fail. +Channels do not need to be configured prior to starting a test run. Attempting +to run the test without configuring the channels will result in testing any +channels that are available. Example:: % echo 1 > /sys/module/dmatest/parameters/run - dmatest: Could not start test, no channels configured + dmatest: No channels configured, continue with any Channels are registered using the "channel" parameter. Channels can be requested by their name, once requested, the channel is registered and a pending thread is added to the test list. diff --git a/Documentation/driver-api/firewire.rst b/Documentation/driver-api/firewire.rst index 94a2d7f01d999..d3cfa73cbb2b4 100644 --- a/Documentation/driver-api/firewire.rst +++ b/Documentation/driver-api/firewire.rst @@ -19,7 +19,7 @@ of kernel interfaces is available via exported symbols in `firewire-core` module Firewire char device data structures ==================================== -.. include:: /ABI/stable/firewire-cdev +.. include:: ../ABI/stable/firewire-cdev :literal: .. kernel-doc:: include/uapi/linux/firewire-cdev.h @@ -28,7 +28,7 @@ Firewire char device data structures Firewire device probing and sysfs interfaces ============================================ -.. include:: /ABI/stable/sysfs-bus-firewire +.. include:: ../ABI/stable/sysfs-bus-firewire :literal: .. kernel-doc:: drivers/firewire/core-device.c diff --git a/Documentation/filesystems/ext4/attributes.rst b/Documentation/filesystems/ext4/attributes.rst index 54386a010a8d7..871d2da7a0a91 100644 --- a/Documentation/filesystems/ext4/attributes.rst +++ b/Documentation/filesystems/ext4/attributes.rst @@ -76,7 +76,7 @@ The beginning of an extended attribute block is in - Checksum of the extended attribute block. * - 0x14 - \_\_u32 - - h\_reserved[2] + - h\_reserved[3] - Zero. The checksum is calculated against the FS UUID, the 64-bit block number diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index fc3a0704553cf..fdaeda5fc20c8 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -428,14 +428,14 @@ prototypes:: locking rules: -========== ============= ================= ========= +====================== ============= ================= ========= ops inode->i_lock blocked_lock_lock may block -========== ============= ================= ========= +====================== ============= ================= ========= lm_notify: yes yes no lm_grant: no no no lm_break: yes no no lm_change yes no no -========== ============= ================= ========= +====================== ============= ================= ========= buffer_head =========== @@ -610,9 +610,9 @@ prototypes:: locking rules: -============= ======== =========================== +============= ========= =========================== ops mmap_sem PageLocked(page) -============= ======== =========================== +============= ========= =========================== open: yes close: yes fault: yes can return with page locked @@ -620,7 +620,7 @@ map_pages: yes page_mkwrite: yes can return with page locked pfn_mkwrite: yes access: yes -============= ======== =========================== +============= ========= =========================== ->fault() is called when a previously not present pte is about to be faulted in. The filesystem must find and return the page associated diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt index 845d689e0fd71..11551b0f7ce16 100644 --- a/Documentation/filesystems/overlayfs.txt +++ b/Documentation/filesystems/overlayfs.txt @@ -481,6 +481,33 @@ verified on mount time to check that upper file handles are not stale. This verification may cause significant overhead in some cases. +Volatile mount +-------------- + +This is enabled with the "volatile" mount option. Volatile mounts are not +guaranteed to survive a crash. It is strongly recommended that volatile +mounts are only used if data written to the overlay can be recreated +without significant effort. + +The advantage of mounting with the "volatile" option is that all forms of +sync calls to the upper filesystem are omitted. + +In order to avoid a giving a false sense of safety, the syncfs (and fsync) +semantics of volatile mounts are slightly different than that of the rest of +VFS. If any writeback error occurs on the upperdir's filesystem after a +volatile mount takes place, all sync functions will return an error. Once this +condition is reached, the filesystem will not recover, and every subsequent sync +call will return an error, even if the upperdir has not experience a new error +since the last sync call. + +When overlay is mounted with "volatile" option, the directory +"$workdir/work/incompat/volatile" is created. During next mount, overlay +checks for this directory and refuses to mount if present. This is a strong +indicator that user should throw away upper and work directories and create +fresh one. In very limited cases where the user knows that the system has +not crashed and contents of upperdir are intact, The "volatile" directory +can be removed. + Testsuite --------- diff --git a/Documentation/firmware-guide/acpi/dsd/data-node-references.rst b/Documentation/firmware-guide/acpi/dsd/data-node-references.rst index febccbc5689d0..1b05e8d0ddff7 100644 --- a/Documentation/firmware-guide/acpi/dsd/data-node-references.rst +++ b/Documentation/firmware-guide/acpi/dsd/data-node-references.rst @@ -5,7 +5,7 @@ Referencing hierarchical data nodes =================================== -:Copyright: |copy| 2018 Intel Corporation +:Copyright: |copy| 2018, 2021 Intel Corporation :Author: Sakari Ailus ACPI in general allows referring to device objects in the tree only. @@ -52,12 +52,14 @@ the ANOD object which is also the final target node of the reference. Name (NOD0, Package() { ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"), Package () { + Package () { "reg", 0 }, Package () { "random-property", 3 }, } }) Name (NOD1, Package() { ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"), Package () { + Package () { "reg", 1 }, Package () { "anothernode", "ANOD" }, } }) @@ -74,7 +76,11 @@ the ANOD object which is also the final target node of the reference. Name (_DSD, Package () { ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"), Package () { - Package () { "reference", ^DEV0, "node@1", "anothernode" }, + Package () { + "reference", Package () { + ^DEV0, "node@1", "anothernode" + } + }, } }) } diff --git a/Documentation/hwmon/k10temp.rst b/Documentation/hwmon/k10temp.rst index 4451d59b94259..91b99adc6c487 100644 --- a/Documentation/hwmon/k10temp.rst +++ b/Documentation/hwmon/k10temp.rst @@ -100,9 +100,10 @@ socket type, not the processor's actual capabilities. Therefore, if you are using an AM3 processor on an AM2+ mainboard, you can safely use the "force=1" parameter. -There is one temperature measurement value, available as temp1_input in -sysfs. It is measured in degrees Celsius with a resolution of 1/8th degree. -Please note that it is defined as a relative value; to quote the AMD manual:: +For CPUs older than Family 17h, there is one temperature measurement value, +available as temp1_input in sysfs. It is measured in degrees Celsius with a +resolution of 1/8th degree. Please note that it is defined as a relative +value; to quote the AMD manual:: Tctl is the processor temperature control value, used by the platform to control cooling systems. Tctl is a non-physical temperature on an @@ -126,3 +127,8 @@ it. Models from 17h family report relative temperature, the driver aims to compensate and report the real temperature. + +On Family 17h and Family 18h CPUs, additional temperature sensors may report +Core Complex Die (CCD) temperatures. Up to 8 such temperatures are reported +as temp{3..10}_input, labeled Tccd{1..8}. Actual support depends on the CPU +variant. diff --git a/Documentation/hwmon/lm90.rst b/Documentation/hwmon/lm90.rst index 953315987c06e..03851cbe637ce 100644 --- a/Documentation/hwmon/lm90.rst +++ b/Documentation/hwmon/lm90.rst @@ -123,6 +123,18 @@ Supported chips: http://www.maxim-ic.com/quick_view2.cfm/qv_pk/3497 + * Maxim MAX6654 + + Prefix: 'max6654' + + Addresses scanned: I2C 0x18, 0x19, 0x1a, 0x29, 0x2a, 0x2b, + + 0x4c, 0x4d and 0x4e + + Datasheet: Publicly available at the Maxim website + + https://www.maximintegrated.com/en/products/sensors/MAX6654.html + * Maxim MAX6657 Prefix: 'max6657' @@ -253,6 +265,16 @@ Supported chips: http://www.ti.com/litv/pdf/sbos686 + * Texas Instruments TMP461 + + Prefix: 'tmp461' + + Addresses scanned: I2C 0x48 through 0x4F + + Datasheet: Publicly available at TI website + + https://www.ti.com/lit/gpn/tmp461 + Author: Jean Delvare @@ -301,6 +323,13 @@ ADT7461, ADT7461A, NCT1008: * Extended temperature range (breaks compatibility) * Lower resolution for remote temperature +MAX6654: + * Better local resolution + * Selectable address + * Remote sensor type selection + * Extended temperature range + * Extended resolution only available when conversion rate <= 1 Hz + MAX6657 and MAX6658: * Better local resolution * Remote sensor type selection @@ -336,8 +365,8 @@ SA56004X: All temperature values are given in degrees Celsius. Resolution is 1.0 degree for the local temperature, 0.125 degree for the remote -temperature, except for the MAX6657, MAX6658 and MAX6659 which have a -resolution of 0.125 degree for both temperatures. +temperature, except for the MAX6654, MAX6657, MAX6658 and MAX6659 which have +a resolution of 0.125 degree for both temperatures. Each sensor has its own high and low limits, plus a critical limit. Additionally, there is a relative hysteresis value common to both critical diff --git a/Documentation/i2c/busses/i2c-i801.rst b/Documentation/i2c/busses/i2c-i801.rst index 2a570c214880d..c53a0e7f5f3ae 100644 --- a/Documentation/i2c/busses/i2c-i801.rst +++ b/Documentation/i2c/busses/i2c-i801.rst @@ -42,6 +42,7 @@ Supported adapters: * Intel Comet Lake (PCH) * Intel Elkhart Lake (PCH) * Intel Tiger Lake (PCH) + * Intel Emmitsburg (PCH) Datasheets: Publicly available at the Intel website diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index e3abfbd32f71e..b020e6ce6dd49 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt @@ -191,11 +191,12 @@ ad_actor_sys_prio ad_actor_system In an AD system, this specifies the mac-address for the actor in - protocol packet exchanges (LACPDUs). The value cannot be NULL or - multicast. It is preferred to have the local-admin bit set for this - mac but driver does not enforce it. If the value is not given then - system defaults to using the masters' mac address as actors' system - address. + protocol packet exchanges (LACPDUs). The value cannot be a multicast + address. If the all-zeroes MAC is specified, bonding will internally + use the MAC of the bond itself. It is preferred to have the + local-admin bit set for this mac but driver does not enforce it. If + the value is not given then system defaults to using the masters' + mac address as actors' system address. This parameter has effect only in 802.3ad mode and is available through SysFs interface. diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst new file mode 100644 index 0000000000000..58659a82d88a6 --- /dev/null +++ b/Documentation/networking/ethtool-netlink.rst @@ -0,0 +1,1402 @@ +============================= +Netlink interface for ethtool +============================= + + +Basic information +================= + +Netlink interface for ethtool uses generic netlink family ``ethtool`` +(userspace application should use macros ``ETHTOOL_GENL_NAME`` and +``ETHTOOL_GENL_VERSION`` defined in ```` uapi +header). This family does not use a specific header, all information in +requests and replies is passed using netlink attributes. + +The ethtool netlink interface uses extended ACK for error and warning +reporting, userspace application developers are encouraged to make these +messages available to user in a suitable way. + +Requests can be divided into three categories: "get" (retrieving information), +"set" (setting parameters) and "action" (invoking an action). + +All "set" and "action" type requests require admin privileges +(``CAP_NET_ADMIN`` in the namespace). Most "get" type requests are allowed for +anyone but there are exceptions (where the response contains sensitive +information). In some cases, the request as such is allowed for anyone but +unprivileged users have attributes with sensitive information (e.g. +wake-on-lan password) omitted. + + +Conventions +=========== + +Attributes which represent a boolean value usually use NLA_U8 type so that we +can distinguish three states: "on", "off" and "not present" (meaning the +information is not available in "get" requests or value is not to be changed +in "set" requests). For these attributes, the "true" value should be passed as +number 1 but any non-zero value should be understood as "true" by recipient. +In the tables below, "bool" denotes NLA_U8 attributes interpreted in this way. + +In the message structure descriptions below, if an attribute name is suffixed +with "+", parent nest can contain multiple attributes of the same type. This +implements an array of entries. + + +Request header +============== + +Each request or reply message contains a nested attribute with common header. +Structure of this header is + + ============================== ====== ============================= + ``ETHTOOL_A_HEADER_DEV_INDEX`` u32 device ifindex + ``ETHTOOL_A_HEADER_DEV_NAME`` string device name + ``ETHTOOL_A_HEADER_FLAGS`` u32 flags common for all requests + ============================== ====== ============================= + +``ETHTOOL_A_HEADER_DEV_INDEX`` and ``ETHTOOL_A_HEADER_DEV_NAME`` identify the +device message relates to. One of them is sufficient in requests, if both are +used, they must identify the same device. Some requests, e.g. global string +sets, do not require device identification. Most ``GET`` requests also allow +dump requests without device identification to query the same information for +all devices providing it (each device in a separate message). + +``ETHTOOL_A_HEADER_FLAGS`` is a bitmap of request flags common for all request +types. The interpretation of these flags is the same for all request types but +the flags may not apply to requests. Recognized flags are: + + ================================= =================================== + ``ETHTOOL_FLAG_COMPACT_BITSETS`` use compact format bitsets in reply + ``ETHTOOL_FLAG_OMIT_REPLY`` omit optional reply (_SET and _ACT) + ``ETHTOOL_FLAG_STATS`` include optional device statistics + ================================= =================================== + +New request flags should follow the general idea that if the flag is not set, +the behaviour is backward compatible, i.e. requests from old clients not aware +of the flag should be interpreted the way the client expects. A client must +not set flags it does not understand. + + +Bit sets +======== + +For short bitmaps of (reasonably) fixed length, standard ``NLA_BITFIELD32`` +type is used. For arbitrary length bitmaps, ethtool netlink uses a nested +attribute with contents of one of two forms: compact (two binary bitmaps +representing bit values and mask of affected bits) and bit-by-bit (list of +bits identified by either index or name). + +Verbose (bit-by-bit) bitsets allow sending symbolic names for bits together +with their values which saves a round trip (when the bitset is passed in a +request) or at least a second request (when the bitset is in a reply). This is +useful for one shot applications like traditional ethtool command. On the +other hand, long running applications like ethtool monitor (displaying +notifications) or network management daemons may prefer fetching the names +only once and using compact form to save message size. Notifications from +ethtool netlink interface always use compact form for bitsets. + +A bitset can represent either a value/mask pair (``ETHTOOL_A_BITSET_NOMASK`` +not set) or a single bitmap (``ETHTOOL_A_BITSET_NOMASK`` set). In requests +modifying a bitmap, the former changes the bit set in mask to values set in +value and preserves the rest; the latter sets the bits set in the bitmap and +clears the rest. + +Compact form: nested (bitset) atrribute contents: + + ============================ ====== ============================ + ``ETHTOOL_A_BITSET_NOMASK`` flag no mask, only a list + ``ETHTOOL_A_BITSET_SIZE`` u32 number of significant bits + ``ETHTOOL_A_BITSET_VALUE`` binary bitmap of bit values + ``ETHTOOL_A_BITSET_MASK`` binary bitmap of valid bits + ============================ ====== ============================ + +Value and mask must have length at least ``ETHTOOL_A_BITSET_SIZE`` bits +rounded up to a multiple of 32 bits. They consist of 32-bit words in host byte +order, words ordered from least significant to most significant (i.e. the same +way as bitmaps are passed with ioctl interface). + +For compact form, ``ETHTOOL_A_BITSET_SIZE`` and ``ETHTOOL_A_BITSET_VALUE`` are +mandatory. ``ETHTOOL_A_BITSET_MASK`` attribute is mandatory if +``ETHTOOL_A_BITSET_NOMASK`` is not set (bitset represents a value/mask pair); +if ``ETHTOOL_A_BITSET_NOMASK`` is not set, ``ETHTOOL_A_BITSET_MASK`` is not +allowed (bitset represents a single bitmap. + +Kernel bit set length may differ from userspace length if older application is +used on newer kernel or vice versa. If userspace bitmap is longer, an error is +issued only if the request actually tries to set values of some bits not +recognized by kernel. + +Bit-by-bit form: nested (bitset) attribute contents: + + +------------------------------------+--------+-----------------------------+ + | ``ETHTOOL_A_BITSET_NOMASK`` | flag | no mask, only a list | + +------------------------------------+--------+-----------------------------+ + | ``ETHTOOL_A_BITSET_SIZE`` | u32 | number of significant bits | + +------------------------------------+--------+-----------------------------+ + | ``ETHTOOL_A_BITSET_BITS`` | nested | array of bits | + +-+----------------------------------+--------+-----------------------------+ + | | ``ETHTOOL_A_BITSET_BITS_BIT+`` | nested | one bit | + +-+-+--------------------------------+--------+-----------------------------+ + | | | ``ETHTOOL_A_BITSET_BIT_INDEX`` | u32 | bit index (0 for LSB) | + +-+-+--------------------------------+--------+-----------------------------+ + | | | ``ETHTOOL_A_BITSET_BIT_NAME`` | string | bit name | + +-+-+--------------------------------+--------+-----------------------------+ + | | | ``ETHTOOL_A_BITSET_BIT_VALUE`` | flag | present if bit is set | + +-+-+--------------------------------+--------+-----------------------------+ + +Bit size is optional for bit-by-bit form. ``ETHTOOL_A_BITSET_BITS`` nest can +only contain ``ETHTOOL_A_BITSET_BITS_BIT`` attributes but there can be an +arbitrary number of them. A bit may be identified by its index or by its +name. When used in requests, listed bits are set to 0 or 1 according to +``ETHTOOL_A_BITSET_BIT_VALUE``, the rest is preserved. A request fails if +index exceeds kernel bit length or if name is not recognized. + +When ``ETHTOOL_A_BITSET_NOMASK`` flag is present, bitset is interpreted as +a simple bitmap. ``ETHTOOL_A_BITSET_BIT_VALUE`` attributes are not used in +such case. Such bitset represents a bitmap with listed bits set and the rest +zero. + +In requests, application can use either form. Form used by kernel in reply is +determined by ``ETHTOOL_FLAG_COMPACT_BITSETS`` flag in flags field of request +header. Semantics of value and mask depends on the attribute. + + +List of message types +===================== + +All constants identifying message types use ``ETHTOOL_CMD_`` prefix and suffix +according to message purpose: + + ============== ====================================== + ``_GET`` userspace request to retrieve data + ``_SET`` userspace request to set data + ``_ACT`` userspace request to perform an action + ``_GET_REPLY`` kernel reply to a ``GET`` request + ``_SET_REPLY`` kernel reply to a ``SET`` request + ``_ACT_REPLY`` kernel reply to an ``ACT`` request + ``_NTF`` kernel notification + ============== ====================================== + +Userspace to kernel: + + ===================================== ================================ + ``ETHTOOL_MSG_STRSET_GET`` get string set + ``ETHTOOL_MSG_LINKINFO_GET`` get link settings + ``ETHTOOL_MSG_LINKINFO_SET`` set link settings + ``ETHTOOL_MSG_LINKMODES_GET`` get link modes info + ``ETHTOOL_MSG_LINKMODES_SET`` set link modes info + ``ETHTOOL_MSG_LINKSTATE_GET`` get link state + ``ETHTOOL_MSG_DEBUG_GET`` get debugging settings + ``ETHTOOL_MSG_DEBUG_SET`` set debugging settings + ``ETHTOOL_MSG_WOL_GET`` get wake-on-lan settings + ``ETHTOOL_MSG_WOL_SET`` set wake-on-lan settings + ``ETHTOOL_MSG_FEATURES_GET`` get device features + ``ETHTOOL_MSG_FEATURES_SET`` set device features + ``ETHTOOL_MSG_PRIVFLAGS_GET`` get private flags + ``ETHTOOL_MSG_PRIVFLAGS_SET`` set private flags + ``ETHTOOL_MSG_RINGS_GET`` get ring sizes + ``ETHTOOL_MSG_RINGS_SET`` set ring sizes + ``ETHTOOL_MSG_CHANNELS_GET`` get channel counts + ``ETHTOOL_MSG_CHANNELS_SET`` set channel counts + ``ETHTOOL_MSG_COALESCE_GET`` get coalescing parameters + ``ETHTOOL_MSG_COALESCE_SET`` set coalescing parameters + ``ETHTOOL_MSG_PAUSE_GET`` get pause parameters + ``ETHTOOL_MSG_PAUSE_SET`` set pause parameters + ``ETHTOOL_MSG_EEE_GET`` get EEE settings + ``ETHTOOL_MSG_EEE_SET`` set EEE settings + ``ETHTOOL_MSG_TSINFO_GET`` get timestamping info + ``ETHTOOL_MSG_CABLE_TEST_ACT`` action start cable test + ``ETHTOOL_MSG_CABLE_TEST_TDR_ACT`` action start raw TDR cable test + ``ETHTOOL_MSG_TUNNEL_INFO_GET`` get tunnel offload info + ``ETHTOOL_MSG_FEC_GET`` get FEC settings + ``ETHTOOL_MSG_FEC_SET`` set FEC settings + ``ETHTOOL_MSG_MODULE_EEPROM_GET`` read SFP module EEPROM + ``ETHTOOL_MSG_STATS_GET`` get standard statistics + ===================================== ================================ + +Kernel to userspace: + + ======================================== ================================= + ``ETHTOOL_MSG_STRSET_GET_REPLY`` string set contents + ``ETHTOOL_MSG_LINKINFO_GET_REPLY`` link settings + ``ETHTOOL_MSG_LINKINFO_NTF`` link settings notification + ``ETHTOOL_MSG_LINKMODES_GET_REPLY`` link modes info + ``ETHTOOL_MSG_LINKMODES_NTF`` link modes notification + ``ETHTOOL_MSG_LINKSTATE_GET_REPLY`` link state info + ``ETHTOOL_MSG_DEBUG_GET_REPLY`` debugging settings + ``ETHTOOL_MSG_DEBUG_NTF`` debugging settings notification + ``ETHTOOL_MSG_WOL_GET_REPLY`` wake-on-lan settings + ``ETHTOOL_MSG_WOL_NTF`` wake-on-lan settings notification + ``ETHTOOL_MSG_FEATURES_GET_REPLY`` device features + ``ETHTOOL_MSG_FEATURES_SET_REPLY`` optional reply to FEATURES_SET + ``ETHTOOL_MSG_FEATURES_NTF`` netdev features notification + ``ETHTOOL_MSG_PRIVFLAGS_GET_REPLY`` private flags + ``ETHTOOL_MSG_PRIVFLAGS_NTF`` private flags + ``ETHTOOL_MSG_RINGS_GET_REPLY`` ring sizes + ``ETHTOOL_MSG_RINGS_NTF`` ring sizes + ``ETHTOOL_MSG_CHANNELS_GET_REPLY`` channel counts + ``ETHTOOL_MSG_CHANNELS_NTF`` channel counts + ``ETHTOOL_MSG_COALESCE_GET_REPLY`` coalescing parameters + ``ETHTOOL_MSG_COALESCE_NTF`` coalescing parameters + ``ETHTOOL_MSG_PAUSE_GET_REPLY`` pause parameters + ``ETHTOOL_MSG_PAUSE_NTF`` pause parameters + ``ETHTOOL_MSG_EEE_GET_REPLY`` EEE settings + ``ETHTOOL_MSG_EEE_NTF`` EEE settings + ``ETHTOOL_MSG_TSINFO_GET_REPLY`` timestamping info + ``ETHTOOL_MSG_CABLE_TEST_NTF`` Cable test results + ``ETHTOOL_MSG_CABLE_TEST_TDR_NTF`` Cable test TDR results + ``ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY`` tunnel offload info + ``ETHTOOL_MSG_FEC_GET_REPLY`` FEC settings + ``ETHTOOL_MSG_FEC_NTF`` FEC settings + ``ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY`` read SFP module EEPROM + ``ETHTOOL_MSG_STATS_GET_REPLY`` standard statistics + ======================================== ================================= + +``GET`` requests are sent by userspace applications to retrieve device +information. They usually do not contain any message specific attributes. +Kernel replies with corresponding "GET_REPLY" message. For most types, ``GET`` +request with ``NLM_F_DUMP`` and no device identification can be used to query +the information for all devices supporting the request. + +If the data can be also modified, corresponding ``SET`` message with the same +layout as corresponding ``GET_REPLY`` is used to request changes. Only +attributes where a change is requested are included in such request (also, not +all attributes may be changed). Replies to most ``SET`` request consist only +of error code and extack; if kernel provides additional data, it is sent in +the form of corresponding ``SET_REPLY`` message which can be suppressed by +setting ``ETHTOOL_FLAG_OMIT_REPLY`` flag in request header. + +Data modification also triggers sending a ``NTF`` message with a notification. +These usually bear only a subset of attributes which was affected by the +change. The same notification is issued if the data is modified using other +means (mostly ioctl ethtool interface). Unlike notifications from ethtool +netlink code which are only sent if something actually changed, notifications +triggered by ioctl interface may be sent even if the request did not actually +change any data. + +``ACT`` messages request kernel (driver) to perform a specific action. If some +information is reported by kernel (which can be suppressed by setting +``ETHTOOL_FLAG_OMIT_REPLY`` flag in request header), the reply takes form of +an ``ACT_REPLY`` message. Performing an action also triggers a notification +(``NTF`` message). + +Later sections describe the format and semantics of these messages. + + +STRSET_GET +========== + +Requests contents of a string set as provided by ioctl commands +``ETHTOOL_GSSET_INFO`` and ``ETHTOOL_GSTRINGS.`` String sets are not user +writeable so that the corresponding ``STRSET_SET`` message is only used in +kernel replies. There are two types of string sets: global (independent of +a device, e.g. device feature names) and device specific (e.g. device private +flags). + +Request contents: + + +---------------------------------------+--------+------------------------+ + | ``ETHTOOL_A_STRSET_HEADER`` | nested | request header | + +---------------------------------------+--------+------------------------+ + | ``ETHTOOL_A_STRSET_STRINGSETS`` | nested | string set to request | + +-+-------------------------------------+--------+------------------------+ + | | ``ETHTOOL_A_STRINGSETS_STRINGSET+`` | nested | one string set | + +-+-+-----------------------------------+--------+------------------------+ + | | | ``ETHTOOL_A_STRINGSET_ID`` | u32 | set id | + +-+-+-----------------------------------+--------+------------------------+ + +Kernel response contents: + + +---------------------------------------+--------+-----------------------+ + | ``ETHTOOL_A_STRSET_HEADER`` | nested | reply header | + +---------------------------------------+--------+-----------------------+ + | ``ETHTOOL_A_STRSET_STRINGSETS`` | nested | array of string sets | + +-+-------------------------------------+--------+-----------------------+ + | | ``ETHTOOL_A_STRINGSETS_STRINGSET+`` | nested | one string set | + +-+-+-----------------------------------+--------+-----------------------+ + | | | ``ETHTOOL_A_STRINGSET_ID`` | u32 | set id | + +-+-+-----------------------------------+--------+-----------------------+ + | | | ``ETHTOOL_A_STRINGSET_COUNT`` | u32 | number of strings | + +-+-+-----------------------------------+--------+-----------------------+ + | | | ``ETHTOOL_A_STRINGSET_STRINGS`` | nested | array of strings | + +-+-+-+---------------------------------+--------+-----------------------+ + | | | | ``ETHTOOL_A_STRINGS_STRING+`` | nested | one string | + +-+-+-+-+-------------------------------+--------+-----------------------+ + | | | | | ``ETHTOOL_A_STRING_INDEX`` | u32 | string index | + +-+-+-+-+-------------------------------+--------+-----------------------+ + | | | | | ``ETHTOOL_A_STRING_VALUE`` | string | string value | + +-+-+-+-+-------------------------------+--------+-----------------------+ + | ``ETHTOOL_A_STRSET_COUNTS_ONLY`` | flag | return only counts | + +---------------------------------------+--------+-----------------------+ + +Device identification in request header is optional. Depending on its presence +a and ``NLM_F_DUMP`` flag, there are three type of ``STRSET_GET`` requests: + + - no ``NLM_F_DUMP,`` no device: get "global" stringsets + - no ``NLM_F_DUMP``, with device: get string sets related to the device + - ``NLM_F_DUMP``, no device: get device related string sets for all devices + +If there is no ``ETHTOOL_A_STRSET_STRINGSETS`` array, all string sets of +requested type are returned, otherwise only those specified in the request. +Flag ``ETHTOOL_A_STRSET_COUNTS_ONLY`` tells kernel to only return string +counts of the sets, not the actual strings. + + +LINKINFO_GET +============ + +Requests link settings as provided by ``ETHTOOL_GLINKSETTINGS`` except for +link modes and autonegotiation related information. The request does not use +any attributes. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_LINKINFO_HEADER`` nested request header + ==================================== ====== ========================== + +Kernel response contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_LINKINFO_HEADER`` nested reply header + ``ETHTOOL_A_LINKINFO_PORT`` u8 physical port + ``ETHTOOL_A_LINKINFO_PHYADDR`` u8 phy MDIO address + ``ETHTOOL_A_LINKINFO_TP_MDIX`` u8 MDI(-X) status + ``ETHTOOL_A_LINKINFO_TP_MDIX_CTRL`` u8 MDI(-X) control + ``ETHTOOL_A_LINKINFO_TRANSCEIVER`` u8 transceiver + ==================================== ====== ========================== + +Attributes and their values have the same meaning as matching members of the +corresponding ioctl structures. + +``LINKINFO_GET`` allows dump requests (kernel returns reply message for all +devices supporting the request). + + +LINKINFO_SET +============ + +``LINKINFO_SET`` request allows setting some of the attributes reported by +``LINKINFO_GET``. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_LINKINFO_HEADER`` nested request header + ``ETHTOOL_A_LINKINFO_PORT`` u8 physical port + ``ETHTOOL_A_LINKINFO_PHYADDR`` u8 phy MDIO address + ``ETHTOOL_A_LINKINFO_TP_MDIX_CTRL`` u8 MDI(-X) control + ==================================== ====== ========================== + +MDI(-X) status and transceiver cannot be set, request with the corresponding +attributes is rejected. + + +LINKMODES_GET +============= + +Requests link modes (supported, advertised and peer advertised) and related +information (autonegotiation status, link speed and duplex) as provided by +``ETHTOOL_GLINKSETTINGS``. The request does not use any attributes. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_LINKMODES_HEADER`` nested request header + ==================================== ====== ========================== + +Kernel response contents: + + ========================================== ====== ========================== + ``ETHTOOL_A_LINKMODES_HEADER`` nested reply header + ``ETHTOOL_A_LINKMODES_AUTONEG`` u8 autonegotiation status + ``ETHTOOL_A_LINKMODES_OURS`` bitset advertised link modes + ``ETHTOOL_A_LINKMODES_PEER`` bitset partner link modes + ``ETHTOOL_A_LINKMODES_SPEED`` u32 link speed (Mb/s) + ``ETHTOOL_A_LINKMODES_DUPLEX`` u8 duplex mode + ``ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG`` u8 Master/slave port mode + ``ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE`` u8 Master/slave port state + ========================================== ====== ========================== + +For ``ETHTOOL_A_LINKMODES_OURS``, value represents advertised modes and mask +represents supported modes. ``ETHTOOL_A_LINKMODES_PEER`` in the reply is a bit +list. + +``LINKMODES_GET`` allows dump requests (kernel returns reply messages for all +devices supporting the request). + + +LINKMODES_SET +============= + +Request contents: + + ========================================== ====== ========================== + ``ETHTOOL_A_LINKMODES_HEADER`` nested request header + ``ETHTOOL_A_LINKMODES_AUTONEG`` u8 autonegotiation status + ``ETHTOOL_A_LINKMODES_OURS`` bitset advertised link modes + ``ETHTOOL_A_LINKMODES_PEER`` bitset partner link modes + ``ETHTOOL_A_LINKMODES_SPEED`` u32 link speed (Mb/s) + ``ETHTOOL_A_LINKMODES_DUPLEX`` u8 duplex mode + ``ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG`` u8 Master/slave port mode + ``ETHTOOL_A_LINKMODES_LANES`` u32 lanes + ========================================== ====== ========================== + +``ETHTOOL_A_LINKMODES_OURS`` bit set allows setting advertised link modes. If +autonegotiation is on (either set now or kept from before), advertised modes +are not changed (no ``ETHTOOL_A_LINKMODES_OURS`` attribute) and at least one +of speed, duplex and lanes is specified, kernel adjusts advertised modes to all +supported modes matching speed, duplex, lanes or all (whatever is specified). +This autoselection is done on ethtool side with ioctl interface, netlink +interface is supposed to allow requesting changes without knowing what exactly +kernel supports. + + +LINKSTATE_GET +============= + +Requests link state information. Link up/down flag (as provided by +``ETHTOOL_GLINK`` ioctl command) is provided. Optionally, extended state might +be provided as well. In general, extended state describes reasons for why a port +is down, or why it operates in some non-obvious mode. This request does not have +any attributes. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_LINKSTATE_HEADER`` nested request header + ==================================== ====== ========================== + +Kernel response contents: + + ==================================== ====== ============================ + ``ETHTOOL_A_LINKSTATE_HEADER`` nested reply header + ``ETHTOOL_A_LINKSTATE_LINK`` bool link state (up/down) + ``ETHTOOL_A_LINKSTATE_SQI`` u32 Current Signal Quality Index + ``ETHTOOL_A_LINKSTATE_SQI_MAX`` u32 Max support SQI value + ``ETHTOOL_A_LINKSTATE_EXT_STATE`` u8 link extended state + ``ETHTOOL_A_LINKSTATE_EXT_SUBSTATE`` u8 link extended substate + ==================================== ====== ============================ + +For most NIC drivers, the value of ``ETHTOOL_A_LINKSTATE_LINK`` returns +carrier flag provided by ``netif_carrier_ok()`` but there are drivers which +define their own handler. + +``ETHTOOL_A_LINKSTATE_EXT_STATE`` and ``ETHTOOL_A_LINKSTATE_EXT_SUBSTATE`` are +optional values. ethtool core can provide either both +``ETHTOOL_A_LINKSTATE_EXT_STATE`` and ``ETHTOOL_A_LINKSTATE_EXT_SUBSTATE``, +or only ``ETHTOOL_A_LINKSTATE_EXT_STATE``, or none of them. + +``LINKSTATE_GET`` allows dump requests (kernel returns reply messages for all +devices supporting the request). + + +Link extended states: + + ================================================ ============================================ + ``ETHTOOL_LINK_EXT_STATE_AUTONEG`` States relating to the autonegotiation or + issues therein + + ``ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE`` Failure during link training + + ``ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH`` Logical mismatch in physical coding sublayer + or forward error correction sublayer + + ``ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY`` Signal integrity issues + + ``ETHTOOL_LINK_EXT_STATE_NO_CABLE`` No cable connected + + ``ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE`` Failure is related to cable, + e.g., unsupported cable + + ``ETHTOOL_LINK_EXT_STATE_EEPROM_ISSUE`` Failure is related to EEPROM, e.g., failure + during reading or parsing the data + + ``ETHTOOL_LINK_EXT_STATE_CALIBRATION_FAILURE`` Failure during calibration algorithm + + ``ETHTOOL_LINK_EXT_STATE_POWER_BUDGET_EXCEEDED`` The hardware is not able to provide the + power required from cable or module + + ``ETHTOOL_LINK_EXT_STATE_OVERHEAT`` The module is overheated + ================================================ ============================================ + +Link extended substates: + + Autoneg substates: + + =============================================================== ================================ + ``ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED`` Peer side is down + + ``ETHTOOL_LINK_EXT_SUBSTATE_AN_ACK_NOT_RECEIVED`` Ack not received from peer side + + ``ETHTOOL_LINK_EXT_SUBSTATE_AN_NEXT_PAGE_EXCHANGE_FAILED`` Next page exchange failed + + ``ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED_FORCE_MODE`` Peer side is down during force + mode or there is no agreement of + speed + + ``ETHTOOL_LINK_EXT_SUBSTATE_AN_FEC_MISMATCH_DURING_OVERRIDE`` Forward error correction modes + in both sides are mismatched + + ``ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_HCD`` No Highest Common Denominator + =============================================================== ================================ + + Link training substates: + + =========================================================================== ==================== + ``ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_FRAME_LOCK_NOT_ACQUIRED`` Frames were not + recognized, the + lock failed + + ``ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_INHIBIT_TIMEOUT`` The lock did not + occur before + timeout + + ``ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_PARTNER_DID_NOT_SET_RECEIVER_READY`` Peer side did not + send ready signal + after training + process + + ``ETHTOOL_LINK_EXT_SUBSTATE_LT_REMOTE_FAULT`` Remote side is not + ready yet + =========================================================================== ==================== + + Link logical mismatch substates: + + ================================================================ =============================== + ``ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_BLOCK_LOCK`` Physical coding sublayer was + not locked in first phase - + block lock + + ``ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_AM_LOCK`` Physical coding sublayer was + not locked in second phase - + alignment markers lock + + ``ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_GET_ALIGN_STATUS`` Physical coding sublayer did + not get align status + + ``ETHTOOL_LINK_EXT_SUBSTATE_LLM_FC_FEC_IS_NOT_LOCKED`` FC forward error correction is + not locked + + ``ETHTOOL_LINK_EXT_SUBSTATE_LLM_RS_FEC_IS_NOT_LOCKED`` RS forward error correction is + not locked + ================================================================ =============================== + + Bad signal integrity substates: + + ================================================================= ============================= + ``ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS`` Large number of physical + errors + + ``ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE`` The system attempted to + operate the cable at a rate + that is not formally + supported, which led to + signal integrity issues + ================================================================= ============================= + + Cable issue substates: + + =================================================== ============================================ + ``ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE`` Unsupported cable + + ``ETHTOOL_LINK_EXT_SUBSTATE_CI_CABLE_TEST_FAILURE`` Cable test failure + =================================================== ============================================ + +DEBUG_GET +========= + +Requests debugging settings of a device. At the moment, only message mask is +provided. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_DEBUG_HEADER`` nested request header + ==================================== ====== ========================== + +Kernel response contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_DEBUG_HEADER`` nested reply header + ``ETHTOOL_A_DEBUG_MSGMASK`` bitset message mask + ==================================== ====== ========================== + +The message mask (``ETHTOOL_A_DEBUG_MSGMASK``) is equal to message level as +provided by ``ETHTOOL_GMSGLVL`` and set by ``ETHTOOL_SMSGLVL`` in ioctl +interface. While it is called message level there for historical reasons, most +drivers and almost all newer drivers use it as a mask of enabled message +classes (represented by ``NETIF_MSG_*`` constants); therefore netlink +interface follows its actual use in practice. + +``DEBUG_GET`` allows dump requests (kernel returns reply messages for all +devices supporting the request). + + +DEBUG_SET +========= + +Set or update debugging settings of a device. At the moment, only message mask +is supported. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_DEBUG_HEADER`` nested request header + ``ETHTOOL_A_DEBUG_MSGMASK`` bitset message mask + ==================================== ====== ========================== + +``ETHTOOL_A_DEBUG_MSGMASK`` bit set allows setting or modifying mask of +enabled debugging message types for the device. + + +WOL_GET +======= + +Query device wake-on-lan settings. Unlike most "GET" type requests, +``ETHTOOL_MSG_WOL_GET`` requires (netns) ``CAP_NET_ADMIN`` privileges as it +(potentially) provides SecureOn(tm) password which is confidential. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_WOL_HEADER`` nested request header + ==================================== ====== ========================== + +Kernel response contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_WOL_HEADER`` nested reply header + ``ETHTOOL_A_WOL_MODES`` bitset mask of enabled WoL modes + ``ETHTOOL_A_WOL_SOPASS`` binary SecureOn(tm) password + ==================================== ====== ========================== + +In reply, ``ETHTOOL_A_WOL_MODES`` mask consists of modes supported by the +device, value of modes which are enabled. ``ETHTOOL_A_WOL_SOPASS`` is only +included in reply if ``WAKE_MAGICSECURE`` mode is supported. + + +WOL_SET +======= + +Set or update wake-on-lan settings. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_WOL_HEADER`` nested request header + ``ETHTOOL_A_WOL_MODES`` bitset enabled WoL modes + ``ETHTOOL_A_WOL_SOPASS`` binary SecureOn(tm) password + ==================================== ====== ========================== + +``ETHTOOL_A_WOL_SOPASS`` is only allowed for devices supporting +``WAKE_MAGICSECURE`` mode. + + +FEATURES_GET +============ + +Gets netdev features like ``ETHTOOL_GFEATURES`` ioctl request. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_FEATURES_HEADER`` nested request header + ==================================== ====== ========================== + +Kernel response contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_FEATURES_HEADER`` nested reply header + ``ETHTOOL_A_FEATURES_HW`` bitset dev->hw_features + ``ETHTOOL_A_FEATURES_WANTED`` bitset dev->wanted_features + ``ETHTOOL_A_FEATURES_ACTIVE`` bitset dev->features + ``ETHTOOL_A_FEATURES_NOCHANGE`` bitset NETIF_F_NEVER_CHANGE + ==================================== ====== ========================== + +Bitmaps in kernel response have the same meaning as bitmaps used in ioctl +interference but attribute names are different (they are based on +corresponding members of struct net_device). Legacy "flags" are not provided, +if userspace needs them (most likely only ethtool for backward compatibility), +it can calculate their values from related feature bits itself. +ETHA_FEATURES_HW uses mask consisting of all features recognized by kernel (to +provide all names when using verbose bitmap format), the other three use no +mask (simple bit lists). + + +FEATURES_SET +============ + +Request to set netdev features like ``ETHTOOL_SFEATURES`` ioctl request. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_FEATURES_HEADER`` nested request header + ``ETHTOOL_A_FEATURES_WANTED`` bitset requested features + ==================================== ====== ========================== + +Kernel response contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_FEATURES_HEADER`` nested reply header + ``ETHTOOL_A_FEATURES_WANTED`` bitset diff wanted vs. result + ``ETHTOOL_A_FEATURES_ACTIVE`` bitset diff old vs. new active + ==================================== ====== ========================== + +Request constains only one bitset which can be either value/mask pair (request +to change specific feature bits and leave the rest) or only a value (request +to set all features to specified set). + +As request is subject to netdev_change_features() sanity checks, optional +kernel reply (can be suppressed by ``ETHTOOL_FLAG_OMIT_REPLY`` flag in request +header) informs client about the actual result. ``ETHTOOL_A_FEATURES_WANTED`` +reports the difference between client request and actual result: mask consists +of bits which differ between requested features and result (dev->features +after the operation), value consists of values of these bits in the request +(i.e. negated values from resulting features). ``ETHTOOL_A_FEATURES_ACTIVE`` +reports the difference between old and new dev->features: mask consists of +bits which have changed, values are their values in new dev->features (after +the operation). + +``ETHTOOL_MSG_FEATURES_NTF`` notification is sent not only if device features +are modified using ``ETHTOOL_MSG_FEATURES_SET`` request or on of ethtool ioctl +request but also each time features are modified with netdev_update_features() +or netdev_change_features(). + + +PRIVFLAGS_GET +============= + +Gets private flags like ``ETHTOOL_GPFLAGS`` ioctl request. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_PRIVFLAGS_HEADER`` nested request header + ==================================== ====== ========================== + +Kernel response contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_PRIVFLAGS_HEADER`` nested reply header + ``ETHTOOL_A_PRIVFLAGS_FLAGS`` bitset private flags + ==================================== ====== ========================== + +``ETHTOOL_A_PRIVFLAGS_FLAGS`` is a bitset with values of device private flags. +These flags are defined by driver, their number and names (and also meaning) +are device dependent. For compact bitset format, names can be retrieved as +``ETH_SS_PRIV_FLAGS`` string set. If verbose bitset format is requested, +response uses all private flags supported by the device as mask so that client +gets the full information without having to fetch the string set with names. + + +PRIVFLAGS_SET +============= + +Sets or modifies values of device private flags like ``ETHTOOL_SPFLAGS`` +ioctl request. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_PRIVFLAGS_HEADER`` nested request header + ``ETHTOOL_A_PRIVFLAGS_FLAGS`` bitset private flags + ==================================== ====== ========================== + +``ETHTOOL_A_PRIVFLAGS_FLAGS`` can either set the whole set of private flags or +modify only values of some of them. + + +RINGS_GET +========= + +Gets ring sizes like ``ETHTOOL_GRINGPARAM`` ioctl request. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_RINGS_HEADER`` nested request header + ==================================== ====== ========================== + +Kernel response contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_RINGS_HEADER`` nested reply header + ``ETHTOOL_A_RINGS_RX_MAX`` u32 max size of RX ring + ``ETHTOOL_A_RINGS_RX_MINI_MAX`` u32 max size of RX mini ring + ``ETHTOOL_A_RINGS_RX_JUMBO_MAX`` u32 max size of RX jumbo ring + ``ETHTOOL_A_RINGS_TX_MAX`` u32 max size of TX ring + ``ETHTOOL_A_RINGS_RX`` u32 size of RX ring + ``ETHTOOL_A_RINGS_RX_MINI`` u32 size of RX mini ring + ``ETHTOOL_A_RINGS_RX_JUMBO`` u32 size of RX jumbo ring + ``ETHTOOL_A_RINGS_TX`` u32 size of TX ring + ==================================== ====== ========================== + + +RINGS_SET +========= + +Sets ring sizes like ``ETHTOOL_SRINGPARAM`` ioctl request. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_RINGS_HEADER`` nested reply header + ``ETHTOOL_A_RINGS_RX`` u32 size of RX ring + ``ETHTOOL_A_RINGS_RX_MINI`` u32 size of RX mini ring + ``ETHTOOL_A_RINGS_RX_JUMBO`` u32 size of RX jumbo ring + ``ETHTOOL_A_RINGS_TX`` u32 size of TX ring + ==================================== ====== ========================== + +Kernel checks that requested ring sizes do not exceed limits reported by +driver. Driver may impose additional constraints and may not suspport all +attributes. + + +CHANNELS_GET +============ + +Gets channel counts like ``ETHTOOL_GCHANNELS`` ioctl request. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_CHANNELS_HEADER`` nested request header + ==================================== ====== ========================== + +Kernel response contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_CHANNELS_HEADER`` nested reply header + ``ETHTOOL_A_CHANNELS_RX_MAX`` u32 max receive channels + ``ETHTOOL_A_CHANNELS_TX_MAX`` u32 max transmit channels + ``ETHTOOL_A_CHANNELS_OTHER_MAX`` u32 max other channels + ``ETHTOOL_A_CHANNELS_COMBINED_MAX`` u32 max combined channels + ``ETHTOOL_A_CHANNELS_RX_COUNT`` u32 receive channel count + ``ETHTOOL_A_CHANNELS_TX_COUNT`` u32 transmit channel count + ``ETHTOOL_A_CHANNELS_OTHER_COUNT`` u32 other channel count + ``ETHTOOL_A_CHANNELS_COMBINED_COUNT`` u32 combined channel count + ===================================== ====== ========================== + + +CHANNELS_SET +============ + +Sets channel counts like ``ETHTOOL_SCHANNELS`` ioctl request. + +Request contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_CHANNELS_HEADER`` nested request header + ``ETHTOOL_A_CHANNELS_RX_COUNT`` u32 receive channel count + ``ETHTOOL_A_CHANNELS_TX_COUNT`` u32 transmit channel count + ``ETHTOOL_A_CHANNELS_OTHER_COUNT`` u32 other channel count + ``ETHTOOL_A_CHANNELS_COMBINED_COUNT`` u32 combined channel count + ===================================== ====== ========================== + +Kernel checks that requested channel counts do not exceed limits reported by +driver. Driver may impose additional constraints and may not suspport all +attributes. + + +COALESCE_GET +============ + +Gets coalescing parameters like ``ETHTOOL_GCOALESCE`` ioctl request. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_COALESCE_HEADER`` nested request header + ==================================== ====== ========================== + +Kernel response contents: + + =========================================== ====== ======================= + ``ETHTOOL_A_COALESCE_HEADER`` nested reply header + ``ETHTOOL_A_COALESCE_RX_USECS`` u32 delay (us), normal Rx + ``ETHTOOL_A_COALESCE_RX_MAX_FRAMES`` u32 max packets, normal Rx + ``ETHTOOL_A_COALESCE_RX_USECS_IRQ`` u32 delay (us), Rx in IRQ + ``ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ`` u32 max packets, Rx in IRQ + ``ETHTOOL_A_COALESCE_TX_USECS`` u32 delay (us), normal Tx + ``ETHTOOL_A_COALESCE_TX_MAX_FRAMES`` u32 max packets, normal Tx + ``ETHTOOL_A_COALESCE_TX_USECS_IRQ`` u32 delay (us), Tx in IRQ + ``ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ`` u32 IRQ packets, Tx in IRQ + ``ETHTOOL_A_COALESCE_STATS_BLOCK_USECS`` u32 delay of stats update + ``ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX`` bool adaptive Rx coalesce + ``ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX`` bool adaptive Tx coalesce + ``ETHTOOL_A_COALESCE_PKT_RATE_LOW`` u32 threshold for low rate + ``ETHTOOL_A_COALESCE_RX_USECS_LOW`` u32 delay (us), low Rx + ``ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW`` u32 max packets, low Rx + ``ETHTOOL_A_COALESCE_TX_USECS_LOW`` u32 delay (us), low Tx + ``ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW`` u32 max packets, low Tx + ``ETHTOOL_A_COALESCE_PKT_RATE_HIGH`` u32 threshold for high rate + ``ETHTOOL_A_COALESCE_RX_USECS_HIGH`` u32 delay (us), high Rx + ``ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH`` u32 max packets, high Rx + ``ETHTOOL_A_COALESCE_TX_USECS_HIGH`` u32 delay (us), high Tx + ``ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH`` u32 max packets, high Tx + ``ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL`` u32 rate sampling interval + =========================================== ====== ======================= + +Attributes are only included in reply if their value is not zero or the +corresponding bit in ``ethtool_ops::supported_coalesce_params`` is set (i.e. +they are declared as supported by driver). + + +COALESCE_SET +============ + +Sets coalescing parameters like ``ETHTOOL_SCOALESCE`` ioctl request. + +Request contents: + + =========================================== ====== ======================= + ``ETHTOOL_A_COALESCE_HEADER`` nested request header + ``ETHTOOL_A_COALESCE_RX_USECS`` u32 delay (us), normal Rx + ``ETHTOOL_A_COALESCE_RX_MAX_FRAMES`` u32 max packets, normal Rx + ``ETHTOOL_A_COALESCE_RX_USECS_IRQ`` u32 delay (us), Rx in IRQ + ``ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ`` u32 max packets, Rx in IRQ + ``ETHTOOL_A_COALESCE_TX_USECS`` u32 delay (us), normal Tx + ``ETHTOOL_A_COALESCE_TX_MAX_FRAMES`` u32 max packets, normal Tx + ``ETHTOOL_A_COALESCE_TX_USECS_IRQ`` u32 delay (us), Tx in IRQ + ``ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ`` u32 IRQ packets, Tx in IRQ + ``ETHTOOL_A_COALESCE_STATS_BLOCK_USECS`` u32 delay of stats update + ``ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX`` bool adaptive Rx coalesce + ``ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX`` bool adaptive Tx coalesce + ``ETHTOOL_A_COALESCE_PKT_RATE_LOW`` u32 threshold for low rate + ``ETHTOOL_A_COALESCE_RX_USECS_LOW`` u32 delay (us), low Rx + ``ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW`` u32 max packets, low Rx + ``ETHTOOL_A_COALESCE_TX_USECS_LOW`` u32 delay (us), low Tx + ``ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW`` u32 max packets, low Tx + ``ETHTOOL_A_COALESCE_PKT_RATE_HIGH`` u32 threshold for high rate + ``ETHTOOL_A_COALESCE_RX_USECS_HIGH`` u32 delay (us), high Rx + ``ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH`` u32 max packets, high Rx + ``ETHTOOL_A_COALESCE_TX_USECS_HIGH`` u32 delay (us), high Tx + ``ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH`` u32 max packets, high Tx + ``ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL`` u32 rate sampling interval + =========================================== ====== ======================= + +Request is rejected if it attributes declared as unsupported by driver (i.e. +such that the corresponding bit in ``ethtool_ops::supported_coalesce_params`` +is not set), regardless of their values. Driver may impose additional +constraints on coalescing parameters and their values. + + +PAUSE_GET +========= + +Gets pause frame settings like ``ETHTOOL_GPAUSEPARAM`` ioctl request. + +Request contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_PAUSE_HEADER`` nested request header + ===================================== ====== ========================== + +Kernel response contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_PAUSE_HEADER`` nested request header + ``ETHTOOL_A_PAUSE_AUTONEG`` bool pause autonegotiation + ``ETHTOOL_A_PAUSE_RX`` bool receive pause frames + ``ETHTOOL_A_PAUSE_TX`` bool transmit pause frames + ``ETHTOOL_A_PAUSE_STATS`` nested pause statistics + ===================================== ====== ========================== + +``ETHTOOL_A_PAUSE_STATS`` are reported if ``ETHTOOL_FLAG_STATS`` was set +in ``ETHTOOL_A_HEADER_FLAGS``. +It will be empty if driver did not report any statistics. Drivers fill in +the statistics in the following structure: + +.. kernel-doc:: include/linux/ethtool.h + :identifiers: ethtool_pause_stats + +Each member has a corresponding attribute defined. + +PAUSE_SET +========= + +Sets pause parameters like ``ETHTOOL_GPAUSEPARAM`` ioctl request. + +Request contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_PAUSE_HEADER`` nested request header + ``ETHTOOL_A_PAUSE_AUTONEG`` bool pause autonegotiation + ``ETHTOOL_A_PAUSE_RX`` bool receive pause frames + ``ETHTOOL_A_PAUSE_TX`` bool transmit pause frames + ===================================== ====== ========================== + + +EEE_GET +======= + +Gets Energy Efficient Ethernet settings like ``ETHTOOL_GEEE`` ioctl request. + +Request contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_EEE_HEADER`` nested request header + ===================================== ====== ========================== + +Kernel response contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_EEE_HEADER`` nested request header + ``ETHTOOL_A_EEE_MODES_OURS`` bool supported/advertised modes + ``ETHTOOL_A_EEE_MODES_PEER`` bool peer advertised link modes + ``ETHTOOL_A_EEE_ACTIVE`` bool EEE is actively used + ``ETHTOOL_A_EEE_ENABLED`` bool EEE is enabled + ``ETHTOOL_A_EEE_TX_LPI_ENABLED`` bool Tx lpi enabled + ``ETHTOOL_A_EEE_TX_LPI_TIMER`` u32 Tx lpi timeout (in us) + ===================================== ====== ========================== + +In ``ETHTOOL_A_EEE_MODES_OURS``, mask consists of link modes for which EEE is +enabled, value of link modes for which EEE is advertised. Link modes for which +peer advertises EEE are listed in ``ETHTOOL_A_EEE_MODES_PEER`` (no mask). The +netlink interface allows reporting EEE status for all link modes but only +first 32 are provided by the ``ethtool_ops`` callback. + + +EEE_SET +======= + +Sets Energy Efficient Ethernet parameters like ``ETHTOOL_SEEE`` ioctl request. + +Request contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_EEE_HEADER`` nested request header + ``ETHTOOL_A_EEE_MODES_OURS`` bool advertised modes + ``ETHTOOL_A_EEE_ENABLED`` bool EEE is enabled + ``ETHTOOL_A_EEE_TX_LPI_ENABLED`` bool Tx lpi enabled + ``ETHTOOL_A_EEE_TX_LPI_TIMER`` u32 Tx lpi timeout (in us) + ===================================== ====== ========================== + +``ETHTOOL_A_EEE_MODES_OURS`` is used to either list link modes to advertise +EEE for (if there is no mask) or specify changes to the list (if there is +a mask). The netlink interface allows reporting EEE status for all link modes +but only first 32 can be set at the moment as that is what the ``ethtool_ops`` +callback supports. + + +TSINFO_GET +========== + +Gets timestamping information like ``ETHTOOL_GET_TS_INFO`` ioctl request. + +Request contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_TSINFO_HEADER`` nested request header + ===================================== ====== ========================== + +Kernel response contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_TSINFO_HEADER`` nested request header + ``ETHTOOL_A_TSINFO_TIMESTAMPING`` bitset SO_TIMESTAMPING flags + ``ETHTOOL_A_TSINFO_TX_TYPES`` bitset supported Tx types + ``ETHTOOL_A_TSINFO_RX_FILTERS`` bitset supported Rx filters + ``ETHTOOL_A_TSINFO_PHC_INDEX`` u32 PTP hw clock index + ===================================== ====== ========================== + +``ETHTOOL_A_TSINFO_PHC_INDEX`` is absent if there is no associated PHC (there +is no special value for this case). The bitset attributes are omitted if they +would be empty (no bit set). + +CABLE_TEST +========== + +Start a cable test. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_CABLE_TEST_HEADER`` nested request header + ==================================== ====== ========================== + +Notification contents: + +An Ethernet cable typically contains 1, 2 or 4 pairs. The length of +the pair can only be measured when there is a fault in the pair and +hence a reflection. Information about the fault may not be available, +depending on the specific hardware. Hence the contents of the notify +message are mostly optional. The attributes can be repeated an +arbitrary number of times, in an arbitrary order, for an arbitrary +number of pairs. + +The example shows the notification sent when the test is completed for +a T2 cable, i.e. two pairs. One pair is OK and hence has no length +information. The second pair has a fault and does have length +information. + + +---------------------------------------------+--------+---------------------+ + | ``ETHTOOL_A_CABLE_TEST_HEADER`` | nested | reply header | + +---------------------------------------------+--------+---------------------+ + | ``ETHTOOL_A_CABLE_TEST_STATUS`` | u8 | completed | + +---------------------------------------------+--------+---------------------+ + | ``ETHTOOL_A_CABLE_TEST_NTF_NEST`` | nested | all the results | + +-+-------------------------------------------+--------+---------------------+ + | | ``ETHTOOL_A_CABLE_NEST_RESULT`` | nested | cable test result | + +-+-+-----------------------------------------+--------+---------------------+ + | | | ``ETHTOOL_A_CABLE_RESULTS_PAIR`` | u8 | pair number | + +-+-+-----------------------------------------+--------+---------------------+ + | | | ``ETHTOOL_A_CABLE_RESULTS_CODE`` | u8 | result code | + +-+-+-----------------------------------------+--------+---------------------+ + | | ``ETHTOOL_A_CABLE_NEST_RESULT`` | nested | cable test results | + +-+-+-----------------------------------------+--------+---------------------+ + | | | ``ETHTOOL_A_CABLE_RESULTS_PAIR`` | u8 | pair number | + +-+-+-----------------------------------------+--------+---------------------+ + | | | ``ETHTOOL_A_CABLE_RESULTS_CODE`` | u8 | result code | + +-+-+-----------------------------------------+--------+---------------------+ + | | ``ETHTOOL_A_CABLE_NEST_FAULT_LENGTH`` | nested | cable length | + +-+-+-----------------------------------------+--------+---------------------+ + | | | ``ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR`` | u8 | pair number | + +-+-+-----------------------------------------+--------+---------------------+ + | | | ``ETHTOOL_A_CABLE_FAULT_LENGTH_CM`` | u32 | length in cm | + +-+-+-----------------------------------------+--------+---------------------+ + +CABLE_TEST TDR +============== + +Start a cable test and report raw TDR data + +Request contents: + + +--------------------------------------------+--------+-----------------------+ + | ``ETHTOOL_A_CABLE_TEST_TDR_HEADER`` | nested | reply header | + +--------------------------------------------+--------+-----------------------+ + | ``ETHTOOL_A_CABLE_TEST_TDR_CFG`` | nested | test configuration | + +-+------------------------------------------+--------+-----------------------+ + | | ``ETHTOOL_A_CABLE_STEP_FIRST_DISTANCE`` | u32 | first data distance | + +-+-+----------------------------------------+--------+-----------------------+ + | | ``ETHTOOL_A_CABLE_STEP_LAST_DISTANCE`` | u32 | last data distance | + +-+-+----------------------------------------+--------+-----------------------+ + | | ``ETHTOOL_A_CABLE_STEP_STEP_DISTANCE`` | u32 | distance of each step | + +-+-+----------------------------------------+--------+-----------------------+ + | | ``ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR`` | u8 | pair to test | + +-+-+----------------------------------------+--------+-----------------------+ + +The ETHTOOL_A_CABLE_TEST_TDR_CFG is optional, as well as all members +of the nest. All distances are expressed in centimeters. The PHY takes +the distances as a guide, and rounds to the nearest distance it +actually supports. If a pair is passed, only that one pair will be +tested. Otherwise all pairs are tested. + +Notification contents: + +Raw TDR data is gathered by sending a pulse down the cable and +recording the amplitude of the reflected pulse for a given distance. + +It can take a number of seconds to collect TDR data, especial if the +full 100 meters is probed at 1 meter intervals. When the test is +started a notification will be sent containing just +ETHTOOL_A_CABLE_TEST_TDR_STATUS with the value +ETHTOOL_A_CABLE_TEST_NTF_STATUS_STARTED. + +When the test has completed a second notification will be sent +containing ETHTOOL_A_CABLE_TEST_TDR_STATUS with the value +ETHTOOL_A_CABLE_TEST_NTF_STATUS_COMPLETED and the TDR data. + +The message may optionally contain the amplitude of the pulse send +down the cable. This is measured in mV. A reflection should not be +bigger than transmitted pulse. + +Before the raw TDR data should be an ETHTOOL_A_CABLE_TDR_NEST_STEP +nest containing information about the distance along the cable for the +first reading, the last reading, and the step between each +reading. Distances are measured in centimeters. These should be the +exact values the PHY used. These may be different to what the user +requested, if the native measurement resolution is greater than 1 cm. + +For each step along the cable, a ETHTOOL_A_CABLE_TDR_NEST_AMPLITUDE is +used to report the amplitude of the reflection for a given pair. + + +---------------------------------------------+--------+----------------------+ + | ``ETHTOOL_A_CABLE_TEST_TDR_HEADER`` | nested | reply header | + +---------------------------------------------+--------+----------------------+ + | ``ETHTOOL_A_CABLE_TEST_TDR_STATUS`` | u8 | completed | + +---------------------------------------------+--------+----------------------+ + | ``ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST`` | nested | all the results | + +-+-------------------------------------------+--------+----------------------+ + | | ``ETHTOOL_A_CABLE_TDR_NEST_PULSE`` | nested | TX Pulse amplitude | + +-+-+-----------------------------------------+--------+----------------------+ + | | | ``ETHTOOL_A_CABLE_PULSE_mV`` | s16 | Pulse amplitude | + +-+-+-----------------------------------------+--------+----------------------+ + | | ``ETHTOOL_A_CABLE_NEST_STEP`` | nested | TDR step info | + +-+-+-----------------------------------------+--------+----------------------+ + | | | ``ETHTOOL_A_CABLE_STEP_FIRST_DISTANCE`` | u32 | First data distance | + +-+-+-----------------------------------------+--------+----------------------+ + | | | ``ETHTOOL_A_CABLE_STEP_LAST_DISTANCE`` | u32 | Last data distance | + +-+-+-----------------------------------------+--------+----------------------+ + | | | ``ETHTOOL_A_CABLE_STEP_STEP_DISTANCE`` | u32 | distance of each step| + +-+-+-----------------------------------------+--------+----------------------+ + | | ``ETHTOOL_A_CABLE_TDR_NEST_AMPLITUDE`` | nested | Reflection amplitude | + +-+-+-----------------------------------------+--------+----------------------+ + | | | ``ETHTOOL_A_CABLE_RESULTS_PAIR`` | u8 | pair number | + +-+-+-----------------------------------------+--------+----------------------+ + | | | ``ETHTOOL_A_CABLE_AMPLITUDE_mV`` | s16 | Reflection amplitude | + +-+-+-----------------------------------------+--------+----------------------+ + | | ``ETHTOOL_A_CABLE_TDR_NEST_AMPLITUDE`` | nested | Reflection amplitude | + +-+-+-----------------------------------------+--------+----------------------+ + | | | ``ETHTOOL_A_CABLE_RESULTS_PAIR`` | u8 | pair number | + +-+-+-----------------------------------------+--------+----------------------+ + | | | ``ETHTOOL_A_CABLE_AMPLITUDE_mV`` | s16 | Reflection amplitude | + +-+-+-----------------------------------------+--------+----------------------+ + | | ``ETHTOOL_A_CABLE_TDR_NEST_AMPLITUDE`` | nested | Reflection amplitude | + +-+-+-----------------------------------------+--------+----------------------+ + | | | ``ETHTOOL_A_CABLE_RESULTS_PAIR`` | u8 | pair number | + +-+-+-----------------------------------------+--------+----------------------+ + | | | ``ETHTOOL_A_CABLE_AMPLITUDE_mV`` | s16 | Reflection amplitude | + +-+-+-----------------------------------------+--------+----------------------+ + +TUNNEL_INFO +=========== + +Gets information about the tunnel state NIC is aware of. + +Request contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_TUNNEL_INFO_HEADER`` nested request header + ===================================== ====== ========================== + +Kernel response contents: + + +---------------------------------------------+--------+---------------------+ + | ``ETHTOOL_A_TUNNEL_INFO_HEADER`` | nested | reply header | + +---------------------------------------------+--------+---------------------+ + | ``ETHTOOL_A_TUNNEL_INFO_UDP_PORTS`` | nested | all UDP port tables | + +-+-------------------------------------------+--------+---------------------+ + | | ``ETHTOOL_A_TUNNEL_UDP_TABLE`` | nested | one UDP port table | + +-+-+-----------------------------------------+--------+---------------------+ + | | | ``ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE`` | u32 | max size of the | + | | | | | table | + +-+-+-----------------------------------------+--------+---------------------+ + | | | ``ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES`` | bitset | tunnel types which | + | | | | | table can hold | + +-+-+-----------------------------------------+--------+---------------------+ + | | | ``ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY`` | nested | offloaded UDP port | + +-+-+-+---------------------------------------+--------+---------------------+ + | | | | ``ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT`` | be16 | UDP port | + +-+-+-+---------------------------------------+--------+---------------------+ + | | | | ``ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE`` | u32 | tunnel type | + +-+-+-+---------------------------------------+--------+---------------------+ + +For UDP tunnel table empty ``ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES`` indicates that +the table contains static entries, hard-coded by the NIC. + +FEC_GET +======= + +Gets FEC configuration and state like ``ETHTOOL_GFECPARAM`` ioctl request. + +Request contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_FEC_HEADER`` nested request header + ===================================== ====== ========================== + +Kernel response contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_FEC_HEADER`` nested request header + ``ETHTOOL_A_FEC_MODES`` bitset configured modes + ``ETHTOOL_A_FEC_AUTO`` bool FEC mode auto selection + ``ETHTOOL_A_FEC_ACTIVE`` u32 index of active FEC mode + ``ETHTOOL_A_FEC_STATS`` nested FEC statistics + ===================================== ====== ========================== + +``ETHTOOL_A_FEC_ACTIVE`` is the bit index of the FEC link mode currently +active on the interface. This attribute may not be present if device does +not support FEC. + +``ETHTOOL_A_FEC_MODES`` and ``ETHTOOL_A_FEC_AUTO`` are only meaningful when +autonegotiation is disabled. If ``ETHTOOL_A_FEC_AUTO`` is non-zero driver will +select the FEC mode automatically based on the parameters of the SFP module. +This is equivalent to the ``ETHTOOL_FEC_AUTO`` bit of the ioctl interface. +``ETHTOOL_A_FEC_MODES`` carry the current FEC configuration using link mode +bits (rather than old ``ETHTOOL_FEC_*`` bits). + +``ETHTOOL_A_FEC_STATS`` are reported if ``ETHTOOL_FLAG_STATS`` was set in +``ETHTOOL_A_HEADER_FLAGS``. +Each attribute carries an array of 64bit statistics. First entry in the array +contains the total number of events on the port, while the following entries +are counters corresponding to lanes/PCS instances. The number of entries in +the array will be: + ++--------------+---------------------------------------------+ +| `0` | device does not support FEC statistics | ++--------------+---------------------------------------------+ +| `1` | device does not support per-lane break down | ++--------------+---------------------------------------------+ +| `1 + #lanes` | device has full support for FEC stats | ++--------------+---------------------------------------------+ + +Drivers fill in the statistics in the following structure: + +.. kernel-doc:: include/linux/ethtool.h + :identifiers: ethtool_fec_stats + +FEC_SET +======= + +Sets FEC parameters like ``ETHTOOL_SFECPARAM`` ioctl request. + +Request contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_FEC_HEADER`` nested request header + ``ETHTOOL_A_FEC_MODES`` bitset configured modes + ``ETHTOOL_A_FEC_AUTO`` bool FEC mode auto selection + ===================================== ====== ========================== + +``FEC_SET`` is only meaningful when autonegotiation is disabled. Otherwise +FEC mode is selected as part of autonegotiation. + +``ETHTOOL_A_FEC_MODES`` selects which FEC mode should be used. It's recommended +to set only one bit, if multiple bits are set driver may choose between them +in an implementation specific way. + +``ETHTOOL_A_FEC_AUTO`` requests the driver to choose FEC mode based on SFP +module parameters. This does not mean autonegotiation. + +MODULE_EEPROM +============= + +Fetch module EEPROM data dump. +This interface is designed to allow dumps of at most 1/2 page at once. This +means only dumps of 128 (or less) bytes are allowed, without crossing half page +boundary located at offset 128. For pages other than 0 only high 128 bytes are +accessible. + +Request contents: + + ======================================= ====== ========================== + ``ETHTOOL_A_MODULE_EEPROM_HEADER`` nested request header + ``ETHTOOL_A_MODULE_EEPROM_OFFSET`` u32 offset within a page + ``ETHTOOL_A_MODULE_EEPROM_LENGTH`` u32 amount of bytes to read + ``ETHTOOL_A_MODULE_EEPROM_PAGE`` u8 page number + ``ETHTOOL_A_MODULE_EEPROM_BANK`` u8 bank number + ``ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS`` u8 page I2C address + ======================================= ====== ========================== + +Kernel response contents: + + +---------------------------------------------+--------+---------------------+ + | ``ETHTOOL_A_MODULE_EEPROM_HEADER`` | nested | reply header | + +---------------------------------------------+--------+---------------------+ + | ``ETHTOOL_A_MODULE_EEPROM_DATA`` | nested | array of bytes from | + | | | module EEPROM | + +---------------------------------------------+--------+---------------------+ + +``ETHTOOL_A_MODULE_EEPROM_DATA`` has an attribute length equal to the amount of +bytes driver actually read. + +Request translation + =================== + + ``ETHTOOL_GMODULEINFO`` ``ETHTOOL_MSG_MODULE_EEPROM_GET`` + ``ETHTOOL_GMODULEEEPROM`` ``ETHTOOL_MSG_MODULE_EEPROM_GET`` diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 4797ba2b6117b..e98e5d2368d49 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -889,7 +889,7 @@ cipso_cache_enable - BOOLEAN cipso_cache_bucket_size - INTEGER The CIPSO label cache consists of a fixed size hash table with each hash bucket containing a number of cache entries. This variable limits - the number of entries in each hash bucket; the larger the value the + the number of entries in each hash bucket; the larger the value is, the more CIPSO label mappings that can be cached. When the number of entries in a given hash bucket reaches this limit adding new entries causes the oldest entry in the bucket to be removed to make room. @@ -976,7 +976,7 @@ ip_nonlocal_bind - BOOLEAN which can be quite useful - but may break some applications. Default: 0 -ip_dynaddr - BOOLEAN +ip_dynaddr - INTEGER If set non-zero, enables support for dynamic addresses. If set to a non-zero value larger than 1, a kernel log message will be printed when dynamic address rewriting @@ -2307,7 +2307,14 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max Default: 4K sctp_wmem - vector of 3 INTEGERs: min, default, max - Currently this tunable has no effect. + Only the first value ("min") is used, "default" and "max" are + ignored. + + min: Minimum size of send buffer that can be used by SCTP sockets. + It is guaranteed to each SCTP socket (but not association) even + under moderate memory pressure. + + Default: 4K addr_scope_policy - INTEGER Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00 diff --git a/Documentation/networking/ipvs-sysctl.txt b/Documentation/networking/ipvs-sysctl.txt index 056898685d408..fc531c29a2e83 100644 --- a/Documentation/networking/ipvs-sysctl.txt +++ b/Documentation/networking/ipvs-sysctl.txt @@ -30,8 +30,7 @@ conn_reuse_mode - INTEGER 0: disable any special handling on port reuse. The new connection will be delivered to the same real server that was - servicing the previous connection. This will effectively - disable expire_nodest_conn. + servicing the previous connection. bit 1: enable rescheduling of new connections when it is safe. That is, whenever expire_nodest_conn and for TCP sockets, when diff --git a/Documentation/power/runtime_pm.rst b/Documentation/power/runtime_pm.rst index 2c2ec99b50886..a3a1d04526c24 100644 --- a/Documentation/power/runtime_pm.rst +++ b/Documentation/power/runtime_pm.rst @@ -339,6 +339,10 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: checked additionally, and -EACCES means that 'power.disable_depth' is different from 0 + `int pm_runtime_resume_and_get(struct device *dev);` + - run pm_runtime_resume(dev) and if successful, increment the device's + usage counter; return the result of pm_runtime_resume + `int pm_request_idle(struct device *dev);` - submit a request to execute the subsystem-level idle callback for the device (the request is represented by a work item in pm_wq); returns 0 on diff --git a/Documentation/process/coding-style.rst b/Documentation/process/coding-style.rst index 8a3b6485b33be..b2eaad23e4715 100644 --- a/Documentation/process/coding-style.rst +++ b/Documentation/process/coding-style.rst @@ -90,7 +90,7 @@ Statements longer than 80 columns should be broken into sensible chunks, unless exceeding 80 columns significantly increases readability and does not hide information. -Descendants are always substantially shorter than the parent and are +Descendants are always substantially shorter than the parent and are placed substantially to the right. A very commonly used style is to align descendants to a function open parenthesis. diff --git a/Documentation/process/stable-kernel-rules.rst b/Documentation/process/stable-kernel-rules.rst index 06f743b612c48..bb53707f72cc5 100644 --- a/Documentation/process/stable-kernel-rules.rst +++ b/Documentation/process/stable-kernel-rules.rst @@ -174,7 +174,16 @@ Trees - The finalized and tagged releases of all stable kernels can be found in separate branches per version at: - https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git + https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git + + - The release candidate of all stable kernel versions can be found at: + + https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git/ + + .. warning:: + The -stable-rc tree is a snapshot in time of the stable-queue tree and + will change frequently, hence will be rebased often. It should only be + used for testing purposes (e.g. to be consumed by CI systems). Review committee diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst index fb56297f70dc8..857be0d44e809 100644 --- a/Documentation/process/submitting-patches.rst +++ b/Documentation/process/submitting-patches.rst @@ -133,7 +133,7 @@ as you intend it to. The maintainer will thank you if you write your patch description in a form which can be easily pulled into Linux's source code management -system, ``git``, as a "commit log". See :ref:`explicit_in_reply_to`. +system, ``git``, as a "commit log". See :ref:`the_canonical_patch_format`. Solve only one problem per patch. If your description starts to get long, that's a sign that you probably need to split up your patch. diff --git a/Documentation/scheduler/sched-bwc.rst b/Documentation/scheduler/sched-bwc.rst index 9801d6b284b1e..d220e383f2a5e 100644 --- a/Documentation/scheduler/sched-bwc.rst +++ b/Documentation/scheduler/sched-bwc.rst @@ -21,33 +21,85 @@ cfs_quota units at each period boundary. As threads consume this bandwidth it is transferred to cpu-local "silos" on a demand basis. The amount transferred within each of these updates is tunable and described as the "slice". +Burst feature +------------- +This feature borrows time now against our future underrun, at the cost of +increased interference against the other system users. All nicely bounded. + +Traditional (UP-EDF) bandwidth control is something like: + + (U = \Sum u_i) <= 1 + +This guaranteeds both that every deadline is met and that the system is +stable. After all, if U were > 1, then for every second of walltime, +we'd have to run more than a second of program time, and obviously miss +our deadline, but the next deadline will be further out still, there is +never time to catch up, unbounded fail. + +The burst feature observes that a workload doesn't always executes the full +quota; this enables one to describe u_i as a statistical distribution. + +For example, have u_i = {x,e}_i, where x is the p(95) and x+e p(100) +(the traditional WCET). This effectively allows u to be smaller, +increasing the efficiency (we can pack more tasks in the system), but at +the cost of missing deadlines when all the odds line up. However, it +does maintain stability, since every overrun must be paired with an +underrun as long as our x is above the average. + +That is, suppose we have 2 tasks, both specify a p(95) value, then we +have a p(95)*p(95) = 90.25% chance both tasks are within their quota and +everything is good. At the same time we have a p(5)p(5) = 0.25% chance +both tasks will exceed their quota at the same time (guaranteed deadline +fail). Somewhere in between there's a threshold where one exceeds and +the other doesn't underrun enough to compensate; this depends on the +specific CDFs. + +At the same time, we can say that the worst case deadline miss, will be +\Sum e_i; that is, there is a bounded tardiness (under the assumption +that x+e is indeed WCET). + +The interferenece when using burst is valued by the possibilities for +missing the deadline and the average WCET. Test results showed that when +there many cgroups or CPU is under utilized, the interference is +limited. More details are shown in: +https://lore.kernel.org/lkml/5371BD36-55AE-4F71-B9D7-B86DC32E3D2B@linux.alibaba.com/ + Management ---------- -Quota and period are managed within the cpu subsystem via cgroupfs. +Quota, period and burst are managed within the cpu subsystem via cgroupfs. -cpu.cfs_quota_us: the total available run-time within a period (in microseconds) -cpu.cfs_period_us: the length of a period (in microseconds) -cpu.stat: exports throttling statistics [explained further below] +- cpu.cfs_quota_us: the total available run-time within a period (in +- cpu.cfs_quota_us: run-time replenished within a period (in microseconds) +- cpu.cfs_period_us: the length of a period (in microseconds) +- cpu.stat: exports throttling statistics [explained further below] +- cpu.cfs_burst_us: the maximum accumulated run-time (in microseconds) The default values are:: cpu.cfs_period_us=100ms - cpu.cfs_quota=-1 + cpu.cfs_quota_us=-1 + cpu.cfs_burst_us=0 A value of -1 for cpu.cfs_quota_us indicates that the group does not have any bandwidth restriction in place, such a group is described as an unconstrained bandwidth group. This represents the traditional work-conserving behavior for CFS. -Writing any (valid) positive value(s) will enact the specified bandwidth limit. -The minimum quota allowed for the quota or period is 1ms. There is also an -upper bound on the period length of 1s. Additional restrictions exist when -bandwidth limits are used in a hierarchical fashion, these are explained in -more detail below. +Writing any (valid) positive value(s) no smaller than cpu.cfs_burst_us will +enact the specified bandwidth limit. The minimum quota allowed for the quota or +period is 1ms. There is also an upper bound on the period length of 1s. +Additional restrictions exist when bandwidth limits are used in a hierarchical +fashion, these are explained in more detail below. Writing any negative value to cpu.cfs_quota_us will remove the bandwidth limit and return the group to an unconstrained state once more. +A value of 0 for cpu.cfs_burst_us indicates that the group can not accumulate +any unused bandwidth. It makes the traditional bandwidth control behavior for +CFS unchanged. Writing any (valid) positive value(s) no larger than +cpu.cfs_quota_us into cpu.cfs_burst_us will enact the cap on unused bandwidth +accumulation. + Any updates to a group's bandwidth specification will result in it becoming unthrottled if it is in a constrained state. @@ -65,9 +117,22 @@ This is tunable via procfs:: Larger slice values will reduce transfer overheads, while smaller values allow for more fine-grained consumption. +Tasks may be throttled when holding locks for a long time by current +cfs bandwidth control mechanism once users set a too small quota/period +ratio, and other processes waiting for the lock will block. Once throttling +occurs in the critical path of the kernel, e.g. fork(), the whole system +may even get stuck. The minimum percentage of quota/period users can set +is constrained by sysctl_sched_cfs_bandwidth_min_ratio. + +This is tunable via procfs:: + /proc/sys/kernel/sysctl_sched_cfs_bandwidth_min_ratio + +The default value is zero and users can set quota and period without +triggering this constraint. + Statistics ---------- -A group's bandwidth statistics are exported via 3 fields in cpu.stat. +A group's bandwidth statistics are exported via 5 fields in cpu.stat. cpu.stat: @@ -75,6 +140,9 @@ cpu.stat: - nr_throttled: Number of times the group has been throttled/limited. - throttled_time: The total time duration (in nanoseconds) for which entities of the group have been throttled. +- nr_bursts: Number of periods burst occurs. +- burst_time: Cumulative wall-time (in nanoseconds) that any CPUs has used + above quota in respective periods This interface is read-only. @@ -172,3 +240,15 @@ Examples By using a small period here we are ensuring a consistent latency response at the expense of burst capacity. + +4. Limit a group to 40% of 1 CPU, and allow accumulate up to 20% of 1 CPU + additionally, in case accumulation has been done. + + With 50ms period, 20ms quota will be equivalent to 40% of 1 CPU. + And 10ms burst will be equivalent to 20% of 1 CPU. + + # echo 20000 > cpu.cfs_quota_us /* quota = 20ms */ + # echo 50000 > cpu.cfs_period_us /* period = 50ms */ + # echo 10000 > cpu.cfs_burst_us /* burst = 10ms */ + + Larger buffer setting (no larger than quota) allows greater burst capacity. diff --git a/Documentation/sound/hd-audio/models.rst b/Documentation/sound/hd-audio/models.rst index 0ea967d345838..4c91abad7b35c 100644 --- a/Documentation/sound/hd-audio/models.rst +++ b/Documentation/sound/hd-audio/models.rst @@ -261,6 +261,10 @@ alc-sense-combo huawei-mbx-stereo Enable initialization verbs for Huawei MBX stereo speakers; might be risky, try this at your own risk +alc298-samsung-headphone + Samsung laptops with ALC298 +alc256-samsung-headphone + Samsung laptops with ALC256 ALC66x/67x/892 ============== diff --git a/Documentation/vm/index.rst b/Documentation/vm/index.rst index 30813498c74d9..7d3692df39fba 100644 --- a/Documentation/vm/index.rst +++ b/Documentation/vm/index.rst @@ -50,5 +50,6 @@ descriptions of data structures and algorithms. split_page_table_lock transhuge unevictable-lru + vmemmap_dedup z3fold zsmalloc diff --git a/Documentation/vm/memory-model.rst b/Documentation/vm/memory-model.rst index 58a12376b7df7..94db75ba7fbe2 100644 --- a/Documentation/vm/memory-model.rst +++ b/Documentation/vm/memory-model.rst @@ -52,8 +52,7 @@ wrapper :c:func:`free_area_init`. Yet, the mappings array is not usable until the call to :c:func:`memblock_free_all` that hands all the memory to the page allocator. -If an architecture enables `CONFIG_ARCH_HAS_HOLES_MEMORYMODEL` option, -it may free parts of the `mem_map` array that do not cover the +An architecture may free parts of the `mem_map` array that do not cover the actual physical pages. In such case, the architecture specific :c:func:`pfn_valid` implementation should take the holes in the `mem_map` into account. diff --git a/Documentation/vm/vmemmap_dedup.rst b/Documentation/vm/vmemmap_dedup.rst new file mode 100644 index 0000000000000..485ccf4f7b10b --- /dev/null +++ b/Documentation/vm/vmemmap_dedup.rst @@ -0,0 +1,173 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================================== +Free some vmemmap pages of HugeTLB +================================== + +The struct page structures (page structs) are used to describe a physical +page frame. By default, there is a one-to-one mapping from a page frame to +it's corresponding page struct. + +HugeTLB pages consist of multiple base page size pages and is supported by many +architectures. See Documentation/admin-guide/mm/hugetlbpage.rst for more +details. On the x86-64 architecture, HugeTLB pages of size 2MB and 1GB are +currently supported. Since the base page size on x86 is 4KB, a 2MB HugeTLB page +consists of 512 base pages and a 1GB HugeTLB page consists of 4096 base pages. +For each base page, there is a corresponding page struct. + +Within the HugeTLB subsystem, only the first 4 page structs are used to +contain unique information about a HugeTLB page. __NR_USED_SUBPAGE provides +this upper limit. The only 'useful' information in the remaining page structs +is the compound_head field, and this field is the same for all tail pages. + +By removing redundant page structs for HugeTLB pages, memory can be returned +to the buddy allocator for other uses. + +Different architectures support different HugeTLB pages. For example, the +following table is the HugeTLB page size supported by x86 and arm64 +architectures. Because arm64 supports 4k, 16k, and 64k base pages and +supports contiguous entries, so it supports many kinds of sizes of HugeTLB +page. + ++--------------+-----------+-----------------------------------------------+ +| Architecture | Page Size | HugeTLB Page Size | ++--------------+-----------+-----------+-----------+-----------+-----------+ +| x86-64 | 4KB | 2MB | 1GB | | | ++--------------+-----------+-----------+-----------+-----------+-----------+ +| | 4KB | 64KB | 2MB | 32MB | 1GB | +| +-----------+-----------+-----------+-----------+-----------+ +| arm64 | 16KB | 2MB | 32MB | 1GB | | +| +-----------+-----------+-----------+-----------+-----------+ +| | 64KB | 2MB | 512MB | 16GB | | ++--------------+-----------+-----------+-----------+-----------+-----------+ + +When the system boot up, every HugeTLB page has more than one struct page +structs which size is (unit: pages):: + + struct_size = HugeTLB_Size / PAGE_SIZE * sizeof(struct page) / PAGE_SIZE + +Where HugeTLB_Size is the size of the HugeTLB page. We know that the size +of the HugeTLB page is always n times PAGE_SIZE. So we can get the following +relationship:: + + HugeTLB_Size = n * PAGE_SIZE + +Then:: + + struct_size = n * PAGE_SIZE / PAGE_SIZE * sizeof(struct page) / PAGE_SIZE + = n * sizeof(struct page) / PAGE_SIZE + +We can use huge mapping at the pud/pmd level for the HugeTLB page. + +For the HugeTLB page of the pmd level mapping, then:: + + struct_size = n * sizeof(struct page) / PAGE_SIZE + = PAGE_SIZE / sizeof(pte_t) * sizeof(struct page) / PAGE_SIZE + = sizeof(struct page) / sizeof(pte_t) + = 64 / 8 + = 8 (pages) + +Where n is how many pte entries which one page can contains. So the value of +n is (PAGE_SIZE / sizeof(pte_t)). + +This optimization only supports 64-bit system, so the value of sizeof(pte_t) +is 8. And this optimization also applicable only when the size of struct page +is a power of two. In most cases, the size of struct page is 64 bytes (e.g. +x86-64 and arm64). So if we use pmd level mapping for a HugeTLB page, the +size of struct page structs of it is 8 page frames which size depends on the +size of the base page. + +For the HugeTLB page of the pud level mapping, then:: + + struct_size = PAGE_SIZE / sizeof(pmd_t) * struct_size(pmd) + = PAGE_SIZE / 8 * 8 (pages) + = PAGE_SIZE (pages) + +Where the struct_size(pmd) is the size of the struct page structs of a +HugeTLB page of the pmd level mapping. + +E.g.: A 2MB HugeTLB page on x86_64 consists in 8 page frames while 1GB +HugeTLB page consists in 4096. + +Next, we take the pmd level mapping of the HugeTLB page as an example to +show the internal implementation of this optimization. There are 8 pages +struct page structs associated with a HugeTLB page which is pmd mapped. + +Here is how things look before optimization:: + + HugeTLB struct pages(8 pages) page frame(8 pages) + +-----------+ ---virt_to_page---> +-----------+ mapping to +-----------+ + | | | 0 | -------------> | 0 | + | | +-----------+ +-----------+ + | | | 1 | -------------> | 1 | + | | +-----------+ +-----------+ + | | | 2 | -------------> | 2 | + | | +-----------+ +-----------+ + | | | 3 | -------------> | 3 | + | | +-----------+ +-----------+ + | | | 4 | -------------> | 4 | + | PMD | +-----------+ +-----------+ + | level | | 5 | -------------> | 5 | + | mapping | +-----------+ +-----------+ + | | | 6 | -------------> | 6 | + | | +-----------+ +-----------+ + | | | 7 | -------------> | 7 | + | | +-----------+ +-----------+ + | | + | | + | | + +-----------+ + +The value of page->compound_head is the same for all tail pages. The first +page of page structs (page 0) associated with the HugeTLB page contains the 4 +page structs necessary to describe the HugeTLB. The only use of the remaining +pages of page structs (page 1 to page 7) is to point to page->compound_head. +Therefore, we can remap pages 1 to 7 to page 0. Only 1 page of page structs +will be used for each HugeTLB page. This will allow us to free the remaining +7 pages to the buddy allocator. + +Here is how things look after remapping:: + + HugeTLB struct pages(8 pages) page frame(8 pages) + +-----------+ ---virt_to_page---> +-----------+ mapping to +-----------+ + | | | 0 | -------------> | 0 | + | | +-----------+ +-----------+ + | | | 1 | ---------------^ ^ ^ ^ ^ ^ ^ + | | +-----------+ | | | | | | + | | | 2 | -----------------+ | | | | | + | | +-----------+ | | | | | + | | | 3 | -------------------+ | | | | + | | +-----------+ | | | | + | | | 4 | ---------------------+ | | | + | PMD | +-----------+ | | | + | level | | 5 | -----------------------+ | | + | mapping | +-----------+ | | + | | | 6 | -------------------------+ | + | | +-----------+ | + | | | 7 | ---------------------------+ + | | +-----------+ + | | + | | + | | + +-----------+ + +When a HugeTLB is freed to the buddy system, we should allocate 7 pages for +vmemmap pages and restore the previous mapping relationship. + +For the HugeTLB page of the pud level mapping. It is similar to the former. +We also can use this approach to free (PAGE_SIZE - 1) vmemmap pages. + +Apart from the HugeTLB page of the pmd/pud level mapping, some architectures +(e.g. aarch64) provides a contiguous bit in the translation table entries +that hints to the MMU to indicate that it is one of a contiguous set of +entries that can be cached in a single TLB entry. + +The contiguous bit is used to increase the mapping size at the pmd and pte +(last) level. So this type of HugeTLB page can be optimized only when its +size of the struct page structs is greater than 1 page. + +Notice: The head vmemmap page is not freed to the buddy allocator and all +tail vmemmap pages are mapped to the head vmemmap page frame. So we can see +more than one struct page struct with PG_head (e.g. 8 per 2 MB HugeTLB page) +associated with each HugeTLB page. The compound_head() can handle this +correctly (more details refer to the comment above compound_head()). diff --git a/MAINTAINERS b/MAINTAINERS index 40b7b6d1d1e79..1ce80bf6681e4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13651,6 +13651,7 @@ F: arch/mips/configs/generic/board-ranchu.config RANDOM NUMBER DRIVER M: "Theodore Ts'o" +M: Jason A. Donenfeld S: Maintained F: drivers/char/random.c diff --git a/Makefile b/Makefile index e99fabc4dfc8c..74abb7e389f33 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 4 -SUBLEVEL = 143 +SUBLEVEL = 210 EXTRAVERSION = NAME = Kleptomaniac Octopus @@ -1022,7 +1022,7 @@ HOST_LIBELF_LIBS = $(shell pkg-config libelf --libs 2>/dev/null || echo -lelf) ifdef CONFIG_STACK_VALIDATION has_libelf := $(call try-run,\ - echo "int main() {}" | $(HOSTCC) -xc -o /dev/null $(HOST_LIBELF_LIBS) -,1,0) + echo "int main() {}" | $(HOSTCC) $(KBUILD_HOSTLDFLAGS) -xc -o /dev/null $(HOST_LIBELF_LIBS) -,1,0) ifeq ($(has_libelf),1) objtool_target := tools/objtool FORCE else @@ -1073,7 +1073,7 @@ PHONY += autoksyms_recursive ifdef CONFIG_TRIM_UNUSED_KSYMS autoksyms_recursive: descend modules.order $(Q)$(CONFIG_SHELL) $(srctree)/scripts/adjust_autoksyms.sh \ - "$(MAKE) -f $(srctree)/Makefile vmlinux" + "$(MAKE) -f $(srctree)/Makefile autoksyms_recursive" endif # For the kernel to actually contain only the needed exported symbols, diff --git a/arch/Kconfig b/arch/Kconfig index bb67f906faa22..2884f1b73aabe 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -147,8 +147,8 @@ config HAVE_64BIT_ALIGNED_ACCESS accesses are required to be 64 bit aligned in this way even though it is not a 64 bit architecture. - See Documentation/unaligned-memory-access.txt for more - information on the topic of unaligned memory accesses. + See Documentation/core-api/unaligned-memory-access.rst for + more information on the topic of unaligned memory accesses. config HAVE_EFFICIENT_UNALIGNED_ACCESS bool @@ -919,27 +919,6 @@ config STRICT_MODULE_RWX config ARCH_HAS_PHYS_TO_DMA bool -config ARCH_HAS_REFCOUNT - bool - help - An architecture selects this when it has implemented refcount_t - using open coded assembly primitives that provide an optimized - refcount_t implementation, possibly at the expense of some full - refcount state checks of CONFIG_REFCOUNT_FULL=y. - - The refcount overflow check behavior, however, must be retained. - Catching overflows is the primary security concern for protecting - against bugs in reference counts. - -config REFCOUNT_FULL - bool "Perform full reference count validation at the expense of speed" - help - Enabling this switches the refcounting infrastructure from a fast - unchecked atomic_t implementation to a fully state checked - implementation, which can be (slightly) slower but provides protections - against various use-after-free conditions that can be used in - security flaw exploits. - config HAVE_ARCH_COMPILER_H bool help diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h index 103270d5a9fc6..66a384a4ddbad 100644 --- a/arch/alpha/include/asm/io.h +++ b/arch/alpha/include/asm/io.h @@ -61,7 +61,7 @@ extern inline void set_hae(unsigned long new_hae) * Change virtual addresses to physical addresses and vv. */ #ifdef USE_48_BIT_KSEG -static inline unsigned long virt_to_phys(void *address) +static inline unsigned long virt_to_phys(volatile void *address) { return (unsigned long)address - IDENT_ADDR; } @@ -71,7 +71,7 @@ static inline void * phys_to_virt(unsigned long address) return (void *) (address + IDENT_ADDR); } #else -static inline unsigned long virt_to_phys(void *address) +static inline unsigned long virt_to_phys(volatile void *address) { unsigned long phys = (unsigned long)address; @@ -107,7 +107,7 @@ static inline void * phys_to_virt(unsigned long address) extern unsigned long __direct_map_base; extern unsigned long __direct_map_size; -static inline unsigned long __deprecated virt_to_bus(void *address) +static inline unsigned long __deprecated virt_to_bus(volatile void *address) { unsigned long phys = virt_to_phys(address); unsigned long bus = phys + __direct_map_base; diff --git a/arch/alpha/include/asm/timex.h b/arch/alpha/include/asm/timex.h index b565cc6f408e9..f89798da8a147 100644 --- a/arch/alpha/include/asm/timex.h +++ b/arch/alpha/include/asm/timex.h @@ -28,5 +28,6 @@ static inline cycles_t get_cycles (void) __asm__ __volatile__ ("rpcc %0" : "=r"(ret)); return ret; } +#define get_cycles get_cycles #endif diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c index 438b10c44d732..2b7a314b84522 100644 --- a/arch/alpha/kernel/srmcons.c +++ b/arch/alpha/kernel/srmcons.c @@ -59,7 +59,7 @@ srmcons_do_receive_chars(struct tty_port *port) } while((result.bits.status & 1) && (++loops < 10)); if (count) - tty_schedule_flip(port); + tty_flip_buffer_push(port); return count; } diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 8383155c8c824..a9d0b5310165f 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -29,6 +29,7 @@ config ARC select GENERIC_SMP_IDLE_THREAD select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK + select HAVE_COPY_THREAD_TLS select HAVE_DEBUG_STACKOVERFLOW select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_IOREMAP_PROT diff --git a/arch/arc/include/asm/syscalls.h b/arch/arc/include/asm/syscalls.h index 7ddba13e9b599..c3f4714a4f5cd 100644 --- a/arch/arc/include/asm/syscalls.h +++ b/arch/arc/include/asm/syscalls.h @@ -11,6 +11,7 @@ #include int sys_clone_wrapper(int, int, int, int, int); +int sys_clone3_wrapper(void *, size_t); int sys_cacheflush(uint32_t, uint32_t uint32_t); int sys_arc_settls(void *); int sys_arc_gettls(void); diff --git a/arch/arc/include/uapi/asm/unistd.h b/arch/arc/include/uapi/asm/unistd.h index 5eafa11151623..fa2713ae6bea5 100644 --- a/arch/arc/include/uapi/asm/unistd.h +++ b/arch/arc/include/uapi/asm/unistd.h @@ -21,6 +21,7 @@ #define __ARCH_WANT_SET_GET_RLIMIT #define __ARCH_WANT_SYS_EXECVE #define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_SYS_CLONE3 #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_TIME32_SYSCALLS diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index a0caf81dff038..6b8dbb45b992c 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -35,6 +35,18 @@ ENTRY(sys_clone_wrapper) b .Lret_from_system_call END(sys_clone_wrapper) +ENTRY(sys_clone3_wrapper) + SAVE_CALLEE_SAVED_USER + bl @sys_clone3 + DISCARD_CALLEE_SAVED_USER + + GET_CURR_THR_INFO_FLAGS r10 + btst r10, TIF_SYSCALL_TRACE + bnz tracesys_exit + + b .Lret_from_system_call +END(sys_clone3_wrapper) + ENTRY(ret_from_fork) ; when the forked child comes here from the __switch_to function ; r0 has the last task pointer. @@ -187,6 +199,7 @@ tracesys_exit: st r0, [sp, PT_r0] ; sys call return value in pt_regs ;POST Sys Call Ptrace Hook + mov r0, sp ; pt_regs needed bl @syscall_trace_exit b ret_from_exception ; NOT ret_from_system_call at is saves r0 which ; we'd done before calling post hook above diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index 2c6d3865cc011..7cd10fabd60f4 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -171,9 +171,8 @@ asmlinkage void ret_from_fork(void); * | user_r25 | * ------------------ <===== END of PAGE */ -int copy_thread(unsigned long clone_flags, - unsigned long usp, unsigned long kthread_arg, - struct task_struct *p) +int copy_thread_tls(unsigned long clone_flags, unsigned long usp, + unsigned long kthread_arg, struct task_struct *p, unsigned long tls) { struct pt_regs *c_regs; /* child's pt_regs */ unsigned long *childksp; /* to unwind out of __switch_to() */ @@ -231,7 +230,7 @@ int copy_thread(unsigned long clone_flags, * set task's userland tls data ptr from 4th arg * clone C-lib call is difft from clone sys-call */ - task_thread_info(p)->thr_ptr = regs->r3; + task_thread_info(p)->thr_ptr = tls; } else { /* Normal fork case: set parent's TLS ptr in child */ task_thread_info(p)->thr_ptr = diff --git a/arch/arc/kernel/sys.c b/arch/arc/kernel/sys.c index fddecc76efb7c..1069446bdc589 100644 --- a/arch/arc/kernel/sys.c +++ b/arch/arc/kernel/sys.c @@ -7,6 +7,7 @@ #include #define sys_clone sys_clone_wrapper +#define sys_clone3 sys_clone3_wrapper #undef __SYSCALL #define __SYSCALL(nr, call) [nr] = (call), diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S index 6c693a9d29b6d..0391b8293ad85 100644 --- a/arch/arc/kernel/vmlinux.lds.S +++ b/arch/arc/kernel/vmlinux.lds.S @@ -88,6 +88,8 @@ SECTIONS CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT + IRQENTRY_TEXT + SOFTIRQENTRY_TEXT *(.fixup) *(.gnu.warning) } diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index a2fbea3ee07c7..102418ac5ff4a 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -1123,7 +1123,7 @@ void clear_user_page(void *to, unsigned long u_vaddr, struct page *page) clear_page(to); clear_bit(PG_dc_clean, &page->flags); } - +EXPORT_SYMBOL(clear_user_page); /********************************************************************** * Explicit Cache flush request from user space via syscall diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 9aa88715f196c..a4364cce85f8d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -26,7 +26,7 @@ config ARM select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_HAS_GCOV_PROFILE_ALL - select ARCH_KEEP_MEMBLOCK if HAVE_ARCH_PFN_VALID || KEXEC + select ARCH_KEEP_MEMBLOCK select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_NO_SG_CHAIN if !ARM_HAS_SG_CHAIN select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX @@ -85,6 +85,7 @@ config ARM select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG select HAVE_FUNCTION_TRACER if !XIP_KERNEL && (CC_IS_GCC || CLANG_VERSION >= 100000) + select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_GCC_PLUGINS select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7) select HAVE_IDE if PCI || ISA || PCMCIA @@ -118,7 +119,6 @@ config ARM select OLD_SIGSUSPEND3 select PCI_SYSCALL if PCI select PERF_USE_VMALLOC - select REFCOUNT_FULL select RTC_LIB select SYS_SUPPORTS_APM_EMULATION # Above selects are sorted alphabetically; please add new ones @@ -520,7 +520,6 @@ config ARCH_S3C24XX config ARCH_OMAP1 bool "TI OMAP1" depends on MMU - select ARCH_HAS_HOLES_MEMORYMODEL select ARCH_OMAP select CLKDEV_LOOKUP select CLKSRC_MMIO @@ -1517,9 +1516,6 @@ config OABI_COMPAT UNPREDICTABLE (in fact it can be predicted that it won't work at all). If in doubt say N. -config ARCH_HAS_HOLES_MEMORYMODEL - bool - config ARCH_SPARSEMEM_ENABLE bool @@ -1527,7 +1523,7 @@ config ARCH_SPARSEMEM_DEFAULT def_bool ARCH_SPARSEMEM_ENABLE config HAVE_ARCH_PFN_VALID - def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM + def_bool y config HIGHMEM bool "High Memory Support" diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 1fc32b611f8a4..4f098edfbf202 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -66,15 +66,15 @@ KBUILD_CFLAGS += $(call cc-option,-fno-ipa-sra) # Note that GCC does not numerically define an architecture version # macro, but instead defines a whole series of macros which makes # testing for a specific architecture or later rather impossible. -arch-$(CONFIG_CPU_32v7M) =-D__LINUX_ARM_ARCH__=7 -march=armv7-m -Wa,-march=armv7-m -arch-$(CONFIG_CPU_32v7) =-D__LINUX_ARM_ARCH__=7 $(call cc-option,-march=armv7-a,-march=armv5t -Wa$(comma)-march=armv7-a) -arch-$(CONFIG_CPU_32v6) =-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6,-march=armv5t -Wa$(comma)-march=armv6) +arch-$(CONFIG_CPU_32v7M) =-D__LINUX_ARM_ARCH__=7 -march=armv7-m +arch-$(CONFIG_CPU_32v7) =-D__LINUX_ARM_ARCH__=7 -march=armv7-a +arch-$(CONFIG_CPU_32v6) =-D__LINUX_ARM_ARCH__=6 -march=armv6 # Only override the compiler option if ARMv6. The ARMv6K extensions are # always available in ARMv7 ifeq ($(CONFIG_CPU_32v6),y) -arch-$(CONFIG_CPU_32v6K) =-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6k,-march=armv5t -Wa$(comma)-march=armv6k) +arch-$(CONFIG_CPU_32v6K) =-D__LINUX_ARM_ARCH__=6 -march=armv6k endif -arch-$(CONFIG_CPU_32v5) =-D__LINUX_ARM_ARCH__=5 $(call cc-option,-march=armv5te,-march=armv4t) +arch-$(CONFIG_CPU_32v5) =-D__LINUX_ARM_ARCH__=5 -march=armv5te arch-$(CONFIG_CPU_32v4T) =-D__LINUX_ARM_ARCH__=4 -march=armv4t arch-$(CONFIG_CPU_32v4) =-D__LINUX_ARM_ARCH__=4 -march=armv4 arch-$(CONFIG_CPU_32v3) =-D__LINUX_ARM_ARCH__=3 -march=armv3m @@ -88,7 +88,7 @@ tune-$(CONFIG_CPU_ARM720T) =-mtune=arm7tdmi tune-$(CONFIG_CPU_ARM740T) =-mtune=arm7tdmi tune-$(CONFIG_CPU_ARM9TDMI) =-mtune=arm9tdmi tune-$(CONFIG_CPU_ARM940T) =-mtune=arm9tdmi -tune-$(CONFIG_CPU_ARM946E) =$(call cc-option,-mtune=arm9e,-mtune=arm9tdmi) +tune-$(CONFIG_CPU_ARM946E) =-mtune=arm9e tune-$(CONFIG_CPU_ARM920T) =-mtune=arm9tdmi tune-$(CONFIG_CPU_ARM922T) =-mtune=arm9tdmi tune-$(CONFIG_CPU_ARM925T) =-mtune=arm9tdmi @@ -96,11 +96,11 @@ tune-$(CONFIG_CPU_ARM926T) =-mtune=arm9tdmi tune-$(CONFIG_CPU_FA526) =-mtune=arm9tdmi tune-$(CONFIG_CPU_SA110) =-mtune=strongarm110 tune-$(CONFIG_CPU_SA1100) =-mtune=strongarm1100 -tune-$(CONFIG_CPU_XSCALE) =$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale -tune-$(CONFIG_CPU_XSC3) =$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale -tune-$(CONFIG_CPU_FEROCEON) =$(call cc-option,-mtune=marvell-f,-mtune=xscale) -tune-$(CONFIG_CPU_V6) =$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm) -tune-$(CONFIG_CPU_V6K) =$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm) +tune-$(CONFIG_CPU_XSCALE) =-mtune=xscale +tune-$(CONFIG_CPU_XSC3) =-mtune=xscale +tune-$(CONFIG_CPU_FEROCEON) =-mtune=xscale +tune-$(CONFIG_CPU_V6) =-mtune=arm1136j-s +tune-$(CONFIG_CPU_V6K) =-mtune=arm1136j-s # Evaluate tune cc-option calls now tune-y := $(tune-y) diff --git a/arch/arm/boot/bootp/init.S b/arch/arm/boot/bootp/init.S index 5c476bd2b4ce9..b562da2f70408 100644 --- a/arch/arm/boot/bootp/init.S +++ b/arch/arm/boot/bootp/init.S @@ -13,7 +13,7 @@ * size immediately following the kernel, we could build this into * a binary blob, and concatenate the zImage using the cat command. */ - .section .start,#alloc,#execinstr + .section .start, "ax" .type _start, #function .globl _start diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index f0b3a9281d69b..fb6cb24bde5c9 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -90,6 +90,8 @@ $(addprefix $(obj)/,$(libfdt_objs) atags_to_fdt.o): \ $(addprefix $(obj)/,$(libfdt_hdrs)) ifeq ($(CONFIG_ARM_ATAG_DTB_COMPAT),y) +CFLAGS_REMOVE_atags_to_fdt.o += -Wframe-larger-than=${CONFIG_FRAME_WARN} +CFLAGS_atags_to_fdt.o += -Wframe-larger-than=1280 OBJS += $(libfdt_objs) atags_to_fdt.o endif diff --git a/arch/arm/boot/compressed/big-endian.S b/arch/arm/boot/compressed/big-endian.S index 88e2a88d324b2..0e092c36da2f2 100644 --- a/arch/arm/boot/compressed/big-endian.S +++ b/arch/arm/boot/compressed/big-endian.S @@ -6,7 +6,7 @@ * Author: Nicolas Pitre */ - .section ".start", #alloc, #execinstr + .section ".start", "ax" mrc p15, 0, r0, c1, c0, 0 @ read control reg orr r0, r0, #(1 << 7) @ enable big endian mode diff --git a/arch/arm/boot/compressed/decompress.c b/arch/arm/boot/compressed/decompress.c index aa075d8372ea2..74255e8198314 100644 --- a/arch/arm/boot/compressed/decompress.c +++ b/arch/arm/boot/compressed/decompress.c @@ -47,7 +47,10 @@ extern char * strchrnul(const char *, int); #endif #ifdef CONFIG_KERNEL_XZ +/* Prevent KASAN override of string helpers in decompressor */ +#undef memmove #define memmove memmove +#undef memcpy #define memcpy memcpy #include "../../../../lib/decompress_unxz.c" #endif diff --git a/arch/arm/boot/compressed/efi-header.S b/arch/arm/boot/compressed/efi-header.S index a5983588f96b8..dd53d6eb53ade 100644 --- a/arch/arm/boot/compressed/efi-header.S +++ b/arch/arm/boot/compressed/efi-header.S @@ -9,16 +9,22 @@ #include .macro __nop -#ifdef CONFIG_EFI_STUB - @ This is almost but not quite a NOP, since it does clobber the - @ condition flags. But it is the best we can do for EFI, since - @ PE/COFF expects the magic string "MZ" at offset 0, while the - @ ARM/Linux boot protocol expects an executable instruction - @ there. - .inst MZ_MAGIC | (0x1310 << 16) @ tstne r0, #0x4d000 -#else AR_CLASS( mov r0, r0 ) M_CLASS( nop.w ) + .endm + + .macro __initial_nops +#ifdef CONFIG_EFI_STUB + @ This is a two-instruction NOP, which happens to bear the + @ PE/COFF signature "MZ" in the first two bytes, so the kernel + @ is accepted as an EFI binary. Booting via the UEFI stub + @ will not execute those instructions, but the ARM/Linux + @ boot protocol does, so we need some NOPs here. + .inst MZ_MAGIC | (0xe225 << 16) @ eor r5, r5, 0x4d000 + eor r5, r5, 0x4d000 @ undo previous insn +#else + __nop + __nop #endif .endm diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index cbe126297f549..17f87f4c74f51 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -140,7 +140,7 @@ #endif .endm - .section ".start", #alloc, #execinstr + .section ".start", "ax" /* * sort out different calling conventions */ @@ -165,7 +165,8 @@ start: * were patching the initial instructions of the kernel, i.e * had started to exploit this "patch area". */ - .rept 7 + __initial_nops + .rept 5 __nop .endr #ifndef CONFIG_THUMB2_KERNEL @@ -1273,7 +1274,7 @@ iflush: __armv5tej_mmu_cache_flush: tst r4, #1 movne pc, lr -1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate D cache +1: mrc p15, 0, APSR_nzcv, c7, c14, 3 @ test,clean,invalidate D cache bne 1b mcr p15, 0, r0, c7, c5, 0 @ flush I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB diff --git a/arch/arm/boot/compressed/piggy.S b/arch/arm/boot/compressed/piggy.S index 0284f84dcf380..27577644ee721 100644 --- a/arch/arm/boot/compressed/piggy.S +++ b/arch/arm/boot/compressed/piggy.S @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ - .section .piggydata,#alloc + .section .piggydata, "a" .globl input_data input_data: .incbin "arch/arm/boot/compressed/piggy_data" diff --git a/arch/arm/boot/dts/am3517-evm.dts b/arch/arm/boot/dts/am3517-evm.dts index a1fd3e63e86ec..3db30ce134b95 100644 --- a/arch/arm/boot/dts/am3517-evm.dts +++ b/arch/arm/boot/dts/am3517-evm.dts @@ -160,6 +160,8 @@ /* HS USB Host PHY on PORT 1 */ hsusb1_phy: hsusb1_phy { + pinctrl-names = "default"; + pinctrl-0 = <&hsusb1_rst_pins>; compatible = "usb-nop-xceiv"; reset-gpios = <&gpio2 25 GPIO_ACTIVE_LOW>; /* gpio_57 */ #phy-cells = <0>; @@ -167,7 +169,9 @@ }; &davinci_emac { - status = "okay"; + pinctrl-names = "default"; + pinctrl-0 = <ðernet_pins>; + status = "okay"; }; &davinci_mdio { @@ -192,6 +196,8 @@ }; &i2c2 { + pinctrl-names = "default"; + pinctrl-0 = <&i2c2_pins>; clock-frequency = <400000>; /* User DIP swithes [1:8] / User LEDS [1:2] */ tca6416: gpio@21 { @@ -204,6 +210,8 @@ }; &i2c3 { + pinctrl-names = "default"; + pinctrl-0 = <&i2c3_pins>; clock-frequency = <400000>; }; @@ -222,6 +230,8 @@ }; &usbhshost { + pinctrl-names = "default"; + pinctrl-0 = <&hsusb1_pins>; port1-mode = "ehci-phy"; }; @@ -230,8 +240,35 @@ }; &omap3_pmx_core { - pinctrl-names = "default"; - pinctrl-0 = <&hsusb1_rst_pins>; + + ethernet_pins: pinmux_ethernet_pins { + pinctrl-single,pins = < + OMAP3_CORE1_IOPAD(0x21fe, PIN_INPUT | MUX_MODE0) /* rmii_mdio_data */ + OMAP3_CORE1_IOPAD(0x2200, MUX_MODE0) /* rmii_mdio_clk */ + OMAP3_CORE1_IOPAD(0x2202, PIN_INPUT_PULLDOWN | MUX_MODE0) /* rmii_rxd0 */ + OMAP3_CORE1_IOPAD(0x2204, PIN_INPUT_PULLDOWN | MUX_MODE0) /* rmii_rxd1 */ + OMAP3_CORE1_IOPAD(0x2206, PIN_INPUT_PULLDOWN | MUX_MODE0) /* rmii_crs_dv */ + OMAP3_CORE1_IOPAD(0x2208, PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* rmii_rxer */ + OMAP3_CORE1_IOPAD(0x220a, PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* rmii_txd0 */ + OMAP3_CORE1_IOPAD(0x220c, PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* rmii_txd1 */ + OMAP3_CORE1_IOPAD(0x220e, PIN_OUTPUT_PULLDOWN |MUX_MODE0) /* rmii_txen */ + OMAP3_CORE1_IOPAD(0x2210, PIN_INPUT_PULLDOWN | MUX_MODE0) /* rmii_50mhz_clk */ + >; + }; + + i2c2_pins: pinmux_i2c2_pins { + pinctrl-single,pins = < + OMAP3_CORE1_IOPAD(0x21be, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c2_scl */ + OMAP3_CORE1_IOPAD(0x21c0, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c2_sda */ + >; + }; + + i2c3_pins: pinmux_i2c3_pins { + pinctrl-single,pins = < + OMAP3_CORE1_IOPAD(0x21c2, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c3_scl */ + OMAP3_CORE1_IOPAD(0x21c4, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c3_sda */ + >; + }; leds_pins: pinmux_leds_pins { pinctrl-single,pins = < @@ -299,8 +336,6 @@ }; &omap3_pmx_core2 { - pinctrl-names = "default"; - pinctrl-0 = <&hsusb1_pins>; hsusb1_pins: pinmux_hsusb1_pins { pinctrl-single,pins = < diff --git a/arch/arm/boot/dts/am3517-som.dtsi b/arch/arm/boot/dts/am3517-som.dtsi index 8b669e2eafec4..f7b680f6c48ad 100644 --- a/arch/arm/boot/dts/am3517-som.dtsi +++ b/arch/arm/boot/dts/am3517-som.dtsi @@ -69,6 +69,8 @@ }; &i2c1 { + pinctrl-names = "default"; + pinctrl-0 = <&i2c1_pins>; clock-frequency = <400000>; s35390a: s35390a@30 { @@ -179,6 +181,13 @@ &omap3_pmx_core { + i2c1_pins: pinmux_i2c1_pins { + pinctrl-single,pins = < + OMAP3_CORE1_IOPAD(0x21ba, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c1_scl */ + OMAP3_CORE1_IOPAD(0x21bc, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c1_sda */ + >; + }; + wl12xx_buffer_pins: pinmux_wl12xx_buffer_pins { pinctrl-single,pins = < OMAP3_CORE1_IOPAD(0x2156, PIN_OUTPUT | MUX_MODE4) /* mmc1_dat7.gpio_129 */ diff --git a/arch/arm/boot/dts/armada-38x.dtsi b/arch/arm/boot/dts/armada-38x.dtsi index 669da3a33d82c..5b82e58a1cf06 100644 --- a/arch/arm/boot/dts/armada-38x.dtsi +++ b/arch/arm/boot/dts/armada-38x.dtsi @@ -165,7 +165,7 @@ }; uart0: serial@12000 { - compatible = "marvell,armada-38x-uart"; + compatible = "marvell,armada-38x-uart", "ns16550a"; reg = <0x12000 0x100>; reg-shift = <2>; interrupts = ; @@ -175,7 +175,7 @@ }; uart1: serial@12100 { - compatible = "marvell,armada-38x-uart"; + compatible = "marvell,armada-38x-uart", "ns16550a"; reg = <0x12100 0x100>; reg-shift = <2>; interrupts = ; diff --git a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi index 5b8bf58e89cb4..ac723fe898c76 100644 --- a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi +++ b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi @@ -117,11 +117,6 @@ groups = "FWSPID"; }; - pinctrl_fwqspid_default: fwqspid_default { - function = "FWQSPID"; - groups = "FWQSPID"; - }; - pinctrl_fwspiwp_default: fwspiwp_default { function = "FWSPIWP"; groups = "FWSPIWP"; @@ -208,12 +203,12 @@ }; pinctrl_hvi3c3_default: hvi3c3_default { - function = "HVI3C3"; + function = "I3C3"; groups = "HVI3C3"; }; pinctrl_hvi3c4_default: hvi3c4_default { - function = "HVI3C4"; + function = "I3C4"; groups = "HVI3C4"; }; @@ -653,12 +648,12 @@ }; pinctrl_qspi1_default: qspi1_default { - function = "QSPI1"; + function = "SPI1"; groups = "QSPI1"; }; pinctrl_qspi2_default: qspi2_default { - function = "QSPI2"; + function = "SPI2"; groups = "QSPI2"; }; diff --git a/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts b/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts index 89f0c9979b89c..4f63158d6b9bd 100644 --- a/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts +++ b/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts @@ -69,7 +69,6 @@ isc: isc@f0008000 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_isc_base &pinctrl_isc_data_8bit &pinctrl_isc_data_9_10 &pinctrl_isc_data_11_12>; - status = "okay"; }; qspi1: spi@f0024000 { diff --git a/arch/arm/boot/dts/at91-tse850-3.dts b/arch/arm/boot/dts/at91-tse850-3.dts index 3ca97b47c69ce..7e5c598e7e68f 100644 --- a/arch/arm/boot/dts/at91-tse850-3.dts +++ b/arch/arm/boot/dts/at91-tse850-3.dts @@ -262,7 +262,7 @@ &macb1 { status = "okay"; - phy-mode = "rgmii"; + phy-mode = "rmii"; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi index bda22700110cd..287566e09a673 100644 --- a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi +++ b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi @@ -217,6 +217,12 @@ wm8731: wm8731@1b { compatible = "wm8731"; reg = <0x1b>; + + /* PCK0 at 12MHz */ + clocks = <&pmc PMC_TYPE_SYSTEM 8>; + clock-names = "mclk"; + assigned-clocks = <&pmc PMC_TYPE_SYSTEM 8>; + assigned-clock-rates = <12000000>; }; }; diff --git a/arch/arm/boot/dts/bcm-nsp.dtsi b/arch/arm/boot/dts/bcm-nsp.dtsi index 43ff85d31dc12..5a1352fd90d16 100644 --- a/arch/arm/boot/dts/bcm-nsp.dtsi +++ b/arch/arm/boot/dts/bcm-nsp.dtsi @@ -77,7 +77,7 @@ interrupt-affinity = <&cpu0>, <&cpu1>; }; - mpcore@19000000 { + mpcore-bus@19000000 { compatible = "simple-bus"; ranges = <0x00000000 0x19000000 0x00023000>; #address-cells = <1>; @@ -217,7 +217,7 @@ #dma-cells = <1>; }; - sdio: sdhci@21000 { + sdio: mmc@21000 { compatible = "brcm,sdhci-iproc-cygnus"; reg = <0x21000 0x100>; interrupts = ; diff --git a/arch/arm/boot/dts/bcm2835-rpi-b.dts b/arch/arm/boot/dts/bcm2835-rpi-b.dts index 2b69957e0113e..1838e0fa0ff59 100644 --- a/arch/arm/boot/dts/bcm2835-rpi-b.dts +++ b/arch/arm/boot/dts/bcm2835-rpi-b.dts @@ -53,18 +53,17 @@ "GPIO18", "NC", /* GPIO19 */ "NC", /* GPIO20 */ - "GPIO21", + "CAM_GPIO0", "GPIO22", "GPIO23", "GPIO24", "GPIO25", "NC", /* GPIO26 */ - "CAM_GPIO0", - /* Binary number representing build/revision */ - "CONFIG0", - "CONFIG1", - "CONFIG2", - "CONFIG3", + "GPIO27", + "GPIO28", + "GPIO29", + "GPIO30", + "GPIO31", "NC", /* GPIO32 */ "NC", /* GPIO33 */ "NC", /* GPIO34 */ diff --git a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts index f65448c01e317..34a85ad9f03c2 100644 --- a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts +++ b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts @@ -74,16 +74,18 @@ "GPIO27", "SDA0", "SCL0", - "NC", /* GPIO30 */ - "NC", /* GPIO31 */ - "NC", /* GPIO32 */ - "NC", /* GPIO33 */ - "NC", /* GPIO34 */ - "NC", /* GPIO35 */ - "NC", /* GPIO36 */ - "NC", /* GPIO37 */ - "NC", /* GPIO38 */ - "NC", /* GPIO39 */ + /* Used by BT module */ + "CTS0", + "RTS0", + "TXD0", + "RXD0", + /* Used by Wifi */ + "SD1_CLK", + "SD1_CMD", + "SD1_DATA0", + "SD1_DATA1", + "SD1_DATA2", + "SD1_DATA3", "CAM_GPIO1", /* GPIO40 */ "WL_ON", /* GPIO41 */ "NC", /* GPIO42 */ diff --git a/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts b/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts index 74ed6d0478070..d9f63fc59f165 100644 --- a/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts +++ b/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts @@ -43,7 +43,7 @@ #gpio-cells = <2>; gpio-line-names = "BT_ON", "WL_ON", - "STATUS_LED_R", + "PWR_LED_R", "LAN_RUN", "", "CAM_GPIO0", diff --git a/arch/arm/boot/dts/bcm2837-rpi-cm3-io3.dts b/arch/arm/boot/dts/bcm2837-rpi-cm3-io3.dts index 588d9411ceb61..3dfce4312dfc4 100644 --- a/arch/arm/boot/dts/bcm2837-rpi-cm3-io3.dts +++ b/arch/arm/boot/dts/bcm2837-rpi-cm3-io3.dts @@ -63,8 +63,8 @@ "GPIO43", "GPIO44", "GPIO45", - "GPIO46", - "GPIO47", + "SMPS_SCL", + "SMPS_SDA", /* Used by eMMC */ "SD_CLK_R", "SD_CMD_R", diff --git a/arch/arm/boot/dts/bcm2837.dtsi b/arch/arm/boot/dts/bcm2837.dtsi index beb6c502dadc7..bcad098a7fccb 100644 --- a/arch/arm/boot/dts/bcm2837.dtsi +++ b/arch/arm/boot/dts/bcm2837.dtsi @@ -38,12 +38,26 @@ #size-cells = <0>; enable-method = "brcm,bcm2836-smp"; // for ARM 32-bit + /* Source for d/i-cache-line-size and d/i-cache-sets + * https://developer.arm.com/documentation/ddi0500/e/level-1-memory-system + * /about-the-l1-memory-system?lang=en + * + * Source for d/i-cache-size + * https://magpi.raspberrypi.com/articles/raspberry-pi-3-specs-benchmarks + */ cpu0: cpu@0 { device_type = "cpu"; compatible = "arm,cortex-a53"; reg = <0>; enable-method = "spin-table"; cpu-release-addr = <0x0 0x000000d8>; + d-cache-size = <0x8000>; + d-cache-line-size = <64>; + d-cache-sets = <128>; // 32KiB(size)/64(line-size)=512ways/4-way set + i-cache-size = <0x8000>; + i-cache-line-size = <64>; + i-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set + next-level-cache = <&l2>; }; cpu1: cpu@1 { @@ -52,6 +66,13 @@ reg = <1>; enable-method = "spin-table"; cpu-release-addr = <0x0 0x000000e0>; + d-cache-size = <0x8000>; + d-cache-line-size = <64>; + d-cache-sets = <128>; // 32KiB(size)/64(line-size)=512ways/4-way set + i-cache-size = <0x8000>; + i-cache-line-size = <64>; + i-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set + next-level-cache = <&l2>; }; cpu2: cpu@2 { @@ -60,6 +81,13 @@ reg = <2>; enable-method = "spin-table"; cpu-release-addr = <0x0 0x000000e8>; + d-cache-size = <0x8000>; + d-cache-line-size = <64>; + d-cache-sets = <128>; // 32KiB(size)/64(line-size)=512ways/4-way set + i-cache-size = <0x8000>; + i-cache-line-size = <64>; + i-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set + next-level-cache = <&l2>; }; cpu3: cpu@3 { @@ -68,6 +96,27 @@ reg = <3>; enable-method = "spin-table"; cpu-release-addr = <0x0 0x000000f0>; + d-cache-size = <0x8000>; + d-cache-line-size = <64>; + d-cache-sets = <128>; // 32KiB(size)/64(line-size)=512ways/4-way set + i-cache-size = <0x8000>; + i-cache-line-size = <64>; + i-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set + next-level-cache = <&l2>; + }; + + /* Source for cache-line-size + cache-sets + * https://developer.arm.com/documentation/ddi0500 + * /e/level-2-memory-system/about-the-l2-memory-system?lang=en + * Source for cache-size + * https://datasheets.raspberrypi.com/cm/cm1-and-cm3-datasheet.pdf + */ + l2: l2-cache0 { + compatible = "cache"; + cache-size = <0x80000>; + cache-line-size = <64>; + cache-sets = <512>; // 512KiB(size)/64(line-size)=8192ways/16-way set + cache-level = <2>; }; }; }; diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi index 50c64146d4926..af81f386793ca 100644 --- a/arch/arm/boot/dts/bcm283x.dtsi +++ b/arch/arm/boot/dts/bcm283x.dtsi @@ -183,6 +183,7 @@ interrupt-controller; #interrupt-cells = <2>; + gpio-ranges = <&gpio 0 0 54>; /* Defines pin muxing groups according to * BCM2835-ARM-Peripherals.pdf page 102. diff --git a/arch/arm/boot/dts/bcm5301x.dtsi b/arch/arm/boot/dts/bcm5301x.dtsi index 9711170649b69..05d67f9769118 100644 --- a/arch/arm/boot/dts/bcm5301x.dtsi +++ b/arch/arm/boot/dts/bcm5301x.dtsi @@ -242,6 +242,8 @@ gpio-controller; #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; }; pcie0: pcie@12000 { @@ -387,7 +389,7 @@ i2c0: i2c@18009000 { compatible = "brcm,iproc-i2c"; reg = <0x18009000 0x50>; - interrupts = ; + interrupts = ; #address-cells = <1>; #size-cells = <0>; clock-frequency = <100000>; diff --git a/arch/arm/boot/dts/exynos5250-pinctrl.dtsi b/arch/arm/boot/dts/exynos5250-pinctrl.dtsi index d31a68672bfac..d7d756614edd1 100644 --- a/arch/arm/boot/dts/exynos5250-pinctrl.dtsi +++ b/arch/arm/boot/dts/exynos5250-pinctrl.dtsi @@ -260,7 +260,7 @@ }; uart3_data: uart3-data { - samsung,pins = "gpa1-4", "gpa1-4"; + samsung,pins = "gpa1-4", "gpa1-5"; samsung,pin-function = ; samsung,pin-pud = ; samsung,pin-drv = ; diff --git a/arch/arm/boot/dts/exynos5250-smdk5250.dts b/arch/arm/boot/dts/exynos5250-smdk5250.dts index 70a2b6e2ad3fa..c7e350ea03fe3 100644 --- a/arch/arm/boot/dts/exynos5250-smdk5250.dts +++ b/arch/arm/boot/dts/exynos5250-smdk5250.dts @@ -117,6 +117,9 @@ status = "okay"; ddc = <&i2c_2>; hpd-gpios = <&gpx3 7 GPIO_ACTIVE_HIGH>; + vdd-supply = <&ldo8_reg>; + vdd_osc-supply = <&ldo10_reg>; + vdd_pll-supply = <&ldo8_reg>; }; &i2c_0 { @@ -125,7 +128,7 @@ samsung,i2c-max-bus-freq = <20000>; eeprom@50 { - compatible = "samsung,s524ad0xd1"; + compatible = "samsung,s524ad0xd1", "atmel,24c128"; reg = <0x50>; }; @@ -284,7 +287,7 @@ samsung,i2c-max-bus-freq = <20000>; eeprom@51 { - compatible = "samsung,s524ad0xd1"; + compatible = "samsung,s524ad0xd1", "atmel,24c128"; reg = <0x51>; }; diff --git a/arch/arm/boot/dts/exynos5420-smdk5420.dts b/arch/arm/boot/dts/exynos5420-smdk5420.dts index 8240e51869729..fb92d87d8dedc 100644 --- a/arch/arm/boot/dts/exynos5420-smdk5420.dts +++ b/arch/arm/boot/dts/exynos5420-smdk5420.dts @@ -132,6 +132,9 @@ hpd-gpios = <&gpx3 7 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&hdmi_hpd_irq>; + vdd-supply = <&ldo6_reg>; + vdd_osc-supply = <&ldo7_reg>; + vdd_pll-supply = <&ldo6_reg>; }; &hsi2c_4 { diff --git a/arch/arm/boot/dts/gemini-nas4220b.dts b/arch/arm/boot/dts/gemini-nas4220b.dts index e1020e07e1366..60cec653ac7c6 100644 --- a/arch/arm/boot/dts/gemini-nas4220b.dts +++ b/arch/arm/boot/dts/gemini-nas4220b.dts @@ -84,7 +84,7 @@ partitions { compatible = "redboot-fis"; /* Eraseblock at 0xfe0000 */ - fis-index-block = <0x1fc>; + fis-index-block = <0x7f>; }; }; diff --git a/arch/arm/boot/dts/imx23-evk.dts b/arch/arm/boot/dts/imx23-evk.dts index 0b2701ca29212..858b8057e8540 100644 --- a/arch/arm/boot/dts/imx23-evk.dts +++ b/arch/arm/boot/dts/imx23-evk.dts @@ -79,7 +79,6 @@ MX23_PAD_LCD_RESET__GPIO_1_18 MX23_PAD_PWM3__GPIO_1_29 MX23_PAD_PWM4__GPIO_1_30 - MX23_PAD_SSP1_DETECT__SSP1_DETECT >; fsl,drive-strength = ; fsl,voltage = ; diff --git a/arch/arm/boot/dts/imx53-m53menlo.dts b/arch/arm/boot/dts/imx53-m53menlo.dts index 64faf5b46d92f..d002c8f738b53 100644 --- a/arch/arm/boot/dts/imx53-m53menlo.dts +++ b/arch/arm/boot/dts/imx53-m53menlo.dts @@ -53,14 +53,40 @@ }; }; + lvds-decoder { + compatible = "ti,ds90cf364a", "lvds-decoder"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + + lvds_decoder_in: endpoint { + remote-endpoint = <&lvds0_out>; + }; + }; + + port@1 { + reg = <1>; + + lvds_decoder_out: endpoint { + remote-endpoint = <&panel_in>; + }; + }; + }; + }; + panel { compatible = "edt,etm0700g0dh6"; pinctrl-0 = <&pinctrl_display_gpio>; + pinctrl-names = "default"; enable-gpios = <&gpio6 0 GPIO_ACTIVE_HIGH>; port { panel_in: endpoint { - remote-endpoint = <&lvds0_out>; + remote-endpoint = <&lvds_decoder_out>; }; }; }; @@ -76,8 +102,7 @@ regulator-name = "vbus"; regulator-min-microvolt = <5000000>; regulator-max-microvolt = <5000000>; - gpio = <&gpio1 2 GPIO_ACTIVE_HIGH>; - enable-active-high; + gpio = <&gpio1 2 0>; }; }; @@ -450,7 +475,7 @@ reg = <2>; lvds0_out: endpoint { - remote-endpoint = <&panel_in>; + remote-endpoint = <&lvds_decoder_in>; }; }; }; diff --git a/arch/arm/boot/dts/imx53-ppd.dts b/arch/arm/boot/dts/imx53-ppd.dts index 5ff9a179c83c3..c80d1700e0949 100644 --- a/arch/arm/boot/dts/imx53-ppd.dts +++ b/arch/arm/boot/dts/imx53-ppd.dts @@ -70,6 +70,12 @@ clock-frequency = <11289600>; }; + achc_24M: achc-clock { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <24000000>; + }; + sgtlsound: sound { compatible = "fsl,imx53-cpuvo-sgtl5000", "fsl,imx-audio-sgtl5000"; @@ -287,16 +293,13 @@ &gpio4 12 GPIO_ACTIVE_LOW>; status = "okay"; - spidev0: spi@0 { - compatible = "ge,achc"; - reg = <0>; - spi-max-frequency = <1000000>; - }; - - spidev1: spi@1 { - compatible = "ge,achc"; - reg = <1>; - spi-max-frequency = <1000000>; + spidev0: spi@1 { + compatible = "ge,achc", "nxp,kinetis-k20"; + reg = <1>, <0>; + vdd-supply = <®_3v3>; + vdda-supply = <®_3v3>; + clocks = <&achc_24M>; + reset-gpios = <&gpio3 6 GPIO_ACTIVE_LOW>; }; gpioxra0: gpio@2 { diff --git a/arch/arm/boot/dts/imx6qdl-apalis.dtsi b/arch/arm/boot/dts/imx6qdl-apalis.dtsi index 7c4ad541c3f5e..b165bd6ea53b6 100644 --- a/arch/arm/boot/dts/imx6qdl-apalis.dtsi +++ b/arch/arm/boot/dts/imx6qdl-apalis.dtsi @@ -314,6 +314,8 @@ codec: sgtl5000@a { compatible = "fsl,sgtl5000"; reg = <0x0a>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_sgtl5000>; clocks = <&clks IMX6QDL_CLK_CKO>; VDDA-supply = <®_module_3v3_audio>; VDDIO-supply = <®_module_3v3>; @@ -543,8 +545,6 @@ MX6QDL_PAD_DISP0_DAT21__AUD4_TXD 0x130b0 MX6QDL_PAD_DISP0_DAT22__AUD4_TXFS 0x130b0 MX6QDL_PAD_DISP0_DAT23__AUD4_RXD 0x130b0 - /* SGTL5000 sys_mclk */ - MX6QDL_PAD_GPIO_5__CCM_CLKO1 0x130b0 >; }; @@ -810,6 +810,12 @@ >; }; + pinctrl_sgtl5000: sgtl5000grp { + fsl,pins = < + MX6QDL_PAD_GPIO_5__CCM_CLKO1 0x130b0 + >; + }; + pinctrl_spdif: spdifgrp { fsl,pins = < MX6QDL_PAD_GPIO_16__SPDIF_IN 0x1b0b0 diff --git a/arch/arm/boot/dts/imx6qdl-ts7970.dtsi b/arch/arm/boot/dts/imx6qdl-ts7970.dtsi index f0be516dc28e1..9181fbeb833d3 100644 --- a/arch/arm/boot/dts/imx6qdl-ts7970.dtsi +++ b/arch/arm/boot/dts/imx6qdl-ts7970.dtsi @@ -226,7 +226,7 @@ reg = <0x28>; #gpio-cells = <2>; gpio-controller; - ngpio = <32>; + ngpios = <62>; }; sgtl5000: codec@a { diff --git a/arch/arm/boot/dts/imx6qdl-udoo.dtsi b/arch/arm/boot/dts/imx6qdl-udoo.dtsi index 16672cbada287..6c8da3f037335 100644 --- a/arch/arm/boot/dts/imx6qdl-udoo.dtsi +++ b/arch/arm/boot/dts/imx6qdl-udoo.dtsi @@ -5,6 +5,8 @@ * Author: Fabio Estevam */ +#include + / { aliases { backlight = &backlight; @@ -218,6 +220,7 @@ MX6QDL_PAD_SD3_DAT1__SD3_DATA1 0x17059 MX6QDL_PAD_SD3_DAT2__SD3_DATA2 0x17059 MX6QDL_PAD_SD3_DAT3__SD3_DATA3 0x17059 + MX6QDL_PAD_SD3_DAT5__GPIO7_IO00 0x1b0b0 >; }; @@ -290,7 +293,7 @@ &usdhc3 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc3>; - non-removable; + cd-gpios = <&gpio7 0 GPIO_ACTIVE_LOW>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index bc488df315117..e9955ef12e02d 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -766,7 +766,7 @@ regulator-name = "vddpu"; regulator-min-microvolt = <725000>; regulator-max-microvolt = <1450000>; - regulator-enable-ramp-delay = <150>; + regulator-enable-ramp-delay = <380>; anatop-reg-offset = <0x140>; anatop-vol-bit-shift = <9>; anatop-vol-bit-width = <5>; diff --git a/arch/arm/boot/dts/imx6ull-colibri.dtsi b/arch/arm/boot/dts/imx6ull-colibri.dtsi index d56728f03c35f..c83323b9ea53c 100644 --- a/arch/arm/boot/dts/imx6ull-colibri.dtsi +++ b/arch/arm/boot/dts/imx6ull-colibri.dtsi @@ -37,7 +37,7 @@ reg_sd1_vmmc: regulator-sd1-vmmc { compatible = "regulator-gpio"; - gpio = <&gpio5 9 GPIO_ACTIVE_HIGH>; + gpios = <&gpio5 9 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_snvs_reg_sd>; regulator-always-on; diff --git a/arch/arm/boot/dts/imx6ull-pinfunc.h b/arch/arm/boot/dts/imx6ull-pinfunc.h index eb025a9d47592..7328d4ef8559f 100644 --- a/arch/arm/boot/dts/imx6ull-pinfunc.h +++ b/arch/arm/boot/dts/imx6ull-pinfunc.h @@ -82,6 +82,6 @@ #define MX6ULL_PAD_CSI_DATA04__ESAI_TX_FS 0x01F4 0x0480 0x0000 0x9 0x0 #define MX6ULL_PAD_CSI_DATA05__ESAI_TX_CLK 0x01F8 0x0484 0x0000 0x9 0x0 #define MX6ULL_PAD_CSI_DATA06__ESAI_TX5_RX0 0x01FC 0x0488 0x0000 0x9 0x0 -#define MX6ULL_PAD_CSI_DATA07__ESAI_T0 0x0200 0x048C 0x0000 0x9 0x0 +#define MX6ULL_PAD_CSI_DATA07__ESAI_TX0 0x0200 0x048C 0x0000 0x9 0x0 #endif /* __DTS_IMX6ULL_PINFUNC_H */ diff --git a/arch/arm/boot/dts/imx7-colibri.dtsi b/arch/arm/boot/dts/imx7-colibri.dtsi index 8bba03de51ad4..1e61e38621066 100644 --- a/arch/arm/boot/dts/imx7-colibri.dtsi +++ b/arch/arm/boot/dts/imx7-colibri.dtsi @@ -77,7 +77,7 @@ dailink_master: simple-audio-card,codec { sound-dai = <&codec>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; }; }; }; @@ -152,7 +152,7 @@ compatible = "fsl,sgtl5000"; #sound-dai-cells = <0>; reg = <0x0a>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_sai1_mclk>; VDDA-supply = <®_module_3v3_avdd>; diff --git a/arch/arm/boot/dts/imx7-mba7.dtsi b/arch/arm/boot/dts/imx7-mba7.dtsi index 50abf18ad30b2..887497e3bb4b8 100644 --- a/arch/arm/boot/dts/imx7-mba7.dtsi +++ b/arch/arm/boot/dts/imx7-mba7.dtsi @@ -250,7 +250,7 @@ tlv320aic32x4: audio-codec@18 { compatible = "ti,tlv320aic32x4"; reg = <0x18>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; clock-names = "mclk"; ldoin-supply = <®_audio_3v3>; iov-supply = <®_audio_3v3>; diff --git a/arch/arm/boot/dts/imx7d-nitrogen7.dts b/arch/arm/boot/dts/imx7d-nitrogen7.dts index 6b4acea1ef795..ecfa179ccab1c 100644 --- a/arch/arm/boot/dts/imx7d-nitrogen7.dts +++ b/arch/arm/boot/dts/imx7d-nitrogen7.dts @@ -284,7 +284,7 @@ codec: wm8960@1a { compatible = "wlf,wm8960"; reg = <0x1a>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; clock-names = "mclk"; wlf,shared-lrclk; }; diff --git a/arch/arm/boot/dts/imx7d-pico-hobbit.dts b/arch/arm/boot/dts/imx7d-pico-hobbit.dts index 7b2198a9372c6..d917dc4f2f227 100644 --- a/arch/arm/boot/dts/imx7d-pico-hobbit.dts +++ b/arch/arm/boot/dts/imx7d-pico-hobbit.dts @@ -31,7 +31,7 @@ dailink_master: simple-audio-card,codec { sound-dai = <&sgtl5000>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; }; }; }; @@ -41,7 +41,7 @@ #sound-dai-cells = <0>; reg = <0x0a>; compatible = "fsl,sgtl5000"; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; VDDA-supply = <®_2p5v>; VDDIO-supply = <®_vref_1v8>; }; diff --git a/arch/arm/boot/dts/imx7d-pico-pi.dts b/arch/arm/boot/dts/imx7d-pico-pi.dts index 70bea95c06d83..f263e391e24cb 100644 --- a/arch/arm/boot/dts/imx7d-pico-pi.dts +++ b/arch/arm/boot/dts/imx7d-pico-pi.dts @@ -31,7 +31,7 @@ dailink_master: simple-audio-card,codec { sound-dai = <&sgtl5000>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; }; }; }; @@ -41,7 +41,7 @@ #sound-dai-cells = <0>; reg = <0x0a>; compatible = "fsl,sgtl5000"; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; VDDA-supply = <®_2p5v>; VDDIO-supply = <®_vref_1v8>; }; diff --git a/arch/arm/boot/dts/imx7d-sdb.dts b/arch/arm/boot/dts/imx7d-sdb.dts index 869efbc4af42c..a97cda17e484e 100644 --- a/arch/arm/boot/dts/imx7d-sdb.dts +++ b/arch/arm/boot/dts/imx7d-sdb.dts @@ -356,7 +356,7 @@ codec: wm8960@1a { compatible = "wlf,wm8960"; reg = <0x1a>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; clock-names = "mclk"; wlf,shared-lrclk; }; diff --git a/arch/arm/boot/dts/imx7s-warp.dts b/arch/arm/boot/dts/imx7s-warp.dts index d6b4888fa686b..e035dd5bf4f62 100644 --- a/arch/arm/boot/dts/imx7s-warp.dts +++ b/arch/arm/boot/dts/imx7s-warp.dts @@ -75,7 +75,7 @@ dailink_master: simple-audio-card,codec { sound-dai = <&codec>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; }; }; }; @@ -232,7 +232,7 @@ #sound-dai-cells = <0>; reg = <0x0a>; compatible = "fsl,sgtl5000"; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_sai1_mclk>; VDDA-supply = <&vgen4_reg>; diff --git a/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts b/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts index f7a841a288654..270e4986b6e64 100644 --- a/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts +++ b/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts @@ -11,3 +11,18 @@ model = "LogicPD Zoom OMAP35xx SOM-LV Development Kit"; compatible = "logicpd,dm3730-som-lv-devkit", "ti,omap3"; }; + +&omap3_pmx_core2 { + pinctrl-names = "default"; + pinctrl-0 = <&hsusb2_2_pins>; + hsusb2_2_pins: pinmux_hsusb2_2_pins { + pinctrl-single,pins = < + OMAP3430_CORE2_IOPAD(0x25f0, PIN_OUTPUT | MUX_MODE3) /* etk_d10.hsusb2_clk */ + OMAP3430_CORE2_IOPAD(0x25f2, PIN_OUTPUT | MUX_MODE3) /* etk_d11.hsusb2_stp */ + OMAP3430_CORE2_IOPAD(0x25f4, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d12.hsusb2_dir */ + OMAP3430_CORE2_IOPAD(0x25f6, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d13.hsusb2_nxt */ + OMAP3430_CORE2_IOPAD(0x25f8, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d14.hsusb2_data0 */ + OMAP3430_CORE2_IOPAD(0x25fa, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d15.hsusb2_data1 */ + >; + }; +}; diff --git a/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts b/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts index a604d92221a4f..c757f0d7781c1 100644 --- a/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts +++ b/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts @@ -11,3 +11,18 @@ model = "LogicPD Zoom DM3730 SOM-LV Development Kit"; compatible = "logicpd,dm3730-som-lv-devkit", "ti,omap3630", "ti,omap3"; }; + +&omap3_pmx_core2 { + pinctrl-names = "default"; + pinctrl-0 = <&hsusb2_2_pins>; + hsusb2_2_pins: pinmux_hsusb2_2_pins { + pinctrl-single,pins = < + OMAP3630_CORE2_IOPAD(0x25f0, PIN_OUTPUT | MUX_MODE3) /* etk_d10.hsusb2_clk */ + OMAP3630_CORE2_IOPAD(0x25f2, PIN_OUTPUT | MUX_MODE3) /* etk_d11.hsusb2_stp */ + OMAP3630_CORE2_IOPAD(0x25f4, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d12.hsusb2_dir */ + OMAP3630_CORE2_IOPAD(0x25f6, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d13.hsusb2_nxt */ + OMAP3630_CORE2_IOPAD(0x25f8, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d14.hsusb2_data0 */ + OMAP3630_CORE2_IOPAD(0x25fa, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d15.hsusb2_data1 */ + >; + }; +}; diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi index b56524cc7fe27..55b619c99e24d 100644 --- a/arch/arm/boot/dts/logicpd-som-lv.dtsi +++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi @@ -265,21 +265,6 @@ }; }; -&omap3_pmx_core2 { - pinctrl-names = "default"; - pinctrl-0 = <&hsusb2_2_pins>; - hsusb2_2_pins: pinmux_hsusb2_2_pins { - pinctrl-single,pins = < - OMAP3630_CORE2_IOPAD(0x25f0, PIN_OUTPUT | MUX_MODE3) /* etk_d10.hsusb2_clk */ - OMAP3630_CORE2_IOPAD(0x25f2, PIN_OUTPUT | MUX_MODE3) /* etk_d11.hsusb2_stp */ - OMAP3630_CORE2_IOPAD(0x25f4, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d12.hsusb2_dir */ - OMAP3630_CORE2_IOPAD(0x25f6, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d13.hsusb2_nxt */ - OMAP3630_CORE2_IOPAD(0x25f8, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d14.hsusb2_data0 */ - OMAP3630_CORE2_IOPAD(0x25fa, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d15.hsusb2_data1 */ - >; - }; -}; - &uart2 { interrupts-extended = <&intc 73 &omap3_pmx_core OMAP3_UART2_RX>; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/ls1021a-tsn.dts b/arch/arm/boot/dts/ls1021a-tsn.dts index 5b7689094b70e..7235ce2a32936 100644 --- a/arch/arm/boot/dts/ls1021a-tsn.dts +++ b/arch/arm/boot/dts/ls1021a-tsn.dts @@ -247,7 +247,7 @@ flash@0 { /* Rev. A uses 64MB flash, Rev. B & C use 32MB flash */ - compatible = "jedec,spi-nor", "s25fl256s1", "s25fl512s"; + compatible = "jedec,spi-nor"; spi-max-frequency = <20000000>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/ls1021a.dtsi b/arch/arm/boot/dts/ls1021a.dtsi index c62fcca7b4263..aeb8a40b6b601 100644 --- a/arch/arm/boot/dts/ls1021a.dtsi +++ b/arch/arm/boot/dts/ls1021a.dtsi @@ -311,39 +311,6 @@ #thermal-sensor-cells = <1>; }; - thermal-zones { - cpu_thermal: cpu-thermal { - polling-delay-passive = <1000>; - polling-delay = <5000>; - - thermal-sensors = <&tmu 0>; - - trips { - cpu_alert: cpu-alert { - temperature = <85000>; - hysteresis = <2000>; - type = "passive"; - }; - cpu_crit: cpu-crit { - temperature = <95000>; - hysteresis = <2000>; - type = "critical"; - }; - }; - - cooling-maps { - map0 { - trip = <&cpu_alert>; - cooling-device = - <&cpu0 THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu1 THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>; - }; - }; - }; - }; - dspi0: spi@2100000 { compatible = "fsl,ls1021a-v1.0-dspi"; #address-cells = <1>; @@ -984,4 +951,37 @@ }; }; + + thermal-zones { + cpu_thermal: cpu-thermal { + polling-delay-passive = <1000>; + polling-delay = <5000>; + + thermal-sensors = <&tmu 0>; + + trips { + cpu_alert: cpu-alert { + temperature = <85000>; + hysteresis = <2000>; + type = "passive"; + }; + cpu_crit: cpu-crit { + temperature = <95000>; + hysteresis = <2000>; + type = "critical"; + }; + }; + + cooling-maps { + map0 { + trip = <&cpu_alert>; + cooling-device = + <&cpu0 THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu1 THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>; + }; + }; + }; + }; }; diff --git a/arch/arm/boot/dts/meson.dtsi b/arch/arm/boot/dts/meson.dtsi index c4447f6c8b2cb..e141ce7484841 100644 --- a/arch/arm/boot/dts/meson.dtsi +++ b/arch/arm/boot/dts/meson.dtsi @@ -49,14 +49,14 @@ }; uart_A: serial@84c0 { - compatible = "amlogic,meson6-uart", "amlogic,meson-uart"; + compatible = "amlogic,meson6-uart"; reg = <0x84c0 0x18>; interrupts = ; status = "disabled"; }; uart_B: serial@84dc { - compatible = "amlogic,meson6-uart", "amlogic,meson-uart"; + compatible = "amlogic,meson6-uart"; reg = <0x84dc 0x18>; interrupts = ; status = "disabled"; @@ -94,7 +94,7 @@ }; uart_C: serial@8700 { - compatible = "amlogic,meson6-uart", "amlogic,meson-uart"; + compatible = "amlogic,meson6-uart"; reg = <0x8700 0x18>; interrupts = ; status = "disabled"; @@ -196,7 +196,7 @@ }; uart_AO: serial@4c0 { - compatible = "amlogic,meson6-uart", "amlogic,meson-ao-uart", "amlogic,meson-uart"; + compatible = "amlogic,meson6-uart", "amlogic,meson-ao-uart"; reg = <0x4c0 0x18>; interrupts = ; status = "disabled"; diff --git a/arch/arm/boot/dts/meson8.dtsi b/arch/arm/boot/dts/meson8.dtsi index 3efe9d41c2bb6..d7c9dbee0f016 100644 --- a/arch/arm/boot/dts/meson8.dtsi +++ b/arch/arm/boot/dts/meson8.dtsi @@ -241,8 +241,13 @@ "pp2", "ppmmu2", "pp4", "ppmmu4", "pp5", "ppmmu5", "pp6", "ppmmu6"; resets = <&reset RESET_MALI>; + clocks = <&clkc CLKID_CLK81>, <&clkc CLKID_MALI>; clock-names = "bus", "core"; + + assigned-clocks = <&clkc CLKID_MALI>; + assigned-clock-rates = <318750000>; + operating-points-v2 = <&gpu_opp_table>; }; }; diff --git a/arch/arm/boot/dts/meson8b-ec100.dts b/arch/arm/boot/dts/meson8b-ec100.dts index bed1dfef19857..32d1c322dbc65 100644 --- a/arch/arm/boot/dts/meson8b-ec100.dts +++ b/arch/arm/boot/dts/meson8b-ec100.dts @@ -148,7 +148,7 @@ regulator-min-microvolt = <860000>; regulator-max-microvolt = <1140000>; - vin-supply = <&vcc_5v>; + pwm-supply = <&vcc_5v>; pwms = <&pwm_cd 0 1148 0>; pwm-dutycycle-range = <100 0>; @@ -232,7 +232,7 @@ regulator-min-microvolt = <860000>; regulator-max-microvolt = <1140000>; - vin-supply = <&vcc_5v>; + pwm-supply = <&vcc_5v>; pwms = <&pwm_cd 1 1148 0>; pwm-dutycycle-range = <100 0>; diff --git a/arch/arm/boot/dts/meson8b-mxq.dts b/arch/arm/boot/dts/meson8b-mxq.dts index 6e39ad52e42d3..ab8fe55963f7c 100644 --- a/arch/arm/boot/dts/meson8b-mxq.dts +++ b/arch/arm/boot/dts/meson8b-mxq.dts @@ -39,6 +39,8 @@ regulator-min-microvolt = <860000>; regulator-max-microvolt = <1140000>; + pwm-supply = <&vcc_5v>; + pwms = <&pwm_cd 0 1148 0>; pwm-dutycycle-range = <100 0>; @@ -84,7 +86,7 @@ regulator-min-microvolt = <860000>; regulator-max-microvolt = <1140000>; - vin-supply = <&vcc_5v>; + pwm-supply = <&vcc_5v>; pwms = <&pwm_cd 1 1148 0>; pwm-dutycycle-range = <100 0>; diff --git a/arch/arm/boot/dts/meson8b-odroidc1.dts b/arch/arm/boot/dts/meson8b-odroidc1.dts index 0f9c71137bed5..c413af9a7af8e 100644 --- a/arch/arm/boot/dts/meson8b-odroidc1.dts +++ b/arch/arm/boot/dts/meson8b-odroidc1.dts @@ -130,7 +130,7 @@ regulator-min-microvolt = <860000>; regulator-max-microvolt = <1140000>; - vin-supply = <&p5v0>; + pwm-supply = <&p5v0>; pwms = <&pwm_cd 0 12218 0>; pwm-dutycycle-range = <91 0>; @@ -162,7 +162,7 @@ regulator-min-microvolt = <860000>; regulator-max-microvolt = <1140000>; - vin-supply = <&p5v0>; + pwm-supply = <&p5v0>; pwms = <&pwm_cd 1 12218 0>; pwm-dutycycle-range = <91 0>; diff --git a/arch/arm/boot/dts/omap-gpmc-smsc9221.dtsi b/arch/arm/boot/dts/omap-gpmc-smsc9221.dtsi index 7f6aefd134514..e7534fe9c53cf 100644 --- a/arch/arm/boot/dts/omap-gpmc-smsc9221.dtsi +++ b/arch/arm/boot/dts/omap-gpmc-smsc9221.dtsi @@ -29,7 +29,7 @@ compatible = "smsc,lan9221","smsc,lan9115"; bank-width = <2>; - gpmc,mux-add-data; + gpmc,mux-add-data = <0>; gpmc,cs-on-ns = <0>; gpmc,cs-rd-off-ns = <42>; gpmc,cs-wr-off-ns = <36>; diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi index b6ef1a7ac8a4e..089e9f1f442b5 100644 --- a/arch/arm/boot/dts/omap3-gta04.dtsi +++ b/arch/arm/boot/dts/omap3-gta04.dtsi @@ -31,6 +31,8 @@ aliases { display0 = &lcd; display1 = &tv0; + /delete-property/ mmc2; + /delete-property/ mmc3; }; ldo_3v3: fixedregulator { @@ -515,7 +517,7 @@ compatible = "bosch,bma180"; reg = <0x41>; pinctrl-names = "default"; - pintcrl-0 = <&bma180_pins>; + pinctrl-0 = <&bma180_pins>; interrupt-parent = <&gpio4>; interrupts = <19 IRQ_TYPE_LEVEL_HIGH>; /* GPIO_115 */ }; diff --git a/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi b/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi index e5da3bc6f1050..218a10c0d8159 100644 --- a/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi +++ b/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi @@ -22,7 +22,7 @@ compatible = "smsc,lan9221","smsc,lan9115"; bank-width = <2>; - gpmc,mux-add-data; + gpmc,mux-add-data = <0>; gpmc,cs-on-ns = <0>; gpmc,cs-rd-off-ns = <42>; gpmc,cs-wr-off-ns = <36>; diff --git a/arch/arm/boot/dts/omap3430-sdp.dts b/arch/arm/boot/dts/omap3430-sdp.dts index 0abd61108a539..ec16979825378 100644 --- a/arch/arm/boot/dts/omap3430-sdp.dts +++ b/arch/arm/boot/dts/omap3430-sdp.dts @@ -101,7 +101,7 @@ nand@1,0 { compatible = "ti,omap2-nand"; - reg = <0 0 4>; /* CS0, offset 0, IO size 4 */ + reg = <1 0 4>; /* CS1, offset 0, IO size 4 */ interrupt-parent = <&gpmc>; interrupts = <0 IRQ_TYPE_NONE>, /* fifoevent */ <1 IRQ_TYPE_NONE>; /* termcount */ diff --git a/arch/arm/boot/dts/ox820.dtsi b/arch/arm/boot/dts/ox820.dtsi index 90846a7655b49..dde4364892bf0 100644 --- a/arch/arm/boot/dts/ox820.dtsi +++ b/arch/arm/boot/dts/ox820.dtsi @@ -287,7 +287,7 @@ clocks = <&armclk>; }; - gic: gic@1000 { + gic: interrupt-controller@1000 { compatible = "arm,arm11mp-gic"; interrupt-controller; #interrupt-cells = <3>; diff --git a/arch/arm/boot/dts/qcom-apq8064.dtsi b/arch/arm/boot/dts/qcom-apq8064.dtsi index 8b79b4112ee1a..764984c95c686 100644 --- a/arch/arm/boot/dts/qcom-apq8064.dtsi +++ b/arch/arm/boot/dts/qcom-apq8064.dtsi @@ -198,7 +198,7 @@ clock-frequency = <19200000>; }; - pxo_board { + pxo_board: pxo_board { compatible = "fixed-clock"; #clock-cells = <0>; clock-frequency = <27000000>; @@ -1147,7 +1147,7 @@ }; gpu: adreno-3xx@4300000 { - compatible = "qcom,adreno-3xx"; + compatible = "qcom,adreno-320.2", "qcom,adreno"; reg = <0x04300000 0x20000>; reg-names = "kgsl_3d0_reg_memory"; interrupts = ; @@ -1162,7 +1162,6 @@ <&mmcc GFX3D_AHB_CLK>, <&mmcc GFX3D_AXI_CLK>, <&mmcc MMSS_IMEM_AHB_CLK>; - qcom,chipid = <0x03020002>; iommus = <&gfx3d 0 &gfx3d 1 @@ -1261,9 +1260,9 @@ <&mmcc DSI1_BYTE_CLK>, <&mmcc DSI_PIXEL_CLK>, <&mmcc DSI1_ESC_CLK>; - clock-names = "iface_clk", "bus_clk", "core_mmss_clk", - "src_clk", "byte_clk", "pixel_clk", - "core_clk"; + clock-names = "iface", "bus", "core_mmss", + "src", "byte", "pixel", + "core"; assigned-clocks = <&mmcc DSI1_BYTE_SRC>, <&mmcc DSI1_ESC_SRC>, @@ -1305,7 +1304,7 @@ reg-names = "dsi_pll", "dsi_phy", "dsi_phy_regulator"; clock-names = "iface_clk", "ref"; clocks = <&mmcc DSI_M_AHB_CLK>, - <&cxo_board>; + <&pxo_board>; }; diff --git a/arch/arm/boot/dts/qcom-ipq4019.dtsi b/arch/arm/boot/dts/qcom-ipq4019.dtsi index 56f51599852d0..338256c59ca5a 100644 --- a/arch/arm/boot/dts/qcom-ipq4019.dtsi +++ b/arch/arm/boot/dts/qcom-ipq4019.dtsi @@ -141,7 +141,8 @@ clocks { sleep_clk: sleep_clk { compatible = "fixed-clock"; - clock-frequency = <32768>; + clock-frequency = <32000>; + clock-output-names = "gcc_sleep_clk_src"; #clock-cells = <0>; }; diff --git a/arch/arm/boot/dts/qcom-msm8960.dtsi b/arch/arm/boot/dts/qcom-msm8960.dtsi index f2aeaccdc1ad6..15ff0e8fd0d30 100644 --- a/arch/arm/boot/dts/qcom-msm8960.dtsi +++ b/arch/arm/boot/dts/qcom-msm8960.dtsi @@ -145,7 +145,9 @@ reg = <0x108000 0x1000>; qcom,ipc = <&l2cc 0x8 2>; - interrupts = <0 19 0>, <0 21 0>, <0 22 0>; + interrupts = , + , + ; interrupt-names = "ack", "err", "wakeup"; regulators { @@ -191,7 +193,7 @@ compatible = "qcom,msm-uartdm-v1.3", "qcom,msm-uartdm"; reg = <0x16440000 0x1000>, <0x16400000 0x1000>; - interrupts = <0 154 0x0>; + interrupts = ; clocks = <&gcc GSBI5_UART_CLK>, <&gcc GSBI5_H_CLK>; clock-names = "core", "iface"; status = "disabled"; @@ -317,7 +319,7 @@ #address-cells = <1>; #size-cells = <0>; reg = <0x16080000 0x1000>; - interrupts = <0 147 0>; + interrupts = ; spi-max-frequency = <24000000>; cs-gpios = <&msmgpio 8 0>; diff --git a/arch/arm/boot/dts/qcom-msm8974.dtsi b/arch/arm/boot/dts/qcom-msm8974.dtsi index 369e58f64145d..9de4f17955e31 100644 --- a/arch/arm/boot/dts/qcom-msm8974.dtsi +++ b/arch/arm/boot/dts/qcom-msm8974.dtsi @@ -1213,8 +1213,8 @@ #phy-cells = <0>; qcom,dsi-phy-index = <0>; - clocks = <&mmcc MDSS_AHB_CLK>; - clock-names = "iface"; + clocks = <&mmcc MDSS_AHB_CLK>, <&xo_board>; + clock-names = "iface", "ref"; }; }; }; diff --git a/arch/arm/boot/dts/rk322x.dtsi b/arch/arm/boot/dts/rk322x.dtsi index 140e22d74dcfb..d393bb481e747 100644 --- a/arch/arm/boot/dts/rk322x.dtsi +++ b/arch/arm/boot/dts/rk322x.dtsi @@ -635,8 +635,8 @@ interrupts = ; assigned-clocks = <&cru SCLK_HDMI_PHY>; assigned-clock-parents = <&hdmi_phy>; - clocks = <&cru SCLK_HDMI_HDCP>, <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_CEC>; - clock-names = "isfr", "iahb", "cec"; + clocks = <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_HDCP>, <&cru SCLK_HDMI_CEC>; + clock-names = "iahb", "isfr", "cec"; pinctrl-names = "default"; pinctrl-0 = <&hdmii2c_xfer &hdmi_hpd &hdmi_cec>; resets = <&cru SRST_HDMI_P>; diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index 658ceb96d8bd1..7dcafd0833ba8 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -975,7 +975,7 @@ status = "disabled"; }; - crypto: cypto-controller@ff8a0000 { + crypto: crypto@ff8a0000 { compatible = "rockchip,rk3288-crypto"; reg = <0x0 0xff8a0000 0x0 0x4000>; interrupts = ; diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index b05bab57f90a3..dbfdffd54003a 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi +++ b/arch/arm/boot/dts/sama5d2.dtsi @@ -526,7 +526,7 @@ pmecc: ecc-engine@f8014070 { compatible = "atmel,sama5d2-pmecc"; reg = <0xf8014070 0x490>, - <0xf8014500 0x100>; + <0xf8014500 0x200>; }; }; @@ -933,7 +933,7 @@ clocks = <&pmc PMC_TYPE_PERIPHERAL 55>, <&pmc PMC_TYPE_GCK 55>; clock-names = "pclk", "gclk"; assigned-clocks = <&pmc PMC_TYPE_CORE PMC_I2S1_MUX>; - assigned-parrents = <&pmc PMC_TYPE_GCK 55>; + assigned-clock-parents = <&pmc PMC_TYPE_GCK 55>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/socfpga_arria10_socdk_qspi.dts b/arch/arm/boot/dts/socfpga_arria10_socdk_qspi.dts index b4c0a76a4d1af..4c2fcfcc7baed 100644 --- a/arch/arm/boot/dts/socfpga_arria10_socdk_qspi.dts +++ b/arch/arm/boot/dts/socfpga_arria10_socdk_qspi.dts @@ -12,7 +12,7 @@ flash0: n25q00@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q00aa"; + compatible = "micron,mt25qu02g", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; diff --git a/arch/arm/boot/dts/socfpga_arria5_socdk.dts b/arch/arm/boot/dts/socfpga_arria5_socdk.dts index 90e676e7019f2..1b02d46496a85 100644 --- a/arch/arm/boot/dts/socfpga_arria5_socdk.dts +++ b/arch/arm/boot/dts/socfpga_arria5_socdk.dts @@ -119,7 +119,7 @@ flash: flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q256a"; + compatible = "micron,n25q256a", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; diff --git a/arch/arm/boot/dts/socfpga_cyclone5_socdk.dts b/arch/arm/boot/dts/socfpga_cyclone5_socdk.dts index 6f138b2b26163..51bb436784e24 100644 --- a/arch/arm/boot/dts/socfpga_cyclone5_socdk.dts +++ b/arch/arm/boot/dts/socfpga_cyclone5_socdk.dts @@ -124,7 +124,7 @@ flash0: n25q00@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q00"; + compatible = "micron,mt25qu02g", "jedec,spi-nor"; reg = <0>; /* chip select */ spi-max-frequency = <100000000>; diff --git a/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts b/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts index c155ff02eb6e0..cae9ddd5ed38b 100644 --- a/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts +++ b/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts @@ -169,7 +169,7 @@ flash: flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q00"; + compatible = "micron,mt25qu02g", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; diff --git a/arch/arm/boot/dts/socfpga_cyclone5_socrates.dts b/arch/arm/boot/dts/socfpga_cyclone5_socrates.dts index 8d5d3996f6f27..ca18b959e6559 100644 --- a/arch/arm/boot/dts/socfpga_cyclone5_socrates.dts +++ b/arch/arm/boot/dts/socfpga_cyclone5_socrates.dts @@ -80,7 +80,7 @@ flash: flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q256a"; + compatible = "micron,n25q256a", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; m25p,fast-read; diff --git a/arch/arm/boot/dts/socfpga_cyclone5_sodia.dts b/arch/arm/boot/dts/socfpga_cyclone5_sodia.dts index 99a71757cdf46..3f7aa7bf0863a 100644 --- a/arch/arm/boot/dts/socfpga_cyclone5_sodia.dts +++ b/arch/arm/boot/dts/socfpga_cyclone5_sodia.dts @@ -116,7 +116,7 @@ flash0: n25q512a@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q512a"; + compatible = "micron,n25q512a", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; diff --git a/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts b/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts index a060718758b67..25874e1b9c829 100644 --- a/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts +++ b/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts @@ -224,7 +224,7 @@ n25q128@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q128"; + compatible = "micron,n25q128", "jedec,spi-nor"; reg = <0>; /* chip select */ spi-max-frequency = <100000000>; m25p,fast-read; @@ -241,7 +241,7 @@ n25q00@1 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q00"; + compatible = "micron,mt25qu02g", "jedec,spi-nor"; reg = <1>; /* chip select */ spi-max-frequency = <100000000>; m25p,fast-read; diff --git a/arch/arm/boot/dts/spear1340.dtsi b/arch/arm/boot/dts/spear1340.dtsi index 1a8f5e8b10e3a..66cd473ecb617 100644 --- a/arch/arm/boot/dts/spear1340.dtsi +++ b/arch/arm/boot/dts/spear1340.dtsi @@ -136,9 +136,9 @@ reg = <0xb4100000 0x1000>; interrupts = <0 105 0x4>; status = "disabled"; - dmas = <&dwdma0 12 0 1>, - <&dwdma0 13 1 0>; - dma-names = "tx", "rx"; + dmas = <&dwdma0 13 0 1>, + <&dwdma0 12 1 0>; + dma-names = "rx", "tx"; }; thermal@e07008c4 { diff --git a/arch/arm/boot/dts/spear13xx.dtsi b/arch/arm/boot/dts/spear13xx.dtsi index f187da4485f46..78672db9068be 100644 --- a/arch/arm/boot/dts/spear13xx.dtsi +++ b/arch/arm/boot/dts/spear13xx.dtsi @@ -284,9 +284,9 @@ #size-cells = <0>; interrupts = <0 31 0x4>; status = "disabled"; - dmas = <&dwdma0 4 0 0>, - <&dwdma0 5 0 0>; - dma-names = "tx", "rx"; + dmas = <&dwdma0 5 0 0>, + <&dwdma0 4 0 0>; + dma-names = "rx", "tx"; }; rtc@e0580000 { diff --git a/arch/arm/boot/dts/spear3xx.dtsi b/arch/arm/boot/dts/spear3xx.dtsi index f266b7b034823..cc88ebe7a60ce 100644 --- a/arch/arm/boot/dts/spear3xx.dtsi +++ b/arch/arm/boot/dts/spear3xx.dtsi @@ -47,7 +47,7 @@ }; gmac: eth@e0800000 { - compatible = "st,spear600-gmac"; + compatible = "snps,dwmac-3.40a"; reg = <0xe0800000 0x8000>; interrupts = <23 22>; interrupt-names = "macirq", "eth_wake_irq"; diff --git a/arch/arm/boot/dts/stm32mp157c.dtsi b/arch/arm/boot/dts/stm32mp157c.dtsi index eca469a64a977..0e9e930c60f06 100644 --- a/arch/arm/boot/dts/stm32mp157c.dtsi +++ b/arch/arm/boot/dts/stm32mp157c.dtsi @@ -515,7 +515,7 @@ compatible = "st,stm32-cec"; reg = <0x40016000 0x400>; interrupts = ; - clocks = <&rcc CEC_K>, <&clk_lse>; + clocks = <&rcc CEC_K>, <&rcc CEC>; clock-names = "cec", "hdmi-cec"; status = "disabled"; }; @@ -773,7 +773,7 @@ #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-a"; - reg = <0x4 0x1c>; + reg = <0x4 0x20>; clocks = <&rcc SAI1_K>; clock-names = "sai_ck"; dmas = <&dmamux1 87 0x400 0x01>; @@ -783,7 +783,7 @@ sai1b: audio-controller@4400a024 { #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-b"; - reg = <0x24 0x1c>; + reg = <0x24 0x20>; clocks = <&rcc SAI1_K>; clock-names = "sai_ck"; dmas = <&dmamux1 88 0x400 0x01>; @@ -804,7 +804,7 @@ sai2a: audio-controller@4400b004 { #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-a"; - reg = <0x4 0x1c>; + reg = <0x4 0x20>; clocks = <&rcc SAI2_K>; clock-names = "sai_ck"; dmas = <&dmamux1 89 0x400 0x01>; @@ -814,7 +814,7 @@ sai2b: audio-controller@4400b024 { #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-b"; - reg = <0x24 0x1c>; + reg = <0x24 0x20>; clocks = <&rcc SAI2_K>; clock-names = "sai_ck"; dmas = <&dmamux1 90 0x400 0x01>; @@ -835,7 +835,7 @@ sai3a: audio-controller@4400c004 { #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-a"; - reg = <0x04 0x1c>; + reg = <0x04 0x20>; clocks = <&rcc SAI3_K>; clock-names = "sai_ck"; dmas = <&dmamux1 113 0x400 0x01>; @@ -845,7 +845,7 @@ sai3b: audio-controller@4400c024 { #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-b"; - reg = <0x24 0x1c>; + reg = <0x24 0x20>; clocks = <&rcc SAI3_K>; clock-names = "sai_ck"; dmas = <&dmamux1 114 0x400 0x01>; @@ -1191,7 +1191,7 @@ sai4a: audio-controller@50027004 { #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-a"; - reg = <0x04 0x1c>; + reg = <0x04 0x20>; clocks = <&rcc SAI4_K>; clock-names = "sai_ck"; dmas = <&dmamux1 99 0x400 0x01>; @@ -1201,7 +1201,7 @@ sai4b: audio-controller@50027024 { #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-b"; - reg = <0x24 0x1c>; + reg = <0x24 0x20>; clocks = <&rcc SAI4_K>; clock-names = "sai_ck"; dmas = <&dmamux1 100 0x400 0x01>; diff --git a/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts b/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts index 9ba62774e89a1..488933b87ad5a 100644 --- a/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts +++ b/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts @@ -112,7 +112,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts index f19ed981da9d9..3706216ffb40b 100644 --- a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts +++ b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts @@ -169,7 +169,7 @@ flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "mxicy,mx25l1606e", "winbond,w25q128"; + compatible = "mxicy,mx25l1606e", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <40000000>; }; diff --git a/arch/arm/boot/dts/suniv-f1c100s.dtsi b/arch/arm/boot/dts/suniv-f1c100s.dtsi index 6100d3b75f613..def8301014487 100644 --- a/arch/arm/boot/dts/suniv-f1c100s.dtsi +++ b/arch/arm/boot/dts/suniv-f1c100s.dtsi @@ -104,8 +104,10 @@ wdt: watchdog@1c20ca0 { compatible = "allwinner,suniv-f1c100s-wdt", - "allwinner,sun4i-a10-wdt"; + "allwinner,sun6i-a31-wdt"; reg = <0x01c20ca0 0x20>; + interrupts = <16>; + clocks = <&osc32k>; }; uart0: serial@1c25000 { diff --git a/arch/arm/boot/dts/tegra20-tamonten.dtsi b/arch/arm/boot/dts/tegra20-tamonten.dtsi index 20137fc578b1b..69cb65d86c467 100644 --- a/arch/arm/boot/dts/tegra20-tamonten.dtsi +++ b/arch/arm/boot/dts/tegra20-tamonten.dtsi @@ -183,10 +183,11 @@ }; conf_ata { nvidia,pins = "ata", "atb", "atc", "atd", "ate", - "cdev1", "cdev2", "dap1", "dtb", "gma", - "gmb", "gmc", "gmd", "gme", "gpu7", - "gpv", "i2cp", "pta", "rm", "slxa", - "slxk", "spia", "spib", "uac"; + "cdev1", "cdev2", "dap1", "dtb", "dtf", + "gma", "gmb", "gmc", "gmd", "gme", "gpu7", + "gpv", "i2cp", "irrx", "irtx", "pta", + "rm", "slxa", "slxk", "spia", "spib", + "uac"; nvidia,pull = ; nvidia,tristate = ; }; @@ -202,7 +203,7 @@ }; conf_crtp { nvidia,pins = "crtp", "dap2", "dap3", "dap4", - "dtc", "dte", "dtf", "gpu", "sdio1", + "dtc", "dte", "gpu", "sdio1", "slxc", "slxd", "spdi", "spdo", "spig", "uda"; nvidia,pull = ; @@ -211,7 +212,7 @@ conf_ddc { nvidia,pins = "ddc", "dta", "dtd", "kbca", "kbcb", "kbcc", "kbcd", "kbce", "kbcf", - "sdc"; + "sdc", "uad", "uca"; nvidia,pull = ; nvidia,tristate = ; }; @@ -221,10 +222,9 @@ "lvp0", "owc", "sdb"; nvidia,tristate = ; }; - conf_irrx { - nvidia,pins = "irrx", "irtx", "sdd", "spic", - "spie", "spih", "uaa", "uab", "uad", - "uca", "ucb"; + conf_sdd { + nvidia,pins = "sdd", "spic", "spie", "spih", + "uaa", "uab", "ucb"; nvidia,pull = ; nvidia,tristate = ; }; diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 043b0b18bf7e0..f747caea10ffa 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -30,7 +30,7 @@ config CRYPTO_SHA1_ARM_NEON config CRYPTO_SHA1_ARM_CE tristate "SHA1 digest algorithm (ARM v8 Crypto Extensions)" - depends on KERNEL_MODE_NEON + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) select CRYPTO_SHA1_ARM select CRYPTO_HASH help @@ -39,7 +39,7 @@ config CRYPTO_SHA1_ARM_CE config CRYPTO_SHA2_ARM_CE tristate "SHA-224/256 digest algorithm (ARM v8 Crypto Extensions)" - depends on KERNEL_MODE_NEON + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) select CRYPTO_SHA256_ARM select CRYPTO_HASH help @@ -96,7 +96,7 @@ config CRYPTO_AES_ARM_BS config CRYPTO_AES_ARM_CE tristate "Accelerated AES using ARMv8 Crypto Extensions" - depends on KERNEL_MODE_NEON + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) select CRYPTO_BLKCIPHER select CRYPTO_LIB_AES select CRYPTO_SIMD @@ -106,7 +106,7 @@ config CRYPTO_AES_ARM_CE config CRYPTO_GHASH_ARM_CE tristate "PMULL-accelerated GHASH using NEON/ARMv8 Crypto Extensions" - depends on KERNEL_MODE_NEON + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) select CRYPTO_HASH select CRYPTO_CRYPTD select CRYPTO_GF128MUL @@ -118,12 +118,14 @@ config CRYPTO_GHASH_ARM_CE config CRYPTO_CRCT10DIF_ARM_CE tristate "CRCT10DIF digest algorithm using PMULL instructions" - depends on KERNEL_MODE_NEON && CRC_T10DIF + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) + depends on CRC_T10DIF select CRYPTO_HASH config CRYPTO_CRC32_ARM_CE tristate "CRC32(C) digest algorithm using CRC and/or PMULL instructions" - depends on KERNEL_MODE_NEON && CRC32 + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) + depends on CRC32 select CRYPTO_HASH config CRYPTO_CHACHA20_NEON diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 4180f3a13512c..c0d36771a6934 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -12,32 +12,12 @@ obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o -ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o -ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o -ce-obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o -ce-obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o -ce-obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM_CE) += crct10dif-arm-ce.o -crc-obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) += crc32-arm-ce.o - -ifneq ($(crc-obj-y)$(crc-obj-m),) -ifeq ($(call as-instr,.arch armv8-a\n.arch_extension crc,y,n),y) -ce-obj-y += $(crc-obj-y) -ce-obj-m += $(crc-obj-m) -else -$(warning These CRC Extensions modules need binutils 2.23 or higher) -$(warning $(crc-obj-y) $(crc-obj-m)) -endif -endif - -ifneq ($(ce-obj-y)$(ce-obj-m),) -ifeq ($(call as-instr,.fpu crypto-neon-fp-armv8,y,n),y) -obj-y += $(ce-obj-y) -obj-m += $(ce-obj-m) -else -$(warning These ARMv8 Crypto Extensions modules need binutils 2.23 or higher) -$(warning $(ce-obj-y) $(ce-obj-m)) -endif -endif +obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o +obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o +obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o +obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o +obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM_CE) += crct10dif-arm-ce.o +obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) += crc32-arm-ce.o aes-arm-y := aes-cipher-core.o aes-cipher-glue.o aes-arm-bs-y := aes-neonbs-core.o aes-neonbs-glue.o diff --git a/arch/arm/crypto/crct10dif-ce-core.S b/arch/arm/crypto/crct10dif-ce-core.S index 86be258a803fa..46c02c518a300 100644 --- a/arch/arm/crypto/crct10dif-ce-core.S +++ b/arch/arm/crypto/crct10dif-ce-core.S @@ -72,7 +72,7 @@ #endif .text - .arch armv7-a + .arch armv8-a .fpu crypto-neon-fp-armv8 init_crc .req r0 diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S index c47fe81abcb01..9f51e3fa45268 100644 --- a/arch/arm/crypto/ghash-ce-core.S +++ b/arch/arm/crypto/ghash-ce-core.S @@ -8,6 +8,9 @@ #include #include + .arch armv8-a + .fpu crypto-neon-fp-armv8 + SHASH .req q0 T1 .req q1 XL .req q2 @@ -88,7 +91,6 @@ T3_H .req d17 .text - .fpu crypto-neon-fp-armv8 .macro __pmull_p64, rd, rn, rm, b1, b2, b3, b4 vmull.p64 \rd, \rn, \rm diff --git a/arch/arm/crypto/sha1-ce-core.S b/arch/arm/crypto/sha1-ce-core.S index 49a74a441aec7..8a702e051738a 100644 --- a/arch/arm/crypto/sha1-ce-core.S +++ b/arch/arm/crypto/sha1-ce-core.S @@ -10,6 +10,7 @@ #include .text + .arch armv8-a .fpu crypto-neon-fp-armv8 k0 .req q0 diff --git a/arch/arm/crypto/sha2-ce-core.S b/arch/arm/crypto/sha2-ce-core.S index 4ad517577e230..b6369d2440a19 100644 --- a/arch/arm/crypto/sha2-ce-core.S +++ b/arch/arm/crypto/sha2-ce-core.S @@ -10,6 +10,7 @@ #include .text + .arch armv8-a .fpu crypto-neon-fp-armv8 k0 .req q7 diff --git a/arch/arm/crypto/sha256-armv4.pl b/arch/arm/crypto/sha256-armv4.pl index a03cf4dfb7818..d927483985c2d 100644 --- a/arch/arm/crypto/sha256-armv4.pl +++ b/arch/arm/crypto/sha256-armv4.pl @@ -175,7 +175,6 @@ sub BODY_16_XX { #else .syntax unified # ifdef __thumb2__ -# define adrl adr .thumb # else .code 32 @@ -471,7 +470,8 @@ () stmdb sp!,{r4-r12,lr} sub $H,sp,#16*4+16 - adrl $Ktbl,K256 + adr $Ktbl,.Lsha256_block_data_order + sub $Ktbl,$Ktbl,#.Lsha256_block_data_order-K256 bic $H,$H,#15 @ align for 128-bit stores mov $t2,sp mov sp,$H @ alloca diff --git a/arch/arm/crypto/sha256-core.S_shipped b/arch/arm/crypto/sha256-core.S_shipped index 054aae0edfce5..9deb515f3c9f2 100644 --- a/arch/arm/crypto/sha256-core.S_shipped +++ b/arch/arm/crypto/sha256-core.S_shipped @@ -56,7 +56,6 @@ #else .syntax unified # ifdef __thumb2__ -# define adrl adr .thumb # else .code 32 @@ -1885,7 +1884,8 @@ sha256_block_data_order_neon: stmdb sp!,{r4-r12,lr} sub r11,sp,#16*4+16 - adrl r14,K256 + adr r14,.Lsha256_block_data_order + sub r14,r14,#.Lsha256_block_data_order-K256 bic r11,r11,#15 @ align for 128-bit stores mov r12,sp mov sp,r11 @ alloca diff --git a/arch/arm/crypto/sha512-armv4.pl b/arch/arm/crypto/sha512-armv4.pl index 788c17b56ecce..2a0bdf7dd87c3 100644 --- a/arch/arm/crypto/sha512-armv4.pl +++ b/arch/arm/crypto/sha512-armv4.pl @@ -212,7 +212,6 @@ () #else .syntax unified # ifdef __thumb2__ -# define adrl adr .thumb # else .code 32 @@ -602,7 +601,8 @@ () dmb @ errata #451034 on early Cortex A8 add $len,$inp,$len,lsl#7 @ len to point at the end of inp VFP_ABI_PUSH - adrl $Ktbl,K512 + adr $Ktbl,.Lsha512_block_data_order + sub $Ktbl,$Ktbl,.Lsha512_block_data_order-K512 vldmia $ctx,{$A-$H} @ load context .Loop_neon: ___ diff --git a/arch/arm/crypto/sha512-core.S_shipped b/arch/arm/crypto/sha512-core.S_shipped index 710ea309769e7..cf5a7a70ff008 100644 --- a/arch/arm/crypto/sha512-core.S_shipped +++ b/arch/arm/crypto/sha512-core.S_shipped @@ -79,7 +79,6 @@ #else .syntax unified # ifdef __thumb2__ -# define adrl adr .thumb # else .code 32 @@ -543,7 +542,8 @@ sha512_block_data_order_neon: dmb @ errata #451034 on early Cortex A8 add r2,r1,r2,lsl#7 @ len to point at the end of inp VFP_ABI_PUSH - adrl r3,K512 + adr r3,.Lsha512_block_data_order + sub r3,r3,.Lsha512_block_data_order-K512 vldmia r0,{d16-d23} @ load context .Loop_neon: vshr.u64 d24,d20,#14 @ 0 diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 6b3e64e19fb6f..70e1c23feedb7 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -279,10 +279,9 @@ .endif ;\ .popsection #define ALT_UP_B(label) \ - .equ up_b_offset, label - 9998b ;\ .pushsection ".alt.smp.init", "a" ;\ .long 9998b ;\ - W(b) . + up_b_offset ;\ + W(b) . + (label - 9998b) ;\ .popsection #else #define ALT_SMP(instr...) diff --git a/arch/arm/include/asm/cpuidle.h b/arch/arm/include/asm/cpuidle.h index 6b2ff7243b4b2..0230c60e7df0f 100644 --- a/arch/arm/include/asm/cpuidle.h +++ b/arch/arm/include/asm/cpuidle.h @@ -49,4 +49,9 @@ extern int arm_cpuidle_suspend(int index); extern int arm_cpuidle_init(int cpu); +struct arm_cpuidle_irq_context { }; + +#define arm_cpuidle_save_irq_context(c) (void)c +#define arm_cpuidle_restore_irq_context(c) (void)c + #endif diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h index 18b0197f23848..15bd9af13497f 100644 --- a/arch/arm/include/asm/ftrace.h +++ b/arch/arm/include/asm/ftrace.h @@ -16,6 +16,9 @@ extern void __gnu_mcount_nc(void); #ifdef CONFIG_DYNAMIC_FTRACE struct dyn_arch_ftrace { +#ifdef CONFIG_ARM_MODULE_PLTS + struct module *mod; +#endif }; static inline unsigned long ftrace_call_adjust(unsigned long addr) diff --git a/arch/arm/include/asm/insn.h b/arch/arm/include/asm/insn.h index f20e08ac85aeb..5475cbf9fb6b4 100644 --- a/arch/arm/include/asm/insn.h +++ b/arch/arm/include/asm/insn.h @@ -13,18 +13,18 @@ arm_gen_nop(void) } unsigned long -__arm_gen_branch(unsigned long pc, unsigned long addr, bool link); +__arm_gen_branch(unsigned long pc, unsigned long addr, bool link, bool warn); static inline unsigned long arm_gen_branch(unsigned long pc, unsigned long addr) { - return __arm_gen_branch(pc, addr, false); + return __arm_gen_branch(pc, addr, false, true); } static inline unsigned long -arm_gen_branch_link(unsigned long pc, unsigned long addr) +arm_gen_branch_link(unsigned long pc, unsigned long addr, bool warn) { - return __arm_gen_branch(pc, addr, true); + return __arm_gen_branch(pc, addr, true, warn); } #endif diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index 7a0596fcb2e77..3cc0f6d508836 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -457,6 +457,9 @@ extern void pci_iounmap(struct pci_dev *dev, void __iomem *addr); extern int valid_phys_addr_range(phys_addr_t addr, size_t size); extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size); extern int devmem_is_allowed(unsigned long pfn); +extern bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size, + unsigned long flags); +#define arch_memremap_can_ram_remap arch_memremap_can_ram_remap #endif /* diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 32564b017ba06..d8ac89879327d 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #define __KVM_HAVE_ARCH_INTC_INITIALIZED @@ -424,4 +425,10 @@ static inline bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu) #define kvm_arm_vcpu_loaded(vcpu) (false) +static inline int kvm_arm_get_spectre_bhb_state(void) +{ + /* 32bit guests don't need firmware for this */ + return SPECTRE_VULNERABLE; /* aka SMCCC_RET_NOT_SUPPORTED */ +} + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/include/asm/mach/map.h b/arch/arm/include/asm/mach/map.h index 92282558caf7c..2b8970d8e5a2f 100644 --- a/arch/arm/include/asm/mach/map.h +++ b/arch/arm/include/asm/mach/map.h @@ -27,6 +27,7 @@ enum { MT_HIGH_VECTORS, MT_MEMORY_RWX, MT_MEMORY_RW, + MT_MEMORY_RO, MT_ROM, MT_MEMORY_RWX_NONCACHED, MT_MEMORY_RW_DTCM, diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h index 182163b55546c..961fedbd810ec 100644 --- a/arch/arm/include/asm/module.h +++ b/arch/arm/include/asm/module.h @@ -19,8 +19,18 @@ enum { }; #endif +#define PLT_ENT_STRIDE L1_CACHE_BYTES +#define PLT_ENT_COUNT (PLT_ENT_STRIDE / sizeof(u32)) +#define PLT_ENT_SIZE (sizeof(struct plt_entries) / PLT_ENT_COUNT) + +struct plt_entries { + u32 ldr[PLT_ENT_COUNT]; + u32 lit[PLT_ENT_COUNT]; +}; + struct mod_plt_sec { struct elf32_shdr *plt; + struct plt_entries *plt_ent; int plt_count; }; diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h index 91d6b7856be4b..73c83f4d33b3b 100644 --- a/arch/arm/include/asm/ptrace.h +++ b/arch/arm/include/asm/ptrace.h @@ -164,5 +164,31 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs) ((current_stack_pointer | (THREAD_SIZE - 1)) - 7) - 1; \ }) + +/* + * Update ITSTATE after normal execution of an IT block instruction. + * + * The 8 IT state bits are split into two parts in CPSR: + * ITSTATE<1:0> are in CPSR<26:25> + * ITSTATE<7:2> are in CPSR<15:10> + */ +static inline unsigned long it_advance(unsigned long cpsr) +{ + if ((cpsr & 0x06000400) == 0) { + /* ITSTATE<2:0> == 0 means end of IT block, so clear IT state */ + cpsr &= ~PSR_IT_MASK; + } else { + /* We need to shift left ITSTATE<4:0> */ + const unsigned long mask = 0x06001c00; /* Mask ITSTATE<4:0> */ + unsigned long it = cpsr & mask; + it <<= 1; + it |= it >> (27 - 10); /* Carry ITSTATE<2> to correct place */ + it &= mask; + cpsr &= ~mask; + cpsr |= it; + } + return cpsr; +} + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm/include/asm/timex.h b/arch/arm/include/asm/timex.h index 7c3b3671d6c25..6d1337c169cd3 100644 --- a/arch/arm/include/asm/timex.h +++ b/arch/arm/include/asm/timex.h @@ -11,5 +11,6 @@ typedef unsigned long cycles_t; #define get_cycles() ({ cycles_t c; read_current_timer(&c) ? 0 : c; }) +#define random_get_entropy() (((unsigned long)get_cycles()) ?: random_get_entropy_fallback()) #endif diff --git a/arch/arm/include/asm/vfpmacros.h b/arch/arm/include/asm/vfpmacros.h index 628c336e8e3b2..947ee5395e1fb 100644 --- a/arch/arm/include/asm/vfpmacros.h +++ b/arch/arm/include/asm/vfpmacros.h @@ -19,23 +19,25 @@ @ read all the working registers back into the VFP .macro VFPFLDMIA, base, tmp + .fpu vfpv2 #if __LINUX_ARM_ARCH__ < 6 - LDC p11, cr0, [\base],#33*4 @ FLDMIAX \base!, {d0-d15} + fldmiax \base!, {d0-d15} #else - LDC p11, cr0, [\base],#32*4 @ FLDMIAD \base!, {d0-d15} + vldmia \base!, {d0-d15} #endif #ifdef CONFIG_VFPv3 + .fpu vfpv3 #if __LINUX_ARM_ARCH__ <= 6 ldr \tmp, =elf_hwcap @ may not have MVFR regs ldr \tmp, [\tmp, #0] tst \tmp, #HWCAP_VFPD32 - ldclne p11, cr0, [\base],#32*4 @ FLDMIAD \base!, {d16-d31} + vldmiane \base!, {d16-d31} addeq \base, \base, #32*4 @ step over unused register space #else VFPFMRX \tmp, MVFR0 @ Media and VFP Feature Register 0 and \tmp, \tmp, #MVFR0_A_SIMD_MASK @ A_SIMD field cmp \tmp, #2 @ 32 x 64bit registers? - ldcleq p11, cr0, [\base],#32*4 @ FLDMIAD \base!, {d16-d31} + vldmiaeq \base!, {d16-d31} addne \base, \base, #32*4 @ step over unused register space #endif #endif @@ -44,22 +46,23 @@ @ write all the working registers out of the VFP .macro VFPFSTMIA, base, tmp #if __LINUX_ARM_ARCH__ < 6 - STC p11, cr0, [\base],#33*4 @ FSTMIAX \base!, {d0-d15} + fstmiax \base!, {d0-d15} #else - STC p11, cr0, [\base],#32*4 @ FSTMIAD \base!, {d0-d15} + vstmia \base!, {d0-d15} #endif #ifdef CONFIG_VFPv3 + .fpu vfpv3 #if __LINUX_ARM_ARCH__ <= 6 ldr \tmp, =elf_hwcap @ may not have MVFR regs ldr \tmp, [\tmp, #0] tst \tmp, #HWCAP_VFPD32 - stclne p11, cr0, [\base],#32*4 @ FSTMIAD \base!, {d16-d31} + vstmiane \base!, {d16-d31} addeq \base, \base, #32*4 @ step over unused register space #else VFPFMRX \tmp, MVFR0 @ Media and VFP Feature Register 0 and \tmp, \tmp, #MVFR0_A_SIMD_MASK @ A_SIMD field cmp \tmp, #2 @ 32 x 64bit registers? - stcleq p11, cr0, [\base],#32*4 @ FSTMIAD \base!, {d16-d31} + vstmiaeq \base!, {d16-d31} addne \base, \base, #32*4 @ step over unused register space #endif #endif diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 2769360f195ca..89b8e70068a13 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -227,6 +227,12 @@ struct kvm_vcpu_events { #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED 3 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED (1U << 4) +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 KVM_REG_ARM_FW_REG(3) + /* Higher values mean better protection. */ +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL 0 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL 1 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED 2 + /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 70fd896c132a6..dc31426cae6d8 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -17,10 +17,14 @@ CFLAGS_REMOVE_return_address.o = -pg # Object file lists. obj-y := elf.o entry-common.o irq.o opcodes.o \ - process.o ptrace.o reboot.o return_address.o \ + process.o ptrace.o reboot.o \ setup.o signal.o sigreturn_codes.o \ stacktrace.o sys_arm.o time.o traps.o +ifneq ($(CONFIG_ARM_UNWIND),y) +obj-$(CONFIG_FRAME_POINTER) += return_address.o +endif + obj-$(CONFIG_ATAGS) += atags_parse.o obj-$(CONFIG_ATAGS_PROC) += atags_proc.o obj-$(CONFIG_DEPRECATED_PARAM_STRUCT) += atags_compat.o diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index da740c1ca457d..8e8efe28d7995 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -596,11 +596,9 @@ call_fpe: tstne r0, #0x04000000 @ bit 26 set on both ARM and Thumb-2 reteq lr and r8, r0, #0x00000f00 @ mask out CP number - THUMB( lsr r8, r8, #8 ) mov r7, #1 - add r6, r10, #TI_USED_CP - ARM( strb r7, [r6, r8, lsr #8] ) @ set appropriate used_cp[] - THUMB( strb r7, [r6, r8] ) @ set appropriate used_cp[] + add r6, r10, r8, lsr #8 @ add used_cp[] array offset first + strb r7, [r6, #TI_USED_CP] @ set appropriate used_cp[] #ifdef CONFIG_IWMMXT @ Test if we need to give access to iWMMXt coprocessors ldr r5, [r10, #TI_FLAGS] @@ -609,7 +607,7 @@ call_fpe: bcs iwmmxt_task_enable #endif ARM( add pc, pc, r8, lsr #6 ) - THUMB( lsl r8, r8, #2 ) + THUMB( lsr r8, r8, #6 ) THUMB( add pc, r8 ) nop @@ -1045,7 +1043,7 @@ vector_bhb_loop8_\name: @ bhb workaround mov r0, #8 -3: b . + 4 +3: W(b) . + 4 subs r0, r0, #1 bne 3b dsb diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index bda949fd84e8b..12b6da56f88dd 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -71,9 +71,10 @@ int ftrace_arch_code_modify_post_process(void) return 0; } -static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) +static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr, + bool warn) { - return arm_gen_branch_link(pc, addr); + return arm_gen_branch_link(pc, addr, warn); } static int ftrace_modify_code(unsigned long pc, unsigned long old, @@ -112,14 +113,14 @@ int ftrace_update_ftrace_func(ftrace_func_t func) int ret; pc = (unsigned long)&ftrace_call; - new = ftrace_call_replace(pc, (unsigned long)func); + new = ftrace_call_replace(pc, (unsigned long)func, true); ret = ftrace_modify_code(pc, 0, new, false); #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS if (!ret) { pc = (unsigned long)&ftrace_regs_call; - new = ftrace_call_replace(pc, (unsigned long)func); + new = ftrace_call_replace(pc, (unsigned long)func, true); ret = ftrace_modify_code(pc, 0, new, false); } @@ -132,10 +133,22 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned long new, old; unsigned long ip = rec->ip; + unsigned long aaddr = adjust_address(rec, addr); + struct module *mod = NULL; + +#ifdef CONFIG_ARM_MODULE_PLTS + mod = rec->arch.mod; +#endif old = ftrace_nop_replace(rec); - new = ftrace_call_replace(ip, adjust_address(rec, addr)); + new = ftrace_call_replace(ip, aaddr, !mod); +#ifdef CONFIG_ARM_MODULE_PLTS + if (!new && mod) { + aaddr = get_module_plt(mod, ip, aaddr); + new = ftrace_call_replace(ip, aaddr, true); + } +#endif return ftrace_modify_code(rec->ip, old, new, true); } @@ -148,9 +161,9 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long new, old; unsigned long ip = rec->ip; - old = ftrace_call_replace(ip, adjust_address(rec, old_addr)); + old = ftrace_call_replace(ip, adjust_address(rec, old_addr), true); - new = ftrace_call_replace(ip, adjust_address(rec, addr)); + new = ftrace_call_replace(ip, adjust_address(rec, addr), true); return ftrace_modify_code(rec->ip, old, new, true); } @@ -160,12 +173,29 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { + unsigned long aaddr = adjust_address(rec, addr); unsigned long ip = rec->ip; unsigned long old; unsigned long new; int ret; - old = ftrace_call_replace(ip, adjust_address(rec, addr)); +#ifdef CONFIG_ARM_MODULE_PLTS + /* mod is only supplied during module loading */ + if (!mod) + mod = rec->arch.mod; + else + rec->arch.mod = mod; +#endif + + old = ftrace_call_replace(ip, aaddr, + !IS_ENABLED(CONFIG_ARM_MODULE_PLTS) || !mod); +#ifdef CONFIG_ARM_MODULE_PLTS + if (!old && mod) { + aaddr = get_module_plt(mod, ip, aaddr); + old = ftrace_call_replace(ip, aaddr, true); + } +#endif + new = ftrace_nop_replace(rec); ret = ftrace_modify_code(ip, old, new, true); diff --git a/arch/arm/kernel/insn.c b/arch/arm/kernel/insn.c index 2e844b70386b3..db0acbb7d7a02 100644 --- a/arch/arm/kernel/insn.c +++ b/arch/arm/kernel/insn.c @@ -3,8 +3,9 @@ #include #include -static unsigned long -__arm_gen_branch_thumb2(unsigned long pc, unsigned long addr, bool link) +static unsigned long __arm_gen_branch_thumb2(unsigned long pc, + unsigned long addr, bool link, + bool warn) { unsigned long s, j1, j2, i1, i2, imm10, imm11; unsigned long first, second; @@ -12,7 +13,7 @@ __arm_gen_branch_thumb2(unsigned long pc, unsigned long addr, bool link) offset = (long)addr - (long)(pc + 4); if (offset < -16777216 || offset > 16777214) { - WARN_ON_ONCE(1); + WARN_ON_ONCE(warn); return 0; } @@ -33,8 +34,8 @@ __arm_gen_branch_thumb2(unsigned long pc, unsigned long addr, bool link) return __opcode_thumb32_compose(first, second); } -static unsigned long -__arm_gen_branch_arm(unsigned long pc, unsigned long addr, bool link) +static unsigned long __arm_gen_branch_arm(unsigned long pc, unsigned long addr, + bool link, bool warn) { unsigned long opcode = 0xea000000; long offset; @@ -44,7 +45,7 @@ __arm_gen_branch_arm(unsigned long pc, unsigned long addr, bool link) offset = (long)addr - (long)(pc + 8); if (unlikely(offset < -33554432 || offset > 33554428)) { - WARN_ON_ONCE(1); + WARN_ON_ONCE(warn); return 0; } @@ -54,10 +55,10 @@ __arm_gen_branch_arm(unsigned long pc, unsigned long addr, bool link) } unsigned long -__arm_gen_branch(unsigned long pc, unsigned long addr, bool link) +__arm_gen_branch(unsigned long pc, unsigned long addr, bool link, bool warn) { if (IS_ENABLED(CONFIG_THUMB2_KERNEL)) - return __arm_gen_branch_thumb2(pc, addr, link); + return __arm_gen_branch_thumb2(pc, addr, link, warn); else - return __arm_gen_branch_arm(pc, addr, link); + return __arm_gen_branch_arm(pc, addr, link, warn); } diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S index 0dcae787b004d..d2b4ac06e4ed8 100644 --- a/arch/arm/kernel/iwmmxt.S +++ b/arch/arm/kernel/iwmmxt.S @@ -16,6 +16,7 @@ #include #include #include +#include "iwmmxt.h" #if defined(CONFIG_CPU_PJ4) || defined(CONFIG_CPU_PJ4B) #define PJ4(code...) code @@ -113,33 +114,33 @@ concan_save: concan_dump: - wstrw wCSSF, [r1, #MMX_WCSSF] - wstrw wCASF, [r1, #MMX_WCASF] - wstrw wCGR0, [r1, #MMX_WCGR0] - wstrw wCGR1, [r1, #MMX_WCGR1] - wstrw wCGR2, [r1, #MMX_WCGR2] - wstrw wCGR3, [r1, #MMX_WCGR3] + wstrw wCSSF, r1, MMX_WCSSF + wstrw wCASF, r1, MMX_WCASF + wstrw wCGR0, r1, MMX_WCGR0 + wstrw wCGR1, r1, MMX_WCGR1 + wstrw wCGR2, r1, MMX_WCGR2 + wstrw wCGR3, r1, MMX_WCGR3 1: @ MUP? wRn tst r2, #0x2 beq 2f - wstrd wR0, [r1, #MMX_WR0] - wstrd wR1, [r1, #MMX_WR1] - wstrd wR2, [r1, #MMX_WR2] - wstrd wR3, [r1, #MMX_WR3] - wstrd wR4, [r1, #MMX_WR4] - wstrd wR5, [r1, #MMX_WR5] - wstrd wR6, [r1, #MMX_WR6] - wstrd wR7, [r1, #MMX_WR7] - wstrd wR8, [r1, #MMX_WR8] - wstrd wR9, [r1, #MMX_WR9] - wstrd wR10, [r1, #MMX_WR10] - wstrd wR11, [r1, #MMX_WR11] - wstrd wR12, [r1, #MMX_WR12] - wstrd wR13, [r1, #MMX_WR13] - wstrd wR14, [r1, #MMX_WR14] - wstrd wR15, [r1, #MMX_WR15] + wstrd wR0, r1, MMX_WR0 + wstrd wR1, r1, MMX_WR1 + wstrd wR2, r1, MMX_WR2 + wstrd wR3, r1, MMX_WR3 + wstrd wR4, r1, MMX_WR4 + wstrd wR5, r1, MMX_WR5 + wstrd wR6, r1, MMX_WR6 + wstrd wR7, r1, MMX_WR7 + wstrd wR8, r1, MMX_WR8 + wstrd wR9, r1, MMX_WR9 + wstrd wR10, r1, MMX_WR10 + wstrd wR11, r1, MMX_WR11 + wstrd wR12, r1, MMX_WR12 + wstrd wR13, r1, MMX_WR13 + wstrd wR14, r1, MMX_WR14 + wstrd wR15, r1, MMX_WR15 2: teq r0, #0 @ anything to load? reteq lr @ if not, return @@ -147,30 +148,30 @@ concan_dump: concan_load: @ Load wRn - wldrd wR0, [r0, #MMX_WR0] - wldrd wR1, [r0, #MMX_WR1] - wldrd wR2, [r0, #MMX_WR2] - wldrd wR3, [r0, #MMX_WR3] - wldrd wR4, [r0, #MMX_WR4] - wldrd wR5, [r0, #MMX_WR5] - wldrd wR6, [r0, #MMX_WR6] - wldrd wR7, [r0, #MMX_WR7] - wldrd wR8, [r0, #MMX_WR8] - wldrd wR9, [r0, #MMX_WR9] - wldrd wR10, [r0, #MMX_WR10] - wldrd wR11, [r0, #MMX_WR11] - wldrd wR12, [r0, #MMX_WR12] - wldrd wR13, [r0, #MMX_WR13] - wldrd wR14, [r0, #MMX_WR14] - wldrd wR15, [r0, #MMX_WR15] + wldrd wR0, r0, MMX_WR0 + wldrd wR1, r0, MMX_WR1 + wldrd wR2, r0, MMX_WR2 + wldrd wR3, r0, MMX_WR3 + wldrd wR4, r0, MMX_WR4 + wldrd wR5, r0, MMX_WR5 + wldrd wR6, r0, MMX_WR6 + wldrd wR7, r0, MMX_WR7 + wldrd wR8, r0, MMX_WR8 + wldrd wR9, r0, MMX_WR9 + wldrd wR10, r0, MMX_WR10 + wldrd wR11, r0, MMX_WR11 + wldrd wR12, r0, MMX_WR12 + wldrd wR13, r0, MMX_WR13 + wldrd wR14, r0, MMX_WR14 + wldrd wR15, r0, MMX_WR15 @ Load wCx - wldrw wCSSF, [r0, #MMX_WCSSF] - wldrw wCASF, [r0, #MMX_WCASF] - wldrw wCGR0, [r0, #MMX_WCGR0] - wldrw wCGR1, [r0, #MMX_WCGR1] - wldrw wCGR2, [r0, #MMX_WCGR2] - wldrw wCGR3, [r0, #MMX_WCGR3] + wldrw wCSSF, r0, MMX_WCSSF + wldrw wCASF, r0, MMX_WCASF + wldrw wCGR0, r0, MMX_WCGR0 + wldrw wCGR1, r0, MMX_WCGR1 + wldrw wCGR2, r0, MMX_WCGR2 + wldrw wCGR3, r0, MMX_WCGR3 @ clear CUP/MUP (only if r1 != 0) teq r1, #0 diff --git a/arch/arm/kernel/iwmmxt.h b/arch/arm/kernel/iwmmxt.h new file mode 100644 index 0000000000000..fb627286f5bb9 --- /dev/null +++ b/arch/arm/kernel/iwmmxt.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __IWMMXT_H__ +#define __IWMMXT_H__ + +.irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +.set .LwR\b, \b +.set .Lr\b, \b +.endr + +.set .LwCSSF, 0x2 +.set .LwCASF, 0x3 +.set .LwCGR0, 0x8 +.set .LwCGR1, 0x9 +.set .LwCGR2, 0xa +.set .LwCGR3, 0xb + +.macro wldrd, reg:req, base:req, offset:req +.inst 0xedd00100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2) +.endm + +.macro wldrw, reg:req, base:req, offset:req +.inst 0xfd900100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2) +.endm + +.macro wstrd, reg:req, base:req, offset:req +.inst 0xedc00100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2) +.endm + +.macro wstrw, reg:req, base:req, offset:req +.inst 0xfd800100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2) +.endm + +#ifdef __clang__ + +#define wCon c1 + +.macro tmrc, dest:req, control:req +mrc p1, 0, \dest, \control, c0, 0 +.endm + +.macro tmcr, control:req, src:req +mcr p1, 0, \src, \control, c0, 0 +.endm +#endif + +#endif diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index 6a95b92966406..183a6f2f165ad 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -154,22 +154,38 @@ static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int instr) return 0; } -static struct undef_hook kgdb_brkpt_hook = { +static struct undef_hook kgdb_brkpt_arm_hook = { .instr_mask = 0xffffffff, .instr_val = KGDB_BREAKINST, - .cpsr_mask = MODE_MASK, + .cpsr_mask = PSR_T_BIT | MODE_MASK, .cpsr_val = SVC_MODE, .fn = kgdb_brk_fn }; -static struct undef_hook kgdb_compiled_brkpt_hook = { +static struct undef_hook kgdb_brkpt_thumb_hook = { + .instr_mask = 0xffff, + .instr_val = KGDB_BREAKINST & 0xffff, + .cpsr_mask = PSR_T_BIT | MODE_MASK, + .cpsr_val = PSR_T_BIT | SVC_MODE, + .fn = kgdb_brk_fn +}; + +static struct undef_hook kgdb_compiled_brkpt_arm_hook = { .instr_mask = 0xffffffff, .instr_val = KGDB_COMPILED_BREAK, - .cpsr_mask = MODE_MASK, + .cpsr_mask = PSR_T_BIT | MODE_MASK, .cpsr_val = SVC_MODE, .fn = kgdb_compiled_brk_fn }; +static struct undef_hook kgdb_compiled_brkpt_thumb_hook = { + .instr_mask = 0xffff, + .instr_val = KGDB_COMPILED_BREAK & 0xffff, + .cpsr_mask = PSR_T_BIT | MODE_MASK, + .cpsr_val = PSR_T_BIT | SVC_MODE, + .fn = kgdb_compiled_brk_fn +}; + static int __kgdb_notify(struct die_args *args, unsigned long cmd) { struct pt_regs *regs = args->regs; @@ -210,8 +226,10 @@ int kgdb_arch_init(void) if (ret != 0) return ret; - register_undef_hook(&kgdb_brkpt_hook); - register_undef_hook(&kgdb_compiled_brkpt_hook); + register_undef_hook(&kgdb_brkpt_arm_hook); + register_undef_hook(&kgdb_brkpt_thumb_hook); + register_undef_hook(&kgdb_compiled_brkpt_arm_hook); + register_undef_hook(&kgdb_compiled_brkpt_thumb_hook); return 0; } @@ -224,8 +242,10 @@ int kgdb_arch_init(void) */ void kgdb_arch_exit(void) { - unregister_undef_hook(&kgdb_brkpt_hook); - unregister_undef_hook(&kgdb_compiled_brkpt_hook); + unregister_undef_hook(&kgdb_brkpt_arm_hook); + unregister_undef_hook(&kgdb_brkpt_thumb_hook); + unregister_undef_hook(&kgdb_compiled_brkpt_arm_hook); + unregister_undef_hook(&kgdb_compiled_brkpt_thumb_hook); unregister_die_notifier(&kgdb_notifier); } diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c index b647741c0ab06..d1c2d3bd55b64 100644 --- a/arch/arm/kernel/module-plts.c +++ b/arch/arm/kernel/module-plts.c @@ -4,6 +4,7 @@ */ #include +#include #include #include #include @@ -11,10 +12,6 @@ #include #include -#define PLT_ENT_STRIDE L1_CACHE_BYTES -#define PLT_ENT_COUNT (PLT_ENT_STRIDE / sizeof(u32)) -#define PLT_ENT_SIZE (sizeof(struct plt_entries) / PLT_ENT_COUNT) - #ifdef CONFIG_THUMB2_KERNEL #define PLT_ENT_LDR __opcode_to_mem_thumb32(0xf8dff000 | \ (PLT_ENT_STRIDE - 4)) @@ -23,9 +20,11 @@ (PLT_ENT_STRIDE - 8)) #endif -struct plt_entries { - u32 ldr[PLT_ENT_COUNT]; - u32 lit[PLT_ENT_COUNT]; +static const u32 fixed_plts[] = { +#ifdef CONFIG_DYNAMIC_FTRACE + FTRACE_ADDR, + MCOUNT_ADDR, +#endif }; static bool in_init(const struct module *mod, unsigned long loc) @@ -33,14 +32,40 @@ static bool in_init(const struct module *mod, unsigned long loc) return loc - (u32)mod->init_layout.base < mod->init_layout.size; } +static void prealloc_fixed(struct mod_plt_sec *pltsec, struct plt_entries *plt) +{ + int i; + + if (!ARRAY_SIZE(fixed_plts) || pltsec->plt_count) + return; + pltsec->plt_count = ARRAY_SIZE(fixed_plts); + + for (i = 0; i < ARRAY_SIZE(plt->ldr); ++i) + plt->ldr[i] = PLT_ENT_LDR; + + BUILD_BUG_ON(sizeof(fixed_plts) > sizeof(plt->lit)); + memcpy(plt->lit, fixed_plts, sizeof(fixed_plts)); +} + u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val) { struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core : &mod->arch.init; + struct plt_entries *plt; + int idx; + + /* cache the address, ELF header is available only during module load */ + if (!pltsec->plt_ent) + pltsec->plt_ent = (struct plt_entries *)pltsec->plt->sh_addr; + plt = pltsec->plt_ent; - struct plt_entries *plt = (struct plt_entries *)pltsec->plt->sh_addr; - int idx = 0; + prealloc_fixed(pltsec, plt); + + for (idx = 0; idx < ARRAY_SIZE(fixed_plts); ++idx) + if (plt->lit[idx] == val) + return (u32)&plt->ldr[idx]; + idx = 0; /* * Look for an existing entry pointing to 'val'. Given that the * relocations are sorted, this will be the last entry we allocated. @@ -188,8 +213,8 @@ static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base, int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, struct module *mod) { - unsigned long core_plts = 0; - unsigned long init_plts = 0; + unsigned long core_plts = ARRAY_SIZE(fixed_plts); + unsigned long init_plts = ARRAY_SIZE(fixed_plts); Elf32_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum; Elf32_Sym *syms = NULL; @@ -244,6 +269,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.core.plt->sh_size = round_up(core_plts * PLT_ENT_SIZE, sizeof(struct plt_entries)); mod->arch.core.plt_count = 0; + mod->arch.core.plt_ent = NULL; mod->arch.init.plt->sh_type = SHT_NOBITS; mod->arch.init.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; @@ -251,6 +277,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.init.plt->sh_size = round_up(init_plts * PLT_ENT_SIZE, sizeof(struct plt_entries)); mod->arch.init.plt_count = 0; + mod->arch.init.plt_ent = NULL; pr_debug("%s: plt=%x, init.plt=%x\n", __func__, mod->arch.core.plt->sh_size, mod->arch.init.plt->sh_size); diff --git a/arch/arm/kernel/perf_callchain.c b/arch/arm/kernel/perf_callchain.c index 3b69a76d341e7..1626dfc6f6ce6 100644 --- a/arch/arm/kernel/perf_callchain.c +++ b/arch/arm/kernel/perf_callchain.c @@ -62,9 +62,10 @@ user_backtrace(struct frame_tail __user *tail, void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); struct frame_tail __user *tail; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (guest_cbs && guest_cbs->is_in_guest()) { /* We don't support guest os callchain now */ return; } @@ -98,9 +99,10 @@ callchain_trace(struct stackframe *fr, void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); struct stackframe fr; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (guest_cbs && guest_cbs->is_in_guest()) { /* We don't support guest os callchain now */ return; } @@ -111,18 +113,21 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re unsigned long perf_instruction_pointer(struct pt_regs *regs) { - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) - return perf_guest_cbs->get_guest_ip(); + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); + + if (guest_cbs && guest_cbs->is_in_guest()) + return guest_cbs->get_guest_ip(); return instruction_pointer(regs); } unsigned long perf_misc_flags(struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); int misc = 0; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - if (perf_guest_cbs->is_user_mode()) + if (guest_cbs && guest_cbs->is_in_guest()) { + if (guest_cbs->is_user_mode()) misc |= PERF_RECORD_MISC_GUEST_USER; else misc |= PERF_RECORD_MISC_GUEST_KERNEL; diff --git a/arch/arm/kernel/return_address.c b/arch/arm/kernel/return_address.c index b0d2f1fe891d1..7b42ac010fdfd 100644 --- a/arch/arm/kernel/return_address.c +++ b/arch/arm/kernel/return_address.c @@ -7,8 +7,6 @@ */ #include #include - -#if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) #include #include @@ -53,6 +51,4 @@ void *return_address(unsigned int level) return NULL; } -#endif /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) */ - EXPORT_SYMBOL_GPL(return_address); diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c index 76ea4178a55cb..8247749998259 100644 --- a/arch/arm/kernel/stacktrace.c +++ b/arch/arm/kernel/stacktrace.c @@ -53,18 +53,17 @@ int notrace unwind_frame(struct stackframe *frame) return -EINVAL; frame->sp = frame->fp; - frame->fp = *(unsigned long *)(fp); - frame->pc = frame->lr; - frame->lr = *(unsigned long *)(fp + 4); + frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); + frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 4)); #else /* check current frame pointer is within bounds */ if (fp < low + 12 || fp > high - 4) return -EINVAL; /* restore the registers from the stack frame */ - frame->fp = *(unsigned long *)(fp - 12); - frame->sp = *(unsigned long *)(fp - 8); - frame->pc = *(unsigned long *)(fp - 4); + frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp - 12)); + frame->sp = READ_ONCE_NOCHECK(*(unsigned long *)(fp - 8)); + frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp - 4)); #endif return 0; diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index 8c74037ade229..b1f366df620b0 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -180,7 +180,7 @@ ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & PAGE_MASK) <= PAGE_SIZE, ASSERT((_end - __bss_start) >= 12288, ".bss too small for CONFIG_XIP_DEFLATED_DATA") #endif -#ifdef CONFIG_ARM_MPU +#if defined(CONFIG_ARM_MPU) && !defined(CONFIG_COMPILE_TEST) /* * Due to PMSAv7 restriction on base address and size we have to * enforce minimal alignment restrictions. It was seen that weaker diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c index b99dd8e1c93f1..7ba6cf8261626 100644 --- a/arch/arm/lib/xor-neon.c +++ b/arch/arm/lib/xor-neon.c @@ -26,8 +26,9 @@ MODULE_LICENSE("GPL"); * While older versions of GCC do not generate incorrect code, they fail to * recognize the parallel nature of these functions, and emit plain ARM code, * which is known to be slower than the optimized ARM code in asm-arm/xor.h. + * + * #warning This code requires at least version 4.6 of GCC */ -#warning This code requires at least version 4.6 of GCC #endif #pragma GCC diagnostic ignored "-Wunused-variable" diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index 676cc2a318f41..5d75ab82d5a6a 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -103,7 +103,7 @@ static const struct wakeup_source_info ws_info[] = { static const struct of_device_id sama5d2_ws_ids[] = { { .compatible = "atmel,sama5d2-gem", .data = &ws_info[0] }, - { .compatible = "atmel,at91rm9200-rtc", .data = &ws_info[1] }, + { .compatible = "atmel,sama5d2-rtc", .data = &ws_info[1] }, { .compatible = "atmel,sama5d3-udc", .data = &ws_info[2] }, { .compatible = "atmel,at91rm9200-ohci", .data = &ws_info[2] }, { .compatible = "usb-ohci", .data = &ws_info[2] }, @@ -114,12 +114,12 @@ static const struct of_device_id sama5d2_ws_ids[] = { }; static const struct of_device_id sam9x60_ws_ids[] = { - { .compatible = "atmel,at91sam9x5-rtc", .data = &ws_info[1] }, + { .compatible = "microchip,sam9x60-rtc", .data = &ws_info[1] }, { .compatible = "atmel,at91rm9200-ohci", .data = &ws_info[2] }, { .compatible = "usb-ohci", .data = &ws_info[2] }, { .compatible = "atmel,at91sam9g45-ehci", .data = &ws_info[2] }, { .compatible = "usb-ehci", .data = &ws_info[2] }, - { .compatible = "atmel,at91sam9260-rtt", .data = &ws_info[4] }, + { .compatible = "microchip,sam9x60-rtt", .data = &ws_info[4] }, { .compatible = "cdns,sam9x60-macb", .data = &ws_info[5] }, { /* sentinel */ } }; diff --git a/arch/arm/mach-axxia/platsmp.c b/arch/arm/mach-axxia/platsmp.c index 512943eae30a5..2e203626eda52 100644 --- a/arch/arm/mach-axxia/platsmp.c +++ b/arch/arm/mach-axxia/platsmp.c @@ -39,6 +39,7 @@ static int axxia_boot_secondary(unsigned int cpu, struct task_struct *idle) return -ENOENT; syscon = of_iomap(syscon_np, 0); + of_node_put(syscon_np); if (!syscon) return -ENOMEM; diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig index 5e5f1fabc3d40..634d1bc3c0114 100644 --- a/arch/arm/mach-bcm/Kconfig +++ b/arch/arm/mach-bcm/Kconfig @@ -214,7 +214,6 @@ config ARCH_BRCMSTB select HAVE_ARM_ARCH_TIMER select BRCMSTB_L2_IRQ select BCM7120_L2_IRQ - select ARCH_HAS_HOLES_MEMORYMODEL select ZONE_DMA if ARM_LPAE select SOC_BRCMSTB select SOC_BUS diff --git a/arch/arm/mach-cns3xxx/core.c b/arch/arm/mach-cns3xxx/core.c index 1d61a7701c11f..d7e63f57b426a 100644 --- a/arch/arm/mach-cns3xxx/core.c +++ b/arch/arm/mach-cns3xxx/core.c @@ -376,6 +376,7 @@ static void __init cns3xxx_init(void) /* De-Asscer SATA Reset */ cns3xxx_pwr_soft_rst(CNS3XXX_PWR_SOFTWARE_RST(SATA)); } + of_node_put(dn); dn = of_find_compatible_node(NULL, NULL, "cavium,cns3420-sdhci"); if (of_device_is_available(dn)) { @@ -389,6 +390,7 @@ static void __init cns3xxx_init(void) cns3xxx_pwr_clk_en(CNS3XXX_PWR_CLK_EN(SDIO)); cns3xxx_pwr_soft_rst(CNS3XXX_PWR_SOFTWARE_RST(SDIO)); } + of_node_put(dn); pm_power_off = cns3xxx_power_off; diff --git a/arch/arm/mach-davinci/Kconfig b/arch/arm/mach-davinci/Kconfig index 02b180ad72454..4d3b7d0418c40 100644 --- a/arch/arm/mach-davinci/Kconfig +++ b/arch/arm/mach-davinci/Kconfig @@ -5,7 +5,6 @@ menuconfig ARCH_DAVINCI depends on ARCH_MULTI_V5 select DAVINCI_TIMER select ZONE_DMA - select ARCH_HAS_HOLES_MEMORYMODEL select PM_GENERIC_DOMAINS if PM select PM_GENERIC_DOMAINS_OF if PM && OF select REGMAP_MMIO diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c index 5b3549f1236c5..b2ede9bf82dff 100644 --- a/arch/arm/mach-davinci/board-da850-evm.c +++ b/arch/arm/mach-davinci/board-da850-evm.c @@ -1101,11 +1101,13 @@ static int __init da850_evm_config_emac(void) int ret; u32 val; struct davinci_soc_info *soc_info = &davinci_soc_info; - u8 rmii_en = soc_info->emac_pdata->rmii_en; + u8 rmii_en; if (!machine_is_davinci_da850_evm()) return 0; + rmii_en = soc_info->emac_pdata->rmii_en; + cfg_chip3_base = DA8XX_SYSCFG0_VIRT(DA8XX_CFGCHIP3_REG); val = __raw_readl(cfg_chip3_base); diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig index 9dab1f50a02f8..fc01137628e4b 100644 --- a/arch/arm/mach-exynos/Kconfig +++ b/arch/arm/mach-exynos/Kconfig @@ -8,7 +8,6 @@ menuconfig ARCH_EXYNOS bool "Samsung EXYNOS" depends on ARCH_MULTI_V7 - select ARCH_HAS_HOLES_MEMORYMODEL select ARCH_SUPPORTS_BIG_ENDIAN select ARM_AMBA select ARM_GIC diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c index f226d4f57bf41..9025067ef6f57 100644 --- a/arch/arm/mach-exynos/exynos.c +++ b/arch/arm/mach-exynos/exynos.c @@ -136,6 +136,7 @@ static void exynos_map_pmu(void) np = of_find_matching_node(NULL, exynos_dt_pmu_match); if (np) pmu_base_addr = of_iomap(np, 0); + of_node_put(np); } static void __init exynos_init_irq(void) diff --git a/arch/arm/mach-highbank/Kconfig b/arch/arm/mach-highbank/Kconfig index 1bc68913d62c1..9de38ce8124f2 100644 --- a/arch/arm/mach-highbank/Kconfig +++ b/arch/arm/mach-highbank/Kconfig @@ -2,7 +2,6 @@ config ARCH_HIGHBANK bool "Calxeda ECX-1000/2000 (Highbank/Midway)" depends on ARCH_MULTI_V7 - select ARCH_HAS_HOLES_MEMORYMODEL select ARCH_SUPPORTS_BIG_ENDIAN select ARM_AMBA select ARM_ERRATA_764369 if SMP diff --git a/arch/arm/mach-hisi/platsmp.c b/arch/arm/mach-hisi/platsmp.c index da7a09c1dae56..1cd1d9b0aabf9 100644 --- a/arch/arm/mach-hisi/platsmp.c +++ b/arch/arm/mach-hisi/platsmp.c @@ -67,14 +67,17 @@ static void __init hi3xxx_smp_prepare_cpus(unsigned int max_cpus) } ctrl_base = of_iomap(np, 0); if (!ctrl_base) { + of_node_put(np); pr_err("failed to map address\n"); return; } if (of_property_read_u32(np, "smp-offset", &offset) < 0) { + of_node_put(np); pr_err("failed to find smp-offset property\n"); return; } ctrl_base += offset; + of_node_put(np); } } @@ -160,6 +163,7 @@ static int hip01_boot_secondary(unsigned int cpu, struct task_struct *idle) if (WARN_ON(!node)) return -1; ctrl_base = of_iomap(node, 0); + of_node_put(node); /* set the secondary core boot from DDR */ remap_reg_value = readl_relaxed(ctrl_base + REG_SC_CTRL); diff --git a/arch/arm/mach-imx/pm-imx6.c b/arch/arm/mach-imx/pm-imx6.c index baf3b47601af0..1b73e4e76310c 100644 --- a/arch/arm/mach-imx/pm-imx6.c +++ b/arch/arm/mach-imx/pm-imx6.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -618,6 +619,7 @@ static void __init imx6_pm_common_init(const struct imx6_pm_socdata static void imx6_pm_stby_poweroff(void) { + gic_cpu_if_down(0); imx6_set_lpm(STOP_POWER_OFF); imx6q_suspend_finish(0); diff --git a/arch/arm/mach-iop32x/include/mach/entry-macro.S b/arch/arm/mach-iop32x/include/mach/entry-macro.S index 8e6766d4621eb..341e5d9a6616d 100644 --- a/arch/arm/mach-iop32x/include/mach/entry-macro.S +++ b/arch/arm/mach-iop32x/include/mach/entry-macro.S @@ -20,7 +20,7 @@ mrc p6, 0, \irqstat, c8, c0, 0 @ Read IINTSRC cmp \irqstat, #0 clzne \irqnr, \irqstat - rsbne \irqnr, \irqnr, #31 + rsbne \irqnr, \irqnr, #32 .endm .macro arch_ret_to_user, tmp1, tmp2 diff --git a/arch/arm/mach-iop32x/include/mach/irqs.h b/arch/arm/mach-iop32x/include/mach/irqs.h index c4e78df428e86..e09ae5f48aec5 100644 --- a/arch/arm/mach-iop32x/include/mach/irqs.h +++ b/arch/arm/mach-iop32x/include/mach/irqs.h @@ -9,6 +9,6 @@ #ifndef __IRQS_H #define __IRQS_H -#define NR_IRQS 32 +#define NR_IRQS 33 #endif diff --git a/arch/arm/mach-iop32x/irq.c b/arch/arm/mach-iop32x/irq.c index 2d48bf1398c10..d1e8824cbd824 100644 --- a/arch/arm/mach-iop32x/irq.c +++ b/arch/arm/mach-iop32x/irq.c @@ -32,14 +32,14 @@ static void intstr_write(u32 val) static void iop32x_irq_mask(struct irq_data *d) { - iop32x_mask &= ~(1 << d->irq); + iop32x_mask &= ~(1 << (d->irq - 1)); intctl_write(iop32x_mask); } static void iop32x_irq_unmask(struct irq_data *d) { - iop32x_mask |= 1 << d->irq; + iop32x_mask |= 1 << (d->irq - 1); intctl_write(iop32x_mask); } @@ -65,7 +65,7 @@ void __init iop32x_init_irq(void) machine_is_em7210()) *IOP3XX_PCIIRSR = 0x0f; - for (i = 0; i < NR_IRQS; i++) { + for (i = 1; i < NR_IRQS; i++) { irq_set_chip_and_handler(i, &ext_chip, handle_level_irq); irq_clear_status_flags(i, IRQ_NOREQUEST | IRQ_NOPROBE); } diff --git a/arch/arm/mach-iop32x/irqs.h b/arch/arm/mach-iop32x/irqs.h index 69858e4e905d1..e1dfc8b4e7d7e 100644 --- a/arch/arm/mach-iop32x/irqs.h +++ b/arch/arm/mach-iop32x/irqs.h @@ -7,36 +7,40 @@ #ifndef __IOP32X_IRQS_H #define __IOP32X_IRQS_H +/* Interrupts in Linux start at 1, hardware starts at 0 */ + +#define IOP_IRQ(x) ((x) + 1) + /* * IOP80321 chipset interrupts */ -#define IRQ_IOP32X_DMA0_EOT 0 -#define IRQ_IOP32X_DMA0_EOC 1 -#define IRQ_IOP32X_DMA1_EOT 2 -#define IRQ_IOP32X_DMA1_EOC 3 -#define IRQ_IOP32X_AA_EOT 6 -#define IRQ_IOP32X_AA_EOC 7 -#define IRQ_IOP32X_CORE_PMON 8 -#define IRQ_IOP32X_TIMER0 9 -#define IRQ_IOP32X_TIMER1 10 -#define IRQ_IOP32X_I2C_0 11 -#define IRQ_IOP32X_I2C_1 12 -#define IRQ_IOP32X_MESSAGING 13 -#define IRQ_IOP32X_ATU_BIST 14 -#define IRQ_IOP32X_PERFMON 15 -#define IRQ_IOP32X_CORE_PMU 16 -#define IRQ_IOP32X_BIU_ERR 17 -#define IRQ_IOP32X_ATU_ERR 18 -#define IRQ_IOP32X_MCU_ERR 19 -#define IRQ_IOP32X_DMA0_ERR 20 -#define IRQ_IOP32X_DMA1_ERR 21 -#define IRQ_IOP32X_AA_ERR 23 -#define IRQ_IOP32X_MSG_ERR 24 -#define IRQ_IOP32X_SSP 25 -#define IRQ_IOP32X_XINT0 27 -#define IRQ_IOP32X_XINT1 28 -#define IRQ_IOP32X_XINT2 29 -#define IRQ_IOP32X_XINT3 30 -#define IRQ_IOP32X_HPI 31 +#define IRQ_IOP32X_DMA0_EOT IOP_IRQ(0) +#define IRQ_IOP32X_DMA0_EOC IOP_IRQ(1) +#define IRQ_IOP32X_DMA1_EOT IOP_IRQ(2) +#define IRQ_IOP32X_DMA1_EOC IOP_IRQ(3) +#define IRQ_IOP32X_AA_EOT IOP_IRQ(6) +#define IRQ_IOP32X_AA_EOC IOP_IRQ(7) +#define IRQ_IOP32X_CORE_PMON IOP_IRQ(8) +#define IRQ_IOP32X_TIMER0 IOP_IRQ(9) +#define IRQ_IOP32X_TIMER1 IOP_IRQ(10) +#define IRQ_IOP32X_I2C_0 IOP_IRQ(11) +#define IRQ_IOP32X_I2C_1 IOP_IRQ(12) +#define IRQ_IOP32X_MESSAGING IOP_IRQ(13) +#define IRQ_IOP32X_ATU_BIST IOP_IRQ(14) +#define IRQ_IOP32X_PERFMON IOP_IRQ(15) +#define IRQ_IOP32X_CORE_PMU IOP_IRQ(16) +#define IRQ_IOP32X_BIU_ERR IOP_IRQ(17) +#define IRQ_IOP32X_ATU_ERR IOP_IRQ(18) +#define IRQ_IOP32X_MCU_ERR IOP_IRQ(19) +#define IRQ_IOP32X_DMA0_ERR IOP_IRQ(20) +#define IRQ_IOP32X_DMA1_ERR IOP_IRQ(21) +#define IRQ_IOP32X_AA_ERR IOP_IRQ(23) +#define IRQ_IOP32X_MSG_ERR IOP_IRQ(24) +#define IRQ_IOP32X_SSP IOP_IRQ(25) +#define IRQ_IOP32X_XINT0 IOP_IRQ(27) +#define IRQ_IOP32X_XINT1 IOP_IRQ(28) +#define IRQ_IOP32X_XINT2 IOP_IRQ(29) +#define IRQ_IOP32X_XINT3 IOP_IRQ(30) +#define IRQ_IOP32X_HPI IOP_IRQ(31) #endif diff --git a/arch/arm/mach-mediatek/Kconfig b/arch/arm/mach-mediatek/Kconfig index 9e0f592d87d8e..35a3430c7942d 100644 --- a/arch/arm/mach-mediatek/Kconfig +++ b/arch/arm/mach-mediatek/Kconfig @@ -30,6 +30,7 @@ config MACH_MT7623 config MACH_MT7629 bool "MediaTek MT7629 SoCs support" default ARCH_MEDIATEK + select HAVE_ARM_ARCH_TIMER config MACH_MT8127 bool "MediaTek MT8127 SoCs support" diff --git a/arch/arm/mach-meson/platsmp.c b/arch/arm/mach-meson/platsmp.c index 4b8ad728bb42a..32ac60b89fdcc 100644 --- a/arch/arm/mach-meson/platsmp.c +++ b/arch/arm/mach-meson/platsmp.c @@ -71,6 +71,7 @@ static void __init meson_smp_prepare_cpus(const char *scu_compatible, } sram_base = of_iomap(node, 0); + of_node_put(node); if (!sram_base) { pr_err("Couldn't map SRAM registers\n"); return; @@ -91,6 +92,7 @@ static void __init meson_smp_prepare_cpus(const char *scu_compatible, } scu_base = of_iomap(node, 0); + of_node_put(node); if (!scu_base) { pr_err("Couldn't map SCU registers\n"); return; diff --git a/arch/arm/mach-mmp/sram.c b/arch/arm/mach-mmp/sram.c index 6794e2db1ad5f..ecc46c31004f6 100644 --- a/arch/arm/mach-mmp/sram.c +++ b/arch/arm/mach-mmp/sram.c @@ -72,6 +72,8 @@ static int sram_probe(struct platform_device *pdev) if (!info) return -ENOMEM; + platform_set_drvdata(pdev, info); + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (res == NULL) { dev_err(&pdev->dev, "no memory resource defined\n"); @@ -107,8 +109,6 @@ static int sram_probe(struct platform_device *pdev) list_add(&info->node, &sram_bank_list); mutex_unlock(&sram_lock); - platform_set_drvdata(pdev, info); - dev_info(&pdev->dev, "initialized\n"); return 0; @@ -127,17 +127,19 @@ static int sram_remove(struct platform_device *pdev) struct sram_bank_info *info; info = platform_get_drvdata(pdev); - if (info == NULL) - return -ENODEV; - mutex_lock(&sram_lock); - list_del(&info->node); - mutex_unlock(&sram_lock); + if (info->sram_size) { + mutex_lock(&sram_lock); + list_del(&info->node); + mutex_unlock(&sram_lock); + + gen_pool_destroy(info->gpool); + iounmap(info->sram_virt); + kfree(info->pool_name); + } - gen_pool_destroy(info->gpool); - iounmap(info->sram_virt); - kfree(info->pool_name); kfree(info); + return 0; } diff --git a/arch/arm/mach-omap1/clock.c b/arch/arm/mach-omap1/clock.c index bd5be82101f32..d89bda12bf3cd 100644 --- a/arch/arm/mach-omap1/clock.c +++ b/arch/arm/mach-omap1/clock.c @@ -41,7 +41,7 @@ static DEFINE_SPINLOCK(clockfw_lock); unsigned long omap1_uart_recalc(struct clk *clk) { unsigned int val = __raw_readl(clk->enable_reg); - return val & clk->enable_bit ? 48000000 : 12000000; + return val & 1 << clk->enable_bit ? 48000000 : 12000000; } unsigned long omap1_sossi_recalc(struct clk *clk) diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index fdb6743760a2e..0211f4aa8cc75 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -94,7 +94,7 @@ config SOC_DRA7XX config ARCH_OMAP2PLUS bool select ARCH_HAS_BANDGAP - select ARCH_HAS_HOLES_MEMORYMODEL + select ARCH_HAS_RESET_CONTROLLER select ARCH_OMAP select CLKSRC_MMIO select GENERIC_IRQ_CHIP diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c index 46012ca812f48..1bd64f6ba8cfe 100644 --- a/arch/arm/mach-omap2/display.c +++ b/arch/arm/mach-omap2/display.c @@ -263,9 +263,9 @@ static int __init omapdss_init_of(void) } r = of_platform_populate(node, NULL, NULL, &pdev->dev); + put_device(&pdev->dev); if (r) { pr_err("Unable to populate DSS submodule devices\n"); - put_device(&pdev->dev); return r; } diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c index 5c3845730dbf5..0b80f8bcd3047 100644 --- a/arch/arm/mach-omap2/omap4-common.c +++ b/arch/arm/mach-omap2/omap4-common.c @@ -314,10 +314,12 @@ void __init omap_gic_of_init(void) np = of_find_compatible_node(NULL, NULL, "arm,cortex-a9-gic"); gic_dist_base_addr = of_iomap(np, 0); + of_node_put(np); WARN_ON(!gic_dist_base_addr); np = of_find_compatible_node(NULL, NULL, "arm,cortex-a9-twd-timer"); twd_base = of_iomap(np, 0); + of_node_put(np); WARN_ON(!twd_base); skip_errata_init: diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index eb74aa1826614..202b740adee0e 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -782,8 +782,10 @@ static int __init _init_clkctrl_providers(void) for_each_matching_node(np, ti_clkctrl_match_table) { ret = _setup_clkctrl_provider(np); - if (ret) + if (ret) { + of_node_put(np); break; + } } return ret; @@ -3656,6 +3658,8 @@ int omap_hwmod_init_module(struct device *dev, oh->flags |= HWMOD_SWSUP_SIDLE_ACT; if (data->cfg->quirks & SYSC_QUIRK_SWSUP_MSTANDBY) oh->flags |= HWMOD_SWSUP_MSTANDBY; + if (data->cfg->quirks & SYSC_QUIRK_CLKDM_NOAUTO) + oh->flags |= HWMOD_CLKDM_NOAUTO; error = omap_hwmod_check_module(dev, oh, data, sysc_fields, rev_offs, sysc_offs, syss_offs, diff --git a/arch/arm/mach-omap2/sleep34xx.S b/arch/arm/mach-omap2/sleep34xx.S index ac1324c6453b5..c4e97d35c310d 100644 --- a/arch/arm/mach-omap2/sleep34xx.S +++ b/arch/arm/mach-omap2/sleep34xx.S @@ -72,7 +72,7 @@ ENTRY(enable_omap3630_toggle_l2_on_restore) stmfd sp!, {lr} @ save registers on stack /* Setup so that we will disable and enable l2 */ mov r1, #0x1 - adrl r3, l2dis_3630_offset @ may be too distant for plain adr + adr r3, l2dis_3630_offset ldr r2, [r3] @ value for offset str r1, [r2, r3] @ write to l2dis_3630 ldmfd sp!, {pc} @ restore regs and return diff --git a/arch/arm/mach-pxa/cm-x300.c b/arch/arm/mach-pxa/cm-x300.c index 425855f456f2b..719e6395797cb 100644 --- a/arch/arm/mach-pxa/cm-x300.c +++ b/arch/arm/mach-pxa/cm-x300.c @@ -355,13 +355,13 @@ static struct platform_device cm_x300_spi_gpio = { static struct gpiod_lookup_table cm_x300_spi_gpiod_table = { .dev_id = "spi_gpio", .table = { - GPIO_LOOKUP("gpio-pxa", GPIO_LCD_SCL, + GPIO_LOOKUP("pca9555.1", GPIO_LCD_SCL - GPIO_LCD_BASE, "sck", GPIO_ACTIVE_HIGH), - GPIO_LOOKUP("gpio-pxa", GPIO_LCD_DIN, + GPIO_LOOKUP("pca9555.1", GPIO_LCD_DIN - GPIO_LCD_BASE, "mosi", GPIO_ACTIVE_HIGH), - GPIO_LOOKUP("gpio-pxa", GPIO_LCD_DOUT, + GPIO_LOOKUP("pca9555.1", GPIO_LCD_DOUT - GPIO_LCD_BASE, "miso", GPIO_ACTIVE_HIGH), - GPIO_LOOKUP("gpio-pxa", GPIO_LCD_CS, + GPIO_LOOKUP("pca9555.1", GPIO_LCD_CS - GPIO_LCD_BASE, "cs", GPIO_ACTIVE_HIGH), { }, }, diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c index e1a394ac3eea7..8f2d4faa26120 100644 --- a/arch/arm/mach-pxa/magician.c +++ b/arch/arm/mach-pxa/magician.c @@ -675,7 +675,7 @@ static struct platform_device bq24022 = { static struct gpiod_lookup_table bq24022_gpiod_table = { .dev_id = "gpio-regulator", .table = { - GPIO_LOOKUP("gpio-pxa", EGPIO_MAGICIAN_BQ24022_ISET2, + GPIO_LOOKUP("htc-egpio-0", EGPIO_MAGICIAN_BQ24022_ISET2 - MAGICIAN_EGPIO_BASE, NULL, GPIO_ACTIVE_HIGH), GPIO_LOOKUP("gpio-pxa", GPIO30_MAGICIAN_BQ24022_nCHARGE_EN, "enable", GPIO_ACTIVE_LOW), diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c index f537ff1c3ba7e..3fbcaa3b4e182 100644 --- a/arch/arm/mach-pxa/tosa.c +++ b/arch/arm/mach-pxa/tosa.c @@ -295,9 +295,9 @@ static struct gpiod_lookup_table tosa_mci_gpio_table = { .table = { GPIO_LOOKUP("gpio-pxa", TOSA_GPIO_nSD_DETECT, "cd", GPIO_ACTIVE_LOW), - GPIO_LOOKUP("gpio-pxa", TOSA_GPIO_SD_WP, + GPIO_LOOKUP("sharp-scoop.0", TOSA_GPIO_SD_WP - TOSA_SCOOP_GPIO_BASE, "wp", GPIO_ACTIVE_LOW), - GPIO_LOOKUP("gpio-pxa", TOSA_GPIO_PWR_ON, + GPIO_LOOKUP("sharp-scoop.0", TOSA_GPIO_PWR_ON - TOSA_SCOOP_GPIO_BASE, "power", GPIO_ACTIVE_HIGH), { }, }, diff --git a/arch/arm/mach-s3c24xx/mach-jive.c b/arch/arm/mach-s3c24xx/mach-jive.c index 885e8f12e4b91..eedc9f8ed2109 100644 --- a/arch/arm/mach-s3c24xx/mach-jive.c +++ b/arch/arm/mach-s3c24xx/mach-jive.c @@ -237,11 +237,11 @@ static int __init jive_mtdset(char *options) unsigned long set; if (options == NULL || options[0] == '\0') - return 0; + return 1; if (kstrtoul(options, 10, &set)) { printk(KERN_ERR "failed to parse mtdset=%s\n", options); - return 0; + return 1; } switch (set) { @@ -256,7 +256,7 @@ static int __init jive_mtdset(char *options) "using default.", set); } - return 0; + return 1; } /* parse the mtdset= option given to the kernel command line */ diff --git a/arch/arm/mach-s5pv210/Kconfig b/arch/arm/mach-s5pv210/Kconfig index 03984a7918791..69ff1bb89f38f 100644 --- a/arch/arm/mach-s5pv210/Kconfig +++ b/arch/arm/mach-s5pv210/Kconfig @@ -8,7 +8,6 @@ config ARCH_S5PV210 bool "Samsung S5PV210/S5PC110" depends on ARCH_MULTI_V7 - select ARCH_HAS_HOLES_MEMORYMODEL select ARM_VIC select CLKSRC_SAMSUNG_PWM select COMMON_CLK_SAMSUNG diff --git a/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c b/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c index ee949255ced3f..09ef73b99dd86 100644 --- a/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c +++ b/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c @@ -154,8 +154,10 @@ static int __init rcar_gen2_regulator_quirk(void) return -ENODEV; for_each_matching_node_and_match(np, rcar_gen2_quirk_match, &id) { - if (!of_device_is_available(np)) + if (!of_device_is_available(np)) { + of_node_put(np); break; + } ret = of_property_read_u32(np, "reg", &addr); if (ret) /* Skip invalid entry and continue */ @@ -164,6 +166,7 @@ static int __init rcar_gen2_regulator_quirk(void) quirk = kzalloc(sizeof(*quirk), GFP_KERNEL); if (!quirk) { ret = -ENOMEM; + of_node_put(np); goto err_mem; } diff --git a/arch/arm/mach-socfpga/Kconfig b/arch/arm/mach-socfpga/Kconfig index 22af5e308db6c..61b872792c4cb 100644 --- a/arch/arm/mach-socfpga/Kconfig +++ b/arch/arm/mach-socfpga/Kconfig @@ -2,6 +2,7 @@ menuconfig ARCH_SOCFPGA bool "Altera SOCFPGA family" depends on ARCH_MULTI_V7 + select ARCH_HAS_RESET_CONTROLLER select ARCH_SUPPORTS_BIG_ENDIAN select ARM_AMBA select ARM_GIC @@ -19,6 +20,7 @@ menuconfig ARCH_SOCFPGA select PL310_ERRATA_727915 select PL310_ERRATA_753970 if PL310 select PL310_ERRATA_769419 + select RESET_CONTROLLER if ARCH_SOCFPGA config SOCFPGA_SUSPEND diff --git a/arch/arm/mach-socfpga/core.h b/arch/arm/mach-socfpga/core.h index fc2608b18a0d0..18f01190dcfd4 100644 --- a/arch/arm/mach-socfpga/core.h +++ b/arch/arm/mach-socfpga/core.h @@ -33,7 +33,7 @@ extern void __iomem *sdr_ctl_base_addr; u32 socfpga_sdram_self_refresh(u32 sdr_base); extern unsigned int socfpga_sdram_self_refresh_sz; -extern char secondary_trampoline, secondary_trampoline_end; +extern char secondary_trampoline[], secondary_trampoline_end[]; extern unsigned long socfpga_cpu1start_addr; diff --git a/arch/arm/mach-socfpga/platsmp.c b/arch/arm/mach-socfpga/platsmp.c index fbb80b883e5dd..201191cf68f32 100644 --- a/arch/arm/mach-socfpga/platsmp.c +++ b/arch/arm/mach-socfpga/platsmp.c @@ -20,14 +20,14 @@ static int socfpga_boot_secondary(unsigned int cpu, struct task_struct *idle) { - int trampoline_size = &secondary_trampoline_end - &secondary_trampoline; + int trampoline_size = secondary_trampoline_end - secondary_trampoline; if (socfpga_cpu1start_addr) { /* This will put CPU #1 into reset. */ writel(RSTMGR_MPUMODRST_CPU1, rst_manager_base_addr + SOCFPGA_RSTMGR_MODMPURST); - memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size); + memcpy(phys_to_virt(0), secondary_trampoline, trampoline_size); writel(__pa_symbol(secondary_startup), sys_manager_base_addr + (socfpga_cpu1start_addr & 0x000000ff)); @@ -45,12 +45,12 @@ static int socfpga_boot_secondary(unsigned int cpu, struct task_struct *idle) static int socfpga_a10_boot_secondary(unsigned int cpu, struct task_struct *idle) { - int trampoline_size = &secondary_trampoline_end - &secondary_trampoline; + int trampoline_size = secondary_trampoline_end - secondary_trampoline; if (socfpga_cpu1start_addr) { writel(RSTMGR_MPUMODRST_CPU1, rst_manager_base_addr + SOCFPGA_A10_RSTMGR_MODMPURST); - memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size); + memcpy(phys_to_virt(0), secondary_trampoline, trampoline_size); writel(__pa_symbol(secondary_startup), sys_manager_base_addr + (socfpga_cpu1start_addr & 0x00000fff)); diff --git a/arch/arm/mach-tango/Kconfig b/arch/arm/mach-tango/Kconfig index 25b2fd4348617..a9eeda36aeb15 100644 --- a/arch/arm/mach-tango/Kconfig +++ b/arch/arm/mach-tango/Kconfig @@ -3,7 +3,6 @@ config ARCH_TANGO bool "Sigma Designs Tango4 (SMP87xx)" depends on ARCH_MULTI_V7 # Cortex-A9 MPCore r3p0, PL310 r3p2 - select ARCH_HAS_HOLES_MEMORYMODEL select ARM_ERRATA_754322 select ARM_ERRATA_764369 if SMP select ARM_ERRATA_775420 diff --git a/arch/arm/mach-vexpress/dcscb.c b/arch/arm/mach-vexpress/dcscb.c index 46a903c88c6a0..f553cde614f92 100644 --- a/arch/arm/mach-vexpress/dcscb.c +++ b/arch/arm/mach-vexpress/dcscb.c @@ -143,6 +143,7 @@ static int __init dcscb_init(void) if (!node) return -ENODEV; dcscb_base = of_iomap(node, 0); + of_node_put(node); if (!dcscb_base) return -EADDRNOTAVAIL; cfg = readl_relaxed(dcscb_base + DCS_CFG_R); diff --git a/arch/arm/mach-vexpress/spc.c b/arch/arm/mach-vexpress/spc.c index 1da11bdb1dfbd..1c6500c4e6a17 100644 --- a/arch/arm/mach-vexpress/spc.c +++ b/arch/arm/mach-vexpress/spc.c @@ -580,7 +580,7 @@ static int __init ve_spc_clk_init(void) } cluster = topology_physical_package_id(cpu_dev->id); - if (init_opp_table[cluster]) + if (cluster < 0 || init_opp_table[cluster]) continue; if (ve_init_opp_table(cpu_dev)) diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index b2d522f207826..00ffee644372e 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -743,6 +743,7 @@ config SWP_EMULATE config CPU_BIG_ENDIAN bool "Build big-endian kernel" depends on ARCH_SUPPORTS_BIG_ENDIAN + depends on !LD_IS_LLD help Say Y if you plan on running a kernel in big-endian mode. Note that your board must be properly built and your board @@ -752,7 +753,7 @@ config CPU_BIG_ENDIAN config CPU_ENDIAN_BE8 bool depends on CPU_BIG_ENDIAN - default CPU_V6 || CPU_V6K || CPU_V7 + default CPU_V6 || CPU_V6K || CPU_V7 || CPU_V7M help Support for the BE-8 (big-endian) mode on ARMv6 and ARMv7 processors. diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 788c5cf46de59..1fc6c4810a5c3 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -935,6 +935,9 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (type == TYPE_LDST) do_alignment_finish_ldst(addr, instr, regs, offset); + if (thumb_mode(regs)) + regs->ARM_cpsr = it_advance(regs->ARM_cpsr); + return 0; bad_or_fault: diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 0804a6af4a3b7..ff2cd985d20e0 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -176,11 +176,22 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max_low, int pfn_valid(unsigned long pfn) { phys_addr_t addr = __pfn_to_phys(pfn); + unsigned long pageblock_size = PAGE_SIZE * pageblock_nr_pages; if (__phys_to_pfn(addr) != pfn) return 0; - return memblock_is_map_memory(__pfn_to_phys(pfn)); + /* + * If address less than pageblock_size bytes away from a present + * memory chunk there still will be a memory map entry for it + * because we round freed memory map to the pageblock boundaries. + */ + if (memblock_overlaps_region(&memblock.memory, + ALIGN_DOWN(addr, pageblock_size), + pageblock_size)) + return 1; + + return 0; } EXPORT_SYMBOL(pfn_valid); #endif @@ -371,14 +382,14 @@ static void __init free_unused_memmap(void) */ start = min(start, ALIGN(prev_end, PAGES_PER_SECTION)); -#else +#endif /* - * Align down here since the VM subsystem insists that the - * memmap entries are valid from the bank start aligned to - * MAX_ORDER_NR_PAGES. + * Align down here since many operations in VM subsystem + * presume that there are no holes in the memory map inside + * a pageblock */ - start = round_down(start, MAX_ORDER_NR_PAGES); -#endif + start = round_down(start, pageblock_nr_pages); + /* * If we had a previous bank, and there is a space * between the current bank and the previous, free it. @@ -387,18 +398,20 @@ static void __init free_unused_memmap(void) free_memmap(prev_end, start); /* - * Align up here since the VM subsystem insists that the - * memmap entries are valid from the bank end aligned to - * MAX_ORDER_NR_PAGES. + * Align up here since many operations in VM subsystem + * presume that there are no holes in the memory map inside + * a pageblock */ prev_end = ALIGN(memblock_region_memory_end_pfn(reg), - MAX_ORDER_NR_PAGES); + pageblock_nr_pages); } #ifdef CONFIG_SPARSEMEM - if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION)) + if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION)) { + prev_end = ALIGN(prev_end, pageblock_nr_pages); free_memmap(prev_end, ALIGN(prev_end, PAGES_PER_SECTION)); + } #endif } @@ -469,7 +482,11 @@ static void __init free_highpages(void) void __init mem_init(void) { #ifdef CONFIG_ARM_LPAE - swiotlb_init(1); + if (swiotlb_force == SWIOTLB_FORCE || + max_pfn > arm_dma_pfn_limit) + swiotlb_init(1); + else + swiotlb_force = SWIOTLB_NO_FORCE; #endif set_max_mapnr(pfn_to_page(max_pfn) - mem_map); diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index d42b933161832..841b66515b379 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -301,7 +302,8 @@ static void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn, * Don't allow RAM to be mapped with mismatched attributes - this * causes problems with ARMv6+ */ - if (WARN_ON(pfn_valid(pfn) && mtype != MT_MEMORY_RW)) + if (WARN_ON(memblock_is_map_memory(PFN_PHYS(pfn)) && + mtype != MT_MEMORY_RW)) return NULL; area = get_vm_area_caller(size, VM_IOREMAP, caller); @@ -498,3 +500,11 @@ void __init early_ioremap_init(void) { early_ioremap_setup(); } + +bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size, + unsigned long flags) +{ + unsigned long pfn = PHYS_PFN(offset); + + return memblock_is_map_memory(pfn); +} diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index ee943ac325560..463cbb0631be2 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -229,12 +229,14 @@ early_param("ecc", early_ecc); static int __init early_cachepolicy(char *p) { pr_warn("cachepolicy kernel parameter not supported without cp15\n"); + return 0; } early_param("cachepolicy", early_cachepolicy); static int __init noalign_setup(char *__unused) { pr_warn("noalign kernel parameter not supported without cp15\n"); + return 1; } __setup("noalign", noalign_setup); @@ -314,6 +316,13 @@ static struct mem_type mem_types[] __ro_after_init = { .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, .domain = DOMAIN_KERNEL, }, + [MT_MEMORY_RO] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_XN | L_PTE_RDONLY, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PMD_TYPE_SECT, + .domain = DOMAIN_KERNEL, + }, [MT_ROM] = { .prot_sect = PMD_TYPE_SECT, .domain = DOMAIN_KERNEL, @@ -415,9 +424,9 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) FIXADDR_END); BUG_ON(idx >= __end_of_fixed_addresses); - /* we only support device mappings until pgprot_kernel has been set */ + /* We support only device mappings before pgprot_kernel is set. */ if (WARN_ON(pgprot_val(prot) != pgprot_val(FIXMAP_PAGE_IO) && - pgprot_val(pgprot_kernel) == 0)) + pgprot_val(prot) && pgprot_val(pgprot_kernel) == 0)) return; if (pgprot_val(prot)) @@ -513,6 +522,7 @@ static void __init build_mem_type_table(void) /* Also setup NX memory mapping */ mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_XN; + mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_XN; } if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) { /* @@ -595,6 +605,7 @@ static void __init build_mem_type_table(void) mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; + mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; #endif /* @@ -615,6 +626,8 @@ static void __init build_mem_type_table(void) mem_types[MT_MEMORY_RWX].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY_RW].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY_RO].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY_RWX_NONCACHED].prot_pte |= L_PTE_SHARED; @@ -678,6 +691,8 @@ static void __init build_mem_type_table(void) mem_types[MT_MEMORY_RWX].prot_pte |= kern_pgprot; mem_types[MT_MEMORY_RW].prot_sect |= ecc_mask | cp->pmd; mem_types[MT_MEMORY_RW].prot_pte |= kern_pgprot; + mem_types[MT_MEMORY_RO].prot_sect |= ecc_mask | cp->pmd; + mem_types[MT_MEMORY_RO].prot_pte |= kern_pgprot; mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot; mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= ecc_mask; mem_types[MT_ROM].prot_sect |= cp->pmd; @@ -1359,7 +1374,7 @@ static void __init devicemaps_init(const struct machine_desc *mdesc) map.pfn = __phys_to_pfn(__atags_pointer & SECTION_MASK); map.virtual = FDT_FIXED_BASE; map.length = FDT_FIXED_SIZE; - map.type = MT_ROM; + map.type = MT_MEMORY_RO; create_mapping(&map); } diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S index 4fa5371bc6624..2785da387c910 100644 --- a/arch/arm/mm/proc-arm1020.S +++ b/arch/arm/mm/proc-arm1020.S @@ -491,7 +491,7 @@ cpu_arm1020_name: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm1020_proc_info,#object __arm1020_proc_info: diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S index 5d8a8339e09a4..e9ea237ed7852 100644 --- a/arch/arm/mm/proc-arm1020e.S +++ b/arch/arm/mm/proc-arm1020e.S @@ -449,7 +449,7 @@ arm1020e_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm1020e_proc_info,#object __arm1020e_proc_info: diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S index b3dd95c345e48..920c279e7879d 100644 --- a/arch/arm/mm/proc-arm1022.S +++ b/arch/arm/mm/proc-arm1022.S @@ -443,7 +443,7 @@ arm1022_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm1022_proc_info,#object __arm1022_proc_info: diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S index ac5afde12f35c..0bdf25a95b107 100644 --- a/arch/arm/mm/proc-arm1026.S +++ b/arch/arm/mm/proc-arm1026.S @@ -138,7 +138,7 @@ ENTRY(arm1026_flush_kern_cache_all) mov ip, #0 __flush_whole_cache: #ifndef CONFIG_CPU_DCACHE_DISABLE -1: mrc p15, 0, r15, c7, c14, 3 @ test, clean, invalidate +1: mrc p15, 0, APSR_nzcv, c7, c14, 3 @ test, clean, invalidate bne 1b #endif tst r2, #VM_EXEC @@ -363,7 +363,7 @@ ENTRY(cpu_arm1026_switch_mm) #ifdef CONFIG_MMU mov r1, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE -1: mrc p15, 0, r15, c7, c14, 3 @ test, clean, invalidate +1: mrc p15, 0, APSR_nzcv, c7, c14, 3 @ test, clean, invalidate bne 1b #endif #ifndef CONFIG_CPU_ICACHE_DISABLE @@ -437,7 +437,7 @@ arm1026_crval: string cpu_arm1026_name, "ARM1026EJ-S" .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm1026_proc_info,#object __arm1026_proc_info: diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S index c99d24363f32e..39361e196d61b 100644 --- a/arch/arm/mm/proc-arm720.S +++ b/arch/arm/mm/proc-arm720.S @@ -172,7 +172,7 @@ arm720_crval: * See for a definition of this structure. */ - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .macro arm720_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cpu_flush:req .type __\name\()_proc_info,#object diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S index 1b4a3838393fb..1a94bbf6e53fc 100644 --- a/arch/arm/mm/proc-arm740.S +++ b/arch/arm/mm/proc-arm740.S @@ -128,7 +128,7 @@ __arm740_setup: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm740_proc_info,#object __arm740_proc_info: .long 0x41807400 diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S index 17a4687065c7f..52b66cf0259e3 100644 --- a/arch/arm/mm/proc-arm7tdmi.S +++ b/arch/arm/mm/proc-arm7tdmi.S @@ -72,7 +72,7 @@ __arm7tdmi_setup: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .macro arm7tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, \ extra_hwcaps=0 diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S index 298c76b47749f..31ac8acc34dc5 100644 --- a/arch/arm/mm/proc-arm920.S +++ b/arch/arm/mm/proc-arm920.S @@ -434,7 +434,7 @@ arm920_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm920_proc_info,#object __arm920_proc_info: diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S index 824be3a0bc238..ca2c7ca8af214 100644 --- a/arch/arm/mm/proc-arm922.S +++ b/arch/arm/mm/proc-arm922.S @@ -412,7 +412,7 @@ arm922_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm922_proc_info,#object __arm922_proc_info: diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S index d40cff8f102c2..a381a0c9f1092 100644 --- a/arch/arm/mm/proc-arm925.S +++ b/arch/arm/mm/proc-arm925.S @@ -477,7 +477,7 @@ arm925_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .macro arm925_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache .type __\name\()_proc_info,#object diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S index f3cd08f353f00..1ba253c2bce19 100644 --- a/arch/arm/mm/proc-arm926.S +++ b/arch/arm/mm/proc-arm926.S @@ -131,7 +131,7 @@ __flush_whole_cache: #ifdef CONFIG_CPU_DCACHE_WRITETHROUGH mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache #else -1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate +1: mrc p15, 0, APSR_nzcv, c7, c14, 3 @ test,clean,invalidate bne 1b #endif tst r2, #VM_EXEC @@ -358,7 +358,7 @@ ENTRY(cpu_arm926_switch_mm) mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache #else @ && 'Clean & Invalidate whole DCache' -1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate +1: mrc p15, 0, APSR_nzcv, c7, c14, 3 @ test,clean,invalidate bne 1b #endif mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache @@ -460,7 +460,7 @@ arm926_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm926_proc_info,#object __arm926_proc_info: diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S index 1c26d991386d7..4b8a00220cc97 100644 --- a/arch/arm/mm/proc-arm940.S +++ b/arch/arm/mm/proc-arm940.S @@ -340,7 +340,7 @@ __arm940_setup: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm940_proc_info,#object __arm940_proc_info: diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S index 2dc1c75a4fd4a..555becf9c758d 100644 --- a/arch/arm/mm/proc-arm946.S +++ b/arch/arm/mm/proc-arm946.S @@ -395,7 +395,7 @@ __arm946_setup: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm946_proc_info,#object __arm946_proc_info: .long 0x41009460 diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S index 913c06e590af5..ef517530130b0 100644 --- a/arch/arm/mm/proc-arm9tdmi.S +++ b/arch/arm/mm/proc-arm9tdmi.S @@ -66,7 +66,7 @@ __arm9tdmi_setup: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .macro arm9tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req .type __\name\()_proc_info, #object diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S index 8120b6f4dbb83..dddf833fe0007 100644 --- a/arch/arm/mm/proc-fa526.S +++ b/arch/arm/mm/proc-fa526.S @@ -185,7 +185,7 @@ fa526_cr1_set: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __fa526_proc_info,#object __fa526_proc_info: diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S index bb6dc34d42a37..b12b76bc8d30c 100644 --- a/arch/arm/mm/proc-feroceon.S +++ b/arch/arm/mm/proc-feroceon.S @@ -571,7 +571,7 @@ feroceon_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .macro feroceon_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache:req .type __\name\()_proc_info,#object diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index 60ac7c5999a98..86e54447dc916 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -342,6 +342,7 @@ ENTRY(\name\()_cache_fns) .macro define_tlb_functions name:req, flags_up:req, flags_smp .type \name\()_tlb_fns, #object + .align 2 ENTRY(\name\()_tlb_fns) .long \name\()_flush_user_tlb_range .long \name\()_flush_kern_tlb_range diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S index f083085788857..d47d6c5cee63a 100644 --- a/arch/arm/mm/proc-mohawk.S +++ b/arch/arm/mm/proc-mohawk.S @@ -416,7 +416,7 @@ mohawk_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __88sv331x_proc_info,#object __88sv331x_proc_info: diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S index d5bc5d7025639..baba503ba8166 100644 --- a/arch/arm/mm/proc-sa110.S +++ b/arch/arm/mm/proc-sa110.S @@ -196,7 +196,7 @@ sa110_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __sa110_proc_info,#object __sa110_proc_info: diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S index be7b611c76c76..75ebacc8e4e5c 100644 --- a/arch/arm/mm/proc-sa1100.S +++ b/arch/arm/mm/proc-sa1100.S @@ -239,7 +239,7 @@ sa1100_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .macro sa1100_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req .type __\name\()_proc_info,#object diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index c1c85eb3484f3..1dd0d5ca27da8 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -261,7 +261,7 @@ v6_crval: string cpu_elf_name, "v6" .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" /* * Match any ARMv6 processor core. diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index 097ef85bb7f21..b09c54499f955 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -109,8 +109,7 @@ static unsigned int spectre_v2_install_workaround(unsigned int method) #else static unsigned int spectre_v2_install_workaround(unsigned int method) { - pr_info("CPU%u: Spectre V2: workarounds disabled by configuration\n", - smp_processor_id()); + pr_info_once("Spectre V2: workarounds disabled by configuration\n"); return SPECTRE_VULNERABLE; } @@ -222,10 +221,10 @@ static int spectre_bhb_install_workaround(int method) return SPECTRE_VULNERABLE; spectre_bhb_method = method; - } - pr_info("CPU%u: Spectre BHB: using %s workaround\n", - smp_processor_id(), spectre_bhb_method_name(method)); + pr_info("CPU%u: Spectre BHB: enabling %s workaround for all CPUs\n", + smp_processor_id(), spectre_bhb_method_name(method)); + } return SPECTRE_MITIGATED; } @@ -301,6 +300,7 @@ void cpu_v7_ca15_ibe(void) { if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(0))) cpu_v7_spectre_v2_init(); + cpu_v7_spectre_bhb_init(); } void cpu_v7_bugs_init(void) diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index c4e8006a1a8cd..48e0ef6f0dccf 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -644,7 +644,7 @@ __v7_setup_stack: string cpu_elf_name, "v7" .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" /* * Standard v7 proc info content diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S index 1a49d503eafc8..84459c1d31b87 100644 --- a/arch/arm/mm/proc-v7m.S +++ b/arch/arm/mm/proc-v7m.S @@ -93,7 +93,7 @@ ENTRY(cpu_cm7_proc_fin) ret lr ENDPROC(cpu_cm7_proc_fin) - .section ".init.text", #alloc, #execinstr + .section ".init.text", "ax" __v7m_cm7_setup: mov r8, #(V7M_SCB_CCR_DC | V7M_SCB_CCR_IC| V7M_SCB_CCR_BP) @@ -177,7 +177,7 @@ ENDPROC(__v7m_setup) string cpu_elf_name "v7m" string cpu_v7m_name "ARMv7-M" - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .macro __v7m_proc name, initfunc, cache_fns = nop_cache_fns, hwcaps = 0, proc_fns = v7m_processor_functions .long 0 /* proc_info_list.__cpu_mm_mmu_flags */ diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S index 1ac0fbbe9f127..42eaecc43cfef 100644 --- a/arch/arm/mm/proc-xsc3.S +++ b/arch/arm/mm/proc-xsc3.S @@ -496,7 +496,7 @@ xsc3_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .macro xsc3_proc_info name:req, cpu_val:req, cpu_mask:req .type __\name\()_proc_info,#object diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S index bdb2b7749b039..18ac5a1f89225 100644 --- a/arch/arm/mm/proc-xscale.S +++ b/arch/arm/mm/proc-xscale.S @@ -610,7 +610,7 @@ xscale_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .macro xscale_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache .type __\name\()_proc_info,#object diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 7216653424fd6..1c6e57f1dbc48 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -36,6 +36,10 @@ * +-----+ * |RSVD | JIT scratchpad * current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE + SCRATCH_SIZE) + * | ... | caller-saved registers + * +-----+ + * | ... | arguments passed on stack + * ARM_SP during call => +-----| * | | * | ... | Function call stack * | | @@ -63,6 +67,12 @@ * * When popping registers off the stack at the end of a BPF function, we * reference them via the current ARM_FP register. + * + * Some eBPF operations are implemented via a call to a helper function. + * Such calls are "invisible" in the eBPF code, so it is up to the calling + * program to preserve any caller-saved ARM registers during the call. The + * JIT emits code to push and pop those registers onto the stack, immediately + * above the callee stack frame. */ #define CALLEE_MASK (1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \ 1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R9 | \ @@ -70,6 +80,8 @@ #define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR) #define CALLEE_POP_MASK (CALLEE_MASK | 1 << ARM_PC) +#define CALLER_MASK (1 << ARM_R0 | 1 << ARM_R1 | 1 << ARM_R2 | 1 << ARM_R3) + enum { /* Stack layout - these are offsets from (top of stack - 4) */ BPF_R2_HI, @@ -464,6 +476,7 @@ static inline int epilogue_offset(const struct jit_ctx *ctx) static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) { + const int exclude_mask = BIT(ARM_R0) | BIT(ARM_R1); const s8 *tmp = bpf2a32[TMP_REG_1]; #if __LINUX_ARM_ARCH__ == 7 @@ -495,11 +508,17 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) emit(ARM_MOV_R(ARM_R0, rm), ctx); } + /* Push caller-saved registers on stack */ + emit(ARM_PUSH(CALLER_MASK & ~exclude_mask), ctx); + /* Call appropriate function */ emit_mov_i(ARM_IP, op == BPF_DIV ? (u32)jit_udiv32 : (u32)jit_mod32, ctx); emit_blx_r(ARM_IP, ctx); + /* Restore caller-saved registers from stack */ + emit(ARM_POP(CALLER_MASK & ~exclude_mask), ctx); + /* Save return value */ if (rd != ARM_R0) emit(ARM_MOV_R(rd, ARM_R0), ctx); @@ -1602,6 +1621,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code)); break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_W: case BPF_ST | BPF_MEM | BPF_H: diff --git a/arch/arm/probes/decode.h b/arch/arm/probes/decode.h index 9731735989921..facc889d05eee 100644 --- a/arch/arm/probes/decode.h +++ b/arch/arm/probes/decode.h @@ -14,6 +14,7 @@ #include #include #include +#include #include void __init arm_probes_decode_init(void); @@ -35,31 +36,6 @@ void __init find_str_pc_offset(void); #endif -/* - * Update ITSTATE after normal execution of an IT block instruction. - * - * The 8 IT state bits are split into two parts in CPSR: - * ITSTATE<1:0> are in CPSR<26:25> - * ITSTATE<7:2> are in CPSR<15:10> - */ -static inline unsigned long it_advance(unsigned long cpsr) - { - if ((cpsr & 0x06000400) == 0) { - /* ITSTATE<2:0> == 0 means end of IT block, so clear IT state */ - cpsr &= ~PSR_IT_MASK; - } else { - /* We need to shift left ITSTATE<4:0> */ - const unsigned long mask = 0x06001c00; /* Mask ITSTATE<4:0> */ - unsigned long it = cpsr & mask; - it <<= 1; - it |= it >> (27 - 10); /* Carry ITSTATE<2> to correct place */ - it &= mask; - cpsr &= ~mask; - cpsr |= it; - } - return cpsr; -} - static inline void __kprobes bx_write_pc(long pcv, struct pt_regs *regs) { long cpsr = regs->ARM_cpsr; diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index 90b5bc723c83f..0a783bd4641c5 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -534,7 +534,7 @@ static struct undef_hook kprobes_arm_break_hook = { #endif /* !CONFIG_THUMB2_KERNEL */ -int __init arch_init_kprobes() +int __init arch_init_kprobes(void) { arm_probes_decode_init(); #ifdef CONFIG_THUMB2_KERNEL diff --git a/arch/arm/vfp/Makefile b/arch/arm/vfp/Makefile index 9975b63ac3b0d..749901a72d6dc 100644 --- a/arch/arm/vfp/Makefile +++ b/arch/arm/vfp/Makefile @@ -8,6 +8,4 @@ # ccflags-y := -DDEBUG # asflags-y := -DDEBUG -KBUILD_AFLAGS :=$(KBUILD_AFLAGS:-msoft-float=-Wa,-mfpu=softvfp+vfp -mfloat-abi=soft) - obj-y += vfpmodule.o entry.o vfphw.o vfpsingle.o vfpdouble.o diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S index b530db8f2c6c8..772c6a3b1f724 100644 --- a/arch/arm/vfp/vfphw.S +++ b/arch/arm/vfp/vfphw.S @@ -253,11 +253,14 @@ vfp_current_hw_state_address: ENTRY(vfp_get_float) tbl_branch r0, r3, #3 + .fpu vfpv2 .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -1: mrc p10, 0, r0, c\dr, c0, 0 @ fmrs r0, s0 +1: vmov r0, s\dr ret lr .org 1b + 8 -1: mrc p10, 0, r0, c\dr, c0, 4 @ fmrs r0, s1 + .endr + .irp dr,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 +1: vmov r0, s\dr ret lr .org 1b + 8 .endr @@ -265,11 +268,14 @@ ENDPROC(vfp_get_float) ENTRY(vfp_put_float) tbl_branch r1, r3, #3 + .fpu vfpv2 .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -1: mcr p10, 0, r0, c\dr, c0, 0 @ fmsr r0, s0 +1: vmov s\dr, r0 ret lr .org 1b + 8 -1: mcr p10, 0, r0, c\dr, c0, 4 @ fmsr r0, s1 + .endr + .irp dr,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 +1: vmov s\dr, r0 ret lr .org 1b + 8 .endr @@ -277,15 +283,17 @@ ENDPROC(vfp_put_float) ENTRY(vfp_get_double) tbl_branch r0, r3, #3 + .fpu vfpv2 .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -1: fmrrd r0, r1, d\dr +1: vmov r0, r1, d\dr ret lr .org 1b + 8 .endr #ifdef CONFIG_VFPv3 @ d16 - d31 registers - .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -1: mrrc p11, 3, r0, r1, c\dr @ fmrrd r0, r1, d\dr + .fpu vfpv3 + .irp dr,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 +1: vmov r0, r1, d\dr ret lr .org 1b + 8 .endr @@ -299,15 +307,17 @@ ENDPROC(vfp_get_double) ENTRY(vfp_put_double) tbl_branch r2, r3, #3 + .fpu vfpv2 .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -1: fmdrr d\dr, r0, r1 +1: vmov d\dr, r0, r1 ret lr .org 1b + 8 .endr #ifdef CONFIG_VFPv3 + .fpu vfpv3 @ d16 - d31 registers - .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -1: mcrr p11, 3, r0, r1, c\dr @ fmdrr r0, r1, d\dr + .irp dr,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 +1: vmov d\dr, r0, r1 ret lr .org 1b + 8 .endr diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c index acb464547a54f..4a1991a103ea0 100644 --- a/arch/arm/xen/p2m.c +++ b/arch/arm/xen/p2m.c @@ -62,11 +62,12 @@ static int xen_add_phys_to_mach_entry(struct xen_p2m_entry *new) unsigned long __pfn_to_mfn(unsigned long pfn) { - struct rb_node *n = phys_to_mach.rb_node; + struct rb_node *n; struct xen_p2m_entry *entry; unsigned long irqflags; read_lock_irqsave(&p2m_lock, irqflags); + n = phys_to_mach.rb_node; while (n) { entry = rb_entry(n, struct xen_p2m_entry, rbnode_phys); if (entry->pfn <= pfn && @@ -153,10 +154,11 @@ bool __set_phys_to_machine_multi(unsigned long pfn, int rc; unsigned long irqflags; struct xen_p2m_entry *p2m_entry; - struct rb_node *n = phys_to_mach.rb_node; + struct rb_node *n; if (mfn == INVALID_P2M_ENTRY) { write_lock_irqsave(&p2m_lock, irqflags); + n = phys_to_mach.rb_node; while (n) { p2m_entry = rb_entry(n, struct xen_p2m_entry, rbnode_phys); if (p2m_entry->pfn <= pfn && diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7d6be6144b090..1b2c1f30503fc 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -73,6 +73,7 @@ config ARM64 select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36) + select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP select ARCH_HAS_UBSAN_SANITIZE_ALL select ARM_AMBA select ARM_ARCH_TIMER @@ -181,7 +182,6 @@ config ARM64 select PCI_SYSCALL if PCI select POWER_RESET select POWER_SUPPLY - select REFCOUNT_FULL select SPARSE_IRQ select SWIOTLB select SYSCTL_EXCEPTION_TRACE @@ -1156,6 +1156,15 @@ config ARM64_SSBD If unsure, say Y. +config MITIGATE_SPECTRE_BRANCH_HISTORY + bool "Mitigate Spectre style attacks against branch history" if EXPERT + default y + help + Speculation attacks against some high-performance processors can + make use of branch history to influence future speculation. + When taking an exception from user-space, a sequence of branches + or a firmware call overwrites the branch history. + config RODATA_FULL_DEFAULT_ENABLED bool "Apply r/o permissions of VM areas also to their linear aliases" default y diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index 9dccf4db319b1..90202e5608d1e 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -225,6 +225,7 @@ config ARCH_STRATIX10 config ARCH_SYNQUACER bool "Socionext SynQuacer SoC Family" + select IRQ_FASTEOI_HIERARCHY_HANDLERS config ARCH_TEGRA bool "NVIDIA Tegra SoC Family" diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts index 57a6f45036c1f..d7177465b0968 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts @@ -114,7 +114,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus.dts index db6ea7b58999b..19f930f43936e 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus.dts @@ -72,7 +72,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi index 9533c85fb0a30..d2d255a988a81 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi @@ -76,6 +76,12 @@ no-map; }; + /* 32 MiB reserved for ARM Trusted Firmware (BL32) */ + secmon_reserved_bl32: secmon@5300000 { + reg = <0x0 0x05300000 0x0 0x2000000>; + no-map; + }; + linux,cma { compatible = "shared-dma-pool"; reusable; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts b/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts index c9fa23a565624..c76bf498ee388 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts +++ b/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts @@ -139,7 +139,7 @@ regulator-min-microvolt = <721000>; regulator-max-microvolt = <1022000>; - vin-supply = <&dc_in>; + pwm-supply = <&dc_in>; pwms = <&pwm_AO_cd 1 1250 0>; pwm-dutycycle-range = <100 0>; @@ -157,14 +157,6 @@ regulator-always-on; }; - reserved-memory { - /* TEE Reserved Memory */ - bl32_reserved: bl32@5000000 { - reg = <0x0 0x05300000 0x0 0x2000000>; - no-map; - }; - }; - sdio_pwrseq: sdio-pwrseq { compatible = "mmc-pwrseq-simple"; reset-gpios = <&gpio GPIOX_6 GPIO_ACTIVE_LOW>; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a-u200.dts b/arch/arm64/boot/dts/amlogic/meson-g12a-u200.dts index 2a324f0136e3f..02ec6eda03b1c 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12a-u200.dts +++ b/arch/arm64/boot/dts/amlogic/meson-g12a-u200.dts @@ -139,7 +139,7 @@ regulator-min-microvolt = <721000>; regulator-max-microvolt = <1022000>; - vin-supply = <&main_12v>; + pwm-supply = <&main_12v>; pwms = <&pwm_AO_cd 1 1250 0>; pwm-dutycycle-range = <100 0>; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a-x96-max.dts b/arch/arm64/boot/dts/amlogic/meson-g12a-x96-max.dts index c48125bf9d1e3..5209c44fda01a 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12a-x96-max.dts +++ b/arch/arm64/boot/dts/amlogic/meson-g12a-x96-max.dts @@ -139,7 +139,7 @@ regulator-min-microvolt = <721000>; regulator-max-microvolt = <1022000>; - vin-supply = <&dc_in>; + pwm-supply = <&dc_in>; pwms = <&pwm_AO_cd 1 1250 0>; pwm-dutycycle-range = <100 0>; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-a311d.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-a311d.dtsi index d61f43052a344..8e9ad1e51d665 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12b-a311d.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12b-a311d.dtsi @@ -11,26 +11,6 @@ compatible = "operating-points-v2"; opp-shared; - opp-100000000 { - opp-hz = /bits/ 64 <100000000>; - opp-microvolt = <731000>; - }; - - opp-250000000 { - opp-hz = /bits/ 64 <250000000>; - opp-microvolt = <731000>; - }; - - opp-500000000 { - opp-hz = /bits/ 64 <500000000>; - opp-microvolt = <731000>; - }; - - opp-667000000 { - opp-hz = /bits/ 64 <667000000>; - opp-microvolt = <731000>; - }; - opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <761000>; @@ -71,26 +51,6 @@ compatible = "operating-points-v2"; opp-shared; - opp-100000000 { - opp-hz = /bits/ 64 <100000000>; - opp-microvolt = <731000>; - }; - - opp-250000000 { - opp-hz = /bits/ 64 <250000000>; - opp-microvolt = <731000>; - }; - - opp-500000000 { - opp-hz = /bits/ 64 <500000000>; - opp-microvolt = <731000>; - }; - - opp-667000000 { - opp-hz = /bits/ 64 <667000000>; - opp-microvolt = <731000>; - }; - opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <731000>; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-s922x.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-s922x.dtsi index 046cc332d07f9..f2543da63c39d 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12b-s922x.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12b-s922x.dtsi @@ -11,26 +11,6 @@ compatible = "operating-points-v2"; opp-shared; - opp-100000000 { - opp-hz = /bits/ 64 <100000000>; - opp-microvolt = <731000>; - }; - - opp-250000000 { - opp-hz = /bits/ 64 <250000000>; - opp-microvolt = <731000>; - }; - - opp-500000000 { - opp-hz = /bits/ 64 <500000000>; - opp-microvolt = <731000>; - }; - - opp-667000000 { - opp-hz = /bits/ 64 <667000000>; - opp-microvolt = <731000>; - }; - opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <731000>; @@ -71,26 +51,6 @@ compatible = "operating-points-v2"; opp-shared; - opp-100000000 { - opp-hz = /bits/ 64 <100000000>; - opp-microvolt = <751000>; - }; - - opp-250000000 { - opp-hz = /bits/ 64 <250000000>; - opp-microvolt = <751000>; - }; - - opp-500000000 { - opp-hz = /bits/ 64 <500000000>; - opp-microvolt = <751000>; - }; - - opp-667000000 { - opp-hz = /bits/ 64 <667000000>; - opp-microvolt = <751000>; - }; - opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <771000>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index ce230d6ac35cd..ad7bc0eec6682 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -41,6 +41,12 @@ no-map; }; + /* 32 MiB reserved for ARM Trusted Firmware (BL32) */ + secmon_reserved_bl32: secmon@5300000 { + reg = <0x0 0x05300000 0x0 0x2000000>; + no-map; + }; + linux,cma { compatible = "shared-dma-pool"; reusable; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi index e3d17569d98ad..e94f09c2d4e32 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi @@ -6,6 +6,7 @@ */ #include "meson-gxbb.dtsi" +#include / { aliases { @@ -64,6 +65,7 @@ regulator-name = "VDDIO_AO18"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; + regulator-always-on; }; vcc_3v3: regulator-vcc_3v3 { @@ -157,6 +159,7 @@ status = "okay"; pinctrl-0 = <&hdmi_hpd_pins>, <&hdmi_i2c_pins>; pinctrl-names = "default"; + hdmi-supply = <&vddio_ao18>; }; &hdmi_tx_tmds_port { diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts b/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts index 29ac78ddc057e..85fb59060cdff 100644 --- a/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts +++ b/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts @@ -164,14 +164,6 @@ regulator-always-on; }; - reserved-memory { - /* TEE Reserved Memory */ - bl32_reserved: bl32@5000000 { - reg = <0x0 0x05300000 0x0 0x2000000>; - no-map; - }; - }; - sdio_pwrseq: sdio-pwrseq { compatible = "mmc-pwrseq-simple"; reset-gpios = <&gpio GPIOX_6 GPIO_ACTIVE_LOW>; diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi b/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi index 8ba3555ca3693..dd62a608c805a 100644 --- a/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi @@ -55,26 +55,6 @@ compatible = "operating-points-v2"; opp-shared; - opp-100000000 { - opp-hz = /bits/ 64 <100000000>; - opp-microvolt = <730000>; - }; - - opp-250000000 { - opp-hz = /bits/ 64 <250000000>; - opp-microvolt = <730000>; - }; - - opp-500000000 { - opp-hz = /bits/ 64 <500000000>; - opp-microvolt = <730000>; - }; - - opp-667000000 { - opp-hz = /bits/ 64 <666666666>; - opp-microvolt = <750000>; - }; - opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <770000>; diff --git a/arch/arm64/boot/dts/broadcom/northstar2/ns2-svk.dts b/arch/arm64/boot/dts/broadcom/northstar2/ns2-svk.dts index ec19fbf928a14..12a4b1c03390c 100644 --- a/arch/arm64/boot/dts/broadcom/northstar2/ns2-svk.dts +++ b/arch/arm64/boot/dts/broadcom/northstar2/ns2-svk.dts @@ -111,8 +111,8 @@ compatible = "silabs,si3226x"; reg = <0>; spi-max-frequency = <5000000>; - spi-cpha = <1>; - spi-cpol = <1>; + spi-cpha; + spi-cpol; pl022,hierarchy = <0>; pl022,interface = <0>; pl022,slave-tx-disable = <0>; @@ -135,8 +135,8 @@ at25,byte-len = <0x8000>; at25,addr-mode = <2>; at25,page-size = <64>; - spi-cpha = <1>; - spi-cpol = <1>; + spi-cpha; + spi-cpol; pl022,hierarchy = <0>; pl022,interface = <0>; pl022,slave-tx-disable = <0>; diff --git a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi index 39802066232e1..edc1a8a4c4bc0 100644 --- a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi +++ b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi @@ -687,7 +687,7 @@ }; }; - sata: ahci@663f2000 { + sata: sata@663f2000 { compatible = "brcm,iproc-ahci", "generic-ahci"; reg = <0x663f2000 0x1000>; dma-coherent; diff --git a/arch/arm64/boot/dts/exynos/exynos7.dtsi b/arch/arm64/boot/dts/exynos/exynos7.dtsi index 25549d9552ae2..84f92b44c3235 100644 --- a/arch/arm64/boot/dts/exynos/exynos7.dtsi +++ b/arch/arm64/boot/dts/exynos/exynos7.dtsi @@ -113,7 +113,7 @@ #address-cells = <0>; interrupt-controller; reg = <0x11001000 0x1000>, - <0x11002000 0x1000>, + <0x11002000 0x2000>, <0x11004000 0x2000>, <0x11006000 0x2000>; }; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts index 078a5010228cd..0b3a93c4155d2 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts @@ -161,11 +161,6 @@ vcc-supply = <&sb_3v3>; }; - rtc@51 { - compatible = "nxp,pcf2129"; - reg = <0x51>; - }; - eeprom@56 { compatible = "atmel,24c512"; reg = <0x56>; @@ -209,6 +204,15 @@ }; +&i2c1 { + status = "okay"; + + rtc@51 { + compatible = "nxp,pcf2129"; + reg = <0x51>; + }; +}; + &enetc_port1 { phy-handle = <&qds_phy1>; phy-connection-type = "rgmii-id"; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index 5716ac20bddd1..02ae6bfff5658 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@ -287,6 +287,24 @@ status = "disabled"; }; + can0: can@2180000 { + compatible = "fsl,ls1028ar1-flexcan", "fsl,lx2160ar1-flexcan"; + reg = <0x0 0x2180000 0x0 0x10000>; + interrupts = ; + clocks = <&sysclk>, <&clockgen 4 1>; + clock-names = "ipg", "per"; + status = "disabled"; + }; + + can1: can@2190000 { + compatible = "fsl,ls1028ar1-flexcan", "fsl,lx2160ar1-flexcan"; + reg = <0x0 0x2190000 0x0 0x10000>; + interrupts = ; + clocks = <&sysclk>, <&clockgen 4 1>; + clock-names = "ipg", "per"; + status = "disabled"; + }; + duart0: serial@21c0500 { compatible = "fsl,ns16550", "ns16550a"; reg = <0x00 0x21c0500 0x0 0x100>; @@ -496,14 +514,14 @@ compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc000000 0x0 0x1000>; clocks = <&clockgen 4 15>, <&clockgen 4 15>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; cluster1_core1_watchdog: watchdog@c010000 { compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc010000 0x0 0x1000>; clocks = <&clockgen 4 15>, <&clockgen 4 15>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; sai1: audio-controller@f100000 { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a-frwy.dts b/arch/arm64/boot/dts/freescale/fsl-ls1046a-frwy.dts index 3595be0f25277..2d6c73d7d397c 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1046a-frwy.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a-frwy.dts @@ -83,15 +83,9 @@ }; eeprom@52 { - compatible = "atmel,24c512"; + compatible = "onnn,cat24c04", "atmel,24c04"; reg = <0x52>; }; - - eeprom@53 { - compatible = "atmel,24c512"; - reg = <0x53>; - }; - }; }; }; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts index 2743397591141..8858c1e92f23c 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts @@ -58,14 +58,9 @@ }; eeprom@52 { - compatible = "atmel,24c512"; + compatible = "onnn,cat24c05", "atmel,24c04"; reg = <0x52>; }; - - eeprom@53 { - compatible = "atmel,24c512"; - reg = <0x53>; - }; }; &i2c3 { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi index c676d0771762f..6b1b728de9e9c 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi @@ -637,59 +637,59 @@ }; cluster1_core0_watchdog: wdt@c000000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc000000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; cluster1_core1_watchdog: wdt@c010000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc010000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; cluster1_core2_watchdog: wdt@c020000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc020000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; cluster1_core3_watchdog: wdt@c030000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc030000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; cluster2_core0_watchdog: wdt@c100000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc100000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; cluster2_core1_watchdog: wdt@c110000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc110000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; cluster2_core2_watchdog: wdt@c120000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc120000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; cluster2_core3_watchdog: wdt@c130000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc130000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; fsl_mc: fsl-mc@80c000000 { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi index cdb2fa47637da..4bf4a22faa61a 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi @@ -227,59 +227,59 @@ }; cluster1_core0_watchdog: wdt@c000000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc000000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; cluster1_core1_watchdog: wdt@c010000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc010000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; cluster2_core0_watchdog: wdt@c100000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc100000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; cluster2_core1_watchdog: wdt@c110000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc110000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; cluster3_core0_watchdog: wdt@c200000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc200000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; cluster3_core1_watchdog: wdt@c210000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc210000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; cluster4_core0_watchdog: wdt@c300000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc300000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; cluster4_core1_watchdog: wdt@c310000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc310000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; crypto: crypto@8000000 { diff --git a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts index 9ad1d43b8ce75..4fa39654b695d 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts @@ -213,6 +213,10 @@ interrupts = <3 GPIO_ACTIVE_LOW>; rohm,reset-snvs-powered; + #clock-cells = <0>; + clocks = <&osc_32k 0>; + clock-output-names = "clk-32k-out"; + regulators { buck1_reg: BUCK1 { regulator-name = "BUCK1"; diff --git a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi index 253cc345f143a..0c88b72094774 100644 --- a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi +++ b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi @@ -1086,7 +1086,7 @@ }; watchdog0: watchdog@e8a06000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xe8a06000 0x0 0x1000>; interrupts = ; clocks = <&crg_ctrl HI3660_OSC32K>; @@ -1094,7 +1094,7 @@ }; watchdog1: watchdog@e8a07000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xe8a07000 0x0 0x1000>; interrupts = ; clocks = <&crg_ctrl HI3660_OSC32K>; diff --git a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi index 108e2a4227f66..568faaba7ace9 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi +++ b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi @@ -839,7 +839,7 @@ }; watchdog0: watchdog@f8005000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xf8005000 0x0 0x1000>; interrupts = ; clocks = <&ao_ctrl HI6220_WDT0_PCLK>; diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi index d911d38877e52..19f17bb29e4bd 100644 --- a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi +++ b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi @@ -369,7 +369,7 @@ }; usb0: usb@ffb00000 { - compatible = "snps,dwc2"; + compatible = "intel,socfpga-agilex-hsotg", "snps,dwc2"; reg = <0xffb00000 0x40000>; interrupts = <0 93 4>; phys = <&usbphy0>; @@ -381,7 +381,7 @@ }; usb1: usb@ffb40000 { - compatible = "snps,dwc2"; + compatible = "intel,socfpga-agilex-hsotg", "snps,dwc2"; reg = <0xffb40000 0x40000>; interrupts = <0 94 4>; phys = <&usbphy0>; diff --git a/arch/arm64/boot/dts/marvell/armada-3720-db.dts b/arch/arm64/boot/dts/marvell/armada-3720-db.dts index f2cc00594d64a..3e5789f372069 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-db.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-db.dts @@ -128,6 +128,9 @@ /* CON15(V2.0)/CON17(V1.4) : PCIe / CON15(V2.0)/CON12(V1.4) :mini-PCIe */ &pcie0 { + pinctrl-names = "default"; + pinctrl-0 = <&pcie_reset_pins &pcie_clkreq_pins>; + reset-gpios = <&gpiosb 3 GPIO_ACTIVE_LOW>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts index 6226e7e809807..a75bb2ea3506d 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts @@ -59,6 +59,7 @@ phys = <&comphy1 0>; pinctrl-names = "default"; pinctrl-0 = <&pcie_reset_pins &pcie_clkreq_pins>; + reset-gpios = <&gpiosb 3 GPIO_ACTIVE_LOW>; }; /* J6 */ diff --git a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts index 025e02d23da9b..2e8239d489f82 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts @@ -18,6 +18,7 @@ aliases { spi0 = &spi0; + ethernet0 = ð0; ethernet1 = ð1; }; @@ -127,10 +128,6 @@ }; }; -&pcie_reset_pins { - function = "gpio"; -}; - &pcie0 { pinctrl-names = "default"; pinctrl-0 = <&pcie_reset_pins &pcie_clkreq_pins>; @@ -138,6 +135,28 @@ max-link-speed = <2>; reset-gpios = <&gpiosb 3 GPIO_ACTIVE_LOW>; phys = <&comphy1 0>; + /* + * U-Boot port for Turris Mox has a bug which always expects that "ranges" DT property + * contains exactly 2 ranges with 3 (child) address cells, 2 (parent) address cells and + * 2 size cells and also expects that the second range starts at 16 MB offset. Also it + * expects that first range uses same address for PCI (child) and CPU (parent) cells (so + * no remapping) and that this address is the lowest from all specified ranges. If these + * conditions are not met then U-Boot crashes during loading kernel DTB file. PCIe address + * space is 128 MB long, so the best split between MEM and IO is to use fixed 16 MB window + * for IO and the rest 112 MB (64+32+16) for MEM, despite that maximal IO size is just 64 kB. + * This bug is not present in U-Boot ports for other Armada 3700 devices and is fixed in + * U-Boot version 2021.07. See relevant U-Boot commits (the last one contains fix): + * https://source.denx.de/u-boot/u-boot/-/commit/cb2ddb291ee6fcbddd6d8f4ff49089dfe580f5d7 + * https://source.denx.de/u-boot/u-boot/-/commit/c64ac3b3185aeb3846297ad7391fc6df8ecd73bf + * https://source.denx.de/u-boot/u-boot/-/commit/4a82fca8e330157081fc132a591ebd99ba02ee33 + * Bug related to requirement of same child and parent addresses for first range is fixed + * in U-Boot version 2022.04 by following commit: + * https://source.denx.de/u-boot/u-boot/-/commit/1fd54253bca7d43d046bba4853fe5fafd034bc17 + */ + #address-cells = <3>; + #size-cells = <2>; + ranges = <0x81000000 0 0xe8000000 0 0xe8000000 0 0x01000000 /* Port 0 IO */ + 0x82000000 0 0xe9000000 0 0xe9000000 0 0x07000000>; /* Port 0 MEM */ /* enabled by U-Boot if PCIe module is present */ status = "disabled"; diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi index 52767037e0494..9405d9c619ca6 100644 --- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi @@ -318,7 +318,7 @@ pcie_reset_pins: pcie-reset-pins { groups = "pcie1"; - function = "pcie"; + function = "gpio"; }; pcie_clkreq_pins: pcie-clkreq-pins { @@ -487,8 +487,15 @@ #interrupt-cells = <1>; msi-parent = <&pcie0>; msi-controller; - ranges = <0x82000000 0 0xe8000000 0 0xe8000000 0 0x1000000 /* Port 0 MEM */ - 0x81000000 0 0xe9000000 0 0xe9000000 0 0x10000>; /* Port 0 IO*/ + /* + * The 128 MiB address range [0xe8000000-0xf0000000] is + * dedicated for PCIe and can be assigned to 8 windows + * with size a power of two. Use one 64 KiB window for + * IO at the end and the remaining seven windows + * (totaling 127 MiB) for MEM. + */ + ranges = <0x82000000 0 0xe8000000 0 0xe8000000 0 0x07f00000 /* Port 0 MEM */ + 0x81000000 0 0x00000000 0 0xefff0000 0 0x00010000>; /* Port 0 IO */ interrupt-map-mask = <0 0 0 7>; interrupt-map = <0 0 0 1 &pcie_intc 0>, <0 0 0 2 &pcie_intc 1>, diff --git a/arch/arm64/boot/dts/marvell/armada-8040-mcbin.dtsi b/arch/arm64/boot/dts/marvell/armada-8040-mcbin.dtsi index d250f4b2bfedb..bf443ca1fcf11 100644 --- a/arch/arm64/boot/dts/marvell/armada-8040-mcbin.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-8040-mcbin.dtsi @@ -71,6 +71,7 @@ tx-fault-gpio = <&cp1_gpio1 26 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&cp1_sfpp0_pins>; + maximum-power-milliwatt = <2000>; }; sfp_eth1: sfp-eth1 { @@ -83,6 +84,7 @@ tx-fault-gpio = <&cp0_gpio2 30 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&cp1_sfpp1_pins &cp0_sfpp1_pins>; + maximum-power-milliwatt = <2000>; }; sfp_eth3: sfp-eth3 { @@ -95,6 +97,7 @@ tx-fault-gpio = <&cp0_gpio2 19 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&cp0_sfp_1g_pins &cp1_sfp_1g_pins>; + maximum-power-milliwatt = <2000>; }; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra132.dtsi b/arch/arm64/boot/dts/nvidia/tegra132.dtsi index 631a7f77c3869..0b3eb8c0b8df0 100644 --- a/arch/arm64/boot/dts/nvidia/tegra132.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra132.dtsi @@ -1082,13 +1082,13 @@ cpu@0 { device_type = "cpu"; - compatible = "nvidia,denver"; + compatible = "nvidia,tegra132-denver"; reg = <0>; }; cpu@1 { device_type = "cpu"; - compatible = "nvidia,denver"; + compatible = "nvidia,tegra132-denver"; reg = <1>; }; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra186.dtsi b/arch/arm64/boot/dts/nvidia/tegra186.dtsi index 9abf0cb1dd67f..4457262750734 100644 --- a/arch/arm64/boot/dts/nvidia/tegra186.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra186.dtsi @@ -709,7 +709,7 @@ ccplex@e000000 { compatible = "nvidia,tegra186-ccplex-cluster"; - reg = <0x0 0x0e000000 0x0 0x3fffff>; + reg = <0x0 0x0e000000 0x0 0x400000>; nvidia,bpmp = <&bpmp>; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi index 0821754f0fd6d..90adff8aa9baf 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi @@ -1434,7 +1434,7 @@ }; pcie_ep@14160000 { - compatible = "nvidia,tegra194-pcie-ep", "snps,dw-pcie-ep"; + compatible = "nvidia,tegra194-pcie-ep"; power-domains = <&bpmp TEGRA194_POWER_DOMAIN_PCIEX4A>; reg = <0x00 0x14160000 0x0 0x00020000 /* appl registers (128K) */ 0x00 0x36040000 0x0 0x00040000 /* iATU_DMA reg space (256K) */ @@ -1466,7 +1466,7 @@ }; pcie_ep@14180000 { - compatible = "nvidia,tegra194-pcie-ep", "snps,dw-pcie-ep"; + compatible = "nvidia,tegra194-pcie-ep"; power-domains = <&bpmp TEGRA194_POWER_DOMAIN_PCIEX8B>; reg = <0x00 0x14180000 0x0 0x00020000 /* appl registers (128K) */ 0x00 0x38040000 0x0 0x00040000 /* iATU_DMA reg space (256K) */ @@ -1498,7 +1498,7 @@ }; pcie_ep@141a0000 { - compatible = "nvidia,tegra194-pcie-ep", "snps,dw-pcie-ep"; + compatible = "nvidia,tegra194-pcie-ep"; power-domains = <&bpmp TEGRA194_POWER_DOMAIN_PCIEX8A>; reg = <0x00 0x141a0000 0x0 0x00020000 /* appl registers (128K) */ 0x00 0x3a040000 0x0 0x00040000 /* iATU_DMA reg space (256K) */ diff --git a/arch/arm64/boot/dts/qcom/ipq8074-hk01.dts b/arch/arm64/boot/dts/qcom/ipq8074-hk01.dts index 70be3f95209bc..830d9f2c1e5f2 100644 --- a/arch/arm64/boot/dts/qcom/ipq8074-hk01.dts +++ b/arch/arm64/boot/dts/qcom/ipq8074-hk01.dts @@ -20,7 +20,7 @@ stdout-path = "serial0"; }; - memory { + memory@40000000 { device_type = "memory"; reg = <0x0 0x40000000 0x0 0x20000000>; }; diff --git a/arch/arm64/boot/dts/qcom/ipq8074.dtsi b/arch/arm64/boot/dts/qcom/ipq8074.dtsi index 67ee5f5601046..7822592664ffb 100644 --- a/arch/arm64/boot/dts/qcom/ipq8074.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq8074.dtsi @@ -482,7 +482,7 @@ clocks { sleep_clk: sleep_clk { compatible = "fixed-clock"; - clock-frequency = <32000>; + clock-frequency = <32768>; #clock-cells = <0>; }; diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi index 449843f2184d8..301c1c467c0b7 100644 --- a/arch/arm64/boot/dts/qcom/msm8916.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi @@ -16,8 +16,8 @@ #size-cells = <2>; aliases { - sdhc1 = &sdhc_1; /* SDC1 eMMC slot */ - sdhc2 = &sdhc_2; /* SDC2 SD card slot */ + mmc0 = &sdhc_1; /* SDC1 eMMC slot */ + mmc1 = &sdhc_2; /* SDC2 SD card slot */ }; chosen { }; diff --git a/arch/arm64/boot/dts/qcom/msm8994-angler-rev-101.dts b/arch/arm64/boot/dts/qcom/msm8994-angler-rev-101.dts index a5f9a6ab512c4..9b989cc30edc8 100644 --- a/arch/arm64/boot/dts/qcom/msm8994-angler-rev-101.dts +++ b/arch/arm64/boot/dts/qcom/msm8994-angler-rev-101.dts @@ -30,3 +30,7 @@ }; }; }; + +&msmgpio { + gpio-reserved-ranges = <85 4>; +}; diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index d303df3887d9f..f1d3c51ea8d0d 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -2098,9 +2098,6 @@ nvmem-cells = <&gpu_speed_bin>; nvmem-cell-names = "speed_bin"; - qcom,gpu-quirk-two-pass-use-wfi; - qcom,gpu-quirk-fault-detect-mask; - operating-points-v2 = <&gpu_opp_table>; gpu_opp_table: opp-table { diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi b/arch/arm64/boot/dts/qcom/msm8998.dtsi index ccd535edbf4e1..dcb79003ca0e6 100644 --- a/arch/arm64/boot/dts/qcom/msm8998.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi @@ -246,38 +246,42 @@ LITTLE_CPU_SLEEP_0: cpu-sleep-0-0 { compatible = "arm,idle-state"; idle-state-name = "little-retention"; + /* CPU Retention (C2D), L2 Active */ arm,psci-suspend-param = <0x00000002>; entry-latency-us = <81>; exit-latency-us = <86>; - min-residency-us = <200>; + min-residency-us = <504>; }; LITTLE_CPU_SLEEP_1: cpu-sleep-0-1 { compatible = "arm,idle-state"; idle-state-name = "little-power-collapse"; + /* CPU + L2 Power Collapse (C3, D4) */ arm,psci-suspend-param = <0x40000003>; - entry-latency-us = <273>; - exit-latency-us = <612>; - min-residency-us = <1000>; + entry-latency-us = <814>; + exit-latency-us = <4562>; + min-residency-us = <9183>; local-timer-stop; }; BIG_CPU_SLEEP_0: cpu-sleep-1-0 { compatible = "arm,idle-state"; idle-state-name = "big-retention"; + /* CPU Retention (C2D), L2 Active */ arm,psci-suspend-param = <0x00000002>; entry-latency-us = <79>; exit-latency-us = <82>; - min-residency-us = <200>; + min-residency-us = <1302>; }; BIG_CPU_SLEEP_1: cpu-sleep-1-1 { compatible = "arm,idle-state"; idle-state-name = "big-power-collapse"; + /* CPU + L2 Power Collapse (C3, D4) */ arm,psci-suspend-param = <0x40000003>; - entry-latency-us = <336>; - exit-latency-us = <525>; - min-residency-us = <1000>; + entry-latency-us = <724>; + exit-latency-us = <2027>; + min-residency-us = <9419>; local-timer-stop; }; }; diff --git a/arch/arm64/boot/dts/qcom/pm8150.dtsi b/arch/arm64/boot/dts/qcom/pm8150.dtsi index c0b197458665d..6f7dfcb8c0421 100644 --- a/arch/arm64/boot/dts/qcom/pm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/pm8150.dtsi @@ -17,7 +17,7 @@ #size-cells = <0>; pon: power-on@800 { - compatible = "qcom,pm8916-pon"; + compatible = "qcom,pm8998-pon"; reg = <0x0800>; pwrkey { compatible = "qcom,pm8941-pwrkey"; diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index 9573da378826b..1954cef8c6f0b 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -459,9 +459,9 @@ qcom,tcs-offset = <0xd00>; qcom,drv-id = <2>; qcom,tcs-config = , - , - , - ; + , + , + ; rpmhcc: clock-controller { compatible = "qcom,sm8150-rpmh-clk"; diff --git a/arch/arm64/boot/dts/renesas/r8a77995-draak.dts b/arch/arm64/boot/dts/renesas/r8a77995-draak.dts index 67634cb01d6b6..cbdd46ed3ca63 100644 --- a/arch/arm64/boot/dts/renesas/r8a77995-draak.dts +++ b/arch/arm64/boot/dts/renesas/r8a77995-draak.dts @@ -277,10 +277,6 @@ interrupt-parent = <&gpio1>; interrupts = <28 IRQ_TYPE_LEVEL_LOW>; - /* Depends on LVDS */ - max-clock = <135000000>; - min-vrefresh = <50>; - adi,input-depth = <8>; adi,input-colorspace = "rgb"; adi,input-clock = "1x"; diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index 44ad744c4710d..6ddb6b8c1fad5 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -555,7 +555,7 @@ gpu: gpu@ff300000 { compatible = "rockchip,rk3328-mali", "arm,mali-450"; - reg = <0x0 0xff300000 0x0 0x40000>; + reg = <0x0 0xff300000 0x0 0x30000>; interrupts = , , , diff --git a/arch/arm64/boot/dts/rockchip/rk3399-firefly.dts b/arch/arm64/boot/dts/rockchip/rk3399-firefly.dts index 76f5db696009b..5b7e8fbf1ffec 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-firefly.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-firefly.dts @@ -666,8 +666,8 @@ sd-uhs-sdr104; /* Power supply */ - vqmmc-supply = &vcc1v8_s3; /* IO line */ - vmmc-supply = &vcc_sdio; /* card's power */ + vqmmc-supply = <&vcc1v8_s3>; /* IO line */ + vmmc-supply = <&vcc_sdio>; /* card's power */ #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi index dd5624975c9b4..b7e7bb3517c03 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi @@ -281,7 +281,7 @@ sound: sound { compatible = "rockchip,rk3399-gru-sound"; - rockchip,cpu = <&i2s0 &i2s2>; + rockchip,cpu = <&i2s0 &spdif>; }; }; @@ -432,10 +432,6 @@ ap_i2c_audio: &i2c8 { status = "okay"; }; -&i2s2 { - status = "okay"; -}; - &io_domains { status = "okay"; @@ -532,6 +528,17 @@ ap_i2c_audio: &i2c8 { vqmmc-supply = <&ppvar_sd_card_io>; }; +&spdif { + status = "okay"; + + /* + * SPDIF is routed internally to DP; we either don't use these pins, or + * mux them to something else. + */ + /delete-property/ pinctrl-0; + /delete-property/ pinctrl-names; +}; + &spi1 { status = "okay"; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi index e87a04477440e..292ca70c512b5 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi @@ -685,7 +685,6 @@ &sdhci { bus-width = <8>; mmc-hs400-1_8v; - mmc-hs400-enhanced-strobe; non-removable; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-leez-p710.dts b/arch/arm64/boot/dts/rockchip/rk3399-leez-p710.dts index 73be38a537960..a72e77c261ef3 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-leez-p710.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-leez-p710.dts @@ -49,7 +49,7 @@ regulator-boot-on; regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; - vim-supply = <&vcc3v3_sys>; + vin-supply = <&vcc3v3_sys>; }; vcc3v3_sys: vcc3v3-sys { diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index 45b86933c6ea0..390b86ec65389 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -467,6 +467,12 @@ }; &sdhci { + /* + * Signal integrity isn't great at 200MHz but 100MHz has proven stable + * enough. + */ + max-frequency = <100000000>; + bus-width = <8>; mmc-hs400-1_8v; mmc-hs400-enhanced-strobe; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts index 1ae1ebd4efdd0..da3b031d4befa 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts @@ -452,7 +452,7 @@ status = "okay"; bt656-supply = <&vcc_3v0>; - audio-supply = <&vcc_3v0>; + audio-supply = <&vcc1v8_codec>; sdmmc-supply = <&vcc_sdio>; gpio1830-supply = <&vcc_3v0>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 750dad0d17400..4496f7e1c68f8 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -1447,6 +1447,7 @@ reg = <0xf780 0x24>; clocks = <&sdhci>; clock-names = "emmcclk"; + drive-impedance-ohm = <50>; #phy-cells = <0>; status = "disabled"; }; @@ -1457,7 +1458,6 @@ clock-names = "refclk"; #phy-cells = <1>; resets = <&cru SRST_PCIEPHY>; - drive-impedance-ohm = <50>; reset-names = "phy"; status = "disabled"; }; @@ -1746,10 +1746,10 @@ interrupts = ; clocks = <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_SFR>, - <&cru PLL_VPLL>, + <&cru SCLK_HDMI_CEC>, <&cru PCLK_VIO_GRF>, - <&cru SCLK_HDMI_CEC>; - clock-names = "iahb", "isfr", "vpll", "grf", "cec"; + <&cru PLL_VPLL>; + clock-names = "iahb", "isfr", "cec", "grf", "vpll"; power-domains = <&power RK3399_PD_HDCP>; reg-io-width = <4>; rockchip,grf = <&grf>; diff --git a/arch/arm64/boot/dts/ti/k3-j721e.dtsi b/arch/arm64/boot/dts/ti/k3-j721e.dtsi index 43ea1ba979220..5a6e74636d6fc 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721e.dtsi @@ -60,7 +60,7 @@ i-cache-sets = <256>; d-cache-size = <0x8000>; d-cache-line-size = <64>; - d-cache-sets = <128>; + d-cache-sets = <256>; next-level-cache = <&L2_0>; }; @@ -74,7 +74,7 @@ i-cache-sets = <256>; d-cache-size = <0x8000>; d-cache-line-size = <64>; - d-cache-sets = <128>; + d-cache-sets = <256>; next-level-cache = <&L2_0>; }; }; @@ -84,7 +84,7 @@ cache-level = <2>; cache-size = <0x100000>; cache-line-size = <64>; - cache-sets = <2048>; + cache-sets = <1024>; next-level-cache = <&msmc_l3>; }; diff --git a/arch/arm64/boot/dts/xilinx/zynqmp-zc1751-xm016-dc2.dts b/arch/arm64/boot/dts/xilinx/zynqmp-zc1751-xm016-dc2.dts index 2421ec71a201c..41a66787247b6 100644 --- a/arch/arm64/boot/dts/xilinx/zynqmp-zc1751-xm016-dc2.dts +++ b/arch/arm64/boot/dts/xilinx/zynqmp-zc1751-xm016-dc2.dts @@ -131,7 +131,7 @@ reg = <0>; partition@0 { - label = "data"; + label = "spi0-data"; reg = <0x0 0x100000>; }; }; @@ -149,7 +149,7 @@ reg = <0>; partition@0 { - label = "data"; + label = "spi1-data"; reg = <0x0 0x84000>; }; }; diff --git a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi index a2645262f8623..b92549fb32400 100644 --- a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi +++ b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi @@ -582,7 +582,7 @@ }; uart0: serial@ff000000 { - compatible = "cdns,uart-r1p12", "xlnx,xuartps"; + compatible = "xlnx,zynqmp-uart", "cdns,uart-r1p12"; status = "disabled"; interrupt-parent = <&gic>; interrupts = <0 21 4>; @@ -591,7 +591,7 @@ }; uart1: serial@ff010000 { - compatible = "cdns,uart-r1p12", "xlnx,xuartps"; + compatible = "xlnx,zynqmp-uart", "cdns,uart-r1p12"; status = "disabled"; interrupt-parent = <&gic>; interrupts = <0 22 4>; diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 4922c4451e7c3..7163328213189 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -45,13 +45,13 @@ config CRYPTO_SM3_ARM64_CE tristate "SM3 digest algorithm (ARMv8.2 Crypto Extensions)" depends on KERNEL_MODE_NEON select CRYPTO_HASH - select CRYPTO_SM3 + select CRYPTO_LIB_SM3 config CRYPTO_SM4_ARM64_CE tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)" depends on KERNEL_MODE_NEON select CRYPTO_ALGAPI - select CRYPTO_SM4 + select CRYPTO_LIB_SM4 config CRYPTO_GHASH_ARM64_CE tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions" diff --git a/arch/arm64/crypto/sm3-ce-glue.c b/arch/arm64/crypto/sm3-ce-glue.c index d71faca322f2a..ee98954ae8ca6 100644 --- a/arch/arm64/crypto/sm3-ce-glue.c +++ b/arch/arm64/crypto/sm3-ce-glue.c @@ -26,8 +26,10 @@ asmlinkage void sm3_ce_transform(struct sm3_state *sst, u8 const *src, static int sm3_ce_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - if (!crypto_simd_usable()) - return crypto_sm3_update(desc, data, len); + if (!crypto_simd_usable()) { + sm3_update(shash_desc_ctx(desc), data, len); + return 0; + } kernel_neon_begin(); sm3_base_do_update(desc, data, len, sm3_ce_transform); @@ -38,8 +40,10 @@ static int sm3_ce_update(struct shash_desc *desc, const u8 *data, static int sm3_ce_final(struct shash_desc *desc, u8 *out) { - if (!crypto_simd_usable()) - return crypto_sm3_finup(desc, NULL, 0, out); + if (!crypto_simd_usable()) { + sm3_final(shash_desc_ctx(desc), out); + return 0; + } kernel_neon_begin(); sm3_base_do_finalize(desc, sm3_ce_transform); @@ -51,14 +55,22 @@ static int sm3_ce_final(struct shash_desc *desc, u8 *out) static int sm3_ce_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - if (!crypto_simd_usable()) - return crypto_sm3_finup(desc, data, len, out); + if (!crypto_simd_usable()) { + struct sm3_state *sctx = shash_desc_ctx(desc); + + if (len) + sm3_update(sctx, data, len); + sm3_final(sctx, out); + return 0; + } kernel_neon_begin(); - sm3_base_do_update(desc, data, len, sm3_ce_transform); + if (len) + sm3_base_do_update(desc, data, len, sm3_ce_transform); + sm3_base_do_finalize(desc, sm3_ce_transform); kernel_neon_end(); - return sm3_ce_final(desc, out); + return sm3_base_finish(desc, out); } static struct shash_alg sm3_alg = { diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c index 2754c875d39c3..9c93cfc4841bc 100644 --- a/arch/arm64/crypto/sm4-ce-glue.c +++ b/arch/arm64/crypto/sm4-ce-glue.c @@ -17,12 +17,20 @@ MODULE_LICENSE("GPL v2"); asmlinkage void sm4_ce_do_crypt(const u32 *rk, void *out, const void *in); +static int sm4_ce_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int key_len) +{ + struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); + + return sm4_expandkey(ctx, key, key_len); +} + static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { - const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm); + const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); if (!crypto_simd_usable()) { - crypto_sm4_encrypt(tfm, out, in); + sm4_crypt_block(ctx->rkey_enc, out, in); } else { kernel_neon_begin(); sm4_ce_do_crypt(ctx->rkey_enc, out, in); @@ -32,10 +40,10 @@ static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { - const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm); + const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); if (!crypto_simd_usable()) { - crypto_sm4_decrypt(tfm, out, in); + sm4_crypt_block(ctx->rkey_dec, out, in); } else { kernel_neon_begin(); sm4_ce_do_crypt(ctx->rkey_dec, out, in); @@ -49,12 +57,12 @@ static struct crypto_alg sm4_ce_alg = { .cra_priority = 200, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = SM4_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_sm4_ctx), + .cra_ctxsize = sizeof(struct sm4_ctx), .cra_module = THIS_MODULE, .cra_u.cipher = { .cia_min_keysize = SM4_KEY_SIZE, .cia_max_keysize = SM4_KEY_SIZE, - .cia_setkey = crypto_sm4_set_key, + .cia_setkey = sm4_ce_setkey, .cia_encrypt = sm4_ce_encrypt, .cia_decrypt = sm4_ce_decrypt } diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index ee9bdaa40532d..a7b5f2230de1b 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -166,7 +166,7 @@ static inline void gic_pmr_mask_irqs(void) static inline void gic_arch_enable_irqs(void) { - asm volatile ("msr daifclr, #2" : : : "memory"); + asm volatile ("msr daifclr, #3" : : : "memory"); } #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 4a4258f17c868..546d2f52653ca 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -46,9 +46,9 @@ msr daif, \tmp .endm - /* IRQ is the lowest priority flag, unconditionally unmask the rest. */ - .macro enable_da_f - msr daifclr, #(8 | 4 | 1) + /* IRQ/FIQ are the lowest priority flags, unconditionally unmask the rest. */ + .macro enable_da + msr daifclr, #(8 | 4) .endm /* @@ -56,7 +56,7 @@ */ .macro save_and_disable_irq, flags mrs \flags, daif - msr daifset, #2 + msr daifset, #3 .endm .macro restore_irq, flags @@ -110,6 +110,13 @@ hint #20 .endm +/* + * Clear Branch History instruction + */ + .macro clearbhb + hint #22 + .endm + /* * Speculation barrier */ @@ -757,4 +764,30 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU .Lyield_out_\@ : .endm + .macro __mitigate_spectre_bhb_loop tmp +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY +alternative_cb spectre_bhb_patch_loop_iter + mov \tmp, #32 // Patched to correct the immediate +alternative_cb_end +.Lspectre_bhb_loop\@: + b . + 4 + subs \tmp, \tmp, #1 + b.ne .Lspectre_bhb_loop\@ + sb +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + .endm + + /* Save/restores x0-x3 to the stack */ + .macro __mitigate_spectre_bhb_fw +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + stp x0, x1, [sp, #-16]! + stp x2, x3, [sp, #-16]! + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_3 +alternative_cb arm64_update_smccc_conduit + nop // Patched to SMC/HVC #0 +alternative_cb_end + ldp x2, x3, [sp], #16 + ldp x0, x1, [sp], #16 +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + .endm #endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 0fcd854fc95f3..ee67911dab69e 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -29,6 +29,18 @@ SB_BARRIER_INSN"nop\n", \ ARM64_HAS_SB)) +#ifdef CONFIG_ARM64_PSEUDO_NMI +#define pmr_sync() \ + do { \ + extern struct static_key_false gic_pmr_sync; \ + \ + if (static_branch_unlikely(&gic_pmr_sync)) \ + dsb(sy); \ + } while(0) +#else +#define pmr_sync() do {} while (0) +#endif + #define mb() dsb(sy) #define rmb() dsb(ld) #define wmb() dsb(st) diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h index e3d47b52161db..ec7720dbe2c80 100644 --- a/arch/arm64/include/asm/brk-imm.h +++ b/arch/arm64/include/asm/brk-imm.h @@ -10,6 +10,7 @@ * #imm16 values used for BRK instruction generation * 0x004: for installing kprobes * 0x005: for installing uprobes + * 0x006: for kprobe software single-step * Allowed values for kgdb are 0x400 - 0x7ff * 0x100: for triggering a fault on purpose (reserved) * 0x400: for dynamic BRK instruction @@ -19,6 +20,7 @@ */ #define KPROBES_BRK_IMM 0x004 #define UPROBES_BRK_IMM 0x005 +#define KPROBES_BRK_SS_IMM 0x006 #define FAULT_BRK_IMM 0x100 #define KGDB_DYN_DBG_BRK_IMM 0x400 #define KGDB_COMPILED_DBG_BRK_IMM 0x401 diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index d72d995b7e258..85cc06380e93e 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -25,6 +25,7 @@ struct cpuinfo_arm64 { u64 reg_id_aa64dfr1; u64 reg_id_aa64isar0; u64 reg_id_aa64isar1; + u64 reg_id_aa64isar2; u64 reg_id_aa64mmfr0; u64 reg_id_aa64mmfr1; u64 reg_id_aa64mmfr2; diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 1dc3c762fdcb9..4ffa86149d28d 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -55,7 +55,8 @@ #define ARM64_WORKAROUND_CAVIUM_TX2_219_TVM 45 #define ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM 46 #define ARM64_WORKAROUND_1542419 47 +#define ARM64_SPECTRE_BHB 48 -#define ARM64_NCAPS 48 +#define ARM64_NCAPS 49 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index ccae05da98a7f..587b8a804fc7a 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -449,6 +449,29 @@ cpuid_feature_extract_unsigned_field(u64 features, int field) return cpuid_feature_extract_unsigned_field_width(features, field, 4); } +/* + * Fields that identify the version of the Performance Monitors Extension do + * not follow the standard ID scheme. See ARM DDI 0487E.a page D13-2825, + * "Alternative ID scheme used for the Performance Monitors Extension version". + */ +static inline u64 __attribute_const__ +cpuid_feature_cap_perfmon_field(u64 features, int field, u64 cap) +{ + u64 val = cpuid_feature_extract_unsigned_field(features, field); + u64 mask = GENMASK_ULL(field + 3, field); + + /* Treat IMPLEMENTATION DEFINED functionality as unimplemented */ + if (val == 0xf) + val = 0; + + if (val > cap) { + features &= ~mask; + features |= (cap << field) & mask; + } + + return features; +} + static inline u64 arm64_ftr_mask(const struct arm64_ftr_bits *ftrp) { return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift); @@ -508,6 +531,34 @@ static inline bool cpu_supports_mixed_endian_el0(void) return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1)); } +static inline bool supports_csv2p3(int scope) +{ + u64 pfr0; + u8 csv2_val; + + if (scope == SCOPE_LOCAL_CPU) + pfr0 = read_sysreg_s(SYS_ID_AA64PFR0_EL1); + else + pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); + + csv2_val = cpuid_feature_extract_unsigned_field(pfr0, + ID_AA64PFR0_CSV2_SHIFT); + return csv2_val == 3; +} + +static inline bool supports_clearbhb(int scope) +{ + u64 isar2; + + if (scope == SCOPE_LOCAL_CPU) + isar2 = read_sysreg_s(SYS_ID_AA64ISAR2_EL1); + else + isar2 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1); + + return cpuid_feature_extract_unsigned_field(isar2, + ID_AA64ISAR2_CLEARBHB_SHIFT); +} + static inline bool system_supports_32bit_el0(void) { return cpus_have_const_cap(ARM64_HAS_32BIT_EL0); @@ -639,6 +690,18 @@ static inline int arm64_get_ssbd_state(void) void arm64_set_ssbd_mitigation(bool state); +/* Watch out, ordering is important here. */ +enum mitigation_state { + SPECTRE_UNAFFECTED, + SPECTRE_MITIGATED, + SPECTRE_VULNERABLE, +}; + +enum mitigation_state arm64_get_spectre_bhb_state(void); +bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope); +u8 spectre_bhb_loop_affected(int scope); +void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused); + extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt); static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange) diff --git a/arch/arm64/include/asm/cpuidle.h b/arch/arm64/include/asm/cpuidle.h index 3c5ddb429ea29..14a19d1141bd2 100644 --- a/arch/arm64/include/asm/cpuidle.h +++ b/arch/arm64/include/asm/cpuidle.h @@ -18,4 +18,39 @@ static inline int arm_cpuidle_suspend(int index) return -EOPNOTSUPP; } #endif + +#ifdef CONFIG_ARM64_PSEUDO_NMI +#include + +struct arm_cpuidle_irq_context { + unsigned long pmr; + unsigned long daif_bits; +}; + +#define arm_cpuidle_save_irq_context(__c) \ + do { \ + struct arm_cpuidle_irq_context *c = __c; \ + if (system_uses_irq_prio_masking()) { \ + c->daif_bits = read_sysreg(daif); \ + write_sysreg(c->daif_bits | PSR_I_BIT | PSR_F_BIT, \ + daif); \ + c->pmr = gic_read_pmr(); \ + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); \ + } \ + } while (0) + +#define arm_cpuidle_restore_irq_context(__c) \ + do { \ + struct arm_cpuidle_irq_context *c = __c; \ + if (system_uses_irq_prio_masking()) { \ + gic_write_pmr(c->pmr); \ + write_sysreg(c->daif_bits, daif); \ + } \ + } while (0) +#else +struct arm_cpuidle_irq_context { }; + +#define arm_cpuidle_save_irq_context(c) (void)c +#define arm_cpuidle_restore_irq_context(c) (void)c +#endif #endif diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index aca07c2f6e6e3..f0165df489a38 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -71,6 +71,14 @@ #define ARM_CPU_PART_CORTEX_A55 0xD05 #define ARM_CPU_PART_CORTEX_A76 0xD0B #define ARM_CPU_PART_NEOVERSE_N1 0xD0C +#define ARM_CPU_PART_CORTEX_A77 0xD0D +#define ARM_CPU_PART_NEOVERSE_V1 0xD40 +#define ARM_CPU_PART_CORTEX_A78 0xD41 +#define ARM_CPU_PART_CORTEX_X1 0xD44 +#define ARM_CPU_PART_CORTEX_A710 0xD47 +#define ARM_CPU_PART_CORTEX_X2 0xD48 +#define ARM_CPU_PART_NEOVERSE_N2 0xD49 +#define ARM_CPU_PART_CORTEX_A78C 0xD4B #define APM_CPU_PART_POTENZA 0x000 @@ -102,6 +110,14 @@ #define MIDR_CORTEX_A55 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A55) #define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) #define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) +#define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) +#define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1) +#define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78) +#define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) +#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) +#define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) +#define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) +#define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 48bfbf70dbb07..009abc5b9e231 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -8,11 +8,12 @@ #include #include +#include #include #define DAIF_PROCCTX 0 -#define DAIF_PROCCTX_NOIRQ PSR_I_BIT -#define DAIF_ERRCTX (PSR_I_BIT | PSR_A_BIT) +#define DAIF_PROCCTX_NOIRQ (PSR_I_BIT | PSR_F_BIT) +#define DAIF_ERRCTX (PSR_A_BIT | PSR_I_BIT | PSR_F_BIT) #define DAIF_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT) @@ -45,7 +46,7 @@ static inline unsigned long local_daif_save_flags(void) if (system_uses_irq_prio_masking()) { /* If IRQs are masked with PMR, reflect it in the flags */ if (read_sysreg_s(SYS_ICC_PMR_EL1) != GIC_PRIO_IRQON) - flags |= PSR_I_BIT; + flags |= PSR_I_BIT | PSR_F_BIT; } return flags; @@ -67,14 +68,14 @@ static inline void local_daif_restore(unsigned long flags) bool irq_disabled = flags & PSR_I_BIT; WARN_ON(system_has_prio_mask_debugging() && - !(read_sysreg(daif) & PSR_I_BIT)); + (read_sysreg(daif) & (PSR_I_BIT | PSR_F_BIT)) != (PSR_I_BIT | PSR_F_BIT)); if (!irq_disabled) { trace_hardirqs_on(); if (system_uses_irq_prio_masking()) { gic_write_pmr(GIC_PRIO_IRQON); - dsb(sy); + pmr_sync(); } } else if (system_uses_irq_prio_masking()) { u64 pmr; @@ -84,7 +85,7 @@ static inline void local_daif_restore(unsigned long flags) * If interrupts are disabled but we can take * asynchronous errors, we can take NMIs */ - flags &= ~PSR_I_BIT; + flags &= ~(PSR_I_BIT | PSR_F_BIT); pmr = GIC_PRIO_IRQOFF; } else { pmr = GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET; diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index d825e3585e28d..0253691c4b3a1 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -53,6 +53,7 @@ /* kprobes BRK opcodes with ESR encoding */ #define BRK64_OPCODE_KPROBES (AARCH64_BREAK_MON | (KPROBES_BRK_IMM << 5)) +#define BRK64_OPCODE_KPROBES_SS (AARCH64_BREAK_MON | (KPROBES_BRK_SS_IMM << 5)) /* uprobes BRK opcodes with ESR encoding */ #define BRK64_OPCODE_UPROBES (AARCH64_BREAK_MON | (UPROBES_BRK_IMM << 5)) diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index f987b8a8f325e..928a96b9b1617 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -63,9 +63,11 @@ enum fixed_addresses { #endif /* CONFIG_ACPI_APEI_GHES */ #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + FIX_ENTRY_TRAMP_TEXT3, + FIX_ENTRY_TRAMP_TEXT2, + FIX_ENTRY_TRAMP_TEXT1, FIX_ENTRY_TRAMP_DATA, - FIX_ENTRY_TRAMP_TEXT, -#define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT)) +#define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT1)) #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ __end_of_permanent_fixed_addresses, diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 323cb306bd288..8ac55ff3094ae 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -204,4 +204,8 @@ extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size); extern int devmem_is_allowed(unsigned long pfn); +extern bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size, + unsigned long flags); +#define arch_memremap_can_ram_remap arch_memremap_can_ram_remap + #endif /* __ASM_IO_H */ diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index 1a59f0ed1ae39..d1930bbd95e49 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -6,20 +6,19 @@ #define __ASM_IRQFLAGS_H #include +#include #include #include /* * Aarch64 has flags for masking: Debug, Asynchronous (serror), Interrupts and - * FIQ exceptions, in the 'daif' register. We mask and unmask them in 'dai' + * FIQ exceptions, in the 'daif' register. We mask and unmask them in 'daif' * order: * Masking debug exceptions causes all other exceptions to be masked too/ - * Masking SError masks irq, but not debug exceptions. Masking irqs has no - * side effects for other flags. Keeping to this order makes it easier for - * entry.S to know which exceptions should be unmasked. - * - * FIQ is never expected, but we mask it when we disable debug exceptions, and - * unmask it at all other times. + * Masking SError masks IRQ/FIQ, but not debug exceptions. IRQ and FIQ are + * always masked and unmasked together, and have no side effects for other + * flags. Keeping to this order makes it easier for entry.S to know which + * exceptions should be unmasked. */ /* @@ -34,14 +33,14 @@ static inline void arch_local_irq_enable(void) } asm volatile(ALTERNATIVE( - "msr daifclr, #2 // arch_local_irq_enable\n" - "nop", - __msr_s(SYS_ICC_PMR_EL1, "%0") - "dsb sy", + "msr daifclr, #3 // arch_local_irq_enable", + __msr_s(SYS_ICC_PMR_EL1, "%0"), ARM64_HAS_IRQ_PRIO_MASKING) : : "r" ((unsigned long) GIC_PRIO_IRQON) : "memory"); + + pmr_sync(); } static inline void arch_local_irq_disable(void) @@ -53,7 +52,7 @@ static inline void arch_local_irq_disable(void) } asm volatile(ALTERNATIVE( - "msr daifset, #2 // arch_local_irq_disable", + "msr daifset, #3 // arch_local_irq_disable", __msr_s(SYS_ICC_PMR_EL1, "%0"), ARM64_HAS_IRQ_PRIO_MASKING) : @@ -116,14 +115,14 @@ static inline unsigned long arch_local_irq_save(void) static inline void arch_local_irq_restore(unsigned long flags) { asm volatile(ALTERNATIVE( - "msr daif, %0\n" - "nop", - __msr_s(SYS_ICC_PMR_EL1, "%0") - "dsb sy", - ARM64_HAS_IRQ_PRIO_MASKING) + "msr daif, %0", + __msr_s(SYS_ICC_PMR_EL1, "%0"), + ARM64_HAS_IRQ_PRIO_MASKING) : : "r" (flags) : "memory"); + + pmr_sync(); } #endif /* __ASM_IRQFLAGS_H */ diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 817efd95d539f..9679b74a20817 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -65,8 +65,8 @@ #define EARLY_KASLR (0) #endif -#define EARLY_ENTRIES(vstart, vend, shift) (((vend) >> (shift)) \ - - ((vstart) >> (shift)) + 1 + EARLY_KASLR) +#define EARLY_ENTRIES(vstart, vend, shift) \ + ((((vend) - 1) >> (shift)) - ((vstart) >> (shift)) + 1 + EARLY_KASLR) #define EARLY_PGDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, PGDIR_SHIFT)) diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h index 97e511d645a21..8699ce30f587e 100644 --- a/arch/arm64/include/asm/kprobes.h +++ b/arch/arm64/include/asm/kprobes.h @@ -16,7 +16,7 @@ #include #define __ARCH_WANT_KPROBES_INSN_SLOT -#define MAX_INSN_SIZE 1 +#define MAX_INSN_SIZE 2 #define flush_insn_slot(p) do { } while (0) #define kretprobe_blacklist_size 0 diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 697702a1a1ff1..9fbd77b292f86 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -605,8 +605,7 @@ static inline void kvm_arm_vhe_guest_enter(void) * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU. */ - if (system_uses_irq_prio_masking()) - dsb(sy); + pmr_sync(); } static inline void kvm_arm_vhe_guest_exit(void) @@ -684,4 +683,9 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); #define kvm_arm_vcpu_loaded(vcpu) ((vcpu)->arch.sysregs_loaded_on_cpu) +static inline enum mitigation_state kvm_arm_get_spectre_bhb_state(void) +{ + return arm64_get_spectre_bhb_state(); +} + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index befe37d4bc0e5..ffe0aad96b17b 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -478,7 +478,9 @@ static inline void *kvm_get_hyp_vector(void) void *vect = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); int slot = -1; - if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR) && data->fn) { + if ((cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR) || + cpus_have_const_cap(ARM64_SPECTRE_BHB)) && + data && data->template_start) { vect = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs_start)); slot = data->hyp_vectors_slot; } @@ -507,7 +509,8 @@ static inline int kvm_map_vectors(void) * !HBP + HEL2 -> allocate one vector slot and use exec mapping * HBP + HEL2 -> use hardened vertors and use exec mapping */ - if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR)) { + if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR) || + cpus_have_const_cap(ARM64_SPECTRE_BHB)) { __kvm_bp_vect_base = kvm_ksym_ref(__bp_harden_hyp_vecs_start); __kvm_bp_vect_base = kern_hyp_va(__kvm_bp_vect_base); } diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 005690b7d2348..cefbca519a9cc 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -32,7 +32,7 @@ typedef struct { */ #define ASID(mm) ((mm)->context.id.counter & 0xffff) -static inline bool arm64_kernel_unmapped_at_el0(void) +static __always_inline bool arm64_kernel_unmapped_at_el0(void) { return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0) && cpus_have_const_cap(ARM64_UNMAP_KERNEL_AT_EL0); @@ -85,6 +85,12 @@ typedef void (*bp_hardening_cb_t)(void); struct bp_hardening_data { int hyp_vectors_slot; bp_hardening_cb_t fn; + + /* + * template_start is only used by the BHB mitigation to identify the + * hyp_vectors_slot sequence. + */ + const char *template_start; }; #if (defined(CONFIG_HARDEN_BRANCH_PREDICTOR) || \ diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index 2bdbc79bbd01b..e7765b62c7125 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -176,9 +176,10 @@ #define ARMV8_PMU_PMCR_X (1 << 4) /* Export to ETM */ #define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ #define ARMV8_PMU_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter */ +#define ARMV8_PMU_PMCR_LP (1 << 7) /* Long event counter enable */ #define ARMV8_PMU_PMCR_N_SHIFT 11 /* Number of counters supported */ #define ARMV8_PMU_PMCR_N_MASK 0x1f -#define ARMV8_PMU_PMCR_MASK 0x7f /* Mask for writable bits */ +#define ARMV8_PMU_PMCR_MASK 0xff /* Mask for writable bits */ /* * PMOVSR: counters overflow flag status reg diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 74dffb48b3970..c7b54b0fc20b6 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -54,9 +54,15 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; * page table entry, taking care of 52-bit addresses. */ #ifdef CONFIG_ARM64_PA_BITS_52 -#define __pte_to_phys(pte) \ - ((pte_val(pte) & PTE_ADDR_LOW) | ((pte_val(pte) & PTE_ADDR_HIGH) << 36)) -#define __phys_to_pte_val(phys) (((phys) | ((phys) >> 36)) & PTE_ADDR_MASK) +static inline phys_addr_t __pte_to_phys(pte_t pte) +{ + return (pte_val(pte) & PTE_ADDR_LOW) | + ((pte_val(pte) & PTE_ADDR_HIGH) << 36); +} +static inline pteval_t __phys_to_pte_val(phys_addr_t phys) +{ + return (phys | (phys >> 36)) & PTE_ADDR_MASK; +} #else #define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_MASK) #define __phys_to_pte_val(phys) (phys) diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h index 25a73aab438f9..a75f2882cc7cb 100644 --- a/arch/arm64/include/asm/sections.h +++ b/arch/arm64/include/asm/sections.h @@ -20,4 +20,9 @@ extern char __irqentry_text_start[], __irqentry_text_end[]; extern char __mmuoff_data_start[], __mmuoff_data_end[]; extern char __entry_tramp_text_start[], __entry_tramp_text_end[]; +static inline size_t entry_tramp_text_size(void) +{ + return __entry_tramp_text_end - __entry_tramp_text_start; +} + #endif /* __ASM_SECTIONS_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 9b68f1b3915ec..7288fa8bf9a89 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -165,6 +165,7 @@ #define SYS_ID_AA64ISAR0_EL1 sys_reg(3, 0, 0, 6, 0) #define SYS_ID_AA64ISAR1_EL1 sys_reg(3, 0, 0, 6, 1) +#define SYS_ID_AA64ISAR2_EL1 sys_reg(3, 0, 0, 6, 2) #define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0) #define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1) @@ -575,6 +576,21 @@ #define ID_AA64ISAR1_GPI_NI 0x0 #define ID_AA64ISAR1_GPI_IMP_DEF 0x1 +/* id_aa64isar2 */ +#define ID_AA64ISAR2_CLEARBHB_SHIFT 28 +#define ID_AA64ISAR2_RPRES_SHIFT 4 +#define ID_AA64ISAR2_WFXT_SHIFT 0 + +#define ID_AA64ISAR2_RPRES_8BIT 0x0 +#define ID_AA64ISAR2_RPRES_12BIT 0x1 +/* + * Value 0x1 has been removed from the architecture, and is + * reserved, but has not yet been removed from the ARM ARM + * as of ARM DDI 0487G.b. + */ +#define ID_AA64ISAR2_WFXT_NI 0x0 +#define ID_AA64ISAR2_WFXT_SUPPORTED 0x2 + /* id_aa64pfr0 */ #define ID_AA64PFR0_CSV3_SHIFT 60 #define ID_AA64PFR0_CSV2_SHIFT 56 @@ -646,6 +662,7 @@ #endif /* id_aa64mmfr1 */ +#define ID_AA64MMFR1_ECBHB_SHIFT 60 #define ID_AA64MMFR1_PAN_SHIFT 20 #define ID_AA64MMFR1_LOR_SHIFT 16 #define ID_AA64MMFR1_HPD_SHIFT 12 @@ -674,6 +691,16 @@ #define ID_AA64DFR0_TRACEVER_SHIFT 4 #define ID_AA64DFR0_DEBUGVER_SHIFT 0 +#define ID_AA64DFR0_PMUVER_8_0 0x1 +#define ID_AA64DFR0_PMUVER_8_1 0x4 +#define ID_AA64DFR0_PMUVER_8_4 0x5 +#define ID_AA64DFR0_PMUVER_8_5 0x6 +#define ID_AA64DFR0_PMUVER_IMP_DEF 0xf + +#define ID_DFR0_PERFMON_SHIFT 24 + +#define ID_DFR0_PERFMON_8_1 0x4 + #define ID_ISAR5_RDM_SHIFT 24 #define ID_ISAR5_CRC32_SHIFT 16 #define ID_ISAR5_SHA2_SHIFT 12 diff --git a/arch/arm64/include/asm/vectors.h b/arch/arm64/include/asm/vectors.h new file mode 100644 index 0000000000000..bc9a2145f4194 --- /dev/null +++ b/arch/arm64/include/asm/vectors.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2022 ARM Ltd. + */ +#ifndef __ASM_VECTORS_H +#define __ASM_VECTORS_H + +#include +#include + +#include + +extern char vectors[]; +extern char tramp_vectors[]; +extern char __bp_harden_el1_vectors[]; + +/* + * Note: the order of this enum corresponds to two arrays in entry.S: + * tramp_vecs and __bp_harden_el1_vectors. By default the canonical + * 'full fat' vectors are used directly. + */ +enum arm64_bp_harden_el1_vectors { +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + /* + * Perform the BHB loop mitigation, before branching to the canonical + * vectors. + */ + EL1_VECTOR_BHB_LOOP, + + /* + * Make the SMC call for firmware mitigation, before branching to the + * canonical vectors. + */ + EL1_VECTOR_BHB_FW, + + /* + * Use the ClearBHB instruction, before branching to the canonical + * vectors. + */ + EL1_VECTOR_BHB_CLEAR_INSN, +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + + /* + * Remap the kernel before branching to the canonical vectors. + */ + EL1_VECTOR_KPTI, +}; + +#ifndef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY +#define EL1_VECTOR_BHB_LOOP -1 +#define EL1_VECTOR_BHB_FW -1 +#define EL1_VECTOR_BHB_CLEAR_INSN -1 +#endif /* !CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + +/* The vectors to use on return from EL0. e.g. to remap the kernel */ +DECLARE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector); + +#ifndef CONFIG_UNMAP_KERNEL_AT_EL0 +#define TRAMP_VALIAS 0ul +#endif + +static inline const char * +arm64_get_bp_hardening_vector(enum arm64_bp_harden_el1_vectors slot) +{ + if (arm64_kernel_unmapped_at_el0()) + return (char *)(TRAMP_VALIAS + SZ_2K * slot); + + WARN_ON_ONCE(slot == EL1_VECTOR_KPTI); + + return __bp_harden_el1_vectors + SZ_2K * slot; +} + +#endif /* __ASM_VECTORS_H */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 67c21f9bdbad2..08440ce57a1c2 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -240,6 +240,11 @@ struct kvm_vcpu_events { #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED 3 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED (1U << 4) +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 KVM_REG_ARM_FW_REG(3) +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL 0 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL 1 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED 2 + /* SVE registers */ #define KVM_REG_ARM64_SVE (0x15 << KVM_REG_ARM_COPROC_SHIFT) diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 73039949b5ce2..5f8e4c2df53cc 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -41,7 +41,7 @@ bool alternative_is_applied(u16 cpufeature) /* * Check if the target PC is within an alternative block. */ -static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) +static __always_inline bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) { unsigned long replptr = (unsigned long)ALT_REPL_PTR(alt); return !(pc >= replptr && pc <= (replptr + alt->alt_len)); @@ -49,7 +49,7 @@ static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) #define align_down(x, a) ((unsigned long)(x) & ~(((unsigned long)(a)) - 1)) -static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr) +static __always_inline u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr) { u32 insn; @@ -94,7 +94,7 @@ static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnp return insn; } -static void patch_alternative(struct alt_instr *alt, +static noinstr void patch_alternative(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst) { __le32 *replptr; diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index 7fa6828bb488a..587543c6c51cb 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -43,7 +43,7 @@ static void ci_leaf_init(struct cacheinfo *this_leaf, this_leaf->type = type; } -static int __init_cache_level(unsigned int cpu) +int init_cache_level(unsigned int cpu) { unsigned int ctype, level, leaves, fw_level; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -78,7 +78,7 @@ static int __init_cache_level(unsigned int cpu) return 0; } -static int __populate_cache_leaves(unsigned int cpu) +int populate_cache_leaves(unsigned int cpu) { unsigned int level, idx; enum cache_type type; @@ -97,6 +97,3 @@ static int __populate_cache_leaves(unsigned int cpu) } return 0; } - -DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level) -DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 1e16c4e00e771..33b33416fea42 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -13,6 +13,7 @@ #include #include #include +#include static bool __maybe_unused is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope) @@ -116,6 +117,16 @@ DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); #ifdef CONFIG_KVM_INDIRECT_VECTORS extern char __smccc_workaround_1_smc_start[]; extern char __smccc_workaround_1_smc_end[]; +extern char __smccc_workaround_3_smc_start[]; +extern char __smccc_workaround_3_smc_end[]; +extern char __spectre_bhb_loop_k8_start[]; +extern char __spectre_bhb_loop_k8_end[]; +extern char __spectre_bhb_loop_k24_start[]; +extern char __spectre_bhb_loop_k24_end[]; +extern char __spectre_bhb_loop_k32_start[]; +extern char __spectre_bhb_loop_k32_end[]; +extern char __spectre_bhb_clearbhb_start[]; +extern char __spectre_bhb_clearbhb_end[]; static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, const char *hyp_vecs_end) @@ -129,11 +140,11 @@ static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, __flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K); } +static DEFINE_RAW_SPINLOCK(bp_lock); static void install_bp_hardening_cb(bp_hardening_cb_t fn, const char *hyp_vecs_start, const char *hyp_vecs_end) { - static DEFINE_RAW_SPINLOCK(bp_lock); int cpu, slot = -1; /* @@ -161,6 +172,7 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn, __this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot); __this_cpu_write(bp_hardening_data.fn, fn); + __this_cpu_write(bp_hardening_data.template_start, hyp_vecs_start); raw_spin_unlock(&bp_lock); } #else @@ -927,6 +939,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .cpu_enable = cpu_enable_ssbd_mitigation, .midr_range_list = arm64_ssb_cpus, }, + { + .desc = "Spectre-BHB", + .capability = ARM64_SPECTRE_BHB, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = is_spectre_bhb_affected, + .cpu_enable = spectre_bhb_enable_mitigation, + }, #ifdef CONFIG_ARM64_ERRATUM_1418040 { .desc = "ARM erratum 1418040", @@ -989,15 +1008,41 @@ ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, return sprintf(buf, "Mitigation: __user pointer sanitization\n"); } +static const char *get_bhb_affected_string(enum mitigation_state bhb_state) +{ + switch (bhb_state) { + case SPECTRE_UNAFFECTED: + return ""; + default: + case SPECTRE_VULNERABLE: + return ", but not BHB"; + case SPECTRE_MITIGATED: + return ", BHB"; + } +} + ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) { + enum mitigation_state bhb_state = arm64_get_spectre_bhb_state(); + const char *bhb_str = get_bhb_affected_string(bhb_state); + const char *v2_str = "Branch predictor hardening"; + switch (get_spectre_v2_workaround_state()) { case ARM64_BP_HARDEN_NOT_REQUIRED: - return sprintf(buf, "Not affected\n"); - case ARM64_BP_HARDEN_WA_NEEDED: - return sprintf(buf, "Mitigation: Branch predictor hardening\n"); - case ARM64_BP_HARDEN_UNKNOWN: + if (bhb_state == SPECTRE_UNAFFECTED) + return sprintf(buf, "Not affected\n"); + + /* + * Platforms affected by Spectre-BHB can't report + * "Not affected" for Spectre-v2. + */ + v2_str = "CSV2"; + fallthrough; + case ARM64_BP_HARDEN_WA_NEEDED: + return sprintf(buf, "Mitigation: %s%s\n", v2_str, bhb_str); + case ARM64_BP_HARDEN_UNKNOWN: + fallthrough; default: return sprintf(buf, "Vulnerable\n"); } @@ -1019,3 +1064,333 @@ ssize_t cpu_show_spec_store_bypass(struct device *dev, return sprintf(buf, "Vulnerable\n"); } + +/* + * We try to ensure that the mitigation state can never change as the result of + * onlining a late CPU. + */ +static void update_mitigation_state(enum mitigation_state *oldp, + enum mitigation_state new) +{ + enum mitigation_state state; + + do { + state = READ_ONCE(*oldp); + if (new <= state) + break; + } while (cmpxchg_relaxed(oldp, state, new) != state); +} + +/* + * Spectre BHB. + * + * A CPU is either: + * - Mitigated by a branchy loop a CPU specific number of times, and listed + * in our "loop mitigated list". + * - Mitigated in software by the firmware Spectre v2 call. + * - Has the ClearBHB instruction to perform the mitigation. + * - Has the 'Exception Clears Branch History Buffer' (ECBHB) feature, so no + * software mitigation in the vectors is needed. + * - Has CSV2.3, so is unaffected. + */ +static enum mitigation_state spectre_bhb_state; + +enum mitigation_state arm64_get_spectre_bhb_state(void) +{ + return spectre_bhb_state; +} + +/* + * This must be called with SCOPE_LOCAL_CPU for each type of CPU, before any + * SCOPE_SYSTEM call will give the right answer. + */ +u8 spectre_bhb_loop_affected(int scope) +{ + u8 k = 0; + static u8 max_bhb_k; + + if (scope == SCOPE_LOCAL_CPU) { + static const struct midr_range spectre_bhb_k32_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), + {}, + }; + static const struct midr_range spectre_bhb_k24_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A76), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A77), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), + {}, + }; + static const struct midr_range spectre_bhb_k8_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), + {}, + }; + + if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k32_list)) + k = 32; + else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k24_list)) + k = 24; + else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k8_list)) + k = 8; + + max_bhb_k = max(max_bhb_k, k); + } else { + k = max_bhb_k; + } + + return k; +} + +static enum mitigation_state spectre_bhb_get_cpu_fw_mitigation_state(void) +{ + int ret; + struct arm_smccc_res res; + + if (psci_ops.smccc_version == SMCCC_VERSION_1_0) + return SPECTRE_VULNERABLE; + + switch (psci_ops.conduit) { + case PSCI_CONDUIT_HVC: + arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_3, &res); + break; + + case PSCI_CONDUIT_SMC: + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_3, &res); + break; + + default: + return SPECTRE_VULNERABLE; + } + + ret = res.a0; + switch (ret) { + case SMCCC_RET_SUCCESS: + return SPECTRE_MITIGATED; + case SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED: + return SPECTRE_UNAFFECTED; + default: + fallthrough; + case SMCCC_RET_NOT_SUPPORTED: + return SPECTRE_VULNERABLE; + } +} + +static bool is_spectre_bhb_fw_affected(int scope) +{ + static bool system_affected; + enum mitigation_state fw_state; + bool has_smccc = (psci_ops.smccc_version >= SMCCC_VERSION_1_1); + static const struct midr_range spectre_bhb_firmware_mitigated_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), + {}, + }; + bool cpu_in_list = is_midr_in_range_list(read_cpuid_id(), + spectre_bhb_firmware_mitigated_list); + + if (scope != SCOPE_LOCAL_CPU) + return system_affected; + + fw_state = spectre_bhb_get_cpu_fw_mitigation_state(); + if (cpu_in_list || (has_smccc && fw_state == SPECTRE_MITIGATED)) { + system_affected = true; + return true; + } + + return false; +} + +static bool supports_ecbhb(int scope) +{ + u64 mmfr1; + + if (scope == SCOPE_LOCAL_CPU) + mmfr1 = read_sysreg_s(SYS_ID_AA64MMFR1_EL1); + else + mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); + + return cpuid_feature_extract_unsigned_field(mmfr1, + ID_AA64MMFR1_ECBHB_SHIFT); +} + +bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, + int scope) +{ + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + + if (supports_csv2p3(scope)) + return false; + + if (supports_clearbhb(scope)) + return true; + + if (spectre_bhb_loop_affected(scope)) + return true; + + if (is_spectre_bhb_fw_affected(scope)) + return true; + + return false; +} + +static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot) +{ + const char *v = arm64_get_bp_hardening_vector(slot); + + if (slot < 0) + return; + + __this_cpu_write(this_cpu_vector, v); + + /* + * When KPTI is in use, the vectors are switched when exiting to + * user-space. + */ + if (arm64_kernel_unmapped_at_el0()) + return; + + write_sysreg(v, vbar_el1); + isb(); +} + +#ifdef CONFIG_KVM_INDIRECT_VECTORS +static const char *kvm_bhb_get_vecs_end(const char *start) +{ + if (start == __smccc_workaround_3_smc_start) + return __smccc_workaround_3_smc_end; + else if (start == __spectre_bhb_loop_k8_start) + return __spectre_bhb_loop_k8_end; + else if (start == __spectre_bhb_loop_k24_start) + return __spectre_bhb_loop_k24_end; + else if (start == __spectre_bhb_loop_k32_start) + return __spectre_bhb_loop_k32_end; + else if (start == __spectre_bhb_clearbhb_start) + return __spectre_bhb_clearbhb_end; + + return NULL; +} + +static void kvm_setup_bhb_slot(const char *hyp_vecs_start) +{ + int cpu, slot = -1; + const char *hyp_vecs_end; + + if (!IS_ENABLED(CONFIG_KVM) || !is_hyp_mode_available()) + return; + + hyp_vecs_end = kvm_bhb_get_vecs_end(hyp_vecs_start); + if (WARN_ON_ONCE(!hyp_vecs_start || !hyp_vecs_end)) + return; + + raw_spin_lock(&bp_lock); + for_each_possible_cpu(cpu) { + if (per_cpu(bp_hardening_data.template_start, cpu) == hyp_vecs_start) { + slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu); + break; + } + } + + if (slot == -1) { + slot = atomic_inc_return(&arm64_el2_vector_last_slot); + BUG_ON(slot >= BP_HARDEN_EL2_SLOTS); + __copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end); + } + + __this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot); + __this_cpu_write(bp_hardening_data.template_start, hyp_vecs_start); + raw_spin_unlock(&bp_lock); +} +#else +#define __smccc_workaround_3_smc_start NULL +#define __spectre_bhb_loop_k8_start NULL +#define __spectre_bhb_loop_k24_start NULL +#define __spectre_bhb_loop_k32_start NULL +#define __spectre_bhb_clearbhb_start NULL + +static void kvm_setup_bhb_slot(const char *hyp_vecs_start) { } +#endif + +void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) +{ + enum mitigation_state fw_state, state = SPECTRE_VULNERABLE; + + if (!is_spectre_bhb_affected(entry, SCOPE_LOCAL_CPU)) + return; + + if (get_spectre_v2_workaround_state() == ARM64_BP_HARDEN_UNKNOWN) { + /* No point mitigating Spectre-BHB alone. */ + } else if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) { + pr_info_once("spectre-bhb mitigation disabled by compile time option\n"); + } else if (cpu_mitigations_off()) { + pr_info_once("spectre-bhb mitigation disabled by command line option\n"); + } else if (supports_ecbhb(SCOPE_LOCAL_CPU)) { + state = SPECTRE_MITIGATED; + } else if (supports_clearbhb(SCOPE_LOCAL_CPU)) { + kvm_setup_bhb_slot(__spectre_bhb_clearbhb_start); + this_cpu_set_vectors(EL1_VECTOR_BHB_CLEAR_INSN); + + state = SPECTRE_MITIGATED; + } else if (spectre_bhb_loop_affected(SCOPE_LOCAL_CPU)) { + switch (spectre_bhb_loop_affected(SCOPE_SYSTEM)) { + case 8: + kvm_setup_bhb_slot(__spectre_bhb_loop_k8_start); + break; + case 24: + kvm_setup_bhb_slot(__spectre_bhb_loop_k24_start); + break; + case 32: + kvm_setup_bhb_slot(__spectre_bhb_loop_k32_start); + break; + default: + WARN_ON_ONCE(1); + } + this_cpu_set_vectors(EL1_VECTOR_BHB_LOOP); + + state = SPECTRE_MITIGATED; + } else if (is_spectre_bhb_fw_affected(SCOPE_LOCAL_CPU)) { + fw_state = spectre_bhb_get_cpu_fw_mitigation_state(); + if (fw_state == SPECTRE_MITIGATED) { + kvm_setup_bhb_slot(__smccc_workaround_3_smc_start); + this_cpu_set_vectors(EL1_VECTOR_BHB_FW); + + /* + * With WA3 in the vectors, the WA1 calls can be + * removed. + */ + __this_cpu_write(bp_hardening_data.fn, NULL); + + state = SPECTRE_MITIGATED; + } + } + + update_mitigation_state(&spectre_bhb_state, state); +} + +/* Patched to correct the immediate */ +void noinstr spectre_bhb_patch_loop_iter(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst) +{ + u8 rd; + u32 insn; + u16 loop_count = spectre_bhb_loop_affected(SCOPE_SYSTEM); + + BUG_ON(nr_inst != 1); /* MOV -> MOV */ + + if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) + return; + + insn = le32_to_cpu(*origptr); + rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, insn); + insn = aarch64_insn_gen_movewide(rd, loop_count, 0, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_ZERO); + *updptr++ = cpu_to_le32(insn); +} diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index acdef8d76c64d..d07dadd6b8ff7 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -10,11 +10,13 @@ #include #include #include +#include #include #include #include #include #include + #include #include #include @@ -23,6 +25,7 @@ #include #include #include +#include #include /* Kernel representation of AT_HWCAP and AT_HWCAP2 */ @@ -45,6 +48,8 @@ static struct arm64_cpu_capabilities const __ro_after_init *cpu_hwcaps_ptrs[ARM6 /* Need also bit for ARM64_CB_PATCH */ DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE); +DEFINE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector) = vectors; + /* * Flag to indicate if we have computed the system wide * capabilities based on the boot time active CPUs. This @@ -150,6 +155,11 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_CLEARBHB_SHIFT, 4, 0), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0), @@ -410,6 +420,7 @@ static const struct __ftr_reg_entry { /* Op1 = 0, CRn = 0, CRm = 6 */ ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0), ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1), + ARM64_FTR_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2), /* Op1 = 0, CRn = 0, CRm = 7 */ ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0), @@ -581,6 +592,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_ID_AA64DFR1_EL1, info->reg_id_aa64dfr1); init_cpu_ftr_reg(SYS_ID_AA64ISAR0_EL1, info->reg_id_aa64isar0); init_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1); + init_cpu_ftr_reg(SYS_ID_AA64ISAR2_EL1, info->reg_id_aa64isar2); init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0); init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1); init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2); @@ -704,6 +716,8 @@ void update_cpu_features(int cpu, info->reg_id_aa64isar0, boot->reg_id_aa64isar0); taint |= check_update_ftr_reg(SYS_ID_AA64ISAR1_EL1, cpu, info->reg_id_aa64isar1, boot->reg_id_aa64isar1); + taint |= check_update_ftr_reg(SYS_ID_AA64ISAR2_EL1, cpu, + info->reg_id_aa64isar2, boot->reg_id_aa64isar2); /* * Differing PARange support is fine as long as all peripherals and @@ -838,6 +852,7 @@ static u64 __read_sysreg_by_encoding(u32 sys_id) read_sysreg_case(SYS_ID_AA64MMFR2_EL1); read_sysreg_case(SYS_ID_AA64ISAR0_EL1); read_sysreg_case(SYS_ID_AA64ISAR1_EL1); + read_sysreg_case(SYS_ID_AA64ISAR2_EL1); read_sysreg_case(SYS_CNTFRQ_EL0); read_sysreg_case(SYS_CTR_EL0); @@ -1038,6 +1053,12 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) static bool kpti_applied = false; int cpu = smp_processor_id(); + if (__this_cpu_read(this_cpu_vector) == vectors) { + const char *v = arm64_get_bp_hardening_vector(EL1_VECTOR_KPTI); + + __this_cpu_write(this_cpu_vector, v); + } + /* * We don't need to rewrite the page-tables if either we've done * it already or we have KASLR enabled and therefore have not diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index e4d6af2fdec71..5397f0d9e5bbb 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -53,6 +53,9 @@ static int psci_acpi_cpu_init_idle(unsigned int cpu) struct acpi_lpi_state *lpi; struct acpi_processor *pr = per_cpu(processors, cpu); + if (unlikely(!pr || !pr->flags.has_lpi)) + return -EINVAL; + /* * If the PSCI cpu_suspend function hook has not been initialized * idle states must not be enabled, so bail out @@ -60,9 +63,6 @@ static int psci_acpi_cpu_init_idle(unsigned int cpu) if (!psci_ops.cpu_suspend) return -EOPNOTSUPP; - if (unlikely(!pr || !pr->flags.has_lpi)) - return -EINVAL; - count = pr->power.count - 1; if (count <= 0) return -ENODEV; diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 05933c065732b..d9a078527c98e 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -139,6 +139,9 @@ static int c_show(struct seq_file *m, void *v) * "processor". Give glibc what it expects. */ seq_printf(m, "processor\t: %d\n", i); +#ifdef CONFIG_NUMA + seq_printf(m, "physical id\t: %d\n", topology_physical_package_id(i)); +#endif if (compat) seq_printf(m, "model name\t: ARMv8 Processor rev %d (%s)\n", MIDR_REVISION(midr), COMPAT_ELF_PLATFORM); @@ -344,6 +347,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1); info->reg_id_aa64isar0 = read_cpuid(ID_AA64ISAR0_EL1); info->reg_id_aa64isar1 = read_cpuid(ID_AA64ISAR1_EL1); + info->reg_id_aa64isar2 = read_cpuid(ID_AA64ISAR2_EL1); info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); info->reg_id_aa64mmfr2 = read_cpuid(ID_AA64MMFR2_EL1); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index db137746c6fa3..63ca978fa08e3 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -59,18 +59,21 @@ .macro kernel_ventry, el, label, regsize = 64 .align 7 -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -alternative_if ARM64_UNMAP_KERNEL_AT_EL0 +.Lventry_start\@: .if \el == 0 + /* + * This must be the first instruction of the EL0 vector entries. It is + * skipped by the trampoline vectors, to trigger the cleanup. + */ + b .Lskip_tramp_vectors_cleanup\@ .if \regsize == 64 mrs x30, tpidrro_el0 msr tpidrro_el0, xzr .else mov x30, xzr .endif +.Lskip_tramp_vectors_cleanup\@: .endif -alternative_else_nop_endif -#endif sub sp, sp, #S_FRAME_SIZE #ifdef CONFIG_VMAP_STACK @@ -116,11 +119,15 @@ alternative_else_nop_endif mrs x0, tpidrro_el0 #endif b el\()\el\()_\label +.org .Lventry_start\@ + 128 // Did we overflow the ventry slot? .endm - .macro tramp_alias, dst, sym + .macro tramp_alias, dst, sym, tmp mov_q \dst, TRAMP_VALIAS - add \dst, \dst, #(\sym - .entry.tramp.text) + adr_l \tmp, \sym + add \dst, \dst, \tmp + adr_l \tmp, .entry.tramp.text + sub \dst, \dst, \tmp .endm // This macro corrupts x0-x3. It is the caller's duty @@ -269,8 +276,10 @@ alternative_else_nop_endif alternative_if ARM64_HAS_IRQ_PRIO_MASKING ldr x20, [sp, #S_PMR_SAVE] msr_s SYS_ICC_PMR_EL1, x20 - /* Ensure priority change is seen by redistributor */ - dsb sy + mrs_s x21, SYS_ICC_CTLR_EL1 + tbz x21, #6, .L__skip_pmr_sync\@ // Check for ICC_CTLR_EL1.PMHE + dsb sy // Ensure priority change is seen by redistributor +.L__skip_pmr_sync\@: alternative_else_nop_endif ldp x21, x22, [sp, #S_PC] // load ELR, SPSR @@ -361,21 +370,25 @@ alternative_else_nop_endif ldp x24, x25, [sp, #16 * 12] ldp x26, x27, [sp, #16 * 13] ldp x28, x29, [sp, #16 * 14] - ldr lr, [sp, #S_LR] - add sp, sp, #S_FRAME_SIZE // restore sp .if \el == 0 -alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 +alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 + ldr lr, [sp, #S_LR] + add sp, sp, #S_FRAME_SIZE // restore sp + eret +alternative_else_nop_endif #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 bne 5f - msr far_el1, x30 - tramp_alias x30, tramp_exit_native + msr far_el1, x29 + tramp_alias x30, tramp_exit_native, x29 br x30 5: - tramp_alias x30, tramp_exit_compat + tramp_alias x30, tramp_exit_compat, x29 br x30 #endif .else + ldr lr, [sp, #S_LR] + add sp, sp, #S_FRAME_SIZE // restore sp eret .endif sb @@ -654,7 +667,7 @@ ENDPROC(el1_sync) el1_irq: kernel_entry 1 gic_prio_irq_setup pmr=x20, tmp=x1 - enable_da_f + enable_da #ifdef CONFIG_ARM64_PSEUDO_NMI test_irqs_unmasked res=x0, pmr=x20 @@ -673,8 +686,10 @@ el1_irq: ldr x24, [tsk, #TSK_TI_PREEMPT] // get preempt count alternative_if ARM64_HAS_IRQ_PRIO_MASKING /* - * DA_F were cleared at start of handling. If anything is set in DAIF, - * we come back from an NMI, so skip preemption + * DA were cleared at start of handling, and IF are cleared by + * the GIC irqchip driver using gic_arch_enable_irqs() for + * normal IRQs. If anything is set, it means we come back from + * an NMI instead of a normal IRQ, so skip preemption */ mrs x0, daif orr x24, x24, x0 @@ -820,7 +835,7 @@ el0_ia: mrs x26, far_el1 gic_prio_kentry_setup tmp=x0 ct_user_exit_irqoff - enable_da_f + enable_da #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off #endif @@ -870,7 +885,7 @@ el0_sp_pc: */ gic_prio_kentry_setup tmp=x0 ct_user_exit_irqoff - enable_da_f + enable_da #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off #endif @@ -910,7 +925,7 @@ el0_dbg: mov x1, x25 mov x2, sp bl do_debug_exception - enable_da_f + enable_da b ret_to_user el0_inv: ct_user_exit_irqoff @@ -928,7 +943,7 @@ el0_irq: el0_irq_naked: gic_prio_irq_setup pmr=x20, tmp=x0 ct_user_exit_irqoff - enable_da_f + enable_da #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off @@ -967,7 +982,7 @@ el0_error_naked: mov x0, sp mov x1, x25 bl do_serror - enable_da_f + enable_da b ret_to_user ENDPROC(el0_error) @@ -1012,12 +1027,6 @@ ENDPROC(el0_svc) .popsection // .entry.text -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -/* - * Exception vectors trampoline. - */ - .pushsection ".entry.tramp.text", "ax" - // Move from tramp_pg_dir to swapper_pg_dir .macro tramp_map_kernel, tmp mrs \tmp, ttbr1_el1 @@ -1051,12 +1060,47 @@ alternative_else_nop_endif */ .endm - .macro tramp_ventry, regsize = 64 + .macro tramp_data_page dst + adr_l \dst, .entry.tramp.text + sub \dst, \dst, PAGE_SIZE + .endm + + .macro tramp_data_read_var dst, var +#ifdef CONFIG_RANDOMIZE_BASE + tramp_data_page \dst + add \dst, \dst, #:lo12:__entry_tramp_data_\var + ldr \dst, [\dst] +#else + ldr \dst, =\var +#endif + .endm + +#define BHB_MITIGATION_NONE 0 +#define BHB_MITIGATION_LOOP 1 +#define BHB_MITIGATION_FW 2 +#define BHB_MITIGATION_INSN 3 + + .macro tramp_ventry, vector_start, regsize, kpti, bhb .align 7 1: .if \regsize == 64 msr tpidrro_el0, x30 // Restored in kernel_ventry .endif + + .if \bhb == BHB_MITIGATION_LOOP + /* + * This sequence must appear before the first indirect branch. i.e. the + * ret out of tramp_ventry. It appears here because x30 is free. + */ + __mitigate_spectre_bhb_loop x30 + .endif // \bhb == BHB_MITIGATION_LOOP + + .if \bhb == BHB_MITIGATION_INSN + clearbhb + isb + .endif // \bhb == BHB_MITIGATION_INSN + + .if \kpti == 1 /* * Defend against branch aliasing attacks by pushing a dummy * entry onto the return stack and using a RET instruction to @@ -1066,46 +1110,79 @@ alternative_else_nop_endif b . 2: tramp_map_kernel x30 -#ifdef CONFIG_RANDOMIZE_BASE - adr x30, tramp_vectors + PAGE_SIZE alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003 - ldr x30, [x30] -#else - ldr x30, =vectors -#endif + tramp_data_read_var x30, vectors alternative_if_not ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM - prfm plil1strm, [x30, #(1b - tramp_vectors)] + prfm plil1strm, [x30, #(1b - \vector_start)] alternative_else_nop_endif + msr vbar_el1, x30 - add x30, x30, #(1b - tramp_vectors) isb + .else + ldr x30, =vectors + .endif // \kpti == 1 + + .if \bhb == BHB_MITIGATION_FW + /* + * The firmware sequence must appear before the first indirect branch. + * i.e. the ret out of tramp_ventry. But it also needs the stack to be + * mapped to save/restore the registers the SMC clobbers. + */ + __mitigate_spectre_bhb_fw + .endif // \bhb == BHB_MITIGATION_FW + + add x30, x30, #(1b - \vector_start + 4) ret +.org 1b + 128 // Did we overflow the ventry slot? .endm .macro tramp_exit, regsize = 64 - adr x30, tramp_vectors + tramp_data_read_var x30, this_cpu_vector +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN + mrs x29, tpidr_el1 +alternative_else + mrs x29, tpidr_el2 +alternative_endif + ldr x30, [x30, x29] + msr vbar_el1, x30 - tramp_unmap_kernel x30 + ldr lr, [sp, #S_LR] + tramp_unmap_kernel x29 .if \regsize == 64 - mrs x30, far_el1 + mrs x29, far_el1 .endif + add sp, sp, #S_FRAME_SIZE // restore sp eret sb .endm - .align 11 -ENTRY(tramp_vectors) + .macro generate_tramp_vector, kpti, bhb +.Lvector_start\@: .space 0x400 - tramp_ventry - tramp_ventry - tramp_ventry - tramp_ventry + .rept 4 + tramp_ventry .Lvector_start\@, 64, \kpti, \bhb + .endr + .rept 4 + tramp_ventry .Lvector_start\@, 32, \kpti, \bhb + .endr + .endm - tramp_ventry 32 - tramp_ventry 32 - tramp_ventry 32 - tramp_ventry 32 +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +/* + * Exception vectors trampoline. + * The order must match __bp_harden_el1_vectors and the + * arm64_bp_harden_el1_vectors enum. + */ + .pushsection ".entry.tramp.text", "ax" + .align 11 +ENTRY(tramp_vectors) +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_LOOP + generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_FW + generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_INSN +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_NONE END(tramp_vectors) ENTRY(tramp_exit_native) @@ -1123,11 +1200,55 @@ END(tramp_exit_compat) .align PAGE_SHIFT .globl __entry_tramp_data_start __entry_tramp_data_start: +__entry_tramp_data_vectors: .quad vectors +#ifdef CONFIG_ARM_SDE_INTERFACE +__entry_tramp_data___sdei_asm_handler: + .quad __sdei_asm_handler +#endif /* CONFIG_ARM_SDE_INTERFACE */ +__entry_tramp_data_this_cpu_vector: + .quad this_cpu_vector .popsection // .rodata #endif /* CONFIG_RANDOMIZE_BASE */ #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ +/* + * Exception vectors for spectre mitigations on entry from EL1 when + * kpti is not in use. + */ + .macro generate_el1_vector, bhb +.Lvector_start\@: + kernel_ventry 1, sync_invalid // Synchronous EL1t + kernel_ventry 1, irq_invalid // IRQ EL1t + kernel_ventry 1, fiq_invalid // FIQ EL1t + kernel_ventry 1, error_invalid // Error EL1t + + kernel_ventry 1, sync // Synchronous EL1h + kernel_ventry 1, irq // IRQ EL1h + kernel_ventry 1, fiq_invalid // FIQ EL1h + kernel_ventry 1, error // Error EL1h + + .rept 4 + tramp_ventry .Lvector_start\@, 64, 0, \bhb + .endr + .rept 4 + tramp_ventry .Lvector_start\@, 32, 0, \bhb + .endr + .endm + +/* The order must match tramp_vecs and the arm64_bp_harden_el1_vectors enum. */ + .pushsection ".entry.text", "ax" + .align 11 +SYM_CODE_START(__bp_harden_el1_vectors) +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + generate_el1_vector bhb=BHB_MITIGATION_LOOP + generate_el1_vector bhb=BHB_MITIGATION_FW + generate_el1_vector bhb=BHB_MITIGATION_INSN +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ +SYM_CODE_END(__bp_harden_el1_vectors) + .popsection + + /* * Register switch for AArch64. The callee-saved registers need to be saved * and restored. On entry: @@ -1214,13 +1335,7 @@ ENTRY(__sdei_asm_entry_trampoline) */ 1: str x4, [x1, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)] -#ifdef CONFIG_RANDOMIZE_BASE - adr x4, tramp_vectors + PAGE_SIZE - add x4, x4, #:lo12:__sdei_asm_trampoline_next_handler - ldr x4, [x4] -#else - ldr x4, =__sdei_asm_handler -#endif + tramp_data_read_var x4, __sdei_asm_handler br x4 ENDPROC(__sdei_asm_entry_trampoline) NOKPROBE(__sdei_asm_entry_trampoline) @@ -1243,12 +1358,6 @@ ENDPROC(__sdei_asm_exit_trampoline) NOKPROBE(__sdei_asm_exit_trampoline) .ltorg .popsection // .entry.tramp.text -#ifdef CONFIG_RANDOMIZE_BASE -.pushsection ".rodata", "a" -__sdei_asm_trampoline_next_handler: - .quad __sdei_asm_handler -.popsection // .rodata -#endif /* CONFIG_RANDOMIZE_BASE */ #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ /* @@ -1344,7 +1453,7 @@ alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 alternative_else_nop_endif #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - tramp_alias dst=x5, sym=__sdei_asm_exit_trampoline + tramp_alias dst=x5, sym=__sdei_asm_exit_trampoline, tmp=x3 br x5 #endif ENDPROC(__sdei_asm_handler) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index e7d1fa00e2e56..37eec4d5ba908 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -498,7 +498,7 @@ size_t sve_state_size(struct task_struct const *task) void sve_alloc(struct task_struct *task) { if (task->thread.sve_state) { - memset(task->thread.sve_state, 0, sve_state_size(current)); + memset(task->thread.sve_state, 0, sve_state_size(task)); return; } diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 06e56b4703153..d4a4be02d309c 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -69,13 +69,25 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned long pc = rec->ip; u32 old, new; - long offset = (long)pc - (long)addr; + long offset = (long)addr - (long)pc; if (offset < -SZ_128M || offset >= SZ_128M) { #ifdef CONFIG_ARM64_MODULE_PLTS - struct plt_entry trampoline, *dst; struct module *mod; + /* + * There is only one ftrace trampoline per module. For now, + * this is not a problem since on arm64, all dynamic ftrace + * invocations are routed via ftrace_caller(). This will need + * to be revisited if support for multiple ftrace entry points + * is added in the future, but for now, the pr_err() below + * deals with a theoretical issue only. + */ + if (addr != FTRACE_ADDR) { + pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n"); + return -EINVAL; + } + /* * On kernels that support module PLTs, the offset between the * branch instruction and its target may legally exceed the @@ -93,46 +105,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) if (WARN_ON(!mod)) return -EINVAL; - /* - * There is only one ftrace trampoline per module. For now, - * this is not a problem since on arm64, all dynamic ftrace - * invocations are routed via ftrace_caller(). This will need - * to be revisited if support for multiple ftrace entry points - * is added in the future, but for now, the pr_err() below - * deals with a theoretical issue only. - * - * Note that PLTs are place relative, and plt_entries_equal() - * checks whether they point to the same target. Here, we need - * to check if the actual opcodes are in fact identical, - * regardless of the offset in memory so use memcmp() instead. - */ - dst = mod->arch.ftrace_trampoline; - trampoline = get_plt_entry(addr, dst); - if (memcmp(dst, &trampoline, sizeof(trampoline))) { - if (plt_entry_is_initialized(dst)) { - pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n"); - return -EINVAL; - } - - /* point the trampoline to our ftrace entry point */ - module_disable_ro(mod); - *dst = trampoline; - module_enable_ro(mod, true); - - /* - * Ensure updated trampoline is visible to instruction - * fetch before we patch in the branch. Although the - * architecture doesn't require an IPI in this case, - * Neoverse-N1 erratum #1542419 does require one - * if the TLB maintenance in module_enable_ro() is - * skipped due to rodata_enabled. It doesn't seem worth - * it to make it conditional given that this is - * certainly not a fast-path. - */ - flush_icache_range((unsigned long)&dst[0], - (unsigned long)&dst[1]); - } - addr = (unsigned long)dst; + addr = (unsigned long)mod->arch.ftrace_trampoline; #else /* CONFIG_ARM64_MODULE_PLTS */ return -EINVAL; #endif /* CONFIG_ARM64_MODULE_PLTS */ @@ -153,7 +126,7 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long pc = rec->ip; bool validate = true; u32 old = 0, new; - long offset = (long)pc - (long)addr; + long offset = (long)addr - (long)pc; if (offset < -SZ_128M || offset >= SZ_128M) { #ifdef CONFIG_ARM64_MODULE_PLTS diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index a2e0b37549433..2f784d3b4b390 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -194,7 +194,7 @@ ENDPROC(preserve_boot_args) * to be composed of multiple pages. (This effectively scales the end index). * * vstart: virtual address of start of range - * vend: virtual address of end of range + * vend: virtual address of end of range - we map [vstart, vend] * shift: shift used to transform virtual address into index * ptrs: number of entries in page table * istart: index in table corresponding to vstart @@ -231,17 +231,18 @@ ENDPROC(preserve_boot_args) * * tbl: location of page table * rtbl: address to be used for first level page table entry (typically tbl + PAGE_SIZE) - * vstart: start address to map - * vend: end address to map - we map [vstart, vend] + * vstart: virtual address of start of range + * vend: virtual address of end of range - we map [vstart, vend - 1] * flags: flags to use to map last level entries * phys: physical address corresponding to vstart - physical memory is contiguous * pgds: the number of pgd entries * * Temporaries: istart, iend, tmp, count, sv - these need to be different registers - * Preserves: vstart, vend, flags - * Corrupts: tbl, rtbl, istart, iend, tmp, count, sv + * Preserves: vstart, flags + * Corrupts: tbl, rtbl, vend, istart, iend, tmp, count, sv */ .macro map_memory, tbl, rtbl, vstart, vend, flags, phys, pgds, istart, iend, tmp, count, sv + sub \vend, \vend, #1 add \rtbl, \tbl, #PAGE_SIZE mov \sv, \rtbl mov \count, #0 diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 53bcf5386907f..a02c294a47530 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -207,8 +207,8 @@ static int __kprobes aarch64_insn_patch_text_cb(void *arg) int i, ret = 0; struct aarch64_insn_patch *pp = arg; - /* The first CPU becomes master */ - if (atomic_inc_return(&pp->cpu_count) == 1) { + /* The last CPU becomes master */ + if (atomic_inc_return(&pp->cpu_count) == num_online_cpus()) { for (i = 0; ret == 0 && i < pp->insn_cnt; i++) ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i], pp->new_insns[i]); diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 03ff15bffbb6d..d0692ecb99bb0 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -470,22 +471,48 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, return -ENOEXEC; } -int module_finalize(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - struct module *me) +static const Elf_Shdr *find_section(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + const char *name) { const Elf_Shdr *s, *se; const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { - if (strcmp(".altinstructions", secstrs + s->sh_name) == 0) - apply_alternatives_module((void *)s->sh_addr, s->sh_size); -#ifdef CONFIG_ARM64_MODULE_PLTS - if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) && - !strcmp(".text.ftrace_trampoline", secstrs + s->sh_name)) - me->arch.ftrace_trampoline = (void *)s->sh_addr; -#endif + if (strcmp(name, secstrs + s->sh_name) == 0) + return s; } + return NULL; +} + +static int module_init_ftrace_plt(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + struct module *mod) +{ +#if defined(CONFIG_ARM64_MODULE_PLTS) && defined(CONFIG_DYNAMIC_FTRACE) + const Elf_Shdr *s; + struct plt_entry *plt; + + s = find_section(hdr, sechdrs, ".text.ftrace_trampoline"); + if (!s) + return -ENOEXEC; + + plt = (void *)s->sh_addr; + *plt = get_plt_entry(FTRACE_ADDR, plt); + mod->arch.ftrace_trampoline = plt; +#endif return 0; } + +int module_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + struct module *me) +{ + const Elf_Shdr *s; + s = find_section(hdr, sechdrs, ".altinstructions"); + if (s) + apply_alternatives_module((void *)s->sh_addr, s->sh_size); + + return module_init_ftrace_plt(hdr, sechdrs, me); +} diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds index 09a0eef71d12b..9371abe2f4c2d 100644 --- a/arch/arm64/kernel/module.lds +++ b/arch/arm64/kernel/module.lds @@ -1,5 +1,5 @@ SECTIONS { - .plt 0 (NOLOAD) : { BYTE(0) } - .init.plt 0 (NOLOAD) : { BYTE(0) } - .text.ftrace_trampoline 0 (NOLOAD) : { BYTE(0) } + .plt 0 : { BYTE(0) } + .init.plt 0 : { BYTE(0) } + .text.ftrace_trampoline 0 : { BYTE(0) } } diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c index b0e03e052dd1d..b84ec4ce7d8dc 100644 --- a/arch/arm64/kernel/perf_callchain.c +++ b/arch/arm64/kernel/perf_callchain.c @@ -102,7 +102,9 @@ compat_user_backtrace(struct compat_frame_tail __user *tail, void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); + + if (guest_cbs && guest_cbs->is_in_guest()) { /* We don't support guest os callchain now */ return; } @@ -147,9 +149,10 @@ static int callchain_trace(struct stackframe *frame, void *data) void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); struct stackframe frame; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (guest_cbs && guest_cbs->is_in_guest()) { /* We don't support guest os callchain now */ return; } @@ -160,18 +163,21 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, unsigned long perf_instruction_pointer(struct pt_regs *regs) { - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) - return perf_guest_cbs->get_guest_ip(); + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); + + if (guest_cbs && guest_cbs->is_in_guest()) + return guest_cbs->get_guest_ip(); return instruction_pointer(regs); } unsigned long perf_misc_flags(struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); int misc = 0; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - if (perf_guest_cbs->is_user_mode()) + if (guest_cbs && guest_cbs->is_in_guest()) { + if (guest_cbs->is_user_mode()) misc |= PERF_RECORD_MISC_GUEST_USER; else misc |= PERF_RECORD_MISC_GUEST_KERNEL; diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 19128d994ee97..38a09cd1992ae 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -347,6 +347,17 @@ static struct attribute_group armv8_pmuv3_format_attr_group = { #define ARMV8_IDX_COUNTER_LAST(cpu_pmu) \ (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) + +/* + * We unconditionally enable ARMv8.5-PMU long event counter support + * (64-bit events) where supported. Indicate if this arm_pmu has long + * event counter support. + */ +static bool armv8pmu_has_long_event(struct arm_pmu *cpu_pmu) +{ + return (cpu_pmu->pmuver >= ID_AA64DFR0_PMUVER_8_5); +} + /* * We must chain two programmable counters for 64 bit events, * except when we have allocated the 64bit cycle counter (for CPU @@ -356,9 +367,11 @@ static struct attribute_group armv8_pmuv3_format_attr_group = { static inline bool armv8pmu_event_is_chained(struct perf_event *event) { int idx = event->hw.idx; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); return !WARN_ON(idx < 0) && armv8pmu_event_is_64bit(event) && + !armv8pmu_has_long_event(cpu_pmu) && (idx != ARMV8_IDX_CYCLE_COUNTER); } @@ -372,6 +385,73 @@ static inline bool armv8pmu_event_is_chained(struct perf_event *event) #define ARMV8_IDX_TO_COUNTER(x) \ (((x) - ARMV8_IDX_COUNTER0) & ARMV8_PMU_COUNTER_MASK) +/* + * This code is really good + */ + +#define PMEVN_CASE(n, case_macro) \ + case n: case_macro(n); break + +#define PMEVN_SWITCH(x, case_macro) \ + do { \ + switch (x) { \ + PMEVN_CASE(0, case_macro); \ + PMEVN_CASE(1, case_macro); \ + PMEVN_CASE(2, case_macro); \ + PMEVN_CASE(3, case_macro); \ + PMEVN_CASE(4, case_macro); \ + PMEVN_CASE(5, case_macro); \ + PMEVN_CASE(6, case_macro); \ + PMEVN_CASE(7, case_macro); \ + PMEVN_CASE(8, case_macro); \ + PMEVN_CASE(9, case_macro); \ + PMEVN_CASE(10, case_macro); \ + PMEVN_CASE(11, case_macro); \ + PMEVN_CASE(12, case_macro); \ + PMEVN_CASE(13, case_macro); \ + PMEVN_CASE(14, case_macro); \ + PMEVN_CASE(15, case_macro); \ + PMEVN_CASE(16, case_macro); \ + PMEVN_CASE(17, case_macro); \ + PMEVN_CASE(18, case_macro); \ + PMEVN_CASE(19, case_macro); \ + PMEVN_CASE(20, case_macro); \ + PMEVN_CASE(21, case_macro); \ + PMEVN_CASE(22, case_macro); \ + PMEVN_CASE(23, case_macro); \ + PMEVN_CASE(24, case_macro); \ + PMEVN_CASE(25, case_macro); \ + PMEVN_CASE(26, case_macro); \ + PMEVN_CASE(27, case_macro); \ + PMEVN_CASE(28, case_macro); \ + PMEVN_CASE(29, case_macro); \ + PMEVN_CASE(30, case_macro); \ + default: WARN(1, "Invalid PMEV* index\n"); \ + } \ + } while (0) + +#define RETURN_READ_PMEVCNTRN(n) \ + return read_sysreg(pmevcntr##n##_el0) +static unsigned long read_pmevcntrn(int n) +{ + PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN); + return 0; +} + +#define WRITE_PMEVCNTRN(n) \ + write_sysreg(val, pmevcntr##n##_el0) +static void write_pmevcntrn(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVCNTRN); +} + +#define WRITE_PMEVTYPERN(n) \ + write_sysreg(val, pmevtyper##n##_el0) +static void write_pmevtypern(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVTYPERN); +} + static inline u32 armv8pmu_pmcr_read(void) { return read_sysreg(pmcr_el0); @@ -400,17 +480,11 @@ static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx) return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx)); } -static inline void armv8pmu_select_counter(int idx) +static inline u64 armv8pmu_read_evcntr(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); - write_sysreg(counter, pmselr_el0); - isb(); -} -static inline u32 armv8pmu_read_evcntr(int idx) -{ - armv8pmu_select_counter(idx); - return read_sysreg(pmxevcntr_el0); + return read_pmevcntrn(counter); } static inline u64 armv8pmu_read_hw_counter(struct perf_event *event) @@ -424,6 +498,44 @@ static inline u64 armv8pmu_read_hw_counter(struct perf_event *event) return val; } +/* + * The cycle counter is always a 64-bit counter. When ARMV8_PMU_PMCR_LP + * is set the event counters also become 64-bit counters. Unless the + * user has requested a long counter (attr.config1) then we want to + * interrupt upon 32-bit overflow - we achieve this by applying a bias. + */ +static bool armv8pmu_event_needs_bias(struct perf_event *event) +{ + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (armv8pmu_event_is_64bit(event)) + return false; + + if (armv8pmu_has_long_event(cpu_pmu) || + idx == ARMV8_IDX_CYCLE_COUNTER) + return true; + + return false; +} + +static u64 armv8pmu_bias_long_counter(struct perf_event *event, u64 value) +{ + if (armv8pmu_event_needs_bias(event)) + value |= GENMASK(63, 32); + + return value; +} + +static u64 armv8pmu_unbias_long_counter(struct perf_event *event, u64 value) +{ + if (armv8pmu_event_needs_bias(event)) + value &= ~GENMASK(63, 32); + + return value; +} + static u64 armv8pmu_read_counter(struct perf_event *event) { struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); @@ -439,13 +551,14 @@ static u64 armv8pmu_read_counter(struct perf_event *event) else value = armv8pmu_read_hw_counter(event); - return value; + return armv8pmu_unbias_long_counter(event, value); } -static inline void armv8pmu_write_evcntr(int idx, u32 value) +static inline void armv8pmu_write_evcntr(int idx, u64 value) { - armv8pmu_select_counter(idx); - write_sysreg(value, pmxevcntr_el0); + u32 counter = ARMV8_IDX_TO_COUNTER(idx); + + write_pmevcntrn(counter, value); } static inline void armv8pmu_write_hw_counter(struct perf_event *event, @@ -467,28 +580,23 @@ static void armv8pmu_write_counter(struct perf_event *event, u64 value) struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; + value = armv8pmu_bias_long_counter(event, value); + if (!armv8pmu_counter_valid(cpu_pmu, idx)) pr_err("CPU%u writing wrong counter %d\n", smp_processor_id(), idx); - else if (idx == ARMV8_IDX_CYCLE_COUNTER) { - /* - * The cycles counter is really a 64-bit counter. - * When treating it as a 32-bit counter, we only count - * the lower 32 bits, and set the upper 32-bits so that - * we get an interrupt upon 32-bit overflow. - */ - if (!armv8pmu_event_is_64bit(event)) - value |= 0xffffffff00000000ULL; + else if (idx == ARMV8_IDX_CYCLE_COUNTER) write_sysreg(value, pmccntr_el0); - } else + else armv8pmu_write_hw_counter(event, value); } static inline void armv8pmu_write_evtype(int idx, u32 val) { - armv8pmu_select_counter(idx); + u32 counter = ARMV8_IDX_TO_COUNTER(idx); + val &= ARMV8_PMU_EVTYPE_MASK; - write_sysreg(val, pmxevtyper_el0); + write_pmevtypern(counter, val); } static inline void armv8pmu_write_event_type(struct perf_event *event) @@ -508,90 +616,91 @@ static inline void armv8pmu_write_event_type(struct perf_event *event) armv8pmu_write_evtype(idx - 1, hwc->config_base); armv8pmu_write_evtype(idx, chain_evt); } else { - armv8pmu_write_evtype(idx, hwc->config_base); + if (idx == ARMV8_IDX_CYCLE_COUNTER) + write_sysreg(hwc->config_base, pmccfiltr_el0); + else + armv8pmu_write_evtype(idx, hwc->config_base); } } -static inline int armv8pmu_enable_counter(int idx) +static u32 armv8pmu_event_cnten_mask(struct perf_event *event) { - u32 counter = ARMV8_IDX_TO_COUNTER(idx); - write_sysreg(BIT(counter), pmcntenset_el0); - return idx; + int counter = ARMV8_IDX_TO_COUNTER(event->hw.idx); + u32 mask = BIT(counter); + + if (armv8pmu_event_is_chained(event)) + mask |= BIT(counter - 1); + return mask; +} + +static inline void armv8pmu_enable_counter(u32 mask) +{ + /* + * Make sure event configuration register writes are visible before we + * enable the counter. + * */ + isb(); + write_sysreg(mask, pmcntenset_el0); } static inline void armv8pmu_enable_event_counter(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; - int idx = event->hw.idx; - u32 counter_bits = BIT(ARMV8_IDX_TO_COUNTER(idx)); - - if (armv8pmu_event_is_chained(event)) - counter_bits |= BIT(ARMV8_IDX_TO_COUNTER(idx - 1)); + u32 mask = armv8pmu_event_cnten_mask(event); - kvm_set_pmu_events(counter_bits, attr); + kvm_set_pmu_events(mask, attr); /* We rely on the hypervisor switch code to enable guest counters */ - if (!kvm_pmu_counter_deferred(attr)) { - armv8pmu_enable_counter(idx); - if (armv8pmu_event_is_chained(event)) - armv8pmu_enable_counter(idx - 1); - } + if (!kvm_pmu_counter_deferred(attr)) + armv8pmu_enable_counter(mask); } -static inline int armv8pmu_disable_counter(int idx) +static inline void armv8pmu_disable_counter(u32 mask) { - u32 counter = ARMV8_IDX_TO_COUNTER(idx); - write_sysreg(BIT(counter), pmcntenclr_el0); - return idx; + write_sysreg(mask, pmcntenclr_el0); + /* + * Make sure the effects of disabling the counter are visible before we + * start configuring the event. + */ + isb(); } static inline void armv8pmu_disable_event_counter(struct perf_event *event) { - struct hw_perf_event *hwc = &event->hw; struct perf_event_attr *attr = &event->attr; - int idx = hwc->idx; - u32 counter_bits = BIT(ARMV8_IDX_TO_COUNTER(idx)); - - if (armv8pmu_event_is_chained(event)) - counter_bits |= BIT(ARMV8_IDX_TO_COUNTER(idx - 1)); + u32 mask = armv8pmu_event_cnten_mask(event); - kvm_clr_pmu_events(counter_bits); + kvm_clr_pmu_events(mask); /* We rely on the hypervisor switch code to disable guest counters */ - if (!kvm_pmu_counter_deferred(attr)) { - if (armv8pmu_event_is_chained(event)) - armv8pmu_disable_counter(idx - 1); - armv8pmu_disable_counter(idx); - } + if (!kvm_pmu_counter_deferred(attr)) + armv8pmu_disable_counter(mask); } -static inline int armv8pmu_enable_intens(int idx) +static inline void armv8pmu_enable_intens(u32 mask) { - u32 counter = ARMV8_IDX_TO_COUNTER(idx); - write_sysreg(BIT(counter), pmintenset_el1); - return idx; + write_sysreg(mask, pmintenset_el1); } -static inline int armv8pmu_enable_event_irq(struct perf_event *event) +static inline void armv8pmu_enable_event_irq(struct perf_event *event) { - return armv8pmu_enable_intens(event->hw.idx); + u32 counter = ARMV8_IDX_TO_COUNTER(event->hw.idx); + armv8pmu_enable_intens(BIT(counter)); } -static inline int armv8pmu_disable_intens(int idx) +static inline void armv8pmu_disable_intens(u32 mask) { - u32 counter = ARMV8_IDX_TO_COUNTER(idx); - write_sysreg(BIT(counter), pmintenclr_el1); + write_sysreg(mask, pmintenclr_el1); isb(); /* Clear the overflow flag in case an interrupt is pending. */ - write_sysreg(BIT(counter), pmovsclr_el0); + write_sysreg(mask, pmovsclr_el0); isb(); - - return idx; } -static inline int armv8pmu_disable_event_irq(struct perf_event *event) +static inline void armv8pmu_disable_event_irq(struct perf_event *event) { - return armv8pmu_disable_intens(event->hw.idx); + u32 counter = ARMV8_IDX_TO_COUNTER(event->hw.idx); + armv8pmu_disable_intens(BIT(counter)); } static inline u32 armv8pmu_getreset_flags(void) @@ -610,15 +719,10 @@ static inline u32 armv8pmu_getreset_flags(void) static void armv8pmu_enable_event(struct perf_event *event) { - unsigned long flags; - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - /* * Enable counter and interrupt, and set the counter to count * the event that we're interested in. */ - raw_spin_lock_irqsave(&events->pmu_lock, flags); /* * Disable counter @@ -626,7 +730,7 @@ static void armv8pmu_enable_event(struct perf_event *event) armv8pmu_disable_event_counter(event); /* - * Set event (if destined for PMNx counters). + * Set event. */ armv8pmu_write_event_type(event); @@ -639,21 +743,10 @@ static void armv8pmu_enable_event(struct perf_event *event) * Enable counter */ armv8pmu_enable_event_counter(event); - - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } static void armv8pmu_disable_event(struct perf_event *event) { - unsigned long flags; - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - - /* - * Disable counter and interrupt - */ - raw_spin_lock_irqsave(&events->pmu_lock, flags); - /* * Disable counter */ @@ -663,30 +756,18 @@ static void armv8pmu_disable_event(struct perf_event *event) * Disable interrupt for this counter */ armv8pmu_disable_event_irq(event); - - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } static void armv8pmu_start(struct arm_pmu *cpu_pmu) { - unsigned long flags; - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - - raw_spin_lock_irqsave(&events->pmu_lock, flags); /* Enable all counters */ armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } static void armv8pmu_stop(struct arm_pmu *cpu_pmu) { - unsigned long flags; - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - - raw_spin_lock_irqsave(&events->pmu_lock, flags); /* Disable all counters */ armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) @@ -739,20 +820,16 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) if (!armpmu_event_set_period(event)) continue; + /* + * Perf event overflow will queue the processing of the event as + * an irq_work which will be taken care of in the handling of + * IPI_IRQ_WORK. + */ if (perf_event_overflow(event, &data, regs)) cpu_pmu->disable(event); } armv8pmu_start(cpu_pmu); - /* - * Handle the pending perf events. - * - * Note: this call *must* be run with interrupts disabled. For - * platforms that can have the PMU interrupts raised as an NMI, this - * will not work. - */ - irq_work_run(); - return IRQ_HANDLED; } @@ -805,7 +882,8 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, /* * Otherwise use events counters */ - if (armv8pmu_event_is_64bit(event)) + if (armv8pmu_event_is_64bit(event) && + !armv8pmu_has_long_event(cpu_pmu)) return armv8pmu_get_chain_idx(cpuc, cpu_pmu); else return armv8pmu_get_single_idx(cpuc, cpu_pmu); @@ -877,13 +955,11 @@ static int armv8pmu_filter_match(struct perf_event *event) static void armv8pmu_reset(void *info) { struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; - u32 idx, nb_cnt = cpu_pmu->num_events; + u32 pmcr; /* The counter and interrupt enable registers are unknown at reset. */ - for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) { - armv8pmu_disable_counter(idx); - armv8pmu_disable_intens(idx); - } + armv8pmu_disable_counter(U32_MAX); + armv8pmu_disable_intens(U32_MAX); /* Clear the counters we flip at guest entry/exit */ kvm_clr_pmu_events(U32_MAX); @@ -892,8 +968,13 @@ static void armv8pmu_reset(void *info) * Initialize & Reset PMNC. Request overflow interrupt for * 64 bit cycle counter but cheat in armv8pmu_write_counter(). */ - armv8pmu_pmcr_write(ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C | - ARMV8_PMU_PMCR_LC); + pmcr = ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_LC; + + /* Enable long event counter support where available */ + if (armv8pmu_has_long_event(cpu_pmu)) + pmcr |= ARMV8_PMU_PMCR_LP; + + armv8pmu_pmcr_write(pmcr); } static int __armv8_pmuv3_map_event(struct perf_event *event, @@ -976,6 +1057,7 @@ static void __armv8pmu_probe_pmu(void *info) if (pmuver == 0xf || pmuver == 0) return; + cpu_pmu->pmuver = pmuver; probe->present = true; /* Read the nb of CNTx counters supported from PMNC */ diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index c4452827419b0..b41ed2f907b0e 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -36,25 +36,16 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); static void __kprobes post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *); -static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode) -{ - void *addrs[1]; - u32 insns[1]; - - addrs[0] = addr; - insns[0] = opcode; - - return aarch64_insn_patch_text(addrs, insns, 1); -} - static void __kprobes arch_prepare_ss_slot(struct kprobe *p) { + kprobe_opcode_t *addr = p->ainsn.api.insn; + void *addrs[] = {addr, addr + 1}; + u32 insns[] = {p->opcode, BRK64_OPCODE_KPROBES_SS}; + /* prepare insn slot */ - patch_text(p->ainsn.api.insn, p->opcode); + aarch64_insn_patch_text(addrs, insns, 2); - flush_icache_range((uintptr_t) (p->ainsn.api.insn), - (uintptr_t) (p->ainsn.api.insn) + - MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + flush_icache_range((uintptr_t)addr, (uintptr_t)(addr + MAX_INSN_SIZE)); /* * Needs restoring of return address after stepping xol. @@ -134,13 +125,18 @@ void *alloc_insn_page(void) /* arm kprobe: install breakpoint in text */ void __kprobes arch_arm_kprobe(struct kprobe *p) { - patch_text(p->addr, BRK64_OPCODE_KPROBES); + void *addr = p->addr; + u32 insn = BRK64_OPCODE_KPROBES; + + aarch64_insn_patch_text(&addr, &insn, 1); } /* disarm kprobe: remove breakpoint from text */ void __kprobes arch_disarm_kprobe(struct kprobe *p) { - patch_text(p->addr, p->opcode); + void *addr = p->addr; + + aarch64_insn_patch_text(&addr, &p->opcode, 1); } void __kprobes arch_remove_kprobe(struct kprobe *p) @@ -169,20 +165,15 @@ static void __kprobes set_current_kprobe(struct kprobe *p) } /* - * Interrupts need to be disabled before single-step mode is set, and not - * reenabled until after single-step mode ends. - * Without disabling interrupt on local CPU, there is a chance of - * interrupt occurrence in the period of exception return and start of - * out-of-line single-step, that result in wrongly single stepping - * into the interrupt handler. + * Mask all of DAIF while executing the instruction out-of-line, to keep things + * simple and avoid nesting exceptions. Interrupts do have to be disabled since + * the kprobe state is per-CPU and doesn't get migrated. */ static void __kprobes kprobes_save_local_irqflag(struct kprobe_ctlblk *kcb, struct pt_regs *regs) { kcb->saved_irqflag = regs->pstate & DAIF_MASK; - regs->pstate |= PSR_I_BIT; - /* Unmask PSTATE.D for enabling software step exceptions. */ - regs->pstate &= ~PSR_D_BIT; + regs->pstate |= DAIF_MASK; } static void __kprobes kprobes_restore_local_irqflag(struct kprobe_ctlblk *kcb, @@ -225,10 +216,7 @@ static void __kprobes setup_singlestep(struct kprobe *p, slot = (unsigned long)p->ainsn.api.insn; set_ss_context(kcb, slot); /* mark pending ss */ - - /* IRQs and single stepping do not mix well. */ kprobes_save_local_irqflag(kcb, regs); - kernel_enable_single_step(regs); instruction_pointer_set(regs, slot); } else { /* insn simulation */ @@ -279,12 +267,8 @@ post_kprobe_handler(struct kprobe_ctlblk *kcb, struct pt_regs *regs) } /* call post handler */ kcb->kprobe_status = KPROBE_HIT_SSDONE; - if (cur->post_handler) { - /* post_handler can hit breakpoint and single step - * again, so we enable D-flag for recursive exception. - */ + if (cur->post_handler) cur->post_handler(cur, regs, 0); - } reset_current_kprobe(); } @@ -308,8 +292,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr) if (!instruction_pointer(regs)) BUG(); - kernel_disable_single_step(); - if (kcb->kprobe_status == KPROBE_REENTER) restore_previous_kprobe(kcb); else @@ -371,10 +353,6 @@ static void __kprobes kprobe_handler(struct pt_regs *regs) * pre-handler and it returned non-zero, it will * modify the execution path and no need to single * stepping. Let's just reset current kprobe and exit. - * - * pre_handler can hit a breakpoint and can step thru - * before return, keep PSTATE D-flag enabled until - * pre_handler return back. */ if (!p->pre_handler || !p->pre_handler(p, regs)) { setup_singlestep(p, regs, kcb, 0); @@ -405,7 +383,7 @@ kprobe_ss_hit(struct kprobe_ctlblk *kcb, unsigned long addr) } static int __kprobes -kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) +kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned int esr) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); int retval; @@ -415,16 +393,15 @@ kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) if (retval == DBG_HOOK_HANDLED) { kprobes_restore_local_irqflag(kcb, regs); - kernel_disable_single_step(); - post_kprobe_handler(kcb, regs); } return retval; } -static struct step_hook kprobes_step_hook = { - .fn = kprobe_single_step_handler, +static struct break_hook kprobes_break_ss_hook = { + .imm = KPROBES_BRK_SS_IMM, + .fn = kprobe_breakpoint_ss_handler, }; static int __kprobes @@ -568,7 +545,7 @@ int __kprobes arch_trampoline_kprobe(struct kprobe *p) int __init arch_init_kprobes(void) { register_kernel_break_hook(&kprobes_break_hook); - register_kernel_step_hook(&kprobes_step_hook); + register_kernel_break_hook(&kprobes_break_ss_hook); return 0; } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 6e89d283d188e..b5f7938c8955e 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -43,9 +43,9 @@ #include #include -#include #include #include +#include #include #include #include @@ -56,7 +56,7 @@ #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK) #include -unsigned long __stack_chk_guard __read_mostly; +unsigned long __stack_chk_guard __ro_after_init; EXPORT_SYMBOL(__stack_chk_guard); #endif @@ -68,33 +68,6 @@ EXPORT_SYMBOL_GPL(pm_power_off); void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); -static void __cpu_do_idle(void) -{ - dsb(sy); - wfi(); -} - -static void __cpu_do_idle_irqprio(void) -{ - unsigned long pmr; - unsigned long daif_bits; - - daif_bits = read_sysreg(daif); - write_sysreg(daif_bits | PSR_I_BIT, daif); - - /* - * Unmask PMR before going idle to make sure interrupts can - * be raised. - */ - pmr = gic_read_pmr(); - gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); - - __cpu_do_idle(); - - gic_write_pmr(pmr); - write_sysreg(daif_bits, daif); -} - /* * cpu_do_idle() * @@ -104,18 +77,22 @@ static void __cpu_do_idle_irqprio(void) * ensure that interrupts are not masked at the PMR (because the core will * not wake up if we block the wake up signal in the interrupt controller). */ -void cpu_do_idle(void) +void noinstr cpu_do_idle(void) { - if (system_uses_irq_prio_masking()) - __cpu_do_idle_irqprio(); - else - __cpu_do_idle(); + struct arm_cpuidle_irq_context context; + + arm_cpuidle_save_irq_context(&context); + + dsb(sy); + wfi(); + + arm_cpuidle_restore_irq_context(&context); } /* * This is our default idle handler. */ -void arch_cpu_idle(void) +void noinstr arch_cpu_idle(void) { /* * This should do all the clock switching and wait for interrupt @@ -500,34 +477,26 @@ static void entry_task_switch(struct task_struct *next) /* * ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT. - * Assuming the virtual counter is enabled at the beginning of times: - * - * - disable access when switching from a 64bit task to a 32bit task - * - enable access when switching from a 32bit task to a 64bit task + * Ensure access is disabled when switching to a 32bit task, ensure + * access is enabled when switching to a 64bit task. */ -static void erratum_1418040_thread_switch(struct task_struct *prev, - struct task_struct *next) +static void erratum_1418040_thread_switch(struct task_struct *next) { - bool prev32, next32; - u64 val; - - if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040)) - return; - - prev32 = is_compat_thread(task_thread_info(prev)); - next32 = is_compat_thread(task_thread_info(next)); - - if (prev32 == next32 || !this_cpu_has_cap(ARM64_WORKAROUND_1418040)) + if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) || + !this_cpu_has_cap(ARM64_WORKAROUND_1418040)) return; - val = read_sysreg(cntkctl_el1); - - if (!next32) - val |= ARCH_TIMER_USR_VCT_ACCESS_EN; + if (is_compat_thread(task_thread_info(next))) + sysreg_clear_set(cntkctl_el1, ARCH_TIMER_USR_VCT_ACCESS_EN, 0); else - val &= ~ARCH_TIMER_USR_VCT_ACCESS_EN; + sysreg_clear_set(cntkctl_el1, 0, ARCH_TIMER_USR_VCT_ACCESS_EN); +} - write_sysreg(val, cntkctl_el1); +static void erratum_1418040_new_exec(void) +{ + preempt_disable(); + erratum_1418040_thread_switch(current); + preempt_enable(); } /* @@ -546,7 +515,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev, uao_thread_switch(next); ptrauth_thread_switch(next); ssbs_thread_switch(next); - erratum_1418040_thread_switch(prev, next); + erratum_1418040_thread_switch(next); /* * Complete any pending TLB or cache maintenance on this CPU in case @@ -609,6 +578,7 @@ void arch_setup_new_exec(void) current->mm->context.flags = is_aarch32_compat_task() ? MMCF_AARCH32 : 0; ptrauth_thread_init_user(current); + erratum_1418040_new_exec(); } #ifdef CONFIG_ARM64_TAGGED_ADDR_ABI diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 987220ac4cfef..f9a402436795c 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -182,6 +182,7 @@ static void init_gic_priority_masking(void) cpuflags = read_sysreg(daif); WARN_ON(!(cpuflags & PSR_I_BIT)); + WARN_ON(!(cpuflags & PSR_F_BIT)); gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); } diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 9405d1b7f4b03..9ef999949c66a 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -88,6 +89,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) int ret = 0; unsigned long flags; struct sleep_stack_data state; + struct arm_cpuidle_irq_context context; /* * From this point debug exceptions are disabled to prevent @@ -97,12 +99,18 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) flags = local_daif_save(); /* - * Function graph tracer state gets incosistent when the kernel + * Function graph tracer state gets inconsistent when the kernel * calls functions that never return (aka suspend finishers) hence * disable graph tracing during their execution. */ pause_graph_tracing(); + /* + * Switch to using DAIF.IF instead of PMR in order to reliably + * resume if we're using pseudo-NMIs. + */ + arm_cpuidle_save_irq_context(&context); + if (__cpu_suspend_enter(&state)) { /* Call the suspend finisher */ ret = fn(arg); @@ -120,6 +128,8 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) __cpu_suspend_exit(); } + arm_cpuidle_restore_irq_context(&context); + unpause_graph_tracing(); /* diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index 3c18c2454089b..51274bab25653 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -115,6 +115,6 @@ long compat_arm_syscall(struct pt_regs *regs, int scno) (compat_thumb_mode(regs) ? 2 : 4); arm64_notify_die("Oops - bad compat syscall(2)", regs, - SIGILL, ILL_ILLTRP, addr, scno); + SIGILL, ILL_ILLTRP, addr, 0); return 0; } diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 40dffe60b8454..894791f03596d 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -32,7 +32,8 @@ cc32-as-instr = $(call try-run,\ # As a result we set our own flags here. # KBUILD_CPPFLAGS and NOSTDINC_FLAGS from top-level Makefile -VDSO_CPPFLAGS := -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include) +VDSO_CPPFLAGS := -D__KERNEL__ -nostdinc +VDSO_CPPFLAGS += -isystem $(shell $(CC_COMPAT) -print-file-name=include 2>/dev/null) VDSO_CPPFLAGS += $(LINUXINCLUDE) # Common C and assembly flags diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 1f82cf631c3c4..fbab044b3a39a 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -276,7 +276,7 @@ ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1)) <= SZ_4K, "Hibernate exit text too big or misaligned") #endif #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE, +ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) <= 3*PAGE_SIZE, "Entry trampoline text too big") #endif /* diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index f36aad0f207bb..99b8ecaae8109 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -113,6 +113,10 @@ el1_hvc_guest: /* ARM_SMCCC_ARCH_WORKAROUND_2 handling */ eor w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_1 ^ \ ARM_SMCCC_ARCH_WORKAROUND_2) + cbz w1, wa_epilogue + + eor w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_2 ^ \ + ARM_SMCCC_ARCH_WORKAROUND_3) cbnz w1, el1_trap #ifdef CONFIG_ARM64_SSBD @@ -347,4 +351,64 @@ ENTRY(__smccc_workaround_1_smc_start) ldp x0, x1, [sp, #(8 * 2)] add sp, sp, #(8 * 4) ENTRY(__smccc_workaround_1_smc_end) + +ENTRY(__smccc_workaround_3_smc_start) + esb + sub sp, sp, #(8 * 4) + stp x2, x3, [sp, #(8 * 0)] + stp x0, x1, [sp, #(8 * 2)] + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_3 + smc #0 + ldp x2, x3, [sp, #(8 * 0)] + ldp x0, x1, [sp, #(8 * 2)] + add sp, sp, #(8 * 4) +ENTRY(__smccc_workaround_3_smc_end) + +ENTRY(__spectre_bhb_loop_k8_start) + esb + sub sp, sp, #(8 * 2) + stp x0, x1, [sp, #(8 * 0)] + mov x0, #8 +2: b . + 4 + subs x0, x0, #1 + b.ne 2b + dsb nsh + isb + ldp x0, x1, [sp, #(8 * 0)] + add sp, sp, #(8 * 2) +ENTRY(__spectre_bhb_loop_k8_end) + +ENTRY(__spectre_bhb_loop_k24_start) + esb + sub sp, sp, #(8 * 2) + stp x0, x1, [sp, #(8 * 0)] + mov x0, #24 +2: b . + 4 + subs x0, x0, #1 + b.ne 2b + dsb nsh + isb + ldp x0, x1, [sp, #(8 * 0)] + add sp, sp, #(8 * 2) +ENTRY(__spectre_bhb_loop_k24_end) + +ENTRY(__spectre_bhb_loop_k32_start) + esb + sub sp, sp, #(8 * 2) + stp x0, x1, [sp, #(8 * 0)] + mov x0, #32 +2: b . + 4 + subs x0, x0, #1 + b.ne 2b + dsb nsh + isb + ldp x0, x1, [sp, #(8 * 0)] + add sp, sp, #(8 * 2) +ENTRY(__spectre_bhb_loop_k32_end) + +ENTRY(__spectre_bhb_clearbhb_start) + esb + clearbhb + isb +ENTRY(__spectre_bhb_clearbhb_end) #endif diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 14607fac7ca38..28ede7fb4698b 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -12,7 +12,7 @@ #include -#include +#include #include #include #include @@ -25,6 +25,7 @@ #include #include #include +#include extern struct exception_table_entry __start___kvm_ex_table; extern struct exception_table_entry __stop___kvm_ex_table; @@ -152,7 +153,7 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) static void deactivate_traps_vhe(void) { - extern char vectors[]; /* kernel exception vectors */ + const char *host_vectors = vectors; write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); /* @@ -163,7 +164,10 @@ static void deactivate_traps_vhe(void) asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_1165522)); write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1); - write_sysreg(vectors, vbar_el1); + + if (!arm64_kernel_unmapped_at_el0()) + host_vectors = __this_cpu_read(this_cpu_vector); + write_sysreg(host_vectors, vbar_el1); } NOKPROBE_SYMBOL(deactivate_traps_vhe); @@ -669,7 +673,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) */ if (system_uses_irq_prio_masking()) { gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); - dsb(sy); + pmr_sync(); } vcpu = kern_hyp_va(vcpu); diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index da649e90240c8..5aae2f9f0ac87 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1089,6 +1089,16 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, (0xfUL << ID_AA64ISAR1_API_SHIFT) | (0xfUL << ID_AA64ISAR1_GPA_SHIFT) | (0xfUL << ID_AA64ISAR1_GPI_SHIFT)); + } else if (id == SYS_ID_AA64DFR0_EL1) { + /* Limit guests to PMUv3 for ARMv8.1 */ + val = cpuid_feature_cap_perfmon_field(val, + ID_AA64DFR0_PMUVER_SHIFT, + ID_AA64DFR0_PMUVER_8_1); + } else if (id == SYS_ID_DFR0_EL1) { + /* Limit guests to PMUv3 for ARMv8.1 */ + val = cpuid_feature_cap_perfmon_field(val, + ID_DFR0_PERFMON_SHIFT, + ID_DFR0_PERFMON_8_1); } return val; @@ -1454,7 +1464,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* CRm=6 */ ID_SANITISED(ID_AA64ISAR0_EL1), ID_SANITISED(ID_AA64ISAR1_EL1), - ID_UNALLOCATED(6,2), + ID_SANITISED(ID_AA64ISAR2_EL1), ID_UNALLOCATED(6,3), ID_UNALLOCATED(6,4), ID_UNALLOCATED(6,5), diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index db767b072601e..7b054c67acd81 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -228,8 +228,6 @@ ENDPIPROC(__dma_flush_area) * - dir - DMA direction */ ENTRY(__dma_map_area) - cmp w2, #DMA_FROM_DEVICE - b.eq __dma_inv_area b __dma_clean_area ENDPIPROC(__dma_map_area) diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index ac485163a4a76..d087a509a20ef 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -55,6 +55,13 @@ void __sync_icache_dcache(pte_t pte) { struct page *page = pte_page(pte); + /* + * HugeTLB pages are always fully mapped, so only setting head page's + * PG_dcache_clean flag is enough. + */ + if (PageHuge(page)) + page = compound_head(page); + if (!test_and_set_bit(PG_dcache_clean, &page->flags)) sync_icache_aliases(page_address(page), page_size(page)); } @@ -67,6 +74,20 @@ EXPORT_SYMBOL_GPL(__sync_icache_dcache); */ void flush_dcache_page(struct page *page) { + /* + * Only the head page's flags of HugeTLB can be cleared since the tail + * vmemmap pages associated with each HugeTLB page are mapped with + * read-only when CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP is enabled (more + * details can refer to vmemmap_remap_pte()). Although + * __sync_icache_dcache() only set PG_dcache_clean flag on the head + * page struct, there is more than one page struct with PG_dcache_clean + * associated with the HugeTLB page since the head vmemmap page frame + * is reused (more details can refer to the comments above + * page_fixed_fake_head()). + */ + if (hugetlb_optimize_vmemmap_enabled() && PageHuge(page)) + page = compound_head(page); + if (test_bit(PG_dcache_clean, &page->flags)) clear_bit(PG_dcache_clean, &page->flags); } diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index 9be71bee902ca..8dac7fcfb4bdc 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -100,3 +101,11 @@ void __init early_ioremap_init(void) { early_ioremap_setup(); } + +bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size, + unsigned long flags) +{ + unsigned long pfn = PHYS_PFN(offset); + + return memblock_is_map_memory(pfn); +} diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 99bc0289ab2b6..5cf575f23af28 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -583,6 +583,8 @@ early_param("rodata", parse_rodata); #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 static int __init map_entry_trampoline(void) { + int i; + pgprot_t prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start); @@ -591,11 +593,15 @@ static int __init map_entry_trampoline(void) /* Map only the text into the trampoline page table */ memset(tramp_pg_dir, 0, PGD_SIZE); - __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE, - prot, __pgd_pgtable_alloc, 0); + __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, + entry_tramp_text_size(), prot, + __pgd_pgtable_alloc, NO_BLOCK_MAPPINGS); /* Map both the text and data into the kernel page table */ - __set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot); + for (i = 0; i < DIV_ROUND_UP(entry_tramp_text_size(), PAGE_SIZE); i++) + __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i, + pa_start + i * PAGE_SIZE, prot); + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { extern char __entry_tramp_data_start[]; diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 945e5f690edec..9f71ca4414825 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -701,6 +701,19 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, } break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + /* + * Nothing required here. + * + * In case of arm64, we rely on the firmware mitigation of + * Speculative Store Bypass as controlled via the ssbd kernel + * parameter. Whenever the mitigation is enabled, it works + * for all of the kernel code with no need to provide any + * additional instructions. + */ + break; + /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_W: case BPF_ST | BPF_MEM | BPF_H: @@ -896,15 +909,18 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) goto out_off; } - /* 1. Initial fake pass to compute ctx->idx. */ - - /* Fake pass to fill in ctx->offset. */ - if (build_body(&ctx, extra_pass)) { + /* + * 1. Initial fake pass to compute ctx->idx and ctx->offset. + * + * BPF line info needs ctx->offset[i] to be the offset of + * instruction[i] in jited image, so build prologue first. + */ + if (build_prologue(&ctx, was_classic)) { prog = orig_prog; goto out_off; } - if (build_prologue(&ctx, was_classic)) { + if (build_body(&ctx, extra_pass)) { prog = orig_prog; goto out_off; } @@ -957,6 +973,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) bpf_jit_binary_free(header); prog->bpf_func = NULL; prog->jited = 0; + prog->jited_len = 0; goto out_off; } bpf_jit_binary_lock_ro(header); @@ -970,6 +987,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) prog->jited_len = image_size; if (!prog->is_func || extra_pass) { + int i; + + /* offset[prog->len] is the size of program */ + for (i = 0; i <= prog->len; i++) + ctx.offset[i] *= AARCH64_INSN_SIZE; bpf_prog_fill_jited_linfo(prog, ctx.offset + 1); out_off: kfree(ctx.offset); @@ -983,6 +1005,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) return prog; } +u64 bpf_jit_alloc_exec_limit(void) +{ + return BPF_JIT_REGION_SIZE; +} + void *bpf_jit_alloc_exec(unsigned long size) { return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, diff --git a/arch/csky/kernel/perf_callchain.c b/arch/csky/kernel/perf_callchain.c index ab55e98ee8f62..35318a635a5fa 100644 --- a/arch/csky/kernel/perf_callchain.c +++ b/arch/csky/kernel/perf_callchain.c @@ -86,10 +86,11 @@ static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry, void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); unsigned long fp = 0; /* C-SKY does not support virtualization. */ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) + if (guest_cbs && guest_cbs->is_in_guest()) return; fp = regs->regs[4]; @@ -110,10 +111,11 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); struct stackframe fr; /* C-SKY does not support virtualization. */ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (guest_cbs && guest_cbs->is_in_guest()) { pr_warn("C-SKY does not support perf in guest mode!"); return; } diff --git a/arch/csky/kernel/ptrace.c b/arch/csky/kernel/ptrace.c index 313623a19ecbf..885194720fe0b 100644 --- a/arch/csky/kernel/ptrace.c +++ b/arch/csky/kernel/ptrace.c @@ -96,7 +96,8 @@ static int gpr_set(struct task_struct *target, if (ret) return ret; - regs.sr = task_pt_regs(target)->sr; + /* BIT(0) of regs.sr is Condition Code/Carry bit */ + regs.sr = (regs.sr & BIT(0)) | (task_pt_regs(target)->sr & ~BIT(0)); #ifdef CONFIG_CPU_HAS_HILO regs.dcsr = task_pt_regs(target)->dcsr; #endif diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c index d44f5fe0731d4..fb394f5f83819 100644 --- a/arch/csky/kernel/signal.c +++ b/arch/csky/kernel/signal.c @@ -52,10 +52,14 @@ static long restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) { int err = 0; + unsigned long sr = regs->sr; /* sc_pt_regs is structured the same as the start of pt_regs */ err |= __copy_from_user(regs, &sc->sc_pt_regs, sizeof(struct pt_regs)); + /* BIT(0) of regs->sr is Condition Code/Carry bit */ + regs->sr = (sr & ~1) | (regs->sr & 1); + /* Restore the floating-point state. */ err |= restore_fpu_state(sc); diff --git a/arch/hexagon/lib/io.c b/arch/hexagon/lib/io.c index d35d69d6588c4..55f75392857b0 100644 --- a/arch/hexagon/lib/io.c +++ b/arch/hexagon/lib/io.c @@ -27,6 +27,7 @@ void __raw_readsw(const void __iomem *addr, void *data, int len) *dst++ = *src; } +EXPORT_SYMBOL(__raw_readsw); /* * __raw_writesw - read words a short at a time @@ -47,6 +48,7 @@ void __raw_writesw(void __iomem *addr, const void *data, int len) } +EXPORT_SYMBOL(__raw_writesw); /* Pretty sure len is pre-adjusted for the length of the access already */ void __raw_readsl(const void __iomem *addr, void *data, int len) @@ -62,6 +64,7 @@ void __raw_readsl(const void __iomem *addr, void *data, int len) } +EXPORT_SYMBOL(__raw_readsl); void __raw_writesl(void __iomem *addr, const void *data, int len) { @@ -76,3 +79,4 @@ void __raw_writesl(void __iomem *addr, const void *data, int len) } +EXPORT_SYMBOL(__raw_writesl); diff --git a/arch/ia64/Kconfig.debug b/arch/ia64/Kconfig.debug index 40ca23bd228d6..2ce008e2d1644 100644 --- a/arch/ia64/Kconfig.debug +++ b/arch/ia64/Kconfig.debug @@ -39,7 +39,7 @@ config DISABLE_VHPT config IA64_DEBUG_CMPXCHG bool "Turn on compare-and-exchange bug checking (slow!)" - depends on DEBUG_KERNEL + depends on DEBUG_KERNEL && PRINTK help Selecting this option turns on bug checking for the IA-64 compare-and-exchange instructions. This is slow! Itaniums diff --git a/arch/ia64/include/asm/timex.h b/arch/ia64/include/asm/timex.h index 869a3ac6bf23a..7ccc077a60bed 100644 --- a/arch/ia64/include/asm/timex.h +++ b/arch/ia64/include/asm/timex.h @@ -39,6 +39,7 @@ get_cycles (void) ret = ia64_getreg(_IA64_REG_AR_ITC); return ret; } +#define get_cycles get_cycles extern void ia64_cpu_local_tick (void); extern unsigned long long ia64_native_sched_clock (void); diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 70d1587ddcd46..361b109933725 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -448,7 +448,8 @@ void __init acpi_numa_fixup(void) if (srat_num_cpus == 0) { node_set_online(0); node_cpuid[0].phys_id = hard_smp_processor_id(); - return; + slit_distance(0, 0) = LOCAL_DISTANCE; + goto out; } /* @@ -491,7 +492,7 @@ void __init acpi_numa_fixup(void) for (j = 0; j < MAX_NUMNODES; j++) slit_distance(i, j) = i == j ? LOCAL_DISTANCE : REMOTE_DISTANCE; - return; + goto out; } memset(numa_slit, -1, sizeof(numa_slit)); @@ -516,6 +517,8 @@ void __init acpi_numa_fixup(void) printk("\n"); } #endif +out: + node_possible_map = node_online_map; } #endif /* CONFIG_ACPI_NUMA */ diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index b3dc39050c1ad..fa10d51f62177 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -396,9 +396,83 @@ static void kretprobe_trampoline(void) { } +/* + * At this point the target function has been tricked into + * returning into our trampoline. Lookup the associated instance + * and then: + * - call the handler function + * - cleanup by marking the instance as unused + * - long jump back to the original return address + */ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { - regs->cr_iip = __kretprobe_trampoline_handler(regs, kretprobe_trampoline, NULL); + struct kretprobe_instance *ri = NULL; + struct hlist_head *head, empty_rp; + struct hlist_node *tmp; + unsigned long flags, orig_ret_address = 0; + unsigned long trampoline_address = + (unsigned long)dereference_function_descriptor(kretprobe_trampoline); + + INIT_HLIST_HEAD(&empty_rp); + kretprobe_hash_lock(current, &head, &flags); + + /* + * It is possible to have multiple instances associated with a given + * task either because an multiple functions in the call path + * have a return probe installed on them, and/or more than one return + * return probe was registered for a target function. + * + * We can handle this because: + * - instances are always inserted at the head of the list + * - when multiple return probes are registered for the same + * function, the first instance's ret_addr will point to the + * real return address, and all the rest will point to + * kretprobe_trampoline + */ + hlist_for_each_entry_safe(ri, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long)ri->ret_addr; + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + regs->cr_iip = orig_ret_address; + + hlist_for_each_entry_safe(ri, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + if (ri->rp && ri->rp->handler) + ri->rp->handler(ri, regs); + + orig_ret_address = (unsigned long)ri->ret_addr; + recycle_rp_inst(ri, &empty_rp); + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + kretprobe_assert(ri, orig_ret_address, trampoline_address); + + kretprobe_hash_unlock(current, &flags); + + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { + hlist_del(&ri->hlist); + kfree(ri); + } /* * By returning a non-zero value, we are telling * kprobe_handler() that we don't want the post_handler @@ -411,10 +485,9 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { ri->ret_addr = (kprobe_opcode_t *)regs->b0; - ri->fp = NULL; /* Replace the return addr with trampoline addr */ - regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip; + regs->b0 = (unsigned long)dereference_function_descriptor(kretprobe_trampoline); } /* Check the instruction in the slot is break */ @@ -918,14 +991,14 @@ static struct kprobe trampoline_p = { int __init arch_init_kprobes(void) { trampoline_p.addr = - (kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip; + dereference_function_descriptor(kretprobe_trampoline); return register_kprobe(&trampoline_p); } int __kprobes arch_trampoline_kprobe(struct kprobe *p) { if (p->addr == - (kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip) + dereference_function_descriptor(kretprobe_trampoline)) return 1; return 0; diff --git a/arch/m68k/Kconfig.bus b/arch/m68k/Kconfig.bus index 9d0a3a23d50e5..355c51309ed85 100644 --- a/arch/m68k/Kconfig.bus +++ b/arch/m68k/Kconfig.bus @@ -63,7 +63,7 @@ source "drivers/zorro/Kconfig" endif -if !MMU +if COLDFIRE config ISA_DMA_API def_bool !M5272 diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index 60ac1cd8b96fb..6bc7fc14163f8 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -309,7 +309,7 @@ comment "Processor Specific Options" config M68KFPU_EMU bool "Math emulation support" - depends on MMU + depends on M68KCLASSIC && FPU help At some point in the future, this will cause floating-point math instructions to be emulated by the kernel on machines that lack a diff --git a/arch/m68k/Kconfig.machine b/arch/m68k/Kconfig.machine index 1bbe0dd0c4fe5..f0527b155c057 100644 --- a/arch/m68k/Kconfig.machine +++ b/arch/m68k/Kconfig.machine @@ -190,6 +190,7 @@ config INIT_LCD config MEMORY_RESERVE int "Memory reservation (MiB)" depends on (UCSIMM || UCDIMM) + default 0 help Reserve certain memory regions on 68x328 based boards. @@ -319,6 +320,7 @@ comment "Machine Options" config UBOOT bool "Support for U-Boot command line parameters" + depends on COLDFIRE help If you say Y here kernel will try to collect command line parameters from the initial u-boot stack. diff --git a/arch/m68k/emu/nfeth.c b/arch/m68k/emu/nfeth.c index a4ebd2445edae..e5831cd293d05 100644 --- a/arch/m68k/emu/nfeth.c +++ b/arch/m68k/emu/nfeth.c @@ -254,8 +254,8 @@ static void __exit nfeth_cleanup(void) for (i = 0; i < MAX_UNIT; i++) { if (nfeth_dev[i]) { - unregister_netdev(nfeth_dev[0]); - free_netdev(nfeth_dev[0]); + unregister_netdev(nfeth_dev[i]); + free_netdev(nfeth_dev[i]); } } free_irq(nfEtherIRQ, nfeth_interrupt); diff --git a/arch/m68k/include/asm/pgtable_no.h b/arch/m68k/include/asm/pgtable_no.h index c18165b0d9043..6b02484665691 100644 --- a/arch/m68k/include/asm/pgtable_no.h +++ b/arch/m68k/include/asm/pgtable_no.h @@ -42,7 +42,8 @@ extern void paging_init(void); * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. */ -#define ZERO_PAGE(vaddr) (virt_to_page(0)) +extern void *empty_zero_page; +#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) /* * All 32bit addresses are effectively valid for vmalloc... diff --git a/arch/m68k/include/asm/raw_io.h b/arch/m68k/include/asm/raw_io.h index 8a6dc6e5a279c..8ab3c350bd530 100644 --- a/arch/m68k/include/asm/raw_io.h +++ b/arch/m68k/include/asm/raw_io.h @@ -17,21 +17,21 @@ * two accesses to memory, which may be undesirable for some devices. */ #define in_8(addr) \ - ({ u8 __v = (*(__force volatile u8 *) (addr)); __v; }) + ({ u8 __v = (*(__force volatile u8 *) (unsigned long)(addr)); __v; }) #define in_be16(addr) \ - ({ u16 __v = (*(__force volatile u16 *) (addr)); __v; }) + ({ u16 __v = (*(__force volatile u16 *) (unsigned long)(addr)); __v; }) #define in_be32(addr) \ - ({ u32 __v = (*(__force volatile u32 *) (addr)); __v; }) + ({ u32 __v = (*(__force volatile u32 *) (unsigned long)(addr)); __v; }) #define in_le16(addr) \ - ({ u16 __v = le16_to_cpu(*(__force volatile __le16 *) (addr)); __v; }) + ({ u16 __v = le16_to_cpu(*(__force volatile __le16 *) (unsigned long)(addr)); __v; }) #define in_le32(addr) \ - ({ u32 __v = le32_to_cpu(*(__force volatile __le32 *) (addr)); __v; }) + ({ u32 __v = le32_to_cpu(*(__force volatile __le32 *) (unsigned long)(addr)); __v; }) -#define out_8(addr,b) (void)((*(__force volatile u8 *) (addr)) = (b)) -#define out_be16(addr,w) (void)((*(__force volatile u16 *) (addr)) = (w)) -#define out_be32(addr,l) (void)((*(__force volatile u32 *) (addr)) = (l)) -#define out_le16(addr,w) (void)((*(__force volatile __le16 *) (addr)) = cpu_to_le16(w)) -#define out_le32(addr,l) (void)((*(__force volatile __le32 *) (addr)) = cpu_to_le32(l)) +#define out_8(addr,b) (void)((*(__force volatile u8 *) (unsigned long)(addr)) = (b)) +#define out_be16(addr,w) (void)((*(__force volatile u16 *) (unsigned long)(addr)) = (w)) +#define out_be32(addr,l) (void)((*(__force volatile u32 *) (unsigned long)(addr)) = (l)) +#define out_le16(addr,w) (void)((*(__force volatile __le16 *) (unsigned long)(addr)) = cpu_to_le16(w)) +#define out_le32(addr,l) (void)((*(__force volatile __le32 *) (unsigned long)(addr)) = cpu_to_le32(l)) #define raw_inb in_8 #define raw_inw in_be16 diff --git a/arch/m68k/include/asm/timex.h b/arch/m68k/include/asm/timex.h index 6a21d93582805..f4a7a340f4cae 100644 --- a/arch/m68k/include/asm/timex.h +++ b/arch/m68k/include/asm/timex.h @@ -35,7 +35,7 @@ static inline unsigned long random_get_entropy(void) { if (mach_random_get_entropy) return mach_random_get_entropy(); - return 0; + return random_get_entropy_fallback(); } #define random_get_entropy random_get_entropy diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c index 617d105a92197..fb4c850de7652 100644 --- a/arch/m68k/kernel/signal.c +++ b/arch/m68k/kernel/signal.c @@ -448,7 +448,7 @@ static inline void save_fpu_state(struct sigcontext *sc, struct pt_regs *regs) if (CPU_IS_060 ? sc->sc_fpstate[2] : sc->sc_fpstate[0]) { fpu_version = sc->sc_fpstate[0]; - if (CPU_IS_020_OR_030 && + if (CPU_IS_020_OR_030 && !regs->stkadj && regs->vector >= (VEC_FPBRUC * 4) && regs->vector <= (VEC_FPNAN * 4)) { /* Clear pending exception in 68882 idle frame */ @@ -511,7 +511,7 @@ static inline int rt_save_fpu_state(struct ucontext __user *uc, struct pt_regs * if (!(CPU_IS_060 || CPU_IS_COLDFIRE)) context_size = fpstate[1]; fpu_version = fpstate[0]; - if (CPU_IS_020_OR_030 && + if (CPU_IS_020_OR_030 && !regs->stkadj && regs->vector >= (VEC_FPBRUC * 4) && regs->vector <= (VEC_FPNAN * 4)) { /* Clear pending exception in 68882 idle frame */ @@ -829,18 +829,24 @@ asmlinkage int do_rt_sigreturn(struct pt_regs *regs, struct switch_stack *sw) return 0; } +static inline struct pt_regs *rte_regs(struct pt_regs *regs) +{ + return (void *)regs + regs->stkadj; +} + static void setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs, unsigned long mask) { + struct pt_regs *tregs = rte_regs(regs); sc->sc_mask = mask; sc->sc_usp = rdusp(); sc->sc_d0 = regs->d0; sc->sc_d1 = regs->d1; sc->sc_a0 = regs->a0; sc->sc_a1 = regs->a1; - sc->sc_sr = regs->sr; - sc->sc_pc = regs->pc; - sc->sc_formatvec = regs->format << 12 | regs->vector; + sc->sc_sr = tregs->sr; + sc->sc_pc = tregs->pc; + sc->sc_formatvec = tregs->format << 12 | tregs->vector; save_a5_state(sc, regs); save_fpu_state(sc, regs); } @@ -848,6 +854,7 @@ static void setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs, static inline int rt_setup_ucontext(struct ucontext __user *uc, struct pt_regs *regs) { struct switch_stack *sw = (struct switch_stack *)regs - 1; + struct pt_regs *tregs = rte_regs(regs); greg_t __user *gregs = uc->uc_mcontext.gregs; int err = 0; @@ -868,9 +875,9 @@ static inline int rt_setup_ucontext(struct ucontext __user *uc, struct pt_regs * err |= __put_user(sw->a5, &gregs[13]); err |= __put_user(sw->a6, &gregs[14]); err |= __put_user(rdusp(), &gregs[15]); - err |= __put_user(regs->pc, &gregs[16]); - err |= __put_user(regs->sr, &gregs[17]); - err |= __put_user((regs->format << 12) | regs->vector, &uc->uc_formatvec); + err |= __put_user(tregs->pc, &gregs[16]); + err |= __put_user(tregs->sr, &gregs[17]); + err |= __put_user((tregs->format << 12) | tregs->vector, &uc->uc_formatvec); err |= rt_save_fpu_state(uc, regs); return err; } @@ -887,13 +894,14 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { struct sigframe __user *frame; - int fsize = frame_extra_sizes(regs->format); + struct pt_regs *tregs = rte_regs(regs); + int fsize = frame_extra_sizes(tregs->format); struct sigcontext context; int err = 0, sig = ksig->sig; if (fsize < 0) { pr_debug("setup_frame: Unknown frame format %#x\n", - regs->format); + tregs->format); return -EFAULT; } @@ -904,7 +912,7 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set, err |= __put_user(sig, &frame->sig); - err |= __put_user(regs->vector, &frame->code); + err |= __put_user(tregs->vector, &frame->code); err |= __put_user(&frame->sc, &frame->psc); if (_NSIG_WORDS > 1) @@ -929,34 +937,28 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set, push_cache ((unsigned long) &frame->retcode); - /* - * Set up registers for signal handler. All the state we are about - * to destroy is successfully copied to sigframe. - */ - wrusp ((unsigned long) frame); - regs->pc = (unsigned long) ksig->ka.sa.sa_handler; - adjustformat(regs); - /* * This is subtle; if we build more than one sigframe, all but the * first one will see frame format 0 and have fsize == 0, so we won't * screw stkadj. */ - if (fsize) + if (fsize) { regs->stkadj = fsize; - - /* Prepare to skip over the extra stuff in the exception frame. */ - if (regs->stkadj) { - struct pt_regs *tregs = - (struct pt_regs *)((ulong)regs + regs->stkadj); + tregs = rte_regs(regs); pr_debug("Performing stackadjust=%04lx\n", regs->stkadj); - /* This must be copied with decreasing addresses to - handle overlaps. */ tregs->vector = 0; tregs->format = 0; - tregs->pc = regs->pc; tregs->sr = regs->sr; } + + /* + * Set up registers for signal handler. All the state we are about + * to destroy is successfully copied to sigframe. + */ + wrusp ((unsigned long) frame); + tregs->pc = (unsigned long) ksig->ka.sa.sa_handler; + adjustformat(regs); + return 0; } @@ -964,7 +966,8 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { struct rt_sigframe __user *frame; - int fsize = frame_extra_sizes(regs->format); + struct pt_regs *tregs = rte_regs(regs); + int fsize = frame_extra_sizes(tregs->format); int err = 0, sig = ksig->sig; if (fsize < 0) { @@ -1013,34 +1016,27 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, push_cache ((unsigned long) &frame->retcode); - /* - * Set up registers for signal handler. All the state we are about - * to destroy is successfully copied to sigframe. - */ - wrusp ((unsigned long) frame); - regs->pc = (unsigned long) ksig->ka.sa.sa_handler; - adjustformat(regs); - /* * This is subtle; if we build more than one sigframe, all but the * first one will see frame format 0 and have fsize == 0, so we won't * screw stkadj. */ - if (fsize) + if (fsize) { regs->stkadj = fsize; - - /* Prepare to skip over the extra stuff in the exception frame. */ - if (regs->stkadj) { - struct pt_regs *tregs = - (struct pt_regs *)((ulong)regs + regs->stkadj); + tregs = rte_regs(regs); pr_debug("Performing stackadjust=%04lx\n", regs->stkadj); - /* This must be copied with decreasing addresses to - handle overlaps. */ tregs->vector = 0; tregs->format = 0; - tregs->pc = regs->pc; tregs->sr = regs->sr; } + + /* + * Set up registers for signal handler. All the state we are about + * to destroy is successfully copied to sigframe. + */ + wrusp ((unsigned long) frame); + tregs->pc = (unsigned long) ksig->ka.sa.sa_handler; + adjustformat(regs); return 0; } diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h index a1f206b90753a..c8cd66c8d2577 100644 --- a/arch/microblaze/include/asm/uaccess.h +++ b/arch/microblaze/include/asm/uaccess.h @@ -171,27 +171,27 @@ extern long __user_bad(void); #define __get_user(x, ptr) \ ({ \ - unsigned long __gu_val = 0; \ long __gu_err; \ switch (sizeof(*(ptr))) { \ case 1: \ - __get_user_asm("lbu", (ptr), __gu_val, __gu_err); \ + __get_user_asm("lbu", (ptr), x, __gu_err); \ break; \ case 2: \ - __get_user_asm("lhu", (ptr), __gu_val, __gu_err); \ + __get_user_asm("lhu", (ptr), x, __gu_err); \ break; \ case 4: \ - __get_user_asm("lw", (ptr), __gu_val, __gu_err); \ + __get_user_asm("lw", (ptr), x, __gu_err); \ break; \ - case 8: \ - __gu_err = __copy_from_user(&__gu_val, ptr, 8); \ - if (__gu_err) \ - __gu_err = -EFAULT; \ + case 8: { \ + __u64 __x = 0; \ + __gu_err = raw_copy_from_user(&__x, ptr, 8) ? \ + -EFAULT : 0; \ + (x) = (typeof(x))(typeof((x) - (x)))__x; \ break; \ + } \ default: \ /* __gu_val = 0; __gu_err = -EINVAL;*/ __gu_err = __user_bad();\ } \ - x = (__force __typeof__(*(ptr))) __gu_val; \ __gu_err; \ }) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 6ecdc690f7336..2811ecc1f3c71 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -46,6 +46,7 @@ config MIPS select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES select HAVE_ASM_MODVERSIONS + select HAVE_CBPF_JIT if !64BIT && !CPU_MICROMIPS select HAVE_EBPF_JIT if 64BIT && !CPU_MICROMIPS && TARGET_ISA_REV >= 2 select HAVE_CONTEXT_TRACKING select HAVE_COPY_THREAD_TLS @@ -293,6 +294,9 @@ config BCM63XX select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN select SYS_HAS_EARLY_PRINTK + select SYS_HAS_CPU_BMIPS32_3300 + select SYS_HAS_CPU_BMIPS4350 + select SYS_HAS_CPU_BMIPS4380 select SWAP_IO_SPACE select GPIOLIB select HAVE_CLK @@ -1392,6 +1396,7 @@ config CPU_LOONGSON3 select WEAK_REORDERING_BEYOND_LLSC select MIPS_PGD_C0_CONTEXT select MIPS_L1_CACHE_SHIFT_6 + select MIPS_FP_SUPPORT select GPIOLIB select SWIOTLB help @@ -3054,7 +3059,7 @@ config STACKTRACE_SUPPORT config PGTABLE_LEVELS int default 4 if PAGE_SIZE_4KB && MIPS_VA_BITS_48 - default 3 if 64BIT && !PAGE_SIZE_64KB + default 3 if 64BIT && (!PAGE_SIZE_64KB || MIPS_VA_BITS_48) default 2 config MIPS_AUTO_PFN_OFFSET diff --git a/arch/mips/bcm63xx/clk.c b/arch/mips/bcm63xx/clk.c index 164115944a7fd..dcfa0ea912fe1 100644 --- a/arch/mips/bcm63xx/clk.c +++ b/arch/mips/bcm63xx/clk.c @@ -381,6 +381,18 @@ void clk_disable(struct clk *clk) EXPORT_SYMBOL(clk_disable); +struct clk *clk_get_parent(struct clk *clk) +{ + return NULL; +} +EXPORT_SYMBOL(clk_get_parent); + +int clk_set_parent(struct clk *clk, struct clk *parent) +{ + return 0; +} +EXPORT_SYMBOL(clk_set_parent); + unsigned long clk_get_rate(struct clk *clk) { if (!clk) diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c index 2f81a94c71a60..7aee9ff19c1a6 100644 --- a/arch/mips/bmips/setup.c +++ b/arch/mips/bmips/setup.c @@ -167,7 +167,7 @@ void __init plat_mem_setup(void) dtb = phys_to_virt(fw_arg2); else if (fw_passed_dtb) /* UHI interface or appended dtb */ dtb = (void *)fw_passed_dtb; - else if (__dtb_start != __dtb_end) + else if (&__dtb_start != &__dtb_end) dtb = (void *)__dtb_start; else panic("no dtb found"); diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile index 1d6ebbc2a5d02..7ad20e00c398d 100644 --- a/arch/mips/boot/compressed/Makefile +++ b/arch/mips/boot/compressed/Makefile @@ -49,6 +49,8 @@ $(obj)/uart-ath79.c: $(srctree)/arch/mips/ath79/early_printk.c vmlinuzobjs-$(CONFIG_KERNEL_XZ) += $(obj)/ashldi3.o +vmlinuzobjs-$(CONFIG_KERNEL_ZSTD) += $(obj)/bswapdi.o + extra-y += ashldi3.c $(obj)/ashldi3.c: $(obj)/%.c: $(srctree)/lib/%.c FORCE $(call if_changed,shipped) @@ -57,6 +59,10 @@ extra-y += bswapsi.c $(obj)/bswapsi.c: $(obj)/%.c: $(srctree)/arch/mips/lib/%.c FORCE $(call if_changed,shipped) +extra-y += bswapdi.c +$(obj)/bswapdi.c: $(obj)/%.c: $(srctree)/arch/mips/lib/%.c FORCE + $(call if_changed,shipped) + targets := $(notdir $(vmlinuzobjs-y)) targets += vmlinux.bin diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c index 51685f893eab0..c214fe4e678bb 100644 --- a/arch/mips/cavium-octeon/octeon-platform.c +++ b/arch/mips/cavium-octeon/octeon-platform.c @@ -328,6 +328,7 @@ static int __init octeon_ehci_device_init(void) pd->dev.platform_data = &octeon_ehci_pdata; octeon_ehci_hw_start(&pd->dev); + put_device(&pd->dev); return ret; } @@ -391,6 +392,7 @@ static int __init octeon_ohci_device_init(void) pd->dev.platform_data = &octeon_ohci_pdata; octeon_ohci_hw_start(&pd->dev); + put_device(&pd->dev); return ret; } diff --git a/arch/mips/cavium-octeon/octeon-usb.c b/arch/mips/cavium-octeon/octeon-usb.c index 4017398519cf9..e092d86e63581 100644 --- a/arch/mips/cavium-octeon/octeon-usb.c +++ b/arch/mips/cavium-octeon/octeon-usb.c @@ -544,6 +544,7 @@ static int __init dwc3_octeon_device_init(void) devm_iounmap(&pdev->dev, base); devm_release_mem_region(&pdev->dev, res->start, resource_size(res)); + put_device(&pdev->dev); } } while (node != NULL); diff --git a/arch/mips/dec/int-handler.S b/arch/mips/dec/int-handler.S index a25ef822e7250..5ed38e6180190 100644 --- a/arch/mips/dec/int-handler.S +++ b/arch/mips/dec/int-handler.S @@ -131,7 +131,7 @@ */ mfc0 t0,CP0_CAUSE # get pending interrupts mfc0 t1,CP0_STATUS -#ifdef CONFIG_32BIT +#if defined(CONFIG_32BIT) && defined(CONFIG_MIPS_FP_SUPPORT) lw t2,cpu_fpu_mask #endif andi t0,ST0_IM # CAUSE.CE may be non-zero! @@ -139,7 +139,7 @@ beqz t0,spurious -#ifdef CONFIG_32BIT +#if defined(CONFIG_32BIT) && defined(CONFIG_MIPS_FP_SUPPORT) and t2,t0 bnez t2,fpu # handle FPU immediately #endif @@ -280,7 +280,7 @@ handle_it: j dec_irq_dispatch nop -#ifdef CONFIG_32BIT +#if defined(CONFIG_32BIT) && defined(CONFIG_MIPS_FP_SUPPORT) fpu: lw t0,fpu_kstat_irq nop diff --git a/arch/mips/dec/prom/Makefile b/arch/mips/dec/prom/Makefile index d95016016b42b..2bad87551203b 100644 --- a/arch/mips/dec/prom/Makefile +++ b/arch/mips/dec/prom/Makefile @@ -6,4 +6,4 @@ lib-y += init.o memory.o cmdline.o identify.o console.o -lib-$(CONFIG_32BIT) += locore.o +lib-$(CONFIG_CPU_R3000) += locore.o diff --git a/arch/mips/dec/setup.c b/arch/mips/dec/setup.c index 1fc8dffa8d1d0..649b50ae5b1e3 100644 --- a/arch/mips/dec/setup.c +++ b/arch/mips/dec/setup.c @@ -766,7 +766,8 @@ void __init arch_init_irq(void) dec_interrupt[DEC_IRQ_HALT] = -1; /* Register board interrupts: FPU and cascade. */ - if (dec_interrupt[DEC_IRQ_FPU] >= 0 && cpu_has_fpu) { + if (IS_ENABLED(CONFIG_MIPS_FP_SUPPORT) && + dec_interrupt[DEC_IRQ_FPU] >= 0 && cpu_has_fpu) { struct irq_desc *desc_fpu; int irq_fpu; diff --git a/arch/mips/generic/yamon-dt.c b/arch/mips/generic/yamon-dt.c index a3aa22c77cadc..a07a5edbcda78 100644 --- a/arch/mips/generic/yamon-dt.c +++ b/arch/mips/generic/yamon-dt.c @@ -75,7 +75,7 @@ static unsigned int __init gen_fdt_mem_array( __init int yamon_dt_append_memory(void *fdt, const struct yamon_mem_region *regions) { - unsigned long phys_memsize, memsize; + unsigned long phys_memsize = 0, memsize; __be32 mem_array[2 * MAX_MEM_ARRAY_ENTRIES]; unsigned int mem_entries; int i, err, mem_off; diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h index f6136871561dc..9182ce828f540 100644 --- a/arch/mips/include/asm/cmpxchg.h +++ b/arch/mips/include/asm/cmpxchg.h @@ -239,6 +239,7 @@ static inline unsigned long __cmpxchg64(volatile void *ptr, " .set " MIPS_ISA_ARCH_LEVEL " \n" /* Load 64 bits from ptr */ "1: lld %L0, %3 # __cmpxchg64 \n" + " .set pop \n" /* * Split the 64 bit value we loaded into the 2 registers that hold the * ret variable. @@ -266,6 +267,8 @@ static inline unsigned long __cmpxchg64(volatile void *ptr, " or %L1, %L1, $at \n" " .set at \n" # endif + " .set push \n" + " .set " MIPS_ISA_ARCH_LEVEL " \n" /* Attempt to store new at ptr */ " scd %L1, %2 \n" /* If we failed, loop! */ diff --git a/arch/mips/include/asm/dec/prom.h b/arch/mips/include/asm/dec/prom.h index 62c7dfb90e06c..1e1247add1cf8 100644 --- a/arch/mips/include/asm/dec/prom.h +++ b/arch/mips/include/asm/dec/prom.h @@ -43,16 +43,11 @@ */ #define REX_PROM_MAGIC 0x30464354 -#ifdef CONFIG_64BIT - -#define prom_is_rex(magic) 1 /* KN04 and KN05 are REX PROMs. */ - -#else /* !CONFIG_64BIT */ - -#define prom_is_rex(magic) ((magic) == REX_PROM_MAGIC) - -#endif /* !CONFIG_64BIT */ - +/* KN04 and KN05 are REX PROMs, so only do the check for R3k systems. */ +static inline bool prom_is_rex(u32 magic) +{ + return !IS_ENABLED(CONFIG_CPU_R3000) || magic == REX_PROM_MAGIC; +} /* * 3MIN/MAXINE PROM entry points for DS5000/1xx's, DS5000/xx's and diff --git a/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h index 136d6d464e320..93c69fc7bbd8c 100644 --- a/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h @@ -28,7 +28,6 @@ #define cpu_has_6k_cache 0 #define cpu_has_8k_cache 0 #define cpu_has_tx39_cache 0 -#define cpu_has_fpu 1 #define cpu_has_nofpuex 0 #define cpu_has_32fpr 1 #define cpu_has_counter 1 diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h index aeae2effa123d..23c67c0871b17 100644 --- a/arch/mips/include/asm/mips-cm.h +++ b/arch/mips/include/asm/mips-cm.h @@ -11,6 +11,7 @@ #ifndef __MIPS_ASM_MIPS_CM_H__ #define __MIPS_ASM_MIPS_CM_H__ +#include #include #include @@ -153,8 +154,8 @@ GCR_ACCESSOR_RO(32, 0x030, rev) #define CM_GCR_REV_MINOR GENMASK(7, 0) #define CM_ENCODE_REV(major, minor) \ - (((major) << __ffs(CM_GCR_REV_MAJOR)) | \ - ((minor) << __ffs(CM_GCR_REV_MINOR))) + (FIELD_PREP(CM_GCR_REV_MAJOR, major) | \ + FIELD_PREP(CM_GCR_REV_MINOR, minor)) #define CM_REV_CM2 CM_ENCODE_REV(6, 0) #define CM_REV_CM2_5 CM_ENCODE_REV(7, 0) @@ -362,10 +363,10 @@ static inline int mips_cm_revision(void) static inline unsigned int mips_cm_max_vp_width(void) { extern int smp_num_siblings; - uint32_t cfg; if (mips_cm_revision() >= CM_REV_CM3) - return read_gcr_sys_config2() & CM_GCR_SYS_CONFIG2_MAXVPW; + return FIELD_GET(CM_GCR_SYS_CONFIG2_MAXVPW, + read_gcr_sys_config2()); if (mips_cm_present()) { /* @@ -373,8 +374,7 @@ static inline unsigned int mips_cm_max_vp_width(void) * number of VP(E)s, and if that ever changes then this will * need revisiting. */ - cfg = read_gcr_cl_config() & CM_GCR_Cx_CONFIG_PVPE; - return (cfg >> __ffs(CM_GCR_Cx_CONFIG_PVPE)) + 1; + return FIELD_GET(CM_GCR_Cx_CONFIG_PVPE, read_gcr_cl_config()) + 1; } if (IS_ENABLED(CONFIG_SMP)) diff --git a/arch/mips/include/asm/octeon/cvmx-bootinfo.h b/arch/mips/include/asm/octeon/cvmx-bootinfo.h index 62787765575ef..ce6e5fddce0bf 100644 --- a/arch/mips/include/asm/octeon/cvmx-bootinfo.h +++ b/arch/mips/include/asm/octeon/cvmx-bootinfo.h @@ -315,7 +315,7 @@ enum cvmx_chip_types_enum { /* Functions to return string based on type */ #define ENUM_BRD_TYPE_CASE(x) \ - case x: return(#x + 16); /* Skip CVMX_BOARD_TYPE_ */ + case x: return (&#x[16]); /* Skip CVMX_BOARD_TYPE_ */ static inline const char *cvmx_board_type_to_string(enum cvmx_board_types_enum type) { @@ -404,7 +404,7 @@ static inline const char *cvmx_board_type_to_string(enum } #define ENUM_CHIP_TYPE_CASE(x) \ - case x: return(#x + 15); /* Skip CVMX_CHIP_TYPE */ + case x: return (&#x[15]); /* Skip CVMX_CHIP_TYPE */ static inline const char *cvmx_chip_type_to_string(enum cvmx_chip_types_enum type) { diff --git a/arch/mips/include/asm/setup.h b/arch/mips/include/asm/setup.h index bb36a400203df..8c56b862fd9c2 100644 --- a/arch/mips/include/asm/setup.h +++ b/arch/mips/include/asm/setup.h @@ -16,7 +16,7 @@ static inline void setup_8250_early_printk_port(unsigned long base, unsigned int reg_shift, unsigned int timeout) {} #endif -extern void set_handler(unsigned long offset, void *addr, unsigned long len); +void set_handler(unsigned long offset, const void *addr, unsigned long len); extern void set_uncached_handler(unsigned long offset, void *addr, unsigned long len); typedef void (*vi_handler_t)(void); diff --git a/arch/mips/include/asm/timex.h b/arch/mips/include/asm/timex.h index b05bb70a2e46f..2e107886f97ac 100644 --- a/arch/mips/include/asm/timex.h +++ b/arch/mips/include/asm/timex.h @@ -40,9 +40,9 @@ typedef unsigned int cycles_t; /* - * On R4000/R4400 before version 5.0 an erratum exists such that if the - * cycle counter is read in the exact moment that it is matching the - * compare register, no interrupt will be generated. + * On R4000/R4400 an erratum exists such that if the cycle counter is + * read in the exact moment that it is matching the compare register, + * no interrupt will be generated. * * There is a suggested workaround and also the erratum can't strike if * the compare interrupt isn't being used as the clock source device. @@ -63,7 +63,7 @@ static inline int can_use_mips_counter(unsigned int prid) if (!__builtin_constant_p(cpu_has_counter)) asm volatile("" : "=m" (cpu_data[0].options)); if (likely(cpu_has_counter && - prid >= (PRID_IMP_R4000 | PRID_REV_ENCODE_44(5, 0)))) + prid > (PRID_IMP_R4000 | PRID_REV_ENCODE_44(15, 15)))) return 1; else return 0; @@ -76,25 +76,24 @@ static inline cycles_t get_cycles(void) else return 0; /* no usable counter */ } +#define get_cycles get_cycles /* * Like get_cycles - but where c0_count is not available we desperately * use c0_random in an attempt to get at least a little bit of entropy. - * - * R6000 and R6000A neither have a count register nor a random register. - * That leaves no entropy source in the CPU itself. */ static inline unsigned long random_get_entropy(void) { - unsigned int prid = read_c0_prid(); - unsigned int imp = prid & PRID_IMP_MASK; + unsigned int c0_random; - if (can_use_mips_counter(prid)) + if (can_use_mips_counter(read_c0_prid())) return read_c0_count(); - else if (likely(imp != PRID_IMP_R6000 && imp != PRID_IMP_R6000A)) - return read_c0_random(); + + if (cpu_has_3kex) + c0_random = (read_c0_random() >> 8) & 0x3f; else - return 0; /* no usable register */ + c0_random = read_c0_random() & 0x3f; + return (random_get_entropy_fallback() << 6) | (0x3f - c0_random); } #define random_get_entropy random_get_entropy diff --git a/arch/mips/jz4740/setup.c b/arch/mips/jz4740/setup.c index dc8ee21e0948e..45e327960a465 100644 --- a/arch/mips/jz4740/setup.c +++ b/arch/mips/jz4740/setup.c @@ -61,7 +61,7 @@ void __init plat_mem_setup(void) jz4740_reset_init(); - if (__dtb_start != __dtb_end) + if (&__dtb_start != &__dtb_end) dtb = __dtb_start; else dtb = (void *)fw_passed_dtb; diff --git a/arch/mips/kernel/cacheinfo.c b/arch/mips/kernel/cacheinfo.c index 47312c5294102..529dab855aac9 100644 --- a/arch/mips/kernel/cacheinfo.c +++ b/arch/mips/kernel/cacheinfo.c @@ -17,7 +17,7 @@ do { \ leaf++; \ } while (0) -static int __init_cache_level(unsigned int cpu) +int init_cache_level(unsigned int cpu) { struct cpuinfo_mips *c = ¤t_cpu_data; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -69,7 +69,7 @@ static void fill_cpumask_cluster(int cpu, cpumask_t *cpu_map) cpumask_set_cpu(cpu1, cpu_map); } -static int __populate_cache_leaves(unsigned int cpu) +int populate_cache_leaves(unsigned int cpu) { struct cpuinfo_mips *c = ¤t_cpu_data; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -98,6 +98,3 @@ static int __populate_cache_leaves(unsigned int cpu) return 0; } - -DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level) -DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves) diff --git a/arch/mips/kernel/mips-cm.c b/arch/mips/kernel/mips-cm.c index a9eab83d9148d..611ef512c0b81 100644 --- a/arch/mips/kernel/mips-cm.c +++ b/arch/mips/kernel/mips-cm.c @@ -179,8 +179,7 @@ static void mips_cm_probe_l2sync(void) phys_addr_t addr; /* L2-only sync was introduced with CM major revision 6 */ - major_rev = (read_gcr_rev() & CM_GCR_REV_MAJOR) >> - __ffs(CM_GCR_REV_MAJOR); + major_rev = FIELD_GET(CM_GCR_REV_MAJOR, read_gcr_rev()); if (major_rev < 6) return; @@ -263,13 +262,13 @@ void mips_cm_lock_other(unsigned int cluster, unsigned int core, preempt_disable(); if (cm_rev >= CM_REV_CM3) { - val = core << __ffs(CM3_GCR_Cx_OTHER_CORE); - val |= vp << __ffs(CM3_GCR_Cx_OTHER_VP); + val = FIELD_PREP(CM3_GCR_Cx_OTHER_CORE, core) | + FIELD_PREP(CM3_GCR_Cx_OTHER_VP, vp); if (cm_rev >= CM_REV_CM3_5) { val |= CM_GCR_Cx_OTHER_CLUSTER_EN; - val |= cluster << __ffs(CM_GCR_Cx_OTHER_CLUSTER); - val |= block << __ffs(CM_GCR_Cx_OTHER_BLOCK); + val |= FIELD_PREP(CM_GCR_Cx_OTHER_CLUSTER, cluster); + val |= FIELD_PREP(CM_GCR_Cx_OTHER_BLOCK, block); } else { WARN_ON(cluster != 0); WARN_ON(block != CM_GCR_Cx_OTHER_BLOCK_LOCAL); @@ -299,7 +298,7 @@ void mips_cm_lock_other(unsigned int cluster, unsigned int core, spin_lock_irqsave(&per_cpu(cm_core_lock, curr_core), per_cpu(cm_core_lock_flags, curr_core)); - val = core << __ffs(CM_GCR_Cx_OTHER_CORENUM); + val = FIELD_PREP(CM_GCR_Cx_OTHER_CORENUM, core); } write_gcr_cl_other(val); @@ -343,8 +342,8 @@ void mips_cm_error_report(void) cm_other = read_gcr_error_mult(); if (revision < CM_REV_CM3) { /* CM2 */ - cause = cm_error >> __ffs(CM_GCR_ERROR_CAUSE_ERRTYPE); - ocause = cm_other >> __ffs(CM_GCR_ERROR_MULT_ERR2ND); + cause = FIELD_GET(CM_GCR_ERROR_CAUSE_ERRTYPE, cm_error); + ocause = FIELD_GET(CM_GCR_ERROR_MULT_ERR2ND, cm_other); if (!cause) return; @@ -386,8 +385,8 @@ void mips_cm_error_report(void) ulong core_id_bits, vp_id_bits, cmd_bits, cmd_group_bits; ulong cm3_cca_bits, mcp_bits, cm3_tr_bits, sched_bit; - cause = cm_error >> __ffs64(CM3_GCR_ERROR_CAUSE_ERRTYPE); - ocause = cm_other >> __ffs(CM_GCR_ERROR_MULT_ERR2ND); + cause = FIELD_GET(CM3_GCR_ERROR_CAUSE_ERRTYPE, cm_error); + ocause = FIELD_GET(CM_GCR_ERROR_MULT_ERR2ND, cm_other); if (!cause) return; diff --git a/arch/mips/kernel/mips-cpc.c b/arch/mips/kernel/mips-cpc.c index 69e3e0b556bf7..1b0d4bb617a9c 100644 --- a/arch/mips/kernel/mips-cpc.c +++ b/arch/mips/kernel/mips-cpc.c @@ -27,6 +27,7 @@ phys_addr_t __weak mips_cpc_default_phys_base(void) cpc_node = of_find_compatible_node(of_root, NULL, "mti,mips-cpc"); if (cpc_node) { err = of_address_to_resource(cpc_node, 0, &res); + of_node_put(cpc_node); if (!err) return res.start; } diff --git a/arch/mips/kernel/r2300_fpu.S b/arch/mips/kernel/r2300_fpu.S index 12e58053544fc..cbf6db98cfb38 100644 --- a/arch/mips/kernel/r2300_fpu.S +++ b/arch/mips/kernel/r2300_fpu.S @@ -29,8 +29,8 @@ #define EX2(a,b) \ 9: a,##b; \ .section __ex_table,"a"; \ - PTR 9b,bad_stack; \ - PTR 9b+4,bad_stack; \ + PTR 9b,fault; \ + PTR 9b+4,fault; \ .previous .set mips1 diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index f510c00bda882..c563b03bdccc1 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -361,6 +361,9 @@ asmlinkage void start_secondary(void) cpu = smp_processor_id(); cpu_data[cpu].udelay_val = loops_per_jiffy; + set_cpu_sibling_map(cpu); + set_cpu_core_map(cpu); + cpumask_set_cpu(cpu, &cpu_coherent_mask); notify_cpu_starting(cpu); @@ -372,9 +375,6 @@ asmlinkage void start_secondary(void) /* The CPU is running and counters synchronised, now mark it online */ set_cpu_online(cpu, true); - set_cpu_sibling_map(cpu); - set_cpu_core_map(cpu); - calculate_cpu_foreign_map(); /* diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 3f16f38230310..9a28e79f2e66e 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -239,12 +239,3 @@ SYSCALL_DEFINE3(cachectl, char *, addr, int, nbytes, int, op) { return -ENOSYS; } - -/* - * If we ever come here the user sp is bad. Zap the process right away. - * Due to the bad stack signaling wouldn't work. - */ -asmlinkage void bad_stack(void) -{ - do_exit(SIGSEGV); -} diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c index caa01457dce60..ed339d7979f3f 100644 --- a/arch/mips/kernel/time.c +++ b/arch/mips/kernel/time.c @@ -141,15 +141,10 @@ static __init int cpu_has_mfc0_count_bug(void) case CPU_R4400MC: /* * The published errata for the R4400 up to 3.0 say the CPU - * has the mfc0 from count bug. + * has the mfc0 from count bug. This seems the last version + * produced. */ - if ((current_cpu_data.processor_id & 0xff) <= 0x30) - return 1; - - /* - * we assume newer revisions are ok - */ - return 0; + return 1; } return 0; diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index c670e122dcab7..cbb508d2d8be5 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -2020,19 +2020,19 @@ static void *set_vi_srs_handler(int n, vi_handler_t addr, int srs) * If no shadow set is selected then use the default handler * that does normal register saving and standard interrupt exit */ - extern char except_vec_vi, except_vec_vi_lui; - extern char except_vec_vi_ori, except_vec_vi_end; - extern char rollback_except_vec_vi; - char *vec_start = using_rollback_handler() ? - &rollback_except_vec_vi : &except_vec_vi; + extern const u8 except_vec_vi[], except_vec_vi_lui[]; + extern const u8 except_vec_vi_ori[], except_vec_vi_end[]; + extern const u8 rollback_except_vec_vi[]; + const u8 *vec_start = using_rollback_handler() ? + rollback_except_vec_vi : except_vec_vi; #if defined(CONFIG_CPU_MICROMIPS) || defined(CONFIG_CPU_BIG_ENDIAN) - const int lui_offset = &except_vec_vi_lui - vec_start + 2; - const int ori_offset = &except_vec_vi_ori - vec_start + 2; + const int lui_offset = except_vec_vi_lui - vec_start + 2; + const int ori_offset = except_vec_vi_ori - vec_start + 2; #else - const int lui_offset = &except_vec_vi_lui - vec_start; - const int ori_offset = &except_vec_vi_ori - vec_start; + const int lui_offset = except_vec_vi_lui - vec_start; + const int ori_offset = except_vec_vi_ori - vec_start; #endif - const int handler_len = &except_vec_vi_end - vec_start; + const int handler_len = except_vec_vi_end - vec_start; if (handler_len > VECTORSPACING) { /* @@ -2240,7 +2240,7 @@ void per_cpu_trap_init(bool is_boot_cpu) } /* Install CPU exception handler */ -void set_handler(unsigned long offset, void *addr, unsigned long size) +void set_handler(unsigned long offset, const void *addr, unsigned long size) { #ifdef CONFIG_CPU_MICROMIPS memcpy((void *)(ebase + offset), ((unsigned char *)addr - 1), size); diff --git a/arch/mips/lantiq/clk.c b/arch/mips/lantiq/clk.c index dd819e31fcbbf..7a623684d9b5e 100644 --- a/arch/mips/lantiq/clk.c +++ b/arch/mips/lantiq/clk.c @@ -158,6 +158,18 @@ void clk_deactivate(struct clk *clk) } EXPORT_SYMBOL(clk_deactivate); +struct clk *clk_get_parent(struct clk *clk) +{ + return NULL; +} +EXPORT_SYMBOL(clk_get_parent); + +int clk_set_parent(struct clk *clk, struct clk *parent) +{ + return 0; +} +EXPORT_SYMBOL(clk_set_parent); + static inline u32 get_counter_resolution(void) { u32 res; diff --git a/arch/mips/lantiq/falcon/sysctrl.c b/arch/mips/lantiq/falcon/sysctrl.c index 037b08f3257e0..a2837a54d9726 100644 --- a/arch/mips/lantiq/falcon/sysctrl.c +++ b/arch/mips/lantiq/falcon/sysctrl.c @@ -167,6 +167,8 @@ static inline void clkdev_add_sys(const char *dev, unsigned int module, { struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + if (!clk) + return; clk->cl.dev_id = dev; clk->cl.con_id = NULL; clk->cl.clk = clk; diff --git a/arch/mips/lantiq/prom.c b/arch/mips/lantiq/prom.c index 51a218f04fe0d..3f568f5aae2d1 100644 --- a/arch/mips/lantiq/prom.c +++ b/arch/mips/lantiq/prom.c @@ -79,7 +79,7 @@ void __init plat_mem_setup(void) if (fw_passed_dtb) /* UHI interface */ dtb = (void *)fw_passed_dtb; - else if (__dtb_start != __dtb_end) + else if (&__dtb_start != &__dtb_end) dtb = (void *)__dtb_start; else panic("no dtb found"); diff --git a/arch/mips/lantiq/xway/dma.c b/arch/mips/lantiq/xway/dma.c index aeb1b989cd4ee..e45077aecf83a 100644 --- a/arch/mips/lantiq/xway/dma.c +++ b/arch/mips/lantiq/xway/dma.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -29,6 +30,7 @@ #define LTQ_DMA_PCTRL 0x44 #define LTQ_DMA_IRNEN 0xf4 +#define DMA_ID_CHNR GENMASK(26, 20) /* channel number */ #define DMA_DESCPT BIT(3) /* descriptor complete irq */ #define DMA_TX BIT(8) /* TX channel direction */ #define DMA_CHAN_ON BIT(0) /* channel on / off bit */ @@ -39,7 +41,6 @@ #define DMA_POLL BIT(31) /* turn on channel polling */ #define DMA_CLK_DIV4 BIT(6) /* polling clock divider */ #define DMA_2W_BURST BIT(1) /* 2 word burst length */ -#define DMA_MAX_CHANNEL 20 /* the soc has 20 channels */ #define DMA_ETOP_ENDIANNESS (0xf << 8) /* endianness swap etop channels */ #define DMA_WEIGHT (BIT(17) | BIT(16)) /* default channel wheight */ @@ -205,7 +206,7 @@ ltq_dma_init(struct platform_device *pdev) { struct clk *clk; struct resource *res; - unsigned id; + unsigned int id, nchannels; int i; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -221,21 +222,24 @@ ltq_dma_init(struct platform_device *pdev) clk_enable(clk); ltq_dma_w32_mask(0, DMA_RESET, LTQ_DMA_CTRL); + usleep_range(1, 10); + /* disable all interrupts */ ltq_dma_w32(0, LTQ_DMA_IRNEN); /* reset/configure each channel */ - for (i = 0; i < DMA_MAX_CHANNEL; i++) { + id = ltq_dma_r32(LTQ_DMA_ID); + nchannels = ((id & DMA_ID_CHNR) >> 20); + for (i = 0; i < nchannels; i++) { ltq_dma_w32(i, LTQ_DMA_CS); ltq_dma_w32(DMA_CHAN_RST, LTQ_DMA_CCTRL); ltq_dma_w32(DMA_POLL | DMA_CLK_DIV4, LTQ_DMA_CPOLL); ltq_dma_w32_mask(DMA_CHAN_ON, 0, LTQ_DMA_CCTRL); } - id = ltq_dma_r32(LTQ_DMA_ID); dev_info(&pdev->dev, "Init done - hw rev: %X, ports: %d, channels: %d\n", - id & 0x1f, (id >> 16) & 0xf, id >> 20); + id & 0x1f, (id >> 16) & 0xf, nchannels); return 0; } diff --git a/arch/mips/lantiq/xway/gptu.c b/arch/mips/lantiq/xway/gptu.c index 3d5683e75cf1e..200fe9ff641d6 100644 --- a/arch/mips/lantiq/xway/gptu.c +++ b/arch/mips/lantiq/xway/gptu.c @@ -122,6 +122,8 @@ static inline void clkdev_add_gptu(struct device *dev, const char *con, { struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + if (!clk) + return; clk->cl.dev_id = dev_name(dev); clk->cl.con_id = con; clk->cl.clk = clk; diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c index 2ee68d6e8bb99..6c2d9779ac727 100644 --- a/arch/mips/lantiq/xway/sysctrl.c +++ b/arch/mips/lantiq/xway/sysctrl.c @@ -311,6 +311,8 @@ static void clkdev_add_pmu(const char *dev, const char *con, bool deactivate, { struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + if (!clk) + return; clk->cl.dev_id = dev; clk->cl.con_id = con; clk->cl.clk = clk; @@ -334,6 +336,8 @@ static void clkdev_add_cgu(const char *dev, const char *con, { struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + if (!clk) + return; clk->cl.dev_id = dev; clk->cl.con_id = con; clk->cl.clk = clk; @@ -352,24 +356,28 @@ static void clkdev_add_pci(void) struct clk *clk_ext = kzalloc(sizeof(struct clk), GFP_KERNEL); /* main pci clock */ - clk->cl.dev_id = "17000000.pci"; - clk->cl.con_id = NULL; - clk->cl.clk = clk; - clk->rate = CLOCK_33M; - clk->rates = valid_pci_rates; - clk->enable = pci_enable; - clk->disable = pmu_disable; - clk->module = 0; - clk->bits = PMU_PCI; - clkdev_add(&clk->cl); + if (clk) { + clk->cl.dev_id = "17000000.pci"; + clk->cl.con_id = NULL; + clk->cl.clk = clk; + clk->rate = CLOCK_33M; + clk->rates = valid_pci_rates; + clk->enable = pci_enable; + clk->disable = pmu_disable; + clk->module = 0; + clk->bits = PMU_PCI; + clkdev_add(&clk->cl); + } /* use internal/external bus clock */ - clk_ext->cl.dev_id = "17000000.pci"; - clk_ext->cl.con_id = "external"; - clk_ext->cl.clk = clk_ext; - clk_ext->enable = pci_ext_enable; - clk_ext->disable = pci_ext_disable; - clkdev_add(&clk_ext->cl); + if (clk_ext) { + clk_ext->cl.dev_id = "17000000.pci"; + clk_ext->cl.con_id = "external"; + clk_ext->cl.clk = clk_ext; + clk_ext->enable = pci_ext_enable; + clk_ext->disable = pci_ext_disable; + clkdev_add(&clk_ext->cl); + } } /* xway socs can generate clocks on gpio pins */ @@ -389,9 +397,15 @@ static void clkdev_add_clkout(void) char *name; name = kzalloc(sizeof("clkout0"), GFP_KERNEL); + if (!name) + continue; sprintf(name, "clkout%d", i); clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + if (!clk) { + kfree(name); + continue; + } clk->cl.dev_id = "1f103000.cgu"; clk->cl.con_id = name; clk->cl.clk = clk; diff --git a/arch/mips/mti-malta/malta-dtshim.c b/arch/mips/mti-malta/malta-dtshim.c index 98a063093b69a..0be28adff5572 100644 --- a/arch/mips/mti-malta/malta-dtshim.c +++ b/arch/mips/mti-malta/malta-dtshim.c @@ -22,7 +22,7 @@ #define ROCIT_CONFIG_GEN1_MEMMAP_SHIFT 8 #define ROCIT_CONFIG_GEN1_MEMMAP_MASK (0xf << 8) -static unsigned char fdt_buf[16 << 10] __initdata; +static unsigned char fdt_buf[16 << 10] __initdata __aligned(8); /* determined physical memory size, not overridden by command line args */ extern unsigned long physical_memsize; diff --git a/arch/mips/net/Makefile b/arch/mips/net/Makefile index 2d03af7d6b19d..d55912349039c 100644 --- a/arch/mips/net/Makefile +++ b/arch/mips/net/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only # MIPS networking code +obj-$(CONFIG_MIPS_CBPF_JIT) += bpf_jit.o bpf_jit_asm.o obj-$(CONFIG_MIPS_EBPF_JIT) += ebpf_jit.o diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c new file mode 100644 index 0000000000000..29a288ff4f183 --- /dev/null +++ b/arch/mips/net/bpf_jit.c @@ -0,0 +1,1299 @@ +/* + * Just-In-Time compiler for BPF filters on MIPS + * + * Copyright (c) 2014 Imagination Technologies Ltd. + * Author: Markos Chandras + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bpf_jit.h" + +/* ABI + * r_skb_hl SKB header length + * r_data SKB data pointer + * r_off Offset + * r_A BPF register A + * r_X BPF register X + * r_skb *skb + * r_M *scratch memory + * r_skb_len SKB length + * + * On entry (*bpf_func)(*skb, *filter) + * a0 = MIPS_R_A0 = skb; + * a1 = MIPS_R_A1 = filter; + * + * Stack + * ... + * M[15] + * M[14] + * M[13] + * ... + * M[0] <-- r_M + * saved reg k-1 + * saved reg k-2 + * ... + * saved reg 0 <-- r_sp + * + * + * Packet layout + * + * <--------------------- len ------------------------> + * <--skb-len(r_skb_hl)-->< ----- skb->data_len ------> + * ---------------------------------------------------- + * | skb->data | + * ---------------------------------------------------- + */ + +#define ptr typeof(unsigned long) + +#define SCRATCH_OFF(k) (4 * (k)) + +/* JIT flags */ +#define SEEN_CALL (1 << BPF_MEMWORDS) +#define SEEN_SREG_SFT (BPF_MEMWORDS + 1) +#define SEEN_SREG_BASE (1 << SEEN_SREG_SFT) +#define SEEN_SREG(x) (SEEN_SREG_BASE << (x)) +#define SEEN_OFF SEEN_SREG(2) +#define SEEN_A SEEN_SREG(3) +#define SEEN_X SEEN_SREG(4) +#define SEEN_SKB SEEN_SREG(5) +#define SEEN_MEM SEEN_SREG(6) +/* SEEN_SK_DATA also implies skb_hl an skb_len */ +#define SEEN_SKB_DATA (SEEN_SREG(7) | SEEN_SREG(1) | SEEN_SREG(0)) + +/* Arguments used by JIT */ +#define ARGS_USED_BY_JIT 2 /* only applicable to 64-bit */ + +#define SBIT(x) (1 << (x)) /* Signed version of BIT() */ + +/** + * struct jit_ctx - JIT context + * @skf: The sk_filter + * @prologue_bytes: Number of bytes for prologue + * @idx: Instruction index + * @flags: JIT flags + * @offsets: Instruction offsets + * @target: Memory location for the compiled filter + */ +struct jit_ctx { + const struct bpf_prog *skf; + unsigned int prologue_bytes; + u32 idx; + u32 flags; + u32 *offsets; + u32 *target; +}; + + +static inline int optimize_div(u32 *k) +{ + /* power of 2 divides can be implemented with right shift */ + if (!(*k & (*k-1))) { + *k = ilog2(*k); + return 1; + } + + return 0; +} + +static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx); + +/* Simply emit the instruction if the JIT memory space has been allocated */ +#define emit_instr(ctx, func, ...) \ +do { \ + if ((ctx)->target != NULL) { \ + u32 *p = &(ctx)->target[ctx->idx]; \ + uasm_i_##func(&p, ##__VA_ARGS__); \ + } \ + (ctx)->idx++; \ +} while (0) + +/* + * Similar to emit_instr but it must be used when we need to emit + * 32-bit or 64-bit instructions + */ +#define emit_long_instr(ctx, func, ...) \ +do { \ + if ((ctx)->target != NULL) { \ + u32 *p = &(ctx)->target[ctx->idx]; \ + UASM_i_##func(&p, ##__VA_ARGS__); \ + } \ + (ctx)->idx++; \ +} while (0) + +/* Determine if immediate is within the 16-bit signed range */ +static inline bool is_range16(s32 imm) +{ + return !(imm >= SBIT(15) || imm < -SBIT(15)); +} + +static inline void emit_addu(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, addu, dst, src1, src2); +} + +static inline void emit_nop(struct jit_ctx *ctx) +{ + emit_instr(ctx, nop); +} + +/* Load a u32 immediate to a register */ +static inline void emit_load_imm(unsigned int dst, u32 imm, struct jit_ctx *ctx) +{ + if (ctx->target != NULL) { + /* addiu can only handle s16 */ + if (!is_range16(imm)) { + u32 *p = &ctx->target[ctx->idx]; + uasm_i_lui(&p, r_tmp_imm, (s32)imm >> 16); + p = &ctx->target[ctx->idx + 1]; + uasm_i_ori(&p, dst, r_tmp_imm, imm & 0xffff); + } else { + u32 *p = &ctx->target[ctx->idx]; + uasm_i_addiu(&p, dst, r_zero, imm); + } + } + ctx->idx++; + + if (!is_range16(imm)) + ctx->idx++; +} + +static inline void emit_or(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, or, dst, src1, src2); +} + +static inline void emit_ori(unsigned int dst, unsigned src, u32 imm, + struct jit_ctx *ctx) +{ + if (imm >= BIT(16)) { + emit_load_imm(r_tmp, imm, ctx); + emit_or(dst, src, r_tmp, ctx); + } else { + emit_instr(ctx, ori, dst, src, imm); + } +} + +static inline void emit_daddiu(unsigned int dst, unsigned int src, + int imm, struct jit_ctx *ctx) +{ + /* + * Only used for stack, so the imm is relatively small + * and it fits in 15-bits + */ + emit_instr(ctx, daddiu, dst, src, imm); +} + +static inline void emit_addiu(unsigned int dst, unsigned int src, + u32 imm, struct jit_ctx *ctx) +{ + if (!is_range16(imm)) { + emit_load_imm(r_tmp, imm, ctx); + emit_addu(dst, r_tmp, src, ctx); + } else { + emit_instr(ctx, addiu, dst, src, imm); + } +} + +static inline void emit_and(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, and, dst, src1, src2); +} + +static inline void emit_andi(unsigned int dst, unsigned int src, + u32 imm, struct jit_ctx *ctx) +{ + /* If imm does not fit in u16 then load it to register */ + if (imm >= BIT(16)) { + emit_load_imm(r_tmp, imm, ctx); + emit_and(dst, src, r_tmp, ctx); + } else { + emit_instr(ctx, andi, dst, src, imm); + } +} + +static inline void emit_xor(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, xor, dst, src1, src2); +} + +static inline void emit_xori(ptr dst, ptr src, u32 imm, struct jit_ctx *ctx) +{ + /* If imm does not fit in u16 then load it to register */ + if (imm >= BIT(16)) { + emit_load_imm(r_tmp, imm, ctx); + emit_xor(dst, src, r_tmp, ctx); + } else { + emit_instr(ctx, xori, dst, src, imm); + } +} + +static inline void emit_stack_offset(int offset, struct jit_ctx *ctx) +{ + emit_long_instr(ctx, ADDIU, r_sp, r_sp, offset); +} + +static inline void emit_subu(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, subu, dst, src1, src2); +} + +static inline void emit_neg(unsigned int reg, struct jit_ctx *ctx) +{ + emit_subu(reg, r_zero, reg, ctx); +} + +static inline void emit_sllv(unsigned int dst, unsigned int src, + unsigned int sa, struct jit_ctx *ctx) +{ + emit_instr(ctx, sllv, dst, src, sa); +} + +static inline void emit_sll(unsigned int dst, unsigned int src, + unsigned int sa, struct jit_ctx *ctx) +{ + /* sa is 5-bits long */ + if (sa >= BIT(5)) + /* Shifting >= 32 results in zero */ + emit_jit_reg_move(dst, r_zero, ctx); + else + emit_instr(ctx, sll, dst, src, sa); +} + +static inline void emit_srlv(unsigned int dst, unsigned int src, + unsigned int sa, struct jit_ctx *ctx) +{ + emit_instr(ctx, srlv, dst, src, sa); +} + +static inline void emit_srl(unsigned int dst, unsigned int src, + unsigned int sa, struct jit_ctx *ctx) +{ + /* sa is 5-bits long */ + if (sa >= BIT(5)) + /* Shifting >= 32 results in zero */ + emit_jit_reg_move(dst, r_zero, ctx); + else + emit_instr(ctx, srl, dst, src, sa); +} + +static inline void emit_slt(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, slt, dst, src1, src2); +} + +static inline void emit_sltu(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, sltu, dst, src1, src2); +} + +static inline void emit_sltiu(unsigned dst, unsigned int src, + unsigned int imm, struct jit_ctx *ctx) +{ + /* 16 bit immediate */ + if (!is_range16((s32)imm)) { + emit_load_imm(r_tmp, imm, ctx); + emit_sltu(dst, src, r_tmp, ctx); + } else { + emit_instr(ctx, sltiu, dst, src, imm); + } + +} + +/* Store register on the stack */ +static inline void emit_store_stack_reg(ptr reg, ptr base, + unsigned int offset, + struct jit_ctx *ctx) +{ + emit_long_instr(ctx, SW, reg, offset, base); +} + +static inline void emit_store(ptr reg, ptr base, unsigned int offset, + struct jit_ctx *ctx) +{ + emit_instr(ctx, sw, reg, offset, base); +} + +static inline void emit_load_stack_reg(ptr reg, ptr base, + unsigned int offset, + struct jit_ctx *ctx) +{ + emit_long_instr(ctx, LW, reg, offset, base); +} + +static inline void emit_load(unsigned int reg, unsigned int base, + unsigned int offset, struct jit_ctx *ctx) +{ + emit_instr(ctx, lw, reg, offset, base); +} + +static inline void emit_load_byte(unsigned int reg, unsigned int base, + unsigned int offset, struct jit_ctx *ctx) +{ + emit_instr(ctx, lb, reg, offset, base); +} + +static inline void emit_half_load(unsigned int reg, unsigned int base, + unsigned int offset, struct jit_ctx *ctx) +{ + emit_instr(ctx, lh, reg, offset, base); +} + +static inline void emit_half_load_unsigned(unsigned int reg, unsigned int base, + unsigned int offset, struct jit_ctx *ctx) +{ + emit_instr(ctx, lhu, reg, offset, base); +} + +static inline void emit_mul(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, mul, dst, src1, src2); +} + +static inline void emit_div(unsigned int dst, unsigned int src, + struct jit_ctx *ctx) +{ + if (ctx->target != NULL) { + u32 *p = &ctx->target[ctx->idx]; + uasm_i_divu(&p, dst, src); + p = &ctx->target[ctx->idx + 1]; + uasm_i_mflo(&p, dst); + } + ctx->idx += 2; /* 2 insts */ +} + +static inline void emit_mod(unsigned int dst, unsigned int src, + struct jit_ctx *ctx) +{ + if (ctx->target != NULL) { + u32 *p = &ctx->target[ctx->idx]; + uasm_i_divu(&p, dst, src); + p = &ctx->target[ctx->idx + 1]; + uasm_i_mfhi(&p, dst); + } + ctx->idx += 2; /* 2 insts */ +} + +static inline void emit_dsll(unsigned int dst, unsigned int src, + unsigned int sa, struct jit_ctx *ctx) +{ + emit_instr(ctx, dsll, dst, src, sa); +} + +static inline void emit_dsrl32(unsigned int dst, unsigned int src, + unsigned int sa, struct jit_ctx *ctx) +{ + emit_instr(ctx, dsrl32, dst, src, sa); +} + +static inline void emit_wsbh(unsigned int dst, unsigned int src, + struct jit_ctx *ctx) +{ + emit_instr(ctx, wsbh, dst, src); +} + +/* load pointer to register */ +static inline void emit_load_ptr(unsigned int dst, unsigned int src, + int imm, struct jit_ctx *ctx) +{ + /* src contains the base addr of the 32/64-pointer */ + emit_long_instr(ctx, LW, dst, imm, src); +} + +/* load a function pointer to register */ +static inline void emit_load_func(unsigned int reg, ptr imm, + struct jit_ctx *ctx) +{ + if (IS_ENABLED(CONFIG_64BIT)) { + /* At this point imm is always 64-bit */ + emit_load_imm(r_tmp, (u64)imm >> 32, ctx); + emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */ + emit_ori(r_tmp, r_tmp_imm, (imm >> 16) & 0xffff, ctx); + emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */ + emit_ori(reg, r_tmp_imm, imm & 0xffff, ctx); + } else { + emit_load_imm(reg, imm, ctx); + } +} + +/* Move to real MIPS register */ +static inline void emit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx) +{ + emit_long_instr(ctx, ADDU, dst, src, r_zero); +} + +/* Move to JIT (32-bit) register */ +static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx) +{ + emit_addu(dst, src, r_zero, ctx); +} + +/* Compute the immediate value for PC-relative branches. */ +static inline u32 b_imm(unsigned int tgt, struct jit_ctx *ctx) +{ + if (ctx->target == NULL) + return 0; + + /* + * We want a pc-relative branch. We only do forward branches + * so tgt is always after pc. tgt is the instruction offset + * we want to jump to. + + * Branch on MIPS: + * I: target_offset <- sign_extend(offset) + * I+1: PC += target_offset (delay slot) + * + * ctx->idx currently points to the branch instruction + * but the offset is added to the delay slot so we need + * to subtract 4. + */ + return ctx->offsets[tgt] - + (ctx->idx * 4 - ctx->prologue_bytes) - 4; +} + +static inline void emit_bcond(int cond, unsigned int reg1, unsigned int reg2, + unsigned int imm, struct jit_ctx *ctx) +{ + if (ctx->target != NULL) { + u32 *p = &ctx->target[ctx->idx]; + + switch (cond) { + case MIPS_COND_EQ: + uasm_i_beq(&p, reg1, reg2, imm); + break; + case MIPS_COND_NE: + uasm_i_bne(&p, reg1, reg2, imm); + break; + case MIPS_COND_ALL: + uasm_i_b(&p, imm); + break; + default: + pr_warn("%s: Unhandled branch conditional: %d\n", + __func__, cond); + } + } + ctx->idx++; +} + +static inline void emit_b(unsigned int imm, struct jit_ctx *ctx) +{ + emit_bcond(MIPS_COND_ALL, r_zero, r_zero, imm, ctx); +} + +static inline void emit_jalr(unsigned int link, unsigned int reg, + struct jit_ctx *ctx) +{ + emit_instr(ctx, jalr, link, reg); +} + +static inline void emit_jr(unsigned int reg, struct jit_ctx *ctx) +{ + emit_instr(ctx, jr, reg); +} + +static inline u16 align_sp(unsigned int num) +{ + /* Double word alignment for 32-bit, quadword for 64-bit */ + unsigned int align = IS_ENABLED(CONFIG_64BIT) ? 16 : 8; + num = (num + (align - 1)) & -align; + return num; +} + +static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset) +{ + int i = 0, real_off = 0; + u32 sflags, tmp_flags; + + /* Adjust the stack pointer */ + if (offset) + emit_stack_offset(-align_sp(offset), ctx); + + tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT; + /* sflags is essentially a bitmap */ + while (tmp_flags) { + if ((sflags >> i) & 0x1) { + emit_store_stack_reg(MIPS_R_S0 + i, r_sp, real_off, + ctx); + real_off += SZREG; + } + i++; + tmp_flags >>= 1; + } + + /* save return address */ + if (ctx->flags & SEEN_CALL) { + emit_store_stack_reg(r_ra, r_sp, real_off, ctx); + real_off += SZREG; + } + + /* Setup r_M leaving the alignment gap if necessary */ + if (ctx->flags & SEEN_MEM) { + if (real_off % (SZREG * 2)) + real_off += SZREG; + emit_long_instr(ctx, ADDIU, r_M, r_sp, real_off); + } +} + +static void restore_bpf_jit_regs(struct jit_ctx *ctx, + unsigned int offset) +{ + int i, real_off = 0; + u32 sflags, tmp_flags; + + tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT; + /* sflags is a bitmap */ + i = 0; + while (tmp_flags) { + if ((sflags >> i) & 0x1) { + emit_load_stack_reg(MIPS_R_S0 + i, r_sp, real_off, + ctx); + real_off += SZREG; + } + i++; + tmp_flags >>= 1; + } + + /* restore return address */ + if (ctx->flags & SEEN_CALL) + emit_load_stack_reg(r_ra, r_sp, real_off, ctx); + + /* Restore the sp and discard the scrach memory */ + if (offset) + emit_stack_offset(align_sp(offset), ctx); +} + +static unsigned int get_stack_depth(struct jit_ctx *ctx) +{ + int sp_off = 0; + + + /* How may s* regs do we need to preserved? */ + sp_off += hweight32(ctx->flags >> SEEN_SREG_SFT) * SZREG; + + if (ctx->flags & SEEN_MEM) + sp_off += 4 * BPF_MEMWORDS; /* BPF_MEMWORDS are 32-bit */ + + if (ctx->flags & SEEN_CALL) + sp_off += SZREG; /* Space for our ra register */ + + return sp_off; +} + +static void build_prologue(struct jit_ctx *ctx) +{ + int sp_off; + + /* Calculate the total offset for the stack pointer */ + sp_off = get_stack_depth(ctx); + save_bpf_jit_regs(ctx, sp_off); + + if (ctx->flags & SEEN_SKB) + emit_reg_move(r_skb, MIPS_R_A0, ctx); + + if (ctx->flags & SEEN_SKB_DATA) { + /* Load packet length */ + emit_load(r_skb_len, r_skb, offsetof(struct sk_buff, len), + ctx); + emit_load(r_tmp, r_skb, offsetof(struct sk_buff, data_len), + ctx); + /* Load the data pointer */ + emit_load_ptr(r_skb_data, r_skb, + offsetof(struct sk_buff, data), ctx); + /* Load the header length */ + emit_subu(r_skb_hl, r_skb_len, r_tmp, ctx); + } + + if (ctx->flags & SEEN_X) + emit_jit_reg_move(r_X, r_zero, ctx); + + /* + * Do not leak kernel data to userspace, we only need to clear + * r_A if it is ever used. In fact if it is never used, we + * will not save/restore it, so clearing it in this case would + * corrupt the state of the caller. + */ + if (bpf_needs_clear_a(&ctx->skf->insns[0]) && + (ctx->flags & SEEN_A)) + emit_jit_reg_move(r_A, r_zero, ctx); +} + +static void build_epilogue(struct jit_ctx *ctx) +{ + unsigned int sp_off; + + /* Calculate the total offset for the stack pointer */ + + sp_off = get_stack_depth(ctx); + restore_bpf_jit_regs(ctx, sp_off); + + /* Return */ + emit_jr(r_ra, ctx); + emit_nop(ctx); +} + +#define CHOOSE_LOAD_FUNC(K, func) \ + ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative : func) : \ + func##_positive) + +static bool is_bad_offset(int b_off) +{ + return b_off > 0x1ffff || b_off < -0x20000; +} + +static int build_body(struct jit_ctx *ctx) +{ + const struct bpf_prog *prog = ctx->skf; + const struct sock_filter *inst; + unsigned int i, off, condt; + u32 k, b_off __maybe_unused; + u8 (*sk_load_func)(unsigned long *skb, int offset); + + for (i = 0; i < prog->len; i++) { + u16 code; + + inst = &(prog->insns[i]); + pr_debug("%s: code->0x%02x, jt->0x%x, jf->0x%x, k->0x%x\n", + __func__, inst->code, inst->jt, inst->jf, inst->k); + k = inst->k; + code = bpf_anc_helper(inst); + + if (ctx->target == NULL) + ctx->offsets[i] = ctx->idx * 4; + + switch (code) { + case BPF_LD | BPF_IMM: + /* A <- k ==> li r_A, k */ + ctx->flags |= SEEN_A; + emit_load_imm(r_A, k, ctx); + break; + case BPF_LD | BPF_W | BPF_LEN: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); + /* A <- len ==> lw r_A, offset(skb) */ + ctx->flags |= SEEN_SKB | SEEN_A; + off = offsetof(struct sk_buff, len); + emit_load(r_A, r_skb, off, ctx); + break; + case BPF_LD | BPF_MEM: + /* A <- M[k] ==> lw r_A, offset(M) */ + ctx->flags |= SEEN_MEM | SEEN_A; + emit_load(r_A, r_M, SCRATCH_OFF(k), ctx); + break; + case BPF_LD | BPF_W | BPF_ABS: + /* A <- P[k:4] */ + sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_word); + goto load; + case BPF_LD | BPF_H | BPF_ABS: + /* A <- P[k:2] */ + sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_half); + goto load; + case BPF_LD | BPF_B | BPF_ABS: + /* A <- P[k:1] */ + sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_byte); +load: + emit_load_imm(r_off, k, ctx); +load_common: + ctx->flags |= SEEN_CALL | SEEN_OFF | + SEEN_SKB | SEEN_A | SEEN_SKB_DATA; + + emit_load_func(r_s0, (ptr)sk_load_func, ctx); + emit_reg_move(MIPS_R_A0, r_skb, ctx); + emit_jalr(MIPS_R_RA, r_s0, ctx); + /* Load second argument to delay slot */ + emit_reg_move(MIPS_R_A1, r_off, ctx); + /* Check the error value */ + emit_bcond(MIPS_COND_EQ, r_ret, 0, b_imm(i + 1, ctx), + ctx); + /* Load return register on DS for failures */ + emit_reg_move(r_ret, r_zero, ctx); + /* Return with error */ + b_off = b_imm(prog->len, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_b(b_off, ctx); + emit_nop(ctx); + break; + case BPF_LD | BPF_W | BPF_IND: + /* A <- P[X + k:4] */ + sk_load_func = sk_load_word; + goto load_ind; + case BPF_LD | BPF_H | BPF_IND: + /* A <- P[X + k:2] */ + sk_load_func = sk_load_half; + goto load_ind; + case BPF_LD | BPF_B | BPF_IND: + /* A <- P[X + k:1] */ + sk_load_func = sk_load_byte; +load_ind: + ctx->flags |= SEEN_OFF | SEEN_X; + emit_addiu(r_off, r_X, k, ctx); + goto load_common; + case BPF_LDX | BPF_IMM: + /* X <- k */ + ctx->flags |= SEEN_X; + emit_load_imm(r_X, k, ctx); + break; + case BPF_LDX | BPF_MEM: + /* X <- M[k] */ + ctx->flags |= SEEN_X | SEEN_MEM; + emit_load(r_X, r_M, SCRATCH_OFF(k), ctx); + break; + case BPF_LDX | BPF_W | BPF_LEN: + /* X <- len */ + ctx->flags |= SEEN_X | SEEN_SKB; + off = offsetof(struct sk_buff, len); + emit_load(r_X, r_skb, off, ctx); + break; + case BPF_LDX | BPF_B | BPF_MSH: + /* X <- 4 * (P[k:1] & 0xf) */ + ctx->flags |= SEEN_X | SEEN_CALL | SEEN_SKB; + /* Load offset to a1 */ + emit_load_func(r_s0, (ptr)sk_load_byte, ctx); + /* + * This may emit two instructions so it may not fit + * in the delay slot. So use a0 in the delay slot. + */ + emit_load_imm(MIPS_R_A1, k, ctx); + emit_jalr(MIPS_R_RA, r_s0, ctx); + emit_reg_move(MIPS_R_A0, r_skb, ctx); /* delay slot */ + /* Check the error value */ + b_off = b_imm(prog->len, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_bcond(MIPS_COND_NE, r_ret, 0, b_off, ctx); + emit_reg_move(r_ret, r_zero, ctx); + /* We are good */ + /* X <- P[1:K] & 0xf */ + emit_andi(r_X, r_A, 0xf, ctx); + /* X << 2 */ + emit_b(b_imm(i + 1, ctx), ctx); + emit_sll(r_X, r_X, 2, ctx); /* delay slot */ + break; + case BPF_ST: + /* M[k] <- A */ + ctx->flags |= SEEN_MEM | SEEN_A; + emit_store(r_A, r_M, SCRATCH_OFF(k), ctx); + break; + case BPF_STX: + /* M[k] <- X */ + ctx->flags |= SEEN_MEM | SEEN_X; + emit_store(r_X, r_M, SCRATCH_OFF(k), ctx); + break; + case BPF_ALU | BPF_ADD | BPF_K: + /* A += K */ + ctx->flags |= SEEN_A; + emit_addiu(r_A, r_A, k, ctx); + break; + case BPF_ALU | BPF_ADD | BPF_X: + /* A += X */ + ctx->flags |= SEEN_A | SEEN_X; + emit_addu(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_SUB | BPF_K: + /* A -= K */ + ctx->flags |= SEEN_A; + emit_addiu(r_A, r_A, -k, ctx); + break; + case BPF_ALU | BPF_SUB | BPF_X: + /* A -= X */ + ctx->flags |= SEEN_A | SEEN_X; + emit_subu(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_MUL | BPF_K: + /* A *= K */ + /* Load K to scratch register before MUL */ + ctx->flags |= SEEN_A; + emit_load_imm(r_s0, k, ctx); + emit_mul(r_A, r_A, r_s0, ctx); + break; + case BPF_ALU | BPF_MUL | BPF_X: + /* A *= X */ + ctx->flags |= SEEN_A | SEEN_X; + emit_mul(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_DIV | BPF_K: + /* A /= k */ + if (k == 1) + break; + if (optimize_div(&k)) { + ctx->flags |= SEEN_A; + emit_srl(r_A, r_A, k, ctx); + break; + } + ctx->flags |= SEEN_A; + emit_load_imm(r_s0, k, ctx); + emit_div(r_A, r_s0, ctx); + break; + case BPF_ALU | BPF_MOD | BPF_K: + /* A %= k */ + if (k == 1) { + ctx->flags |= SEEN_A; + emit_jit_reg_move(r_A, r_zero, ctx); + } else { + ctx->flags |= SEEN_A; + emit_load_imm(r_s0, k, ctx); + emit_mod(r_A, r_s0, ctx); + } + break; + case BPF_ALU | BPF_DIV | BPF_X: + /* A /= X */ + ctx->flags |= SEEN_X | SEEN_A; + /* Check if r_X is zero */ + b_off = b_imm(prog->len, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_bcond(MIPS_COND_EQ, r_X, r_zero, b_off, ctx); + emit_load_imm(r_ret, 0, ctx); /* delay slot */ + emit_div(r_A, r_X, ctx); + break; + case BPF_ALU | BPF_MOD | BPF_X: + /* A %= X */ + ctx->flags |= SEEN_X | SEEN_A; + /* Check if r_X is zero */ + b_off = b_imm(prog->len, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_bcond(MIPS_COND_EQ, r_X, r_zero, b_off, ctx); + emit_load_imm(r_ret, 0, ctx); /* delay slot */ + emit_mod(r_A, r_X, ctx); + break; + case BPF_ALU | BPF_OR | BPF_K: + /* A |= K */ + ctx->flags |= SEEN_A; + emit_ori(r_A, r_A, k, ctx); + break; + case BPF_ALU | BPF_OR | BPF_X: + /* A |= X */ + ctx->flags |= SEEN_A; + emit_ori(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_XOR | BPF_K: + /* A ^= k */ + ctx->flags |= SEEN_A; + emit_xori(r_A, r_A, k, ctx); + break; + case BPF_ANC | SKF_AD_ALU_XOR_X: + case BPF_ALU | BPF_XOR | BPF_X: + /* A ^= X */ + ctx->flags |= SEEN_A; + emit_xor(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_AND | BPF_K: + /* A &= K */ + ctx->flags |= SEEN_A; + emit_andi(r_A, r_A, k, ctx); + break; + case BPF_ALU | BPF_AND | BPF_X: + /* A &= X */ + ctx->flags |= SEEN_A | SEEN_X; + emit_and(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_LSH | BPF_K: + /* A <<= K */ + ctx->flags |= SEEN_A; + emit_sll(r_A, r_A, k, ctx); + break; + case BPF_ALU | BPF_LSH | BPF_X: + /* A <<= X */ + ctx->flags |= SEEN_A | SEEN_X; + emit_sllv(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_RSH | BPF_K: + /* A >>= K */ + ctx->flags |= SEEN_A; + emit_srl(r_A, r_A, k, ctx); + break; + case BPF_ALU | BPF_RSH | BPF_X: + ctx->flags |= SEEN_A | SEEN_X; + emit_srlv(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_NEG: + /* A = -A */ + ctx->flags |= SEEN_A; + emit_neg(r_A, ctx); + break; + case BPF_JMP | BPF_JA: + /* pc += K */ + b_off = b_imm(i + k + 1, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_b(b_off, ctx); + emit_nop(ctx); + break; + case BPF_JMP | BPF_JEQ | BPF_K: + /* pc += ( A == K ) ? pc->jt : pc->jf */ + condt = MIPS_COND_EQ | MIPS_COND_K; + goto jmp_cmp; + case BPF_JMP | BPF_JEQ | BPF_X: + ctx->flags |= SEEN_X; + /* pc += ( A == X ) ? pc->jt : pc->jf */ + condt = MIPS_COND_EQ | MIPS_COND_X; + goto jmp_cmp; + case BPF_JMP | BPF_JGE | BPF_K: + /* pc += ( A >= K ) ? pc->jt : pc->jf */ + condt = MIPS_COND_GE | MIPS_COND_K; + goto jmp_cmp; + case BPF_JMP | BPF_JGE | BPF_X: + ctx->flags |= SEEN_X; + /* pc += ( A >= X ) ? pc->jt : pc->jf */ + condt = MIPS_COND_GE | MIPS_COND_X; + goto jmp_cmp; + case BPF_JMP | BPF_JGT | BPF_K: + /* pc += ( A > K ) ? pc->jt : pc->jf */ + condt = MIPS_COND_GT | MIPS_COND_K; + goto jmp_cmp; + case BPF_JMP | BPF_JGT | BPF_X: + ctx->flags |= SEEN_X; + /* pc += ( A > X ) ? pc->jt : pc->jf */ + condt = MIPS_COND_GT | MIPS_COND_X; +jmp_cmp: + /* Greater or Equal */ + if ((condt & MIPS_COND_GE) || + (condt & MIPS_COND_GT)) { + if (condt & MIPS_COND_K) { /* K */ + ctx->flags |= SEEN_A; + emit_sltiu(r_s0, r_A, k, ctx); + } else { /* X */ + ctx->flags |= SEEN_A | + SEEN_X; + emit_sltu(r_s0, r_A, r_X, ctx); + } + /* A < (K|X) ? r_scrach = 1 */ + b_off = b_imm(i + inst->jf + 1, ctx); + emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, + ctx); + emit_nop(ctx); + /* A > (K|X) ? scratch = 0 */ + if (condt & MIPS_COND_GT) { + /* Checking for equality */ + ctx->flags |= SEEN_A | SEEN_X; + if (condt & MIPS_COND_K) + emit_load_imm(r_s0, k, ctx); + else + emit_jit_reg_move(r_s0, r_X, + ctx); + b_off = b_imm(i + inst->jf + 1, ctx); + emit_bcond(MIPS_COND_EQ, r_A, r_s0, + b_off, ctx); + emit_nop(ctx); + /* Finally, A > K|X */ + b_off = b_imm(i + inst->jt + 1, ctx); + emit_b(b_off, ctx); + emit_nop(ctx); + } else { + /* A >= (K|X) so jump */ + b_off = b_imm(i + inst->jt + 1, ctx); + emit_b(b_off, ctx); + emit_nop(ctx); + } + } else { + /* A == K|X */ + if (condt & MIPS_COND_K) { /* K */ + ctx->flags |= SEEN_A; + emit_load_imm(r_s0, k, ctx); + /* jump true */ + b_off = b_imm(i + inst->jt + 1, ctx); + emit_bcond(MIPS_COND_EQ, r_A, r_s0, + b_off, ctx); + emit_nop(ctx); + /* jump false */ + b_off = b_imm(i + inst->jf + 1, + ctx); + emit_bcond(MIPS_COND_NE, r_A, r_s0, + b_off, ctx); + emit_nop(ctx); + } else { /* X */ + /* jump true */ + ctx->flags |= SEEN_A | SEEN_X; + b_off = b_imm(i + inst->jt + 1, + ctx); + emit_bcond(MIPS_COND_EQ, r_A, r_X, + b_off, ctx); + emit_nop(ctx); + /* jump false */ + b_off = b_imm(i + inst->jf + 1, ctx); + emit_bcond(MIPS_COND_NE, r_A, r_X, + b_off, ctx); + emit_nop(ctx); + } + } + break; + case BPF_JMP | BPF_JSET | BPF_K: + ctx->flags |= SEEN_A; + /* pc += (A & K) ? pc -> jt : pc -> jf */ + emit_load_imm(r_s1, k, ctx); + emit_and(r_s0, r_A, r_s1, ctx); + /* jump true */ + b_off = b_imm(i + inst->jt + 1, ctx); + emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx); + emit_nop(ctx); + /* jump false */ + b_off = b_imm(i + inst->jf + 1, ctx); + emit_b(b_off, ctx); + emit_nop(ctx); + break; + case BPF_JMP | BPF_JSET | BPF_X: + ctx->flags |= SEEN_X | SEEN_A; + /* pc += (A & X) ? pc -> jt : pc -> jf */ + emit_and(r_s0, r_A, r_X, ctx); + /* jump true */ + b_off = b_imm(i + inst->jt + 1, ctx); + emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx); + emit_nop(ctx); + /* jump false */ + b_off = b_imm(i + inst->jf + 1, ctx); + emit_b(b_off, ctx); + emit_nop(ctx); + break; + case BPF_RET | BPF_A: + ctx->flags |= SEEN_A; + if (i != prog->len - 1) { + /* + * If this is not the last instruction + * then jump to the epilogue + */ + b_off = b_imm(prog->len, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_b(b_off, ctx); + } + emit_reg_move(r_ret, r_A, ctx); /* delay slot */ + break; + case BPF_RET | BPF_K: + /* + * It can emit two instructions so it does not fit on + * the delay slot. + */ + emit_load_imm(r_ret, k, ctx); + if (i != prog->len - 1) { + /* + * If this is not the last instruction + * then jump to the epilogue + */ + b_off = b_imm(prog->len, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_b(b_off, ctx); + emit_nop(ctx); + } + break; + case BPF_MISC | BPF_TAX: + /* X = A */ + ctx->flags |= SEEN_X | SEEN_A; + emit_jit_reg_move(r_X, r_A, ctx); + break; + case BPF_MISC | BPF_TXA: + /* A = X */ + ctx->flags |= SEEN_A | SEEN_X; + emit_jit_reg_move(r_A, r_X, ctx); + break; + /* AUX */ + case BPF_ANC | SKF_AD_PROTOCOL: + /* A = ntohs(skb->protocol */ + ctx->flags |= SEEN_SKB | SEEN_OFF | SEEN_A; + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, + protocol) != 2); + off = offsetof(struct sk_buff, protocol); + emit_half_load(r_A, r_skb, off, ctx); +#ifdef CONFIG_CPU_LITTLE_ENDIAN + /* This needs little endian fixup */ + if (cpu_has_wsbh) { + /* R2 and later have the wsbh instruction */ + emit_wsbh(r_A, r_A, ctx); + } else { + /* Get first byte */ + emit_andi(r_tmp_imm, r_A, 0xff, ctx); + /* Shift it */ + emit_sll(r_tmp, r_tmp_imm, 8, ctx); + /* Get second byte */ + emit_srl(r_tmp_imm, r_A, 8, ctx); + emit_andi(r_tmp_imm, r_tmp_imm, 0xff, ctx); + /* Put everyting together in r_A */ + emit_or(r_A, r_tmp, r_tmp_imm, ctx); + } +#endif + break; + case BPF_ANC | SKF_AD_CPU: + ctx->flags |= SEEN_A | SEEN_OFF; + /* A = current_thread_info()->cpu */ + BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info, + cpu) != 4); + off = offsetof(struct thread_info, cpu); + /* $28/gp points to the thread_info struct */ + emit_load(r_A, 28, off, ctx); + break; + case BPF_ANC | SKF_AD_IFINDEX: + /* A = skb->dev->ifindex */ + case BPF_ANC | SKF_AD_HATYPE: + /* A = skb->dev->type */ + ctx->flags |= SEEN_SKB | SEEN_A; + off = offsetof(struct sk_buff, dev); + /* Load *dev pointer */ + emit_load_ptr(r_s0, r_skb, off, ctx); + /* error (0) in the delay slot */ + b_off = b_imm(prog->len, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_bcond(MIPS_COND_EQ, r_s0, r_zero, b_off, ctx); + emit_reg_move(r_ret, r_zero, ctx); + if (code == (BPF_ANC | SKF_AD_IFINDEX)) { + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); + off = offsetof(struct net_device, ifindex); + emit_load(r_A, r_s0, off, ctx); + } else { /* (code == (BPF_ANC | SKF_AD_HATYPE) */ + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2); + off = offsetof(struct net_device, type); + emit_half_load_unsigned(r_A, r_s0, off, ctx); + } + break; + case BPF_ANC | SKF_AD_MARK: + ctx->flags |= SEEN_SKB | SEEN_A; + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); + off = offsetof(struct sk_buff, mark); + emit_load(r_A, r_skb, off, ctx); + break; + case BPF_ANC | SKF_AD_RXHASH: + ctx->flags |= SEEN_SKB | SEEN_A; + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); + off = offsetof(struct sk_buff, hash); + emit_load(r_A, r_skb, off, ctx); + break; + case BPF_ANC | SKF_AD_VLAN_TAG: + ctx->flags |= SEEN_SKB | SEEN_A; + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, + vlan_tci) != 2); + off = offsetof(struct sk_buff, vlan_tci); + emit_half_load_unsigned(r_A, r_skb, off, ctx); + break; + case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: + ctx->flags |= SEEN_SKB | SEEN_A; + emit_load_byte(r_A, r_skb, PKT_VLAN_PRESENT_OFFSET(), ctx); + if (PKT_VLAN_PRESENT_BIT) + emit_srl(r_A, r_A, PKT_VLAN_PRESENT_BIT, ctx); + if (PKT_VLAN_PRESENT_BIT < 7) + emit_andi(r_A, r_A, 1, ctx); + break; + case BPF_ANC | SKF_AD_PKTTYPE: + ctx->flags |= SEEN_SKB; + + emit_load_byte(r_tmp, r_skb, PKT_TYPE_OFFSET(), ctx); + /* Keep only the last 3 bits */ + emit_andi(r_A, r_tmp, PKT_TYPE_MAX, ctx); +#ifdef __BIG_ENDIAN_BITFIELD + /* Get the actual packet type to the lower 3 bits */ + emit_srl(r_A, r_A, 5, ctx); +#endif + break; + case BPF_ANC | SKF_AD_QUEUE: + ctx->flags |= SEEN_SKB | SEEN_A; + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, + queue_mapping) != 2); + BUILD_BUG_ON(offsetof(struct sk_buff, + queue_mapping) > 0xff); + off = offsetof(struct sk_buff, queue_mapping); + emit_half_load_unsigned(r_A, r_skb, off, ctx); + break; + default: + pr_debug("%s: Unhandled opcode: 0x%02x\n", __FILE__, + inst->code); + return -1; + } + } + + /* compute offsets only during the first pass */ + if (ctx->target == NULL) + ctx->offsets[i] = ctx->idx * 4; + + return 0; +} + +void bpf_jit_compile(struct bpf_prog *fp) +{ + struct jit_ctx ctx; + unsigned int alloc_size, tmp_idx; + + if (!bpf_jit_enable) + return; + + memset(&ctx, 0, sizeof(ctx)); + + ctx.offsets = kcalloc(fp->len + 1, sizeof(*ctx.offsets), GFP_KERNEL); + if (ctx.offsets == NULL) + return; + + ctx.skf = fp; + + if (build_body(&ctx)) + goto out; + + tmp_idx = ctx.idx; + build_prologue(&ctx); + ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4; + /* just to complete the ctx.idx count */ + build_epilogue(&ctx); + + alloc_size = 4 * ctx.idx; + ctx.target = module_alloc(alloc_size); + if (ctx.target == NULL) + goto out; + + /* Clean it */ + memset(ctx.target, 0, alloc_size); + + ctx.idx = 0; + + /* Generate the actual JIT code */ + build_prologue(&ctx); + if (build_body(&ctx)) { + module_memfree(ctx.target); + goto out; + } + build_epilogue(&ctx); + + /* Update the icache */ + flush_icache_range((ptr)ctx.target, (ptr)(ctx.target + ctx.idx)); + + if (bpf_jit_enable > 1) + /* Dump JIT code */ + bpf_jit_dump(fp->len, alloc_size, 2, ctx.target); + + fp->bpf_func = (void *)ctx.target; + fp->jited = 1; + +out: + kfree(ctx.offsets); +} + +void bpf_jit_free(struct bpf_prog *fp) +{ + if (fp->jited) + module_memfree(fp->bpf_func); + + bpf_prog_unlock_free(fp); +} diff --git a/arch/mips/net/bpf_jit_asm.S b/arch/mips/net/bpf_jit_asm.S new file mode 100644 index 0000000000000..57154c5883b6f --- /dev/null +++ b/arch/mips/net/bpf_jit_asm.S @@ -0,0 +1,285 @@ +/* + * bpf_jib_asm.S: Packet/header access helper functions for MIPS/MIPS64 BPF + * compiler. + * + * Copyright (C) 2015 Imagination Technologies Ltd. + * Author: Markos Chandras + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + */ + +#include +#include +#include +#include "bpf_jit.h" + +/* ABI + * + * r_skb_hl skb header length + * r_skb_data skb data + * r_off(a1) offset register + * r_A BPF register A + * r_X PF register X + * r_skb(a0) *skb + * r_M *scratch memory + * r_skb_le skb length + * r_s0 Scratch register 0 + * r_s1 Scratch register 1 + * + * On entry: + * a0: *skb + * a1: offset (imm or imm + X) + * + * All non-BPF-ABI registers are free for use. On return, we only + * care about r_ret. The BPF-ABI registers are assumed to remain + * unmodified during the entire filter operation. + */ + +#define skb a0 +#define offset a1 +#define SKF_LL_OFF (-0x200000) /* Can't include linux/filter.h in assembly */ + + /* We know better :) so prevent assembler reordering etc */ + .set noreorder + +#define is_offset_negative(TYPE) \ + /* If offset is negative we have more work to do */ \ + slti t0, offset, 0; \ + bgtz t0, bpf_slow_path_##TYPE##_neg; \ + /* Be careful what follows in DS. */ + +#define is_offset_in_header(SIZE, TYPE) \ + /* Reading from header? */ \ + addiu $r_s0, $r_skb_hl, -SIZE; \ + slt t0, $r_s0, offset; \ + bgtz t0, bpf_slow_path_##TYPE; \ + +LEAF(sk_load_word) + is_offset_negative(word) +FEXPORT(sk_load_word_positive) + is_offset_in_header(4, word) + /* Offset within header boundaries */ + PTR_ADDU t1, $r_skb_data, offset + .set reorder + lw $r_A, 0(t1) + .set noreorder +#ifdef CONFIG_CPU_LITTLE_ENDIAN +# if MIPS_ISA_REV >= 2 + wsbh t0, $r_A + rotr $r_A, t0, 16 +# else + sll t0, $r_A, 24 + srl t1, $r_A, 24 + srl t2, $r_A, 8 + or t0, t0, t1 + andi t2, t2, 0xff00 + andi t1, $r_A, 0xff00 + or t0, t0, t2 + sll t1, t1, 8 + or $r_A, t0, t1 +# endif +#endif + jr $r_ra + move $r_ret, zero + END(sk_load_word) + +LEAF(sk_load_half) + is_offset_negative(half) +FEXPORT(sk_load_half_positive) + is_offset_in_header(2, half) + /* Offset within header boundaries */ + PTR_ADDU t1, $r_skb_data, offset + lhu $r_A, 0(t1) +#ifdef CONFIG_CPU_LITTLE_ENDIAN +# if MIPS_ISA_REV >= 2 + wsbh $r_A, $r_A +# else + sll t0, $r_A, 8 + srl t1, $r_A, 8 + andi t0, t0, 0xff00 + or $r_A, t0, t1 +# endif +#endif + jr $r_ra + move $r_ret, zero + END(sk_load_half) + +LEAF(sk_load_byte) + is_offset_negative(byte) +FEXPORT(sk_load_byte_positive) + is_offset_in_header(1, byte) + /* Offset within header boundaries */ + PTR_ADDU t1, $r_skb_data, offset + lbu $r_A, 0(t1) + jr $r_ra + move $r_ret, zero + END(sk_load_byte) + +/* + * call skb_copy_bits: + * (prototype in linux/skbuff.h) + * + * int skb_copy_bits(sk_buff *skb, int offset, void *to, int len) + * + * o32 mandates we leave 4 spaces for argument registers in case + * the callee needs to use them. Even though we don't care about + * the argument registers ourselves, we need to allocate that space + * to remain ABI compliant since the callee may want to use that space. + * We also allocate 2 more spaces for $r_ra and our return register (*to). + * + * n64 is a bit different. The *caller* will allocate the space to preserve + * the arguments. So in 64-bit kernels, we allocate the 4-arg space for no + * good reason but it does not matter that much really. + * + * (void *to) is returned in r_s0 + * + */ +#ifdef CONFIG_CPU_LITTLE_ENDIAN +#define DS_OFFSET(SIZE) (4 * SZREG) +#else +#define DS_OFFSET(SIZE) ((4 * SZREG) + (4 - SIZE)) +#endif +#define bpf_slow_path_common(SIZE) \ + /* Quick check. Are we within reasonable boundaries? */ \ + LONG_ADDIU $r_s1, $r_skb_len, -SIZE; \ + sltu $r_s0, offset, $r_s1; \ + beqz $r_s0, fault; \ + /* Load 4th argument in DS */ \ + LONG_ADDIU a3, zero, SIZE; \ + PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \ + PTR_LA t0, skb_copy_bits; \ + PTR_S $r_ra, (5 * SZREG)($r_sp); \ + /* Assign low slot to a2 */ \ + PTR_ADDIU a2, $r_sp, DS_OFFSET(SIZE); \ + jalr t0; \ + /* Reset our destination slot (DS but it's ok) */ \ + INT_S zero, (4 * SZREG)($r_sp); \ + /* \ + * skb_copy_bits returns 0 on success and -EFAULT \ + * on error. Our data live in a2. Do not bother with \ + * our data if an error has been returned. \ + */ \ + /* Restore our frame */ \ + PTR_L $r_ra, (5 * SZREG)($r_sp); \ + INT_L $r_s0, (4 * SZREG)($r_sp); \ + bltz v0, fault; \ + PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \ + move $r_ret, zero; \ + +NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp) + bpf_slow_path_common(4) +#ifdef CONFIG_CPU_LITTLE_ENDIAN +# if MIPS_ISA_REV >= 2 + wsbh t0, $r_s0 + jr $r_ra + rotr $r_A, t0, 16 +# else + sll t0, $r_s0, 24 + srl t1, $r_s0, 24 + srl t2, $r_s0, 8 + or t0, t0, t1 + andi t2, t2, 0xff00 + andi t1, $r_s0, 0xff00 + or t0, t0, t2 + sll t1, t1, 8 + jr $r_ra + or $r_A, t0, t1 +# endif +#else + jr $r_ra + move $r_A, $r_s0 +#endif + + END(bpf_slow_path_word) + +NESTED(bpf_slow_path_half, (6 * SZREG), $r_sp) + bpf_slow_path_common(2) +#ifdef CONFIG_CPU_LITTLE_ENDIAN +# if MIPS_ISA_REV >= 2 + jr $r_ra + wsbh $r_A, $r_s0 +# else + sll t0, $r_s0, 8 + andi t1, $r_s0, 0xff00 + andi t0, t0, 0xff00 + srl t1, t1, 8 + jr $r_ra + or $r_A, t0, t1 +# endif +#else + jr $r_ra + move $r_A, $r_s0 +#endif + + END(bpf_slow_path_half) + +NESTED(bpf_slow_path_byte, (6 * SZREG), $r_sp) + bpf_slow_path_common(1) + jr $r_ra + move $r_A, $r_s0 + + END(bpf_slow_path_byte) + +/* + * Negative entry points + */ + .macro bpf_is_end_of_data + li t0, SKF_LL_OFF + /* Reading link layer data? */ + slt t1, offset, t0 + bgtz t1, fault + /* Be careful what follows in DS. */ + .endm +/* + * call skb_copy_bits: + * (prototype in linux/filter.h) + * + * void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, + * int k, unsigned int size) + * + * see above (bpf_slow_path_common) for ABI restrictions + */ +#define bpf_negative_common(SIZE) \ + PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \ + PTR_LA t0, bpf_internal_load_pointer_neg_helper; \ + PTR_S $r_ra, (5 * SZREG)($r_sp); \ + jalr t0; \ + li a2, SIZE; \ + PTR_L $r_ra, (5 * SZREG)($r_sp); \ + /* Check return pointer */ \ + beqz v0, fault; \ + PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \ + /* Preserve our pointer */ \ + move $r_s0, v0; \ + /* Set return value */ \ + move $r_ret, zero; \ + +bpf_slow_path_word_neg: + bpf_is_end_of_data +NESTED(sk_load_word_negative, (6 * SZREG), $r_sp) + bpf_negative_common(4) + jr $r_ra + lw $r_A, 0($r_s0) + END(sk_load_word_negative) + +bpf_slow_path_half_neg: + bpf_is_end_of_data +NESTED(sk_load_half_negative, (6 * SZREG), $r_sp) + bpf_negative_common(2) + jr $r_ra + lhu $r_A, 0($r_s0) + END(sk_load_half_negative) + +bpf_slow_path_byte_neg: + bpf_is_end_of_data +NESTED(sk_load_byte_negative, (6 * SZREG), $r_sp) + bpf_negative_common(1) + jr $r_ra + lbu $r_A, 0($r_s0) + END(sk_load_byte_negative) + +fault: + jr $r_ra + addiu $r_ret, zero, 1 diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c index 561154cbcc401..b31b91e57c341 100644 --- a/arch/mips/net/ebpf_jit.c +++ b/arch/mips/net/ebpf_jit.c @@ -1355,6 +1355,9 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, } break; + case BPF_ST | BPF_NOSPEC: /* speculation barrier */ + break; + case BPF_ST | BPF_B | BPF_MEM: case BPF_ST | BPF_H | BPF_MEM: case BPF_ST | BPF_W | BPF_MEM: diff --git a/arch/mips/pic32/pic32mzda/init.c b/arch/mips/pic32/pic32mzda/init.c index 50f376f058f43..f232c77ff5265 100644 --- a/arch/mips/pic32/pic32mzda/init.c +++ b/arch/mips/pic32/pic32mzda/init.c @@ -28,7 +28,7 @@ static ulong get_fdtaddr(void) if (fw_passed_dtb && !fw_arg2 && !fw_arg3) return (ulong)fw_passed_dtb; - if (__dtb_start < __dtb_end) + if (&__dtb_start < &__dtb_end) ftaddr = (ulong)__dtb_start; return ftaddr; diff --git a/arch/mips/ralink/ill_acc.c b/arch/mips/ralink/ill_acc.c index 0ddeb31afa93a..45ca2b84f0962 100644 --- a/arch/mips/ralink/ill_acc.c +++ b/arch/mips/ralink/ill_acc.c @@ -61,6 +61,7 @@ static int __init ill_acc_of_setup(void) pdev = of_find_device_by_node(np); if (!pdev) { pr_err("%pOFn: failed to lookup pdev\n", np); + of_node_put(np); return -EINVAL; } diff --git a/arch/mips/ralink/of.c b/arch/mips/ralink/of.c index 4e38a905ab386..e2c71c6c667da 100644 --- a/arch/mips/ralink/of.c +++ b/arch/mips/ralink/of.c @@ -77,7 +77,7 @@ void __init plat_mem_setup(void) */ if (fw_passed_dtb) dtb = (void *)fw_passed_dtb; - else if (__dtb_start != __dtb_end) + else if (&__dtb_start != &__dtb_end) dtb = (void *)__dtb_start; __dt_setup_arch(dtb); diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c index c9ecf17f86605..74808619fefeb 100644 --- a/arch/mips/rb532/devices.c +++ b/arch/mips/rb532/devices.c @@ -310,11 +310,9 @@ static int __init plat_setup_devices(void) static int __init setup_kmac(char *s) { printk(KERN_INFO "korina mac = %s\n", s); - if (!mac_pton(s, korina_dev0_data.mac)) { + if (!mac_pton(s, korina_dev0_data.mac)) printk(KERN_ERR "Invalid mac\n"); - return -EINVAL; - } - return 0; + return 1; } __setup("kmac=", setup_kmac); diff --git a/arch/mips/sni/time.c b/arch/mips/sni/time.c index dbace1f3e1a97..745ceb945fc50 100644 --- a/arch/mips/sni/time.c +++ b/arch/mips/sni/time.c @@ -18,14 +18,14 @@ static int a20r_set_periodic(struct clock_event_device *evt) { *(volatile u8 *)(A20R_PT_CLOCK_BASE + 12) = 0x34; wmb(); - *(volatile u8 *)(A20R_PT_CLOCK_BASE + 0) = SNI_COUNTER0_DIV; + *(volatile u8 *)(A20R_PT_CLOCK_BASE + 0) = SNI_COUNTER0_DIV & 0xff; wmb(); *(volatile u8 *)(A20R_PT_CLOCK_BASE + 0) = SNI_COUNTER0_DIV >> 8; wmb(); *(volatile u8 *)(A20R_PT_CLOCK_BASE + 12) = 0xb4; wmb(); - *(volatile u8 *)(A20R_PT_CLOCK_BASE + 8) = SNI_COUNTER2_DIV; + *(volatile u8 *)(A20R_PT_CLOCK_BASE + 8) = SNI_COUNTER2_DIV & 0xff; wmb(); *(volatile u8 *)(A20R_PT_CLOCK_BASE + 8) = SNI_COUNTER2_DIV >> 8; wmb(); diff --git a/arch/mips/vr41xx/common/icu.c b/arch/mips/vr41xx/common/icu.c index 7b7f25b4b057e..9240bcdbe74e4 100644 --- a/arch/mips/vr41xx/common/icu.c +++ b/arch/mips/vr41xx/common/icu.c @@ -640,8 +640,6 @@ static int icu_get_irq(unsigned int irq) printk(KERN_ERR "spurious ICU interrupt: %04x,%04x\n", pend1, pend2); - atomic_inc(&irq_err_count); - return -1; } diff --git a/arch/nds32/include/asm/uaccess.h b/arch/nds32/include/asm/uaccess.h index 8916ad9f9f139..e205b1db8c807 100644 --- a/arch/nds32/include/asm/uaccess.h +++ b/arch/nds32/include/asm/uaccess.h @@ -71,9 +71,7 @@ static inline void set_fs(mm_segment_t fs) * versions are void (ie, don't return a value as such). */ -#define get_user __get_user \ - -#define __get_user(x, ptr) \ +#define get_user(x, ptr) \ ({ \ long __gu_err = 0; \ __get_user_check((x), (ptr), __gu_err); \ @@ -86,6 +84,14 @@ static inline void set_fs(mm_segment_t fs) (void)0; \ }) +#define __get_user(x, ptr) \ +({ \ + long __gu_err = 0; \ + const __typeof__(*(ptr)) __user *__p = (ptr); \ + __get_user_err((x), __p, (__gu_err)); \ + __gu_err; \ +}) + #define __get_user_check(x, ptr, err) \ ({ \ const __typeof__(*(ptr)) __user *__p = (ptr); \ @@ -166,12 +172,18 @@ do { \ : "r"(addr), "i"(-EFAULT) \ : "cc") -#define put_user __put_user \ +#define put_user(x, ptr) \ +({ \ + long __pu_err = 0; \ + __put_user_check((x), (ptr), __pu_err); \ + __pu_err; \ +}) #define __put_user(x, ptr) \ ({ \ long __pu_err = 0; \ - __put_user_err((x), (ptr), __pu_err); \ + __typeof__(*(ptr)) __user *__p = (ptr); \ + __put_user_err((x), __p, __pu_err); \ __pu_err; \ }) diff --git a/arch/nds32/kernel/perf_event_cpu.c b/arch/nds32/kernel/perf_event_cpu.c index 334c2a6cec23d..8a4f9babb1646 100644 --- a/arch/nds32/kernel/perf_event_cpu.c +++ b/arch/nds32/kernel/perf_event_cpu.c @@ -1363,6 +1363,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); unsigned long fp = 0; unsigned long gp = 0; unsigned long lp = 0; @@ -1371,7 +1372,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, leaf_fp = 0; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (guest_cbs && guest_cbs->is_in_guest()) { /* We don't support guest os callchain now */ return; } @@ -1479,9 +1480,10 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); struct stackframe fr; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (guest_cbs && guest_cbs->is_in_guest()) { /* We don't support guest os callchain now */ return; } @@ -1493,20 +1495,23 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, unsigned long perf_instruction_pointer(struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); + /* However, NDS32 does not support virtualization */ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) - return perf_guest_cbs->get_guest_ip(); + if (guest_cbs && guest_cbs->is_in_guest()) + return guest_cbs->get_guest_ip(); return instruction_pointer(regs); } unsigned long perf_misc_flags(struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); int misc = 0; /* However, NDS32 does not support virtualization */ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - if (perf_guest_cbs->is_user_mode()) + if (guest_cbs && guest_cbs->is_in_guest()) { + if (guest_cbs->is_user_mode()) misc |= PERF_RECORD_MISC_GUEST_USER; else misc |= PERF_RECORD_MISC_GUEST_KERNEL; diff --git a/arch/nios2/include/asm/irqflags.h b/arch/nios2/include/asm/irqflags.h index b3ec3e510706d..25acf27862f91 100644 --- a/arch/nios2/include/asm/irqflags.h +++ b/arch/nios2/include/asm/irqflags.h @@ -9,7 +9,7 @@ static inline unsigned long arch_local_save_flags(void) { - return RDCTL(CTL_STATUS); + return RDCTL(CTL_FSTATUS); } /* @@ -18,7 +18,7 @@ static inline unsigned long arch_local_save_flags(void) */ static inline void arch_local_irq_restore(unsigned long flags) { - WRCTL(CTL_STATUS, flags); + WRCTL(CTL_FSTATUS, flags); } static inline void arch_local_irq_disable(void) diff --git a/arch/nios2/include/asm/registers.h b/arch/nios2/include/asm/registers.h index 183c720e454d9..95b67dd16f818 100644 --- a/arch/nios2/include/asm/registers.h +++ b/arch/nios2/include/asm/registers.h @@ -11,7 +11,7 @@ #endif /* control register numbers */ -#define CTL_STATUS 0 +#define CTL_FSTATUS 0 #define CTL_ESTATUS 1 #define CTL_BSTATUS 2 #define CTL_IENABLE 3 diff --git a/arch/nios2/include/asm/timex.h b/arch/nios2/include/asm/timex.h index a769f871b28d9..40a1adc9bd03e 100644 --- a/arch/nios2/include/asm/timex.h +++ b/arch/nios2/include/asm/timex.h @@ -8,5 +8,8 @@ typedef unsigned long cycles_t; extern cycles_t get_cycles(void); +#define get_cycles get_cycles + +#define random_get_entropy() (((unsigned long)get_cycles()) ?: random_get_entropy_fallback()) #endif diff --git a/arch/nios2/include/asm/uaccess.h b/arch/nios2/include/asm/uaccess.h index e83f831a76f93..6be5e913c42e4 100644 --- a/arch/nios2/include/asm/uaccess.h +++ b/arch/nios2/include/asm/uaccess.h @@ -89,6 +89,7 @@ extern __must_check long strnlen_user(const char __user *s, long n); /* Optimized macros */ #define __get_user_asm(val, insn, addr, err) \ { \ + unsigned long __gu_val; \ __asm__ __volatile__( \ " movi %0, %3\n" \ "1: " insn " %1, 0(%2)\n" \ @@ -97,14 +98,20 @@ extern __must_check long strnlen_user(const char __user *s, long n); " .section __ex_table,\"a\"\n" \ " .word 1b, 2b\n" \ " .previous" \ - : "=&r" (err), "=r" (val) \ + : "=&r" (err), "=r" (__gu_val) \ : "r" (addr), "i" (-EFAULT)); \ + val = (__force __typeof__(*(addr)))__gu_val; \ } -#define __get_user_unknown(val, size, ptr, err) do { \ +extern void __get_user_unknown(void); + +#define __get_user_8(val, ptr, err) do { \ + u64 __val = 0; \ err = 0; \ - if (__copy_from_user(&(val), ptr, size)) { \ + if (raw_copy_from_user(&(__val), ptr, sizeof(val))) { \ err = -EFAULT; \ + } else { \ + val = (typeof(val))(typeof((val) - (val)))__val; \ } \ } while (0) @@ -120,8 +127,11 @@ do { \ case 4: \ __get_user_asm(val, "ldw", ptr, err); \ break; \ + case 8: \ + __get_user_8(val, ptr, err); \ + break; \ default: \ - __get_user_unknown(val, size, ptr, err); \ + __get_user_unknown(); \ break; \ } \ } while (0) @@ -130,9 +140,7 @@ do { \ ({ \ long __gu_err = -EFAULT; \ const __typeof__(*(ptr)) __user *__gu_ptr = (ptr); \ - unsigned long __gu_val = 0; \ - __get_user_common(__gu_val, sizeof(*(ptr)), __gu_ptr, __gu_err);\ - (x) = (__force __typeof__(x))__gu_val; \ + __get_user_common(x, sizeof(*(ptr)), __gu_ptr, __gu_err); \ __gu_err; \ }) @@ -140,11 +148,9 @@ do { \ ({ \ long __gu_err = -EFAULT; \ const __typeof__(*(ptr)) __user *__gu_ptr = (ptr); \ - unsigned long __gu_val = 0; \ if (access_ok( __gu_ptr, sizeof(*__gu_ptr))) \ - __get_user_common(__gu_val, sizeof(*__gu_ptr), \ + __get_user_common(x, sizeof(*__gu_ptr), \ __gu_ptr, __gu_err); \ - (x) = (__force __typeof__(x))__gu_val; \ __gu_err; \ }) diff --git a/arch/nios2/platform/Kconfig.platform b/arch/nios2/platform/Kconfig.platform index 9e32fb7f3d4ce..e849daff6fd16 100644 --- a/arch/nios2/platform/Kconfig.platform +++ b/arch/nios2/platform/Kconfig.platform @@ -37,6 +37,7 @@ config NIOS2_DTB_PHYS_ADDR config NIOS2_DTB_SOURCE_BOOL bool "Compile and link device tree into kernel image" + depends on !COMPILE_TEST help This allows you to specify a dts (device tree source) file which will be compiled and linked into the kernel image. diff --git a/arch/openrisc/include/asm/timex.h b/arch/openrisc/include/asm/timex.h index d52b4e536e3f9..5487fa93dd9be 100644 --- a/arch/openrisc/include/asm/timex.h +++ b/arch/openrisc/include/asm/timex.h @@ -23,6 +23,7 @@ static inline cycles_t get_cycles(void) { return mfspr(SPR_TTCR); } +#define get_cycles get_cycles /* This isn't really used any more */ #define CLOCK_TICK_RATE 1000 diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c index 4d5b8bd1d7956..bbff5510b3484 100644 --- a/arch/openrisc/kernel/dma.c +++ b/arch/openrisc/kernel/dma.c @@ -103,11 +103,14 @@ arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, * We need to iterate through the pages, clearing the dcache for * them and setting the cache-inhibit bit. */ + mmap_read_lock(&init_mm); if (walk_page_range(&init_mm, va, va + size, &set_nocache_walk_ops, NULL)) { + mmap_read_unlock(&init_mm); free_pages_exact(page, size); return NULL; } + mmap_read_unlock(&init_mm); return (void *)va; } @@ -119,8 +122,10 @@ arch_dma_free(struct device *dev, size_t size, void *vaddr, unsigned long va = (unsigned long)vaddr; /* walk_page_range shouldn't be able to fail here */ + mmap_read_lock(&init_mm); WARN_ON(walk_page_range(&init_mm, va, va + size, &clear_nocache_walk_ops, NULL)); + mmap_read_unlock(&init_mm); free_pages_exact(vaddr, size); } diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S index c6481cfc5220f..6b27cf4a0d786 100644 --- a/arch/openrisc/kernel/entry.S +++ b/arch/openrisc/kernel/entry.S @@ -547,6 +547,7 @@ EXCEPTION_ENTRY(_external_irq_handler) l.bnf 1f // ext irq enabled, all ok. l.nop +#ifdef CONFIG_PRINTK l.addi r1,r1,-0x8 l.movhi r3,hi(42f) l.ori r3,r3,lo(42f) @@ -560,6 +561,7 @@ EXCEPTION_ENTRY(_external_irq_handler) .string "\n\rESR interrupt bug: in _external_irq_handler (ESR %x)\n\r" .align 4 .previous +#endif l.ori r4,r4,SPR_SR_IEE // fix the bug // l.sw PT_SR(r1),r4 diff --git a/arch/openrisc/kernel/head.S b/arch/openrisc/kernel/head.S index b0dc974f9a743..ffbbf639b7f95 100644 --- a/arch/openrisc/kernel/head.S +++ b/arch/openrisc/kernel/head.S @@ -521,6 +521,15 @@ _start: l.ori r3,r0,0x1 l.mtspr r0,r3,SPR_SR + /* + * Start the TTCR as early as possible, so that the RNG can make use of + * measurements of boot time from the earliest opportunity. Especially + * important is that the TTCR does not return zero by the time we reach + * rand_initialize(). + */ + l.movhi r3,hi(SPR_TTMR_CR) + l.mtspr r0,r3,SPR_TTMR + CLEAR_GPR(r1) CLEAR_GPR(r2) CLEAR_GPR(r3) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 0c29d6cb2c8df..c97c01c755665 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -11,6 +11,7 @@ config PARISC select ARCH_WANT_FRAME_POINTERS select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_STRICT_KERNEL_RWX + select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_NO_SG_CHAIN select ARCH_SUPPORTS_MEMORY_FAILURE diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 53f974817affb..04460c2d2f8c1 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -17,7 +17,12 @@ # Mike Shaver, Helge Deller and Martin K. Petersen # +ifdef CONFIG_PARISC_SELF_EXTRACT +boot := arch/parisc/boot +KBUILD_IMAGE := $(boot)/bzImage +else KBUILD_IMAGE := vmlinuz +endif NM = sh $(srctree)/arch/parisc/nm CHECKFLAGS += -D__hppa__=1 @@ -60,7 +65,6 @@ KBUILD_CFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY=1 \ -DFTRACE_PATCHABLE_FUNCTION_SIZE=$(NOP_COUNT) CC_FLAGS_FTRACE := -fpatchable-function-entry=$(NOP_COUNT),$(shell echo $$(($(NOP_COUNT)-1))) -KBUILD_LDS_MODULE += $(srctree)/arch/parisc/kernel/module.lds endif OBJCOPY_FLAGS =-O binary -R .note -R .comment -S diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h index 93caf17ac5e2f..9ebf3b0413d5f 100644 --- a/arch/parisc/include/asm/page.h +++ b/arch/parisc/include/asm/page.h @@ -181,7 +181,7 @@ extern int npmem_ranges; #include #include -#define PAGE0 ((struct zeropage *)__PAGE_OFFSET) +#define PAGE0 ((struct zeropage *)absolute_pointer(__PAGE_OFFSET)) /* DEFINITION OF THE ZERO-PAGE (PAG0) */ /* based on work by Jason Eckhardt (jason@equator.com) */ diff --git a/arch/parisc/include/asm/string.h b/arch/parisc/include/asm/string.h index 4a0c9dbd62fd0..f6e1132f4e352 100644 --- a/arch/parisc/include/asm/string.h +++ b/arch/parisc/include/asm/string.h @@ -8,19 +8,4 @@ extern void * memset(void *, int, size_t); #define __HAVE_ARCH_MEMCPY void * memcpy(void * dest,const void *src,size_t count); -#define __HAVE_ARCH_STRLEN -extern size_t strlen(const char *s); - -#define __HAVE_ARCH_STRCPY -extern char *strcpy(char *dest, const char *src); - -#define __HAVE_ARCH_STRNCPY -extern char *strncpy(char *dest, const char *src, size_t count); - -#define __HAVE_ARCH_STRCAT -extern char *strcat(char *dest, const char *src); - -#define __HAVE_ARCH_MEMSET -extern void *memset(void *, int, size_t); - #endif diff --git a/arch/parisc/include/asm/timex.h b/arch/parisc/include/asm/timex.h index 45537cd4d1d39..1cd2bd3eef33b 100644 --- a/arch/parisc/include/asm/timex.h +++ b/arch/parisc/include/asm/timex.h @@ -12,9 +12,10 @@ typedef unsigned long cycles_t; -static inline cycles_t get_cycles (void) +static inline cycles_t get_cycles(void) { return mfctl(16); } +#define get_cycles get_cycles #endif diff --git a/arch/parisc/install.sh b/arch/parisc/install.sh index 6f68784fea25f..a8c49815f58c8 100644 --- a/arch/parisc/install.sh +++ b/arch/parisc/install.sh @@ -39,6 +39,7 @@ verify "$3" if [ -n "${INSTALLKERNEL}" ]; then if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi + if [ -x /usr/sbin/${INSTALLKERNEL} ]; then exec /usr/sbin/${INSTALLKERNEL} "$@"; fi fi # Default install diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 873bf3434da94..2f64f112934b6 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -1841,8 +1841,8 @@ syscall_restore: LDREG TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1 /* Are we being ptraced? */ - ldw TASK_FLAGS(%r1),%r19 - ldi _TIF_SYSCALL_TRACE_MASK,%r2 + LDREG TI_FLAGS-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r19 + ldi _TIF_SINGLESTEP|_TIF_BLOCKSTEP,%r2 and,COND(=) %r19,%r2,%r0 b,n syscall_restore_rfi diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index ac5f34993b53b..1c50093e2ebee 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -862,7 +863,7 @@ int module_finalize(const Elf_Ehdr *hdr, const char *strtab = NULL; const Elf_Shdr *s; char *secstrings; - int err, symindex = -1; + int symindex = -1; Elf_Sym *newptr, *oldptr; Elf_Shdr *symhdr = NULL; #ifdef DEBUG @@ -946,11 +947,13 @@ int module_finalize(const Elf_Ehdr *hdr, /* patch .altinstructions */ apply_alternatives(aseg, aseg + s->sh_size, me->name); +#ifdef CONFIG_DYNAMIC_FTRACE /* For 32 bit kernels we're compiling modules with * -ffunction-sections so we must relocate the addresses in the - *__mcount_loc section. + * ftrace callsite section. */ - if (symindex != -1 && !strcmp(secname, "__mcount_loc")) { + if (symindex != -1 && !strcmp(secname, FTRACE_CALLSITE_SECTION)) { + int err; if (s->sh_type == SHT_REL) err = apply_relocate((Elf_Shdr *)sechdrs, strtab, symindex, @@ -962,6 +965,7 @@ int module_finalize(const Elf_Ehdr *hdr, if (err) return err; } +#endif } return 0; } diff --git a/arch/parisc/kernel/module.lds b/arch/parisc/kernel/module.lds deleted file mode 100644 index 1a9a92aca5c8a..0000000000000 --- a/arch/parisc/kernel/module.lds +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -SECTIONS { - __mcount_loc : { - *(__patchable_function_entries) - } -} diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c index 8ed409ecec933..e8a6a751dfd8e 100644 --- a/arch/parisc/kernel/parisc_ksyms.c +++ b/arch/parisc/kernel/parisc_ksyms.c @@ -17,10 +17,6 @@ #include EXPORT_SYMBOL(memset); -EXPORT_SYMBOL(strlen); -EXPORT_SYMBOL(strcpy); -EXPORT_SYMBOL(strncpy); -EXPORT_SYMBOL(strcat); #include EXPORT_SYMBOL(__xchg8); diff --git a/arch/parisc/kernel/patch.c b/arch/parisc/kernel/patch.c index 80a0ab372802d..e59574f65e641 100644 --- a/arch/parisc/kernel/patch.c +++ b/arch/parisc/kernel/patch.c @@ -40,10 +40,7 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags, *need_unmap = 1; set_fixmap(fixmap, page_to_phys(page)); - if (flags) - raw_spin_lock_irqsave(&patch_lock, *flags); - else - __acquire(&patch_lock); + raw_spin_lock_irqsave(&patch_lock, *flags); return (void *) (__fix_to_virt(fixmap) + (uintaddr & ~PAGE_MASK)); } @@ -52,10 +49,7 @@ static void __kprobes patch_unmap(int fixmap, unsigned long *flags) { clear_fixmap(fixmap); - if (flags) - raw_spin_unlock_irqrestore(&patch_lock, *flags); - else - __release(&patch_lock); + raw_spin_unlock_irqrestore(&patch_lock, *flags); } void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len) @@ -67,8 +61,9 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len) int mapped; /* Make sure we don't have any aliases in cache */ - flush_kernel_vmap_range(addr, len); - flush_icache_range(start, end); + flush_kernel_dcache_range_asm(start, end); + flush_kernel_icache_range_asm(start, end); + flush_tlb_kernel_range(start, end); p = fixmap = patch_map(addr, FIX_TEXT_POKE0, &flags, &mapped); @@ -81,8 +76,10 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len) * We're crossing a page boundary, so * need to remap */ - flush_kernel_vmap_range((void *)fixmap, - (p-fixmap) * sizeof(*p)); + flush_kernel_dcache_range_asm((unsigned long)fixmap, + (unsigned long)p); + flush_tlb_kernel_range((unsigned long)fixmap, + (unsigned long)p); if (mapped) patch_unmap(FIX_TEXT_POKE0, &flags); p = fixmap = patch_map(addr, FIX_TEXT_POKE0, &flags, @@ -90,10 +87,10 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len) } } - flush_kernel_vmap_range((void *)fixmap, (p-fixmap) * sizeof(*p)); + flush_kernel_dcache_range_asm((unsigned long)fixmap, (unsigned long)p); + flush_tlb_kernel_range((unsigned long)fixmap, (unsigned long)p); if (mapped) patch_unmap(FIX_TEXT_POKE0, &flags); - flush_icache_range(start, end); } void __kprobes __patch_text(void *addr, u32 insn) diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index 13f771f74ee3b..b0045889864c2 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -419,8 +419,7 @@ show_cpuinfo (struct seq_file *m, void *v) } seq_printf(m, " (0x%02lx)\n", boot_cpu_data.pdc.capabilities); - seq_printf(m, "model\t\t: %s\n" - "model name\t: %s\n", + seq_printf(m, "model\t\t: %s - %s\n", boot_cpu_data.pdc.sys_model_name, cpuinfo->dev ? cpuinfo->dev->name : "Unknown"); diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index def33d4d5c0ef..396ca3b75d860 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -238,6 +238,12 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, #endif usp = (regs->gr[30] & ~(0x01UL)); +#ifdef CONFIG_64BIT + if (is_compat_task()) { + /* The gcc alloca implementation leaves garbage in the upper 32 bits of sp */ + usp = (compat_uint_t)usp; + } +#endif /*FIXME: frame_size parameter is unused, remove it. */ frame = get_sigframe(&ksig->ka, usp, sizeof(*frame)); diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index e202c37e56af3..9997465c11820 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -71,7 +72,10 @@ enum ipi_message_type { IPI_CALL_FUNC, IPI_CPU_START, IPI_CPU_STOP, - IPI_CPU_TEST + IPI_CPU_TEST, +#ifdef CONFIG_KGDB + IPI_ENTER_KGDB, +#endif }; @@ -169,7 +173,12 @@ ipi_interrupt(int irq, void *dev_id) case IPI_CPU_TEST: smp_debug(100, KERN_DEBUG "CPU%d is alive!\n", this_cpu); break; - +#ifdef CONFIG_KGDB + case IPI_ENTER_KGDB: + smp_debug(100, KERN_DEBUG "CPU%d ENTER_KGDB\n", this_cpu); + kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs()); + break; +#endif default: printk(KERN_CRIT "Unknown IPI num on CPU%d: %lu\n", this_cpu, which); @@ -225,6 +234,12 @@ send_IPI_allbutself(enum ipi_message_type op) } } +#ifdef CONFIG_KGDB +void kgdb_roundup_cpus(void) +{ + send_IPI_allbutself(IPI_ENTER_KGDB); +} +#endif inline void smp_send_stop(void) { send_IPI_allbutself(IPI_CPU_STOP); } diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index a37814cb66c76..8b616aada6492 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -478,7 +478,7 @@ lws_start: extrd,u %r1,PSW_W_BIT,1,%r1 /* sp must be aligned on 4, so deposit the W bit setting into * the bottom of sp temporarily */ - or,ev %r1,%r30,%r30 + or,od %r1,%r30,%r30 /* Clip LWS number to a 32-bit value for 32-bit processes */ depdi 0, 31, 32, %r20 diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 04508158815c1..9c1ae9d242ca9 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -245,27 +245,13 @@ void __init time_init(void) static int __init init_cr16_clocksource(void) { /* - * The cr16 interval timers are not syncronized across CPUs on - * different sockets, so mark them unstable and lower rating on - * multi-socket SMP systems. + * The cr16 interval timers are not syncronized across CPUs, even if + * they share the same socket. */ if (num_online_cpus() > 1 && !running_on_qemu) { - int cpu; - unsigned long cpu0_loc; - cpu0_loc = per_cpu(cpu_data, 0).cpu_loc; - - for_each_online_cpu(cpu) { - if (cpu == 0) - continue; - if ((cpu0_loc != 0) && - (cpu0_loc == per_cpu(cpu_data, cpu).cpu_loc)) - continue; - - clocksource_cr16.name = "cr16_unstable"; - clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE; - clocksource_cr16.rating = 0; - break; - } + clocksource_cr16.name = "cr16_unstable"; + clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE; + clocksource_cr16.rating = 0; } /* XXX: We may want to mark sched_clock stable here if cr16 clocks are diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 88e5c08eaa8b4..6d360457d70b3 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -783,7 +783,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs) * unless pagefault_disable() was called before. */ - if (fault_space == 0 && !faulthandler_disabled()) + if (faulthandler_disabled() || fault_space == 0) { /* Clean up and return if in exception table. */ if (fixup_exception(regs)) diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index 237d20dd5622d..286cec4d86d7b 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -340,7 +340,7 @@ static int emulate_stw(struct pt_regs *regs, int frreg, int flop) : "r" (val), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r22", "r1", FIXUP_BRANCH_CLOBBER ); - return 0; + return ret; } static int emulate_std(struct pt_regs *regs, int frreg, int flop) { @@ -397,7 +397,7 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) __asm__ __volatile__ ( " mtsp %4, %%sr1\n" " zdep %2, 29, 2, %%r19\n" -" dep %%r0, 31, 2, %2\n" +" dep %%r0, 31, 2, %3\n" " mtsar %%r19\n" " zvdepi -2, 32, %%r19\n" "1: ldw 0(%%sr1,%3),%%r20\n" @@ -409,7 +409,7 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) " andcm %%r21, %%r19, %%r21\n" " or %1, %%r20, %1\n" " or %2, %%r21, %2\n" -"3: stw %1,0(%%sr1,%1)\n" +"3: stw %1,0(%%sr1,%3)\n" "4: stw %%r1,4(%%sr1,%3)\n" "5: stw %2,8(%%sr1,%3)\n" " copy %%r0, %0\n" @@ -596,7 +596,6 @@ void handle_unaligned(struct pt_regs *regs) ret = ERR_NOTHANDLED; /* "undefined", but lets kill them. */ break; } -#ifdef CONFIG_PA20 switch (regs->iir & OPCODE2_MASK) { case OPCODE_FLDD_L: @@ -607,22 +606,23 @@ void handle_unaligned(struct pt_regs *regs) flop=1; ret = emulate_std(regs, R2(regs->iir),1); break; +#ifdef CONFIG_PA20 case OPCODE_LDD_L: ret = emulate_ldd(regs, R2(regs->iir),0); break; case OPCODE_STD_L: ret = emulate_std(regs, R2(regs->iir),0); break; - } #endif + } switch (regs->iir & OPCODE3_MASK) { case OPCODE_FLDW_L: flop=1; - ret = emulate_ldw(regs, R2(regs->iir),0); + ret = emulate_ldw(regs, R2(regs->iir), 1); break; case OPCODE_LDW_M: - ret = emulate_ldw(regs, R2(regs->iir),1); + ret = emulate_ldw(regs, R2(regs->iir), 0); break; case OPCODE_FSTW_L: diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c index 87ae476d1c4f5..86a57fb0e6fae 100644 --- a/arch/parisc/kernel/unwind.c +++ b/arch/parisc/kernel/unwind.c @@ -21,6 +21,8 @@ #include #include +#include +#include /* #define DEBUG 1 */ #ifdef DEBUG @@ -203,6 +205,11 @@ int __init unwind_init(void) return 0; } +static bool pc_is_kernel_fn(unsigned long pc, void *fn) +{ + return (unsigned long)dereference_kernel_function_descriptor(fn) == pc; +} + static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int frame_size) { /* @@ -221,7 +228,7 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int extern void * const _call_on_stack; #endif /* CONFIG_IRQSTACKS */ - if (pc == (unsigned long) &handle_interruption) { + if (pc_is_kernel_fn(pc, handle_interruption)) { struct pt_regs *regs = (struct pt_regs *)(info->sp - frame_size - PT_SZ_ALGN); dbg("Unwinding through handle_interruption()\n"); info->prev_sp = regs->gr[30]; @@ -229,13 +236,13 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int return 1; } - if (pc == (unsigned long) &ret_from_kernel_thread || - pc == (unsigned long) &syscall_exit) { + if (pc_is_kernel_fn(pc, ret_from_kernel_thread) || + pc_is_kernel_fn(pc, syscall_exit)) { info->prev_sp = info->prev_ip = 0; return 1; } - if (pc == (unsigned long) &intr_return) { + if (pc_is_kernel_fn(pc, intr_return)) { struct pt_regs *regs; dbg("Found intr_return()\n"); @@ -246,20 +253,20 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int return 1; } - if (pc == (unsigned long) &_switch_to_ret) { + if (pc_is_kernel_fn(pc, _switch_to) || + pc_is_kernel_fn(pc, _switch_to_ret)) { info->prev_sp = info->sp - CALLEE_SAVE_FRAME_SIZE; info->prev_ip = *(unsigned long *)(info->prev_sp - RP_OFFSET); return 1; } #ifdef CONFIG_IRQSTACKS - if (pc == (unsigned long) &_call_on_stack) { + if (pc_is_kernel_fn(pc, _call_on_stack)) { info->prev_sp = *(unsigned long *)(info->sp - FRAME_SIZE - REG_SZ); info->prev_ip = *(unsigned long *)(info->sp - FRAME_SIZE - RP_OFFSET); return 1; } #endif - return 0; } diff --git a/arch/parisc/lib/Makefile b/arch/parisc/lib/Makefile index 2d7a9974dbaef..7b197667faf6c 100644 --- a/arch/parisc/lib/Makefile +++ b/arch/parisc/lib/Makefile @@ -3,7 +3,7 @@ # Makefile for parisc-specific library files # -lib-y := lusercopy.o bitops.o checksum.o io.o memcpy.o \ - ucmpdi2.o delay.o string.o +lib-y := lusercopy.o bitops.o checksum.o io.o memset.o memcpy.o \ + ucmpdi2.o delay.o obj-y := iomap.o diff --git a/arch/parisc/lib/memset.c b/arch/parisc/lib/memset.c new file mode 100644 index 0000000000000..133e4809859a3 --- /dev/null +++ b/arch/parisc/lib/memset.c @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#include +#include + +#define OPSIZ (BITS_PER_LONG/8) +typedef unsigned long op_t; + +void * +memset (void *dstpp, int sc, size_t len) +{ + unsigned int c = sc; + long int dstp = (long int) dstpp; + + if (len >= 8) + { + size_t xlen; + op_t cccc; + + cccc = (unsigned char) c; + cccc |= cccc << 8; + cccc |= cccc << 16; + if (OPSIZ > 4) + /* Do the shift in two steps to avoid warning if long has 32 bits. */ + cccc |= (cccc << 16) << 16; + + /* There are at least some bytes to set. + No need to test for LEN == 0 in this alignment loop. */ + while (dstp % OPSIZ != 0) + { + ((unsigned char *) dstp)[0] = c; + dstp += 1; + len -= 1; + } + + /* Write 8 `op_t' per iteration until less than 8 `op_t' remain. */ + xlen = len / (OPSIZ * 8); + while (xlen > 0) + { + ((op_t *) dstp)[0] = cccc; + ((op_t *) dstp)[1] = cccc; + ((op_t *) dstp)[2] = cccc; + ((op_t *) dstp)[3] = cccc; + ((op_t *) dstp)[4] = cccc; + ((op_t *) dstp)[5] = cccc; + ((op_t *) dstp)[6] = cccc; + ((op_t *) dstp)[7] = cccc; + dstp += 8 * OPSIZ; + xlen -= 1; + } + len %= OPSIZ * 8; + + /* Write 1 `op_t' per iteration until less than OPSIZ bytes remain. */ + xlen = len / OPSIZ; + while (xlen > 0) + { + ((op_t *) dstp)[0] = cccc; + dstp += OPSIZ; + xlen -= 1; + } + len %= OPSIZ; + } + + /* Write the last few bytes. */ + while (len > 0) + { + ((unsigned char *) dstp)[0] = c; + dstp += 1; + len -= 1; + } + + return dstpp; +} diff --git a/arch/parisc/lib/string.S b/arch/parisc/lib/string.S deleted file mode 100644 index 4a64264427a63..0000000000000 --- a/arch/parisc/lib/string.S +++ /dev/null @@ -1,136 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * PA-RISC assembly string functions - * - * Copyright (C) 2019 Helge Deller - */ - -#include -#include - - .section .text.hot - .level PA_ASM_LEVEL - - t0 = r20 - t1 = r21 - t2 = r22 - -ENTRY_CFI(strlen, frame=0,no_calls) - or,COND(<>) arg0,r0,ret0 - b,l,n .Lstrlen_null_ptr,r0 - depwi 0,31,2,ret0 - cmpb,COND(<>) arg0,ret0,.Lstrlen_not_aligned - ldw,ma 4(ret0),t0 - cmpib,tr 0,r0,.Lstrlen_loop - uxor,nbz r0,t0,r0 -.Lstrlen_not_aligned: - uaddcm arg0,ret0,t1 - shladd t1,3,r0,t1 - mtsar t1 - depwi -1,%sar,32,t0 - uxor,nbz r0,t0,r0 -.Lstrlen_loop: - b,l,n .Lstrlen_end_loop,r0 - ldw,ma 4(ret0),t0 - cmpib,tr 0,r0,.Lstrlen_loop - uxor,nbz r0,t0,r0 -.Lstrlen_end_loop: - extrw,u,<> t0,7,8,r0 - addib,tr,n -3,ret0,.Lstrlen_out - extrw,u,<> t0,15,8,r0 - addib,tr,n -2,ret0,.Lstrlen_out - extrw,u,<> t0,23,8,r0 - addi -1,ret0,ret0 -.Lstrlen_out: - bv r0(rp) - uaddcm ret0,arg0,ret0 -.Lstrlen_null_ptr: - bv,n r0(rp) -ENDPROC_CFI(strlen) - - -ENTRY_CFI(strcpy, frame=0,no_calls) - ldb 0(arg1),t0 - stb t0,0(arg0) - ldo 0(arg0),ret0 - ldo 1(arg1),t1 - cmpb,= r0,t0,2f - ldo 1(arg0),t2 -1: ldb 0(t1),arg1 - stb arg1,0(t2) - ldo 1(t1),t1 - cmpb,<> r0,arg1,1b - ldo 1(t2),t2 -2: bv,n r0(rp) -ENDPROC_CFI(strcpy) - - -ENTRY_CFI(strncpy, frame=0,no_calls) - ldb 0(arg1),t0 - stb t0,0(arg0) - ldo 1(arg1),t1 - ldo 0(arg0),ret0 - cmpb,= r0,t0,2f - ldo 1(arg0),arg1 -1: ldo -1(arg2),arg2 - cmpb,COND(=),n r0,arg2,2f - ldb 0(t1),arg0 - stb arg0,0(arg1) - ldo 1(t1),t1 - cmpb,<> r0,arg0,1b - ldo 1(arg1),arg1 -2: bv,n r0(rp) -ENDPROC_CFI(strncpy) - - -ENTRY_CFI(strcat, frame=0,no_calls) - ldb 0(arg0),t0 - cmpb,= t0,r0,2f - ldo 0(arg0),ret0 - ldo 1(arg0),arg0 -1: ldb 0(arg0),t1 - cmpb,<>,n r0,t1,1b - ldo 1(arg0),arg0 -2: ldb 0(arg1),t2 - stb t2,0(arg0) - ldo 1(arg0),arg0 - ldb 0(arg1),t0 - cmpb,<> r0,t0,2b - ldo 1(arg1),arg1 - bv,n r0(rp) -ENDPROC_CFI(strcat) - - -ENTRY_CFI(memset, frame=0,no_calls) - copy arg0,ret0 - cmpb,COND(=) r0,arg0,4f - copy arg0,t2 - cmpb,COND(=) r0,arg2,4f - ldo -1(arg2),arg3 - subi -1,arg3,t0 - subi 0,t0,t1 - cmpiclr,COND(>=) 0,t1,arg2 - ldo -1(t1),arg2 - extru arg2,31,2,arg0 -2: stb arg1,0(t2) - ldo 1(t2),t2 - addib,>= -1,arg0,2b - ldo -1(arg3),arg3 - cmpiclr,COND(<=) 4,arg2,r0 - b,l,n 4f,r0 -#ifdef CONFIG_64BIT - depd,* r0,63,2,arg2 -#else - depw r0,31,2,arg2 -#endif - ldo 1(t2),t2 -3: stb arg1,-1(t2) - stb arg1,0(t2) - stb arg1,1(t2) - stb arg1,2(t2) - addib,COND(>) -4,arg2,3b - ldo 4(t2),t2 -4: bv,n r0(rp) -ENDPROC_CFI(memset) - - .end diff --git a/arch/parisc/math-emu/fpudispatch.c b/arch/parisc/math-emu/fpudispatch.c index 7c46969ead9b1..01ed133227c25 100644 --- a/arch/parisc/math-emu/fpudispatch.c +++ b/arch/parisc/math-emu/fpudispatch.c @@ -310,12 +310,15 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) r1 &= ~3; fpregs[t+3] = fpregs[r1+3]; fpregs[t+2] = fpregs[r1+2]; + fallthrough; case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ fpregs[t] = fpregs[r1]; return(NOEXCEPTION); } + BUG(); case 3: /* FABS */ switch (fmt) { case 2: /* illegal */ @@ -325,13 +328,16 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) r1 &= ~3; fpregs[t+3] = fpregs[r1+3]; fpregs[t+2] = fpregs[r1+2]; + fallthrough; case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ /* copy and clear sign bit */ fpregs[t] = fpregs[r1] & 0x7fffffff; return(NOEXCEPTION); } + BUG(); case 6: /* FNEG */ switch (fmt) { case 2: /* illegal */ @@ -341,13 +347,16 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) r1 &= ~3; fpregs[t+3] = fpregs[r1+3]; fpregs[t+2] = fpregs[r1+2]; + fallthrough; case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ /* copy and invert sign bit */ fpregs[t] = fpregs[r1] ^ 0x80000000; return(NOEXCEPTION); } + BUG(); case 7: /* FNEGABS */ switch (fmt) { case 2: /* illegal */ @@ -357,13 +366,16 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) r1 &= ~3; fpregs[t+3] = fpregs[r1+3]; fpregs[t+2] = fpregs[r1+2]; + fallthrough; case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ /* copy and set sign bit */ fpregs[t] = fpregs[r1] | 0x80000000; return(NOEXCEPTION); } + BUG(); case 4: /* FSQRT */ switch (fmt) { case 0: @@ -376,6 +388,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); case 5: /* FRND */ switch (fmt) { case 0: @@ -389,7 +402,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(MAJOR_0C_EXCP); } } /* end of switch (subop) */ - + BUG(); case 1: /* class 1 */ df = extru(ir,fpdfpos,2); /* get dest format */ if ((df & 2) || (fmt & 2)) { @@ -419,6 +432,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* dbl/dbl */ return(MAJOR_0C_EXCP); } + BUG(); case 1: /* FCNVXF */ switch(fmt) { case 0: /* sgl/sgl */ @@ -434,6 +448,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(dbl_to_dbl_fcnvxf(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 2: /* FCNVFX */ switch(fmt) { case 0: /* sgl/sgl */ @@ -449,6 +464,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(dbl_to_dbl_fcnvfx(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 3: /* FCNVFXT */ switch(fmt) { case 0: /* sgl/sgl */ @@ -464,6 +480,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(dbl_to_dbl_fcnvfxt(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 5: /* FCNVUF (PA2.0 only) */ switch(fmt) { case 0: /* sgl/sgl */ @@ -479,6 +496,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(dbl_to_dbl_fcnvuf(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 6: /* FCNVFU (PA2.0 only) */ switch(fmt) { case 0: /* sgl/sgl */ @@ -494,6 +512,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(dbl_to_dbl_fcnvfu(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 7: /* FCNVFUT (PA2.0 only) */ switch(fmt) { case 0: /* sgl/sgl */ @@ -509,10 +528,11 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(dbl_to_dbl_fcnvfut(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 4: /* undefined */ return(MAJOR_0C_EXCP); } /* end of switch subop */ - + BUG(); case 2: /* class 2 */ fpu_type_flags=fpregs[FPU_TYPE_FLAG_POS]; r2 = extru(ir, fpr2pos, 5) * sizeof(double)/sizeof(u_int); @@ -590,6 +610,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); case 1: /* FTEST */ switch (fmt) { case 0: @@ -609,8 +630,10 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: return(MAJOR_0C_EXCP); } + BUG(); } /* end of switch subop */ } /* end of else for PA1.0 & PA1.1 */ + BUG(); case 3: /* class 3 */ r2 = extru(ir,fpr2pos,5) * sizeof(double)/sizeof(u_int); if (r2 == 0) @@ -633,6 +656,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); case 1: /* FSUB */ switch (fmt) { case 0: @@ -645,6 +669,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); case 2: /* FMPY */ switch (fmt) { case 0: @@ -657,6 +682,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); case 3: /* FDIV */ switch (fmt) { case 0: @@ -669,6 +695,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); case 4: /* FREM */ switch (fmt) { case 0: @@ -681,6 +708,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); } /* end of class 3 switch */ } /* end of switch(class) */ @@ -736,10 +764,12 @@ u_int fpregs[]; return(MAJOR_0E_EXCP); case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ fpregs[t] = fpregs[r1]; return(NOEXCEPTION); } + BUG(); case 3: /* FABS */ switch (fmt) { case 2: @@ -747,10 +777,12 @@ u_int fpregs[]; return(MAJOR_0E_EXCP); case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ fpregs[t] = fpregs[r1] & 0x7fffffff; return(NOEXCEPTION); } + BUG(); case 6: /* FNEG */ switch (fmt) { case 2: @@ -758,10 +790,12 @@ u_int fpregs[]; return(MAJOR_0E_EXCP); case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ fpregs[t] = fpregs[r1] ^ 0x80000000; return(NOEXCEPTION); } + BUG(); case 7: /* FNEGABS */ switch (fmt) { case 2: @@ -769,10 +803,12 @@ u_int fpregs[]; return(MAJOR_0E_EXCP); case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ fpregs[t] = fpregs[r1] | 0x80000000; return(NOEXCEPTION); } + BUG(); case 4: /* FSQRT */ switch (fmt) { case 0: @@ -785,6 +821,7 @@ u_int fpregs[]; case 3: return(MAJOR_0E_EXCP); } + BUG(); case 5: /* FRMD */ switch (fmt) { case 0: @@ -798,7 +835,7 @@ u_int fpregs[]; return(MAJOR_0E_EXCP); } } /* end of switch (subop */ - + BUG(); case 1: /* class 1 */ df = extru(ir,fpdfpos,2); /* get dest format */ /* @@ -826,6 +863,7 @@ u_int fpregs[]; case 3: /* dbl/dbl */ return(MAJOR_0E_EXCP); } + BUG(); case 1: /* FCNVXF */ switch(fmt) { case 0: /* sgl/sgl */ @@ -841,6 +879,7 @@ u_int fpregs[]; return(dbl_to_dbl_fcnvxf(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 2: /* FCNVFX */ switch(fmt) { case 0: /* sgl/sgl */ @@ -856,6 +895,7 @@ u_int fpregs[]; return(dbl_to_dbl_fcnvfx(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 3: /* FCNVFXT */ switch(fmt) { case 0: /* sgl/sgl */ @@ -871,6 +911,7 @@ u_int fpregs[]; return(dbl_to_dbl_fcnvfxt(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 5: /* FCNVUF (PA2.0 only) */ switch(fmt) { case 0: /* sgl/sgl */ @@ -886,6 +927,7 @@ u_int fpregs[]; return(dbl_to_dbl_fcnvuf(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 6: /* FCNVFU (PA2.0 only) */ switch(fmt) { case 0: /* sgl/sgl */ @@ -901,6 +943,7 @@ u_int fpregs[]; return(dbl_to_dbl_fcnvfu(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 7: /* FCNVFUT (PA2.0 only) */ switch(fmt) { case 0: /* sgl/sgl */ @@ -916,9 +959,11 @@ u_int fpregs[]; return(dbl_to_dbl_fcnvfut(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 4: /* undefined */ return(MAJOR_0C_EXCP); } /* end of switch subop */ + BUG(); case 2: /* class 2 */ /* * Be careful out there. @@ -994,6 +1039,7 @@ u_int fpregs[]; } } /* end of switch subop */ } /* end of else for PA1.0 & PA1.1 */ + BUG(); case 3: /* class 3 */ /* * Be careful out there. @@ -1026,6 +1072,7 @@ u_int fpregs[]; return(dbl_fadd(&fpregs[r1],&fpregs[r2], &fpregs[t],status)); } + BUG(); case 1: /* FSUB */ switch (fmt) { case 0: @@ -1035,6 +1082,7 @@ u_int fpregs[]; return(dbl_fsub(&fpregs[r1],&fpregs[r2], &fpregs[t],status)); } + BUG(); case 2: /* FMPY or XMPYU */ /* * check for integer multiply (x bit set) @@ -1071,6 +1119,7 @@ u_int fpregs[]; &fpregs[r2],&fpregs[t],status)); } } + BUG(); case 3: /* FDIV */ switch (fmt) { case 0: @@ -1080,6 +1129,7 @@ u_int fpregs[]; return(dbl_fdiv(&fpregs[r1],&fpregs[r2], &fpregs[t],status)); } + BUG(); case 4: /* FREM */ switch (fmt) { case 0: diff --git a/arch/parisc/mm/fixmap.c b/arch/parisc/mm/fixmap.c index 474cd241c1509..02e19a32e6c5f 100644 --- a/arch/parisc/mm/fixmap.c +++ b/arch/parisc/mm/fixmap.c @@ -18,12 +18,9 @@ void notrace set_fixmap(enum fixed_addresses idx, phys_addr_t phys) pte_t *pte; if (pmd_none(*pmd)) - pmd = pmd_alloc(NULL, pgd, vaddr); - - pte = pte_offset_kernel(pmd, vaddr); - if (pte_none(*pte)) pte = pte_alloc_kernel(pmd, vaddr); + pte = pte_offset_kernel(pmd, vaddr); set_pte_at(&init_mm, vaddr, pte, __mk_pte(phys, PAGE_KERNEL_RWX)); flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE); } diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 3e54484797f62..15b7dae1808db 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -347,9 +347,9 @@ static void __init setup_bootmem(void) static bool kernel_set_to_readonly; -static void __init map_pages(unsigned long start_vaddr, - unsigned long start_paddr, unsigned long size, - pgprot_t pgprot, int force) +static void __ref map_pages(unsigned long start_vaddr, + unsigned long start_paddr, unsigned long size, + pgprot_t pgprot, int force) { pgd_t *pg_dir; pmd_t *pmd; @@ -485,7 +485,7 @@ void __init set_kernel_text_rw(int enable_read_write) flush_tlb_all(); } -void __ref free_initmem(void) +void free_initmem(void) { unsigned long init_begin = (unsigned long)__init_begin; unsigned long init_end = (unsigned long)__init_end; @@ -499,7 +499,6 @@ void __ref free_initmem(void) /* The init text pages are marked R-X. We have to * flush the icache and mark them RW- * - * This is tricky, because map_pages is in the init section. * Do a dummy remap of the data section first (the data * section is already PAGE_KERNEL) to pull in the TLB entries * for map_kernel */ @@ -892,9 +891,9 @@ void flush_tlb_all(void) { int do_recycle; - __inc_irq_stat(irq_tlb_count); do_recycle = 0; spin_lock(&sid_lock); + __inc_irq_stat(irq_tlb_count); if (dirty_space_ids > RECYCLE_THRESHOLD) { BUG_ON(recycle_inuse); /* FIXME: Use a semaphore/wait queue here */ get_dirty_sids(&recycle_ndirty,recycle_dirty_array); @@ -913,8 +912,8 @@ void flush_tlb_all(void) #else void flush_tlb_all(void) { - __inc_irq_stat(irq_tlb_count); spin_lock(&sid_lock); + __inc_irq_stat(irq_tlb_count); flush_tlb_all_local(NULL); recycle_sids(); spin_unlock(&sid_lock); diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 9f73fb6b1cc91..b9d2fcf030d0d 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -174,7 +174,7 @@ else CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power7,$(call cc-option,-mtune=power5)) CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mcpu=power5,-mcpu=power4) endif -else +else ifdef CONFIG_PPC_BOOK3E_64 CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64 endif diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S index 92608f34d3123..1d83966f5ef64 100644 --- a/arch/powerpc/boot/crt0.S +++ b/arch/powerpc/boot/crt0.S @@ -44,9 +44,6 @@ p_end: .long _end p_pstack: .long _platform_stack_top #endif - .globl _zimage_start - /* Clang appears to require the .weak directive to be after the symbol - * is defined. See https://bugs.llvm.org/show_bug.cgi?id=38921 */ .weak _zimage_start _zimage_start: .globl _zimage_start_lib diff --git a/arch/powerpc/boot/dts/charon.dts b/arch/powerpc/boot/dts/charon.dts index 408b486b13dff..cd589539f313f 100644 --- a/arch/powerpc/boot/dts/charon.dts +++ b/arch/powerpc/boot/dts/charon.dts @@ -35,7 +35,7 @@ }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x08000000>; // 128MB }; diff --git a/arch/powerpc/boot/dts/digsy_mtc.dts b/arch/powerpc/boot/dts/digsy_mtc.dts index 0e5e9d3acf79f..19a14e62e65f4 100644 --- a/arch/powerpc/boot/dts/digsy_mtc.dts +++ b/arch/powerpc/boot/dts/digsy_mtc.dts @@ -16,7 +16,7 @@ model = "intercontrol,digsy-mtc"; compatible = "intercontrol,digsy-mtc"; - memory { + memory@0 { reg = <0x00000000 0x02000000>; // 32MB }; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi index c90702b04a530..48e5cd61599c6 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi @@ -79,6 +79,7 @@ fman0: fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xfc000 0x1000>; + fsl,erratum-a009885; }; xmdio0: mdio@fd000 { @@ -86,6 +87,7 @@ fman0: fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xfd000 0x1000>; + fsl,erratum-a009885; }; }; diff --git a/arch/powerpc/boot/dts/fsl/t1023rdb.dts b/arch/powerpc/boot/dts/fsl/t1023rdb.dts index 5ba6fbfca2742..f82f85c65964c 100644 --- a/arch/powerpc/boot/dts/fsl/t1023rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1023rdb.dts @@ -154,7 +154,7 @@ fm1mac3: ethernet@e4000 { phy-handle = <&sgmii_aqr_phy3>; - phy-connection-type = "sgmii-2500"; + phy-connection-type = "2500base-x"; sleep = <&rcpm 0x20000000>; }; diff --git a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi index 099a598c74c00..bfe1ed5be3374 100644 --- a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi +++ b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi @@ -139,12 +139,12 @@ fman@400000 { ethernet@e6000 { phy-handle = <&phy_rgmii_0>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; }; ethernet@e8000 { phy-handle = <&phy_rgmii_1>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; }; mdio0: mdio@fc000 { diff --git a/arch/powerpc/boot/dts/lite5200.dts b/arch/powerpc/boot/dts/lite5200.dts index cb2782dd6132c..e7b194775d783 100644 --- a/arch/powerpc/boot/dts/lite5200.dts +++ b/arch/powerpc/boot/dts/lite5200.dts @@ -32,7 +32,7 @@ }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x04000000>; // 64MB }; diff --git a/arch/powerpc/boot/dts/lite5200b.dts b/arch/powerpc/boot/dts/lite5200b.dts index 2b86c81f90485..547cbe726ff23 100644 --- a/arch/powerpc/boot/dts/lite5200b.dts +++ b/arch/powerpc/boot/dts/lite5200b.dts @@ -31,7 +31,7 @@ led4 { gpios = <&gpio_simple 2 1>; }; }; - memory { + memory@0 { reg = <0x00000000 0x10000000>; // 256MB }; diff --git a/arch/powerpc/boot/dts/media5200.dts b/arch/powerpc/boot/dts/media5200.dts index 61cae9dcddef4..f3188018faceb 100644 --- a/arch/powerpc/boot/dts/media5200.dts +++ b/arch/powerpc/boot/dts/media5200.dts @@ -32,7 +32,7 @@ }; }; - memory { + memory@0 { reg = <0x00000000 0x08000000>; // 128MB RAM }; diff --git a/arch/powerpc/boot/dts/mpc5200b.dtsi b/arch/powerpc/boot/dts/mpc5200b.dtsi index 648fe31795f49..8b796f3b11da7 100644 --- a/arch/powerpc/boot/dts/mpc5200b.dtsi +++ b/arch/powerpc/boot/dts/mpc5200b.dtsi @@ -33,7 +33,7 @@ }; }; - memory: memory { + memory: memory@0 { device_type = "memory"; reg = <0x00000000 0x04000000>; // 64MB }; diff --git a/arch/powerpc/boot/dts/o2d.dts b/arch/powerpc/boot/dts/o2d.dts index 24a46f65e5299..e0a8d3034417f 100644 --- a/arch/powerpc/boot/dts/o2d.dts +++ b/arch/powerpc/boot/dts/o2d.dts @@ -12,7 +12,7 @@ model = "ifm,o2d"; compatible = "ifm,o2d"; - memory { + memory@0 { reg = <0x00000000 0x08000000>; // 128MB }; diff --git a/arch/powerpc/boot/dts/o2d.dtsi b/arch/powerpc/boot/dts/o2d.dtsi index 6661955a2be47..b55a9e5bd828c 100644 --- a/arch/powerpc/boot/dts/o2d.dtsi +++ b/arch/powerpc/boot/dts/o2d.dtsi @@ -19,7 +19,7 @@ model = "ifm,o2d"; compatible = "ifm,o2d"; - memory { + memory@0 { reg = <0x00000000 0x04000000>; // 64MB }; diff --git a/arch/powerpc/boot/dts/o2dnt2.dts b/arch/powerpc/boot/dts/o2dnt2.dts index eeba7f5507d5d..c2eedbd1f5fcb 100644 --- a/arch/powerpc/boot/dts/o2dnt2.dts +++ b/arch/powerpc/boot/dts/o2dnt2.dts @@ -12,7 +12,7 @@ model = "ifm,o2dnt2"; compatible = "ifm,o2d"; - memory { + memory@0 { reg = <0x00000000 0x08000000>; // 128MB }; diff --git a/arch/powerpc/boot/dts/o3dnt.dts b/arch/powerpc/boot/dts/o3dnt.dts index fd00396b0593e..e4c1bdd412716 100644 --- a/arch/powerpc/boot/dts/o3dnt.dts +++ b/arch/powerpc/boot/dts/o3dnt.dts @@ -12,7 +12,7 @@ model = "ifm,o3dnt"; compatible = "ifm,o2d"; - memory { + memory@0 { reg = <0x00000000 0x04000000>; // 64MB }; diff --git a/arch/powerpc/boot/dts/pcm032.dts b/arch/powerpc/boot/dts/pcm032.dts index c259c6b3ac5ab..5674f978b9830 100644 --- a/arch/powerpc/boot/dts/pcm032.dts +++ b/arch/powerpc/boot/dts/pcm032.dts @@ -22,7 +22,7 @@ model = "phytec,pcm032"; compatible = "phytec,pcm032"; - memory { + memory@0 { reg = <0x00000000 0x08000000>; // 128MB }; diff --git a/arch/powerpc/boot/dts/tqm5200.dts b/arch/powerpc/boot/dts/tqm5200.dts index 9ed0bc78967e1..5bb25a9e40a01 100644 --- a/arch/powerpc/boot/dts/tqm5200.dts +++ b/arch/powerpc/boot/dts/tqm5200.dts @@ -32,7 +32,7 @@ }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x04000000>; // 64MB }; diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig index 285d506c5a769..2f5e06309f096 100644 --- a/arch/powerpc/configs/mpc885_ads_defconfig +++ b/arch/powerpc/configs/mpc885_ads_defconfig @@ -39,6 +39,7 @@ CONFIG_MTD_CFI_GEOMETRY=y # CONFIG_MTD_CFI_I2 is not set CONFIG_MTD_CFI_I4=y CONFIG_MTD_CFI_AMDSTD=y +CONFIG_MTD_PHYSMAP=y CONFIG_MTD_PHYSMAP_OF=y # CONFIG_BLK_DEV is not set CONFIG_NETDEVICES=y diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h index a09595f00cabe..f0f16b4fc5ea1 100644 --- a/arch/powerpc/include/asm/archrandom.h +++ b/arch/powerpc/include/asm/archrandom.h @@ -6,27 +6,28 @@ #include -static inline int arch_get_random_long(unsigned long *v) +static inline bool arch_get_random_long(unsigned long *v) { - return 0; + return false; } -static inline int arch_get_random_int(unsigned int *v) +static inline bool arch_get_random_int(unsigned int *v) { - return 0; + return false; } -static inline int arch_get_random_seed_long(unsigned long *v) +static inline bool arch_get_random_seed_long(unsigned long *v) { if (ppc_md.get_random_seed) return ppc_md.get_random_seed(v); - return 0; + return false; } -static inline int arch_get_random_seed_int(unsigned int *v) + +static inline bool arch_get_random_seed_int(unsigned int *v) { unsigned long val; - int rc; + bool rc; rc = arch_get_random_seed_long(&val); if (rc) @@ -34,16 +35,6 @@ static inline int arch_get_random_seed_int(unsigned int *v) return rc; } - -static inline int arch_has_random(void) -{ - return 0; -} - -static inline int arch_has_random_seed(void) -{ - return !!ppc_md.get_random_seed; -} #endif /* CONFIG_ARCH_RANDOM */ #ifdef CONFIG_PPC_POWERNV diff --git a/arch/powerpc/include/asm/bpf_perf_event.h b/arch/powerpc/include/asm/bpf_perf_event.h new file mode 100644 index 0000000000000..e8a7b4ffb58c2 --- /dev/null +++ b/arch/powerpc/include/asm/bpf_perf_event.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_BPF_PERF_EVENT_H +#define _ASM_POWERPC_BPF_PERF_EVENT_H + +#include + +typedef struct user_pt_regs bpf_user_pt_regs_t; + +#endif /* _ASM_POWERPC_BPF_PERF_EVENT_H */ diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index be0f7257b13c8..96ef54bc7d968 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -22,6 +22,7 @@ #define BRANCH_ABSOLUTE 0x2 bool is_offset_in_branch_range(long offset); +bool is_offset_in_cond_branch_range(long offset); unsigned int create_branch(const unsigned int *addr, unsigned long target, int flags); unsigned int create_cond_branch(const unsigned int *addr, diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index f54a08a2cd709..017336f2b0864 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -96,7 +96,7 @@ static inline bool arch_syscall_match_sym_name(const char *sym, const char *name #endif /* PPC64_ELF_ABI_v1 */ #endif /* CONFIG_FTRACE_SYSCALLS */ -#ifdef CONFIG_PPC64 +#if defined(CONFIG_PPC64) && defined(CONFIG_FUNCTION_TRACER) #include static inline void this_cpu_disable_ftrace(void) @@ -108,9 +108,12 @@ static inline void this_cpu_enable_ftrace(void) { get_paca()->ftrace_enabled = 1; } + +void ftrace_free_init_tramp(void); #else /* CONFIG_PPC64 */ static inline void this_cpu_disable_ftrace(void) { } static inline void this_cpu_enable_ftrace(void) { } +static inline void ftrace_free_init_tramp(void) { } #endif /* CONFIG_PPC64 */ #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index a63ec938636de..daba2d2a02a0b 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -345,25 +345,37 @@ static inline void __raw_writeq_be(unsigned long v, volatile void __iomem *addr) */ static inline void __raw_rm_writeb(u8 val, volatile void __iomem *paddr) { - __asm__ __volatile__("stbcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + stbcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } static inline void __raw_rm_writew(u16 val, volatile void __iomem *paddr) { - __asm__ __volatile__("sthcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + sthcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } static inline void __raw_rm_writel(u32 val, volatile void __iomem *paddr) { - __asm__ __volatile__("stwcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + stwcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr) { - __asm__ __volatile__("stdcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + stdcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } @@ -375,7 +387,10 @@ static inline void __raw_rm_writeq_be(u64 val, volatile void __iomem *paddr) static inline u8 __raw_rm_readb(volatile void __iomem *paddr) { u8 ret; - __asm__ __volatile__("lbzcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + lbzcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } @@ -383,7 +398,10 @@ static inline u8 __raw_rm_readb(volatile void __iomem *paddr) static inline u16 __raw_rm_readw(volatile void __iomem *paddr) { u16 ret; - __asm__ __volatile__("lhzcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + lhzcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } @@ -391,7 +409,10 @@ static inline u16 __raw_rm_readw(volatile void __iomem *paddr) static inline u32 __raw_rm_readl(volatile void __iomem *paddr) { u32 ret; - __asm__ __volatile__("lwzcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + lwzcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } @@ -399,7 +420,10 @@ static inline u32 __raw_rm_readl(volatile void __iomem *paddr) static inline u64 __raw_rm_readq(volatile void __iomem *paddr) { u64 ret; - __asm__ __volatile__("ldcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + ldcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 6ba5adb96a3be..d92353a96f811 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -132,7 +132,11 @@ static inline bool pfn_valid(unsigned long pfn) #define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) -#define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr)) +#define virt_addr_valid(vaddr) ({ \ + unsigned long _addr = (unsigned long)vaddr; \ + _addr >= PAGE_OFFSET && _addr < (unsigned long)high_memory && \ + pfn_valid(virt_to_pfn(_addr)); \ +}) /* * On Book-E parts we need __va to parse the device tree and we can't @@ -212,6 +216,9 @@ static inline bool pfn_valid(unsigned long pfn) #define __pa(x) ((unsigned long)(x) - VIRT_PHYS_OFFSET) #else #ifdef CONFIG_PPC64 + +#define VIRTUAL_WARN_ON(x) WARN_ON(IS_ENABLED(CONFIG_DEBUG_VIRTUAL) && (x)) + /* * gcc miscompiles (unsigned long)(&static_var) - PAGE_OFFSET * with -mcmodel=medium, so we use & and | instead of - and + on 64-bit. @@ -219,13 +226,13 @@ static inline bool pfn_valid(unsigned long pfn) */ #define __va(x) \ ({ \ - VIRTUAL_BUG_ON((unsigned long)(x) >= PAGE_OFFSET); \ + VIRTUAL_WARN_ON((unsigned long)(x) >= PAGE_OFFSET); \ (void *)(unsigned long)((phys_addr_t)(x) | PAGE_OFFSET); \ }) #define __pa(x) \ ({ \ - VIRTUAL_BUG_ON((unsigned long)(x) < PAGE_OFFSET); \ + VIRTUAL_WARN_ON((unsigned long)(x) < PAGE_OFFSET); \ (unsigned long)(x) & 0x0fffffffffffffffUL; \ }) diff --git a/arch/powerpc/include/asm/pmc.h b/arch/powerpc/include/asm/pmc.h index c6bbe9778d3cd..3c09109e708ef 100644 --- a/arch/powerpc/include/asm/pmc.h +++ b/arch/powerpc/include/asm/pmc.h @@ -34,6 +34,13 @@ static inline void ppc_set_pmu_inuse(int inuse) #endif } +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +static inline int ppc_get_pmu_inuse(void) +{ + return get_paca()->pmcregs_in_use; +} +#endif + extern void power4_enable_pmcs(void); #else /* CONFIG_PPC64 */ diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index c1df75edde441..a9af63bd430f2 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -204,6 +204,7 @@ #define PPC_INST_ICBT 0x7c00002c #define PPC_INST_ICSWX 0x7c00032d #define PPC_INST_ICSWEPX 0x7c00076d +#define PPC_INST_DSSALL 0x7e00066c #define PPC_INST_ISEL 0x7c00001e #define PPC_INST_ISEL_MASK 0xfc00003e #define PPC_INST_LDARX 0x7c0000a8 @@ -439,6 +440,7 @@ __PPC_RA(a) | __PPC_RB(b)) #define PPC_DCBZL(a, b) stringify_in_c(.long PPC_INST_DCBZL | \ __PPC_RA(a) | __PPC_RB(b)) +#define PPC_DSSALL stringify_in_c(.long PPC_INST_DSSALL) #define PPC_LQARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LQARX | \ ___PPC_RT(t) | ___PPC_RA(a) | \ ___PPC_RB(b) | __PPC_EH(eh)) diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index e9e3f85134e54..316a9c8d1929e 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -39,6 +39,11 @@ static inline bool security_ftr_enabled(u64 feature) return !!(powerpc_security_features & feature); } +#ifdef CONFIG_PPC_BOOK3S_64 +enum stf_barrier_type stf_barrier_type_get(void); +#else +static inline enum stf_barrier_type stf_barrier_type_get(void) { return STF_BARRIER_NONE; } +#endif // Features indicating support for Spectre/Meltdown mitigations diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h index 6047402b0a4db..5405cb2fe65a2 100644 --- a/arch/powerpc/include/asm/timex.h +++ b/arch/powerpc/include/asm/timex.h @@ -22,6 +22,7 @@ static inline cycles_t get_cycles(void) return mftb(); } +#define get_cycles get_cycles #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_TIMEX_H */ diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index cafad1960e766..a14a32f6c3ccd 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -191,8 +191,11 @@ extern long __get_user_bad(void); */ #define __get_user_atomic_128_aligned(kaddr, uaddr, err) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine altivec\n" \ "1: lvx 0,0,%1 # get user\n" \ " stvx 0,0,%2 # put kernel\n" \ + ".machine pop\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%3\n" \ diff --git a/arch/powerpc/include/uapi/asm/bpf_perf_event.h b/arch/powerpc/include/uapi/asm/bpf_perf_event.h deleted file mode 100644 index 5e1e648aeec4c..0000000000000 --- a/arch/powerpc/include/uapi/asm/bpf_perf_event.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__ -#define _UAPI__ASM_BPF_PERF_EVENT_H__ - -#include - -typedef struct user_pt_regs bpf_user_pt_regs_t; - -#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index afbd47b0a75cc..5819a577d267a 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -13,6 +13,7 @@ CFLAGS_prom_init.o += -fPIC CFLAGS_btext.o += -fPIC endif +CFLAGS_early_32.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_cputable.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_prom_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 6dfceaa820e42..b0e0b3cd91eec 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -250,8 +250,10 @@ int __init btext_find_display(int allow_nonstdout) rc = btext_initialize(np); printk("result: %d\n", rc); } - if (rc == 0) + if (rc == 0) { + of_node_put(np); break; + } } return rc; } diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 4fd7efdf2a53a..2a327adb9ac09 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -230,7 +230,7 @@ static void eeh_dev_save_state(struct eeh_dev *edev, void *userdata) pci_save_state(pdev); } -static void eeh_set_channel_state(struct eeh_pe *root, enum pci_channel_state s) +static void eeh_set_channel_state(struct eeh_pe *root, pci_channel_state_t s) { struct eeh_pe *pe; struct eeh_dev *edev, *tmp; diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 0455dc1b27977..69d64f406204f 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -835,7 +835,6 @@ static int fadump_alloc_mem_ranges(struct fadump_mrange_info *mrange_info) sizeof(struct fadump_memory_range)); return 0; } - static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info, u64 base, u64 end) { @@ -854,7 +853,12 @@ static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info, start = mem_ranges[mrange_info->mem_range_cnt - 1].base; size = mem_ranges[mrange_info->mem_range_cnt - 1].size; - if ((start + size) == base) + /* + * Boot memory area needs separate PT_LOAD segment(s) as it + * is moved to a different location at the time of crash. + * So, fold only if the region is not boot memory area. + */ + if ((start + size) == base && start >= fw_dump.boot_mem_top) is_adjacent = true; } if (!is_adjacent) { diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index a36fd053c3dba..0615ba86baef3 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -37,7 +37,7 @@ static int __init powersave_off(char *arg) { ppc_md.power_save = NULL; cpuidle_disable = IDLE_POWERSAVE_OFF; - return 0; + return 1; } __setup("powersave=off", powersave_off); diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S index 0ffdd18b9f268..acb8215c5a01d 100644 --- a/arch/powerpc/kernel/idle_6xx.S +++ b/arch/powerpc/kernel/idle_6xx.S @@ -129,7 +129,7 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFCLR(CPU_FTR_NO_DPM) mtspr SPRN_HID0,r4 BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL sync END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) lwz r8,TI_LOCAL_FLAGS(r2) /* set napping bit */ diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index d32751994a62c..c3f62c9ce7406 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -50,28 +50,32 @@ _GLOBAL(isa300_idle_stop_mayloss) std r1,PACAR1(r13) mflr r4 mfcr r5 - /* use stack red zone rather than a new frame for saving regs */ - std r2,-8*0(r1) - std r14,-8*1(r1) - std r15,-8*2(r1) - std r16,-8*3(r1) - std r17,-8*4(r1) - std r18,-8*5(r1) - std r19,-8*6(r1) - std r20,-8*7(r1) - std r21,-8*8(r1) - std r22,-8*9(r1) - std r23,-8*10(r1) - std r24,-8*11(r1) - std r25,-8*12(r1) - std r26,-8*13(r1) - std r27,-8*14(r1) - std r28,-8*15(r1) - std r29,-8*16(r1) - std r30,-8*17(r1) - std r31,-8*18(r1) - std r4,-8*19(r1) - std r5,-8*20(r1) + /* + * Use the stack red zone rather than a new frame for saving regs since + * in the case of no GPR loss the wakeup code branches directly back to + * the caller without deallocating the stack frame first. + */ + std r2,-8*1(r1) + std r14,-8*2(r1) + std r15,-8*3(r1) + std r16,-8*4(r1) + std r17,-8*5(r1) + std r18,-8*6(r1) + std r19,-8*7(r1) + std r20,-8*8(r1) + std r21,-8*9(r1) + std r22,-8*10(r1) + std r23,-8*11(r1) + std r24,-8*12(r1) + std r25,-8*13(r1) + std r26,-8*14(r1) + std r27,-8*15(r1) + std r28,-8*16(r1) + std r29,-8*17(r1) + std r30,-8*18(r1) + std r31,-8*19(r1) + std r4,-8*20(r1) + std r5,-8*21(r1) /* 168 bytes */ PPC_STOP b . /* catch bugs */ @@ -87,8 +91,8 @@ _GLOBAL(isa300_idle_stop_mayloss) */ _GLOBAL(idle_return_gpr_loss) ld r1,PACAR1(r13) - ld r4,-8*19(r1) - ld r5,-8*20(r1) + ld r4,-8*20(r1) + ld r5,-8*21(r1) mtlr r4 mtcr r5 /* @@ -96,38 +100,40 @@ _GLOBAL(idle_return_gpr_loss) * from PACATOC. This could be avoided for that less common case * if KVM saved its r2. */ - ld r2,-8*0(r1) - ld r14,-8*1(r1) - ld r15,-8*2(r1) - ld r16,-8*3(r1) - ld r17,-8*4(r1) - ld r18,-8*5(r1) - ld r19,-8*6(r1) - ld r20,-8*7(r1) - ld r21,-8*8(r1) - ld r22,-8*9(r1) - ld r23,-8*10(r1) - ld r24,-8*11(r1) - ld r25,-8*12(r1) - ld r26,-8*13(r1) - ld r27,-8*14(r1) - ld r28,-8*15(r1) - ld r29,-8*16(r1) - ld r30,-8*17(r1) - ld r31,-8*18(r1) + ld r2,-8*1(r1) + ld r14,-8*2(r1) + ld r15,-8*3(r1) + ld r16,-8*4(r1) + ld r17,-8*5(r1) + ld r18,-8*6(r1) + ld r19,-8*7(r1) + ld r20,-8*8(r1) + ld r21,-8*9(r1) + ld r22,-8*10(r1) + ld r23,-8*11(r1) + ld r24,-8*12(r1) + ld r25,-8*13(r1) + ld r26,-8*14(r1) + ld r27,-8*15(r1) + ld r28,-8*16(r1) + ld r29,-8*17(r1) + ld r30,-8*18(r1) + ld r31,-8*19(r1) blr /* * This is the sequence required to execute idle instructions, as * specified in ISA v2.07 (and earlier). MSR[IR] and MSR[DR] must be 0. - * - * The 0(r1) slot is used to save r2 in isa206, so use that here. + * We have to store a GPR somewhere, ptesync, then reload it, and create + * a false dependency on the result of the load. It doesn't matter which + * GPR we store, or where we store it. We have already stored r2 to the + * stack at -8(r1) in isa206_idle_insn_mayloss, so use that. */ #define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \ /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \ - std r2,0(r1); \ + std r2,-8(r1); \ ptesync; \ - ld r2,0(r1); \ + ld r2,-8(r1); \ 236: cmpd cr0,r2,r2; \ bne 236b; \ IDLE_INST; \ @@ -152,28 +158,32 @@ _GLOBAL(isa206_idle_insn_mayloss) std r1,PACAR1(r13) mflr r4 mfcr r5 - /* use stack red zone rather than a new frame for saving regs */ - std r2,-8*0(r1) - std r14,-8*1(r1) - std r15,-8*2(r1) - std r16,-8*3(r1) - std r17,-8*4(r1) - std r18,-8*5(r1) - std r19,-8*6(r1) - std r20,-8*7(r1) - std r21,-8*8(r1) - std r22,-8*9(r1) - std r23,-8*10(r1) - std r24,-8*11(r1) - std r25,-8*12(r1) - std r26,-8*13(r1) - std r27,-8*14(r1) - std r28,-8*15(r1) - std r29,-8*16(r1) - std r30,-8*17(r1) - std r31,-8*18(r1) - std r4,-8*19(r1) - std r5,-8*20(r1) + /* + * Use the stack red zone rather than a new frame for saving regs since + * in the case of no GPR loss the wakeup code branches directly back to + * the caller without deallocating the stack frame first. + */ + std r2,-8*1(r1) + std r14,-8*2(r1) + std r15,-8*3(r1) + std r16,-8*4(r1) + std r17,-8*5(r1) + std r18,-8*6(r1) + std r19,-8*7(r1) + std r20,-8*8(r1) + std r21,-8*9(r1) + std r22,-8*10(r1) + std r23,-8*11(r1) + std r24,-8*12(r1) + std r25,-8*13(r1) + std r26,-8*14(r1) + std r27,-8*15(r1) + std r28,-8*16(r1) + std r29,-8*17(r1) + std r30,-8*18(r1) + std r31,-8*19(r1) + std r4,-8*20(r1) + std r5,-8*21(r1) cmpwi r3,PNV_THREAD_NAP bne 1f IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP) diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 617eba82531cb..d89cf802d9aa7 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -669,7 +669,7 @@ static void __init kvm_use_magic_page(void) on_each_cpu(kvm_map_magic_page, &features, 1); /* Quick self-test to see if the mapping works */ - if (!fault_in_pages_readable((const char *)KVM_MAGIC_PAGE, sizeof(u32))) { + if (fault_in_pages_readable((const char *)KVM_MAGIC_PAGE, sizeof(u32))) { kvm_patching_worked = false; return; } diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S index 2020d255585fa..7684f644e93e4 100644 --- a/arch/powerpc/kernel/l2cr_6xx.S +++ b/arch/powerpc/kernel/l2cr_6xx.S @@ -96,7 +96,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L2CR) /* Stop DST streams */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL sync END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) @@ -293,7 +293,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L3CR) isync /* Stop DST streams */ - DSSALL + PPC_DSSALL sync /* Get the current enable bit of the L3CR into r4 */ @@ -402,7 +402,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_L3CR) _GLOBAL(__flush_disable_L1) /* Stop pending alitvec streams and memory accesses */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) sync diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index 7a1c11a7cba5a..716f8bb17461c 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -146,11 +146,18 @@ void __init reserve_crashkernel(void) if (!crashk_res.start) { #ifdef CONFIG_PPC64 /* - * On 64bit we split the RMO in half but cap it at half of - * a small SLB (128MB) since the crash kernel needs to place - * itself and some stacks to be in the first segment. + * On the LPAR platform place the crash kernel to mid of + * RMA size (512MB or more) to ensure the crash kernel + * gets enough space to place itself and some stack to be + * in the first segment. At the same time normal kernel + * also get enough space to allocate memory for essential + * system resource in the first segment. Keep the crash + * kernel starts at 128MB offset on other platforms. */ - crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2)); + if (firmware_has_feature(FW_FEATURE_LPAR)) + crashk_res.start = ppc64_rma_size / 2; + else + crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2)); #else crashk_res.start = KDUMP_KERNELBASE; #endif diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index c94bba9142e7e..cf87573e6e785 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1719,7 +1719,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) tm_reclaim_current(0); #endif - memset(regs->gpr, 0, sizeof(regs->gpr)); + memset(®s->gpr[1], 0, sizeof(regs->gpr) - sizeof(regs->gpr[0])); regs->ctr = 0; regs->link = 0; regs->xer = 0; @@ -2001,12 +2001,12 @@ static unsigned long __get_wchan(struct task_struct *p) return 0; do { - sp = *(unsigned long *)sp; + sp = READ_ONCE_NOCHECK(*(unsigned long *)sp); if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD) || p->state == TASK_RUNNING) return 0; if (count > 0) { - ip = ((unsigned long *)sp)[STACK_FRAME_LR_SAVE]; + ip = READ_ONCE_NOCHECK(((unsigned long *)sp)[STACK_FRAME_LR_SAVE]); if (!in_sched_functions(ip)) return ip; } diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 1b65fb7c0bdaa..7f4e2c031a9ab 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2919,7 +2919,7 @@ static void __init fixup_device_tree_efika_add_phy(void) /* Check if the phy-handle property exists - bail if it does */ rv = prom_getprop(node, "phy-handle", prop, sizeof(prop)); - if (!rv) + if (rv <= 0) return; /* diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index b183ab9c5107c..dfa5f729f774d 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -13,7 +13,7 @@ # If you really need to reference something from prom_init.o add # it to the list below: -grep "^CONFIG_KASAN=y$" .config >/dev/null +grep "^CONFIG_KASAN=y$" ${KCONFIG_CONFIG} >/dev/null if [ $? -eq 0 ] then MEM_FUNCS="__memcpy __memset" diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 8c92febf5f443..63bfc5250b67e 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -3014,8 +3014,13 @@ long arch_ptrace(struct task_struct *child, long request, flush_fp_to_thread(child); if (fpidx < (PT_FPSCR - PT_FPR0)) - memcpy(&tmp, &child->thread.TS_FPR(fpidx), - sizeof(long)); + if (IS_ENABLED(CONFIG_PPC32)) { + // On 32-bit the index we are passed refers to 32-bit words + tmp = ((u32 *)child->thread.fp_state.fpr)[fpidx]; + } else { + memcpy(&tmp, &child->thread.TS_FPR(fpidx), + sizeof(long)); + } else tmp = child->thread.fp_state.fpscr; } @@ -3047,8 +3052,13 @@ long arch_ptrace(struct task_struct *child, long request, flush_fp_to_thread(child); if (fpidx < (PT_FPSCR - PT_FPR0)) - memcpy(&child->thread.TS_FPR(fpidx), &data, - sizeof(long)); + if (IS_ENABLED(CONFIG_PPC32)) { + // On 32-bit the index we are passed refers to 32-bit words + ((u32 *)child->thread.fp_state.fpr)[fpidx] = data; + } else { + memcpy(&child->thread.TS_FPR(fpidx), &data, + sizeof(long)); + } else child->thread.fp_state.fpscr = data; ret = 0; @@ -3398,4 +3408,7 @@ void __init pt_regs_check(void) offsetof(struct user_pt_regs, result)); BUILD_BUG_ON(sizeof(struct user_pt_regs) > sizeof(struct pt_regs)); + + // ptrace_get/put_fpr() rely on PPC32 and VSX being incompatible + BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC32) && IS_ENABLED(CONFIG_VSX)); } diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index c1e2e351ebff8..35e246e39705b 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -988,7 +988,7 @@ static struct rtas_filter rtas_filters[] __ro_after_init = { { "get-time-of-day", -1, -1, -1, -1, -1 }, { "ibm,get-vpd", -1, 0, -1, 1, 2 }, { "ibm,lpar-perftools", -1, 2, 3, -1, -1 }, - { "ibm,platform-dump", -1, 4, 5, -1, -1 }, + { "ibm,platform-dump", -1, 4, 5, -1, -1 }, /* Special cased */ { "ibm,read-slot-reset-state", -1, -1, -1, -1, -1 }, { "ibm,scan-log-dump", -1, 0, 1, -1, -1 }, { "ibm,set-dynamic-indicator", -1, 2, -1, -1, -1 }, @@ -1035,6 +1035,15 @@ static bool block_rtas_call(int token, int nargs, size = 1; end = base + size - 1; + + /* + * Special case for ibm,platform-dump - NULL buffer + * address is used to indicate end of dump processing + */ + if (!strcmp(f->name, "ibm,platform-dump") && + base == 0) + return false; + if (!in_rmo_buf(base, end)) goto err; } @@ -1244,6 +1253,12 @@ int __init early_init_dt_scan_rtas(unsigned long node, entryp = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL); sizep = of_get_flat_dt_prop(node, "rtas-size", NULL); +#ifdef CONFIG_PPC64 + /* need this feature to decide the crashkernel offset */ + if (of_get_flat_dt_prop(node, "ibm,hypertas-functions", NULL)) + powerpc_firmware_features |= FW_FEATURE_LPAR; +#endif + if (basep && entryp && sizep) { rtas.base = *basep; rtas.entry = *entryp; diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 1740a66cea84d..ff022e7256934 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -256,6 +256,11 @@ static int __init handle_no_stf_barrier(char *p) early_param("no_stf_barrier", handle_no_stf_barrier); +enum stf_barrier_type stf_barrier_type_get(void) +{ + return stf_enabled_flush_types; +} + /* This is the generic flag used by other architectures */ static int __init handle_ssbd(char *p) { diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index c06cac543f188..4de63ec2e1551 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -582,6 +582,36 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) } #endif +#ifdef CONFIG_NMI_IPI +static void crash_stop_this_cpu(struct pt_regs *regs) +#else +static void crash_stop_this_cpu(void *dummy) +#endif +{ + /* + * Just busy wait here and avoid marking CPU as offline to ensure + * register data is captured appropriately. + */ + while (1) + cpu_relax(); +} + +void crash_smp_send_stop(void) +{ + static bool stopped = false; + + if (stopped) + return; + + stopped = true; + +#ifdef CONFIG_NMI_IPI + smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_stop_this_cpu, 1000000); +#else + smp_call_function(crash_stop_this_cpu, NULL, 0); +#endif /* CONFIG_NMI_IPI */ +} + #ifdef CONFIG_NMI_IPI static void nmi_stop_this_cpu(struct pt_regs *regs) { @@ -1296,10 +1326,12 @@ void start_secondary(void *unused) BUG(); } +#ifdef CONFIG_PROFILING int setup_profiling_timer(unsigned int multiplier) { return 0; } +#endif #ifdef CONFIG_SCHED_SMT /* cpumask of CPUs with asymetric SMT dependancy */ diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index b13c6213b0d9b..890f95151fb44 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -8,6 +8,7 @@ * Copyright 2018 Nick Piggin, Michael Ellerman, IBM Corp. */ +#include #include #include #include diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S index cbdf86228eaaa..54c44aea338c4 100644 --- a/arch/powerpc/kernel/swsusp_32.S +++ b/arch/powerpc/kernel/swsusp_32.S @@ -181,7 +181,7 @@ _GLOBAL(swsusp_arch_resume) #ifdef CONFIG_ALTIVEC /* Stop pending alitvec streams and memory accesses */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif sync diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S index 6d3189830dd32..068a268a8013e 100644 --- a/arch/powerpc/kernel/swsusp_asm64.S +++ b/arch/powerpc/kernel/swsusp_asm64.S @@ -142,7 +142,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR) _GLOBAL(swsusp_arch_resume) /* Stop pending alitvec streams and memory accesses */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) sync diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index 0b4694b8d2482..8ece45c2a1f64 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -203,7 +203,7 @@ static int __init TAU_init(void) tau_int_enable = IS_ENABLED(CONFIG_TAU_INT) && !strcmp(cur_cpu_spec->platform, "ppc750"); - tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1, 0); + tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1); if (!tau_workq) return -ENOMEM; diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 7ea0ca044b650..d816e714f2f48 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -328,9 +328,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp) /* Is this a known long jump tramp? */ for (i = 0; i < NUM_FTRACE_TRAMPS; i++) - if (!ftrace_tramps[i]) - break; - else if (ftrace_tramps[i] == tramp) + if (ftrace_tramps[i] == tramp) return 0; /* Is this a known plt tramp? */ @@ -868,6 +866,17 @@ void arch_ftrace_update_code(int command) extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[]; +void ftrace_free_init_tramp(void) +{ + int i; + + for (i = 0; i < NUM_FTRACE_TRAMPS && ftrace_tramps[i]; i++) + if (ftrace_tramps[i] == (unsigned long)ftrace_tramp_init) { + ftrace_tramps[i] = 0; + return; + } +} + int __init ftrace_dyn_arch_init(void) { int i; diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index af3c15a1d41eb..75b2a6c4db5a5 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -132,6 +132,10 @@ static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb) { cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask); cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask); + /* + * See wd_smp_clear_cpu_pending() + */ + smp_mb(); if (cpumask_empty(&wd_smp_cpus_pending)) { wd_smp_last_reset_tb = tb; cpumask_andnot(&wd_smp_cpus_pending, @@ -217,13 +221,44 @@ static void wd_smp_clear_cpu_pending(int cpu, u64 tb) cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck); wd_smp_unlock(&flags); + } else { + /* + * The last CPU to clear pending should have reset the + * watchdog so we generally should not find it empty + * here if our CPU was clear. However it could happen + * due to a rare race with another CPU taking the + * last CPU out of the mask concurrently. + * + * We can't add a warning for it. But just in case + * there is a problem with the watchdog that is causing + * the mask to not be reset, try to kick it along here. + */ + if (unlikely(cpumask_empty(&wd_smp_cpus_pending))) + goto none_pending; } return; } + cpumask_clear_cpu(cpu, &wd_smp_cpus_pending); + + /* + * Order the store to clear pending with the load(s) to check all + * words in the pending mask to check they are all empty. This orders + * with the same barrier on another CPU. This prevents two CPUs + * clearing the last 2 pending bits, but neither seeing the other's + * store when checking if the mask is empty, and missing an empty + * mask, which ends with a false positive. + */ + smp_mb(); if (cpumask_empty(&wd_smp_cpus_pending)) { unsigned long flags; +none_pending: + /* + * Double check under lock because more than one CPU could see + * a clear mask with the lockless check after clearing their + * pending bits. + */ wd_smp_lock(&flags); if (cpumask_empty(&wd_smp_cpus_pending)) { wd_smp_last_reset_tb = tb; @@ -314,8 +349,12 @@ void arch_touch_nmi_watchdog(void) { unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000; int cpu = smp_processor_id(); - u64 tb = get_tb(); + u64 tb; + if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) + return; + + tb = get_tb(); if (tb - per_cpu(wd_timer_tb, cpu) >= ticks) { per_cpu(wd_timer_tb, cpu) = tb; wd_smp_clear_cpu_pending(cpu, tb); diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 03b947429e4de..4518a0f2d6c69 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -420,13 +420,19 @@ static void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt, tbl[idx % TCES_PER_PAGE] = tce; } -static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl, - unsigned long entry) +static void kvmppc_clear_tce(struct mm_struct *mm, struct kvmppc_spapr_tce_table *stt, + struct iommu_table *tbl, unsigned long entry) { - unsigned long hpa = 0; - enum dma_data_direction dir = DMA_NONE; + unsigned long i; + unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift); + unsigned long io_entry = entry << (stt->page_shift - tbl->it_page_shift); + + for (i = 0; i < subpages; ++i) { + unsigned long hpa = 0; + enum dma_data_direction dir = DMA_NONE; - iommu_tce_xchg_no_kill(mm, tbl, entry, &hpa, &dir); + iommu_tce_xchg_no_kill(mm, tbl, io_entry + i, &hpa, &dir); + } } static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm, @@ -485,6 +491,8 @@ static long kvmppc_tce_iommu_unmap(struct kvm *kvm, break; } + iommu_tce_kill(tbl, io_entry, subpages); + return ret; } @@ -544,6 +552,8 @@ static long kvmppc_tce_iommu_map(struct kvm *kvm, break; } + iommu_tce_kill(tbl, io_entry, subpages); + return ret; } @@ -590,10 +600,9 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl, entry, ua, dir); - iommu_tce_kill(stit->tbl, entry, 1); if (ret != H_SUCCESS) { - kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry); + kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl, entry); goto unlock_exit; } } @@ -669,13 +678,13 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, */ if (get_user(tce, tces + i)) { ret = H_TOO_HARD; - goto invalidate_exit; + goto unlock_exit; } tce = be64_to_cpu(tce); if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) { ret = H_PARAMETER; - goto invalidate_exit; + goto unlock_exit; } list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { @@ -684,19 +693,15 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, iommu_tce_direction(tce)); if (ret != H_SUCCESS) { - kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, - entry); - goto invalidate_exit; + kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl, + entry + i); + goto unlock_exit; } } kvmppc_tce_put(stt, entry + i, tce); } -invalidate_exit: - list_for_each_entry_lockless(stit, &stt->iommu_tables, next) - iommu_tce_kill(stit->tbl, entry, npages); - unlock_exit: srcu_read_unlock(&vcpu->kvm->srcu, idx); @@ -735,20 +740,16 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, continue; if (ret == H_TOO_HARD) - goto invalidate_exit; + return ret; WARN_ON_ONCE(1); - kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry); + kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl, entry + i); } } for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value); -invalidate_exit: - list_for_each_entry_lockless(stit, &stt->iommu_tables, next) - iommu_tce_kill(stit->tbl, ioba >> stt->page_shift, npages); - return ret; } EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce); diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index ab6eeb8e753e5..abb49d8633298 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -177,10 +177,13 @@ static void kvmppc_rm_tce_put(struct kvmppc_spapr_tce_table *stt, idx -= stt->offset; page = stt->pages[idx / TCES_PER_PAGE]; /* - * page must not be NULL in real mode, - * kvmppc_rm_ioba_validate() must have taken care of this. + * kvmppc_rm_ioba_validate() allows pages not be allocated if TCE is + * being cleared, otherwise it returns H_TOO_HARD and we skip this. */ - WARN_ON_ONCE_RM(!page); + if (!page) { + WARN_ON_ONCE_RM(tce != 0); + return; + } tbl = kvmppc_page_address(page); tbl[idx % TCES_PER_PAGE] = tce; @@ -248,13 +251,19 @@ extern void iommu_tce_kill_rm(struct iommu_table *tbl, tbl->it_ops->tce_kill(tbl, entry, pages, true); } -static void kvmppc_rm_clear_tce(struct kvm *kvm, struct iommu_table *tbl, - unsigned long entry) +static void kvmppc_rm_clear_tce(struct kvm *kvm, struct kvmppc_spapr_tce_table *stt, + struct iommu_table *tbl, unsigned long entry) { - unsigned long hpa = 0; - enum dma_data_direction dir = DMA_NONE; + unsigned long i; + unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift); + unsigned long io_entry = entry << (stt->page_shift - tbl->it_page_shift); - iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir); + for (i = 0; i < subpages; ++i) { + unsigned long hpa = 0; + enum dma_data_direction dir = DMA_NONE; + + iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, io_entry + i, &hpa, &dir); + } } static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm, @@ -317,6 +326,8 @@ static long kvmppc_rm_tce_iommu_unmap(struct kvm *kvm, break; } + iommu_tce_kill_rm(tbl, io_entry, subpages); + return ret; } @@ -380,6 +391,8 @@ static long kvmppc_rm_tce_iommu_map(struct kvm *kvm, break; } + iommu_tce_kill_rm(tbl, io_entry, subpages); + return ret; } @@ -425,10 +438,8 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt, stit->tbl, entry, ua, dir); - iommu_tce_kill_rm(stit->tbl, entry, 1); - if (ret != H_SUCCESS) { - kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry); + kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl, entry); return ret; } } @@ -568,7 +579,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, ua = 0; if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) { ret = H_PARAMETER; - goto invalidate_exit; + goto unlock_exit; } list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { @@ -577,19 +588,15 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, iommu_tce_direction(tce)); if (ret != H_SUCCESS) { - kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, - entry); - goto invalidate_exit; + kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl, + entry + i); + goto unlock_exit; } } kvmppc_rm_tce_put(stt, entry + i, tce); } -invalidate_exit: - list_for_each_entry_lockless(stit, &stt->iommu_tables, next) - iommu_tce_kill_rm(stit->tbl, entry, npages); - unlock_exit: if (rmap) unlock_rmap(rmap); @@ -632,20 +639,16 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, continue; if (ret == H_TOO_HARD) - goto invalidate_exit; + return ret; WARN_ON_ONCE_RM(1); - kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry); + kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl, entry + i); } } for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) kvmppc_rm_tce_put(stt, ioba >> stt->page_shift, tce_value); -invalidate_exit: - list_for_each_entry_lockless(stit, &stt->iommu_tables, next) - iommu_tce_kill_rm(stit->tbl, ioba >> stt->page_shift, npages); - return ret; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 6f48e68f98b98..e51cf193a9633 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include #include @@ -3559,6 +3560,18 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true); +#ifdef CONFIG_PPC_PSERIES + if (kvmhv_on_pseries()) { + barrier(); + if (vcpu->arch.vpa.pinned_addr) { + struct lppaca *lp = vcpu->arch.vpa.pinned_addr; + get_lppaca()->pmcregs_in_use = lp->pmcregs_in_use; + } else { + get_lppaca()->pmcregs_in_use = 1; + } + barrier(); + } +#endif kvmhv_load_guest_pmu(vcpu); msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX); @@ -3693,6 +3706,13 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, save_pmu |= nesting_enabled(vcpu->kvm); kvmhv_save_guest_pmu(vcpu, save_pmu); +#ifdef CONFIG_PPC_PSERIES + if (kvmhv_on_pseries()) { + barrier(); + get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse(); + barrier(); + } +#endif vc->entry_exit_map = 0x101; vc->in_guest = 0; diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 4a91b543a8540..6d34b69729854 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -821,6 +821,7 @@ static void flush_guest_tlb(struct kvm *kvm) "r" (0) : "memory"); } asm volatile("ptesync": : :"memory"); + // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now. asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory"); } else { for (set = 0; set < kvm->arch.tlb_sets; ++set) { @@ -831,7 +832,9 @@ static void flush_guest_tlb(struct kvm *kvm) rb += PPC_BIT(51); /* increment set number */ } asm volatile("ptesync": : :"memory"); - asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory"); + // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now. + if (cpu_has_feature(CPU_FTR_ARCH_300)) + asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory"); } } diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 9906d203d9d39..613d24b707abe 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -510,7 +510,7 @@ long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu) if (eaddr & (0xFFFUL << 52)) return H_PARAMETER; - buf = kzalloc(n, GFP_KERNEL); + buf = kzalloc(n, GFP_KERNEL | __GFP_NOWARN); if (!buf) return H_NO_MEM; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index c6fbbd29bd871..c9c6619564ffa 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -292,13 +292,16 @@ kvm_novcpu_exit: * r3 contains the SRR1 wakeup value, SRR1 is trashed. */ _GLOBAL(idle_kvm_start_guest) - ld r4,PACAEMERGSP(r13) mfcr r5 mflr r0 - std r1,0(r4) - std r5,8(r4) - std r0,16(r4) - subi r1,r4,STACK_FRAME_OVERHEAD + std r5, 8(r1) // Save CR in caller's frame + std r0, 16(r1) // Save LR in caller's frame + // Create frame on emergency stack + ld r4, PACAEMERGSP(r13) + stdu r1, -SWITCH_FRAME_SIZE(r4) + // Switch to new frame on emergency stack + mr r1, r4 + std r3, 32(r1) // Save SRR1 wakeup value SAVE_NVGPRS(r1) /* @@ -350,6 +353,10 @@ kvm_unsplit_wakeup: kvm_secondary_got_guest: + // About to go to guest, clear saved SRR1 + li r0, 0 + std r0, 32(r1) + /* Set HSTATE_DSCR(r13) to something sensible */ ld r6, PACA_DSCR_DEFAULT(r13) std r6, HSTATE_DSCR(r13) @@ -441,13 +448,12 @@ kvm_no_guest: mfspr r4, SPRN_LPCR rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 mtspr SPRN_LPCR, r4 - /* set up r3 for return */ - mfspr r3,SPRN_SRR1 + // Return SRR1 wakeup value, or 0 if we went into the guest + ld r3, 32(r1) REST_NVGPRS(r1) - addi r1, r1, STACK_FRAME_OVERHEAD - ld r0, 16(r1) - ld r5, 8(r1) - ld r1, 0(r1) + ld r1, 0(r1) // Switch back to caller stack + ld r0, 16(r1) // Reload LR + ld r5, 8(r1) // Reload CR mtlr r0 mtcr r5 blr @@ -2529,7 +2535,7 @@ hcall_real_table: .globl hcall_real_table_end hcall_real_table_end: -_GLOBAL(kvmppc_h_set_xdabr) +_GLOBAL_TOC(kvmppc_h_set_xdabr) EXPORT_SYMBOL_GPL(kvmppc_h_set_xdabr) andi. r0, r5, DABRX_USER | DABRX_KERNEL beq 6f @@ -2539,7 +2545,7 @@ EXPORT_SYMBOL_GPL(kvmppc_h_set_xdabr) 6: li r3, H_PARAMETER blr -_GLOBAL(kvmppc_h_set_dabr) +_GLOBAL_TOC(kvmppc_h_set_dabr) EXPORT_SYMBOL_GPL(kvmppc_h_set_dabr) li r5, DABRX_USER | DABRX_KERNEL 3: @@ -3137,7 +3143,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST) /* The following code handles the fake_suspend = 1 case */ mflr r0 std r0, PPC_LR_STKOFF(r1) - stdu r1, -PPC_MIN_STKFRM(r1) + stdu r1, -TM_FRAME_SIZE(r1) /* Turn on TM. */ mfmsr r8 @@ -3152,10 +3158,42 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG) nop + /* + * It's possible that treclaim. may modify registers, if we have lost + * track of fake-suspend state in the guest due to it using rfscv. + * Save and restore registers in case this occurs. + */ + mfspr r3, SPRN_DSCR + mfspr r4, SPRN_XER + mfspr r5, SPRN_AMR + /* SPRN_TAR would need to be saved here if the kernel ever used it */ + mfcr r12 + SAVE_NVGPRS(r1) + SAVE_GPR(2, r1) + SAVE_GPR(3, r1) + SAVE_GPR(4, r1) + SAVE_GPR(5, r1) + stw r12, 8(r1) + std r1, HSTATE_HOST_R1(r13) + /* We have to treclaim here because that's the only way to do S->N */ li r3, TM_CAUSE_KVM_RESCHED TRECLAIM(R3) + GET_PACA(r13) + ld r1, HSTATE_HOST_R1(r13) + REST_GPR(2, r1) + REST_GPR(3, r1) + REST_GPR(4, r1) + REST_GPR(5, r1) + lwz r12, 8(r1) + REST_NVGPRS(r1) + mtspr SPRN_DSCR, r3 + mtspr SPRN_XER, r4 + mtspr SPRN_AMR, r5 + mtcr r12 + HMT_MEDIUM + /* * We were in fake suspend, so we are not going to save the * register state as the guest checkpointed state (since @@ -3183,7 +3221,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG) std r5, VCPU_TFHAR(r9) std r6, VCPU_TFIAR(r9) - addi r1, r1, PPC_MIN_STKFRM + addi r1, r1, TM_FRAME_SIZE ld r0, PPC_LR_STKOFF(r1) mtlr r0 blr diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 8dd4d2b83677b..eb8c72846b7fc 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1495,7 +1495,7 @@ int kvmppc_handle_vmx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, { enum emulation_result emulated = EMULATE_DONE; - if (vcpu->arch.mmio_vsx_copy_nums > 2) + if (vcpu->arch.mmio_vmx_copy_nums > 2) return EMULATE_FAIL; while (vcpu->arch.mmio_vmx_copy_nums) { @@ -1592,7 +1592,7 @@ int kvmppc_handle_vmx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int index = rs & KVM_MMIO_REG_MASK; enum emulation_result emulated = EMULATE_DONE; - if (vcpu->arch.mmio_vsx_copy_nums > 2) + if (vcpu->arch.mmio_vmx_copy_nums > 2) return EMULATE_FAIL; vcpu->arch.io_gpr = rs; diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index b8de3be10eb47..8656b8d2ce555 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -16,6 +16,9 @@ CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING endif +CFLAGS_code-patching.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) +CFLAGS_feature-fixups.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) + obj-y += alloc.o code-patching.o feature-fixups.o pmem.o ifndef CONFIG_KASAN diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 3345f039a8769..a05f289e613ed 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -221,6 +221,11 @@ bool is_offset_in_branch_range(long offset) return (offset >= -0x2000000 && offset <= 0x1fffffc && !(offset & 0x3)); } +bool is_offset_in_cond_branch_range(long offset) +{ + return offset >= -0x8000 && offset <= 0x7fff && !(offset & 0x3); +} + /* * Helper to check if a given instruction is a conditional branch * Derived from the conditional checks in analyse_instr() @@ -274,7 +279,7 @@ unsigned int create_cond_branch(const unsigned int *addr, offset = offset - (unsigned long)addr; /* Check we can represent the target in the instruction format */ - if (offset < -0x8000 || offset > 0x7FFF || offset & 0x3) + if (!is_offset_in_cond_branch_range(offset)) return 0; /* Mask out the flags and target, so they don't step on each other. */ diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index c077acb983a19..27650cd5857f9 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -906,7 +906,10 @@ NOKPROBE_SYMBOL(emulate_dcbz); #define __put_user_asmx(x, addr, err, op, cr) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine power8\n" \ "1: " op " %2,0,%3\n" \ + ".machine pop\n" \ " mfcr %1\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ @@ -919,7 +922,10 @@ NOKPROBE_SYMBOL(emulate_dcbz); #define __get_user_asmx(x, addr, err, op) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine power8\n" \ "1: "op" %1,0,%2\n" \ + ".machine pop\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%3\n" \ @@ -2787,12 +2793,14 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op) case BARRIER_EIEIO: eieio(); break; +#ifdef CONFIG_PPC64 case BARRIER_LWSYNC: asm volatile("lwsync" : : : "memory"); break; case BARRIER_PTESYNC: asm volatile("ptesync" : : : "memory"); break; +#endif } break; @@ -2910,7 +2918,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) __put_user_asmx(op->val, ea, err, "stbcx.", cr); break; case 2: - __put_user_asmx(op->val, ea, err, "stbcx.", cr); + __put_user_asmx(op->val, ea, err, "sthcx.", cr); break; #endif case 4: diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index 1cfe57b51d7e3..3f78007a72822 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -121,7 +121,7 @@ static void __init kasan_remap_early_shadow_ro(void) pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur); pte_t *ptep = pte_offset_kernel(pmd, k_cur); - if ((pte_val(*ptep) & PTE_RPN_MASK) != pa) + if (pte_page(*ptep) != virt_to_page(lm_alias(kasan_early_shadow_page))) continue; __set_pte_at(&init_mm, k_cur, ptep, pfn_pte(PHYS_PFN(pa), prot), 0); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index c48705c726ac6..d427f70556eab 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -48,6 +48,7 @@ #include #include #include +#include #include @@ -346,6 +347,7 @@ void free_initmem(void) mark_initmem_nx(); init_mem_is_free = true; free_initmem_default(POISON_FREE_INITMEM); + ftrace_free_init_tramp(); } /** diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c index 18f20da0d3483..64290d343b557 100644 --- a/arch/powerpc/mm/mmu_context.c +++ b/arch/powerpc/mm/mmu_context.c @@ -79,7 +79,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * context */ if (cpu_has_feature(CPU_FTR_ALTIVEC)) - asm volatile ("dssall"); + asm volatile (PPC_DSSALL); if (new_on_cpu) radix_kvm_prefetch_workaround(next); diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 55d4377ccfae3..6d2a268528dfc 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -11,6 +11,7 @@ #ifndef __ASSEMBLY__ #include +#include #ifdef PPC64_ELF_ABI_v1 #define FUNCTION_DESCR_SIZE 24 @@ -180,13 +181,26 @@ #define PPC_NEG(d, a) EMIT(PPC_INST_NEG | ___PPC_RT(d) | ___PPC_RA(a)) /* Long jump; (unconditional 'branch') */ -#define PPC_JMP(dest) EMIT(PPC_INST_BRANCH | \ - (((dest) - (ctx->idx * 4)) & 0x03fffffc)) +#define PPC_JMP(dest) \ + do { \ + long offset = (long)(dest) - (ctx->idx * 4); \ + if (!is_offset_in_branch_range(offset)) { \ + pr_err_ratelimited("Branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \ + return -ERANGE; \ + } \ + EMIT(PPC_INST_BRANCH | (offset & 0x03fffffc)); \ + } while (0) /* "cond" here covers BO:BI fields. */ -#define PPC_BCC_SHORT(cond, dest) EMIT(PPC_INST_BRANCH_COND | \ - (((cond) & 0x3ff) << 16) | \ - (((dest) - (ctx->idx * 4)) & \ - 0xfffc)) +#define PPC_BCC_SHORT(cond, dest) \ + do { \ + long offset = (long)(dest) - (ctx->idx * 4); \ + if (!is_offset_in_cond_branch_range(offset)) { \ + pr_err_ratelimited("Conditional branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \ + return -ERANGE; \ + } \ + EMIT(PPC_INST_BRANCH_COND | (((cond) & 0x3ff) << 16) | (offset & 0xfffc)); \ + } while (0) + /* Sign-extended 32-bit immediate load */ #define PPC_LI32(d, i) do { \ if ((int)(uintptr_t)(i) >= -32768 && \ @@ -225,11 +239,6 @@ #define PPC_FUNC_ADDR(d,i) do { PPC_LI32(d, i); } while(0) #endif -static inline bool is_nearbranch(int offset) -{ - return (offset < 32768) && (offset >= -32768); -} - /* * The fly in the ointment of code size changing from pass to pass is * avoided by padding the short branch case with a NOP. If code size differs @@ -238,7 +247,7 @@ static inline bool is_nearbranch(int offset) * state. */ #define PPC_BCC(cond, dest) do { \ - if (is_nearbranch((dest) - (ctx->idx * 4))) { \ + if (is_offset_in_cond_branch_range((long)(dest) - (ctx->idx * 4))) { \ PPC_BCC_SHORT(cond, dest); \ PPC_NOP(); \ } else { \ diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h index cf3a7e337f025..68ddf3502ab03 100644 --- a/arch/powerpc/net/bpf_jit64.h +++ b/arch/powerpc/net/bpf_jit64.h @@ -16,18 +16,18 @@ * with our redzone usage. * * [ prev sp ] <------------- - * [ nv gpr save area ] 6*8 | + * [ nv gpr save area ] 5*8 | * [ tail_call_cnt ] 8 | - * [ local_tmp_var ] 8 | + * [ local_tmp_var ] 16 | * fp (r31) --> [ ebpf stack space ] upto 512 | * [ frame header ] 32/112 | * sp (r1) ---> [ stack pointer ] -------------- */ /* for gpr non volatile registers BPG_REG_6 to 10 */ -#define BPF_PPC_STACK_SAVE (6*8) +#define BPF_PPC_STACK_SAVE (5*8) /* for bpf JIT code internal usage */ -#define BPF_PPC_STACK_LOCALS 16 +#define BPF_PPC_STACK_LOCALS 24 /* stack frame excluding BPF stack, ensure this is quadword aligned */ #define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + \ BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index be3517ef0574d..687cd352648aa 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "bpf_jit64.h" @@ -56,9 +57,9 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx) * [ prev sp ] <------------- * [ ... ] | * sp (r1) ---> [ stack pointer ] -------------- - * [ nv gpr save area ] 6*8 + * [ nv gpr save area ] 5*8 * [ tail_call_cnt ] 8 - * [ local_tmp_var ] 8 + * [ local_tmp_var ] 16 * [ unused red zone ] 208 bytes protected */ static int bpf_jit_stack_local(struct codegen_context *ctx) @@ -66,12 +67,12 @@ static int bpf_jit_stack_local(struct codegen_context *ctx) if (bpf_has_stack_frame(ctx)) return STACK_FRAME_MIN_SIZE + ctx->stack_size; else - return -(BPF_PPC_STACK_SAVE + 16); + return -(BPF_PPC_STACK_SAVE + 24); } static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx) { - return bpf_jit_stack_local(ctx) + 8; + return bpf_jit_stack_local(ctx) + 16; } static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) @@ -224,7 +225,7 @@ static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, PPC_BLRL(); } -static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) +static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) { /* * By now, the eBPF program has already setup parameters in r3, r4 and r5 @@ -285,14 +286,39 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 bpf_jit_emit_common_epilogue(image, ctx); PPC_BCTR(); + /* out: */ + return 0; } +/* + * We spill into the redzone always, even if the bpf program has its own stackframe. + * Offsets hardcoded based on BPF_PPC_STACK_SAVE -- see bpf_jit_stack_local() + */ +void bpf_stf_barrier(void); + +asm ( +" .global bpf_stf_barrier ;" +" bpf_stf_barrier: ;" +" std 21,-64(1) ;" +" std 22,-56(1) ;" +" sync ;" +" ld 21,-64(1) ;" +" ld 22,-56(1) ;" +" ori 31,31,0 ;" +" .rept 14 ;" +" b 1f ;" +" 1: ;" +" .endr ;" +" blr ;" +); + /* Assemble the body code between the prologue & epilogue */ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, u32 *addrs, bool extra_pass) { + enum stf_barrier_type stf_barrier = stf_barrier_type_get(); const struct bpf_insn *insn = fp->insnsi; int flen = fp->len; int i, ret; @@ -347,18 +373,25 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, PPC_SUB(dst_reg, dst_reg, src_reg); goto bpf_alu32_trunc; case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */ - case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */ case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */ + if (!imm) { + goto bpf_alu32_trunc; + } else if (imm >= -32768 && imm < 32768) { + PPC_ADDI(dst_reg, dst_reg, IMM_L(imm)); + } else { + PPC_LI32(b2p[TMP_REG_1], imm); + PPC_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]); + } + goto bpf_alu32_trunc; + case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */ case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */ - if (BPF_OP(code) == BPF_SUB) - imm = -imm; - if (imm) { - if (imm >= -32768 && imm < 32768) - PPC_ADDI(dst_reg, dst_reg, IMM_L(imm)); - else { - PPC_LI32(b2p[TMP_REG_1], imm); - PPC_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]); - } + if (!imm) { + goto bpf_alu32_trunc; + } else if (imm > -32768 && imm <= 32768) { + PPC_ADDI(dst_reg, dst_reg, IMM_L(-imm)); + } else { + PPC_LI32(b2p[TMP_REG_1], imm); + PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]); } goto bpf_alu32_trunc; case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */ @@ -408,8 +441,14 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */ if (imm == 0) return -EINVAL; - else if (imm == 1) - goto bpf_alu32_trunc; + if (imm == 1) { + if (BPF_OP(code) == BPF_DIV) { + goto bpf_alu32_trunc; + } else { + PPC_LI(dst_reg, 0); + break; + } + } PPC_LI32(b2p[TMP_REG_1], imm); switch (BPF_CLASS(code)) { @@ -644,6 +683,36 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, } break; + /* + * BPF_ST NOSPEC (speculation barrier) + */ + case BPF_ST | BPF_NOSPEC: + if (!security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) || + (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) && + (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) || !cpu_has_feature(CPU_FTR_HVMODE)))) + break; + + switch (stf_barrier) { + case STF_BARRIER_EIEIO: + EMIT(0x7c0006ac | 0x02000000); + break; + case STF_BARRIER_SYNC_ORI: + EMIT(PPC_INST_SYNC); + PPC_LD(b2p[TMP_REG_1], 13, 0); + PPC_ORI(31, 31, 0); + break; + case STF_BARRIER_FALLBACK: + EMIT(PPC_INST_MFLR | ___PPC_RT(b2p[TMP_REG_1])); + PPC_LI64(12, dereference_kernel_function_descriptor(bpf_stf_barrier)); + PPC_MTCTR(12); + EMIT(PPC_INST_BCTR | 0x1); + PPC_MTLR(b2p[TMP_REG_1]); + break; + case STF_BARRIER_NONE: + break; + } + break; + /* * BPF_ST(X) */ @@ -989,7 +1058,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, */ case BPF_JMP | BPF_TAIL_CALL: ctx->seen |= SEEN_TAILCALL; - bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]); + ret = bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]); + if (ret < 0) + return ret; break; default: diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 6f013e4188349..0495a3f3e092a 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2135,7 +2135,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (event->attr.sample_type & PERF_SAMPLE_WEIGHT && ppmu->get_mem_weight) - ppmu->get_mem_weight(&data.weight); + ppmu->get_mem_weight(&data.weight.full); if (perf_event_overflow(event, &data, regs)) power_pmu_stop(event, 0); diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index 6884d16ec19b9..732cfc53e260d 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -164,7 +164,7 @@ static unsigned long single_gpci_request(u32 req, u32 starting_index, */ count = 0; for (i = offset; i < offset + length; i++) - count |= arg->bytes[i] << (i - offset); + count |= (u64)(arg->bytes[i]) << ((length - 1 - (i - offset)) * 8); *value = count; out: diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index eb82dda884e51..d76e800a1f337 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -1441,7 +1441,11 @@ static int trace_imc_event_init(struct perf_event *event) event->hw.idx = -1; target = event->hw.target; - event->pmu->task_ctx_nr = perf_hw_context; + /* + * There can only be a single PMU for perf_hw_context events which is assigned to + * core PMU. Hence use "perf_sw_context" for trace_imc. + */ + event->pmu->task_ctx_nr = perf_sw_context; event->destroy = reset_global_refc; return 0; } diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 944180f55a3c6..25eda98f3b1bd 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -326,7 +326,8 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) if (event_is_threshold(event) && is_thresh_cmp_valid(event)) { mask |= CNST_THRESH_MASK; value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT); - } + } else if (event_is_threshold(event)) + return -1; } else { /* * Special case for PM_MRK_FAB_RSP_MATCH and PM_MRK_FAB_RSP_MATCH_CYC, diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 08c3ef7961982..1225f53609a44 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -131,11 +131,11 @@ int p9_dd22_bl_ev[] = { /* Table of alternatives, sorted by column 0 */ static const unsigned int power9_event_alternatives[][MAX_ALT] = { - { PM_INST_DISP, PM_INST_DISP_ALT }, - { PM_RUN_CYC_ALT, PM_RUN_CYC }, - { PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL }, - { PM_LD_MISS_L1, PM_LD_MISS_L1_ALT }, { PM_BR_2PATH, PM_BR_2PATH_ALT }, + { PM_INST_DISP, PM_INST_DISP_ALT }, + { PM_RUN_CYC_ALT, PM_RUN_CYC }, + { PM_LD_MISS_L1, PM_LD_MISS_L1_ALT }, + { PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL }, }; static int power9_get_alternatives(u64 event, unsigned int flags, u64 alt[]) diff --git a/arch/powerpc/platforms/44x/fsp2.c b/arch/powerpc/platforms/44x/fsp2.c index b299e43f5ef94..823397c802def 100644 --- a/arch/powerpc/platforms/44x/fsp2.c +++ b/arch/powerpc/platforms/44x/fsp2.c @@ -208,6 +208,7 @@ static void node_irq_request(const char *compat, irq_handler_t errirq_handler) if (irq == NO_IRQ) { pr_err("device tree node %pOFn is missing a interrupt", np); + of_node_put(np); return; } @@ -215,6 +216,7 @@ static void node_irq_request(const char *compat, irq_handler_t errirq_handler) if (rc) { pr_err("fsp_of_probe: request_irq failed: np=%pOF rc=%d", np, rc); + of_node_put(np); return; } } diff --git a/arch/powerpc/platforms/4xx/cpm.c b/arch/powerpc/platforms/4xx/cpm.c index ae8b812c92029..2481e78c04234 100644 --- a/arch/powerpc/platforms/4xx/cpm.c +++ b/arch/powerpc/platforms/4xx/cpm.c @@ -327,6 +327,6 @@ late_initcall(cpm_init); static int __init cpm_powersave_off(char *arg) { cpm.powersave_off = 1; - return 0; + return 1; } __setup("powersave=off", cpm_powersave_off); diff --git a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c index 7c0133f558d02..ffa8a7a6a2db7 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c @@ -94,9 +94,8 @@ int __init mpc85xx_setup_pmc(void) pr_err("Could not map guts node address\n"); return -ENOMEM; } + qoriq_pm_ops = &mpc85xx_pm_ops; } - qoriq_pm_ops = &mpc85xx_pm_ops; - return 0; } diff --git a/arch/powerpc/platforms/8xx/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c index 0f65c51271db9..ec6dc2d7a9db3 100644 --- a/arch/powerpc/platforms/8xx/cpm1.c +++ b/arch/powerpc/platforms/8xx/cpm1.c @@ -292,6 +292,7 @@ cpm_setbrg(uint brg, uint rate) out_be32(bp, (((BRG_UART_CLK_DIV16 / rate) - 1) << 1) | CPM_BRG_EN | CPM_BRG_DIV16); } +EXPORT_SYMBOL(cpm_setbrg); struct cpm_ioport16 { __be16 dir, par, odr_sor, dat, intr; diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c index e9617d35fd1f6..209b12323aa4c 100644 --- a/arch/powerpc/platforms/8xx/pic.c +++ b/arch/powerpc/platforms/8xx/pic.c @@ -153,6 +153,7 @@ int mpc8xx_pic_init(void) if (mpc8xx_pic_host == NULL) { printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n"); ret = -ENOMEM; + goto out; } ret = 0; diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index ca9ffc1c8685d..a6a60e2b8f453 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -976,6 +976,7 @@ static int __init cell_iommu_fixed_mapping_init(void) if (hbase < dbase || (hend > (dbase + dsize))) { pr_debug("iommu: hash window doesn't fit in" "real DMA window\n"); + of_node_put(np); return -1; } } diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c index 6af3a6e600a70..80c9c4d715505 100644 --- a/arch/powerpc/platforms/cell/pervasive.c +++ b/arch/powerpc/platforms/cell/pervasive.c @@ -77,6 +77,7 @@ static int cbe_system_reset_exception(struct pt_regs *regs) switch (regs->msr & SRR1_WAKEMASK) { case SRR1_WAKEDEC: set_dec(1); + break; case SRR1_WAKEEE: /* * Handle these when interrupts get re-enabled and we take diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c index a1b7f79a8a152..de10c13de15c6 100644 --- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c +++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c @@ -215,6 +215,7 @@ void hlwd_pic_probe(void) irq_set_chained_handler(cascade_virq, hlwd_pic_irq_cascade); hlwd_irq_host = host; + of_node_put(np); break; } } diff --git a/arch/powerpc/platforms/powermac/cache.S b/arch/powerpc/platforms/powermac/cache.S index da69e0fcb4f15..9b85b030cbebb 100644 --- a/arch/powerpc/platforms/powermac/cache.S +++ b/arch/powerpc/platforms/powermac/cache.S @@ -48,7 +48,7 @@ flush_disable_75x: /* Stop DST streams */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL sync END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) @@ -196,7 +196,7 @@ flush_disable_745x: isync /* Stop prefetch streams */ - DSSALL + PPC_DSSALL sync /* Disable L2 prefetching */ diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index bf4be4b53b44d..210435a43bf95 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -582,6 +582,7 @@ static void __init kw_i2c_add(struct pmac_i2c_host_kw *host, bus->close = kw_i2c_close; bus->xfer = kw_i2c_xfer; mutex_init(&bus->mutex); + lockdep_register_key(&bus->lock_key); lockdep_set_class(&bus->mutex, &bus->lock_key); if (controller == busnode) bus->flags = pmac_i2c_multibus; @@ -811,6 +812,7 @@ static void __init pmu_i2c_probe(void) bus->hostdata = bus + 1; bus->xfer = pmu_i2c_xfer; mutex_init(&bus->mutex); + lockdep_register_key(&bus->lock_key); lockdep_set_class(&bus->mutex, &bus->lock_key); bus->flags = pmac_i2c_multibus; list_add(&bus->link, &pmac_i2c_busses); @@ -934,6 +936,7 @@ static void __init smu_i2c_probe(void) bus->hostdata = bus + 1; bus->xfer = smu_i2c_xfer; mutex_init(&bus->mutex); + lockdep_register_key(&bus->lock_key); lockdep_set_class(&bus->mutex, &bus->lock_key); bus->flags = 0; list_add(&bus->link, &pmac_i2c_busses); diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c b/arch/powerpc/platforms/powernv/opal-fadump.c index d361d37d975f3..f5cea068f0bdc 100644 --- a/arch/powerpc/platforms/powernv/opal-fadump.c +++ b/arch/powerpc/platforms/powernv/opal-fadump.c @@ -60,7 +60,7 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) addr = be64_to_cpu(addr); pr_debug("Kernel metadata addr: %llx\n", addr); opal_fdm_active = (void *)addr; - if (opal_fdm_active->registered_regions == 0) + if (be16_to_cpu(opal_fdm_active->registered_regions) == 0) return; ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_BOOT_MEM, &addr); @@ -95,17 +95,17 @@ static int opal_fadump_unregister(struct fw_dump *fadump_conf); static void opal_fadump_update_config(struct fw_dump *fadump_conf, const struct opal_fadump_mem_struct *fdm) { - pr_debug("Boot memory regions count: %d\n", fdm->region_cnt); + pr_debug("Boot memory regions count: %d\n", be16_to_cpu(fdm->region_cnt)); /* * The destination address of the first boot memory region is the * destination address of boot memory regions. */ - fadump_conf->boot_mem_dest_addr = fdm->rgn[0].dest; + fadump_conf->boot_mem_dest_addr = be64_to_cpu(fdm->rgn[0].dest); pr_debug("Destination address of boot memory regions: %#016llx\n", fadump_conf->boot_mem_dest_addr); - fadump_conf->fadumphdr_addr = fdm->fadumphdr_addr; + fadump_conf->fadumphdr_addr = be64_to_cpu(fdm->fadumphdr_addr); } /* @@ -126,9 +126,9 @@ static void opal_fadump_get_config(struct fw_dump *fadump_conf, fadump_conf->boot_memory_size = 0; pr_debug("Boot memory regions:\n"); - for (i = 0; i < fdm->region_cnt; i++) { - base = fdm->rgn[i].src; - size = fdm->rgn[i].size; + for (i = 0; i < be16_to_cpu(fdm->region_cnt); i++) { + base = be64_to_cpu(fdm->rgn[i].src); + size = be64_to_cpu(fdm->rgn[i].size); pr_debug("\t[%03d] base: 0x%lx, size: 0x%lx\n", i, base, size); fadump_conf->boot_mem_addr[i] = base; @@ -143,7 +143,7 @@ static void opal_fadump_get_config(struct fw_dump *fadump_conf, * Start address of reserve dump area (permanent reservation) for * re-registering FADump after dump capture. */ - fadump_conf->reserve_dump_area_start = fdm->rgn[0].dest; + fadump_conf->reserve_dump_area_start = be64_to_cpu(fdm->rgn[0].dest); /* * Rarely, but it can so happen that system crashes before all @@ -155,13 +155,14 @@ static void opal_fadump_get_config(struct fw_dump *fadump_conf, * Hope the memory that could not be preserved only has pages * that are usually filtered out while saving the vmcore. */ - if (fdm->region_cnt > fdm->registered_regions) { + if (be16_to_cpu(fdm->region_cnt) > be16_to_cpu(fdm->registered_regions)) { pr_warn("Not all memory regions were saved!!!\n"); pr_warn(" Unsaved memory regions:\n"); - i = fdm->registered_regions; - while (i < fdm->region_cnt) { + i = be16_to_cpu(fdm->registered_regions); + while (i < be16_to_cpu(fdm->region_cnt)) { pr_warn("\t[%03d] base: 0x%llx, size: 0x%llx\n", - i, fdm->rgn[i].src, fdm->rgn[i].size); + i, be64_to_cpu(fdm->rgn[i].src), + be64_to_cpu(fdm->rgn[i].size)); i++; } @@ -170,7 +171,7 @@ static void opal_fadump_get_config(struct fw_dump *fadump_conf, } fadump_conf->boot_mem_top = (fadump_conf->boot_memory_size + hole_size); - fadump_conf->boot_mem_regs_cnt = fdm->region_cnt; + fadump_conf->boot_mem_regs_cnt = be16_to_cpu(fdm->region_cnt); opal_fadump_update_config(fadump_conf, fdm); } @@ -178,35 +179,38 @@ static void opal_fadump_get_config(struct fw_dump *fadump_conf, static void opal_fadump_init_metadata(struct opal_fadump_mem_struct *fdm) { fdm->version = OPAL_FADUMP_VERSION; - fdm->region_cnt = 0; - fdm->registered_regions = 0; - fdm->fadumphdr_addr = 0; + fdm->region_cnt = cpu_to_be16(0); + fdm->registered_regions = cpu_to_be16(0); + fdm->fadumphdr_addr = cpu_to_be64(0); } static u64 opal_fadump_init_mem_struct(struct fw_dump *fadump_conf) { u64 addr = fadump_conf->reserve_dump_area_start; + u16 reg_cnt; int i; opal_fdm = __va(fadump_conf->kernel_metadata); opal_fadump_init_metadata(opal_fdm); /* Boot memory regions */ + reg_cnt = be16_to_cpu(opal_fdm->region_cnt); for (i = 0; i < fadump_conf->boot_mem_regs_cnt; i++) { - opal_fdm->rgn[i].src = fadump_conf->boot_mem_addr[i]; - opal_fdm->rgn[i].dest = addr; - opal_fdm->rgn[i].size = fadump_conf->boot_mem_sz[i]; + opal_fdm->rgn[i].src = cpu_to_be64(fadump_conf->boot_mem_addr[i]); + opal_fdm->rgn[i].dest = cpu_to_be64(addr); + opal_fdm->rgn[i].size = cpu_to_be64(fadump_conf->boot_mem_sz[i]); - opal_fdm->region_cnt++; + reg_cnt++; addr += fadump_conf->boot_mem_sz[i]; } + opal_fdm->region_cnt = cpu_to_be16(reg_cnt); /* * Kernel metadata is passed to f/w and retrieved in capture kerenl. * So, use it to save fadump header address instead of calculating it. */ - opal_fdm->fadumphdr_addr = (opal_fdm->rgn[0].dest + - fadump_conf->boot_memory_size); + opal_fdm->fadumphdr_addr = cpu_to_be64(be64_to_cpu(opal_fdm->rgn[0].dest) + + fadump_conf->boot_memory_size); opal_fadump_update_config(fadump_conf, opal_fdm); @@ -269,18 +273,21 @@ static u64 opal_fadump_get_bootmem_min(void) static int opal_fadump_register(struct fw_dump *fadump_conf) { s64 rc = OPAL_PARAMETER; + u16 registered_regs; int i, err = -EIO; - for (i = 0; i < opal_fdm->region_cnt; i++) { + registered_regs = be16_to_cpu(opal_fdm->registered_regions); + for (i = 0; i < be16_to_cpu(opal_fdm->region_cnt); i++) { rc = opal_mpipl_update(OPAL_MPIPL_ADD_RANGE, - opal_fdm->rgn[i].src, - opal_fdm->rgn[i].dest, - opal_fdm->rgn[i].size); + be64_to_cpu(opal_fdm->rgn[i].src), + be64_to_cpu(opal_fdm->rgn[i].dest), + be64_to_cpu(opal_fdm->rgn[i].size)); if (rc != OPAL_SUCCESS) break; - opal_fdm->registered_regions++; + registered_regs++; } + opal_fdm->registered_regions = cpu_to_be16(registered_regs); switch (rc) { case OPAL_SUCCESS: @@ -291,7 +298,8 @@ static int opal_fadump_register(struct fw_dump *fadump_conf) case OPAL_RESOURCE: /* If MAX regions limit in f/w is hit, warn and proceed. */ pr_warn("%d regions could not be registered for MPIPL as MAX limit is reached!\n", - (opal_fdm->region_cnt - opal_fdm->registered_regions)); + (be16_to_cpu(opal_fdm->region_cnt) - + be16_to_cpu(opal_fdm->registered_regions))); fadump_conf->dump_registered = 1; err = 0; break; @@ -312,7 +320,7 @@ static int opal_fadump_register(struct fw_dump *fadump_conf) * If some regions were registered before OPAL_MPIPL_ADD_RANGE * OPAL call failed, unregister all regions. */ - if ((err < 0) && (opal_fdm->registered_regions > 0)) + if ((err < 0) && (be16_to_cpu(opal_fdm->registered_regions) > 0)) opal_fadump_unregister(fadump_conf); return err; @@ -328,7 +336,7 @@ static int opal_fadump_unregister(struct fw_dump *fadump_conf) return -EIO; } - opal_fdm->registered_regions = 0; + opal_fdm->registered_regions = cpu_to_be16(0); fadump_conf->dump_registered = 0; return 0; } @@ -563,19 +571,20 @@ static void opal_fadump_region_show(struct fw_dump *fadump_conf, else fdm_ptr = opal_fdm; - for (i = 0; i < fdm_ptr->region_cnt; i++) { + for (i = 0; i < be16_to_cpu(fdm_ptr->region_cnt); i++) { /* * Only regions that are registered for MPIPL * would have dump data. */ if ((fadump_conf->dump_active) && - (i < fdm_ptr->registered_regions)) - dumped_bytes = fdm_ptr->rgn[i].size; + (i < be16_to_cpu(fdm_ptr->registered_regions))) + dumped_bytes = be64_to_cpu(fdm_ptr->rgn[i].size); seq_printf(m, "DUMP: Src: %#016llx, Dest: %#016llx, ", - fdm_ptr->rgn[i].src, fdm_ptr->rgn[i].dest); + be64_to_cpu(fdm_ptr->rgn[i].src), + be64_to_cpu(fdm_ptr->rgn[i].dest)); seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n", - fdm_ptr->rgn[i].size, dumped_bytes); + be64_to_cpu(fdm_ptr->rgn[i].size), dumped_bytes); } /* Dump is active. Show reserved area start address. */ @@ -624,6 +633,7 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) { const __be32 *prop; unsigned long dn; + __be64 be_addr; u64 addr = 0; int i, len; s64 ret; @@ -680,13 +690,13 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) if (!prop) return; - ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &addr); - if ((ret != OPAL_SUCCESS) || !addr) { + ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &be_addr); + if ((ret != OPAL_SUCCESS) || !be_addr) { pr_err("Failed to get Kernel metadata (%lld)\n", ret); return; } - addr = be64_to_cpu(addr); + addr = be64_to_cpu(be_addr); pr_debug("Kernel metadata addr: %llx\n", addr); opal_fdm_active = __va(addr); @@ -697,14 +707,14 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) } /* Kernel regions not registered with f/w for MPIPL */ - if (opal_fdm_active->registered_regions == 0) { + if (be16_to_cpu(opal_fdm_active->registered_regions) == 0) { opal_fdm_active = NULL; return; } - ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, &addr); - if (addr) { - addr = be64_to_cpu(addr); + ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, &be_addr); + if (be_addr) { + addr = be64_to_cpu(be_addr); pr_debug("CPU metadata addr: %llx\n", addr); opal_cpu_metadata = __va(addr); } diff --git a/arch/powerpc/platforms/powernv/opal-fadump.h b/arch/powerpc/platforms/powernv/opal-fadump.h index f1e9ecf548c5d..3f715efb0aa6e 100644 --- a/arch/powerpc/platforms/powernv/opal-fadump.h +++ b/arch/powerpc/platforms/powernv/opal-fadump.h @@ -31,14 +31,14 @@ * OPAL FADump kernel metadata * * The address of this structure will be registered with f/w for retrieving - * and processing during crash dump. + * in the capture kernel to process the crash dump. */ struct opal_fadump_mem_struct { u8 version; u8 reserved[3]; - u16 region_cnt; /* number of regions */ - u16 registered_regions; /* Regions registered for MPIPL */ - u64 fadumphdr_addr; + __be16 region_cnt; /* number of regions */ + __be16 registered_regions; /* Regions registered for MPIPL */ + __be64 fadumphdr_addr; struct opal_mpipl_region rgn[FADUMP_MAX_MEM_REGS]; } __packed; @@ -135,7 +135,7 @@ static inline void opal_fadump_read_regs(char *bufp, unsigned int regs_cnt, for (i = 0; i < regs_cnt; i++, bufp += reg_entry_size) { reg_entry = (struct hdat_fadump_reg_entry *)bufp; val = (cpu_endian ? be64_to_cpu(reg_entry->reg_val) : - reg_entry->reg_val); + (u64)(reg_entry->reg_val)); opal_fadump_set_regval_regnum(regs, be32_to_cpu(reg_entry->reg_type), be32_to_cpu(reg_entry->reg_num), diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c index 608569082ba0b..123a0e799b7bd 100644 --- a/arch/powerpc/platforms/powernv/opal-lpc.c +++ b/arch/powerpc/platforms/powernv/opal-lpc.c @@ -396,6 +396,7 @@ void __init opal_lpc_init(void) if (!of_get_property(np, "primary", NULL)) continue; opal_lpc_chip_id = of_get_ibm_chip_id(np); + of_node_put(np); break; } if (opal_lpc_chip_id < 0) diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c index 45f4223a790f2..6ebc906e07b0a 100644 --- a/arch/powerpc/platforms/powernv/opal-prd.c +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -372,6 +372,12 @@ static struct notifier_block opal_prd_event_nb = { .priority = 0, }; +static struct notifier_block opal_prd_event_nb2 = { + .notifier_call = opal_prd_msg_notifier, + .next = NULL, + .priority = 0, +}; + static int opal_prd_probe(struct platform_device *pdev) { int rc; @@ -393,9 +399,10 @@ static int opal_prd_probe(struct platform_device *pdev) return rc; } - rc = opal_message_notifier_register(OPAL_MSG_PRD2, &opal_prd_event_nb); + rc = opal_message_notifier_register(OPAL_MSG_PRD2, &opal_prd_event_nb2); if (rc) { pr_err("Couldn't register PRD2 event notifier\n"); + opal_message_notifier_unregister(OPAL_MSG_PRD, &opal_prd_event_nb); return rc; } @@ -404,6 +411,8 @@ static int opal_prd_probe(struct platform_device *pdev) pr_err("failed to register miscdev\n"); opal_message_notifier_unregister(OPAL_MSG_PRD, &opal_prd_event_nb); + opal_message_notifier_unregister(OPAL_MSG_PRD2, + &opal_prd_event_nb2); return rc; } @@ -414,6 +423,7 @@ static int opal_prd_remove(struct platform_device *pdev) { misc_deregister(&opal_prd_dev); opal_message_notifier_unregister(OPAL_MSG_PRD, &opal_prd_event_nb); + opal_message_notifier_unregister(OPAL_MSG_PRD2, &opal_prd_event_nb2); return 0; } diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index 1aa51c4fa9045..4825e4cf4410d 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -35,4 +35,6 @@ ssize_t memcons_copy(struct memcons *mc, char *to, loff_t pos, size_t count); u32 memcons_get_size(struct memcons *mc); struct memcons *memcons_init(struct device_node *node, const char *mc_prop_name); +void pnv_rng_init(void); + #endif /* _POWERNV_H */ diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index 8035caf6e297d..7186e17cfd3dc 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -17,6 +17,7 @@ #include #include #include +#include "powernv.h" #define DARN_ERR 0xFFFFFFFFFFFFFFFFul @@ -28,7 +29,6 @@ struct powernv_rng { static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng); - int powernv_hwrng_present(void) { struct powernv_rng *rng; @@ -43,7 +43,11 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val) unsigned long parity; /* Calculate the parity of the value */ - asm ("popcntd %0,%1" : "=r" (parity) : "r" (val)); + asm (".machine push; \ + .machine power7; \ + popcntd %0,%1; \ + .machine pop;" + : "=r" (parity) : "r" (val)); /* xor our value with the previous mask */ val ^= rng->mask; @@ -94,9 +98,6 @@ static int initialise_darn(void) return 0; } } - - pr_warn("Unable to use DARN for get_random_seed()\n"); - return -EIO; } @@ -159,32 +160,59 @@ static __init int rng_create(struct device_node *dn) rng_init_per_cpu(rng, dn); - pr_info_once("Registering arch random hook.\n"); - ppc_md.get_random_seed = powernv_get_random_long; return 0; } -static __init int rng_init(void) +static int __init pnv_get_random_long_early(unsigned long *v) { struct device_node *dn; - int rc; - - for_each_compatible_node(dn, NULL, "ibm,power-rng") { - rc = rng_create(dn); - if (rc) { - pr_err("Failed creating rng for %pOF (%d).\n", - dn, rc); - continue; - } - /* Create devices for hwrng driver */ - of_platform_device_create(dn, NULL, NULL); - } + if (!slab_is_available()) + return 0; + + if (cmpxchg(&ppc_md.get_random_seed, pnv_get_random_long_early, + NULL) != pnv_get_random_long_early) + return 0; + + for_each_compatible_node(dn, NULL, "ibm,power-rng") + rng_create(dn); + + if (!ppc_md.get_random_seed) + return 0; + return ppc_md.get_random_seed(v); +} - initialise_darn(); +void __init pnv_rng_init(void) +{ + struct device_node *dn; + + /* Prefer darn over the rest. */ + if (!initialise_darn()) + return; + + dn = of_find_compatible_node(NULL, NULL, "ibm,power-rng"); + if (dn) + ppc_md.get_random_seed = pnv_get_random_long_early; + + of_node_put(dn); +} + +static int __init pnv_rng_late_init(void) +{ + struct device_node *dn; + unsigned long v; + + /* In case it wasn't called during init for some other reason. */ + if (ppc_md.get_random_seed == pnv_get_random_long_early) + pnv_get_random_long_early(&v); + + if (ppc_md.get_random_seed == powernv_get_random_long) { + for_each_compatible_node(dn, NULL, "ibm,power-rng") + of_platform_device_create(dn, NULL, NULL); + } return 0; } -machine_subsys_initcall(powernv, rng_init); +machine_subsys_initcall(powernv, pnv_rng_late_init); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 1b7b0d0c3ebdd..d9e26614d7ed8 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -168,6 +168,8 @@ static void __init pnv_setup_arch(void) powersave_nap = 1; /* XXX PMCS */ + + pnv_rng_init(); } static void __init pnv_init(void) diff --git a/arch/powerpc/platforms/powernv/ultravisor.c b/arch/powerpc/platforms/powernv/ultravisor.c index e4a00ad06f9d3..67c8c4b2d8b17 100644 --- a/arch/powerpc/platforms/powernv/ultravisor.c +++ b/arch/powerpc/platforms/powernv/ultravisor.c @@ -55,6 +55,7 @@ static int __init uv_init(void) return -ENODEV; uv_memcons = memcons_init(node, "memcons"); + of_node_put(node); if (!uv_memcons) return -ENOENT; diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 13fa370a87e4e..5fd56230b01c5 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -114,4 +114,6 @@ int dlpar_workqueue_init(void); void pseries_setup_rfi_flush(void); void pseries_lpar_read_hblkrm_characteristics(void); +void pseries_rng_init(void); + #endif /* _PSERIES_PSERIES_H */ diff --git a/arch/powerpc/platforms/pseries/rng.c b/arch/powerpc/platforms/pseries/rng.c index 6268545947b83..6ddfdeaace9ef 100644 --- a/arch/powerpc/platforms/pseries/rng.c +++ b/arch/powerpc/platforms/pseries/rng.c @@ -10,6 +10,7 @@ #include #include #include +#include "pseries.h" static int pseries_get_random_long(unsigned long *v) @@ -24,19 +25,13 @@ static int pseries_get_random_long(unsigned long *v) return 0; } -static __init int rng_init(void) +void __init pseries_rng_init(void) { struct device_node *dn; dn = of_find_compatible_node(NULL, NULL, "ibm,random"); if (!dn) - return -ENODEV; - - pr_info("Registering arch random hook.\n"); - + return; ppc_md.get_random_seed = pseries_get_random_long; - of_node_put(dn); - return 0; } -machine_subsys_initcall(pseries, rng_init); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 16b744646ea94..d5abb25865e36 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -792,6 +792,8 @@ static void __init pSeries_setup_arch(void) if (swiotlb_force == SWIOTLB_FORCE) ppc_swiotlb_enable = 1; + + pseries_rng_init(); } static void pseries_panic(char *str) diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index 6b4a34b36d987..8ff9bcfe4b8d4 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -403,9 +403,10 @@ void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops) } /* Initialize the DART HW */ - if (dart_init(dn) != 0) + if (dart_init(dn) != 0) { + of_node_put(dn); return; - + } /* * U4 supports a DART bypass, we use it for 64-bit capable devices to * improve performance. However, that only works for devices connected @@ -418,6 +419,7 @@ void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops) /* Setup pci_dma ops */ set_pci_dma_ops(&dma_iommu_ops); + of_node_put(dn); } #ifdef CONFIG_PM diff --git a/arch/powerpc/sysdev/dcr-low.S b/arch/powerpc/sysdev/dcr-low.S index efeeb1b885a17..329b9c4ae5429 100644 --- a/arch/powerpc/sysdev/dcr-low.S +++ b/arch/powerpc/sysdev/dcr-low.S @@ -11,7 +11,7 @@ #include #define DCR_ACCESS_PROLOG(table) \ - cmpli cr0,r3,1024; \ + cmplwi cr0,r3,1024; \ rlwinm r3,r3,4,18,27; \ lis r5,table@h; \ ori r5,r5,table@l; \ diff --git a/arch/powerpc/sysdev/fsl_gtm.c b/arch/powerpc/sysdev/fsl_gtm.c index 8963eaffb1b7b..39186ad6b3c3a 100644 --- a/arch/powerpc/sysdev/fsl_gtm.c +++ b/arch/powerpc/sysdev/fsl_gtm.c @@ -86,7 +86,7 @@ static LIST_HEAD(gtms); */ struct gtm_timer *gtm_get_timer16(void) { - struct gtm *gtm = NULL; + struct gtm *gtm; int i; list_for_each_entry(gtm, >ms, list_node) { @@ -103,7 +103,7 @@ struct gtm_timer *gtm_get_timer16(void) spin_unlock_irq(>m->lock); } - if (gtm) + if (!list_empty(>ms)) return ERR_PTR(-EBUSY); return ERR_PTR(-ENODEV); } diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index 07c164f7f8cfe..3f9f78621cf3c 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -505,8 +505,10 @@ int fsl_rio_setup(struct platform_device *dev) if (rc) { dev_err(&dev->dev, "Can't get %pOF property 'reg'\n", rmu_node); + of_node_put(rmu_node); goto err_rmu; } + of_node_put(rmu_node); rmu_regs_win = ioremap(rmu_regs.start, resource_size(&rmu_regs)); if (!rmu_regs_win) { dev_err(&dev->dev, "Unable to map rmu register window\n"); diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c index 68fd2540b0931..7fa520efcefa0 100644 --- a/arch/powerpc/sysdev/xics/icp-opal.c +++ b/arch/powerpc/sysdev/xics/icp-opal.c @@ -195,6 +195,7 @@ int icp_opal_init(void) printk("XICS: Using OPAL ICP fallbacks\n"); + of_node_put(np); return 0; } diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 2d4c09a77910f..d4467da279668 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -990,7 +990,8 @@ static int xive_get_irqchip_state(struct irq_data *data, * interrupt to be inactive in that case. */ *state = (pq != XIVE_ESB_INVALID) && !xd->stale_p && - (xd->saved_p || !!(pq & XIVE_ESB_VAL_P)); + (xd->saved_p || (!!(pq & XIVE_ESB_VAL_P) && + !irqd_irq_disabled(data))); return 0; default: return -EINVAL; diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 1cbe0ad78b0f3..d1a9615391010 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -48,6 +48,12 @@ riscv-march-$(CONFIG_ARCH_RV32I) := rv32ima riscv-march-$(CONFIG_ARCH_RV64I) := rv64ima riscv-march-$(CONFIG_FPU) := $(riscv-march-y)fd riscv-march-$(CONFIG_RISCV_ISA_C) := $(riscv-march-y)c + +# Newer binutils versions default to ISA spec version 20191213 which moves some +# instructions from the I extension to the Zicsr and Zifencei extensions. +toolchain-need-zicsr-zifencei := $(call cc-option-yn, -march=$(riscv-march-y)_zicsr_zifencei) +riscv-march-$(toolchain-need-zicsr-zifencei) := $(riscv-march-y)_zicsr_zifencei + KBUILD_CFLAGS += -march=$(subst fd,,$(riscv-march-y)) KBUILD_AFLAGS += -march=$(riscv-march-y) @@ -68,6 +74,7 @@ ifeq ($(CONFIG_PERF_EVENTS),y) endif KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax) +KBUILD_AFLAGS_MODULE += $(call as-option,-Wa$(comma)-mno-relax) # GCC versions that support the "-mstrict-align" option default to allowing # unaligned accesses. While unaligned accesses are explicitly allowed in the diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h index c9fecd120d187..8ae9708a8eee8 100644 --- a/arch/riscv/include/asm/asm-prototypes.h +++ b/arch/riscv/include/asm/asm-prototypes.h @@ -4,4 +4,8 @@ #include #include +long long __lshrti3(long long a, int b); +long long __ashrti3(long long a, int b); +long long __ashlti3(long long a, int b); + #endif /* _ASM_RISCV_PROTOTYPES_H */ diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index f539149d04c20..8c5b11a640dd3 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -22,6 +22,8 @@ #ifndef __ASSEMBLY__ +#include + struct task_struct; struct pt_regs; diff --git a/arch/riscv/include/uapi/asm/unistd.h b/arch/riscv/include/uapi/asm/unistd.h index 13ce76cc5affe..80dff2c2bf677 100644 --- a/arch/riscv/include/uapi/asm/unistd.h +++ b/arch/riscv/include/uapi/asm/unistd.h @@ -18,9 +18,10 @@ #ifdef __LP64__ #define __ARCH_WANT_NEW_STAT #define __ARCH_WANT_SET_GET_RLIMIT -#define __ARCH_WANT_SYS_CLONE3 #endif /* __LP64__ */ +#define __ARCH_WANT_SYS_CLONE3 + #include /* diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c index 4c90c07d8c39d..d930bd073b7b2 100644 --- a/arch/riscv/kernel/cacheinfo.c +++ b/arch/riscv/kernel/cacheinfo.c @@ -16,7 +16,7 @@ static void ci_leaf_init(struct cacheinfo *this_leaf, this_leaf->type = type; } -static int __init_cache_level(unsigned int cpu) +int init_cache_level(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct device_node *np = of_cpu_device_node_get(cpu); @@ -58,7 +58,7 @@ static int __init_cache_level(unsigned int cpu) return 0; } -static int __populate_cache_leaves(unsigned int cpu) +int populate_cache_leaves(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *this_leaf = this_cpu_ci->info_list; @@ -95,6 +95,3 @@ static int __populate_cache_leaves(unsigned int cpu) return 0; } - -DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level) -DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves) diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 6bf5b16743843..a963b761e1a39 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -13,6 +13,19 @@ #include #include +/* + * The auipc+jalr instruction pair can reach any PC-relative offset + * in the range [-2^31 - 2^11, 2^31 - 2^11) + */ +static bool riscv_insn_valid_32bit_offset(ptrdiff_t val) +{ +#ifdef CONFIG_32BIT + return true; +#else + return (-(1L << 31) - (1L << 11)) <= val && val < ((1L << 31) - (1L << 11)); +#endif +} + static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v) { if (v != (u32)v) { @@ -95,7 +108,7 @@ static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location, ptrdiff_t offset = (void *)v - (void *)location; s32 hi20; - if (offset != (s32)offset) { + if (!riscv_insn_valid_32bit_offset(offset)) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", me->name, (long long)v, location); @@ -197,10 +210,9 @@ static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location, Elf_Addr v) { ptrdiff_t offset = (void *)v - (void *)location; - s32 fill_v = offset; u32 hi20, lo12; - if (offset != fill_v) { + if (!riscv_insn_valid_32bit_offset(offset)) { /* Only emit the plt entry if offset over 32-bit range */ if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) { offset = module_emit_plt_entry(me, v); @@ -224,10 +236,9 @@ static int apply_r_riscv_call_rela(struct module *me, u32 *location, Elf_Addr v) { ptrdiff_t offset = (void *)v - (void *)location; - s32 fill_v = offset; u32 hi20, lo12; - if (offset != fill_v) { + if (!riscv_insn_valid_32bit_offset(offset)) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", me->name, (long long)v, location); diff --git a/arch/riscv/kernel/module.lds b/arch/riscv/kernel/module.lds index 295ecfb341a29..18ec719899e28 100644 --- a/arch/riscv/kernel/module.lds +++ b/arch/riscv/kernel/module.lds @@ -2,7 +2,7 @@ /* Copyright (C) 2017 Andes Technology Corporation */ SECTIONS { - .plt (NOLOAD) : { BYTE(0) } - .got (NOLOAD) : { BYTE(0) } - .got.plt (NOLOAD) : { BYTE(0) } + .plt : { BYTE(0) } + .got : { BYTE(0) } + .got.plt : { BYTE(0) } } diff --git a/arch/riscv/kernel/perf_callchain.c b/arch/riscv/kernel/perf_callchain.c index 8d2804f05cf93..1de5991916eb9 100644 --- a/arch/riscv/kernel/perf_callchain.c +++ b/arch/riscv/kernel/perf_callchain.c @@ -60,10 +60,11 @@ static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry, void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); unsigned long fp = 0; /* RISC-V does not support perf in guest mode. */ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) + if (guest_cbs && guest_cbs->is_in_guest()) return; fp = regs->s0; @@ -76,7 +77,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, bool fill_callchain(unsigned long pc, void *entry) { - return perf_callchain_store(entry, pc); + return perf_callchain_store(entry, pc) == 0; } void notrace walk_stackframe(struct task_struct *task, @@ -84,8 +85,10 @@ void notrace walk_stackframe(struct task_struct *task, void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); + /* RISC-V does not support perf in guest mode. */ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (guest_cbs && guest_cbs->is_in_guest()) { pr_warn("RISC-V does not support perf in guest mode!"); return; } diff --git a/arch/riscv/lib/tishift.S b/arch/riscv/lib/tishift.S index 15f9d54c7db63..ef90075c4b0a9 100644 --- a/arch/riscv/lib/tishift.S +++ b/arch/riscv/lib/tishift.S @@ -4,34 +4,73 @@ */ #include +#include -ENTRY(__lshrti3) +SYM_FUNC_START(__lshrti3) beqz a2, .L1 li a5,64 sub a5,a5,a2 - addi sp,sp,-16 sext.w a4,a5 blez a5, .L2 sext.w a2,a2 - sll a4,a1,a4 srl a0,a0,a2 - srl a1,a1,a2 + sll a4,a1,a4 + srl a2,a1,a2 or a0,a0,a4 - sd a1,8(sp) - sd a0,0(sp) - ld a0,0(sp) - ld a1,8(sp) - addi sp,sp,16 - ret + mv a1,a2 .L1: ret .L2: - negw a4,a4 - srl a1,a1,a4 - sd a1,0(sp) - sd zero,8(sp) - ld a0,0(sp) - ld a1,8(sp) - addi sp,sp,16 + negw a0,a4 + li a2,0 + srl a0,a1,a0 + mv a1,a2 + ret +SYM_FUNC_END(__lshrti3) +EXPORT_SYMBOL(__lshrti3) + +SYM_FUNC_START(__ashrti3) + beqz a2, .L3 + li a5,64 + sub a5,a5,a2 + sext.w a4,a5 + blez a5, .L4 + sext.w a2,a2 + srl a0,a0,a2 + sll a4,a1,a4 + sra a2,a1,a2 + or a0,a0,a4 + mv a1,a2 +.L3: + ret +.L4: + negw a0,a4 + srai a2,a1,0x3f + sra a0,a1,a0 + mv a1,a2 + ret +SYM_FUNC_END(__ashrti3) +EXPORT_SYMBOL(__ashrti3) + +SYM_FUNC_START(__ashlti3) + beqz a2, .L5 + li a5,64 + sub a5,a5,a2 + sext.w a4,a5 + blez a5, .L6 + sext.w a2,a2 + sll a1,a1,a2 + srl a4,a0,a4 + sll a2,a0,a2 + or a1,a1,a4 + mv a0,a2 +.L5: + ret +.L6: + negw a1,a4 + li a2,0 + sll a1,a0,a1 + mv a0,a2 ret -ENDPROC(__lshrti3) +SYM_FUNC_END(__ashlti3) +EXPORT_SYMBOL(__ashlti3) diff --git a/arch/riscv/net/bpf_jit_comp.c b/arch/riscv/net/bpf_jit_comp.c index e2279fed8f564..0eefe6193253b 100644 --- a/arch/riscv/net/bpf_jit_comp.c +++ b/arch/riscv/net/bpf_jit_comp.c @@ -1313,6 +1313,10 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, emit(rv_ld(rd, 0, RV_REG_T1), ctx); break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; + /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_B: emit_imm(RV_REG_T1, imm, ctx); diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 0023b78391f14..d7c7c7b80aed1 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -541,7 +541,6 @@ config KEXEC config KEXEC_FILE bool "kexec file based system call" select KEXEC_CORE - select BUILD_BIN2C depends on CRYPTO depends on CRYPTO_SHA256 depends on CRYPTO_SHA256_S390 diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 2faaf456956a6..71e3d7c0b8709 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -31,6 +31,16 @@ KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-option,-ffreestanding) KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, address-of-packed-member) KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g) KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,)) + +ifdef CONFIG_CC_IS_GCC + ifeq ($(call cc-ifversion, -ge, 1200, y), y) + ifeq ($(call cc-ifversion, -lt, 1300, y), y) + KBUILD_CFLAGS += $(call cc-disable-warning, array-bounds) + KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, array-bounds) + endif + endif +endif + UTS_MACHINE := s390x STACK_SIZE := $(if $(CONFIG_KASAN),65536,16384) CHECKFLAGS += -D__s390__ -D__s390x__ diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 38d64030aacf6..2e60c80395ab0 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -62,7 +62,6 @@ CONFIG_OPROFILE=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y CONFIG_STATIC_KEYS_SELFTEST=y -CONFIG_REFCOUNT_FULL=y CONFIG_LOCK_EVENT_COUNTS=y CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 9803e96d29247..558cfe570ccf8 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -861,7 +861,7 @@ static inline void _gcm_sg_unmap_and_advance(struct gcm_sg_walk *gw, unsigned int nbytes) { gw->walk_bytes_remain -= nbytes; - scatterwalk_unmap(&gw->walk); + scatterwalk_unmap(gw->walk_ptr); scatterwalk_advance(&gw->walk, nbytes); scatterwalk_done(&gw->walk, 0, gw->walk_bytes_remain); gw->walk_ptr = NULL; @@ -936,7 +936,7 @@ static int gcm_out_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded) goto out; } - scatterwalk_unmap(&gw->walk); + scatterwalk_unmap(gw->walk_ptr); gw->walk_ptr = NULL; gw->ptr = gw->buf; diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c index 4cbb4b6d85a83..1f2d40993c4d2 100644 --- a/arch/s390/crypto/arch_random.c +++ b/arch/s390/crypto/arch_random.c @@ -2,126 +2,17 @@ /* * s390 arch random implementation. * - * Copyright IBM Corp. 2017, 2018 + * Copyright IBM Corp. 2017, 2020 * Author(s): Harald Freudenberger - * - * The s390_arch_random_generate() function may be called from random.c - * in interrupt context. So this implementation does the best to be very - * fast. There is a buffer of random data which is asynchronously checked - * and filled by a workqueue thread. - * If there are enough bytes in the buffer the s390_arch_random_generate() - * just delivers these bytes. Otherwise false is returned until the - * worker thread refills the buffer. - * The worker fills the rng buffer by pulling fresh entropy from the - * high quality (but slow) true hardware random generator. This entropy - * is then spread over the buffer with an pseudo random generator PRNG. - * As the arch_get_random_seed_long() fetches 8 bytes and the calling - * function add_interrupt_randomness() counts this as 1 bit entropy the - * distribution needs to make sure there is in fact 1 bit entropy contained - * in 8 bytes of the buffer. The current values pull 32 byte entropy - * and scatter this into a 2048 byte buffer. So 8 byte in the buffer - * will contain 1 bit of entropy. - * The worker thread is rescheduled based on the charge level of the - * buffer but at least with 500 ms delay to avoid too much CPU consumption. - * So the max. amount of rng data delivered via arch_get_random_seed is - * limited to 4k bytes per second. */ #include #include #include -#include #include -#include #include DEFINE_STATIC_KEY_FALSE(s390_arch_random_available); atomic64_t s390_arch_random_counter = ATOMIC64_INIT(0); EXPORT_SYMBOL(s390_arch_random_counter); - -#define ARCH_REFILL_TICKS (HZ/2) -#define ARCH_PRNG_SEED_SIZE 32 -#define ARCH_RNG_BUF_SIZE 2048 - -static DEFINE_SPINLOCK(arch_rng_lock); -static u8 *arch_rng_buf; -static unsigned int arch_rng_buf_idx; - -static void arch_rng_refill_buffer(struct work_struct *); -static DECLARE_DELAYED_WORK(arch_rng_work, arch_rng_refill_buffer); - -bool s390_arch_random_generate(u8 *buf, unsigned int nbytes) -{ - /* max hunk is ARCH_RNG_BUF_SIZE */ - if (nbytes > ARCH_RNG_BUF_SIZE) - return false; - - /* lock rng buffer */ - if (!spin_trylock(&arch_rng_lock)) - return false; - - /* try to resolve the requested amount of bytes from the buffer */ - arch_rng_buf_idx -= nbytes; - if (arch_rng_buf_idx < ARCH_RNG_BUF_SIZE) { - memcpy(buf, arch_rng_buf + arch_rng_buf_idx, nbytes); - atomic64_add(nbytes, &s390_arch_random_counter); - spin_unlock(&arch_rng_lock); - return true; - } - - /* not enough bytes in rng buffer, refill is done asynchronously */ - spin_unlock(&arch_rng_lock); - - return false; -} -EXPORT_SYMBOL(s390_arch_random_generate); - -static void arch_rng_refill_buffer(struct work_struct *unused) -{ - unsigned int delay = ARCH_REFILL_TICKS; - - spin_lock(&arch_rng_lock); - if (arch_rng_buf_idx > ARCH_RNG_BUF_SIZE) { - /* buffer is exhausted and needs refill */ - u8 seed[ARCH_PRNG_SEED_SIZE]; - u8 prng_wa[240]; - /* fetch ARCH_PRNG_SEED_SIZE bytes of entropy */ - cpacf_trng(NULL, 0, seed, sizeof(seed)); - /* blow this entropy up to ARCH_RNG_BUF_SIZE with PRNG */ - memset(prng_wa, 0, sizeof(prng_wa)); - cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED, - &prng_wa, NULL, 0, seed, sizeof(seed)); - cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, - &prng_wa, arch_rng_buf, ARCH_RNG_BUF_SIZE, NULL, 0); - arch_rng_buf_idx = ARCH_RNG_BUF_SIZE; - } - delay += (ARCH_REFILL_TICKS * arch_rng_buf_idx) / ARCH_RNG_BUF_SIZE; - spin_unlock(&arch_rng_lock); - - /* kick next check */ - queue_delayed_work(system_long_wq, &arch_rng_work, delay); -} - -static int __init s390_arch_random_init(void) -{ - /* all the needed PRNO subfunctions available ? */ - if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG) && - cpacf_query_func(CPACF_PRNO, CPACF_PRNO_SHA512_DRNG_GEN)) { - - /* alloc arch random working buffer */ - arch_rng_buf = kmalloc(ARCH_RNG_BUF_SIZE, GFP_KERNEL); - if (!arch_rng_buf) - return -ENOMEM; - - /* kick worker queue job to fill the random buffer */ - queue_delayed_work(system_long_wq, - &arch_rng_work, ARCH_REFILL_TICKS); - - /* enable arch random to the outside world */ - static_branch_enable(&s390_arch_random_available); - } - - return 0; -} -arch_initcall(s390_arch_random_init); diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index e1fcc03159ef2..a927adccb4ba7 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -20,6 +20,7 @@ static char local_guest[] = " "; static char all_guests[] = "* "; +static char *all_groups = all_guests; static char *guest_query; struct diag2fc_data { @@ -62,10 +63,11 @@ static int diag2fc(int size, char* query, void *addr) memcpy(parm_list.userid, query, NAME_LEN); ASCEBC(parm_list.userid, NAME_LEN); - parm_list.addr = (unsigned long) addr ; + memcpy(parm_list.aci_grp, all_groups, NAME_LEN); + ASCEBC(parm_list.aci_grp, NAME_LEN); + parm_list.addr = (unsigned long)addr; parm_list.size = size; parm_list.fmt = 0x02; - memset(parm_list.aci_grp, 0x40, NAME_LEN); rc = -1; diag_stat_inc(DIAG_STAT_X2FC); diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h index c67b82dfa558e..4120c428dc378 100644 --- a/arch/s390/include/asm/archrandom.h +++ b/arch/s390/include/asm/archrandom.h @@ -2,7 +2,7 @@ /* * Kernel interface for the s390 arch_random_* functions * - * Copyright IBM Corp. 2017 + * Copyright IBM Corp. 2017, 2022 * * Author: Harald Freudenberger * @@ -14,47 +14,41 @@ #ifdef CONFIG_ARCH_RANDOM #include +#include #include +#include DECLARE_STATIC_KEY_FALSE(s390_arch_random_available); extern atomic64_t s390_arch_random_counter; -bool s390_arch_random_generate(u8 *buf, unsigned int nbytes); - -static inline bool arch_has_random(void) -{ - return false; -} - -static inline bool arch_has_random_seed(void) -{ - if (static_branch_likely(&s390_arch_random_available)) - return true; - return false; -} - -static inline bool arch_get_random_long(unsigned long *v) +static inline bool __must_check arch_get_random_long(unsigned long *v) { return false; } -static inline bool arch_get_random_int(unsigned int *v) +static inline bool __must_check arch_get_random_int(unsigned int *v) { return false; } -static inline bool arch_get_random_seed_long(unsigned long *v) +static inline bool __must_check arch_get_random_seed_long(unsigned long *v) { - if (static_branch_likely(&s390_arch_random_available)) { - return s390_arch_random_generate((u8 *)v, sizeof(*v)); + if (static_branch_likely(&s390_arch_random_available) && + in_task()) { + cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v)); + atomic64_add(sizeof(*v), &s390_arch_random_counter); + return true; } return false; } -static inline bool arch_get_random_seed_int(unsigned int *v) +static inline bool __must_check arch_get_random_seed_int(unsigned int *v) { - if (static_branch_likely(&s390_arch_random_available)) { - return s390_arch_random_generate((u8 *)v, sizeof(*v)); + if (static_branch_likely(&s390_arch_random_available) && + in_task()) { + cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v)); + atomic64_add(sizeof(*v), &s390_arch_random_counter); + return true; } return false; } diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index ea398a05f6432..63098df81c9f2 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -9,6 +9,8 @@ #ifndef _S390_KEXEC_H #define _S390_KEXEC_H +#include + #include #include #include @@ -74,7 +76,21 @@ void *kexec_file_add_components(struct kimage *image, int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val, unsigned long addr); +#define ARCH_HAS_KIMAGE_ARCH + +struct kimage_arch { + void *ipl_buf; +}; + extern const struct kexec_file_ops s390_kexec_image_ops; extern const struct kexec_file_ops s390_kexec_elf_ops; +#ifdef CONFIG_KEXEC_FILE +struct purgatory_info; +int arch_kexec_apply_relocations_add(struct purgatory_info *pi, + Elf_Shdr *section, + const Elf_Shdr *relsec, + const Elf_Shdr *symtab); +#define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add +#endif #endif /*_S390_KEXEC_H */ diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 0fe5600a037e4..4d59d11e6813d 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -873,6 +873,7 @@ struct kvm_arch{ atomic64_t cmma_dirty_pages; /* subset of available cpu features enabled by user space */ DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); + /* indexed by vcpu_idx */ DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS); struct kvm_s390_gisa_interrupt gisa_int; }; diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h index e4dc64cc9c555..287bb88f76986 100644 --- a/arch/s390/include/asm/pci_io.h +++ b/arch/s390/include/asm/pci_io.h @@ -14,12 +14,13 @@ /* I/O Map */ #define ZPCI_IOMAP_SHIFT 48 -#define ZPCI_IOMAP_ADDR_BASE 0x8000000000000000UL +#define ZPCI_IOMAP_ADDR_SHIFT 62 +#define ZPCI_IOMAP_ADDR_BASE (1UL << ZPCI_IOMAP_ADDR_SHIFT) #define ZPCI_IOMAP_ADDR_OFF_MASK ((1UL << ZPCI_IOMAP_SHIFT) - 1) #define ZPCI_IOMAP_MAX_ENTRIES \ - ((ULONG_MAX - ZPCI_IOMAP_ADDR_BASE + 1) / (1UL << ZPCI_IOMAP_SHIFT)) + (1UL << (ZPCI_IOMAP_ADDR_SHIFT - ZPCI_IOMAP_SHIFT)) #define ZPCI_IOMAP_ADDR_IDX_MASK \ - (~ZPCI_IOMAP_ADDR_OFF_MASK - ZPCI_IOMAP_ADDR_BASE) + ((ZPCI_IOMAP_ADDR_BASE - 1) & ~ZPCI_IOMAP_ADDR_OFF_MASK) struct zpci_iomap_entry { u32 fh; diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index b5ea9e14c017a..3dcd8ab3db73b 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -52,10 +52,17 @@ static inline bool test_preempt_need_resched(void) static inline void __preempt_count_add(int val) { - if (__builtin_constant_p(val) && (val >= -128) && (val <= 127)) - __atomic_add_const(val, &S390_lowcore.preempt_count); - else - __atomic_add(val, &S390_lowcore.preempt_count); + /* + * With some obscure config options and CONFIG_PROFILE_ALL_BRANCHES + * enabled, gcc 12 fails to handle __builtin_constant_p(). + */ + if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES)) { + if (__builtin_constant_p(val) && (val >= -128) && (val <= 127)) { + __atomic_add_const(val, &S390_lowcore.preempt_count); + return; + } + } + __atomic_add(val, &S390_lowcore.preempt_count); } static inline void __preempt_count_sub(int val) diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 1932088686a68..e6a5007f017d8 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -39,6 +39,7 @@ #define MACHINE_FLAG_NX BIT(15) #define MACHINE_FLAG_GS BIT(16) #define MACHINE_FLAG_SCC BIT(17) +#define MACHINE_FLAG_PCI_MIO BIT(18) #define LPP_MAGIC BIT(31) #define LPP_PID_MASK _AC(0xffffffff, UL) @@ -106,6 +107,7 @@ extern unsigned long __swsusp_reset_dma; #define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX) #define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS) #define MACHINE_HAS_SCC (S390_lowcore.machine_flags & MACHINE_FLAG_SCC) +#define MACHINE_HAS_PCI_MIO (S390_lowcore.machine_flags & MACHINE_FLAG_PCI_MIO) /* * Console mode. Override with conmode= diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index b6a4ce9dafafb..99a7e028232d8 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -177,6 +177,7 @@ static inline cycles_t get_cycles(void) { return (cycles_t) get_tod_clock() >> 2; } +#define get_cycles get_cycles int get_phys_clock(unsigned long *clock); void init_cpu_timer(void); diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 7184d55d87aae..b1aadc3ad065d 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -327,24 +327,6 @@ static debug_info_t *debug_info_create(const char *name, int pages_per_area, goto out; rc->mode = mode & ~S_IFMT; - - /* create root directory */ - rc->debugfs_root_entry = debugfs_create_dir(rc->name, - debug_debugfs_root_entry); - - /* append new element to linked list */ - if (!debug_area_first) { - /* first element in list */ - debug_area_first = rc; - rc->prev = NULL; - } else { - /* append element to end of list */ - debug_area_last->next = rc; - rc->prev = debug_area_last; - } - debug_area_last = rc; - rc->next = NULL; - refcount_set(&rc->ref_count, 1); out: return rc; @@ -404,27 +386,10 @@ static void debug_info_get(debug_info_t *db_info) */ static void debug_info_put(debug_info_t *db_info) { - int i; - if (!db_info) return; - if (refcount_dec_and_test(&db_info->ref_count)) { - for (i = 0; i < DEBUG_MAX_VIEWS; i++) { - if (!db_info->views[i]) - continue; - debugfs_remove(db_info->debugfs_entries[i]); - } - debugfs_remove(db_info->debugfs_root_entry); - if (db_info == debug_area_first) - debug_area_first = db_info->next; - if (db_info == debug_area_last) - debug_area_last = db_info->prev; - if (db_info->prev) - db_info->prev->next = db_info->next; - if (db_info->next) - db_info->next->prev = db_info->prev; + if (refcount_dec_and_test(&db_info->ref_count)) debug_info_free(db_info); - } } /* @@ -648,6 +613,31 @@ static int debug_close(struct inode *inode, struct file *file) return 0; /* success */ } +/* Create debugfs entries and add to internal list. */ +static void _debug_register(debug_info_t *id) +{ + /* create root directory */ + id->debugfs_root_entry = debugfs_create_dir(id->name, + debug_debugfs_root_entry); + + /* append new element to linked list */ + if (!debug_area_first) { + /* first element in list */ + debug_area_first = id; + id->prev = NULL; + } else { + /* append element to end of list */ + debug_area_last->next = id; + id->prev = debug_area_last; + } + debug_area_last = id; + id->next = NULL; + + debug_register_view(id, &debug_level_view); + debug_register_view(id, &debug_flush_view); + debug_register_view(id, &debug_pages_view); +} + /** * debug_register_mode() - creates and initializes debug area. * @@ -677,19 +667,16 @@ debug_info_t *debug_register_mode(const char *name, int pages_per_area, if ((uid != 0) || (gid != 0)) pr_warn("Root becomes the owner of all s390dbf files in sysfs\n"); BUG_ON(!initialized); - mutex_lock(&debug_mutex); /* create new debug_info */ rc = debug_info_create(name, pages_per_area, nr_areas, buf_size, mode); - if (!rc) - goto out; - debug_register_view(rc, &debug_level_view); - debug_register_view(rc, &debug_flush_view); - debug_register_view(rc, &debug_pages_view); -out: - if (!rc) + if (rc) { + mutex_lock(&debug_mutex); + _debug_register(rc); + mutex_unlock(&debug_mutex); + } else { pr_err("Registering debug feature %s failed\n", name); - mutex_unlock(&debug_mutex); + } return rc; } EXPORT_SYMBOL(debug_register_mode); @@ -718,6 +705,27 @@ debug_info_t *debug_register(const char *name, int pages_per_area, } EXPORT_SYMBOL(debug_register); +/* Remove debugfs entries and remove from internal list. */ +static void _debug_unregister(debug_info_t *id) +{ + int i; + + for (i = 0; i < DEBUG_MAX_VIEWS; i++) { + if (!id->views[i]) + continue; + debugfs_remove(id->debugfs_entries[i]); + } + debugfs_remove(id->debugfs_root_entry); + if (id == debug_area_first) + debug_area_first = id->next; + if (id == debug_area_last) + debug_area_last = id->prev; + if (id->prev) + id->prev->next = id->next; + if (id->next) + id->next->prev = id->prev; +} + /** * debug_unregister() - give back debug area. * @@ -731,8 +739,10 @@ void debug_unregister(debug_info_t *id) if (!id) return; mutex_lock(&debug_mutex); - debug_info_put(id); + _debug_unregister(id); mutex_unlock(&debug_mutex); + + debug_info_put(id); } EXPORT_SYMBOL(debug_unregister); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 2531776cf6cf9..eb89cb0aa60b4 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -252,6 +252,10 @@ static __init void detect_machine_facilities(void) clock_comparator_max = -1ULL >> 1; __ctl_set_bit(0, 53); } + if (IS_ENABLED(CONFIG_PCI) && test_facility(153)) { + S390_lowcore.machine_flags |= MACHINE_FLAG_PCI_MIO; + /* the control bit is set during PCI initialization */ + } } static inline void save_vector_registers(void) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 6837affc19e81..7795cdee6427d 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -1783,7 +1783,7 @@ void *ipl_report_finish(struct ipl_report *report) buf = vzalloc(report->size); if (!buf) - return ERR_PTR(-ENOMEM); + goto out; ptr = buf; memcpy(ptr, report->ipib, report->ipib->hdr.len); @@ -1822,6 +1822,7 @@ void *ipl_report_finish(struct ipl_report *report) } BUG_ON(ptr > buf + report->size); +out: return buf; } diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c index ab584e8e35275..9156653b56f69 100644 --- a/arch/s390/kernel/jump_label.c +++ b/arch/s390/kernel/jump_label.c @@ -36,7 +36,7 @@ static void jump_label_bug(struct jump_entry *entry, struct insn *expected, unsigned char *ipe = (unsigned char *)expected; unsigned char *ipn = (unsigned char *)new; - pr_emerg("Jump label code mismatch at %pS [%p]\n", ipc, ipc); + pr_emerg("Jump label code mismatch at %pS [%px]\n", ipc, ipc); pr_emerg("Found: %6ph\n", ipc); pr_emerg("Expected: %6ph\n", ipe); pr_emerg("New: %6ph\n", ipn); diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index f9e4baa64b675..76cd09879eaf4 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -170,6 +171,7 @@ static int kexec_file_add_ipl_report(struct kimage *image, struct kexec_buf buf; unsigned long addr; void *ptr, *end; + int ret; buf.image = image; @@ -199,9 +201,13 @@ static int kexec_file_add_ipl_report(struct kimage *image, ptr += len; } + ret = -ENOMEM; buf.buffer = ipl_report_finish(data->report); + if (!buf.buffer) + goto out; buf.bufsz = data->report->size; buf.memsz = buf.bufsz; + image->arch.ipl_buf = buf.buffer; data->memsz += buf.memsz; @@ -209,7 +215,9 @@ static int kexec_file_add_ipl_report(struct kimage *image, data->kernel_buf + offsetof(struct lowcore, ipl_parmblock_ptr); *lc_ipl_parmblock_ptr = (__u32)buf.mem; - return kexec_add_buffer(&buf); + ret = kexec_add_buffer(&buf); +out: + return ret; } void *kexec_file_add_components(struct kimage *image, @@ -269,6 +277,7 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, { Elf_Rela *relas; int i, r_type; + int ret; relas = (void *)pi->ehdr + relsec->sh_offset; @@ -303,7 +312,11 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, addr = section->sh_addr + relas[i].r_offset; r_type = ELF64_R_TYPE(relas[i].r_info); - arch_kexec_do_relocs(r_type, loc, val, addr); + ret = arch_kexec_do_relocs(r_type, loc, val, addr); + if (ret) { + pr_err("Unknown rela relocation: %d\n", r_type); + return -ENOEXEC; + } } return 0; } @@ -321,3 +334,11 @@ int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, return kexec_image_probe_default(image, buf, buf_len); } + +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + vfree(image->arch.ipl_buf); + image->arch.ipl_buf = NULL; + + return kexec_image_post_load_cleanup_default(image); +} diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 0eb1d1cc53a88..dddb32e53db8b 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -292,6 +292,26 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type) return err; } +/* Events CPU_CYLCES and INSTRUCTIONS can be submitted with two different + * attribute::type values: + * - PERF_TYPE_HARDWARE: + * - pmu->type: + * Handle both type of invocations identical. They address the same hardware. + * The result is different when event modifiers exclude_kernel and/or + * exclude_user are also set. + */ +static int cpumf_pmu_event_type(struct perf_event *event) +{ + u64 ev = event->attr.config; + + if (cpumf_generic_events_basic[PERF_COUNT_HW_CPU_CYCLES] == ev || + cpumf_generic_events_basic[PERF_COUNT_HW_INSTRUCTIONS] == ev || + cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev || + cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev) + return PERF_TYPE_HARDWARE; + return PERF_TYPE_RAW; +} + static int cpumf_pmu_event_init(struct perf_event *event) { unsigned int type = event->attr.type; @@ -301,7 +321,7 @@ static int cpumf_pmu_event_init(struct perf_event *event) err = __hw_perf_event_init(event, type); else if (event->pmu->type == type) /* Registered as unknown PMU */ - err = __hw_perf_event_init(event, PERF_TYPE_RAW); + err = __hw_perf_event_init(event, cpumf_pmu_event_type(event)); else return -ENOENT; diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index f661f176966f5..dbc2a718d232d 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -841,9 +841,6 @@ static void __init setup_memory(void) storage_key_init_range(reg->base, reg->base + reg->size); } psw_set_key(PAGE_DEFAULT_KEY); - - /* Only cosmetics */ - memblock_enforce_memory_limit(memblock_end_of_DRAM()); } /* @@ -1008,6 +1005,11 @@ static void __init setup_randomness(void) if (stsi(vmms, 3, 2, 2) == 0 && vmms->count) add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count); memblock_free((unsigned long) vmms, PAGE_SIZE); + +#ifdef CONFIG_ARCH_RANDOM + if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG)) + static_branch_enable(&s390_arch_random_available); +#endif } /* diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 62388a678b91a..8be5750fe5ac3 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -408,13 +408,13 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu) static void __set_cpu_idle(struct kvm_vcpu *vcpu) { kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT); - set_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask); + set_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask); } static void __unset_cpu_idle(struct kvm_vcpu *vcpu) { kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT); - clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask); + clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask); } static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) @@ -1982,6 +1982,13 @@ int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu) return test_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs); } +int kvm_s390_is_restart_irq_pending(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + + return test_bit(IRQ_PEND_RESTART, &li->pending_irqs); +} + void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; @@ -2984,18 +2991,19 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len) static void __airqs_kick_single_vcpu(struct kvm *kvm, u8 deliverable_mask) { - int vcpu_id, online_vcpus = atomic_read(&kvm->online_vcpus); + int vcpu_idx, online_vcpus = atomic_read(&kvm->online_vcpus); struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; struct kvm_vcpu *vcpu; + u8 vcpu_isc_mask; - for_each_set_bit(vcpu_id, kvm->arch.idle_mask, online_vcpus) { - vcpu = kvm_get_vcpu(kvm, vcpu_id); + for_each_set_bit(vcpu_idx, kvm->arch.idle_mask, online_vcpus) { + vcpu = kvm_get_vcpu(kvm, vcpu_idx); if (psw_ioint_disabled(vcpu)) continue; - deliverable_mask &= (u8)(vcpu->arch.sie_block->gcr[6] >> 24); - if (deliverable_mask) { + vcpu_isc_mask = (u8)(vcpu->arch.sie_block->gcr[6] >> 24); + if (deliverable_mask & vcpu_isc_mask) { /* lately kicked but not yet running */ - if (test_and_set_bit(vcpu_id, gi->kicked_mask)) + if (test_and_set_bit(vcpu_idx, gi->kicked_mask)) return; kvm_s390_vcpu_wakeup(vcpu); return; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b88cb948734f6..9da63d2108863 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3092,6 +3092,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { + clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask); return kvm_s390_vcpu_has_irq(vcpu, 0); } @@ -3726,7 +3727,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) kvm_s390_patch_guest_per_regs(vcpu); } - clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask); + clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.gisa_int.kicked_mask); vcpu->arch.sie_block->icptcode = 0; cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); @@ -4204,10 +4205,15 @@ void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) spin_lock(&vcpu->kvm->arch.start_stop_lock); online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); - /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */ + /* + * Set the VCPU to STOPPED and THEN clear the interrupt flag, + * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders + * have been fully processed. This will ensure that the VCPU + * is kept BUSY if another VCPU is inquiring with SIGP SENSE. + */ + kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED); kvm_s390_clear_stop_irq(vcpu); - kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED); __disable_ibs_on_vcpu(vcpu); for (i = 0; i < online_vcpus; i++) { diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 6d9448dbd052b..d497d3e58784b 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -67,7 +67,7 @@ static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu) static inline int is_vcpu_idle(struct kvm_vcpu *vcpu) { - return test_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask); + return test_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask); } static inline int kvm_is_ucontrol(struct kvm *kvm) @@ -373,6 +373,7 @@ void kvm_s390_destroy_adapters(struct kvm *kvm); int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu); extern struct kvm_device_ops kvm_flic_ops; int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu); +int kvm_s390_is_restart_irq_pending(struct kvm_vcpu *vcpu); void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu); int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu, void __user *buf, int len); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index bca3d96eb464a..5f7f047cba599 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -398,6 +398,8 @@ static int handle_sske(struct kvm_vcpu *vcpu) mmap_read_unlock(current->mm); if (rc == -EFAULT) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + if (rc == -EAGAIN) + continue; if (rc < 0) return rc; start += PAGE_SIZE; diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 683036c1c92a8..3dc921e853b6e 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -288,6 +288,34 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code, if (!dst_vcpu) return SIGP_CC_NOT_OPERATIONAL; + /* + * SIGP RESTART, SIGP STOP, and SIGP STOP AND STORE STATUS orders + * are processed asynchronously. Until the affected VCPU finishes + * its work and calls back into KVM to clear the (RESTART or STOP) + * interrupt, we need to return any new non-reset orders "busy". + * + * This is important because a single VCPU could issue: + * 1) SIGP STOP $DESTINATION + * 2) SIGP SENSE $DESTINATION + * + * If the SIGP SENSE would not be rejected as "busy", it could + * return an incorrect answer as to whether the VCPU is STOPPED + * or OPERATING. + */ + if (order_code != SIGP_INITIAL_CPU_RESET && + order_code != SIGP_CPU_RESET) { + /* + * Lockless check. Both SIGP STOP and SIGP (RE)START + * properly synchronize everything while processing + * their orders, while the guest cannot observe a + * difference when issuing other orders from two + * different VCPUs. + */ + if (kvm_s390_is_stop_irq_pending(dst_vcpu) || + kvm_s390_is_restart_irq_pending(dst_vcpu)) + return SIGP_CC_BUSY; + } + switch (order_code) { case SIGP_SENSE: vcpu->stat.instruction_sigp_sense++; diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c index 0e30e6e43b0c5..18fbbb679851d 100644 --- a/arch/s390/lib/string.c +++ b/arch/s390/lib/string.c @@ -246,14 +246,13 @@ EXPORT_SYMBOL(strcmp); #ifdef __HAVE_ARCH_STRRCHR char *strrchr(const char *s, int c) { - size_t len = __strend(s) - s; - - if (len) - do { - if (s[len] == (char) c) - return (char *) s + len; - } while (--len > 0); - return NULL; + ssize_t len = __strend(s) - s; + + do { + if (s[len] == (char)c) + return (char *)s + len; + } while (--len >= 0); + return NULL; } EXPORT_SYMBOL(strrchr); #endif diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 7e46dca8347f6..e665ad1a5dfdb 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -684,9 +684,10 @@ void __gmap_zap(struct gmap *gmap, unsigned long gaddr) vmaddr |= gaddr & ~PMD_MASK; /* Get pointer to the page table entry */ ptep = get_locked_pte(gmap->mm, vmaddr, &ptl); - if (likely(ptep)) + if (likely(ptep)) { ptep_zap_unused(gmap->mm, vmaddr, ptep, 0); - pte_unmap_unlock(ptep, ptl); + pte_unmap_unlock(ptep, ptl); + } } } EXPORT_SYMBOL_GPL(__gmap_zap); @@ -2578,6 +2579,18 @@ static int __s390_enable_skey_pte(pte_t *pte, unsigned long addr, return 0; } +/* + * Give a chance to schedule after setting a key to 256 pages. + * We only hold the mm lock, which is a rwsem and the kvm srcu. + * Both can sleep. + */ +static int __s390_enable_skey_pmd(pmd_t *pmd, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + cond_resched(); + return 0; +} + static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr, unsigned long hmask, unsigned long next, struct mm_walk *walk) @@ -2600,12 +2613,14 @@ static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr, end = start + HPAGE_SIZE - 1; __storage_key_init_range(start, end); set_bit(PG_arch_1, &page->flags); + cond_resched(); return 0; } static const struct mm_walk_ops enable_skey_walk_ops = { .hugetlb_entry = __s390_enable_skey_hugetlb, .pte_entry = __s390_enable_skey_pte, + .pmd_entry = __s390_enable_skey_pmd, }; int s390_enable_skey(void) diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index c1d96e588152b..5521f593cd20a 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -168,9 +168,9 @@ static void pv_init(void) return; /* make sure bounce buffers are shared */ + swiotlb_force = SWIOTLB_FORCE; swiotlb_init(1); swiotlb_update_mem_attributes(); - swiotlb_force = SWIOTLB_FORCE; } void __init mem_init(void) diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index 460f255729402..5182e0836ca71 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -101,6 +101,9 @@ static void __init kasan_early_vmemmap_populate(unsigned long address, pgt_prot = pgprot_val(PAGE_KERNEL_EXEC); sgt_prot = pgprot_val(SEGMENT_KERNEL_EXEC); + /* + * The first 1MB of 1:1 mapping is mapped with 4KB pages + */ while (address < end) { pg_dir = pgd_offset_k(address); if (pgd_none(*pg_dir)) { @@ -146,30 +149,26 @@ static void __init kasan_early_vmemmap_populate(unsigned long address, pm_dir = pmd_offset(pu_dir, address); if (pmd_none(*pm_dir)) { - if (mode == POPULATE_ZERO_SHADOW && - IS_ALIGNED(address, PMD_SIZE) && + if (IS_ALIGNED(address, PMD_SIZE) && end - address >= PMD_SIZE) { - pmd_populate(&init_mm, pm_dir, - kasan_early_shadow_pte); - address = (address + PMD_SIZE) & PMD_MASK; - continue; - } - /* the first megabyte of 1:1 is mapped with 4k pages */ - if (has_edat && address && end - address >= PMD_SIZE && - mode != POPULATE_ZERO_SHADOW) { - void *page; - - if (mode == POPULATE_ONE2ONE) { - page = (void *)address; - } else { - page = kasan_early_alloc_segment(); - memset(page, 0, _SEGMENT_SIZE); + if (mode == POPULATE_ZERO_SHADOW) { + pmd_populate(&init_mm, pm_dir, kasan_early_shadow_pte); + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } else if (has_edat && address) { + void *page; + + if (mode == POPULATE_ONE2ONE) { + page = (void *)address; + } else { + page = kasan_early_alloc_segment(); + memset(page, 0, _SEGMENT_SIZE); + } + pmd_val(*pm_dir) = __pa(page) | sgt_prot; + address = (address + PMD_SIZE) & PMD_MASK; + continue; } - pmd_val(*pm_dir) = __pa(page) | sgt_prot; - address = (address + PMD_SIZE) & PMD_MASK; - continue; } - pt_dir = kasan_early_pte_alloc(); pmd_populate(&init_mm, pm_dir, pt_dir); } else if (pmd_large(*pm_dir)) { diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 46071be897ab0..90943bf3e5eec 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -255,13 +255,15 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) /* Free 2K page table fragment of a 4K page */ bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)); spin_lock_bh(&mm->context.lock); - mask = atomic_xor_bits(&page->_refcount, 1U << (bit + 24)); + mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24)); mask >>= 24; if (mask & 3) list_add(&page->lru, &mm->context.pgtable_list); else list_del(&page->lru); spin_unlock_bh(&mm->context.lock); + mask = atomic_xor_bits(&page->_refcount, 0x10U << (bit + 24)); + mask >>= 24; if (mask != 0) return; } else { diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 9ebd01219812c..28ca07360e970 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -716,7 +716,7 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) pgste_val(pgste) |= PGSTE_GR_BIT | PGSTE_GC_BIT; ptev = pte_val(*ptep); if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE)) - page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1); + page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 0); pgste_set_unlock(ptep, pgste); preempt_enable(); } @@ -970,6 +970,7 @@ EXPORT_SYMBOL(get_guest_storage_key); int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, unsigned long *oldpte, unsigned long *oldpgste) { + struct vm_area_struct *vma; unsigned long pgstev; spinlock_t *ptl; pgste_t pgste; @@ -979,6 +980,10 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, WARN_ON_ONCE(orc > ESSA_MAX); if (unlikely(orc > ESSA_MAX)) return -EINVAL; + + vma = find_vma(mm, hva); + if (!vma || hva < vma->vm_start || is_vm_hugetlb_page(vma)) + return -EFAULT; ptep = get_locked_pte(mm, hva, &ptl); if (unlikely(!ptep)) return -EFAULT; @@ -1071,10 +1076,14 @@ EXPORT_SYMBOL(pgste_perform_essa); int set_pgste_bits(struct mm_struct *mm, unsigned long hva, unsigned long bits, unsigned long value) { + struct vm_area_struct *vma; spinlock_t *ptl; pgste_t new; pte_t *ptep; + vma = find_vma(mm, hva); + if (!vma || hva < vma->vm_start || is_vm_hugetlb_page(vma)) + return -EFAULT; ptep = get_locked_pte(mm, hva, &ptl); if (unlikely(!ptep)) return -EFAULT; @@ -1099,9 +1108,13 @@ EXPORT_SYMBOL(set_pgste_bits); */ int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep) { + struct vm_area_struct *vma; spinlock_t *ptl; pte_t *ptep; + vma = find_vma(mm, hva); + if (!vma || hva < vma->vm_start || is_vm_hugetlb_page(vma)) + return -EFAULT; ptep = get_locked_pte(mm, hva, &ptl); if (unlikely(!ptep)) return -EFAULT; diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index e160f4650f8e4..f63e4cb6c9b31 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -569,10 +569,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT4(0xb9080000, dst_reg, src_reg); break; case BPF_ALU | BPF_ADD | BPF_K: /* dst = (u32) dst + (u32) imm */ - if (!imm) - break; - /* alfi %dst,imm */ - EMIT6_IMM(0xc20b0000, dst_reg, imm); + if (imm != 0) { + /* alfi %dst,imm */ + EMIT6_IMM(0xc20b0000, dst_reg, imm); + } EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_ADD | BPF_K: /* dst = dst + imm */ @@ -594,17 +594,22 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT4(0xb9090000, dst_reg, src_reg); break; case BPF_ALU | BPF_SUB | BPF_K: /* dst = (u32) dst - (u32) imm */ - if (!imm) - break; - /* alfi %dst,-imm */ - EMIT6_IMM(0xc20b0000, dst_reg, -imm); + if (imm != 0) { + /* alfi %dst,-imm */ + EMIT6_IMM(0xc20b0000, dst_reg, -imm); + } EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_SUB | BPF_K: /* dst = dst - imm */ if (!imm) break; - /* agfi %dst,-imm */ - EMIT6_IMM(0xc2080000, dst_reg, -imm); + if (imm == -0x80000000) { + /* algfi %dst,0x80000000 */ + EMIT6_IMM(0xc20a0000, dst_reg, 0x80000000); + } else { + /* agfi %dst,-imm */ + EMIT6_IMM(0xc2080000, dst_reg, -imm); + } break; /* * BPF_MUL @@ -619,10 +624,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT4(0xb90c0000, dst_reg, src_reg); break; case BPF_ALU | BPF_MUL | BPF_K: /* dst = (u32) dst * (u32) imm */ - if (imm == 1) - break; - /* msfi %r5,imm */ - EMIT6_IMM(0xc2010000, dst_reg, imm); + if (imm != 1) { + /* msfi %r5,imm */ + EMIT6_IMM(0xc2010000, dst_reg, imm); + } EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_MUL | BPF_K: /* dst = dst * imm */ @@ -675,6 +680,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, if (BPF_OP(insn->code) == BPF_MOD) /* lhgi %dst,0 */ EMIT4_IMM(0xa7090000, dst_reg, 0); + else + EMIT_ZERO(dst_reg); break; } /* lhi %w0,0 */ @@ -769,10 +776,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT4(0xb9820000, dst_reg, src_reg); break; case BPF_ALU | BPF_XOR | BPF_K: /* dst = (u32) dst ^ (u32) imm */ - if (!imm) - break; - /* xilf %dst,imm */ - EMIT6_IMM(0xc0070000, dst_reg, imm); + if (imm != 0) { + /* xilf %dst,imm */ + EMIT6_IMM(0xc0070000, dst_reg, imm); + } EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_XOR | BPF_K: /* dst = dst ^ imm */ @@ -793,10 +800,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT6_DISP_LH(0xeb000000, 0x000d, dst_reg, dst_reg, src_reg, 0); break; case BPF_ALU | BPF_LSH | BPF_K: /* dst = (u32) dst << (u32) imm */ - if (imm == 0) - break; - /* sll %dst,imm(%r0) */ - EMIT4_DISP(0x89000000, dst_reg, REG_0, imm); + if (imm != 0) { + /* sll %dst,imm(%r0) */ + EMIT4_DISP(0x89000000, dst_reg, REG_0, imm); + } EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_LSH | BPF_K: /* dst = dst << imm */ @@ -818,10 +825,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT6_DISP_LH(0xeb000000, 0x000c, dst_reg, dst_reg, src_reg, 0); break; case BPF_ALU | BPF_RSH | BPF_K: /* dst = (u32) dst >> (u32) imm */ - if (imm == 0) - break; - /* srl %dst,imm(%r0) */ - EMIT4_DISP(0x88000000, dst_reg, REG_0, imm); + if (imm != 0) { + /* srl %dst,imm(%r0) */ + EMIT4_DISP(0x88000000, dst_reg, REG_0, imm); + } EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_RSH | BPF_K: /* dst = dst >> imm */ @@ -843,10 +850,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT6_DISP_LH(0xeb000000, 0x000a, dst_reg, dst_reg, src_reg, 0); break; case BPF_ALU | BPF_ARSH | BPF_K: /* ((s32) dst >> imm */ - if (imm == 0) - break; - /* sra %dst,imm(%r0) */ - EMIT4_DISP(0x8a000000, dst_reg, REG_0, imm); + if (imm != 0) { + /* sra %dst,imm(%r0) */ + EMIT4_DISP(0x8a000000, dst_reg, REG_0, imm); + } EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_ARSH | BPF_K: /* ((s64) dst) >>= imm */ @@ -913,6 +920,11 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, break; } break; + /* + * BPF_NOSPEC (speculation barrier) + */ + case BPF_ST | BPF_NOSPEC: + break; /* * BPF_ST(X) */ @@ -1373,7 +1385,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) jit.addrs = kvcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL); if (jit.addrs == NULL) { fp = orig_fp; - goto out; + goto free_addrs; } /* * Three initial passes: diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 6105b1b6e49b7..b8ddacf1efe11 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -854,7 +854,6 @@ static void zpci_mem_exit(void) } static unsigned int s390_pci_probe __initdata = 1; -static unsigned int s390_pci_no_mio __initdata; unsigned int s390_pci_force_floating __initdata; static unsigned int s390_pci_initialized; @@ -865,7 +864,7 @@ char * __init pcibios_setup(char *str) return NULL; } if (!strcmp(str, "nomio")) { - s390_pci_no_mio = 1; + S390_lowcore.machine_flags &= ~MACHINE_FLAG_PCI_MIO; return NULL; } if (!strcmp(str, "force_floating")) { @@ -890,7 +889,7 @@ static int __init pci_base_init(void) if (!test_facility(69) || !test_facility(71)) return 0; - if (test_facility(153) && !s390_pci_no_mio) { + if (MACHINE_HAS_PCI_MIO) { static_branch_enable(&have_mio); ctl_set_bit(2, 5); } diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index 38efa3e852c4f..a90535b3ef3aa 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -128,7 +128,7 @@ static long get_pfn(unsigned long user_addr, unsigned long access, mmap_read_lock(current->mm); ret = -EINVAL; vma = find_vma(current->mm, user_addr); - if (!vma) + if (!vma || user_addr < vma->vm_start) goto out; ret = -EACCES; if (!(vma->vm_flags & access)) diff --git a/arch/sh/Kconfig.debug b/arch/sh/Kconfig.debug index 010b6c33bbba2..71acd3d9b9e83 100644 --- a/arch/sh/Kconfig.debug +++ b/arch/sh/Kconfig.debug @@ -58,6 +58,7 @@ config DUMP_CODE config DWARF_UNWINDER bool "Enable the DWARF unwinder for stacktraces" + depends on DEBUG_KERNEL select FRAME_POINTER depends on SUPERH32 default n diff --git a/arch/sh/include/asm/sfp-machine.h b/arch/sh/include/asm/sfp-machine.h index cbc7cf8c97ce6..2d2423478b71d 100644 --- a/arch/sh/include/asm/sfp-machine.h +++ b/arch/sh/include/asm/sfp-machine.h @@ -13,6 +13,14 @@ #ifndef _SFP_MACHINE_H #define _SFP_MACHINE_H +#ifdef __BIG_ENDIAN__ +#define __BYTE_ORDER __BIG_ENDIAN +#define __LITTLE_ENDIAN 0 +#else +#define __BYTE_ORDER __LITTLE_ENDIAN +#define __BIG_ENDIAN 0 +#endif + #define _FP_W_TYPE_SIZE 32 #define _FP_W_TYPE unsigned long #define _FP_WS_TYPE signed long diff --git a/arch/sh/kernel/cpu/fpu.c b/arch/sh/kernel/cpu/fpu.c index ae354a2931e7e..fd6db0ab19288 100644 --- a/arch/sh/kernel/cpu/fpu.c +++ b/arch/sh/kernel/cpu/fpu.c @@ -62,18 +62,20 @@ void fpu_state_restore(struct pt_regs *regs) } if (!tsk_used_math(tsk)) { - local_irq_enable(); + int ret; /* * does a slab alloc which can sleep */ - if (init_fpu(tsk)) { + local_irq_enable(); + ret = init_fpu(tsk); + local_irq_disable(); + if (ret) { /* * ran out of memory! */ - do_group_exit(SIGKILL); + force_sig(SIGKILL); return; } - local_irq_disable(); } grab_fpu(regs); diff --git a/arch/sh/kernel/cpu/sh4a/smp-shx3.c b/arch/sh/kernel/cpu/sh4a/smp-shx3.c index f8a2bec0f260b..1261dc7b84e8b 100644 --- a/arch/sh/kernel/cpu/sh4a/smp-shx3.c +++ b/arch/sh/kernel/cpu/sh4a/smp-shx3.c @@ -73,8 +73,9 @@ static void shx3_prepare_cpus(unsigned int max_cpus) BUILD_BUG_ON(SMP_MSG_NR >= 8); for (i = 0; i < SMP_MSG_NR; i++) - request_irq(104 + i, ipi_interrupt_handler, - IRQF_PERCPU, "IPI", (void *)(long)i); + if (request_irq(104 + i, ipi_interrupt_handler, + IRQF_PERCPU, "IPI", (void *)(long)i)) + pr_err("Failed to request irq %d\n", i); for (i = 0; i < max_cpus; i++) set_cpu_present(i, true); diff --git a/arch/sh/math-emu/math.c b/arch/sh/math-emu/math.c index e8be0eca0444a..615ba932c398e 100644 --- a/arch/sh/math-emu/math.c +++ b/arch/sh/math-emu/math.c @@ -467,109 +467,6 @@ static int fpu_emulate(u16 code, struct sh_fpu_soft_struct *fregs, struct pt_reg return id_sys(fregs, regs, code); } -/** - * denormal_to_double - Given denormalized float number, - * store double float - * - * @fpu: Pointer to sh_fpu_soft structure - * @n: Index to FP register - */ -static void denormal_to_double(struct sh_fpu_soft_struct *fpu, int n) -{ - unsigned long du, dl; - unsigned long x = fpu->fpul; - int exp = 1023 - 126; - - if (x != 0 && (x & 0x7f800000) == 0) { - du = (x & 0x80000000); - while ((x & 0x00800000) == 0) { - x <<= 1; - exp--; - } - x &= 0x007fffff; - du |= (exp << 20) | (x >> 3); - dl = x << 29; - - fpu->fp_regs[n] = du; - fpu->fp_regs[n+1] = dl; - } -} - -/** - * ieee_fpe_handler - Handle denormalized number exception - * - * @regs: Pointer to register structure - * - * Returns 1 when it's handled (should not cause exception). - */ -static int ieee_fpe_handler(struct pt_regs *regs) -{ - unsigned short insn = *(unsigned short *)regs->pc; - unsigned short finsn; - unsigned long nextpc; - int nib[4] = { - (insn >> 12) & 0xf, - (insn >> 8) & 0xf, - (insn >> 4) & 0xf, - insn & 0xf}; - - if (nib[0] == 0xb || - (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */ - regs->pr = regs->pc + 4; - - if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */ - nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3); - finsn = *(unsigned short *) (regs->pc + 2); - } else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */ - if (regs->sr & 1) - nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1); - else - nextpc = regs->pc + 4; - finsn = *(unsigned short *) (regs->pc + 2); - } else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */ - if (regs->sr & 1) - nextpc = regs->pc + 4; - else - nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1); - finsn = *(unsigned short *) (regs->pc + 2); - } else if (nib[0] == 0x4 && nib[3] == 0xb && - (nib[2] == 0x0 || nib[2] == 0x2)) { /* jmp & jsr */ - nextpc = regs->regs[nib[1]]; - finsn = *(unsigned short *) (regs->pc + 2); - } else if (nib[0] == 0x0 && nib[3] == 0x3 && - (nib[2] == 0x0 || nib[2] == 0x2)) { /* braf & bsrf */ - nextpc = regs->pc + 4 + regs->regs[nib[1]]; - finsn = *(unsigned short *) (regs->pc + 2); - } else if (insn == 0x000b) { /* rts */ - nextpc = regs->pr; - finsn = *(unsigned short *) (regs->pc + 2); - } else { - nextpc = regs->pc + 2; - finsn = insn; - } - - if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */ - struct task_struct *tsk = current; - - if ((tsk->thread.xstate->softfpu.fpscr & (1 << 17))) { - /* FPU error */ - denormal_to_double (&tsk->thread.xstate->softfpu, - (finsn >> 8) & 0xf); - tsk->thread.xstate->softfpu.fpscr &= - ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK); - task_thread_info(tsk)->status |= TS_USEDFPU; - } else { - force_sig_fault(SIGFPE, FPE_FLTINV, - (void __user *)regs->pc); - } - - regs->pc = nextpc; - return 1; - } - - return 0; -} - /** * fpu_init - Initialize FPU registers * @fpu: Pointer to software emulated FPU registers. diff --git a/arch/sparc/include/asm/timex_32.h b/arch/sparc/include/asm/timex_32.h index 542915b462097..f86326a6f89e0 100644 --- a/arch/sparc/include/asm/timex_32.h +++ b/arch/sparc/include/asm/timex_32.h @@ -9,8 +9,6 @@ #define CLOCK_TICK_RATE 1193180 /* Underlying HZ */ -/* XXX Maybe do something better at some point... -DaveM */ -typedef unsigned long cycles_t; -#define get_cycles() (0) +#include #endif diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index f89603855f1ec..b87e0002131dd 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -356,7 +356,9 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) { - if (!sparc_dma_free_resource(cpu_addr, PAGE_ALIGN(size))) + size = PAGE_ALIGN(size); + + if (!sparc_dma_free_resource(cpu_addr, size)) return; dma_make_coherent(dma_addr, size); diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index 8e645ddac58e2..30f171b7b00c2 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -39,6 +39,7 @@ struct mdesc_hdr { u32 node_sz; /* node block size */ u32 name_sz; /* name block size */ u32 data_sz; /* data block size */ + char data[]; } __attribute__((aligned(16))); struct mdesc_elem { @@ -612,7 +613,7 @@ EXPORT_SYMBOL(mdesc_get_node_info); static struct mdesc_elem *node_block(struct mdesc_hdr *mdesc) { - return (struct mdesc_elem *) (mdesc + 1); + return (struct mdesc_elem *) mdesc->data; } static void *name_block(struct mdesc_hdr *mdesc) diff --git a/arch/sparc/lib/iomap.c b/arch/sparc/lib/iomap.c index c9da9f139694d..f3a8cd491ce0d 100644 --- a/arch/sparc/lib/iomap.c +++ b/arch/sparc/lib/iomap.c @@ -19,8 +19,10 @@ void ioport_unmap(void __iomem *addr) EXPORT_SYMBOL(ioport_map); EXPORT_SYMBOL(ioport_unmap); +#ifdef CONFIG_PCI void pci_iounmap(struct pci_dev *dev, void __iomem * addr) { /* nothing to do */ } EXPORT_SYMBOL(pci_iounmap); +#endif diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index 3364e2a009899..fef734473c0f3 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c @@ -1287,6 +1287,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) return 1; break; } + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_W: case BPF_ST | BPF_MEM | BPF_H: diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c index 6040817c036f3..25727ed648b72 100644 --- a/arch/um/drivers/chan_user.c +++ b/arch/um/drivers/chan_user.c @@ -220,7 +220,7 @@ static int winch_tramp(int fd, struct tty_port *port, int *fd_out, unsigned long *stack_out) { struct winch_data data; - int fds[2], n, err; + int fds[2], n, err, pid; char c; err = os_pipe(fds, 1, 1); @@ -238,8 +238,9 @@ static int winch_tramp(int fd, struct tty_port *port, int *fd_out, * problem with /dev/net/tun, which if held open by this * thread, prevents the TUN/TAP device from being reused. */ - err = run_helper_thread(winch_thread, &data, CLONE_FILES, stack_out); - if (err < 0) { + pid = run_helper_thread(winch_thread, &data, CLONE_FILES, stack_out); + if (pid < 0) { + err = pid; printk(UM_KERN_ERR "fork of winch_thread failed - errno = %d\n", -err); goto out_close; @@ -263,7 +264,7 @@ static int winch_tramp(int fd, struct tty_port *port, int *fd_out, goto out_close; } - return err; + return pid; out_close: close(fds[1]); diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 0117489e9b30a..750acc1344328 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -217,7 +217,7 @@ void mconsole_go(struct mc_request *req) void mconsole_stop(struct mc_request *req) { - deactivate_fd(req->originating_fd, MCONSOLE_IRQ); + block_signals(); os_set_fd_block(req->originating_fd, 1); mconsole_reply(req, "stopped", 0, 0); for (;;) { @@ -240,6 +240,7 @@ void mconsole_stop(struct mc_request *req) } os_set_fd_block(req->originating_fd, 0); mconsole_reply(req, "", 0, 0); + unblock_signals(); } static DEFINE_SPINLOCK(mc_devices_lock); diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index e77d1c2a92f3e..69d38839dffa6 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -994,7 +994,7 @@ static int virtio_uml_probe(struct platform_device *pdev) rc = os_connect_socket(pdata->socket_path); } while (rc == -EINTR); if (rc < 0) - return rc; + goto error_free; vu_dev->sock = rc; rc = vhost_user_init(vu_dev); @@ -1010,6 +1010,8 @@ static int virtio_uml_probe(struct platform_device *pdev) error_init: os_close_file(vu_dev->sock); +error_free: + kfree(vu_dev); return rc; } diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h index 3b1cb8b3b1864..e610e932cfe1e 100644 --- a/arch/um/include/asm/thread_info.h +++ b/arch/um/include/asm/thread_info.h @@ -64,6 +64,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_RESTORE_SIGMASK 7 #define TIF_NOTIFY_RESUME 8 #define TIF_SECCOMP 9 /* secure computing */ +#define TIF_SINGLESTEP 10 /* single stepping userspace */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) @@ -72,5 +73,6 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_MEMDIE (1 << TIF_MEMDIE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #endif diff --git a/arch/um/include/asm/timex.h b/arch/um/include/asm/timex.h index e392a9a5bc9bd..9f27176adb26d 100644 --- a/arch/um/include/asm/timex.h +++ b/arch/um/include/asm/timex.h @@ -2,13 +2,8 @@ #ifndef __UM_TIMEX_H #define __UM_TIMEX_H -typedef unsigned long cycles_t; - -static inline cycles_t get_cycles (void) -{ - return 0; -} - #define CLOCK_TICK_RATE (HZ) +#include + #endif diff --git a/arch/um/include/shared/registers.h b/arch/um/include/shared/registers.h index 0c50fa6e8a55b..fbb709a222839 100644 --- a/arch/um/include/shared/registers.h +++ b/arch/um/include/shared/registers.h @@ -16,8 +16,8 @@ extern int restore_fp_registers(int pid, unsigned long *fp_regs); extern int save_fpx_registers(int pid, unsigned long *fp_regs); extern int restore_fpx_registers(int pid, unsigned long *fp_regs); extern int save_registers(int pid, struct uml_pt_regs *regs); -extern int restore_registers(int pid, struct uml_pt_regs *regs); -extern int init_registers(int pid); +extern int restore_pid_registers(int pid, struct uml_pt_regs *regs); +extern int init_pid_registers(int pid); extern void get_safe_registers(unsigned long *regs, unsigned long *fp_regs); extern unsigned long get_thread_reg(int reg, jmp_buf *buf); extern int get_fp_registers(int pid, unsigned long *regs); diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index e8fd5d540b05d..7f7a74c82abb6 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -44,7 +44,7 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) { PT_REGS_IP(regs) = eip; PT_REGS_SP(regs) = esp; - current->ptrace &= ~PT_DTRACE; + clear_thread_flag(TIF_SINGLESTEP); #ifdef SUBARCH_EXECVE1 SUBARCH_EXECVE1(regs->regs); #endif diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 2032924350233..265d44ded99a3 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -381,7 +381,7 @@ int singlestepping(void * t) { struct task_struct *task = t ? t : current; - if (!(task->ptrace & PT_DTRACE)) + if (!test_thread_flag(TIF_SINGLESTEP)) return 0; if (task->thread.singlestep_syscall) diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c index b425f47bddbb3..d37802ced5636 100644 --- a/arch/um/kernel/ptrace.c +++ b/arch/um/kernel/ptrace.c @@ -12,7 +12,7 @@ void user_enable_single_step(struct task_struct *child) { - child->ptrace |= PT_DTRACE; + set_tsk_thread_flag(child, TIF_SINGLESTEP); child->thread.singlestep_syscall = 0; #ifdef SUBARCH_SET_SINGLESTEPPING @@ -22,7 +22,7 @@ void user_enable_single_step(struct task_struct *child) void user_disable_single_step(struct task_struct *child) { - child->ptrace &= ~PT_DTRACE; + clear_tsk_thread_flag(child, TIF_SINGLESTEP); child->thread.singlestep_syscall = 0; #ifdef SUBARCH_SET_SINGLESTEPPING @@ -121,7 +121,7 @@ static void send_sigtrap(struct uml_pt_regs *regs, int error_code) } /* - * XXX Check PT_DTRACE vs TIF_SINGLESTEP for singlestepping check and + * XXX Check TIF_SINGLESTEP for singlestepping check and * PT_PTRACED vs TIF_SYSCALL_TRACE for syscall tracing check */ int syscall_trace_enter(struct pt_regs *regs) @@ -145,7 +145,7 @@ void syscall_trace_leave(struct pt_regs *regs) audit_syscall_exit(regs); /* Fake a debug trap */ - if (ptraced & PT_DTRACE) + if (test_thread_flag(TIF_SINGLESTEP)) send_sigtrap(®s->regs, 0); if (!test_thread_flag(TIF_SYSCALL_TRACE)) diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c index 3d57c71c532e4..01628195ae520 100644 --- a/arch/um/kernel/signal.c +++ b/arch/um/kernel/signal.c @@ -53,7 +53,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) unsigned long sp; int err; - if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) + if (test_thread_flag(TIF_SINGLESTEP) && (current->ptrace & PT_PTRACED)) singlestep = 1; /* Did we come from a system call? */ @@ -128,7 +128,7 @@ void do_signal(struct pt_regs *regs) * on the host. The tracing thread will check this flag and * PTRACE_SYSCALL if necessary. */ - if (current->ptrace & PT_DTRACE) + if (test_thread_flag(TIF_SINGLESTEP)) current->thread.singlestep_syscall = is_syscall(PT_REGS_IP(¤t->thread.regs)); diff --git a/arch/um/os-Linux/registers.c b/arch/um/os-Linux/registers.c index 2d9270508e156..b123955be7acc 100644 --- a/arch/um/os-Linux/registers.c +++ b/arch/um/os-Linux/registers.c @@ -21,7 +21,7 @@ int save_registers(int pid, struct uml_pt_regs *regs) return 0; } -int restore_registers(int pid, struct uml_pt_regs *regs) +int restore_pid_registers(int pid, struct uml_pt_regs *regs) { int err; @@ -36,7 +36,7 @@ int restore_registers(int pid, struct uml_pt_regs *regs) static unsigned long exec_regs[MAX_REG_NR]; static unsigned long exec_fp_regs[FP_SIZE]; -int init_registers(int pid) +int init_pid_registers(int pid) { int err; diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index f79dc338279e6..b28373a2b8d2d 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -336,7 +336,7 @@ void __init os_early_checks(void) check_tmpexec(); pid = start_ptraced_child(); - if (init_registers(pid)) + if (init_pid_registers(pid)) fatal("Failed to initialize default registers"); stop_ptraced_child(pid, 1, 1); } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 36a28b9e46cbd..21e123e8c8ccc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -73,7 +73,6 @@ config X86 select ARCH_HAS_PMEM_API if X86_64 select ARCH_HAS_PTE_DEVMAP if X86_64 select ARCH_HAS_PTE_SPECIAL - select ARCH_HAS_REFCOUNT select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 select ARCH_HAS_UACCESS_MCSAFE if X86_64 && X86_MCE select ARCH_HAS_SET_MEMORY @@ -96,6 +95,7 @@ config X86 select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_WANT_HUGE_PMD_SHARE + select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP if X86_64 select ARCH_WANTS_THP_SWAP if X86_64 select BUILDTIME_EXTABLE_SORT select CLKEVT_I8253 @@ -1425,7 +1425,7 @@ config HIGHMEM4G config HIGHMEM64G bool "64GB" - depends on !M486 && !M586 && !M586TSC && !M586MMX && !MGEODE_LX && !MGEODEGX1 && !MCYRIXIII && !MELAN && !MWINCHIPC6 && !WINCHIP3D && !MK6 + depends on !M486 && !M586 && !M586TSC && !M586MMX && !MGEODE_LX && !MGEODEGX1 && !MCYRIXIII && !MELAN && !MWINCHIPC6 && !MWINCHIP3D && !MK6 select X86_PAE ---help--- Select this if you have a 32-bit processor and more than 4 @@ -1541,7 +1541,6 @@ config AMD_MEM_ENCRYPT config AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT bool "Activate AMD Secure Memory Encryption (SME) by default" - default y depends on AMD_MEM_ENCRYPT ---help--- Say yes to have system memory encrypted by default if running on @@ -1991,6 +1990,7 @@ config EFI depends on ACPI select UCS2_STRING select EFI_RUNTIME_WRAPPERS + select ARCH_USE_MEMREMAP_PROT ---help--- This enables the kernel to use EFI runtime services that are available (such as the EFI variable services). diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 759b1a927826b..e3412e06fd4bb 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -115,3 +115,13 @@ sha256-ssse3-y += sha256_ni_asm.o endif sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o + +obj-$(CONFIG_CRYPTO_SM3_AVX_X86_64) += sm3-avx-x86_64.o +sm3-avx-x86_64-y := sm3-avx-asm_64.o sm3_avx_glue.o + +obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX_X86_64) += sm4-aesni-avx-x86_64.o +sm4-aesni-avx-x86_64-y := sm4-aesni-avx-asm_64.o sm4_aesni_avx_glue.o + +obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX2_X86_64) += sm4-aesni-avx2-x86_64.o +sm4-aesni-avx2-x86_64-y := sm4-aesni-avx2-asm_64.o sm4_aesni_avx2_glue.o + diff --git a/arch/x86/crypto/chacha-avx512vl-x86_64.S b/arch/x86/crypto/chacha-avx512vl-x86_64.S index 848f9c75fd4f5..596f91e3a774e 100644 --- a/arch/x86/crypto/chacha-avx512vl-x86_64.S +++ b/arch/x86/crypto/chacha-avx512vl-x86_64.S @@ -172,7 +172,7 @@ ENTRY(chacha_2block_xor_avx512vl) # xor remaining bytes from partial register into output mov %rcx,%rax and $0xf,%rcx - jz .Ldone8 + jz .Ldone2 mov %rax,%r9 and $~0xf,%r9 @@ -438,7 +438,7 @@ ENTRY(chacha_4block_xor_avx512vl) # xor remaining bytes from partial register into output mov %rcx,%rax and $0xf,%rcx - jz .Ldone8 + jz .Ldone4 mov %rax,%r9 and $~0xf,%r9 diff --git a/arch/x86/crypto/sm3-avx-asm_64.S b/arch/x86/crypto/sm3-avx-asm_64.S new file mode 100644 index 0000000000000..71e6aae23e17c --- /dev/null +++ b/arch/x86/crypto/sm3-avx-asm_64.S @@ -0,0 +1,517 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM3 AVX accelerated transform. + * specified in: https://datatracker.ietf.org/doc/html/draft-sca-cfrg-sm3-02 + * + * Copyright (C) 2021 Jussi Kivilinna + * Copyright (C) 2021 Tianjia Zhang + */ + +/* Based on SM3 AES/BMI2 accelerated work by libgcrypt at: + * https://gnupg.org/software/libgcrypt/index.html + */ + +#include +#include + +/* Context structure */ + +#define state_h0 0 +#define state_h1 4 +#define state_h2 8 +#define state_h3 12 +#define state_h4 16 +#define state_h5 20 +#define state_h6 24 +#define state_h7 28 + +/* Constants */ + +/* Round constant macros */ + +#define K0 2043430169 /* 0x79cc4519 */ +#define K1 -208106958 /* 0xf3988a32 */ +#define K2 -416213915 /* 0xe7311465 */ +#define K3 -832427829 /* 0xce6228cb */ +#define K4 -1664855657 /* 0x9cc45197 */ +#define K5 965255983 /* 0x3988a32f */ +#define K6 1930511966 /* 0x7311465e */ +#define K7 -433943364 /* 0xe6228cbc */ +#define K8 -867886727 /* 0xcc451979 */ +#define K9 -1735773453 /* 0x988a32f3 */ +#define K10 823420391 /* 0x311465e7 */ +#define K11 1646840782 /* 0x6228cbce */ +#define K12 -1001285732 /* 0xc451979c */ +#define K13 -2002571463 /* 0x88a32f39 */ +#define K14 289824371 /* 0x11465e73 */ +#define K15 579648742 /* 0x228cbce6 */ +#define K16 -1651869049 /* 0x9d8a7a87 */ +#define K17 991229199 /* 0x3b14f50f */ +#define K18 1982458398 /* 0x7629ea1e */ +#define K19 -330050500 /* 0xec53d43c */ +#define K20 -660100999 /* 0xd8a7a879 */ +#define K21 -1320201997 /* 0xb14f50f3 */ +#define K22 1654563303 /* 0x629ea1e7 */ +#define K23 -985840690 /* 0xc53d43ce */ +#define K24 -1971681379 /* 0x8a7a879d */ +#define K25 351604539 /* 0x14f50f3b */ +#define K26 703209078 /* 0x29ea1e76 */ +#define K27 1406418156 /* 0x53d43cec */ +#define K28 -1482130984 /* 0xa7a879d8 */ +#define K29 1330705329 /* 0x4f50f3b1 */ +#define K30 -1633556638 /* 0x9ea1e762 */ +#define K31 1027854021 /* 0x3d43cec5 */ +#define K32 2055708042 /* 0x7a879d8a */ +#define K33 -183551212 /* 0xf50f3b14 */ +#define K34 -367102423 /* 0xea1e7629 */ +#define K35 -734204845 /* 0xd43cec53 */ +#define K36 -1468409689 /* 0xa879d8a7 */ +#define K37 1358147919 /* 0x50f3b14f */ +#define K38 -1578671458 /* 0xa1e7629e */ +#define K39 1137624381 /* 0x43cec53d */ +#define K40 -2019718534 /* 0x879d8a7a */ +#define K41 255530229 /* 0x0f3b14f5 */ +#define K42 511060458 /* 0x1e7629ea */ +#define K43 1022120916 /* 0x3cec53d4 */ +#define K44 2044241832 /* 0x79d8a7a8 */ +#define K45 -206483632 /* 0xf3b14f50 */ +#define K46 -412967263 /* 0xe7629ea1 */ +#define K47 -825934525 /* 0xcec53d43 */ +#define K48 -1651869049 /* 0x9d8a7a87 */ +#define K49 991229199 /* 0x3b14f50f */ +#define K50 1982458398 /* 0x7629ea1e */ +#define K51 -330050500 /* 0xec53d43c */ +#define K52 -660100999 /* 0xd8a7a879 */ +#define K53 -1320201997 /* 0xb14f50f3 */ +#define K54 1654563303 /* 0x629ea1e7 */ +#define K55 -985840690 /* 0xc53d43ce */ +#define K56 -1971681379 /* 0x8a7a879d */ +#define K57 351604539 /* 0x14f50f3b */ +#define K58 703209078 /* 0x29ea1e76 */ +#define K59 1406418156 /* 0x53d43cec */ +#define K60 -1482130984 /* 0xa7a879d8 */ +#define K61 1330705329 /* 0x4f50f3b1 */ +#define K62 -1633556638 /* 0x9ea1e762 */ +#define K63 1027854021 /* 0x3d43cec5 */ + +/* Register macros */ + +#define RSTATE %rdi +#define RDATA %rsi +#define RNBLKS %rdx + +#define t0 %eax +#define t1 %ebx +#define t2 %ecx + +#define a %r8d +#define b %r9d +#define c %r10d +#define d %r11d +#define e %r12d +#define f %r13d +#define g %r14d +#define h %r15d + +#define W0 %xmm0 +#define W1 %xmm1 +#define W2 %xmm2 +#define W3 %xmm3 +#define W4 %xmm4 +#define W5 %xmm5 + +#define XTMP0 %xmm6 +#define XTMP1 %xmm7 +#define XTMP2 %xmm8 +#define XTMP3 %xmm9 +#define XTMP4 %xmm10 +#define XTMP5 %xmm11 +#define XTMP6 %xmm12 + +#define BSWAP_REG %xmm15 + +/* Stack structure */ + +#define STACK_W_SIZE (32 * 2 * 3) +#define STACK_REG_SAVE_SIZE (64) + +#define STACK_W (0) +#define STACK_REG_SAVE (STACK_W + STACK_W_SIZE) +#define STACK_SIZE (STACK_REG_SAVE + STACK_REG_SAVE_SIZE) + +/* Instruction helpers. */ + +#define roll2(v, reg) \ + roll $(v), reg; + +#define roll3mov(v, src, dst) \ + movl src, dst; \ + roll $(v), dst; + +#define roll3(v, src, dst) \ + rorxl $(32-(v)), src, dst; + +#define addl2(a, out) \ + leal (a, out), out; + +/* Round function macros. */ + +#define GG1(x, y, z, o, t) \ + movl x, o; \ + xorl y, o; \ + xorl z, o; + +#define FF1(x, y, z, o, t) GG1(x, y, z, o, t) + +#define GG2(x, y, z, o, t) \ + andnl z, x, o; \ + movl y, t; \ + andl x, t; \ + addl2(t, o); + +#define FF2(x, y, z, o, t) \ + movl y, o; \ + xorl x, o; \ + movl y, t; \ + andl x, t; \ + andl z, o; \ + xorl t, o; + +#define R(i, a, b, c, d, e, f, g, h, round, widx, wtype) \ + /* rol(a, 12) => t0 */ \ + roll3mov(12, a, t0); /* rorxl here would reduce perf by 6% on zen3 */ \ + /* rol (t0 + e + t), 7) => t1 */ \ + leal K##round(t0, e, 1), t1; \ + roll2(7, t1); \ + /* h + w1 => h */ \ + addl wtype##_W1_ADDR(round, widx), h; \ + /* h + t1 => h */ \ + addl2(t1, h); \ + /* t1 ^ t0 => t0 */ \ + xorl t1, t0; \ + /* w1w2 + d => d */ \ + addl wtype##_W1W2_ADDR(round, widx), d; \ + /* FF##i(a,b,c) => t1 */ \ + FF##i(a, b, c, t1, t2); \ + /* d + t1 => d */ \ + addl2(t1, d); \ + /* GG#i(e,f,g) => t2 */ \ + GG##i(e, f, g, t2, t1); \ + /* h + t2 => h */ \ + addl2(t2, h); \ + /* rol (f, 19) => f */ \ + roll2(19, f); \ + /* d + t0 => d */ \ + addl2(t0, d); \ + /* rol (b, 9) => b */ \ + roll2(9, b); \ + /* P0(h) => h */ \ + roll3(9, h, t2); \ + roll3(17, h, t1); \ + xorl t2, h; \ + xorl t1, h; + +#define R1(a, b, c, d, e, f, g, h, round, widx, wtype) \ + R(1, a, b, c, d, e, f, g, h, round, widx, wtype) + +#define R2(a, b, c, d, e, f, g, h, round, widx, wtype) \ + R(2, a, b, c, d, e, f, g, h, round, widx, wtype) + +/* Input expansion macros. */ + +/* Byte-swapped input address. */ +#define IW_W_ADDR(round, widx, offs) \ + (STACK_W + ((round) / 4) * 64 + (offs) + ((widx) * 4))(%rsp) + +/* Expanded input address. */ +#define XW_W_ADDR(round, widx, offs) \ + (STACK_W + ((((round) / 3) - 4) % 2) * 64 + (offs) + ((widx) * 4))(%rsp) + +/* Rounds 1-12, byte-swapped input block addresses. */ +#define IW_W1_ADDR(round, widx) IW_W_ADDR(round, widx, 0) +#define IW_W1W2_ADDR(round, widx) IW_W_ADDR(round, widx, 32) + +/* Rounds 1-12, expanded input block addresses. */ +#define XW_W1_ADDR(round, widx) XW_W_ADDR(round, widx, 0) +#define XW_W1W2_ADDR(round, widx) XW_W_ADDR(round, widx, 32) + +/* Input block loading. */ +#define LOAD_W_XMM_1() \ + vmovdqu 0*16(RDATA), XTMP0; /* XTMP0: w3, w2, w1, w0 */ \ + vmovdqu 1*16(RDATA), XTMP1; /* XTMP1: w7, w6, w5, w4 */ \ + vmovdqu 2*16(RDATA), XTMP2; /* XTMP2: w11, w10, w9, w8 */ \ + vmovdqu 3*16(RDATA), XTMP3; /* XTMP3: w15, w14, w13, w12 */ \ + vpshufb BSWAP_REG, XTMP0, XTMP0; \ + vpshufb BSWAP_REG, XTMP1, XTMP1; \ + vpshufb BSWAP_REG, XTMP2, XTMP2; \ + vpshufb BSWAP_REG, XTMP3, XTMP3; \ + vpxor XTMP0, XTMP1, XTMP4; \ + vpxor XTMP1, XTMP2, XTMP5; \ + vpxor XTMP2, XTMP3, XTMP6; \ + leaq 64(RDATA), RDATA; \ + vmovdqa XTMP0, IW_W1_ADDR(0, 0); \ + vmovdqa XTMP4, IW_W1W2_ADDR(0, 0); \ + vmovdqa XTMP1, IW_W1_ADDR(4, 0); \ + vmovdqa XTMP5, IW_W1W2_ADDR(4, 0); + +#define LOAD_W_XMM_2() \ + vmovdqa XTMP2, IW_W1_ADDR(8, 0); \ + vmovdqa XTMP6, IW_W1W2_ADDR(8, 0); + +#define LOAD_W_XMM_3() \ + vpshufd $0b00000000, XTMP0, W0; /* W0: xx, w0, xx, xx */ \ + vpshufd $0b11111001, XTMP0, W1; /* W1: xx, w3, w2, w1 */ \ + vmovdqa XTMP1, W2; /* W2: xx, w6, w5, w4 */ \ + vpalignr $12, XTMP1, XTMP2, W3; /* W3: xx, w9, w8, w7 */ \ + vpalignr $8, XTMP2, XTMP3, W4; /* W4: xx, w12, w11, w10 */ \ + vpshufd $0b11111001, XTMP3, W5; /* W5: xx, w15, w14, w13 */ + +/* Message scheduling. Note: 3 words per XMM register. */ +#define SCHED_W_0(round, w0, w1, w2, w3, w4, w5) \ + /* Load (w[i - 16]) => XTMP0 */ \ + vpshufd $0b10111111, w0, XTMP0; \ + vpalignr $12, XTMP0, w1, XTMP0; /* XTMP0: xx, w2, w1, w0 */ \ + /* Load (w[i - 13]) => XTMP1 */ \ + vpshufd $0b10111111, w1, XTMP1; \ + vpalignr $12, XTMP1, w2, XTMP1; \ + /* w[i - 9] == w3 */ \ + /* XMM3 ^ XTMP0 => XTMP0 */ \ + vpxor w3, XTMP0, XTMP0; + +#define SCHED_W_1(round, w0, w1, w2, w3, w4, w5) \ + /* w[i - 3] == w5 */ \ + /* rol(XMM5, 15) ^ XTMP0 => XTMP0 */ \ + vpslld $15, w5, XTMP2; \ + vpsrld $(32-15), w5, XTMP3; \ + vpxor XTMP2, XTMP3, XTMP3; \ + vpxor XTMP3, XTMP0, XTMP0; \ + /* rol(XTMP1, 7) => XTMP1 */ \ + vpslld $7, XTMP1, XTMP5; \ + vpsrld $(32-7), XTMP1, XTMP1; \ + vpxor XTMP5, XTMP1, XTMP1; \ + /* XMM4 ^ XTMP1 => XTMP1 */ \ + vpxor w4, XTMP1, XTMP1; \ + /* w[i - 6] == XMM4 */ \ + /* P1(XTMP0) ^ XTMP1 => XMM0 */ \ + vpslld $15, XTMP0, XTMP5; \ + vpsrld $(32-15), XTMP0, XTMP6; \ + vpslld $23, XTMP0, XTMP2; \ + vpsrld $(32-23), XTMP0, XTMP3; \ + vpxor XTMP0, XTMP1, XTMP1; \ + vpxor XTMP6, XTMP5, XTMP5; \ + vpxor XTMP3, XTMP2, XTMP2; \ + vpxor XTMP2, XTMP5, XTMP5; \ + vpxor XTMP5, XTMP1, w0; + +#define SCHED_W_2(round, w0, w1, w2, w3, w4, w5) \ + /* W1 in XMM12 */ \ + vpshufd $0b10111111, w4, XTMP4; \ + vpalignr $12, XTMP4, w5, XTMP4; \ + vmovdqa XTMP4, XW_W1_ADDR((round), 0); \ + /* W1 ^ W2 => XTMP1 */ \ + vpxor w0, XTMP4, XTMP1; \ + vmovdqa XTMP1, XW_W1W2_ADDR((round), 0); + + +.section .rodata.cst16, "aM", @progbits, 16 +.align 16 + +.Lbe32mask: + .long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f + +.text + +/* + * Transform nblocks*64 bytes (nblocks*16 32-bit words) at DATA. + * + * void sm3_transform_avx(struct sm3_state *state, + * const u8 *data, int nblocks); + */ +.align 16 +SYM_FUNC_START(sm3_transform_avx) + /* input: + * %rdi: ctx, CTX + * %rsi: data (64*nblks bytes) + * %rdx: nblocks + */ + vzeroupper; + + pushq %rbp; + movq %rsp, %rbp; + + movq %rdx, RNBLKS; + + subq $STACK_SIZE, %rsp; + andq $(~63), %rsp; + + movq %rbx, (STACK_REG_SAVE + 0 * 8)(%rsp); + movq %r15, (STACK_REG_SAVE + 1 * 8)(%rsp); + movq %r14, (STACK_REG_SAVE + 2 * 8)(%rsp); + movq %r13, (STACK_REG_SAVE + 3 * 8)(%rsp); + movq %r12, (STACK_REG_SAVE + 4 * 8)(%rsp); + + vmovdqa .Lbe32mask (%rip), BSWAP_REG; + + /* Get the values of the chaining variables. */ + movl state_h0(RSTATE), a; + movl state_h1(RSTATE), b; + movl state_h2(RSTATE), c; + movl state_h3(RSTATE), d; + movl state_h4(RSTATE), e; + movl state_h5(RSTATE), f; + movl state_h6(RSTATE), g; + movl state_h7(RSTATE), h; + +.align 16 +.Loop: + /* Load data part1. */ + LOAD_W_XMM_1(); + + leaq -1(RNBLKS), RNBLKS; + + /* Transform 0-3 + Load data part2. */ + R1(a, b, c, d, e, f, g, h, 0, 0, IW); LOAD_W_XMM_2(); + R1(d, a, b, c, h, e, f, g, 1, 1, IW); + R1(c, d, a, b, g, h, e, f, 2, 2, IW); + R1(b, c, d, a, f, g, h, e, 3, 3, IW); LOAD_W_XMM_3(); + + /* Transform 4-7 + Precalc 12-14. */ + R1(a, b, c, d, e, f, g, h, 4, 0, IW); + R1(d, a, b, c, h, e, f, g, 5, 1, IW); + R1(c, d, a, b, g, h, e, f, 6, 2, IW); SCHED_W_0(12, W0, W1, W2, W3, W4, W5); + R1(b, c, d, a, f, g, h, e, 7, 3, IW); SCHED_W_1(12, W0, W1, W2, W3, W4, W5); + + /* Transform 8-11 + Precalc 12-17. */ + R1(a, b, c, d, e, f, g, h, 8, 0, IW); SCHED_W_2(12, W0, W1, W2, W3, W4, W5); + R1(d, a, b, c, h, e, f, g, 9, 1, IW); SCHED_W_0(15, W1, W2, W3, W4, W5, W0); + R1(c, d, a, b, g, h, e, f, 10, 2, IW); SCHED_W_1(15, W1, W2, W3, W4, W5, W0); + R1(b, c, d, a, f, g, h, e, 11, 3, IW); SCHED_W_2(15, W1, W2, W3, W4, W5, W0); + + /* Transform 12-14 + Precalc 18-20 */ + R1(a, b, c, d, e, f, g, h, 12, 0, XW); SCHED_W_0(18, W2, W3, W4, W5, W0, W1); + R1(d, a, b, c, h, e, f, g, 13, 1, XW); SCHED_W_1(18, W2, W3, W4, W5, W0, W1); + R1(c, d, a, b, g, h, e, f, 14, 2, XW); SCHED_W_2(18, W2, W3, W4, W5, W0, W1); + + /* Transform 15-17 + Precalc 21-23 */ + R1(b, c, d, a, f, g, h, e, 15, 0, XW); SCHED_W_0(21, W3, W4, W5, W0, W1, W2); + R2(a, b, c, d, e, f, g, h, 16, 1, XW); SCHED_W_1(21, W3, W4, W5, W0, W1, W2); + R2(d, a, b, c, h, e, f, g, 17, 2, XW); SCHED_W_2(21, W3, W4, W5, W0, W1, W2); + + /* Transform 18-20 + Precalc 24-26 */ + R2(c, d, a, b, g, h, e, f, 18, 0, XW); SCHED_W_0(24, W4, W5, W0, W1, W2, W3); + R2(b, c, d, a, f, g, h, e, 19, 1, XW); SCHED_W_1(24, W4, W5, W0, W1, W2, W3); + R2(a, b, c, d, e, f, g, h, 20, 2, XW); SCHED_W_2(24, W4, W5, W0, W1, W2, W3); + + /* Transform 21-23 + Precalc 27-29 */ + R2(d, a, b, c, h, e, f, g, 21, 0, XW); SCHED_W_0(27, W5, W0, W1, W2, W3, W4); + R2(c, d, a, b, g, h, e, f, 22, 1, XW); SCHED_W_1(27, W5, W0, W1, W2, W3, W4); + R2(b, c, d, a, f, g, h, e, 23, 2, XW); SCHED_W_2(27, W5, W0, W1, W2, W3, W4); + + /* Transform 24-26 + Precalc 30-32 */ + R2(a, b, c, d, e, f, g, h, 24, 0, XW); SCHED_W_0(30, W0, W1, W2, W3, W4, W5); + R2(d, a, b, c, h, e, f, g, 25, 1, XW); SCHED_W_1(30, W0, W1, W2, W3, W4, W5); + R2(c, d, a, b, g, h, e, f, 26, 2, XW); SCHED_W_2(30, W0, W1, W2, W3, W4, W5); + + /* Transform 27-29 + Precalc 33-35 */ + R2(b, c, d, a, f, g, h, e, 27, 0, XW); SCHED_W_0(33, W1, W2, W3, W4, W5, W0); + R2(a, b, c, d, e, f, g, h, 28, 1, XW); SCHED_W_1(33, W1, W2, W3, W4, W5, W0); + R2(d, a, b, c, h, e, f, g, 29, 2, XW); SCHED_W_2(33, W1, W2, W3, W4, W5, W0); + + /* Transform 30-32 + Precalc 36-38 */ + R2(c, d, a, b, g, h, e, f, 30, 0, XW); SCHED_W_0(36, W2, W3, W4, W5, W0, W1); + R2(b, c, d, a, f, g, h, e, 31, 1, XW); SCHED_W_1(36, W2, W3, W4, W5, W0, W1); + R2(a, b, c, d, e, f, g, h, 32, 2, XW); SCHED_W_2(36, W2, W3, W4, W5, W0, W1); + + /* Transform 33-35 + Precalc 39-41 */ + R2(d, a, b, c, h, e, f, g, 33, 0, XW); SCHED_W_0(39, W3, W4, W5, W0, W1, W2); + R2(c, d, a, b, g, h, e, f, 34, 1, XW); SCHED_W_1(39, W3, W4, W5, W0, W1, W2); + R2(b, c, d, a, f, g, h, e, 35, 2, XW); SCHED_W_2(39, W3, W4, W5, W0, W1, W2); + + /* Transform 36-38 + Precalc 42-44 */ + R2(a, b, c, d, e, f, g, h, 36, 0, XW); SCHED_W_0(42, W4, W5, W0, W1, W2, W3); + R2(d, a, b, c, h, e, f, g, 37, 1, XW); SCHED_W_1(42, W4, W5, W0, W1, W2, W3); + R2(c, d, a, b, g, h, e, f, 38, 2, XW); SCHED_W_2(42, W4, W5, W0, W1, W2, W3); + + /* Transform 39-41 + Precalc 45-47 */ + R2(b, c, d, a, f, g, h, e, 39, 0, XW); SCHED_W_0(45, W5, W0, W1, W2, W3, W4); + R2(a, b, c, d, e, f, g, h, 40, 1, XW); SCHED_W_1(45, W5, W0, W1, W2, W3, W4); + R2(d, a, b, c, h, e, f, g, 41, 2, XW); SCHED_W_2(45, W5, W0, W1, W2, W3, W4); + + /* Transform 42-44 + Precalc 48-50 */ + R2(c, d, a, b, g, h, e, f, 42, 0, XW); SCHED_W_0(48, W0, W1, W2, W3, W4, W5); + R2(b, c, d, a, f, g, h, e, 43, 1, XW); SCHED_W_1(48, W0, W1, W2, W3, W4, W5); + R2(a, b, c, d, e, f, g, h, 44, 2, XW); SCHED_W_2(48, W0, W1, W2, W3, W4, W5); + + /* Transform 45-47 + Precalc 51-53 */ + R2(d, a, b, c, h, e, f, g, 45, 0, XW); SCHED_W_0(51, W1, W2, W3, W4, W5, W0); + R2(c, d, a, b, g, h, e, f, 46, 1, XW); SCHED_W_1(51, W1, W2, W3, W4, W5, W0); + R2(b, c, d, a, f, g, h, e, 47, 2, XW); SCHED_W_2(51, W1, W2, W3, W4, W5, W0); + + /* Transform 48-50 + Precalc 54-56 */ + R2(a, b, c, d, e, f, g, h, 48, 0, XW); SCHED_W_0(54, W2, W3, W4, W5, W0, W1); + R2(d, a, b, c, h, e, f, g, 49, 1, XW); SCHED_W_1(54, W2, W3, W4, W5, W0, W1); + R2(c, d, a, b, g, h, e, f, 50, 2, XW); SCHED_W_2(54, W2, W3, W4, W5, W0, W1); + + /* Transform 51-53 + Precalc 57-59 */ + R2(b, c, d, a, f, g, h, e, 51, 0, XW); SCHED_W_0(57, W3, W4, W5, W0, W1, W2); + R2(a, b, c, d, e, f, g, h, 52, 1, XW); SCHED_W_1(57, W3, W4, W5, W0, W1, W2); + R2(d, a, b, c, h, e, f, g, 53, 2, XW); SCHED_W_2(57, W3, W4, W5, W0, W1, W2); + + /* Transform 54-56 + Precalc 60-62 */ + R2(c, d, a, b, g, h, e, f, 54, 0, XW); SCHED_W_0(60, W4, W5, W0, W1, W2, W3); + R2(b, c, d, a, f, g, h, e, 55, 1, XW); SCHED_W_1(60, W4, W5, W0, W1, W2, W3); + R2(a, b, c, d, e, f, g, h, 56, 2, XW); SCHED_W_2(60, W4, W5, W0, W1, W2, W3); + + /* Transform 57-59 + Precalc 63 */ + R2(d, a, b, c, h, e, f, g, 57, 0, XW); SCHED_W_0(63, W5, W0, W1, W2, W3, W4); + R2(c, d, a, b, g, h, e, f, 58, 1, XW); + R2(b, c, d, a, f, g, h, e, 59, 2, XW); SCHED_W_1(63, W5, W0, W1, W2, W3, W4); + + /* Transform 60-62 + Precalc 63 */ + R2(a, b, c, d, e, f, g, h, 60, 0, XW); + R2(d, a, b, c, h, e, f, g, 61, 1, XW); SCHED_W_2(63, W5, W0, W1, W2, W3, W4); + R2(c, d, a, b, g, h, e, f, 62, 2, XW); + + /* Transform 63 */ + R2(b, c, d, a, f, g, h, e, 63, 0, XW); + + /* Update the chaining variables. */ + xorl state_h0(RSTATE), a; + xorl state_h1(RSTATE), b; + xorl state_h2(RSTATE), c; + xorl state_h3(RSTATE), d; + movl a, state_h0(RSTATE); + movl b, state_h1(RSTATE); + movl c, state_h2(RSTATE); + movl d, state_h3(RSTATE); + xorl state_h4(RSTATE), e; + xorl state_h5(RSTATE), f; + xorl state_h6(RSTATE), g; + xorl state_h7(RSTATE), h; + movl e, state_h4(RSTATE); + movl f, state_h5(RSTATE); + movl g, state_h6(RSTATE); + movl h, state_h7(RSTATE); + + cmpq $0, RNBLKS; + jne .Loop; + + vzeroall; + + movq (STACK_REG_SAVE + 0 * 8)(%rsp), %rbx; + movq (STACK_REG_SAVE + 1 * 8)(%rsp), %r15; + movq (STACK_REG_SAVE + 2 * 8)(%rsp), %r14; + movq (STACK_REG_SAVE + 3 * 8)(%rsp), %r13; + movq (STACK_REG_SAVE + 4 * 8)(%rsp), %r12; + + vmovdqa %xmm0, IW_W1_ADDR(0, 0); + vmovdqa %xmm0, IW_W1W2_ADDR(0, 0); + vmovdqa %xmm0, IW_W1_ADDR(4, 0); + vmovdqa %xmm0, IW_W1W2_ADDR(4, 0); + vmovdqa %xmm0, IW_W1_ADDR(8, 0); + vmovdqa %xmm0, IW_W1W2_ADDR(8, 0); + + movq %rbp, %rsp; + popq %rbp; + ret; +SYM_FUNC_END(sm3_transform_avx) diff --git a/arch/x86/crypto/sm3_avx_glue.c b/arch/x86/crypto/sm3_avx_glue.c new file mode 100644 index 0000000000000..661b6f22ffcd8 --- /dev/null +++ b/arch/x86/crypto/sm3_avx_glue.c @@ -0,0 +1,134 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM3 Secure Hash Algorithm, AVX assembler accelerated. + * specified in: https://datatracker.ietf.org/doc/html/draft-sca-cfrg-sm3-02 + * + * Copyright (C) 2021 Tianjia Zhang + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +asmlinkage void sm3_transform_avx(struct sm3_state *state, + const u8 *data, int nblocks); + +static int sm3_avx_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sm3_state *sctx = shash_desc_ctx(desc); + + if (!crypto_simd_usable() || + (sctx->count % SM3_BLOCK_SIZE) + len < SM3_BLOCK_SIZE) { + sm3_update(sctx, data, len); + return 0; + } + + /* + * Make sure struct sm3_state begins directly with the SM3 + * 256-bit internal state, as this is what the asm functions expect. + */ + BUILD_BUG_ON(offsetof(struct sm3_state, state) != 0); + + kernel_fpu_begin(); + sm3_base_do_update(desc, data, len, sm3_transform_avx); + kernel_fpu_end(); + + return 0; +} + +static int sm3_avx_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + if (!crypto_simd_usable()) { + struct sm3_state *sctx = shash_desc_ctx(desc); + + if (len) + sm3_update(sctx, data, len); + + sm3_final(sctx, out); + return 0; + } + + kernel_fpu_begin(); + if (len) + sm3_base_do_update(desc, data, len, sm3_transform_avx); + sm3_base_do_finalize(desc, sm3_transform_avx); + kernel_fpu_end(); + + return sm3_base_finish(desc, out); +} + +static int sm3_avx_final(struct shash_desc *desc, u8 *out) +{ + if (!crypto_simd_usable()) { + sm3_final(shash_desc_ctx(desc), out); + return 0; + } + + kernel_fpu_begin(); + sm3_base_do_finalize(desc, sm3_transform_avx); + kernel_fpu_end(); + + return sm3_base_finish(desc, out); +} + +static struct shash_alg sm3_avx_alg = { + .digestsize = SM3_DIGEST_SIZE, + .init = sm3_base_init, + .update = sm3_avx_update, + .final = sm3_avx_final, + .finup = sm3_avx_finup, + .descsize = sizeof(struct sm3_state), + .base = { + .cra_name = "sm3", + .cra_driver_name = "sm3-avx", + .cra_priority = 300, + .cra_blocksize = SM3_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static int __init sm3_avx_mod_init(void) +{ + const char *feature_name; + + if (!boot_cpu_has(X86_FEATURE_AVX)) { + pr_info("AVX instruction are not detected.\n"); + return -ENODEV; + } + + if (!boot_cpu_has(X86_FEATURE_BMI2)) { + pr_info("BMI2 instruction are not detected.\n"); + return -ENODEV; + } + + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); + return -ENODEV; + } + + return crypto_register_shash(&sm3_avx_alg); +} + +static void __exit sm3_avx_mod_exit(void) +{ + crypto_unregister_shash(&sm3_avx_alg); +} + +module_init(sm3_avx_mod_init); +module_exit(sm3_avx_mod_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Tianjia Zhang "); +MODULE_DESCRIPTION("SM3 Secure Hash Algorithm, AVX assembler accelerated"); +MODULE_ALIAS_CRYPTO("sm3"); +MODULE_ALIAS_CRYPTO("sm3-avx"); diff --git a/arch/x86/crypto/sm4-aesni-avx-asm_64.S b/arch/x86/crypto/sm4-aesni-avx-asm_64.S new file mode 100644 index 0000000000000..1cc72b4804fab --- /dev/null +++ b/arch/x86/crypto/sm4-aesni-avx-asm_64.S @@ -0,0 +1,594 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4 Cipher Algorithm, AES-NI/AVX optimized. + * as specified in + * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html + * + * Copyright (C) 2018 Markku-Juhani O. Saarinen + * Copyright (C) 2020 Jussi Kivilinna + * Copyright (c) 2021 Tianjia Zhang + */ + +/* Based on SM4 AES-NI work by libgcrypt and Markku-Juhani O. Saarinen at: + * https://github.com/mjosaarinen/sm4ni + */ + +#include +#include + +#define rRIP (%rip) + +#define RX0 %xmm0 +#define RX1 %xmm1 +#define MASK_4BIT %xmm2 +#define RTMP0 %xmm3 +#define RTMP1 %xmm4 +#define RTMP2 %xmm5 +#define RTMP3 %xmm6 +#define RTMP4 %xmm7 + +#define RA0 %xmm8 +#define RA1 %xmm9 +#define RA2 %xmm10 +#define RA3 %xmm11 + +#define RB0 %xmm12 +#define RB1 %xmm13 +#define RB2 %xmm14 +#define RB3 %xmm15 + +#define RNOT %xmm0 +#define RBSWAP %xmm1 + + +/* Transpose four 32-bit words between 128-bit vectors. */ +#define transpose_4x4(x0, x1, x2, x3, t1, t2) \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x1, x0, x0; \ + \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x2; \ + \ + vpunpckhqdq t1, x0, x1; \ + vpunpcklqdq t1, x0, x0; \ + \ + vpunpckhqdq x2, t2, x3; \ + vpunpcklqdq x2, t2, x2; + +/* pre-SubByte transform. */ +#define transform_pre(x, lo_t, hi_t, mask4bit, tmp0) \ + vpand x, mask4bit, tmp0; \ + vpandn x, mask4bit, x; \ + vpsrld $4, x, x; \ + \ + vpshufb tmp0, lo_t, tmp0; \ + vpshufb x, hi_t, x; \ + vpxor tmp0, x, x; + +/* post-SubByte transform. Note: x has been XOR'ed with mask4bit by + * 'vaeslastenc' instruction. + */ +#define transform_post(x, lo_t, hi_t, mask4bit, tmp0) \ + vpandn mask4bit, x, tmp0; \ + vpsrld $4, x, x; \ + vpand x, mask4bit, x; \ + \ + vpshufb tmp0, lo_t, tmp0; \ + vpshufb x, hi_t, x; \ + vpxor tmp0, x, x; + + +.section .rodata.cst16, "aM", @progbits, 16 +.align 16 + +/* + * Following four affine transform look-up tables are from work by + * Markku-Juhani O. Saarinen, at https://github.com/mjosaarinen/sm4ni + * + * These allow exposing SM4 S-Box from AES SubByte. + */ + +/* pre-SubByte affine transform, from SM4 field to AES field. */ +.Lpre_tf_lo_s: + .quad 0x9197E2E474720701, 0xC7C1B4B222245157 +.Lpre_tf_hi_s: + .quad 0xE240AB09EB49A200, 0xF052B91BF95BB012 + +/* post-SubByte affine transform, from AES field to SM4 field. */ +.Lpost_tf_lo_s: + .quad 0x5B67F2CEA19D0834, 0xEDD14478172BBE82 +.Lpost_tf_hi_s: + .quad 0xAE7201DD73AFDC00, 0x11CDBE62CC1063BF + +/* For isolating SubBytes from AESENCLAST, inverse shift row */ +.Linv_shift_row: + .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b + .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03 + +/* Inverse shift row + Rotate left by 8 bits on 32-bit words with vpshufb */ +.Linv_shift_row_rol_8: + .byte 0x07, 0x00, 0x0d, 0x0a, 0x0b, 0x04, 0x01, 0x0e + .byte 0x0f, 0x08, 0x05, 0x02, 0x03, 0x0c, 0x09, 0x06 + +/* Inverse shift row + Rotate left by 16 bits on 32-bit words with vpshufb */ +.Linv_shift_row_rol_16: + .byte 0x0a, 0x07, 0x00, 0x0d, 0x0e, 0x0b, 0x04, 0x01 + .byte 0x02, 0x0f, 0x08, 0x05, 0x06, 0x03, 0x0c, 0x09 + +/* Inverse shift row + Rotate left by 24 bits on 32-bit words with vpshufb */ +.Linv_shift_row_rol_24: + .byte 0x0d, 0x0a, 0x07, 0x00, 0x01, 0x0e, 0x0b, 0x04 + .byte 0x05, 0x02, 0x0f, 0x08, 0x09, 0x06, 0x03, 0x0c + +/* For CTR-mode IV byteswap */ +.Lbswap128_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +/* For input word byte-swap */ +.Lbswap32_mask: + .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 + +.align 4 +/* 4-bit mask */ +.L0f0f0f0f: + .long 0x0f0f0f0f + +/* 12 bytes, only for padding */ +.Lpadding_deadbeef: + .long 0xdeadbeef, 0xdeadbeef, 0xdeadbeef + + +.text +.align 16 + +/* + * void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst, + * const u8 *src, int nblocks) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx_crypt4) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (1..4 blocks) + * %rdx: src (1..4 blocks) + * %rcx: num blocks (1..4) + */ + FRAME_BEGIN + + vmovdqu 0*16(%rdx), RA0; + vmovdqa RA0, RA1; + vmovdqa RA0, RA2; + vmovdqa RA0, RA3; + cmpq $2, %rcx; + jb .Lblk4_load_input_done; + vmovdqu 1*16(%rdx), RA1; + je .Lblk4_load_input_done; + vmovdqu 2*16(%rdx), RA2; + cmpq $3, %rcx; + je .Lblk4_load_input_done; + vmovdqu 3*16(%rdx), RA3; + +.Lblk4_load_input_done: + + vmovdqa .Lbswap32_mask rRIP, RTMP2; + vpshufb RTMP2, RA0, RA0; + vpshufb RTMP2, RA1, RA1; + vpshufb RTMP2, RA2, RA2; + vpshufb RTMP2, RA3, RA3; + + vbroadcastss .L0f0f0f0f rRIP, MASK_4BIT; + vmovdqa .Lpre_tf_lo_s rRIP, RTMP4; + vmovdqa .Lpre_tf_hi_s rRIP, RB0; + vmovdqa .Lpost_tf_lo_s rRIP, RB1; + vmovdqa .Lpost_tf_hi_s rRIP, RB2; + vmovdqa .Linv_shift_row rRIP, RB3; + vmovdqa .Linv_shift_row_rol_8 rRIP, RTMP2; + vmovdqa .Linv_shift_row_rol_16 rRIP, RTMP3; + transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1); + +#define ROUND(round, s0, s1, s2, s3) \ + vbroadcastss (4*(round))(%rdi), RX0; \ + vpxor s1, RX0, RX0; \ + vpxor s2, RX0, RX0; \ + vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \ + \ + /* sbox, non-linear part */ \ + transform_pre(RX0, RTMP4, RB0, MASK_4BIT, RTMP0); \ + vaesenclast MASK_4BIT, RX0, RX0; \ + transform_post(RX0, RB1, RB2, MASK_4BIT, RTMP0); \ + \ + /* linear part */ \ + vpshufb RB3, RX0, RTMP0; \ + vpxor RTMP0, s0, s0; /* s0 ^ x */ \ + vpshufb RTMP2, RX0, RTMP1; \ + vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \ + vpshufb RTMP3, RX0, RTMP1; \ + vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \ + vpshufb .Linv_shift_row_rol_24 rRIP, RX0, RTMP1; \ + vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \ + vpslld $2, RTMP0, RTMP1; \ + vpsrld $30, RTMP0, RTMP0; \ + vpxor RTMP0, s0, s0; \ + /* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \ + vpxor RTMP1, s0, s0; + + leaq (32*4)(%rdi), %rax; +.align 16 +.Lroundloop_blk4: + ROUND(0, RA0, RA1, RA2, RA3); + ROUND(1, RA1, RA2, RA3, RA0); + ROUND(2, RA2, RA3, RA0, RA1); + ROUND(3, RA3, RA0, RA1, RA2); + leaq (4*4)(%rdi), %rdi; + cmpq %rax, %rdi; + jne .Lroundloop_blk4; + +#undef ROUND + + vmovdqa .Lbswap128_mask rRIP, RTMP2; + + transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1); + vpshufb RTMP2, RA0, RA0; + vpshufb RTMP2, RA1, RA1; + vpshufb RTMP2, RA2, RA2; + vpshufb RTMP2, RA3, RA3; + + vmovdqu RA0, 0*16(%rsi); + cmpq $2, %rcx; + jb .Lblk4_store_output_done; + vmovdqu RA1, 1*16(%rsi); + je .Lblk4_store_output_done; + vmovdqu RA2, 2*16(%rsi); + cmpq $3, %rcx; + je .Lblk4_store_output_done; + vmovdqu RA3, 3*16(%rsi); + +.Lblk4_store_output_done: + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx_crypt4) + +.align 8 +SYM_FUNC_START_LOCAL(__sm4_crypt_blk8) + /* input: + * %rdi: round key array, CTX + * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel + * plaintext blocks + * output: + * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel + * ciphertext blocks + */ + FRAME_BEGIN + + vmovdqa .Lbswap32_mask rRIP, RTMP2; + vpshufb RTMP2, RA0, RA0; + vpshufb RTMP2, RA1, RA1; + vpshufb RTMP2, RA2, RA2; + vpshufb RTMP2, RA3, RA3; + vpshufb RTMP2, RB0, RB0; + vpshufb RTMP2, RB1, RB1; + vpshufb RTMP2, RB2, RB2; + vpshufb RTMP2, RB3, RB3; + + vbroadcastss .L0f0f0f0f rRIP, MASK_4BIT; + transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1); + transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1); + +#define ROUND(round, s0, s1, s2, s3, r0, r1, r2, r3) \ + vbroadcastss (4*(round))(%rdi), RX0; \ + vmovdqa .Lpre_tf_lo_s rRIP, RTMP4; \ + vmovdqa .Lpre_tf_hi_s rRIP, RTMP1; \ + vmovdqa RX0, RX1; \ + vpxor s1, RX0, RX0; \ + vpxor s2, RX0, RX0; \ + vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \ + vmovdqa .Lpost_tf_lo_s rRIP, RTMP2; \ + vmovdqa .Lpost_tf_hi_s rRIP, RTMP3; \ + vpxor r1, RX1, RX1; \ + vpxor r2, RX1, RX1; \ + vpxor r3, RX1, RX1; /* r1 ^ r2 ^ r3 ^ rk */ \ + \ + /* sbox, non-linear part */ \ + transform_pre(RX0, RTMP4, RTMP1, MASK_4BIT, RTMP0); \ + transform_pre(RX1, RTMP4, RTMP1, MASK_4BIT, RTMP0); \ + vmovdqa .Linv_shift_row rRIP, RTMP4; \ + vaesenclast MASK_4BIT, RX0, RX0; \ + vaesenclast MASK_4BIT, RX1, RX1; \ + transform_post(RX0, RTMP2, RTMP3, MASK_4BIT, RTMP0); \ + transform_post(RX1, RTMP2, RTMP3, MASK_4BIT, RTMP0); \ + \ + /* linear part */ \ + vpshufb RTMP4, RX0, RTMP0; \ + vpxor RTMP0, s0, s0; /* s0 ^ x */ \ + vpshufb RTMP4, RX1, RTMP2; \ + vmovdqa .Linv_shift_row_rol_8 rRIP, RTMP4; \ + vpxor RTMP2, r0, r0; /* r0 ^ x */ \ + vpshufb RTMP4, RX0, RTMP1; \ + vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \ + vpshufb RTMP4, RX1, RTMP3; \ + vmovdqa .Linv_shift_row_rol_16 rRIP, RTMP4; \ + vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) */ \ + vpshufb RTMP4, RX0, RTMP1; \ + vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \ + vpshufb RTMP4, RX1, RTMP3; \ + vmovdqa .Linv_shift_row_rol_24 rRIP, RTMP4; \ + vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) ^ rol(x,16) */ \ + vpshufb RTMP4, RX0, RTMP1; \ + vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \ + /* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \ + vpslld $2, RTMP0, RTMP1; \ + vpsrld $30, RTMP0, RTMP0; \ + vpxor RTMP0, s0, s0; \ + vpxor RTMP1, s0, s0; \ + vpshufb RTMP4, RX1, RTMP3; \ + vpxor RTMP3, r0, r0; /* r0 ^ x ^ rol(x,24) */ \ + /* r0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \ + vpslld $2, RTMP2, RTMP3; \ + vpsrld $30, RTMP2, RTMP2; \ + vpxor RTMP2, r0, r0; \ + vpxor RTMP3, r0, r0; + + leaq (32*4)(%rdi), %rax; +.align 16 +.Lroundloop_blk8: + ROUND(0, RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3); + ROUND(1, RA1, RA2, RA3, RA0, RB1, RB2, RB3, RB0); + ROUND(2, RA2, RA3, RA0, RA1, RB2, RB3, RB0, RB1); + ROUND(3, RA3, RA0, RA1, RA2, RB3, RB0, RB1, RB2); + leaq (4*4)(%rdi), %rdi; + cmpq %rax, %rdi; + jne .Lroundloop_blk8; + +#undef ROUND + + vmovdqa .Lbswap128_mask rRIP, RTMP2; + + transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1); + transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1); + vpshufb RTMP2, RA0, RA0; + vpshufb RTMP2, RA1, RA1; + vpshufb RTMP2, RA2, RA2; + vpshufb RTMP2, RA3, RA3; + vpshufb RTMP2, RB0, RB0; + vpshufb RTMP2, RB1, RB1; + vpshufb RTMP2, RB2, RB2; + vpshufb RTMP2, RB3, RB3; + + FRAME_END + ret; +SYM_FUNC_END(__sm4_crypt_blk8) + +/* + * void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst, + * const u8 *src, int nblocks) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx_crypt8) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (1..8 blocks) + * %rdx: src (1..8 blocks) + * %rcx: num blocks (1..8) + */ + cmpq $5, %rcx; + jb sm4_aesni_avx_crypt4; + + FRAME_BEGIN + + vmovdqu (0 * 16)(%rdx), RA0; + vmovdqu (1 * 16)(%rdx), RA1; + vmovdqu (2 * 16)(%rdx), RA2; + vmovdqu (3 * 16)(%rdx), RA3; + vmovdqu (4 * 16)(%rdx), RB0; + vmovdqa RB0, RB1; + vmovdqa RB0, RB2; + vmovdqa RB0, RB3; + je .Lblk8_load_input_done; + vmovdqu (5 * 16)(%rdx), RB1; + cmpq $7, %rcx; + jb .Lblk8_load_input_done; + vmovdqu (6 * 16)(%rdx), RB2; + je .Lblk8_load_input_done; + vmovdqu (7 * 16)(%rdx), RB3; + +.Lblk8_load_input_done: + call __sm4_crypt_blk8; + + cmpq $6, %rcx; + vmovdqu RA0, (0 * 16)(%rsi); + vmovdqu RA1, (1 * 16)(%rsi); + vmovdqu RA2, (2 * 16)(%rsi); + vmovdqu RA3, (3 * 16)(%rsi); + vmovdqu RB0, (4 * 16)(%rsi); + jb .Lblk8_store_output_done; + vmovdqu RB1, (5 * 16)(%rsi); + je .Lblk8_store_output_done; + vmovdqu RB2, (6 * 16)(%rsi); + cmpq $7, %rcx; + je .Lblk8_store_output_done; + vmovdqu RB3, (7 * 16)(%rsi); + +.Lblk8_store_output_done: + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx_crypt8) + +/* + * void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst, + * const u8 *src, u8 *iv) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx_ctr_enc_blk8) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (8 blocks) + * %rdx: src (8 blocks) + * %rcx: iv (big endian, 128bit) + */ + FRAME_BEGIN + + /* load IV and byteswap */ + vmovdqu (%rcx), RA0; + + vmovdqa .Lbswap128_mask rRIP, RBSWAP; + vpshufb RBSWAP, RA0, RTMP0; /* be => le */ + + vpcmpeqd RNOT, RNOT, RNOT; + vpsrldq $8, RNOT, RNOT; /* low: -1, high: 0 */ + +#define inc_le128(x, minus_one, tmp) \ + vpcmpeqq minus_one, x, tmp; \ + vpsubq minus_one, x, x; \ + vpslldq $8, tmp, tmp; \ + vpsubq tmp, x, x; + + /* construct IVs */ + inc_le128(RTMP0, RNOT, RTMP2); /* +1 */ + vpshufb RBSWAP, RTMP0, RA1; + inc_le128(RTMP0, RNOT, RTMP2); /* +2 */ + vpshufb RBSWAP, RTMP0, RA2; + inc_le128(RTMP0, RNOT, RTMP2); /* +3 */ + vpshufb RBSWAP, RTMP0, RA3; + inc_le128(RTMP0, RNOT, RTMP2); /* +4 */ + vpshufb RBSWAP, RTMP0, RB0; + inc_le128(RTMP0, RNOT, RTMP2); /* +5 */ + vpshufb RBSWAP, RTMP0, RB1; + inc_le128(RTMP0, RNOT, RTMP2); /* +6 */ + vpshufb RBSWAP, RTMP0, RB2; + inc_le128(RTMP0, RNOT, RTMP2); /* +7 */ + vpshufb RBSWAP, RTMP0, RB3; + inc_le128(RTMP0, RNOT, RTMP2); /* +8 */ + vpshufb RBSWAP, RTMP0, RTMP1; + + /* store new IV */ + vmovdqu RTMP1, (%rcx); + + call __sm4_crypt_blk8; + + vpxor (0 * 16)(%rdx), RA0, RA0; + vpxor (1 * 16)(%rdx), RA1, RA1; + vpxor (2 * 16)(%rdx), RA2, RA2; + vpxor (3 * 16)(%rdx), RA3, RA3; + vpxor (4 * 16)(%rdx), RB0, RB0; + vpxor (5 * 16)(%rdx), RB1, RB1; + vpxor (6 * 16)(%rdx), RB2, RB2; + vpxor (7 * 16)(%rdx), RB3, RB3; + + vmovdqu RA0, (0 * 16)(%rsi); + vmovdqu RA1, (1 * 16)(%rsi); + vmovdqu RA2, (2 * 16)(%rsi); + vmovdqu RA3, (3 * 16)(%rsi); + vmovdqu RB0, (4 * 16)(%rsi); + vmovdqu RB1, (5 * 16)(%rsi); + vmovdqu RB2, (6 * 16)(%rsi); + vmovdqu RB3, (7 * 16)(%rsi); + + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx_ctr_enc_blk8) + +/* + * void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst, + * const u8 *src, u8 *iv) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx_cbc_dec_blk8) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (8 blocks) + * %rdx: src (8 blocks) + * %rcx: iv + */ + FRAME_BEGIN + + vmovdqu (0 * 16)(%rdx), RA0; + vmovdqu (1 * 16)(%rdx), RA1; + vmovdqu (2 * 16)(%rdx), RA2; + vmovdqu (3 * 16)(%rdx), RA3; + vmovdqu (4 * 16)(%rdx), RB0; + vmovdqu (5 * 16)(%rdx), RB1; + vmovdqu (6 * 16)(%rdx), RB2; + vmovdqu (7 * 16)(%rdx), RB3; + + call __sm4_crypt_blk8; + + vmovdqu (7 * 16)(%rdx), RNOT; + vpxor (%rcx), RA0, RA0; + vpxor (0 * 16)(%rdx), RA1, RA1; + vpxor (1 * 16)(%rdx), RA2, RA2; + vpxor (2 * 16)(%rdx), RA3, RA3; + vpxor (3 * 16)(%rdx), RB0, RB0; + vpxor (4 * 16)(%rdx), RB1, RB1; + vpxor (5 * 16)(%rdx), RB2, RB2; + vpxor (6 * 16)(%rdx), RB3, RB3; + vmovdqu RNOT, (%rcx); /* store new IV */ + + vmovdqu RA0, (0 * 16)(%rsi); + vmovdqu RA1, (1 * 16)(%rsi); + vmovdqu RA2, (2 * 16)(%rsi); + vmovdqu RA3, (3 * 16)(%rsi); + vmovdqu RB0, (4 * 16)(%rsi); + vmovdqu RB1, (5 * 16)(%rsi); + vmovdqu RB2, (6 * 16)(%rsi); + vmovdqu RB3, (7 * 16)(%rsi); + + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx_cbc_dec_blk8) + +/* + * void sm4_aesni_avx_cfb_dec_blk8(const u32 *rk, u8 *dst, + * const u8 *src, u8 *iv) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx_cfb_dec_blk8) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (8 blocks) + * %rdx: src (8 blocks) + * %rcx: iv + */ + FRAME_BEGIN + + /* Load input */ + vmovdqu (%rcx), RA0; + vmovdqu 0 * 16(%rdx), RA1; + vmovdqu 1 * 16(%rdx), RA2; + vmovdqu 2 * 16(%rdx), RA3; + vmovdqu 3 * 16(%rdx), RB0; + vmovdqu 4 * 16(%rdx), RB1; + vmovdqu 5 * 16(%rdx), RB2; + vmovdqu 6 * 16(%rdx), RB3; + + /* Update IV */ + vmovdqu 7 * 16(%rdx), RNOT; + vmovdqu RNOT, (%rcx); + + call __sm4_crypt_blk8; + + vpxor (0 * 16)(%rdx), RA0, RA0; + vpxor (1 * 16)(%rdx), RA1, RA1; + vpxor (2 * 16)(%rdx), RA2, RA2; + vpxor (3 * 16)(%rdx), RA3, RA3; + vpxor (4 * 16)(%rdx), RB0, RB0; + vpxor (5 * 16)(%rdx), RB1, RB1; + vpxor (6 * 16)(%rdx), RB2, RB2; + vpxor (7 * 16)(%rdx), RB3, RB3; + + vmovdqu RA0, (0 * 16)(%rsi); + vmovdqu RA1, (1 * 16)(%rsi); + vmovdqu RA2, (2 * 16)(%rsi); + vmovdqu RA3, (3 * 16)(%rsi); + vmovdqu RB0, (4 * 16)(%rsi); + vmovdqu RB1, (5 * 16)(%rsi); + vmovdqu RB2, (6 * 16)(%rsi); + vmovdqu RB3, (7 * 16)(%rsi); + + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx_cfb_dec_blk8) diff --git a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S new file mode 100644 index 0000000000000..9c5d3f3ad45a9 --- /dev/null +++ b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S @@ -0,0 +1,501 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SM4 Cipher Algorithm, AES-NI/AVX2 optimized. + * as specified in + * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html + * + * Copyright (C) 2018 Markku-Juhani O. Saarinen + * Copyright (C) 2020 Jussi Kivilinna + * Copyright (c) 2021 Tianjia Zhang + */ + +/* Based on SM4 AES-NI work by libgcrypt and Markku-Juhani O. Saarinen at: + * https://github.com/mjosaarinen/sm4ni + */ + +#include +#include + +#define rRIP (%rip) + +/* vector registers */ +#define RX0 %ymm0 +#define RX1 %ymm1 +#define MASK_4BIT %ymm2 +#define RTMP0 %ymm3 +#define RTMP1 %ymm4 +#define RTMP2 %ymm5 +#define RTMP3 %ymm6 +#define RTMP4 %ymm7 + +#define RA0 %ymm8 +#define RA1 %ymm9 +#define RA2 %ymm10 +#define RA3 %ymm11 + +#define RB0 %ymm12 +#define RB1 %ymm13 +#define RB2 %ymm14 +#define RB3 %ymm15 + +#define RNOT %ymm0 +#define RBSWAP %ymm1 + +#define RX0x %xmm0 +#define RX1x %xmm1 +#define MASK_4BITx %xmm2 + +#define RNOTx %xmm0 +#define RBSWAPx %xmm1 + +#define RTMP0x %xmm3 +#define RTMP1x %xmm4 +#define RTMP2x %xmm5 +#define RTMP3x %xmm6 +#define RTMP4x %xmm7 + + +/* helper macros */ + +/* Transpose four 32-bit words between 128-bit vector lanes. */ +#define transpose_4x4(x0, x1, x2, x3, t1, t2) \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x1, x0, x0; \ + \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x2; \ + \ + vpunpckhqdq t1, x0, x1; \ + vpunpcklqdq t1, x0, x0; \ + \ + vpunpckhqdq x2, t2, x3; \ + vpunpcklqdq x2, t2, x2; + +/* post-SubByte transform. */ +#define transform_pre(x, lo_t, hi_t, mask4bit, tmp0) \ + vpand x, mask4bit, tmp0; \ + vpandn x, mask4bit, x; \ + vpsrld $4, x, x; \ + \ + vpshufb tmp0, lo_t, tmp0; \ + vpshufb x, hi_t, x; \ + vpxor tmp0, x, x; + +/* post-SubByte transform. Note: x has been XOR'ed with mask4bit by + * 'vaeslastenc' instruction. */ +#define transform_post(x, lo_t, hi_t, mask4bit, tmp0) \ + vpandn mask4bit, x, tmp0; \ + vpsrld $4, x, x; \ + vpand x, mask4bit, x; \ + \ + vpshufb tmp0, lo_t, tmp0; \ + vpshufb x, hi_t, x; \ + vpxor tmp0, x, x; + + +.section .rodata.cst16, "aM", @progbits, 16 +.align 16 + +/* + * Following four affine transform look-up tables are from work by + * Markku-Juhani O. Saarinen, at https://github.com/mjosaarinen/sm4ni + * + * These allow exposing SM4 S-Box from AES SubByte. + */ + +/* pre-SubByte affine transform, from SM4 field to AES field. */ +.Lpre_tf_lo_s: + .quad 0x9197E2E474720701, 0xC7C1B4B222245157 +.Lpre_tf_hi_s: + .quad 0xE240AB09EB49A200, 0xF052B91BF95BB012 + +/* post-SubByte affine transform, from AES field to SM4 field. */ +.Lpost_tf_lo_s: + .quad 0x5B67F2CEA19D0834, 0xEDD14478172BBE82 +.Lpost_tf_hi_s: + .quad 0xAE7201DD73AFDC00, 0x11CDBE62CC1063BF + +/* For isolating SubBytes from AESENCLAST, inverse shift row */ +.Linv_shift_row: + .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b + .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03 + +/* Inverse shift row + Rotate left by 8 bits on 32-bit words with vpshufb */ +.Linv_shift_row_rol_8: + .byte 0x07, 0x00, 0x0d, 0x0a, 0x0b, 0x04, 0x01, 0x0e + .byte 0x0f, 0x08, 0x05, 0x02, 0x03, 0x0c, 0x09, 0x06 + +/* Inverse shift row + Rotate left by 16 bits on 32-bit words with vpshufb */ +.Linv_shift_row_rol_16: + .byte 0x0a, 0x07, 0x00, 0x0d, 0x0e, 0x0b, 0x04, 0x01 + .byte 0x02, 0x0f, 0x08, 0x05, 0x06, 0x03, 0x0c, 0x09 + +/* Inverse shift row + Rotate left by 24 bits on 32-bit words with vpshufb */ +.Linv_shift_row_rol_24: + .byte 0x0d, 0x0a, 0x07, 0x00, 0x01, 0x0e, 0x0b, 0x04 + .byte 0x05, 0x02, 0x0f, 0x08, 0x09, 0x06, 0x03, 0x0c + +/* For CTR-mode IV byteswap */ +.Lbswap128_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +/* For input word byte-swap */ +.Lbswap32_mask: + .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 + +.align 4 +/* 4-bit mask */ +.L0f0f0f0f: + .long 0x0f0f0f0f + +/* 12 bytes, only for padding */ +.Lpadding_deadbeef: + .long 0xdeadbeef, 0xdeadbeef, 0xdeadbeef + +.text +.align 16 + +.align 8 +SYM_FUNC_START_LOCAL(__sm4_crypt_blk16) + /* input: + * %rdi: round key array, CTX + * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel + * plaintext blocks + * output: + * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel + * ciphertext blocks + */ + FRAME_BEGIN + + vbroadcasti128 .Lbswap32_mask rRIP, RTMP2; + vpshufb RTMP2, RA0, RA0; + vpshufb RTMP2, RA1, RA1; + vpshufb RTMP2, RA2, RA2; + vpshufb RTMP2, RA3, RA3; + vpshufb RTMP2, RB0, RB0; + vpshufb RTMP2, RB1, RB1; + vpshufb RTMP2, RB2, RB2; + vpshufb RTMP2, RB3, RB3; + + vpbroadcastd .L0f0f0f0f rRIP, MASK_4BIT; + transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1); + transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1); + +#define ROUND(round, s0, s1, s2, s3, r0, r1, r2, r3) \ + vpbroadcastd (4*(round))(%rdi), RX0; \ + vbroadcasti128 .Lpre_tf_lo_s rRIP, RTMP4; \ + vbroadcasti128 .Lpre_tf_hi_s rRIP, RTMP1; \ + vmovdqa RX0, RX1; \ + vpxor s1, RX0, RX0; \ + vpxor s2, RX0, RX0; \ + vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \ + vbroadcasti128 .Lpost_tf_lo_s rRIP, RTMP2; \ + vbroadcasti128 .Lpost_tf_hi_s rRIP, RTMP3; \ + vpxor r1, RX1, RX1; \ + vpxor r2, RX1, RX1; \ + vpxor r3, RX1, RX1; /* r1 ^ r2 ^ r3 ^ rk */ \ + \ + /* sbox, non-linear part */ \ + transform_pre(RX0, RTMP4, RTMP1, MASK_4BIT, RTMP0); \ + transform_pre(RX1, RTMP4, RTMP1, MASK_4BIT, RTMP0); \ + vextracti128 $1, RX0, RTMP4x; \ + vextracti128 $1, RX1, RTMP0x; \ + vaesenclast MASK_4BITx, RX0x, RX0x; \ + vaesenclast MASK_4BITx, RTMP4x, RTMP4x; \ + vaesenclast MASK_4BITx, RX1x, RX1x; \ + vaesenclast MASK_4BITx, RTMP0x, RTMP0x; \ + vinserti128 $1, RTMP4x, RX0, RX0; \ + vbroadcasti128 .Linv_shift_row rRIP, RTMP4; \ + vinserti128 $1, RTMP0x, RX1, RX1; \ + transform_post(RX0, RTMP2, RTMP3, MASK_4BIT, RTMP0); \ + transform_post(RX1, RTMP2, RTMP3, MASK_4BIT, RTMP0); \ + \ + /* linear part */ \ + vpshufb RTMP4, RX0, RTMP0; \ + vpxor RTMP0, s0, s0; /* s0 ^ x */ \ + vpshufb RTMP4, RX1, RTMP2; \ + vbroadcasti128 .Linv_shift_row_rol_8 rRIP, RTMP4; \ + vpxor RTMP2, r0, r0; /* r0 ^ x */ \ + vpshufb RTMP4, RX0, RTMP1; \ + vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \ + vpshufb RTMP4, RX1, RTMP3; \ + vbroadcasti128 .Linv_shift_row_rol_16 rRIP, RTMP4; \ + vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) */ \ + vpshufb RTMP4, RX0, RTMP1; \ + vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \ + vpshufb RTMP4, RX1, RTMP3; \ + vbroadcasti128 .Linv_shift_row_rol_24 rRIP, RTMP4; \ + vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) ^ rol(x,16) */ \ + vpshufb RTMP4, RX0, RTMP1; \ + vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \ + vpslld $2, RTMP0, RTMP1; \ + vpsrld $30, RTMP0, RTMP0; \ + vpxor RTMP0, s0, s0; \ + /* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \ + vpxor RTMP1, s0, s0; \ + vpshufb RTMP4, RX1, RTMP3; \ + vpxor RTMP3, r0, r0; /* r0 ^ x ^ rol(x,24) */ \ + vpslld $2, RTMP2, RTMP3; \ + vpsrld $30, RTMP2, RTMP2; \ + vpxor RTMP2, r0, r0; \ + /* r0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \ + vpxor RTMP3, r0, r0; + + leaq (32*4)(%rdi), %rax; +.align 16 +.Lroundloop_blk8: + ROUND(0, RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3); + ROUND(1, RA1, RA2, RA3, RA0, RB1, RB2, RB3, RB0); + ROUND(2, RA2, RA3, RA0, RA1, RB2, RB3, RB0, RB1); + ROUND(3, RA3, RA0, RA1, RA2, RB3, RB0, RB1, RB2); + leaq (4*4)(%rdi), %rdi; + cmpq %rax, %rdi; + jne .Lroundloop_blk8; + +#undef ROUND + + vbroadcasti128 .Lbswap128_mask rRIP, RTMP2; + + transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1); + transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1); + vpshufb RTMP2, RA0, RA0; + vpshufb RTMP2, RA1, RA1; + vpshufb RTMP2, RA2, RA2; + vpshufb RTMP2, RA3, RA3; + vpshufb RTMP2, RB0, RB0; + vpshufb RTMP2, RB1, RB1; + vpshufb RTMP2, RB2, RB2; + vpshufb RTMP2, RB3, RB3; + + FRAME_END + ret; +SYM_FUNC_END(__sm4_crypt_blk16) + +#define inc_le128(x, minus_one, tmp) \ + vpcmpeqq minus_one, x, tmp; \ + vpsubq minus_one, x, x; \ + vpslldq $8, tmp, tmp; \ + vpsubq tmp, x, x; + +/* + * void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst, + * const u8 *src, u8 *iv) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx2_ctr_enc_blk16) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: iv (big endian, 128bit) + */ + FRAME_BEGIN + + movq 8(%rcx), %rax; + bswapq %rax; + + vzeroupper; + + vbroadcasti128 .Lbswap128_mask rRIP, RTMP3; + vpcmpeqd RNOT, RNOT, RNOT; + vpsrldq $8, RNOT, RNOT; /* ab: -1:0 ; cd: -1:0 */ + vpaddq RNOT, RNOT, RTMP2; /* ab: -2:0 ; cd: -2:0 */ + + /* load IV and byteswap */ + vmovdqu (%rcx), RTMP4x; + vpshufb RTMP3x, RTMP4x, RTMP4x; + vmovdqa RTMP4x, RTMP0x; + inc_le128(RTMP4x, RNOTx, RTMP1x); + vinserti128 $1, RTMP4x, RTMP0, RTMP0; + vpshufb RTMP3, RTMP0, RA0; /* +1 ; +0 */ + + /* check need for handling 64-bit overflow and carry */ + cmpq $(0xffffffffffffffff - 16), %rax; + ja .Lhandle_ctr_carry; + + /* construct IVs */ + vpsubq RTMP2, RTMP0, RTMP0; /* +3 ; +2 */ + vpshufb RTMP3, RTMP0, RA1; + vpsubq RTMP2, RTMP0, RTMP0; /* +5 ; +4 */ + vpshufb RTMP3, RTMP0, RA2; + vpsubq RTMP2, RTMP0, RTMP0; /* +7 ; +6 */ + vpshufb RTMP3, RTMP0, RA3; + vpsubq RTMP2, RTMP0, RTMP0; /* +9 ; +8 */ + vpshufb RTMP3, RTMP0, RB0; + vpsubq RTMP2, RTMP0, RTMP0; /* +11 ; +10 */ + vpshufb RTMP3, RTMP0, RB1; + vpsubq RTMP2, RTMP0, RTMP0; /* +13 ; +12 */ + vpshufb RTMP3, RTMP0, RB2; + vpsubq RTMP2, RTMP0, RTMP0; /* +15 ; +14 */ + vpshufb RTMP3, RTMP0, RB3; + vpsubq RTMP2, RTMP0, RTMP0; /* +16 */ + vpshufb RTMP3x, RTMP0x, RTMP0x; + + jmp .Lctr_carry_done; + +.Lhandle_ctr_carry: + /* construct IVs */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RA1; /* +3 ; +2 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RA2; /* +5 ; +4 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RA3; /* +7 ; +6 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RB0; /* +9 ; +8 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RB1; /* +11 ; +10 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RB2; /* +13 ; +12 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RB3; /* +15 ; +14 */ + inc_le128(RTMP0, RNOT, RTMP1); + vextracti128 $1, RTMP0, RTMP0x; + vpshufb RTMP3x, RTMP0x, RTMP0x; /* +16 */ + +.align 4 +.Lctr_carry_done: + /* store new IV */ + vmovdqu RTMP0x, (%rcx); + + call __sm4_crypt_blk16; + + vpxor (0 * 32)(%rdx), RA0, RA0; + vpxor (1 * 32)(%rdx), RA1, RA1; + vpxor (2 * 32)(%rdx), RA2, RA2; + vpxor (3 * 32)(%rdx), RA3, RA3; + vpxor (4 * 32)(%rdx), RB0, RB0; + vpxor (5 * 32)(%rdx), RB1, RB1; + vpxor (6 * 32)(%rdx), RB2, RB2; + vpxor (7 * 32)(%rdx), RB3, RB3; + + vmovdqu RA0, (0 * 32)(%rsi); + vmovdqu RA1, (1 * 32)(%rsi); + vmovdqu RA2, (2 * 32)(%rsi); + vmovdqu RA3, (3 * 32)(%rsi); + vmovdqu RB0, (4 * 32)(%rsi); + vmovdqu RB1, (5 * 32)(%rsi); + vmovdqu RB2, (6 * 32)(%rsi); + vmovdqu RB3, (7 * 32)(%rsi); + + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx2_ctr_enc_blk16) + +/* + * void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst, + * const u8 *src, u8 *iv) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx2_cbc_dec_blk16) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: iv + */ + FRAME_BEGIN + + vzeroupper; + + vmovdqu (0 * 32)(%rdx), RA0; + vmovdqu (1 * 32)(%rdx), RA1; + vmovdqu (2 * 32)(%rdx), RA2; + vmovdqu (3 * 32)(%rdx), RA3; + vmovdqu (4 * 32)(%rdx), RB0; + vmovdqu (5 * 32)(%rdx), RB1; + vmovdqu (6 * 32)(%rdx), RB2; + vmovdqu (7 * 32)(%rdx), RB3; + + call __sm4_crypt_blk16; + + vmovdqu (%rcx), RNOTx; + vinserti128 $1, (%rdx), RNOT, RNOT; + vpxor RNOT, RA0, RA0; + vpxor (0 * 32 + 16)(%rdx), RA1, RA1; + vpxor (1 * 32 + 16)(%rdx), RA2, RA2; + vpxor (2 * 32 + 16)(%rdx), RA3, RA3; + vpxor (3 * 32 + 16)(%rdx), RB0, RB0; + vpxor (4 * 32 + 16)(%rdx), RB1, RB1; + vpxor (5 * 32 + 16)(%rdx), RB2, RB2; + vpxor (6 * 32 + 16)(%rdx), RB3, RB3; + vmovdqu (7 * 32 + 16)(%rdx), RNOTx; + vmovdqu RNOTx, (%rcx); /* store new IV */ + + vmovdqu RA0, (0 * 32)(%rsi); + vmovdqu RA1, (1 * 32)(%rsi); + vmovdqu RA2, (2 * 32)(%rsi); + vmovdqu RA3, (3 * 32)(%rsi); + vmovdqu RB0, (4 * 32)(%rsi); + vmovdqu RB1, (5 * 32)(%rsi); + vmovdqu RB2, (6 * 32)(%rsi); + vmovdqu RB3, (7 * 32)(%rsi); + + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx2_cbc_dec_blk16) + +/* + * void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst, + * const u8 *src, u8 *iv) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx2_cfb_dec_blk16) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: iv + */ + FRAME_BEGIN + + vzeroupper; + + /* Load input */ + vmovdqu (%rcx), RNOTx; + vinserti128 $1, (%rdx), RNOT, RA0; + vmovdqu (0 * 32 + 16)(%rdx), RA1; + vmovdqu (1 * 32 + 16)(%rdx), RA2; + vmovdqu (2 * 32 + 16)(%rdx), RA3; + vmovdqu (3 * 32 + 16)(%rdx), RB0; + vmovdqu (4 * 32 + 16)(%rdx), RB1; + vmovdqu (5 * 32 + 16)(%rdx), RB2; + vmovdqu (6 * 32 + 16)(%rdx), RB3; + + /* Update IV */ + vmovdqu (7 * 32 + 16)(%rdx), RNOTx; + vmovdqu RNOTx, (%rcx); + + call __sm4_crypt_blk16; + + vpxor (0 * 32)(%rdx), RA0, RA0; + vpxor (1 * 32)(%rdx), RA1, RA1; + vpxor (2 * 32)(%rdx), RA2, RA2; + vpxor (3 * 32)(%rdx), RA3, RA3; + vpxor (4 * 32)(%rdx), RB0, RB0; + vpxor (5 * 32)(%rdx), RB1, RB1; + vpxor (6 * 32)(%rdx), RB2, RB2; + vpxor (7 * 32)(%rdx), RB3, RB3; + + vmovdqu RA0, (0 * 32)(%rsi); + vmovdqu RA1, (1 * 32)(%rsi); + vmovdqu RA2, (2 * 32)(%rsi); + vmovdqu RA3, (3 * 32)(%rsi); + vmovdqu RB0, (4 * 32)(%rsi); + vmovdqu RB1, (5 * 32)(%rsi); + vmovdqu RB2, (6 * 32)(%rsi); + vmovdqu RB3, (7 * 32)(%rsi); + + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx2_cfb_dec_blk16) diff --git a/arch/x86/crypto/sm4-avx.h b/arch/x86/crypto/sm4-avx.h new file mode 100644 index 0000000000000..1bceab7516aa1 --- /dev/null +++ b/arch/x86/crypto/sm4-avx.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef ASM_X86_SM4_AVX_H +#define ASM_X86_SM4_AVX_H + +#include +#include + +typedef void (*sm4_crypt_func)(const u32 *rk, u8 *dst, const u8 *src, u8 *iv); + +int sm4_avx_ecb_encrypt(struct skcipher_request *req); +int sm4_avx_ecb_decrypt(struct skcipher_request *req); + +int sm4_cbc_encrypt(struct skcipher_request *req); +int sm4_avx_cbc_decrypt(struct skcipher_request *req, + unsigned int bsize, sm4_crypt_func func); + +int sm4_cfb_encrypt(struct skcipher_request *req); +int sm4_avx_cfb_decrypt(struct skcipher_request *req, + unsigned int bsize, sm4_crypt_func func); + +int sm4_avx_ctr_crypt(struct skcipher_request *req, + unsigned int bsize, sm4_crypt_func func); + +#endif diff --git a/arch/x86/crypto/sm4_aesni_avx2_glue.c b/arch/x86/crypto/sm4_aesni_avx2_glue.c new file mode 100644 index 0000000000000..84bc718f49a3d --- /dev/null +++ b/arch/x86/crypto/sm4_aesni_avx2_glue.c @@ -0,0 +1,169 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4 Cipher Algorithm, AES-NI/AVX2 optimized. + * as specified in + * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html + * + * Copyright (c) 2021, Alibaba Group. + * Copyright (c) 2021 Tianjia Zhang + */ + +#include +#include +#include +#include +#include +#include +#include +#include "sm4-avx.h" + +#define SM4_CRYPT16_BLOCK_SIZE (SM4_BLOCK_SIZE * 16) + +asmlinkage void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst, + const u8 *src, u8 *iv); +asmlinkage void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst, + const u8 *src, u8 *iv); +asmlinkage void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst, + const u8 *src, u8 *iv); + +static int sm4_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return sm4_expandkey(ctx, key, key_len); +} + +static int cbc_decrypt(struct skcipher_request *req) +{ + return sm4_avx_cbc_decrypt(req, SM4_CRYPT16_BLOCK_SIZE, + sm4_aesni_avx2_cbc_dec_blk16); +} + + +static int cfb_decrypt(struct skcipher_request *req) +{ + return sm4_avx_cfb_decrypt(req, SM4_CRYPT16_BLOCK_SIZE, + sm4_aesni_avx2_cfb_dec_blk16); +} + +static int ctr_crypt(struct skcipher_request *req) +{ + return sm4_avx_ctr_crypt(req, SM4_CRYPT16_BLOCK_SIZE, + sm4_aesni_avx2_ctr_enc_blk16); +} + +static struct skcipher_alg sm4_aesni_avx2_skciphers[] = { + { + .base = { + .cra_name = "__ecb(sm4)", + .cra_driver_name = "__ecb-sm4-aesni-avx2", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .walksize = 16 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = sm4_avx_ecb_encrypt, + .decrypt = sm4_avx_ecb_decrypt, + }, { + .base = { + .cra_name = "__cbc(sm4)", + .cra_driver_name = "__cbc-sm4-aesni-avx2", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .walksize = 16 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = sm4_cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base = { + .cra_name = "__cfb(sm4)", + .cra_driver_name = "__cfb-sm4-aesni-avx2", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .walksize = 16 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = sm4_cfb_encrypt, + .decrypt = cfb_decrypt, + }, { + .base = { + .cra_name = "__ctr(sm4)", + .cra_driver_name = "__ctr-sm4-aesni-avx2", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .walksize = 16 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + } +}; + +static struct simd_skcipher_alg * +simd_sm4_aesni_avx2_skciphers[ARRAY_SIZE(sm4_aesni_avx2_skciphers)]; + +static int __init sm4_init(void) +{ + const char *feature_name; + + if (!boot_cpu_has(X86_FEATURE_AVX) || + !boot_cpu_has(X86_FEATURE_AVX2) || + !boot_cpu_has(X86_FEATURE_AES) || + !boot_cpu_has(X86_FEATURE_OSXSAVE)) { + pr_info("AVX2 or AES-NI instructions are not detected.\n"); + return -ENODEV; + } + + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); + return -ENODEV; + } + + return simd_register_skciphers_compat(sm4_aesni_avx2_skciphers, + ARRAY_SIZE(sm4_aesni_avx2_skciphers), + simd_sm4_aesni_avx2_skciphers); +} + +static void __exit sm4_exit(void) +{ + simd_unregister_skciphers(sm4_aesni_avx2_skciphers, + ARRAY_SIZE(sm4_aesni_avx2_skciphers), + simd_sm4_aesni_avx2_skciphers); +} + +module_init(sm4_init); +module_exit(sm4_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Tianjia Zhang "); +MODULE_DESCRIPTION("SM4 Cipher Algorithm, AES-NI/AVX2 optimized"); +MODULE_ALIAS_CRYPTO("sm4"); +MODULE_ALIAS_CRYPTO("sm4-aesni-avx2"); diff --git a/arch/x86/crypto/sm4_aesni_avx_glue.c b/arch/x86/crypto/sm4_aesni_avx_glue.c new file mode 100644 index 0000000000000..7800f77d68add --- /dev/null +++ b/arch/x86/crypto/sm4_aesni_avx_glue.c @@ -0,0 +1,487 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4 Cipher Algorithm, AES-NI/AVX optimized. + * as specified in + * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html + * + * Copyright (c) 2021, Alibaba Group. + * Copyright (c) 2021 Tianjia Zhang + */ + +#include +#include +#include +#include +#include +#include +#include +#include "sm4-avx.h" + +#define SM4_CRYPT8_BLOCK_SIZE (SM4_BLOCK_SIZE * 8) + +asmlinkage void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst, + const u8 *src, int nblocks); +asmlinkage void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst, + const u8 *src, int nblocks); +asmlinkage void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst, + const u8 *src, u8 *iv); +asmlinkage void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst, + const u8 *src, u8 *iv); +asmlinkage void sm4_aesni_avx_cfb_dec_blk8(const u32 *rk, u8 *dst, + const u8 *src, u8 *iv); + +static int sm4_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return sm4_expandkey(ctx, key, key_len); +} + +static int ecb_do_crypt(struct skcipher_request *req, const u32 *rkey) +{ + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + kernel_fpu_begin(); + while (nbytes >= SM4_CRYPT8_BLOCK_SIZE) { + sm4_aesni_avx_crypt8(rkey, dst, src, 8); + dst += SM4_CRYPT8_BLOCK_SIZE; + src += SM4_CRYPT8_BLOCK_SIZE; + nbytes -= SM4_CRYPT8_BLOCK_SIZE; + } + while (nbytes >= SM4_BLOCK_SIZE) { + unsigned int nblocks = min(nbytes >> 4, 4u); + sm4_aesni_avx_crypt4(rkey, dst, src, nblocks); + dst += nblocks * SM4_BLOCK_SIZE; + src += nblocks * SM4_BLOCK_SIZE; + nbytes -= nblocks * SM4_BLOCK_SIZE; + } + kernel_fpu_end(); + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +int sm4_avx_ecb_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return ecb_do_crypt(req, ctx->rkey_enc); +} +EXPORT_SYMBOL_GPL(sm4_avx_ecb_encrypt); + +int sm4_avx_ecb_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return ecb_do_crypt(req, ctx->rkey_dec); +} +EXPORT_SYMBOL_GPL(sm4_avx_ecb_decrypt); + +int sm4_cbc_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *iv = walk.iv; + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + while (nbytes >= SM4_BLOCK_SIZE) { + crypto_xor_cpy(dst, src, iv, SM4_BLOCK_SIZE); + sm4_crypt_block(ctx->rkey_enc, dst, dst); + iv = dst; + src += SM4_BLOCK_SIZE; + dst += SM4_BLOCK_SIZE; + nbytes -= SM4_BLOCK_SIZE; + } + if (iv != walk.iv) + memcpy(walk.iv, iv, SM4_BLOCK_SIZE); + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} +EXPORT_SYMBOL_GPL(sm4_cbc_encrypt); + +int sm4_avx_cbc_decrypt(struct skcipher_request *req, + unsigned int bsize, sm4_crypt_func func) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + kernel_fpu_begin(); + + while (nbytes >= bsize) { + func(ctx->rkey_dec, dst, src, walk.iv); + dst += bsize; + src += bsize; + nbytes -= bsize; + } + + while (nbytes >= SM4_BLOCK_SIZE) { + u8 keystream[SM4_BLOCK_SIZE * 8]; + u8 iv[SM4_BLOCK_SIZE]; + unsigned int nblocks = min(nbytes >> 4, 8u); + int i; + + sm4_aesni_avx_crypt8(ctx->rkey_dec, keystream, + src, nblocks); + + src += ((int)nblocks - 2) * SM4_BLOCK_SIZE; + dst += (nblocks - 1) * SM4_BLOCK_SIZE; + memcpy(iv, src + SM4_BLOCK_SIZE, SM4_BLOCK_SIZE); + + for (i = nblocks - 1; i > 0; i--) { + crypto_xor_cpy(dst, src, + &keystream[i * SM4_BLOCK_SIZE], + SM4_BLOCK_SIZE); + src -= SM4_BLOCK_SIZE; + dst -= SM4_BLOCK_SIZE; + } + crypto_xor_cpy(dst, walk.iv, keystream, SM4_BLOCK_SIZE); + memcpy(walk.iv, iv, SM4_BLOCK_SIZE); + dst += nblocks * SM4_BLOCK_SIZE; + src += (nblocks + 1) * SM4_BLOCK_SIZE; + nbytes -= nblocks * SM4_BLOCK_SIZE; + } + + kernel_fpu_end(); + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} +EXPORT_SYMBOL_GPL(sm4_avx_cbc_decrypt); + +static int cbc_decrypt(struct skcipher_request *req) +{ + return sm4_avx_cbc_decrypt(req, SM4_CRYPT8_BLOCK_SIZE, + sm4_aesni_avx_cbc_dec_blk8); +} + +int sm4_cfb_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + u8 keystream[SM4_BLOCK_SIZE]; + const u8 *iv = walk.iv; + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + while (nbytes >= SM4_BLOCK_SIZE) { + sm4_crypt_block(ctx->rkey_enc, keystream, iv); + crypto_xor_cpy(dst, src, keystream, SM4_BLOCK_SIZE); + iv = dst; + src += SM4_BLOCK_SIZE; + dst += SM4_BLOCK_SIZE; + nbytes -= SM4_BLOCK_SIZE; + } + if (iv != walk.iv) + memcpy(walk.iv, iv, SM4_BLOCK_SIZE); + + /* tail */ + if (walk.nbytes == walk.total && nbytes > 0) { + sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv); + crypto_xor_cpy(dst, src, keystream, nbytes); + nbytes = 0; + } + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} +EXPORT_SYMBOL_GPL(sm4_cfb_encrypt); + +int sm4_avx_cfb_decrypt(struct skcipher_request *req, + unsigned int bsize, sm4_crypt_func func) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + kernel_fpu_begin(); + + while (nbytes >= bsize) { + func(ctx->rkey_enc, dst, src, walk.iv); + dst += bsize; + src += bsize; + nbytes -= bsize; + } + + while (nbytes >= SM4_BLOCK_SIZE) { + u8 keystream[SM4_BLOCK_SIZE * 8]; + unsigned int nblocks = min(nbytes >> 4, 8u); + + memcpy(keystream, walk.iv, SM4_BLOCK_SIZE); + if (nblocks > 1) + memcpy(&keystream[SM4_BLOCK_SIZE], src, + (nblocks - 1) * SM4_BLOCK_SIZE); + memcpy(walk.iv, src + (nblocks - 1) * SM4_BLOCK_SIZE, + SM4_BLOCK_SIZE); + + sm4_aesni_avx_crypt8(ctx->rkey_enc, keystream, + keystream, nblocks); + + crypto_xor_cpy(dst, src, keystream, + nblocks * SM4_BLOCK_SIZE); + dst += nblocks * SM4_BLOCK_SIZE; + src += nblocks * SM4_BLOCK_SIZE; + nbytes -= nblocks * SM4_BLOCK_SIZE; + } + + kernel_fpu_end(); + + /* tail */ + if (walk.nbytes == walk.total && nbytes > 0) { + u8 keystream[SM4_BLOCK_SIZE]; + + sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv); + crypto_xor_cpy(dst, src, keystream, nbytes); + nbytes = 0; + } + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} +EXPORT_SYMBOL_GPL(sm4_avx_cfb_decrypt); + +static int cfb_decrypt(struct skcipher_request *req) +{ + return sm4_avx_cfb_decrypt(req, SM4_CRYPT8_BLOCK_SIZE, + sm4_aesni_avx_cfb_dec_blk8); +} + +int sm4_avx_ctr_crypt(struct skcipher_request *req, + unsigned int bsize, sm4_crypt_func func) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + kernel_fpu_begin(); + + while (nbytes >= bsize) { + func(ctx->rkey_enc, dst, src, walk.iv); + dst += bsize; + src += bsize; + nbytes -= bsize; + } + + while (nbytes >= SM4_BLOCK_SIZE) { + u8 keystream[SM4_BLOCK_SIZE * 8]; + unsigned int nblocks = min(nbytes >> 4, 8u); + int i; + + for (i = 0; i < nblocks; i++) { + memcpy(&keystream[i * SM4_BLOCK_SIZE], + walk.iv, SM4_BLOCK_SIZE); + crypto_inc(walk.iv, SM4_BLOCK_SIZE); + } + sm4_aesni_avx_crypt8(ctx->rkey_enc, keystream, + keystream, nblocks); + + crypto_xor_cpy(dst, src, keystream, + nblocks * SM4_BLOCK_SIZE); + dst += nblocks * SM4_BLOCK_SIZE; + src += nblocks * SM4_BLOCK_SIZE; + nbytes -= nblocks * SM4_BLOCK_SIZE; + } + + kernel_fpu_end(); + + /* tail */ + if (walk.nbytes == walk.total && nbytes > 0) { + u8 keystream[SM4_BLOCK_SIZE]; + + memcpy(keystream, walk.iv, SM4_BLOCK_SIZE); + crypto_inc(walk.iv, SM4_BLOCK_SIZE); + + sm4_crypt_block(ctx->rkey_enc, keystream, keystream); + + crypto_xor_cpy(dst, src, keystream, nbytes); + dst += nbytes; + src += nbytes; + nbytes = 0; + } + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} +EXPORT_SYMBOL_GPL(sm4_avx_ctr_crypt); + +static int ctr_crypt(struct skcipher_request *req) +{ + return sm4_avx_ctr_crypt(req, SM4_CRYPT8_BLOCK_SIZE, + sm4_aesni_avx_ctr_enc_blk8); +} + +static struct skcipher_alg sm4_aesni_avx_skciphers[] = { + { + .base = { + .cra_name = "__ecb(sm4)", + .cra_driver_name = "__ecb-sm4-aesni-avx", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .walksize = 8 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = sm4_avx_ecb_encrypt, + .decrypt = sm4_avx_ecb_decrypt, + }, { + .base = { + .cra_name = "__cbc(sm4)", + .cra_driver_name = "__cbc-sm4-aesni-avx", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .walksize = 8 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = sm4_cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base = { + .cra_name = "__cfb(sm4)", + .cra_driver_name = "__cfb-sm4-aesni-avx", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .walksize = 8 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = sm4_cfb_encrypt, + .decrypt = cfb_decrypt, + }, { + .base = { + .cra_name = "__ctr(sm4)", + .cra_driver_name = "__ctr-sm4-aesni-avx", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .walksize = 8 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + } +}; + +static struct simd_skcipher_alg * +simd_sm4_aesni_avx_skciphers[ARRAY_SIZE(sm4_aesni_avx_skciphers)]; + +static int __init sm4_init(void) +{ + const char *feature_name; + + if (!boot_cpu_has(X86_FEATURE_AVX) || + !boot_cpu_has(X86_FEATURE_AES) || + !boot_cpu_has(X86_FEATURE_OSXSAVE)) { + pr_info("AVX or AES-NI instructions are not detected.\n"); + return -ENODEV; + } + + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); + return -ENODEV; + } + + return simd_register_skciphers_compat(sm4_aesni_avx_skciphers, + ARRAY_SIZE(sm4_aesni_avx_skciphers), + simd_sm4_aesni_avx_skciphers); +} + +static void __exit sm4_exit(void) +{ + simd_unregister_skciphers(sm4_aesni_avx_skciphers, + ARRAY_SIZE(sm4_aesni_avx_skciphers), + simd_sm4_aesni_avx_skciphers); +} + +module_init(sm4_init); +module_exit(sm4_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Tianjia Zhang "); +MODULE_DESCRIPTION("SM4 Cipher Algorithm, AES-NI/AVX optimized"); +MODULE_ALIAS_CRYPTO("sm4"); +MODULE_ALIAS_CRYPTO("sm4-aesni-avx"); diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 5f1f961118b30..20b046f19fe48 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -323,7 +323,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) static __init int vdso_setup(char *s) { vdso64_enabled = simple_strtoul(s, NULL, 0); - return 0; + return 1; } __setup("vdso=", vdso_setup); diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index aed9606d67237..063191e7c5cde 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include "../perf_event.h" @@ -18,6 +19,9 @@ static unsigned long perf_nmi_window; #define AMD_MERGE_EVENT ((0xFULL << 32) | 0xFFULL) #define AMD_MERGE_EVENT_ENABLE (AMD_MERGE_EVENT | ARCH_PERFMON_EVENTSEL_ENABLE) +/* PMC Enable and Overflow bits for PerfCntrGlobal* registers */ +static u64 amd_pmu_global_cntr_mask __read_mostly; + static __initconst const u64 amd_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] @@ -510,6 +514,18 @@ static struct amd_nb *amd_alloc_nb(int cpu) return nb; } +static void amd_pmu_cpu_reset(int cpu) +{ + if (x86_pmu.version < 2) + return; + + /* Clear enable bits i.e. PerfCntrGlobalCtl.PerfCntrEn */ + wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0); + + /* Clear overflow bits i.e. PerfCntrGLobalStatus.PerfCntrOvfl */ + wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, amd_pmu_global_cntr_mask); +} + static int amd_pmu_cpu_prepare(int cpu) { struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); @@ -555,6 +571,7 @@ static void amd_pmu_cpu_starting(int cpu) cpuc->amd_nb->nb_id = nb_id; cpuc->amd_nb->refcnt++; + amd_pmu_cpu_reset(cpu); } static void amd_pmu_cpu_dead(int cpu) @@ -574,8 +591,54 @@ static void amd_pmu_cpu_dead(int cpu) cpuhw->amd_nb = NULL; } + + amd_pmu_cpu_reset(cpu); +} + +static inline void amd_pmu_set_global_ctl(u64 ctl) +{ + wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, ctl); +} + +static inline u64 amd_pmu_get_global_status(void) +{ + u64 status; + + /* PerfCntrGlobalStatus is read-only */ + rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status); + + return status & amd_pmu_global_cntr_mask; } +static inline void amd_pmu_ack_global_status(u64 status) +{ + /* + * PerfCntrGlobalStatus is read-only but an overflow acknowledgment + * mechanism exists; writing 1 to a bit in PerfCntrGlobalStatusClr + * clears the same bit in PerfCntrGlobalStatus + */ + + /* Only allow modifications to PerfCntrGlobalStatus.PerfCntrOvfl */ + status &= amd_pmu_global_cntr_mask; + wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status); +} + +static bool amd_pmu_test_overflow_topbit(int idx) +{ + u64 counter; + + rdmsrl(x86_pmu_event_addr(idx), counter); + + return !(counter & BIT_ULL(x86_pmu.cntval_bits - 1)); +} + +static bool amd_pmu_test_overflow_status(int idx) +{ + return amd_pmu_get_global_status() & BIT_ULL(idx); +} + +static bool (*amd_pmu_test_overflow)(int idx) = amd_pmu_test_overflow_topbit; + /* * When a PMC counter overflows, an NMI is used to process the event and * reset the counter. NMI latency can result in the counter being updated @@ -588,7 +651,6 @@ static void amd_pmu_cpu_dead(int cpu) static void amd_pmu_wait_on_overflow(int idx) { unsigned int i; - u64 counter; /* * Wait for the counter to be reset if it has overflowed. This loop @@ -596,8 +658,7 @@ static void amd_pmu_wait_on_overflow(int idx) * forever... */ for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) { - rdmsrl(x86_pmu_event_addr(idx), counter); - if (counter & (1ULL << (x86_pmu.cntval_bits - 1))) + if (!amd_pmu_test_overflow(idx)) break; /* Might be in IRQ context, so can't sleep */ @@ -605,13 +666,11 @@ static void amd_pmu_wait_on_overflow(int idx) } } -static void amd_pmu_disable_all(void) +static void amd_pmu_check_overflow(void) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx; - x86_pmu_disable_all(); - /* * This shouldn't be called from NMI context, but add a safeguard here * to return, since if we're in NMI context we can't wait for an NMI @@ -634,6 +693,26 @@ static void amd_pmu_disable_all(void) } } +static void amd_pmu_v2_enable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + /* + * Testing cpu_hw_events.enabled should be skipped in this case unlike + * in x86_pmu_enable_event(). + * + * Since cpu_hw_events.enabled is set only after returning from + * x86_pmu_start(), the PMCs must be programmed and kept ready. + * Counting starts only after x86_pmu_enable_all() is called. + */ + __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); +} + +static void amd_pmu_v2_enable_all(int added) +{ + amd_pmu_set_global_ctl(amd_pmu_global_cntr_mask); +} + static void amd_pmu_disable_event(struct perf_event *event) { x86_pmu_disable_event(event); @@ -651,6 +730,19 @@ static void amd_pmu_disable_event(struct perf_event *event) amd_pmu_wait_on_overflow(event->hw.idx); } +static void amd_pmu_disable_all(void) +{ + x86_pmu_disable_all(); + amd_pmu_check_overflow(); +} + +static void amd_pmu_v2_disable_all(void) +{ + /* Disable all PMCs */ + amd_pmu_set_global_ctl(0); + amd_pmu_check_overflow(); +} + /* * Because of NMI latency, if multiple PMC counters are active or other sources * of NMIs are received, the perf NMI handler can handle one or more overflowed @@ -669,6 +761,24 @@ static void amd_pmu_disable_event(struct perf_event *event) * handled a counter. When an un-handled NMI is received, it will be claimed * only if arriving within that window. */ +static inline int amd_pmu_adjust_nmi_window(int handled) +{ + /* + * If a counter was handled, record a timestamp such that un-handled + * NMIs will be claimed if arriving within that window. + */ + if (handled) { + this_cpu_write(perf_nmi_tstamp, jiffies + perf_nmi_window); + + return handled; + } + + if (time_after(jiffies, this_cpu_read(perf_nmi_tstamp))) + return NMI_DONE; + + return NMI_HANDLED; +} + static int amd_pmu_handle_irq(struct pt_regs *regs) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -684,21 +794,84 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) /* Process any counter overflows */ handled = x86_pmu_handle_irq(regs); + return amd_pmu_adjust_nmi_window(handled); +} + +static int amd_pmu_v2_handle_irq(struct pt_regs *regs) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct perf_sample_data data; + struct hw_perf_event *hwc; + struct perf_event *event; + int handled = 0, idx; + u64 status, mask; + bool pmu_enabled; + /* - * If a counter was handled, record a timestamp such that un-handled - * NMIs will be claimed if arriving within that window. + * Save the PMU state as it needs to be restored when leaving the + * handler */ - if (handled) { - this_cpu_write(perf_nmi_tstamp, - jiffies + perf_nmi_window); + pmu_enabled = cpuc->enabled; + cpuc->enabled = 0; - return handled; + /* Stop counting */ + amd_pmu_v2_disable_all(); + + status = amd_pmu_get_global_status(); + + /* Check if any overflows are pending */ + if (!status) + goto done; + + for (idx = 0; idx < x86_pmu.num_counters; idx++) { + if (!test_bit(idx, cpuc->active_mask)) + continue; + + event = cpuc->events[idx]; + hwc = &event->hw; + x86_perf_event_update(event); + mask = BIT_ULL(idx); + + if (!(status & mask)) + continue; + + /* Event overflow */ + handled++; + perf_sample_data_init(&data, 0, hwc->last_period); + + if (!x86_perf_event_set_period(event)) + continue; + + if (perf_event_overflow(event, &data, regs)) + x86_pmu_stop(event, 0); + + status &= ~mask; } - if (time_after(jiffies, this_cpu_read(perf_nmi_tstamp))) - return NMI_DONE; + /* + * It should never be the case that some overflows are not handled as + * the corresponding PMCs are expected to be inactive according to the + * active_mask + */ + WARN_ON(status > 0); - return NMI_HANDLED; + /* Clear overflow bits */ + amd_pmu_ack_global_status(~status); + + /* + * Unmasking the LVTPC is not required as the Mask (M) bit of the LVT + * PMI entry is not set by the local APIC when a PMC overflow occurs + */ + inc_irq_stat(apic_perf_irqs); + +done: + cpuc->enabled = pmu_enabled; + + /* Resume counting only if PMU is active */ + if (pmu_enabled) + amd_pmu_v2_enable_all(0); + + return amd_pmu_adjust_nmi_window(handled); } static struct event_constraint * @@ -949,6 +1122,7 @@ static __initconst const struct x86_pmu amd_pmu = { static int __init amd_core_pmu_init(void) { + union cpuid_0x80000022_ebx ebx; u64 even_ctr_mask = 0ULL; int i; @@ -966,6 +1140,27 @@ static int __init amd_core_pmu_init(void) x86_pmu.eventsel = MSR_F15H_PERF_CTL; x86_pmu.perfctr = MSR_F15H_PERF_CTR; x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE; + + /* Check for Performance Monitoring v2 support */ + if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) { + ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES); + + /* Update PMU version for later usage */ + x86_pmu.version = 2; + + /* Find the number of available Core PMCs */ + x86_pmu.num_counters = ebx.split.num_core_pmc; + + amd_pmu_global_cntr_mask = (1ULL << x86_pmu.num_counters) - 1; + + /* Update PMC handling functions */ + x86_pmu.enable_all = amd_pmu_v2_enable_all; + x86_pmu.disable_all = amd_pmu_v2_disable_all; + x86_pmu.enable = amd_pmu_v2_enable_event; + x86_pmu.handle_irq = amd_pmu_v2_handle_irq; + amd_pmu_test_overflow = amd_pmu_test_overflow_status; + } + /* * AMD Core perfctr has separate MSRs for the NB events, see * the amd/uncore.c driver. @@ -1032,6 +1227,24 @@ __init int amd_pmu_init(void) return 0; } +static inline void amd_pmu_reload_virt(void) +{ + if (x86_pmu.version >= 2) { + /* + * Clear global enable bits, reprogram the PERF_CTL + * registers with updated perf_ctr_virt_mask and then + * set global enable bits once again + */ + amd_pmu_v2_disable_all(); + x86_pmu_enable_all(0); + amd_pmu_v2_enable_all(0); + return; + } + + amd_pmu_disable_all(); + x86_pmu_enable_all(0); +} + void amd_pmu_enable_virt(void) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -1039,8 +1252,7 @@ void amd_pmu_enable_virt(void) cpuc->perf_ctr_virt_mask = 0; /* Reload all events */ - amd_pmu_disable_all(); - x86_pmu_enable_all(0); + amd_pmu_reload_virt(); } EXPORT_SYMBOL_GPL(amd_pmu_enable_virt); @@ -1057,7 +1269,6 @@ void amd_pmu_disable_virt(void) cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY; /* Reload all events */ - amd_pmu_disable_all(); - x86_pmu_enable_all(0); + amd_pmu_reload_virt(); } EXPORT_SYMBOL_GPL(amd_pmu_disable_virt); diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 0a54b8c2a457a..c251bc44c088d 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -26,6 +26,7 @@ static u32 ibs_caps; #include #include +#include #define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT) #define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT @@ -90,24 +91,12 @@ struct perf_ibs { unsigned long offset_mask[1]; int offset_max; unsigned int fetch_count_reset_broken : 1; + unsigned int fetch_ignore_if_zero_rip : 1; struct cpu_perf_ibs __percpu *pcpu; - struct attribute **format_attrs; - struct attribute_group format_group; - const struct attribute_group *attr_groups[2]; - u64 (*get_count)(u64 config); }; -struct perf_ibs_data { - u32 size; - union { - u32 data[0]; /* data buffer starts here */ - u32 caps; - }; - u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX]; -}; - static int perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period) { @@ -311,6 +300,16 @@ static int perf_ibs_init(struct perf_event *event) hwc->config_base = perf_ibs->msr; hwc->config = config; + /* + * rip recorded by IbsOpRip will not be consistent with rsp and rbp + * recorded as part of interrupt regs. Thus we need to use rip from + * interrupt regs while unwinding call stack. Setting _EARLY flag + * makes sure we unwind call-stack before perf sample rip is set to + * IbsOpRip. + */ + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) + event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY; + return 0; } @@ -328,11 +327,14 @@ static int perf_ibs_set_period(struct perf_ibs *perf_ibs, static u64 get_ibs_fetch_count(u64 config) { - return (config & IBS_FETCH_CNT) >> 12; + union ibs_fetch_ctl fetch_ctl = (union ibs_fetch_ctl)config; + + return fetch_ctl.fetch_cnt << 4; } static u64 get_ibs_op_count(u64 config) { + union ibs_op_ctl op_ctl = (union ibs_op_ctl)config; u64 count = 0; /* @@ -340,10 +342,13 @@ static u64 get_ibs_op_count(u64 config) * and the lower 7 bits of CurCnt are randomized. * Otherwise CurCnt has the full 27-bit current counter value. */ - if (config & IBS_OP_VAL) - count = (config & IBS_OP_MAX_CNT) << 4; - else if (ibs_caps & IBS_CAPS_RDWROPCNT) - count = (config & IBS_OP_CUR_CNT) >> 32; + if (op_ctl.op_val) { + count = op_ctl.opmaxcnt << 4; + if (ibs_caps & IBS_CAPS_OPCNTEXT) + count += op_ctl.opmaxcnt_ext << 20; + } else if (ibs_caps & IBS_CAPS_RDWROPCNT) { + count = op_ctl.opcurcnt; + } return count; } @@ -404,7 +409,7 @@ static void perf_ibs_start(struct perf_event *event, int flags) struct hw_perf_event *hwc = &event->hw; struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); - u64 period; + u64 period, config = 0; if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) return; @@ -413,13 +418,19 @@ static void perf_ibs_start(struct perf_event *event, int flags) hwc->state = 0; perf_ibs_set_period(perf_ibs, hwc, &period); + if (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_OPCNTEXT)) { + config |= period & IBS_OP_MAX_CNT_EXT_MASK; + period &= ~IBS_OP_MAX_CNT_EXT_MASK; + } + config |= period >> 4; + /* * Set STARTED before enabling the hardware, such that a subsequent NMI * must observe it. */ set_bit(IBS_STARTED, pcpu->state); clear_bit(IBS_STOPPING, pcpu->state); - perf_ibs_enable_event(perf_ibs, hwc, period >> 4); + perf_ibs_enable_event(perf_ibs, hwc, config); perf_event_update_userpage(event); } @@ -513,16 +524,118 @@ static void perf_ibs_del(struct perf_event *event, int flags) static void perf_ibs_read(struct perf_event *event) { } +/* + * We need to initialize with empty group if all attributes in the + * group are dynamic. + */ +static struct attribute *attrs_empty[] = { + NULL, +}; + +static struct attribute_group empty_format_group = { + .name = "format", + .attrs = attrs_empty, +}; + +static struct attribute_group empty_caps_group = { + .name = "caps", + .attrs = attrs_empty, +}; + +static const struct attribute_group *empty_attr_groups[] = { + &empty_format_group, + &empty_caps_group, + NULL, +}; + PMU_FORMAT_ATTR(rand_en, "config:57"); PMU_FORMAT_ATTR(cnt_ctl, "config:19"); +PMU_EVENT_ATTR_STRING(l3missonly, fetch_l3missonly, "config:59"); +PMU_EVENT_ATTR_STRING(l3missonly, op_l3missonly, "config:16"); +PMU_EVENT_ATTR_STRING(zen4_ibs_extensions, zen4_ibs_extensions, "1"); + +static umode_t +zen4_ibs_extensions_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return ibs_caps & IBS_CAPS_ZEN4 ? attr->mode : 0; +} -static struct attribute *ibs_fetch_format_attrs[] = { +static struct attribute *rand_en_attrs[] = { &format_attr_rand_en.attr, NULL, }; -static struct attribute *ibs_op_format_attrs[] = { - NULL, /* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */ +static struct attribute *fetch_l3missonly_attrs[] = { + &fetch_l3missonly.attr.attr, + NULL, +}; + +static struct attribute *zen4_ibs_extensions_attrs[] = { + &zen4_ibs_extensions.attr.attr, + NULL, +}; + +static struct attribute_group group_rand_en = { + .name = "format", + .attrs = rand_en_attrs, +}; + +static struct attribute_group group_fetch_l3missonly = { + .name = "format", + .attrs = fetch_l3missonly_attrs, + .is_visible = zen4_ibs_extensions_is_visible, +}; + +static struct attribute_group group_zen4_ibs_extensions = { + .name = "caps", + .attrs = zen4_ibs_extensions_attrs, + .is_visible = zen4_ibs_extensions_is_visible, +}; + +static const struct attribute_group *fetch_attr_groups[] = { + &group_rand_en, + &empty_caps_group, + NULL, +}; + +static const struct attribute_group *fetch_attr_update[] = { + &group_fetch_l3missonly, + &group_zen4_ibs_extensions, + NULL, +}; + +static umode_t +cnt_ctl_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return ibs_caps & IBS_CAPS_OPCNT ? attr->mode : 0; +} + +static struct attribute *cnt_ctl_attrs[] = { + &format_attr_cnt_ctl.attr, + NULL, +}; + +static struct attribute *op_l3missonly_attrs[] = { + &op_l3missonly.attr.attr, + NULL, +}; + +static struct attribute_group group_cnt_ctl = { + .name = "format", + .attrs = cnt_ctl_attrs, + .is_visible = cnt_ctl_is_visible, +}; + +static struct attribute_group group_op_l3missonly = { + .name = "format", + .attrs = op_l3missonly_attrs, + .is_visible = zen4_ibs_extensions_is_visible, +}; + +static const struct attribute_group *op_attr_update[] = { + &group_cnt_ctl, + &group_op_l3missonly, + &group_zen4_ibs_extensions, NULL, }; @@ -546,7 +659,6 @@ static struct perf_ibs perf_ibs_fetch = { .max_period = IBS_FETCH_MAX_CNT << 4, .offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK }, .offset_max = MSR_AMD64_IBSFETCH_REG_COUNT, - .format_attrs = ibs_fetch_format_attrs, .get_count = get_ibs_fetch_count, }; @@ -572,7 +684,6 @@ static struct perf_ibs perf_ibs_op = { .max_period = IBS_OP_MAX_CNT << 4, .offset_mask = { MSR_AMD64_IBSOP_REG_MASK }, .offset_max = MSR_AMD64_IBSOP_REG_COUNT, - .format_attrs = ibs_op_format_attrs, .get_count = get_ibs_op_count, }; @@ -588,7 +699,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) struct perf_ibs_data ibs_data; int offset, size, check_rip, offset_max, throttle = 0; unsigned int msr; - u64 *buf, *config, period; + u64 *buf, *config, period, new_config = 0; if (!test_bit(IBS_STARTED, pcpu->state)) { fail: @@ -664,6 +775,10 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) { regs.flags &= ~PERF_EFLAGS_EXACT; } else { + /* Workaround for erratum #1197 */ + if (perf_ibs->fetch_ignore_if_zero_rip && !(ibs_data.regs[1])) + goto out; + set_linear_ip(®s, ibs_data.regs[1]); regs.flags |= PERF_EFLAGS_EXACT; } @@ -678,18 +793,30 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) data.raw = &raw; } + /* + * rip recorded by IbsOpRip will not be consistent with rsp and rbp + * recorded as part of interrupt regs. Thus we need to use rip from + * interrupt regs while unwinding call stack. + */ + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) + data.callchain = perf_callchain(event, iregs); + throttle = perf_event_overflow(event, &data, ®s); out: if (throttle) { perf_ibs_stop(event, 0); } else { - period >>= 4; - - if ((ibs_caps & IBS_CAPS_RDWROPCNT) && - (*config & IBS_OP_CNT_CTL)) - period |= *config & IBS_OP_CUR_CNT_RAND; + if (perf_ibs == &perf_ibs_op) { + if (ibs_caps & IBS_CAPS_OPCNTEXT) { + new_config = period & IBS_OP_MAX_CNT_EXT_MASK; + period &= ~IBS_OP_MAX_CNT_EXT_MASK; + } + if ((ibs_caps & IBS_CAPS_RDWROPCNT) && (*config & IBS_OP_CNT_CTL)) + new_config |= *config & IBS_OP_CUR_CNT_RAND; + } + new_config |= period >> 4; - perf_ibs_enable_event(perf_ibs, hwc, period); + perf_ibs_enable_event(perf_ibs, hwc, new_config); } perf_event_update_userpage(event); @@ -726,17 +853,6 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name) perf_ibs->pcpu = pcpu; - /* register attributes */ - if (perf_ibs->format_attrs[0]) { - memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group)); - perf_ibs->format_group.name = "format"; - perf_ibs->format_group.attrs = perf_ibs->format_attrs; - - memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups)); - perf_ibs->attr_groups[0] = &perf_ibs->format_group; - perf_ibs->pmu.attr_groups = perf_ibs->attr_groups; - } - ret = perf_pmu_register(&perf_ibs->pmu, name, -1); if (ret) { perf_ibs->pcpu = NULL; @@ -746,10 +862,8 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name) return ret; } -static __init void perf_event_ibs_init(void) +static __init int perf_ibs_fetch_init(void) { - struct attribute **attr = ibs_op_format_attrs; - /* * Some chips fail to reset the fetch count when it is written; instead * they need a 0-1 transition of IbsFetchEn. @@ -757,21 +871,75 @@ static __init void perf_event_ibs_init(void) if (boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18) perf_ibs_fetch.fetch_count_reset_broken = 1; - perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); + if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model < 0x10) + perf_ibs_fetch.fetch_ignore_if_zero_rip = 1; + + if (ibs_caps & IBS_CAPS_ZEN4) + perf_ibs_fetch.config_mask |= IBS_FETCH_L3MISSONLY; + + perf_ibs_fetch.pmu.attr_groups = fetch_attr_groups; + perf_ibs_fetch.pmu.attr_update = fetch_attr_update; + + return perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); +} - if (ibs_caps & IBS_CAPS_OPCNT) { +static __init int perf_ibs_op_init(void) +{ + if (ibs_caps & IBS_CAPS_OPCNT) perf_ibs_op.config_mask |= IBS_OP_CNT_CTL; - *attr++ = &format_attr_cnt_ctl.attr; + + if (ibs_caps & IBS_CAPS_OPCNTEXT) { + perf_ibs_op.max_period |= IBS_OP_MAX_CNT_EXT_MASK; + perf_ibs_op.config_mask |= IBS_OP_MAX_CNT_EXT_MASK; + perf_ibs_op.cnt_mask |= IBS_OP_MAX_CNT_EXT_MASK; } - perf_ibs_pmu_init(&perf_ibs_op, "ibs_op"); - register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs"); + if (ibs_caps & IBS_CAPS_ZEN4) + perf_ibs_op.config_mask |= IBS_OP_L3MISSONLY; + + perf_ibs_op.pmu.attr_groups = empty_attr_groups; + perf_ibs_op.pmu.attr_update = op_attr_update; + + return perf_ibs_pmu_init(&perf_ibs_op, "ibs_op"); +} + +static __init int perf_event_ibs_init(void) +{ + int ret; + + ret = perf_ibs_fetch_init(); + if (ret) + return ret; + + ret = perf_ibs_op_init(); + if (ret) + goto err_op; + + ret = register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs"); + if (ret) + goto err_nmi; + pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps); + return 0; + +err_nmi: + perf_pmu_unregister(&perf_ibs_op.pmu); + free_percpu(perf_ibs_op.pcpu); + perf_ibs_op.pcpu = NULL; +err_op: + perf_pmu_unregister(&perf_ibs_fetch.pmu); + free_percpu(perf_ibs_fetch.pcpu); + perf_ibs_fetch.pcpu = NULL; + + return ret; } #else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */ -static __init void perf_event_ibs_init(void) { } +static __init int perf_event_ibs_init(void) +{ + return 0; +} #endif @@ -1041,9 +1209,7 @@ static __init int amd_ibs_init(void) x86_pmu_amd_ibs_starting_cpu, x86_pmu_amd_ibs_dying_cpu); - perf_event_ibs_init(); - - return 0; + return perf_event_ibs_init(); } /* Since we need the pci subsystem to init ibs we can't do this earlier: */ diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c index 6a98a76516214..2da6139b0977f 100644 --- a/arch/x86/events/amd/iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -18,8 +18,6 @@ #include "../perf_event.h" #include "iommu.h" -#define COUNTER_SHIFT 16 - /* iommu pmu conf masks */ #define GET_CSOURCE(x) ((x)->conf & 0xFFULL) #define GET_DEVID(x) (((x)->conf >> 8) & 0xFFFFULL) @@ -285,22 +283,31 @@ static void perf_iommu_start(struct perf_event *event, int flags) WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); hwc->state = 0; + /* + * To account for power-gating, which prevents write to + * the counter, we need to enable the counter + * before setting up counter register. + */ + perf_iommu_enable_event(event); + if (flags & PERF_EF_RELOAD) { - u64 prev_raw_count = local64_read(&hwc->prev_count); + u64 count = 0; struct amd_iommu *iommu = perf_event_2_iommu(event); + /* + * Since the IOMMU PMU only support counting mode, + * the counter always start with value zero. + */ amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr, - IOMMU_PC_COUNTER_REG, &prev_raw_count); + IOMMU_PC_COUNTER_REG, &count); } - perf_iommu_enable_event(event); perf_event_update_userpage(event); - } static void perf_iommu_read(struct perf_event *event) { - u64 count, prev, delta; + u64 count; struct hw_perf_event *hwc = &event->hw; struct amd_iommu *iommu = perf_event_2_iommu(event); @@ -311,14 +318,11 @@ static void perf_iommu_read(struct perf_event *event) /* IOMMU pc counter register is only 48 bits */ count &= GENMASK_ULL(47, 0); - prev = local64_read(&hwc->prev_count); - if (local64_cmpxchg(&hwc->prev_count, prev, count) != prev) - return; - - /* Handle 48-bit counter overflow */ - delta = (count << COUNTER_SHIFT) - (prev << COUNTER_SHIFT); - delta >>= COUNTER_SHIFT; - local64_add(delta, &event->count); + /* + * Since the counter always start with value zero, + * simply just accumulate the count for the event. + */ + local64_add(count, &event->count); } static void perf_iommu_stop(struct perf_event *event, int flags) @@ -328,15 +332,16 @@ static void perf_iommu_stop(struct perf_event *event, int flags) if (hwc->state & PERF_HES_UPTODATE) return; + /* + * To account for power-gating, in which reading the counter would + * return zero, we need to read the register before disabling. + */ + perf_iommu_read(event); + hwc->state |= PERF_HES_UPTODATE; + perf_iommu_disable_event(event); WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); hwc->state |= PERF_HES_STOPPED; - - if (hwc->state & PERF_HES_UPTODATE) - return; - - perf_iommu_read(event); - hwc->state |= PERF_HES_UPTODATE; } static int perf_iommu_add(struct perf_event *event, int flags) diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c index abef51320e3a5..c4892b7d0c36b 100644 --- a/arch/x86/events/amd/power.c +++ b/arch/x86/events/amd/power.c @@ -217,6 +217,7 @@ static struct pmu pmu_class = { .stop = pmu_event_stop, .read = pmu_event_read, .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .module = THIS_MODULE, }; static int power_cpu_exit(unsigned int cpu) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index d608ad17b69cc..814fb09cbdb63 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -223,6 +223,8 @@ static bool check_hw_exists(void) if (ret) goto msr_fail; for (i = 0; i < x86_pmu.num_counters_fixed; i++) { + if (fixed_counter_disabled(i)) + continue; if (val & (0x03 << i*4)) { bios_fail = 1; val_fail = val; @@ -1501,6 +1503,8 @@ void perf_event_print_debug(void) cpu, idx, prev_left); } for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { + if (fixed_counter_disabled(idx)) + continue; rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", @@ -1939,7 +1943,9 @@ static int __init init_hw_perf_events(void) pr_info("... generic registers: %d\n", x86_pmu.num_counters); pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask); pr_info("... max period: %016Lx\n", x86_pmu.max_period); - pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); + pr_info("... fixed-purpose events: %lu\n", + hweight64((((1ULL << x86_pmu.num_counters_fixed) - 1) + << INTEL_PMC_IDX_FIXED) & x86_pmu.intel_ctrl)); pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); /* @@ -2214,6 +2220,7 @@ static int x86_pmu_event_init(struct perf_event *event) if (err) { if (event->destroy) event->destroy(event); + event->destroy = NULL; } if (READ_ONCE(x86_pmu.attr_rdpmc) && @@ -2469,10 +2476,11 @@ static bool perf_hw_regs(struct pt_regs *regs) void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); struct unwind_state state; unsigned long addr; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (guest_cbs && guest_cbs->is_in_guest()) { /* TODO: We don't support guest os callchain now */ return; } @@ -2578,10 +2586,11 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); struct stack_frame frame; const unsigned long __user *fp; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (guest_cbs && guest_cbs->is_in_guest()) { /* TODO: We don't support guest os callchain now */ return; } @@ -2665,18 +2674,21 @@ static unsigned long code_segment_base(struct pt_regs *regs) unsigned long perf_instruction_pointer(struct pt_regs *regs) { - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) - return perf_guest_cbs->get_guest_ip(); + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); + + if (guest_cbs && guest_cbs->is_in_guest()) + return guest_cbs->get_guest_ip(); return regs->ip + code_segment_base(regs); } unsigned long perf_misc_flags(struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); int misc = 0; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - if (perf_guest_cbs->is_user_mode()) + if (guest_cbs && guest_cbs->is_in_guest()) { + if (guest_cbs->is_user_mode()) misc |= PERF_RECORD_MISC_GUEST_USER; else misc |= PERF_RECORD_MISC_GUEST_KERNEL; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index f0ac975f3d223..979b9c1d54d08 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -254,7 +254,7 @@ static struct event_constraint intel_icl_event_constraints[] = { INTEL_EVENT_CONSTRAINT_RANGE(0x03, 0x0a, 0xf), INTEL_EVENT_CONSTRAINT_RANGE(0x1f, 0x28, 0xf), INTEL_EVENT_CONSTRAINT(0x32, 0xf), /* SW_PREFETCH_ACCESS.* */ - INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x54, 0xf), + INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x56, 0xf), INTEL_EVENT_CONSTRAINT_RANGE(0x60, 0x8b, 0xf), INTEL_UEVENT_CONSTRAINT(0x04a3, 0xff), /* CYCLE_ACTIVITY.STALLS_TOTAL */ INTEL_UEVENT_CONSTRAINT(0x10a3, 0xff), /* CYCLE_ACTIVITY.CYCLES_MEM_ANY */ @@ -263,6 +263,7 @@ static struct event_constraint intel_icl_event_constraints[] = { INTEL_EVENT_CONSTRAINT_RANGE(0xa8, 0xb0, 0xf), INTEL_EVENT_CONSTRAINT_RANGE(0xb7, 0xbd, 0xf), INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xe6, 0xf), + INTEL_EVENT_CONSTRAINT(0xef, 0xf), INTEL_EVENT_CONSTRAINT_RANGE(0xf0, 0xf4, 0xf), EVENT_CONSTRAINT_END }; @@ -275,6 +276,57 @@ static struct extra_reg intel_icl_extra_regs[] __read_mostly = { EVENT_EXTRA_END }; +static struct extra_reg intel_spr_extra_regs[] __read_mostly = { + INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), + INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), + INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE), + INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE), + INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE), + EVENT_EXTRA_END +}; + +static struct event_constraint intel_spr_event_constraints[] = { + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x01c0, 0), /* INST_RETIRED.PREC_DIST */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ + FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */ + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FE_BOUND, 2), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BE_BOUND, 3), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_HEAVY_OPS, 4), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BR_MISPREDICT, 5), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FETCH_LAT, 6), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_MEM_BOUND, 7), + + INTEL_EVENT_CONSTRAINT(0x2e, 0xff), + INTEL_EVENT_CONSTRAINT(0x3c, 0xff), + /* + * Generally event codes < 0x90 are restricted to counters 0-3. + * The 0x2E and 0x3C are exception, which has no restriction. + */ + INTEL_EVENT_CONSTRAINT_RANGE(0x01, 0x8f, 0xf), + + INTEL_UEVENT_CONSTRAINT(0x01a3, 0xf), + INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), + INTEL_UEVENT_CONSTRAINT(0x08a3, 0xf), + INTEL_UEVENT_CONSTRAINT(0x04a4, 0x1), + INTEL_UEVENT_CONSTRAINT(0x08a4, 0x1), + INTEL_UEVENT_CONSTRAINT(0x02cd, 0x1), + INTEL_EVENT_CONSTRAINT(0xce, 0x1), + INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xdf, 0xf), + /* + * Generally event codes >= 0x90 are likely to have no restrictions. + * The exception are defined as above. + */ + INTEL_EVENT_CONSTRAINT_RANGE(0x90, 0xfe, 0xff), + + EVENT_CONSTRAINT_END +}; + + EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2"); @@ -314,11 +366,15 @@ EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles, EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale, "4", "2"); -EVENT_ATTR_STR(slots, slots, "event=0x00,umask=0x4"); -EVENT_ATTR_STR(topdown-retiring, td_retiring, "event=0x00,umask=0x80"); -EVENT_ATTR_STR(topdown-bad-spec, td_bad_spec, "event=0x00,umask=0x81"); -EVENT_ATTR_STR(topdown-fe-bound, td_fe_bound, "event=0x00,umask=0x82"); -EVENT_ATTR_STR(topdown-be-bound, td_be_bound, "event=0x00,umask=0x83"); +EVENT_ATTR_STR(slots, slots, "event=0x00,umask=0x4"); +EVENT_ATTR_STR(topdown-retiring, td_retiring, "event=0x00,umask=0x80"); +EVENT_ATTR_STR(topdown-bad-spec, td_bad_spec, "event=0x00,umask=0x81"); +EVENT_ATTR_STR(topdown-fe-bound, td_fe_bound, "event=0x00,umask=0x82"); +EVENT_ATTR_STR(topdown-be-bound, td_be_bound, "event=0x00,umask=0x83"); +EVENT_ATTR_STR(topdown-heavy-ops, td_heavy_ops, "event=0x00,umask=0x84"); +EVENT_ATTR_STR(topdown-br-mispredict, td_br_mispredict, "event=0x00,umask=0x85"); +EVENT_ATTR_STR(topdown-fetch-lat, td_fetch_lat, "event=0x00,umask=0x86"); +EVENT_ATTR_STR(topdown-mem-bound, td_mem_bound, "event=0x00,umask=0x87"); static struct attribute *snb_events_attrs[] = { EVENT_PTR(td_slots_issued), @@ -384,6 +440,108 @@ static u64 intel_pmu_event_map(int hw_event) return intel_perfmon_event_map[hw_event]; } +static __initconst const u64 spr_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x81d0, + [ C(RESULT_MISS) ] = 0xe124, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x82d0, + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_MISS) ] = 0xe424, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x12a, + [ C(RESULT_MISS) ] = 0x12a, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x12a, + [ C(RESULT_MISS) ] = 0x12a, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x81d0, + [ C(RESULT_MISS) ] = 0xe12, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x82d0, + [ C(RESULT_MISS) ] = 0xe13, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = 0xe11, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x4c4, + [ C(RESULT_MISS) ] = 0x4c5, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(NODE) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x12a, + [ C(RESULT_MISS) ] = 0x12a, + }, + }, +}; + +static __initconst const u64 spr_hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x10001, + [ C(RESULT_MISS) ] = 0x3fbfc00001, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x3f3ffc0002, + [ C(RESULT_MISS) ] = 0x3f3fc00002, + }, + }, + [ C(NODE) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x10c000001, + [ C(RESULT_MISS) ] = 0x3fb3000001, + }, + }, +}; + /* * Notes on the events: * - data reads do not include code reads (comparable to earlier tables) @@ -2114,18 +2272,6 @@ static void intel_tfa_pmu_enable_all(int added) intel_pmu_enable_all(added); } -static void enable_counter_freeze(void) -{ - update_debugctlmsr(get_debugctlmsr() | - DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI); -} - -static void disable_counter_freeze(void) -{ - update_debugctlmsr(get_debugctlmsr() & - ~DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI); -} - static inline u64 intel_pmu_get_status(void) { u64 status; @@ -2317,8 +2463,8 @@ static void __icl_update_topdown_event(struct perf_event *event, } } -static void update_saved_topdown_regs(struct perf_event *event, - u64 slots, u64 metrics) +static void update_saved_topdown_regs(struct perf_event *event, u64 slots, + u64 metrics, int metric_end) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct perf_event *other; @@ -2327,7 +2473,7 @@ static void update_saved_topdown_regs(struct perf_event *event, event->hw.saved_slots = slots; event->hw.saved_metric = metrics; - for_each_set_bit(idx, cpuc->active_mask, INTEL_PMC_IDX_TD_BE_BOUND + 1) { + for_each_set_bit(idx, cpuc->active_mask, metric_end + 1) { if (!is_topdown_idx(idx)) continue; other = cpuc->events[idx]; @@ -2342,7 +2488,8 @@ static void update_saved_topdown_regs(struct perf_event *event, * The PERF_METRICS and Fixed counter 3 are read separately. The values may be * modify by a NMI. PMU has to be disabled before calling this function. */ -static u64 icl_update_topdown_event(struct perf_event *event) + +static u64 intel_update_topdown_event(struct perf_event *event, int metric_end) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct perf_event *other; @@ -2358,7 +2505,7 @@ static u64 icl_update_topdown_event(struct perf_event *event) /* read PERF_METRICS */ rdpmcl(INTEL_PMC_FIXED_RDPMC_METRICS, metrics); - for_each_set_bit(idx, cpuc->active_mask, INTEL_PMC_IDX_TD_BE_BOUND + 1) { + for_each_set_bit(idx, cpuc->active_mask, metric_end + 1) { if (!is_topdown_idx(idx)) continue; other = cpuc->events[idx]; @@ -2384,7 +2531,7 @@ static u64 icl_update_topdown_event(struct perf_event *event) * Don't need to reset the PERF_METRICS and Fixed counter 3. * Because the values will be restored in next schedule in. */ - update_saved_topdown_regs(event, slots, metrics); + update_saved_topdown_regs(event, slots, metrics, metric_end); reset = false; } @@ -2393,12 +2540,18 @@ static u64 icl_update_topdown_event(struct perf_event *event) wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0); wrmsrl(MSR_PERF_METRICS, 0); if (event) - update_saved_topdown_regs(event, 0, 0); + update_saved_topdown_regs(event, 0, 0, metric_end); } return slots; } +static u64 icl_update_topdown_event(struct perf_event *event) +{ + return intel_update_topdown_event(event, INTEL_PMC_IDX_METRIC_BASE + + x86_pmu.num_topdown_events - 1); +} + static void intel_pmu_read_topdown_event(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -2553,8 +2706,11 @@ static void intel_pmu_reset(void) wrmsrl_safe(x86_pmu_config_addr(idx), 0ull); wrmsrl_safe(x86_pmu_event_addr(idx), 0ull); } - for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) + for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { + if (fixed_counter_disabled(idx)) + continue; wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); + } if (ds) ds->bts_index = ds->bts_buffer_base; @@ -2578,6 +2734,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) { struct perf_sample_data data; struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct perf_guest_info_callbacks *guest_cbs; int bit; int handled = 0; @@ -2631,9 +2788,11 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) */ if (__test_and_clear_bit(GLOBAL_STATUS_TRACE_TOPAPMI_BIT, (unsigned long *)&status)) { handled++; - if (unlikely(perf_guest_cbs && perf_guest_cbs->is_in_guest() && - perf_guest_cbs->handle_intel_pt_intr)) - perf_guest_cbs->handle_intel_pt_intr(); + + guest_cbs = perf_get_guest_cbs(); + if (unlikely(guest_cbs && guest_cbs->is_in_guest() && + guest_cbs->handle_intel_pt_intr)) + guest_cbs->handle_intel_pt_intr(); else intel_pt_interrupt(); } @@ -2677,95 +2836,6 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) return handled; } -static bool disable_counter_freezing = true; -static int __init intel_perf_counter_freezing_setup(char *s) -{ - bool res; - - if (kstrtobool(s, &res)) - return -EINVAL; - - disable_counter_freezing = !res; - return 1; -} -__setup("perf_v4_pmi=", intel_perf_counter_freezing_setup); - -/* - * Simplified handler for Arch Perfmon v4: - * - We rely on counter freezing/unfreezing to enable/disable the PMU. - * This is done automatically on PMU ack. - * - Ack the PMU only after the APIC. - */ - -static int intel_pmu_handle_irq_v4(struct pt_regs *regs) -{ - struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - int handled = 0; - bool bts = false; - u64 status; - int pmu_enabled = cpuc->enabled; - int loops = 0; - - /* PMU has been disabled because of counter freezing */ - cpuc->enabled = 0; - if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { - bts = true; - intel_bts_disable_local(); - handled = intel_pmu_drain_bts_buffer(); - handled += intel_bts_interrupt(); - } - status = intel_pmu_get_status(); - if (!status) - goto done; -again: - intel_pmu_lbr_read(); - if (++loops > 100) { - static bool warned; - - if (!warned) { - WARN(1, "perfevents: irq loop stuck!\n"); - perf_event_print_debug(); - warned = true; - } - intel_pmu_reset(); - goto done; - } - - - handled += handle_pmi_common(regs, status); -done: - /* Ack the PMI in the APIC */ - apic_write(APIC_LVTPC, APIC_DM_NMI); - - /* - * The counters start counting immediately while ack the status. - * Make it as close as possible to IRET. This avoids bogus - * freezing on Skylake CPUs. - */ - if (status) { - intel_pmu_ack_status(status); - } else { - /* - * CPU may issues two PMIs very close to each other. - * When the PMI handler services the first one, the - * GLOBAL_STATUS is already updated to reflect both. - * When it IRETs, the second PMI is immediately - * handled and it sees clear status. At the meantime, - * there may be a third PMI, because the freezing bit - * isn't set since the ack in first PMI handlers. - * Double check if there is more work to be done. - */ - status = intel_pmu_get_status(); - if (status) - goto again; - } - - if (bts) - intel_bts_enable_local(); - cpuc->enabled = pmu_enabled; - return handled; -} - /* * This handler is triggered by the local APIC, so the APIC IRQ handling * rules apply: @@ -2859,8 +2929,10 @@ intel_vlbr_constraints(struct perf_event *event) { struct event_constraint *c = &vlbr_constraint; - if (unlikely(constraint_match(c, event->hw.config))) + if (unlikely(constraint_match(c, event->hw.config))) { + event->hw.flags |= c->flags; return c; + } return NULL; } @@ -3531,6 +3603,26 @@ static int core_pmu_hw_config(struct perf_event *event) return intel_pmu_bts_config(event); } +#define INTEL_TD_METRIC_AVAILABLE_MAX (INTEL_TD_METRIC_RETIRING + \ + ((x86_pmu.num_topdown_events - 1) << 8)) + +static bool is_available_metric_event(struct perf_event *event) +{ + return is_metric_event(event) && + event->attr.config <= INTEL_TD_METRIC_AVAILABLE_MAX; +} + +static inline bool is_mem_loads_event(struct perf_event *event) +{ + return (event->attr.config & INTEL_ARCH_EVENT_MASK) == X86_CONFIG(.event=0xcd, .umask=0x01); +} + +static inline bool is_mem_loads_aux_event(struct perf_event *event) +{ + return (event->attr.config & INTEL_ARCH_EVENT_MASK) == X86_CONFIG(.event=0x03, .umask=0x82); +} + + static int intel_pmu_hw_config(struct perf_event *event) { int ret = x86_pmu_hw_config(event); @@ -3543,6 +3635,9 @@ static int intel_pmu_hw_config(struct perf_event *event) return ret; if (event->attr.precise_ip) { + if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT) + return -EINVAL; + if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) { event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD; if (!(event->attr.sample_type & @@ -3604,7 +3699,7 @@ static int intel_pmu_hw_config(struct perf_event *event) if (event->attr.config & X86_ALL_EVENT_FLAGS) return -EINVAL; - if (is_metric_event(event)) { + if (is_available_metric_event(event)) { struct perf_event *leader = event->group_leader; /* The metric events don't support sampling. */ @@ -3633,6 +3728,33 @@ static int intel_pmu_hw_config(struct perf_event *event) } } + /* + * The load latency event X86_CONFIG(.event=0xcd, .umask=0x01) on SPR + * doesn't function quite right. As a work-around it needs to always be + * co-scheduled with a auxiliary event X86_CONFIG(.event=0x03, .umask=0x82). + * The actual count of this second event is irrelevant it just needs + * to be active to make the first event function correctly. + * + * In a group, the auxiliary event must be in front of the load latency + * event. The rule is to simplify the implementation of the check. + * That's because perf cannot have a complete group at the moment. + */ + if (x86_pmu.flags & PMU_FL_MEM_LOADS_AUX && + (event->attr.sample_type & PERF_SAMPLE_DATA_SRC) && + is_mem_loads_event(event)) { + struct perf_event *leader = event->group_leader; + struct perf_event *sibling = NULL; + + if (!is_mem_loads_aux_event(leader)) { + for_each_sibling_event(sibling, leader) { + if (is_mem_loads_aux_event(sibling)) + break; + } + if (list_entry_is_head(sibling, &leader->sibling_list, sibling_list)) + return -ENODATA; + } + } + if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY)) return 0; @@ -3820,6 +3942,31 @@ icl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, return hsw_get_event_constraints(cpuc, idx, event); } +static struct event_constraint * +spr_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct event_constraint *c; + + c = icl_get_event_constraints(cpuc, idx, event); + + /* + * The :ppp indicates the Precise Distribution (PDist) facility, which + * is only supported on the GP counter 0. If a :ppp event which is not + * available on the GP counter 0, error out. + * Exception: Instruction PDIR is only available on the fixed counter 0. + */ + if ((event->attr.precise_ip == 3) && + !constraint_match(&fixed0_constraint, event->hw.config)) { + if (c->idxmsk64 & BIT_ULL(0)) + return &counter0_constraint; + + return &emptyconstraint; + } + + return c; +} + static struct event_constraint * glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) @@ -3909,6 +4056,14 @@ static u64 nhm_limit_period(struct perf_event *event, u64 left) return max(left, 32ULL); } +static u64 spr_limit_period(struct perf_event *event, u64 left) +{ + if (event->attr.precise_ip == 3) + return max(left, 128ULL); + + return left; +} + PMU_FORMAT_ATTR(event, "config:0-7" ); PMU_FORMAT_ATTR(umask, "config:8-15" ); PMU_FORMAT_ATTR(edge, "config:18" ); @@ -4050,9 +4205,6 @@ static void intel_pmu_cpu_starting(int cpu) if (x86_pmu.version > 1) flip_smm_bit(&x86_pmu.attr_freeze_on_smi); - if (x86_pmu.counter_freezing) - enable_counter_freeze(); - /* Disable perf metrics if any added CPU doesn't support it. */ if (x86_pmu.intel_cap.perf_metrics) { union perf_capabilities perf_cap; @@ -4123,9 +4275,6 @@ static void free_excl_cntrs(struct cpu_hw_events *cpuc) static void intel_pmu_cpu_dying(int cpu) { fini_debug_store_on_cpu(cpu); - - if (x86_pmu.counter_freezing) - disable_counter_freeze(); } void intel_cpuc_finish(struct cpu_hw_events *cpuc) @@ -4503,39 +4652,6 @@ static __init void intel_nehalem_quirk(void) } } -static const struct x86_cpu_desc counter_freezing_ucodes[] = { - INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT, 2, 0x0000000e), - INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT, 9, 0x0000002e), - INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT, 10, 0x00000008), - INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_D, 1, 0x00000028), - INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_PLUS, 1, 0x00000028), - INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_PLUS, 8, 0x00000006), - {} -}; - -static bool intel_counter_freezing_broken(void) -{ - return !x86_cpu_has_min_microcode_rev(counter_freezing_ucodes); -} - -static __init void intel_counter_freezing_quirk(void) -{ - /* Check if it's already disabled */ - if (disable_counter_freezing) - return; - - /* - * If the system starts with the wrong ucode, leave the - * counter-freezing feature permanently disabled. - */ - if (intel_counter_freezing_broken()) { - pr_info("PMU counter freezing disabled due to CPU errata," - "please upgrade microcode\n"); - x86_pmu.counter_freezing = false; - x86_pmu.handle_irq = intel_pmu_handle_irq; - } -} - /* * enable software workaround for errata: * SNB: BJ122 @@ -4645,6 +4761,42 @@ static struct attribute *icl_tsx_events_attrs[] = { NULL, }; + +EVENT_ATTR_STR(mem-stores, mem_st_spr, "event=0xcd,umask=0x2"); +EVENT_ATTR_STR(mem-loads-aux, mem_ld_aux, "event=0x03,umask=0x82"); + +static struct attribute *spr_events_attrs[] = { + EVENT_PTR(mem_ld_hsw), + EVENT_PTR(mem_st_spr), + EVENT_PTR(mem_ld_aux), + NULL, +}; + +static struct attribute *spr_td_events_attrs[] = { + EVENT_PTR(slots), + EVENT_PTR(td_retiring), + EVENT_PTR(td_bad_spec), + EVENT_PTR(td_fe_bound), + EVENT_PTR(td_be_bound), + EVENT_PTR(td_heavy_ops), + EVENT_PTR(td_br_mispredict), + EVENT_PTR(td_fetch_lat), + EVENT_PTR(td_mem_bound), + NULL, +}; + +static struct attribute *spr_tsx_events_attrs[] = { + EVENT_PTR(tx_start), + EVENT_PTR(tx_abort), + EVENT_PTR(tx_commit), + EVENT_PTR(tx_capacity_read), + EVENT_PTR(tx_capacity_write), + EVENT_PTR(tx_conflict), + EVENT_PTR(cycles_t), + EVENT_PTR(cycles_ct), + NULL, +}; + static ssize_t freeze_on_smi_show(struct device *cdev, struct device_attribute *attr, char *buf) @@ -4868,7 +5020,7 @@ __init int intel_pmu_init(void) union cpuid10_eax eax; union cpuid10_ebx ebx; struct event_constraint *c; - unsigned int unused; + unsigned int fixed_mask; struct extra_reg *er; bool pmem = false; int version, i; @@ -4890,7 +5042,7 @@ __init int intel_pmu_init(void) * Check whether the Architectural PerfMon supports * Branch Misses Retired hw_event or not. */ - cpuid(10, &eax.full, &ebx.full, &unused, &edx.full); + cpuid(10, &eax.full, &ebx.full, &fixed_mask, &edx.full); if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT) return -ENODEV; @@ -4914,15 +5066,15 @@ __init int intel_pmu_init(void) * Quirk: v2 perfmon does not report fixed-purpose events, so * assume at least 3 events, when not running in a hypervisor: */ - if (version > 1) { + if (version > 1 && version < 5) { int assume = 3 * !boot_cpu_has(X86_FEATURE_HYPERVISOR); x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, assume); - } - if (version >= 4) - x86_pmu.counter_freezing = !disable_counter_freezing; + fixed_mask = (1L << x86_pmu.num_counters_fixed) - 1; + } else if (version >= 5) + x86_pmu.num_counters_fixed = fls(fixed_mask); if (boot_cpu_has(X86_FEATURE_PDCM)) { u64 capabilities; @@ -5037,7 +5189,6 @@ __init int intel_pmu_init(void) case INTEL_FAM6_ATOM_GOLDMONT: case INTEL_FAM6_ATOM_GOLDMONT_D: - x86_add_quirk(intel_counter_freezing_quirk); memcpy(hw_cache_event_ids, glm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs, @@ -5064,7 +5215,6 @@ __init int intel_pmu_init(void) break; case INTEL_FAM6_ATOM_GOLDMONT_PLUS: - x86_add_quirk(intel_counter_freezing_quirk); memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs, @@ -5408,12 +5558,50 @@ __init int intel_pmu_init(void) x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); x86_pmu.lbr_pt_coexist = true; intel_pmu_pebs_data_source_skl(pmem); + x86_pmu.num_topdown_events = 4; x86_pmu.update_topdown_event = icl_update_topdown_event; x86_pmu.set_topdown_event_period = icl_set_topdown_event_period; pr_cont("Icelake events, "); name = "icelake"; break; + case INTEL_FAM6_SAPPHIRERAPIDS_X: + pmem = true; + x86_pmu.late_ack = true; + memcpy(hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, spr_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + + x86_pmu.event_constraints = intel_spr_event_constraints; + x86_pmu.pebs_constraints = intel_spr_pebs_event_constraints; + x86_pmu.extra_regs = intel_spr_extra_regs; + x86_pmu.limit_period = spr_limit_period; + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.pebs_block = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_NO_HT_SHARING; + x86_pmu.flags |= PMU_FL_PEBS_ALL; + x86_pmu.flags |= PMU_FL_INSTR_LATENCY; + x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; + + x86_pmu.hw_config = hsw_hw_config; + x86_pmu.get_event_constraints = spr_get_event_constraints; + extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? + hsw_format_attr : nhm_format_attr; + extra_skl_attr = skl_format_attr; + mem_attr = spr_events_attrs; + td_attr = spr_td_events_attrs; + tsx_attr = spr_tsx_events_attrs; + x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); + x86_pmu.lbr_pt_coexist = true; + intel_pmu_pebs_data_source_skl(pmem); + x86_pmu.num_topdown_events = 8; + x86_pmu.update_topdown_event = icl_update_topdown_event; + x86_pmu.set_topdown_event_period = icl_set_topdown_event_period; + pr_cont("Sapphire Rapids events, "); + name = "sapphire_rapids"; + break; + default: switch (x86_pmu.version) { case 1: @@ -5456,8 +5644,7 @@ __init int intel_pmu_init(void) x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED; } - x86_pmu.intel_ctrl |= - ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED; + x86_pmu.intel_ctrl |= (u64)fixed_mask << INTEL_PMC_IDX_FIXED; if (x86_pmu.event_constraints) { /* @@ -5470,13 +5657,22 @@ __init int intel_pmu_init(void) * events to the generic counters. */ if (c->idxmsk64 & INTEL_PMC_MSK_TOPDOWN) { + /* + * Disable topdown slots and metrics events, + * if slots event is not in CPUID. + */ + if (!(INTEL_PMC_MSK_FIXED_SLOTS & x86_pmu.intel_ctrl)) + c->idxmsk64 = 0; c->weight = hweight64(c->idxmsk64); continue; } - if (c->cmask == FIXED_EVENT_FLAGS - && c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) { - c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; + if (c->cmask == FIXED_EVENT_FLAGS) { + /* Disabled fixed counters which are not in CPUID */ + c->idxmsk64 &= x86_pmu.intel_ctrl; + + if (c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) + c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; } c->idxmsk64 &= ~(~0ULL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed)); @@ -5522,13 +5718,6 @@ __init int intel_pmu_init(void) pr_cont("full-width counters, "); } - /* - * For arch perfmon 4 use counter freezing to avoid - * several MSR accesses in the PMI. - */ - if (x86_pmu.counter_freezing) - x86_pmu.handle_irq = intel_pmu_handle_irq_v4; - if (x86_pmu.intel_cap.perf_metrics) x86_pmu.intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 5965d341350ca..dd29436947160 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -35,7 +35,9 @@ union intel_x86_pebs_dse { unsigned int ld_dse:4; unsigned int ld_stlb_miss:1; unsigned int ld_locked:1; - unsigned int ld_reserved:26; + unsigned int ld_data_blk:1; + unsigned int ld_addr_blk:1; + unsigned int ld_reserved:24; }; struct { unsigned int st_l1d_hit:1; @@ -44,6 +46,12 @@ union intel_x86_pebs_dse { unsigned int st_locked:1; unsigned int st_reserved2:26; }; + struct { + unsigned int st_lat_dse:4; + unsigned int st_lat_stlb_miss:1; + unsigned int st_lat_locked:1; + unsigned int ld_reserved3:26; + }; }; @@ -197,9 +205,74 @@ static u64 load_latency_data(u64 status) if (dse.ld_locked) val |= P(LOCK, LOCKED); + /* + * Ice Lake and earlier models do not support block infos. + */ + if (!x86_pmu.pebs_block) { + val |= P(BLK, NA); + return val; + } + /* + * bit 6: load was blocked since its data could not be forwarded + * from a preceding store + */ + if (dse.ld_data_blk) + val |= P(BLK, DATA); + + /* + * bit 7: load was blocked due to potential address conflict with + * a preceding store + */ + if (dse.ld_addr_blk) + val |= P(BLK, ADDR); + + if (!dse.ld_data_blk && !dse.ld_addr_blk) + val |= P(BLK, NA); + return val; } +static u64 store_latency_data(u64 status) +{ + union intel_x86_pebs_dse dse; + union perf_mem_data_src src; + u64 val; + + dse.val = status; + + /* + * use the mapping table for bit 0-3 + */ + val = pebs_data_source[dse.st_lat_dse]; + + /* + * bit 4: TLB access + * 0 = did not miss 2nd level TLB + * 1 = missed 2nd level TLB + */ + if (dse.st_lat_stlb_miss) + val |= P(TLB, MISS) | P(TLB, L2); + else + val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2); + + /* + * bit 5: locked prefix + */ + if (dse.st_lat_locked) + val |= P(LOCK, LOCKED); + + val |= P(BLK, NA); + + /* + * the pebs_data_source table is only for loads + * so override the mem_op to say STORE instead + */ + src.val = val; + src.mem_op = P(OP,STORE); + + return src.val; +} + struct pebs_record_core { u64 flags, ip; u64 ax, bx, cx, dx; @@ -867,6 +940,33 @@ struct event_constraint intel_icl_pebs_event_constraints[] = { EVENT_CONSTRAINT_END }; +struct event_constraint intel_spr_pebs_event_constraints[] = { + INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL), + INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), + + INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xfe), + INTEL_PLD_CONSTRAINT(0x1cd, 0xfe), + INTEL_PSD_CONSTRAINT(0x2cd, 0x1), + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */ + + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), + + INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf), + + /* + * Everything else is handled by PMU_FL_PEBS_ALL, because we + * need the full constraints from the main table. + */ + + EVENT_CONSTRAINT_END +}; + struct event_constraint *intel_pebs_constraints(struct perf_event *event) { struct event_constraint *c; @@ -957,7 +1057,8 @@ static void adaptive_pebs_record_size_update(void) } #define PERF_PEBS_MEMINFO_TYPE (PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC | \ - PERF_SAMPLE_PHYS_ADDR | PERF_SAMPLE_WEIGHT | \ + PERF_SAMPLE_PHYS_ADDR | \ + PERF_SAMPLE_WEIGHT_TYPE | \ PERF_SAMPLE_TRANSACTION) static u64 pebs_update_adaptive_cfg(struct perf_event *event) @@ -983,7 +1084,7 @@ static u64 pebs_update_adaptive_cfg(struct perf_event *event) gprs = (sample_type & PERF_SAMPLE_REGS_INTR) && (attr->sample_regs_intr & PEBS_GP_REGS); - tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT) && + tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT_TYPE) && ((attr->config & INTEL_ARCH_EVENT_MASK) == x86_pmu.rtm_abort_event); @@ -1327,6 +1428,8 @@ static u64 get_data_src(struct perf_event *event, u64 aux) if (fl & PERF_X86_EVENT_PEBS_LDLAT) val = load_latency_data(aux); + else if (fl & PERF_X86_EVENT_PEBS_STLAT) + val = store_latency_data(aux); else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC)) val = precise_datala_hsw(event, aux); else if (fst) @@ -1361,8 +1464,8 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, /* * Use latency for weight (only avail with PEBS-LL) */ - if (fll && (sample_type & PERF_SAMPLE_WEIGHT)) - data->weight = pebs->lat; + if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE)) + data->weight.full = pebs->lat; /* * data.data_src encodes the data source @@ -1454,8 +1557,8 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, if (x86_pmu.intel_cap.pebs_format >= 2) { /* Only set the TSX weight when no memory weight. */ - if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll) - data->weight = intel_get_tsx_weight(pebs->tsx_tuning); + if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll) + data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning); if (sample_type & PERF_SAMPLE_TRANSACTION) data->txn = intel_get_tsx_transaction(pebs->tsx_tuning, @@ -1499,6 +1602,9 @@ static void adaptive_pebs_save_regs(struct pt_regs *regs, #endif } +#define PEBS_LATENCY_MASK 0xffff +#define PEBS_CACHE_LATENCY_OFFSET 32 + /* * With adaptive PEBS the layout depends on what fields are configured. */ @@ -1569,9 +1675,27 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, } if (format_size & PEBS_DATACFG_MEMINFO) { - if (sample_type & PERF_SAMPLE_WEIGHT) - data->weight = meminfo->latency ?: - intel_get_tsx_weight(meminfo->tsx_tuning); + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) { + u64 weight = meminfo->latency; + + if (x86_pmu.flags & PMU_FL_INSTR_LATENCY) { + data->weight.var2_w = weight & PEBS_LATENCY_MASK; + weight >>= PEBS_CACHE_LATENCY_OFFSET; + } + + /* + * Although meminfo::latency is defined as a u64, + * only the lower 32 bits include the valid data + * in practice on Ice Lake and earlier platforms. + */ + if (sample_type & PERF_SAMPLE_WEIGHT) { + data->weight.full = weight ?: + intel_get_tsx_weight(meminfo->tsx_tuning); + } else { + data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?: + intel_get_tsx_weight(meminfo->tsx_tuning); + } + } if (sample_type & PERF_SAMPLE_DATA_SRC) data->data_src.val = get_data_src(event, meminfo->aux); diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 05e43d0f430bc..f4d2322f4c629 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -62,7 +62,7 @@ static struct pt_cap_desc { PT_CAP(single_range_output, 0, CPUID_ECX, BIT(2)), PT_CAP(output_subsys, 0, CPUID_ECX, BIT(3)), PT_CAP(payloads_lip, 0, CPUID_ECX, BIT(31)), - PT_CAP(num_address_ranges, 1, CPUID_EAX, 0x3), + PT_CAP(num_address_ranges, 1, CPUID_EAX, 0x7), PT_CAP(mtc_periods, 1, CPUID_EAX, 0xffff0000), PT_CAP(cycle_thresholds, 1, CPUID_EBX, 0xffff), PT_CAP(psb_periods, 1, CPUID_EBX, 0xffff0000), @@ -460,7 +460,7 @@ static u64 pt_config_filters(struct perf_event *event) pt->filters.filter[range].msr_b = filter->msr_b; } - rtit_ctl |= filter->config << pt_address_ranges[range].reg_off; + rtit_ctl |= (u64)filter->config << pt_address_ranges[range].reg_off; } return rtit_ctl; diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 0aa40798252b6..9625137aaed53 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -415,7 +415,7 @@ #define ICX_M3UPI_PCI_PMON_BOX_CTL 0xa0 /* ICX IMC */ -#define ICX_NUMBER_IMC_CHN 2 +#define ICX_NUMBER_IMC_CHN 3 #define ICX_IMC_MEM_STRIDE 0x4 DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); @@ -3516,6 +3516,9 @@ static int skx_cha_hw_config(struct intel_uncore_box *box, struct perf_event *ev struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; struct extra_reg *er; int idx = 0; + /* Any of the CHA events may be filtered by Thread/Core-ID.*/ + if (event->hw.config & SNBEP_CBO_PMON_CTL_TID_EN) + idx = SKX_CHA_MSR_PMON_BOX_FILTER_TID; for (er = skx_uncore_cha_extra_regs; er->msr; er++) { if (er->event != (event->hw.config & er->config_mask)) @@ -3583,6 +3586,7 @@ static struct event_constraint skx_uncore_iio_constraints[] = { UNCORE_EVENT_CONSTRAINT(0xc0, 0xc), UNCORE_EVENT_CONSTRAINT(0xc5, 0xc), UNCORE_EVENT_CONSTRAINT(0xd4, 0xc), + UNCORE_EVENT_CONSTRAINT(0xd5, 0xc), EVENT_CONSTRAINT_END }; @@ -4419,7 +4423,7 @@ static void __snr_uncore_mmio_init_box(struct intel_uncore_box *box, return; pci_read_config_dword(pdev, SNR_IMC_MMIO_BASE_OFFSET, &pci_dword); - addr = (pci_dword & SNR_IMC_MMIO_BASE_MASK) << 23; + addr = ((resource_size_t)pci_dword & SNR_IMC_MMIO_BASE_MASK) << 23; pci_read_config_dword(pdev, mem_offset, &pci_dword); addr |= (pci_dword & SNR_IMC_MMIO_MEM0_MASK) << 12; @@ -4541,10 +4545,10 @@ static struct uncore_event_desc snr_uncore_imc_freerunning_events[] = { INTEL_UNCORE_EVENT_DESC(dclk, "event=0xff,umask=0x10"), INTEL_UNCORE_EVENT_DESC(read, "event=0xff,umask=0x20"), - INTEL_UNCORE_EVENT_DESC(read.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(read.scale, "6.103515625e-5"), INTEL_UNCORE_EVENT_DESC(read.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(write, "event=0xff,umask=0x21"), - INTEL_UNCORE_EVENT_DESC(write.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(write.scale, "6.103515625e-5"), INTEL_UNCORE_EVENT_DESC(write.unit, "MiB"), { /* end: all zeroes */ }, }; @@ -4638,8 +4642,10 @@ static struct event_constraint icx_uncore_iio_constraints[] = { UNCORE_EVENT_CONSTRAINT(0x02, 0x3), UNCORE_EVENT_CONSTRAINT(0x03, 0x3), UNCORE_EVENT_CONSTRAINT(0x83, 0x3), + UNCORE_EVENT_CONSTRAINT(0x88, 0xc), UNCORE_EVENT_CONSTRAINT(0xc0, 0xc), UNCORE_EVENT_CONSTRAINT(0xc5, 0xc), + UNCORE_EVENT_CONSTRAINT(0xd5, 0xc), EVENT_CONSTRAINT_END }; @@ -4809,9 +4815,10 @@ static struct intel_uncore_type icx_uncore_m2m = { .perf_ctr = SNR_M2M_PCI_PMON_CTR0, .event_ctl = SNR_M2M_PCI_PMON_CTL0, .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .event_mask_ext = SNR_M2M_PCI_PMON_UMASK_EXT, .box_ctl = SNR_M2M_PCI_PMON_BOX_CTL, .ops = &snr_m2m_uncore_pci_ops, - .format_group = &skx_uncore_format_group, + .format_group = &snr_m2m_uncore_format_group, }; static struct attribute *icx_upi_uncore_formats_attr[] = { @@ -4967,12 +4974,12 @@ static struct intel_uncore_ops icx_uncore_mmio_ops = { static struct intel_uncore_type icx_uncore_imc = { .name = "imc", .num_counters = 4, - .num_boxes = 8, + .num_boxes = 12, .perf_ctr_bits = 48, .fixed_ctr_bits = 48, .fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR, .fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL, - .event_descs = hswep_uncore_imc_events, + .event_descs = snr_uncore_imc_events, .perf_ctr = SNR_IMC_MMIO_PMON_CTR0, .event_ctl = SNR_IMC_MMIO_PMON_CTL0, .event_mask = SNBEP_PMON_RAW_EVENT_MASK, @@ -5000,17 +5007,17 @@ static struct uncore_event_desc icx_uncore_imc_freerunning_events[] = { INTEL_UNCORE_EVENT_DESC(dclk, "event=0xff,umask=0x10"), INTEL_UNCORE_EVENT_DESC(read, "event=0xff,umask=0x20"), - INTEL_UNCORE_EVENT_DESC(read.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(read.scale, "6.103515625e-5"), INTEL_UNCORE_EVENT_DESC(read.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(write, "event=0xff,umask=0x21"), - INTEL_UNCORE_EVENT_DESC(write.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(write.scale, "6.103515625e-5"), INTEL_UNCORE_EVENT_DESC(write.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(ddrt_read, "event=0xff,umask=0x30"), - INTEL_UNCORE_EVENT_DESC(ddrt_read.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(ddrt_read.scale, "6.103515625e-5"), INTEL_UNCORE_EVENT_DESC(ddrt_read.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(ddrt_write, "event=0xff,umask=0x31"), - INTEL_UNCORE_EVENT_DESC(ddrt_write.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(ddrt_write.scale, "6.103515625e-5"), INTEL_UNCORE_EVENT_DESC(ddrt_write.unit, "MiB"), { /* end: all zeroes */ }, }; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 92d4215ffefb2..89c898c4a6eda 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -80,6 +80,7 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode) #define PERF_X86_EVENT_PAIR 0x1000 /* Large Increment per Cycle */ #define PERF_X86_EVENT_LBR_SELECT 0x2000 /* Save/Restore MSR_LBR_SELECT */ #define PERF_X86_EVENT_TOPDOWN 0x4000 /* Count Topdown slots/metrics events */ +#define PERF_X86_EVENT_PEBS_STLAT 0x8000 /* st+stlat data address sampling */ static inline bool is_topdown_count(struct perf_event *event) { @@ -428,6 +429,10 @@ struct cpu_hw_events { __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT) +#define INTEL_PSD_CONSTRAINT(c, n) \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_STLAT) + #define INTEL_PST_CONSTRAINT(c, n) \ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST) @@ -666,7 +671,7 @@ struct x86_pmu { /* PMI handler bits */ unsigned int late_ack :1, - counter_freezing :1; + enabled_ack :1; /* * sysfs attrs */ @@ -707,7 +712,8 @@ struct x86_pmu { pebs_broken :1, pebs_prec_dist :1, pebs_no_tlb :1, - pebs_no_isolation :1; + pebs_no_isolation :1, + pebs_block :1; int pebs_record_size; int pebs_buffer_size; int max_pebs_events; @@ -735,6 +741,7 @@ struct x86_pmu { /* * Intel perf metrics */ + int num_topdown_events; u64 (*update_topdown_event)(struct perf_event *event); int (*set_topdown_event_period)(struct perf_event *event); @@ -794,6 +801,8 @@ do { \ #define PMU_FL_PEBS_ALL 0x10 /* all events are valid PEBS events */ #define PMU_FL_TFA 0x20 /* deal with TSX force abort */ #define PMU_FL_PAIR 0x40 /* merge counters for large incr. events */ +#define PMU_FL_INSTR_LATENCY 0x80 /* Support Instruction Latency in PEBS Memory Info Record */ +#define PMU_FL_MEM_LOADS_AUX 0x100 /* Require an auxiliary event for the complete memory info */ #define EVENT_VAR(_id) event_attr_##_id #define EVENT_PTR(_id) &event_attr_##_id.attr.attr @@ -976,6 +985,11 @@ ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr, char *page); +static inline bool fixed_counter_disabled(int i) +{ + return !(x86_pmu.intel_ctrl >> (i + INTEL_PMC_IDX_FIXED)); +} + #ifdef CONFIG_CPU_SUP_AMD int amd_pmu_init(void); @@ -1071,6 +1085,8 @@ extern struct event_constraint intel_skl_pebs_event_constraints[]; extern struct event_constraint intel_icl_pebs_event_constraints[]; +extern struct event_constraint intel_spr_pebs_event_constraints[]; + struct event_constraint *intel_pebs_constraints(struct perf_event *event); void intel_pmu_pebs_add(struct perf_event *event); diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 79583bac9ac4a..df4a4a9dc1e84 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -155,7 +155,6 @@ void set_hv_tscchange_cb(void (*cb)(void)) struct hv_reenlightenment_control re_ctrl = { .vector = HYPERV_REENLIGHTENMENT_VECTOR, .enabled = 1, - .target_vp = hv_vp_index[smp_processor_id()] }; struct hv_tsc_emulation_control emu_ctrl = {.enabled = 1}; @@ -164,13 +163,20 @@ void set_hv_tscchange_cb(void (*cb)(void)) return; } + if (!hv_vp_index) + return; + hv_reenlightenment_cb = cb; /* Make sure callback is registered before we write to MSRs */ wmb(); + re_ctrl.target_vp = hv_vp_index[get_cpu()]; + wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); wrmsrl(HV_X64_MSR_TSC_EMULATION_CONTROL, *((u64 *)&emu_ctrl)); + + put_cpu(); } EXPORT_SYMBOL_GPL(set_hv_tscchange_cb); diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index 2c87350c1fb09..13838f1a45bd4 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -68,15 +68,6 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, local_irq_save(flags); - /* - * Only check the mask _after_ interrupt has been disabled to avoid the - * mask changing under our feet. - */ - if (cpumask_empty(cpus)) { - local_irq_restore(flags); - return; - } - flush_pcpu = (struct hv_tlb_flush **) this_cpu_ptr(hyperv_pcpu_input_arg); @@ -115,7 +106,9 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, * must. We will also check all VP numbers when walking the * supplied CPU set to remain correct in all cases. */ - if (hv_cpu_number_to_vp_number(cpumask_last(cpus)) >= 64) + cpu = cpumask_last(cpus); + + if (cpu < nr_cpumask_bits && hv_cpu_number_to_vp_number(cpu) >= 64) goto do_ex_hypercall; for_each_cpu(cpu, cpus) { @@ -131,6 +124,12 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, __set_bit(vcpu, (unsigned long *) &flush->processor_mask); } + + /* nothing to flush if 'processor_mask' ends up being empty */ + if (!flush->processor_mask) { + local_irq_restore(flags); + return; + } } /* diff --git a/arch/x86/include/asm/acenv.h b/arch/x86/include/asm/acenv.h index 9aff97f0de7fd..d937c55e717e6 100644 --- a/arch/x86/include/asm/acenv.h +++ b/arch/x86/include/asm/acenv.h @@ -13,7 +13,19 @@ /* Asm macros */ -#define ACPI_FLUSH_CPU_CACHE() wbinvd() +/* + * ACPI_FLUSH_CPU_CACHE() flushes caches on entering sleep states. + * It is required to prevent data loss. + * + * While running inside virtual machine, the kernel can bypass cache flushing. + * Changing sleep state in a virtual machine doesn't affect the host system + * sleep state and cannot lead to data loss. + */ +#define ACPI_FLUSH_CPU_CACHE() \ +do { \ + if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) \ + wbinvd(); \ +} while (0) int __acpi_acquire_global_lock(unsigned int *lock); int __acpi_release_global_lock(unsigned int *lock); diff --git a/arch/x86/include/asm/amd-ibs.h b/arch/x86/include/asm/amd-ibs.h new file mode 100644 index 0000000000000..46e1df45efc04 --- /dev/null +++ b/arch/x86/include/asm/amd-ibs.h @@ -0,0 +1,132 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * From PPR Vol 1 for AMD Family 19h Model 01h B1 + * 55898 Rev 0.35 - Feb 5, 2021 + */ + +#include + +/* + * IBS Hardware MSRs + */ + +/* MSR 0xc0011030: IBS Fetch Control */ +union ibs_fetch_ctl { + __u64 val; + struct { + __u64 fetch_maxcnt:16,/* 0-15: instruction fetch max. count */ + fetch_cnt:16, /* 16-31: instruction fetch count */ + fetch_lat:16, /* 32-47: instruction fetch latency */ + fetch_en:1, /* 48: instruction fetch enable */ + fetch_val:1, /* 49: instruction fetch valid */ + fetch_comp:1, /* 50: instruction fetch complete */ + ic_miss:1, /* 51: i-cache miss */ + phy_addr_valid:1,/* 52: physical address valid */ + l1tlb_pgsz:2, /* 53-54: i-cache L1TLB page size + * (needs IbsPhyAddrValid) */ + l1tlb_miss:1, /* 55: i-cache fetch missed in L1TLB */ + l2tlb_miss:1, /* 56: i-cache fetch missed in L2TLB */ + rand_en:1, /* 57: random tagging enable */ + fetch_l2_miss:1,/* 58: L2 miss for sampled fetch + * (needs IbsFetchComp) */ + reserved:5; /* 59-63: reserved */ + }; +}; + +/* MSR 0xc0011033: IBS Execution Control */ +union ibs_op_ctl { + __u64 val; + struct { + __u64 opmaxcnt:16, /* 0-15: periodic op max. count */ + reserved0:1, /* 16: reserved */ + op_en:1, /* 17: op sampling enable */ + op_val:1, /* 18: op sample valid */ + cnt_ctl:1, /* 19: periodic op counter control */ + opmaxcnt_ext:7, /* 20-26: upper 7 bits of periodic op maximum count */ + reserved1:5, /* 27-31: reserved */ + opcurcnt:27, /* 32-58: periodic op counter current count */ + reserved2:5; /* 59-63: reserved */ + }; +}; + +/* MSR 0xc0011035: IBS Op Data 2 */ +union ibs_op_data { + __u64 val; + struct { + __u64 comp_to_ret_ctr:16, /* 0-15: op completion to retire count */ + tag_to_ret_ctr:16, /* 15-31: op tag to retire count */ + reserved1:2, /* 32-33: reserved */ + op_return:1, /* 34: return op */ + op_brn_taken:1, /* 35: taken branch op */ + op_brn_misp:1, /* 36: mispredicted branch op */ + op_brn_ret:1, /* 37: branch op retired */ + op_rip_invalid:1, /* 38: RIP is invalid */ + op_brn_fuse:1, /* 39: fused branch op */ + op_microcode:1, /* 40: microcode op */ + reserved2:23; /* 41-63: reserved */ + }; +}; + +/* MSR 0xc0011036: IBS Op Data 2 */ +union ibs_op_data2 { + __u64 val; + struct { + __u64 data_src:3, /* 0-2: data source */ + reserved0:1, /* 3: reserved */ + rmt_node:1, /* 4: destination node */ + cache_hit_st:1, /* 5: cache hit state */ + reserved1:57; /* 5-63: reserved */ + }; +}; + +/* MSR 0xc0011037: IBS Op Data 3 */ +union ibs_op_data3 { + __u64 val; + struct { + __u64 ld_op:1, /* 0: load op */ + st_op:1, /* 1: store op */ + dc_l1tlb_miss:1, /* 2: data cache L1TLB miss */ + dc_l2tlb_miss:1, /* 3: data cache L2TLB hit in 2M page */ + dc_l1tlb_hit_2m:1, /* 4: data cache L1TLB hit in 2M page */ + dc_l1tlb_hit_1g:1, /* 5: data cache L1TLB hit in 1G page */ + dc_l2tlb_hit_2m:1, /* 6: data cache L2TLB hit in 2M page */ + dc_miss:1, /* 7: data cache miss */ + dc_mis_acc:1, /* 8: misaligned access */ + reserved:4, /* 9-12: reserved */ + dc_wc_mem_acc:1, /* 13: write combining memory access */ + dc_uc_mem_acc:1, /* 14: uncacheable memory access */ + dc_locked_op:1, /* 15: locked operation */ + dc_miss_no_mab_alloc:1, /* 16: DC miss with no MAB allocated */ + dc_lin_addr_valid:1, /* 17: data cache linear address valid */ + dc_phy_addr_valid:1, /* 18: data cache physical address valid */ + dc_l2_tlb_hit_1g:1, /* 19: data cache L2 hit in 1GB page */ + l2_miss:1, /* 20: L2 cache miss */ + sw_pf:1, /* 21: software prefetch */ + op_mem_width:4, /* 22-25: load/store size in bytes */ + op_dc_miss_open_mem_reqs:6, /* 26-31: outstanding mem reqs on DC fill */ + dc_miss_lat:16, /* 32-47: data cache miss latency */ + tlb_refill_lat:16; /* 48-63: L1 TLB refill latency */ + }; +}; + +/* MSR 0xc001103c: IBS Fetch Control Extended */ +union ic_ibs_extd_ctl { + __u64 val; + struct { + __u64 itlb_refill_lat:16, /* 0-15: ITLB Refill latency for sampled fetch */ + reserved:48; /* 16-63: reserved */ + }; +}; + +/* + * IBS driver related + */ + +struct perf_ibs_data { + u32 size; + union { + u32 data[0]; /* data buffer starts here */ + u32 caps; + }; + u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX]; +}; diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 1ae4e5791afaf..a9715f3984f0f 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -58,6 +58,7 @@ struct threshold_bank { /* initialized to the number of CPUs on the node sharing this bank */ refcount_t cpus; + unsigned int shared; }; struct amd_northbridge { diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h index af45e1452f097..feb59461046c6 100644 --- a/arch/x86/include/asm/archrandom.h +++ b/arch/x86/include/asm/archrandom.h @@ -73,10 +73,6 @@ static inline bool rdseed_int(unsigned int *v) return ok; } -/* Conditional execution based on CPU type */ -#define arch_has_random() static_cpu_has(X86_FEATURE_RDRAND) -#define arch_has_random_seed() static_cpu_has(X86_FEATURE_RDSEED) - /* * These are the generic interfaces; they must not be declared if the * stubs in are to be invoked, @@ -86,22 +82,22 @@ static inline bool rdseed_int(unsigned int *v) static inline bool arch_get_random_long(unsigned long *v) { - return arch_has_random() ? rdrand_long(v) : false; + return static_cpu_has(X86_FEATURE_RDRAND) ? rdrand_long(v) : false; } static inline bool arch_get_random_int(unsigned int *v) { - return arch_has_random() ? rdrand_int(v) : false; + return static_cpu_has(X86_FEATURE_RDRAND) ? rdrand_int(v) : false; } static inline bool arch_get_random_seed_long(unsigned long *v) { - return arch_has_random_seed() ? rdseed_long(v) : false; + return static_cpu_has(X86_FEATURE_RDSEED) ? rdseed_long(v) : false; } static inline bool arch_get_random_seed_int(unsigned int *v) { - return arch_has_random_seed() ? rdseed_int(v) : false; + return static_cpu_has(X86_FEATURE_RDSEED) ? rdseed_int(v) : false; } extern void x86_init_rdrand(struct cpuinfo_x86 *c); diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 12933d4062079..028efe1a36885 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -7,9 +7,11 @@ # define __ASM_FORM_RAW(x) x # define __ASM_FORM_COMMA(x) x, #else -# define __ASM_FORM(x) " " #x " " -# define __ASM_FORM_RAW(x) #x -# define __ASM_FORM_COMMA(x) " " #x "," +#include + +# define __ASM_FORM(x) " " __stringify(x) " " +# define __ASM_FORM_RAW(x) __stringify(x) +# define __ASM_FORM_COMMA(x) " " __stringify(x) "," #endif #ifndef __x86_64__ @@ -142,9 +144,6 @@ # define _ASM_EXTABLE_EX(from, to) \ _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) -# define _ASM_EXTABLE_REFCOUNT(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount) - # define _ASM_NOKPROBE(entry) \ .pushsection "_kprobe_blacklist","aw" ; \ _ASM_ALIGN ; \ @@ -176,9 +175,6 @@ # define _ASM_EXTABLE_EX(from, to) \ _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) -# define _ASM_EXTABLE_REFCOUNT(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount) - /* For C file, we already have NOKPROBE_SYMBOL macro */ #endif diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 22c4dfe659923..b4dd6ab0fdfce 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -31,15 +31,13 @@ typedef s64 __attribute__((aligned(4))) compat_s64; typedef u64 __attribute__((aligned(4))) compat_u64; struct compat_stat { - compat_dev_t st_dev; - u16 __pad1; + u32 st_dev; compat_ino_t st_ino; compat_mode_t st_mode; compat_nlink_t st_nlink; __compat_uid_t st_uid; __compat_gid_t st_gid; - compat_dev_t st_rdev; - u16 __pad2; + u32 st_rdev; u32 st_size; u32 st_blksize; u32 st_blocks; diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 59bf91c57aa85..00d329a990e56 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -30,6 +30,7 @@ enum cpuid_leafs CPUID_7_ECX, CPUID_8000_0007_EBX, CPUID_7_EDX, + CPUID_8000_001F_EAX, }; #ifdef CONFIG_X86_FEATURE_NAMES @@ -49,7 +50,7 @@ extern const char * const x86_power_flags[32]; extern const char * const x86_bug_flags[NBUGINTS*32]; #define test_cpu_cap(c, bit) \ - test_bit(bit, (unsigned long *)((c)->x86_capability)) + arch_test_bit(bit, (unsigned long *)((c)->x86_capability)) /* * There are 32 bits/features in each mask word. The high bits @@ -88,8 +89,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \ REQUIRED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 19)) + BUILD_BUG_ON_ZERO(NCAPINTS != 20)) #define DISABLED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ @@ -111,8 +113,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \ DISABLED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 19)) + BUILD_BUG_ON_ZERO(NCAPINTS != 20)) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index dde63a9f9bfee..194e45dff8f05 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -13,7 +13,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 19 /* N 32-bit words worth of info */ +#define NCAPINTS 20 /* N 32-bit words worth of info */ #define NBUGINTS 1 /* N 32-bit bug flags */ /* @@ -96,6 +96,7 @@ #define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ +/* FREE! ( 3*32+17) */ #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ @@ -107,6 +108,7 @@ #define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */ #define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */ #define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ +/* free ( 3*32+29) */ #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ #define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */ @@ -199,7 +201,7 @@ #define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ +/* FREE! ( 7*32+10) */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */ @@ -209,7 +211,7 @@ #define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ -#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */ +#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* AMD Performance Monitoring Version 2 */ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ @@ -284,6 +286,7 @@ #define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */ #define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */ #define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ +#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+ 6) /* "" Fill RSB on VM exit when EIBRS is enabled */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ @@ -368,6 +371,13 @@ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ #define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ +/* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */ +#define X86_FEATURE_SME (19*32+ 0) /* AMD Secure Memory Encryption */ +#define X86_FEATURE_SEV (19*32+ 1) /* AMD Secure Encrypted Virtualization */ +#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* "" VM Page Flush MSR is supported */ +#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */ + /* * BUG word(s) */ @@ -404,5 +414,7 @@ #define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ +#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ +#define X86_BUG_EIBRS_PBRSB X86_BUG(26) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index a5ea841cc6d22..f0f935f8d9174 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -84,6 +84,7 @@ #define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP) #define DISABLED_MASK17 0 #define DISABLED_MASK18 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) +#define DISABLED_MASK19 0 +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 20) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/arch/x86/include/asm/emulate_prefix.h b/arch/x86/include/asm/emulate_prefix.h new file mode 100644 index 0000000000000..70f5b98a52869 --- /dev/null +++ b/arch/x86/include/asm/emulate_prefix.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_EMULATE_PREFIX_H +#define _ASM_X86_EMULATE_PREFIX_H + +/* + * Virt escape sequences to trigger instruction emulation; + * ideally these would decode to 'whole' instruction and not destroy + * the instruction stream; sadly this is not true for the 'kvm' one :/ + */ + +#define __XEN_EMULATE_PREFIX 0x0f,0x0b,0x78,0x65,0x6e /* ud2 ; .ascii "xen" */ +#define __KVM_EMULATE_PREFIX 0x0f,0x0b,0x6b,0x76,0x6d /* ud2 ; .ascii "kvm" */ + +#endif diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 03b3de491b5e6..5ed702e2c55f4 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -560,9 +560,11 @@ static inline void __fpregs_load_activate(void) * The FPU context is only stored/restored for a user task and * PF_KTHREAD is used to distinguish between kernel and user threads. */ -static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) +static inline void switch_fpu_prepare(struct task_struct *prev, int cpu) { - if (static_cpu_has(X86_FEATURE_FPU) && !(current->flags & PF_KTHREAD)) { + struct fpu *old_fpu = &prev->thread.fpu; + + if (static_cpu_has(X86_FEATURE_FPU) && !(prev->flags & PF_KTHREAD)) { if (!copy_fpregs_to_fpstate(old_fpu)) old_fpu->last_cpu = -1; else @@ -581,10 +583,11 @@ static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) * Load PKRU from the FPU context if available. Delay loading of the * complete FPU state until the return to userland. */ -static inline void switch_fpu_finish(struct fpu *new_fpu) +static inline void switch_fpu_finish(struct task_struct *next) { u32 pkru_val = init_pkru_value; struct pkru_state *pk; + struct fpu *next_fpu = &next->thread.fpu; if (!static_cpu_has(X86_FEATURE_FPU)) return; @@ -598,7 +601,7 @@ static inline void switch_fpu_finish(struct fpu *new_fpu) * PKRU state is switched eagerly because it needs to be valid before we * return to userland e.g. for a copy_to_user() operation. */ - if (!(current->flags & PF_KTHREAD)) { + if (!(next->flags & PF_KTHREAD)) { /* * If the PKRU bit in xsave.header.xfeatures is not set, * then the PKRU component was in init state, which means @@ -607,7 +610,7 @@ static inline void switch_fpu_finish(struct fpu *new_fpu) * in memory is not valid. This means pkru_val has to be * set to 0 and not to init_pkru_value. */ - pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU); + pk = get_xsave_addr(&next_fpu->state.xsave, XFEATURE_PKRU); pkru_val = pk ? pk->pkru : 0; } __write_pkru(pkru_val); diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index a51ffeea6d879..a8c3d284fa46c 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -45,6 +45,7 @@ struct insn { struct insn_field immediate2; /* for 64bit imm or seg16 */ }; + int emulate_prefix_size; insn_attr_t attr; unsigned char opnd_bytes; unsigned char addr_bytes; @@ -128,6 +129,11 @@ static inline int insn_is_evex(struct insn *insn) return (insn->vex_prefix.nbytes == 4); } +static inline int insn_has_emulate_prefix(struct insn *insn) +{ + return !!insn->emulate_prefix_size; +} + /* Ensure this instruction is decoded completely */ static inline int insn_complete(struct insn *insn) { diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index cec5881be0ddc..2a869827594ed 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -86,6 +86,14 @@ #define INTEL_FAM6_COMETLAKE 0xA5 #define INTEL_FAM6_COMETLAKE_L 0xA6 +#define INTEL_FAM6_ROCKETLAKE 0xA7 + +/* Hybrid Core/Atom Processors */ + +#define INTEL_FAM6_LAKEFIELD 0x8A +#define INTEL_FAM6_ALDERLAKE 0x97 +#define INTEL_FAM6_ALDERLAKE_L 0x9A + /* "Small Core" Processors (Atom) */ #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ @@ -111,6 +119,7 @@ #define INTEL_FAM6_ATOM_TREMONT_D 0x86 /* Jacobsville */ #define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */ +#define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */ /* Xeon Phi */ diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 5e7d6b46de97d..367da081f7d9d 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -201,6 +202,14 @@ extern int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, extern void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages); #define arch_kexec_pre_free_pages arch_kexec_pre_free_pages +#ifdef CONFIG_KEXEC_FILE +struct purgatory_info; +int arch_kexec_apply_relocations_add(struct purgatory_info *pi, + Elf_Shdr *section, + const Elf_Shdr *relsec, + const Elf_Shdr *symtab); +#define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add +#endif #endif typedef void crash_vmclear_fn(void); diff --git a/arch/x86/include/asm/kvmclock.h b/arch/x86/include/asm/kvmclock.h index eceea92990974..6c57651921028 100644 --- a/arch/x86/include/asm/kvmclock.h +++ b/arch/x86/include/asm/kvmclock.h @@ -2,6 +2,20 @@ #ifndef _ASM_X86_KVM_CLOCK_H #define _ASM_X86_KVM_CLOCK_H +#include + extern struct clocksource kvm_clock; +DECLARE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu); + +static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void) +{ + return &this_cpu_read(hv_clock_per_cpu)->pvti; +} + +static inline struct pvclock_vsyscall_time_info *this_cpu_hvclock(void) +{ + return this_cpu_read(hv_clock_per_cpu); +} + #endif /* _ASM_X86_KVM_CLOCK_H */ diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index dd2e21b130eb6..a75a0520d646e 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -127,6 +127,8 @@ #define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x))) +#define XEC(x, mask) (((x) >> 16) & mask) + /* mce.kflags flag bits for logging etc. */ #define MCE_HANDLED_CEC BIT_ULL(0) #define MCE_HANDLED_UC BIT_ULL(1) @@ -174,7 +176,8 @@ enum mce_notifier_prios { MCE_PRIO_EXTLOG, MCE_PRIO_UC, MCE_PRIO_EARLY, - MCE_PRIO_CEC + MCE_PRIO_CEC, + MCE_PRIO_HIGHEST = MCE_PRIO_CEC }; struct notifier_block; @@ -315,7 +318,7 @@ extern void apei_mce_report_mem_error(int corrected, /* These may be used by multiple smca_hwid_mcatypes */ enum smca_bank_types { SMCA_LS = 0, /* Load Store */ - SMCA_LS_V2, /* Load Store */ + SMCA_LS_V2, SMCA_IF, /* Instruction Fetch */ SMCA_L2_CACHE, /* L2 Cache */ SMCA_DE, /* Decoder Unit */ @@ -324,37 +327,32 @@ enum smca_bank_types { SMCA_FP, /* Floating Point */ SMCA_L3_CACHE, /* L3 Cache */ SMCA_CS, /* Coherent Slave */ - SMCA_CS_V2, /* Coherent Slave */ + SMCA_CS_V2, SMCA_PIE, /* Power, Interrupts, etc. */ SMCA_UMC, /* Unified Memory Controller */ + SMCA_UMC_V2, SMCA_PB, /* Parameter Block */ SMCA_PSP, /* Platform Security Processor */ - SMCA_PSP_V2, /* Platform Security Processor */ + SMCA_PSP_V2, SMCA_SMU, /* System Management Unit */ - SMCA_SMU_V2, /* System Management Unit */ + SMCA_SMU_V2, SMCA_MP5, /* Microprocessor 5 Unit */ + SMCA_MPDMA, /* MPDMA Unit */ SMCA_NBIO, /* Northbridge IO Unit */ SMCA_PCIE, /* PCI Express Unit */ + SMCA_PCIE_V2, + SMCA_XGMI_PCS, /* xGMI PCS Unit */ + SMCA_NBIF, /* NBIF Unit */ + SMCA_SHUB, /* System HUB Unit */ + SMCA_SATA, /* SATA Unit */ + SMCA_USB, /* USB Unit */ + SMCA_GMI_PCS, /* GMI PCS Unit */ + SMCA_XGMI_PHY, /* xGMI PHY Unit */ + SMCA_WAFL_PHY, /* WAFL PHY Unit */ + SMCA_GMI_PHY, /* GMI PHY Unit */ N_SMCA_BANK_TYPES }; -#define HWID_MCATYPE(hwid, mcatype) (((hwid) << 16) | (mcatype)) - -struct smca_hwid { - unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */ - u32 hwid_mcatype; /* (hwid,mcatype) tuple */ - u32 xec_bitmap; /* Bitmap of valid ExtErrorCodes; current max is 21. */ - u8 count; /* Number of instances. */ -}; - -struct smca_bank { - struct smca_hwid *hwid; - u32 id; /* Value of MCA_IPID[InstanceId]. */ - u8 sysfs_id; /* Value used for sysfs name. */ -}; - -extern struct smca_bank smca_banks[MAX_NR_BANKS]; - extern const char *smca_get_long_name(enum smca_bank_types t); extern bool amd_mce_is_memory_error(struct mce *m); @@ -362,18 +360,14 @@ extern int mce_threshold_create_device(unsigned int cpu); extern int mce_threshold_remove_device(unsigned int cpu); void mce_amd_feature_init(struct cpuinfo_x86 *c); -int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr); - +enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank); #else static inline int mce_threshold_create_device(unsigned int cpu) { return 0; }; static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; }; static inline bool amd_mce_is_memory_error(struct mce *m) { return false; }; static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } -static inline int -umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { return -EINVAL; }; #endif static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); } - #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 2b7cc5397f80d..91a06cef50c1b 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -133,11 +133,13 @@ extern void load_ucode_ap(void); void reload_early_microcode(void); extern bool get_builtin_firmware(struct cpio_data *cd, const char *name); extern bool initrd_gone; +void microcode_bsp_resume(void); #else static inline int __init microcode_init(void) { return 0; }; static inline void __init load_ucode_bsp(void) { } static inline void load_ucode_ap(void) { } static inline void reload_early_microcode(void) { } +static inline void microcode_bsp_resume(void) { } static inline bool get_builtin_firmware(struct cpio_data *cd, const char *name) { return false; } #endif diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 23d32a8c82e02..9a7fca7492717 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -105,6 +105,34 @@ * Not susceptible to * TSX Async Abort (TAA) vulnerabilities. */ +#define ARCH_CAP_SBDR_SSDP_NO BIT(13) /* + * Not susceptible to SBDR and SSDP + * variants of Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_FBSDP_NO BIT(14) /* + * Not susceptible to FBSDP variant of + * Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_PSDP_NO BIT(15) /* + * Not susceptible to PSDP variant of + * Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_FB_CLEAR BIT(17) /* + * VERW clears CPU fill buffer + * even on MDS_NO CPUs. + */ +#define ARCH_CAP_FB_CLEAR_CTRL BIT(18) /* + * MSR_IA32_MCU_OPT_CTRL[FB_CLEAR_DIS] + * bit available to control VERW + * behavior. + */ +#define ARCH_CAP_PBRSB_NO BIT(24) /* + * Not susceptible to Post-Barrier + * Return Stack Buffer Predictions. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* @@ -122,6 +150,7 @@ /* SRBDS support */ #define MSR_IA32_MCU_OPT_CTRL 0x00000123 #define RNGDS_MITG_DIS BIT(0) +#define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */ #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 @@ -443,6 +472,11 @@ #define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f +/* AMD Performance Counter Global Status and Control MSRs */ +#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300 +#define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301 +#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302 + /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 956df82bbc2bc..1e5df3ccdd5cb 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -61,7 +61,16 @@ 774: \ dec reg; \ jnz 771b; \ - add $(BITS_PER_LONG/8) * nr, sp; + add $(BITS_PER_LONG/8) * nr, sp; \ + /* barrier for jnz misprediction */ \ + lfence; + +#define __ISSUE_UNBALANCED_RET_GUARD(sp) \ + call 881f; \ + int3; \ +881: \ + add $(BITS_PER_LONG/8), sp; \ + lfence; #ifdef __ASSEMBLY__ @@ -130,6 +139,14 @@ #else call *\reg #endif +.endm + +.macro ISSUE_UNBALANCED_RET_GUARD ftr:req + ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE "jmp .Lskip_pbrsb_\@", \ + __stringify(__ISSUE_UNBALANCED_RET_GUARD(%_ASM_SP)) \ + \ftr +.Lskip_pbrsb_\@: .endm /* @@ -313,6 +330,8 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); DECLARE_STATIC_KEY_FALSE(mds_user_clear); DECLARE_STATIC_KEY_FALSE(mds_idle_clear); +DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear); + #include /** diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 288b065955b72..9d0b479452720 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -15,7 +15,7 @@ #define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER) #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) -#define EXCEPTION_STACK_ORDER (0 + KASAN_STACK_ORDER) +#define EXCEPTION_STACK_ORDER (1 + KASAN_STACK_ORDER) #define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER) #define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER) diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 6a174de55b04c..c31ce7828649a 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -142,6 +142,18 @@ union cpuid10_edx { unsigned int full; }; +/* + * AMD "Extended Performance Monitoring and Debug" CPUID + * detection/enumeration details: + */ +union cpuid_0x80000022_ebx { + struct { + /* Number of Core Performance Counters */ + unsigned int num_core_pmc:4; + } split; + unsigned int full; +}; + struct x86_pmu_capability { int version; int num_counters_gp; @@ -219,8 +231,12 @@ struct x86_pmu_capability { #define INTEL_PMC_IDX_TD_BAD_SPEC (INTEL_PMC_IDX_METRIC_BASE + 1) #define INTEL_PMC_IDX_TD_FE_BOUND (INTEL_PMC_IDX_METRIC_BASE + 2) #define INTEL_PMC_IDX_TD_BE_BOUND (INTEL_PMC_IDX_METRIC_BASE + 3) -#define INTEL_PMC_IDX_METRIC_END INTEL_PMC_IDX_TD_BE_BOUND -#define INTEL_PMC_MSK_TOPDOWN ((0xfull << INTEL_PMC_IDX_METRIC_BASE) | \ +#define INTEL_PMC_IDX_TD_HEAVY_OPS (INTEL_PMC_IDX_METRIC_BASE + 4) +#define INTEL_PMC_IDX_TD_BR_MISPREDICT (INTEL_PMC_IDX_METRIC_BASE + 5) +#define INTEL_PMC_IDX_TD_FETCH_LAT (INTEL_PMC_IDX_METRIC_BASE + 6) +#define INTEL_PMC_IDX_TD_MEM_BOUND (INTEL_PMC_IDX_METRIC_BASE + 7) +#define INTEL_PMC_IDX_METRIC_END INTEL_PMC_IDX_TD_MEM_BOUND +#define INTEL_PMC_MSK_TOPDOWN ((0xffull << INTEL_PMC_IDX_METRIC_BASE) | \ INTEL_PMC_MSK_FIXED_SLOTS) /* @@ -238,8 +254,14 @@ struct x86_pmu_capability { #define INTEL_TD_METRIC_BAD_SPEC 0x8100 /* Bad speculation metric */ #define INTEL_TD_METRIC_FE_BOUND 0x8200 /* FE bound metric */ #define INTEL_TD_METRIC_BE_BOUND 0x8300 /* BE bound metric */ -#define INTEL_TD_METRIC_MAX INTEL_TD_METRIC_BE_BOUND -#define INTEL_TD_METRIC_NUM 4 +/* Level 2 metrics */ +#define INTEL_TD_METRIC_HEAVY_OPS 0x8400 /* Heavy Operations metric */ +#define INTEL_TD_METRIC_BR_MISPREDICT 0x8500 /* Branch Mispredict metric */ +#define INTEL_TD_METRIC_FETCH_LAT 0x8600 /* Fetch Latency metric */ +#define INTEL_TD_METRIC_MEM_BOUND 0x8700 /* Memory bound metric */ + +#define INTEL_TD_METRIC_MAX INTEL_TD_METRIC_MEM_BOUND +#define INTEL_TD_METRIC_NUM 8 static inline bool is_metric_idx(int idx) { @@ -321,6 +343,11 @@ struct pebs_lbr { struct pebs_lbr_entry lbr[0]; /* Variable length */ }; +/* + * AMD Extended Performance Monitoring and Debug cpuid feature detection + */ +#define EXT_PERFMON_DEBUG_FEATURES 0x80000022 + /* * IBS cpuid feature detection */ @@ -342,6 +369,7 @@ struct pebs_lbr { #define IBS_CAPS_OPBRNFUSE (1U<<8) #define IBS_CAPS_FETCHCTLEXTD (1U<<9) #define IBS_CAPS_OPDATA4 (1U<<10) +#define IBS_CAPS_ZEN4 (1U<<11) #define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \ | IBS_CAPS_FETCHSAM \ @@ -355,6 +383,7 @@ struct pebs_lbr { #define IBSCTL_LVT_OFFSET_MASK 0x0F /* IBS fetch bits/masks */ +#define IBS_FETCH_L3MISSONLY (1ULL<<59) #define IBS_FETCH_RAND_EN (1ULL<<57) #define IBS_FETCH_VAL (1ULL<<49) #define IBS_FETCH_ENABLE (1ULL<<48) @@ -371,8 +400,10 @@ struct pebs_lbr { #define IBS_OP_CNT_CTL (1ULL<<19) #define IBS_OP_VAL (1ULL<<18) #define IBS_OP_ENABLE (1ULL<<17) +#define IBS_OP_L3MISSONLY (1ULL<<16) #define IBS_OP_MAX_CNT 0x0000FFFFULL #define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ +#define IBS_OP_MAX_CNT_EXT_MASK (0x7FULL<<20) /* separate upper 7 bits */ #define IBS_RIP_INVALID (1ULL<<38) #ifdef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index e1fb43bf218d7..99a0ff7fe21fd 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1380,8 +1380,8 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) #endif #endif -#define PKRU_AD_BIT 0x1 -#define PKRU_WD_BIT 0x2 +#define PKRU_AD_BIT 0x1u +#define PKRU_WD_BIT 0x2u #define PKRU_BITS_PER_PKEY 2 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 528b2dffc2775..16c6ee556f017 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -99,9 +99,10 @@ struct cpuinfo_x86 { /* in KB - valid for CPUS which support this call: */ unsigned int x86_cache_size; int x86_cache_alignment; /* In bytes */ - /* Cache QoS architectural values: */ + /* Cache QoS architectural values, valid only on the BSP: */ int x86_cache_max_rmid; /* max index */ int x86_cache_occ_scale; /* scale to bytes */ + int x86_cache_mbm_width_offset; int x86_power; unsigned long loops_per_jiffy; /* cpuid returned max cores value: */ diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index 09ecc32f65248..52d7512ea91ab 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h @@ -82,6 +82,7 @@ static inline void set_real_mode_mem(phys_addr_t mem) } void reserve_real_mode(void); +void load_trampoline_pgtable(void); #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h deleted file mode 100644 index 232f856e0db06..0000000000000 --- a/arch/x86/include/asm/refcount.h +++ /dev/null @@ -1,126 +0,0 @@ -#ifndef __ASM_X86_REFCOUNT_H -#define __ASM_X86_REFCOUNT_H -/* - * x86-specific implementation of refcount_t. Based on PAX_REFCOUNT from - * PaX/grsecurity. - */ -#include -#include - -/* - * This is the first portion of the refcount error handling, which lives in - * .text.unlikely, and is jumped to from the CPU flag check (in the - * following macros). This saves the refcount value location into CX for - * the exception handler to use (in mm/extable.c), and then triggers the - * central refcount exception. The fixup address for the exception points - * back to the regular execution flow in .text. - */ -#define _REFCOUNT_EXCEPTION \ - ".pushsection .text..refcount\n" \ - "111:\tlea %[var], %%" _ASM_CX "\n" \ - "112:\t" ASM_UD2 "\n" \ - ASM_UNREACHABLE \ - ".popsection\n" \ - "113:\n" \ - _ASM_EXTABLE_REFCOUNT(112b, 113b) - -/* Trigger refcount exception if refcount result is negative. */ -#define REFCOUNT_CHECK_LT_ZERO \ - "js 111f\n\t" \ - _REFCOUNT_EXCEPTION - -/* Trigger refcount exception if refcount result is zero or negative. */ -#define REFCOUNT_CHECK_LE_ZERO \ - "jz 111f\n\t" \ - REFCOUNT_CHECK_LT_ZERO - -/* Trigger refcount exception unconditionally. */ -#define REFCOUNT_ERROR \ - "jmp 111f\n\t" \ - _REFCOUNT_EXCEPTION - -static __always_inline void refcount_add(unsigned int i, refcount_t *r) -{ - asm volatile(LOCK_PREFIX "addl %1,%0\n\t" - REFCOUNT_CHECK_LT_ZERO - : [var] "+m" (r->refs.counter) - : "ir" (i) - : "cc", "cx"); -} - -static __always_inline void refcount_inc(refcount_t *r) -{ - asm volatile(LOCK_PREFIX "incl %0\n\t" - REFCOUNT_CHECK_LT_ZERO - : [var] "+m" (r->refs.counter) - : : "cc", "cx"); -} - -static __always_inline void refcount_dec(refcount_t *r) -{ - asm volatile(LOCK_PREFIX "decl %0\n\t" - REFCOUNT_CHECK_LE_ZERO - : [var] "+m" (r->refs.counter) - : : "cc", "cx"); -} - -static __always_inline __must_check -bool refcount_sub_and_test(unsigned int i, refcount_t *r) -{ - bool ret = GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", - REFCOUNT_CHECK_LT_ZERO, - r->refs.counter, e, "er", i, "cx"); - - if (ret) { - smp_acquire__after_ctrl_dep(); - return true; - } - - return false; -} - -static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r) -{ - bool ret = GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", - REFCOUNT_CHECK_LT_ZERO, - r->refs.counter, e, "cx"); - - if (ret) { - smp_acquire__after_ctrl_dep(); - return true; - } - - return false; -} - -static __always_inline __must_check -bool refcount_add_not_zero(unsigned int i, refcount_t *r) -{ - int c, result; - - c = atomic_read(&(r->refs)); - do { - if (unlikely(c == 0)) - return false; - - result = c + i; - - /* Did we try to increment from/to an undesirable state? */ - if (unlikely(c < 0 || c == INT_MAX || result < c)) { - asm volatile(REFCOUNT_ERROR - : : [var] "m" (r->refs.counter) - : "cc", "cx"); - break; - } - - } while (!atomic_try_cmpxchg(&(r->refs), &c, result)); - - return c != 0; -} - -static __always_inline __must_check bool refcount_inc_not_zero(refcount_t *r) -{ - return refcount_add_not_zero(1, r); -} - -#endif diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index 6847d85400a8b..fa5700097f647 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -101,6 +101,7 @@ #define REQUIRED_MASK16 0 #define REQUIRED_MASK17 0 #define REQUIRED_MASK18 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) +#define REQUIRED_MASK19 0 +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 20) #endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h index fdbd9d7b7bca1..3b97aa9215430 100644 --- a/arch/x86/include/asm/suspend_32.h +++ b/arch/x86/include/asm/suspend_32.h @@ -21,7 +21,6 @@ struct saved_context { #endif unsigned long cr0, cr2, cr3, cr4; u64 misc_enable; - bool misc_enable_saved; struct saved_msrs saved_msrs; struct desc_ptr gdt_desc; struct desc_ptr idt; @@ -30,6 +29,7 @@ struct saved_context { unsigned long tr; unsigned long safety; unsigned long return_address; + bool misc_enable_saved; } __attribute__((packed)); /* routines for saving/restoring kernel state */ diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h index 35bb35d28733e..54df06687d834 100644 --- a/arch/x86/include/asm/suspend_64.h +++ b/arch/x86/include/asm/suspend_64.h @@ -14,9 +14,13 @@ * Image of the saved processor state, used by the low level ACPI suspend to * RAM code and by the low level hibernation code. * - * If you modify it, fix arch/x86/kernel/acpi/wakeup_64.S and make sure that - * __save/__restore_processor_state(), defined in arch/x86/kernel/suspend_64.c, - * still work as required. + * If you modify it, check how it is used in arch/x86/kernel/acpi/wakeup_64.S + * and make sure that __save/__restore_processor_state(), defined in + * arch/x86/power/cpu.c, still work as required. + * + * Because the structure is packed, make sure to avoid unaligned members. For + * optimisation purposes but also because tools like kmemleak only search for + * pointers that are aligned. */ struct saved_context { struct pt_regs regs; @@ -36,7 +40,6 @@ struct saved_context { unsigned long cr0, cr2, cr3, cr4; u64 misc_enable; - bool misc_enable_saved; struct saved_msrs saved_msrs; unsigned long efer; u16 gdt_pad; /* Unused */ @@ -48,6 +51,7 @@ struct saved_context { unsigned long tr; unsigned long safety; unsigned long return_address; + bool misc_enable_saved; } __attribute__((packed)); #define loaddebug(thread,register) \ diff --git a/arch/x86/include/asm/timex.h b/arch/x86/include/asm/timex.h index a4a8b1b16c0c1..956e4145311b1 100644 --- a/arch/x86/include/asm/timex.h +++ b/arch/x86/include/asm/timex.h @@ -5,6 +5,15 @@ #include #include +static inline unsigned long random_get_entropy(void) +{ + if (!IS_ENABLED(CONFIG_X86_TSC) && + !cpu_feature_enabled(X86_FEATURE_TSC)) + return random_get_entropy_fallback(); + return rdtsc(); +} +#define random_get_entropy random_get_entropy + /* Assume we use the PIT time source for the clock tick */ #define CLOCK_TICK_RATE PIT_TICK_RATE diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index f3459df117aa8..99a0c29f925f1 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -118,8 +118,11 @@ extern unsigned int __max_die_per_package; #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) extern unsigned int __max_logical_packages; +extern unsigned int logical_packages; #define topology_max_packages() (__max_logical_packages) +extern unsigned int logical_packages; + static inline int topology_max_die_per_package(void) { return __max_die_per_package; @@ -140,6 +143,7 @@ bool topology_is_primary_thread(unsigned int cpu); bool topology_smt_supported(void); #else #define topology_max_packages() (1) +#define logical_packages (1) static inline int topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } static inline int diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 8a0c25c6bf099..f1ea8f0c8b129 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -22,13 +22,12 @@ extern void disable_TSC(void); static inline cycles_t get_cycles(void) { -#ifndef CONFIG_X86_TSC - if (!boot_cpu_has(X86_FEATURE_TSC)) + if (!IS_ENABLED(CONFIG_X86_TSC) && + !cpu_feature_enabled(X86_FEATURE_TSC)) return 0; -#endif - return rdtsc(); } +#define get_cycles get_cycles extern struct system_counterval_t convert_art_to_tsc(u64 art); extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns); diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 61d93f062a36e..d6a0e57ecc073 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -378,18 +378,6 @@ do { \ : "=r" (err), ltype(x) \ : "m" (__m(addr)), "i" (errret), "0" (err)) -#define __get_user_asm_nozero(x, addr, err, itype, rtype, ltype, errret) \ - asm volatile("\n" \ - "1: mov"itype" %2,%"rtype"1\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: mov %3,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE_UA(1b, 3b) \ - : "=r" (err), ltype(x) \ - : "m" (__m(addr)), "i" (errret), "0" (err)) - /* * This doesn't do __uaccess_begin/end - the exception handling * around it must do that. @@ -453,6 +441,103 @@ __pu_label: \ __builtin_expect(__gu_err, 0); \ }) +#ifdef CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT +#define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \ + bool success; \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ + __typeof__(*(_ptr)) __old = *_old; \ + __typeof__(*(_ptr)) __new = (_new); \ + asm_volatile_goto("\n" \ + "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\ + _ASM_EXTABLE_UA(1b, %l[label]) \ + : CC_OUT(z) (success), \ + [ptr] "+m" (*_ptr), \ + [old] "+a" (__old) \ + : [new] ltype (__new) \ + : "memory" \ + : label); \ + if (unlikely(!success)) \ + *_old = __old; \ + likely(success); }) + +#ifdef CONFIG_X86_32 +#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \ + bool success; \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ + __typeof__(*(_ptr)) __old = *_old; \ + __typeof__(*(_ptr)) __new = (_new); \ + asm_volatile_goto("\n" \ + "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ + _ASM_EXTABLE_UA(1b, %l[label]) \ + : CC_OUT(z) (success), \ + "+A" (__old), \ + [ptr] "+m" (*_ptr) \ + : "b" ((u32)__new), \ + "c" ((u32)((u64)__new >> 32)) \ + : "memory" \ + : label); \ + if (unlikely(!success)) \ + *_old = __old; \ + likely(success); }) +#endif // CONFIG_X86_32 +#else // !CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT +#define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \ + int __err = 0; \ + bool success; \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ + __typeof__(*(_ptr)) __old = *_old; \ + __typeof__(*(_ptr)) __new = (_new); \ + asm volatile("\n" \ + "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\ + CC_SET(z) \ + "2:\n" \ + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, \ + %[errout]) \ + : CC_OUT(z) (success), \ + [errout] "+r" (__err), \ + [ptr] "+m" (*_ptr), \ + [old] "+a" (__old) \ + : [new] ltype (__new) \ + : "memory"); \ + if (unlikely(__err)) \ + goto label; \ + if (unlikely(!success)) \ + *_old = __old; \ + likely(success); }) + +#ifdef CONFIG_X86_32 +/* + * Unlike the normal CMPXCHG, hardcode ECX for both success/fail and error. + * There are only six GPRs available and four (EAX, EBX, ECX, and EDX) are + * hardcoded by CMPXCHG8B, leaving only ESI and EDI. If the compiler uses + * both ESI and EDI for the memory operand, compilation will fail if the error + * is an input+output as there will be no register available for input. + */ +#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \ + int __result; \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ + __typeof__(*(_ptr)) __old = *_old; \ + __typeof__(*(_ptr)) __new = (_new); \ + asm volatile("\n" \ + "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ + "mov $0, %%ecx\n\t" \ + "setz %%cl\n" \ + "2:\n" \ + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %%ecx) \ + : [result]"=c" (__result), \ + "+A" (__old), \ + [ptr] "+m" (*_ptr) \ + : "b" ((u32)__new), \ + "c" ((u32)((u64)__new >> 32)) \ + : "memory", "cc"); \ + if (unlikely(__result < 0)) \ + goto label; \ + if (unlikely(!__result)) \ + *_old = __old; \ + likely(__result); }) +#endif // CONFIG_X86_32 +#endif // CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT + /* FIXME: this hack is definitely wrong -AK */ struct __large_struct { unsigned long buf[100]; }; #define __m(x) (*(struct __large_struct __user *)(x)) @@ -734,6 +819,51 @@ do { \ if (unlikely(__gu_err)) goto err_label; \ } while (0) +extern void __try_cmpxchg_user_wrong_size(void); + +#ifndef CONFIG_X86_32 +#define __try_cmpxchg64_user_asm(_ptr, _oldp, _nval, _label) \ + __try_cmpxchg_user_asm("q", "r", (_ptr), (_oldp), (_nval), _label) +#endif + +/* + * Force the pointer to u to match the size expected by the asm helper. + * clang/LLVM compiles all cases and only discards the unused paths after + * processing errors, which breaks i386 if the pointer is an 8-byte value. + */ +#define unsafe_try_cmpxchg_user(_ptr, _oldp, _nval, _label) ({ \ + bool __ret; \ + __chk_user_ptr(_ptr); \ + switch (sizeof(*(_ptr))) { \ + case 1: __ret = __try_cmpxchg_user_asm("b", "q", \ + (__force u8 *)(_ptr), (_oldp), \ + (_nval), _label); \ + break; \ + case 2: __ret = __try_cmpxchg_user_asm("w", "r", \ + (__force u16 *)(_ptr), (_oldp), \ + (_nval), _label); \ + break; \ + case 4: __ret = __try_cmpxchg_user_asm("l", "r", \ + (__force u32 *)(_ptr), (_oldp), \ + (_nval), _label); \ + break; \ + case 8: __ret = __try_cmpxchg64_user_asm((__force u64 *)(_ptr), (_oldp),\ + (_nval), _label); \ + break; \ + default: __try_cmpxchg_user_wrong_size(); \ + } \ + __ret; }) + +/* "Returns" 0 on success, 1 on failure, -EFAULT if the access faults. */ +#define __try_cmpxchg_user(_ptr, _oldp, _nval, _label) ({ \ + int __ret = -EFAULT; \ + __uaccess_begin_nospec(); \ + __ret = !unsafe_try_cmpxchg_user(_ptr, _oldp, _nval, _label); \ +_label: \ + __uaccess_end(); \ + __ret; \ + }) + /* * We want the unsafe accessors to always be inlined and use * the error labels - thus the macro games. diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index ba2dc19306303..388a40660c7b5 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -23,33 +23,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) static __always_inline unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) { - if (__builtin_constant_p(n)) { - unsigned long ret; - - switch (n) { - case 1: - ret = 0; - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u8 *)to, from, ret, - "b", "b", "=q", 1); - __uaccess_end(); - return ret; - case 2: - ret = 0; - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u16 *)to, from, ret, - "w", "w", "=r", 2); - __uaccess_end(); - return ret; - case 4: - ret = 0; - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u32 *)to, from, ret, - "l", "k", "=r", 4); - __uaccess_end(); - return ret; - } - } return __copy_user_ll(to, (__force const void *)from, n); } diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 5cd1caa8bc653..bc10e3dc64fed 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -65,117 +65,13 @@ copy_to_user_mcsafe(void *to, const void *from, unsigned len) static __always_inline __must_check unsigned long raw_copy_from_user(void *dst, const void __user *src, unsigned long size) { - int ret = 0; - - if (!__builtin_constant_p(size)) - return copy_user_generic(dst, (__force void *)src, size); - switch (size) { - case 1: - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u8 *)dst, (u8 __user *)src, - ret, "b", "b", "=q", 1); - __uaccess_end(); - return ret; - case 2: - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u16 *)dst, (u16 __user *)src, - ret, "w", "w", "=r", 2); - __uaccess_end(); - return ret; - case 4: - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u32 *)dst, (u32 __user *)src, - ret, "l", "k", "=r", 4); - __uaccess_end(); - return ret; - case 8: - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, - ret, "q", "", "=r", 8); - __uaccess_end(); - return ret; - case 10: - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, - ret, "q", "", "=r", 10); - if (likely(!ret)) - __get_user_asm_nozero(*(u16 *)(8 + (char *)dst), - (u16 __user *)(8 + (char __user *)src), - ret, "w", "w", "=r", 2); - __uaccess_end(); - return ret; - case 16: - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, - ret, "q", "", "=r", 16); - if (likely(!ret)) - __get_user_asm_nozero(*(u64 *)(8 + (char *)dst), - (u64 __user *)(8 + (char __user *)src), - ret, "q", "", "=r", 8); - __uaccess_end(); - return ret; - default: - return copy_user_generic(dst, (__force void *)src, size); - } + return copy_user_generic(dst, (__force void *)src, size); } static __always_inline __must_check unsigned long raw_copy_to_user(void __user *dst, const void *src, unsigned long size) { - int ret = 0; - - if (!__builtin_constant_p(size)) - return copy_user_generic((__force void *)dst, src, size); - switch (size) { - case 1: - __uaccess_begin(); - __put_user_asm(*(u8 *)src, (u8 __user *)dst, - ret, "b", "b", "iq", 1); - __uaccess_end(); - return ret; - case 2: - __uaccess_begin(); - __put_user_asm(*(u16 *)src, (u16 __user *)dst, - ret, "w", "w", "ir", 2); - __uaccess_end(); - return ret; - case 4: - __uaccess_begin(); - __put_user_asm(*(u32 *)src, (u32 __user *)dst, - ret, "l", "k", "ir", 4); - __uaccess_end(); - return ret; - case 8: - __uaccess_begin(); - __put_user_asm(*(u64 *)src, (u64 __user *)dst, - ret, "q", "", "er", 8); - __uaccess_end(); - return ret; - case 10: - __uaccess_begin(); - __put_user_asm(*(u64 *)src, (u64 __user *)dst, - ret, "q", "", "er", 10); - if (likely(!ret)) { - asm("":::"memory"); - __put_user_asm(4[(u16 *)src], 4 + (u16 __user *)dst, - ret, "w", "w", "ir", 2); - } - __uaccess_end(); - return ret; - case 16: - __uaccess_begin(); - __put_user_asm(*(u64 *)src, (u64 __user *)dst, - ret, "q", "", "er", 16); - if (likely(!ret)) { - asm("":::"memory"); - __put_user_asm(1[(u64 *)src], 1 + (u64 __user *)dst, - ret, "q", "", "er", 8); - } - __uaccess_end(); - return ret; - default: - return copy_user_generic((__force void *)dst, src, size); - } + return copy_user_generic((__force void *)dst, src, size); } static __always_inline __must_check diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index 62ca03ef5c657..9139b3e863161 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -379,12 +379,9 @@ struct xen_pmu_arch { * Prefix forces emulation of some non-trapping instructions. * Currently only CPUID. */ -#ifdef __ASSEMBLY__ -#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ; -#define XEN_CPUID XEN_EMULATE_PREFIX cpuid -#else -#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; " -#define XEN_CPUID XEN_EMULATE_PREFIX "cpuid" -#endif +#include + +#define XEN_EMULATE_PREFIX __ASM_FORM(.byte __XEN_EMULATE_PREFIX ;) +#define XEN_CPUID XEN_EMULATE_PREFIX __ASM_FORM(cpuid) #endif /* _ASM_X86_XEN_INTERFACE_H */ diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index 5b59d80f1d4e1..db9adc081c5af 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -35,7 +35,7 @@ struct mce { __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */ __u64 ppin; /* Protected Processor Inventory Number */ __u32 microcode; /* Microcode revision */ - __u64 kflags; /* Internal kernel use. See below */ + __u64 kflags; /* Internal kernel use */ }; #define MCE_GET_RECORD_LEN _IOR('M', 1, int) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 4137a7342d687..7b75658b7e9ac 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1339,6 +1339,17 @@ static int __init disable_acpi_pci(const struct dmi_system_id *d) return 0; } +static int __init disable_acpi_xsdt(const struct dmi_system_id *d) +{ + if (!acpi_force) { + pr_notice("%s detected: force use of acpi=rsdt\n", d->ident); + acpi_gbl_do_not_use_xsdt = TRUE; + } else { + pr_notice("Warning: DMI blacklist says broken, but acpi XSDT forced\n"); + } + return 0; +} + static int __init dmi_disable_acpi(const struct dmi_system_id *d) { if (!acpi_force) { @@ -1463,6 +1474,19 @@ static const struct dmi_system_id acpi_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), }, }, + /* + * Boxes that need ACPI XSDT use disabled due to corrupted tables + */ + { + .callback = disable_acpi_xsdt, + .ident = "Advantech DAC-BJ01", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "NEC"), + DMI_MATCH(DMI_PRODUCT_NAME, "Bearlake CRB Board"), + DMI_MATCH(DMI_BIOS_VERSION, "V1.12"), + DMI_MATCH(DMI_BIOS_DATE, "02/01/2011"), + }, + }, {} }; diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 9aceb4bf8e8e0..de67049a7ee43 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -19,12 +19,17 @@ #define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0 #define PCI_DEVICE_ID_AMD_17H_M30H_ROOT 0x1480 #define PCI_DEVICE_ID_AMD_17H_M60H_ROOT 0x1630 +#define PCI_DEVICE_ID_AMD_19H_M10H_ROOT 0x14a4 #define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464 #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494 #define PCI_DEVICE_ID_AMD_17H_M60H_DF_F4 0x144c #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444 #define PCI_DEVICE_ID_AMD_19H_DF_F4 0x1654 +#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F4 0x14b1 +#define PCI_DEVICE_ID_AMD_19H_M40H_ROOT 0x14b5 +#define PCI_DEVICE_ID_AMD_19H_M40H_DF_F4 0x167d +#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F4 0x166e /* Protect the PCI config register pairs used for SMN and DF indirect access. */ static DEFINE_MUTEX(smn_mutex); @@ -36,6 +41,8 @@ static const struct pci_device_id amd_root_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_ROOT) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_ROOT) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_ROOT) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M10H_ROOT) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_ROOT) }, {} }; @@ -58,6 +65,9 @@ const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M10H_DF_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_DF_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F3) }, {} }; EXPORT_SYMBOL_GPL(amd_nb_misc_ids); @@ -74,6 +84,9 @@ static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M10H_DF_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_DF_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) }, {} }; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 4e4476b832be2..68c7340325233 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -168,7 +168,7 @@ static __init int setup_apicpmtimer(char *s) { apic_calibrate_pmtmr = 1; notsc_setup(NULL); - return 0; + return 1; } __setup("apicpmtimer", setup_apicpmtimer); #endif diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 8d84494b57c4e..12c59eed769cc 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1012,6 +1012,8 @@ static void init_amd(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_IRPERF) && !cpu_has_amd_erratum(c, amd_erratum_1054)) msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT); + + check_null_seg_clears_base(c); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e817aaeef254c..57efa90f3fbd0 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -40,8 +40,10 @@ static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); static void __init mds_select_mitigation(void); -static void __init mds_print_mitigation(void); +static void __init md_clear_update_mitigation(void); +static void __init md_clear_select_mitigation(void); static void __init taa_select_mitigation(void); +static void __init mmio_select_mitigation(void); static void __init srbds_select_mitigation(void); /* The base value of the SPEC_CTRL MSR that always has to be preserved. */ @@ -76,6 +78,10 @@ EXPORT_SYMBOL_GPL(mds_user_clear); DEFINE_STATIC_KEY_FALSE(mds_idle_clear); EXPORT_SYMBOL_GPL(mds_idle_clear); +/* Controls CPU Fill buffer clear before KVM guest MMIO accesses */ +DEFINE_STATIC_KEY_FALSE(mmio_stale_data_clear); +EXPORT_SYMBOL_GPL(mmio_stale_data_clear); + void __init check_bugs(void) { identify_boot_cpu(); @@ -108,16 +114,9 @@ void __init check_bugs(void) spectre_v2_select_mitigation(); ssb_select_mitigation(); l1tf_select_mitigation(); - mds_select_mitigation(); - taa_select_mitigation(); + md_clear_select_mitigation(); srbds_select_mitigation(); - /* - * As MDS and TAA mitigations are inter-related, print MDS - * mitigation until after TAA mitigation selection is done. - */ - mds_print_mitigation(); - arch_smt_update(); #ifdef CONFIG_X86_32 @@ -257,14 +256,6 @@ static void __init mds_select_mitigation(void) } } -static void __init mds_print_mitigation(void) -{ - if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off()) - return; - - pr_info("%s\n", mds_strings[mds_mitigation]); -} - static int __init mds_cmdline(char *str) { if (!boot_cpu_has_bug(X86_BUG_MDS)) @@ -312,7 +303,7 @@ static void __init taa_select_mitigation(void) /* TSX previously disabled by tsx=off */ if (!boot_cpu_has(X86_FEATURE_RTM)) { taa_mitigation = TAA_MITIGATION_TSX_DISABLED; - goto out; + return; } if (cpu_mitigations_off()) { @@ -326,7 +317,7 @@ static void __init taa_select_mitigation(void) */ if (taa_mitigation == TAA_MITIGATION_OFF && mds_mitigation == MDS_MITIGATION_OFF) - goto out; + return; if (boot_cpu_has(X86_FEATURE_MD_CLEAR)) taa_mitigation = TAA_MITIGATION_VERW; @@ -358,18 +349,6 @@ static void __init taa_select_mitigation(void) if (taa_nosmt || cpu_mitigations_auto_nosmt()) cpu_smt_disable(false); - - /* - * Update MDS mitigation, if necessary, as the mds_user_clear is - * now enabled for TAA mitigation. - */ - if (mds_mitigation == MDS_MITIGATION_OFF && - boot_cpu_has_bug(X86_BUG_MDS)) { - mds_mitigation = MDS_MITIGATION_FULL; - mds_select_mitigation(); - } -out: - pr_info("%s\n", taa_strings[taa_mitigation]); } static int __init tsx_async_abort_parse_cmdline(char *str) @@ -393,6 +372,151 @@ static int __init tsx_async_abort_parse_cmdline(char *str) } early_param("tsx_async_abort", tsx_async_abort_parse_cmdline); +#undef pr_fmt +#define pr_fmt(fmt) "MMIO Stale Data: " fmt + +enum mmio_mitigations { + MMIO_MITIGATION_OFF, + MMIO_MITIGATION_UCODE_NEEDED, + MMIO_MITIGATION_VERW, +}; + +/* Default mitigation for Processor MMIO Stale Data vulnerabilities */ +static enum mmio_mitigations mmio_mitigation __ro_after_init = MMIO_MITIGATION_VERW; +static bool mmio_nosmt __ro_after_init = false; + +static const char * const mmio_strings[] = { + [MMIO_MITIGATION_OFF] = "Vulnerable", + [MMIO_MITIGATION_UCODE_NEEDED] = "Vulnerable: Clear CPU buffers attempted, no microcode", + [MMIO_MITIGATION_VERW] = "Mitigation: Clear CPU buffers", +}; + +static void __init mmio_select_mitigation(void) +{ + u64 ia32_cap; + + if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) || + cpu_mitigations_off()) { + mmio_mitigation = MMIO_MITIGATION_OFF; + return; + } + + if (mmio_mitigation == MMIO_MITIGATION_OFF) + return; + + ia32_cap = x86_read_arch_cap_msr(); + + /* + * Enable CPU buffer clear mitigation for host and VMM, if also affected + * by MDS or TAA. Otherwise, enable mitigation for VMM only. + */ + if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) && + boot_cpu_has(X86_FEATURE_RTM))) + static_branch_enable(&mds_user_clear); + else + static_branch_enable(&mmio_stale_data_clear); + + /* + * If Processor-MMIO-Stale-Data bug is present and Fill Buffer data can + * be propagated to uncore buffers, clearing the Fill buffers on idle + * is required irrespective of SMT state. + */ + if (!(ia32_cap & ARCH_CAP_FBSDP_NO)) + static_branch_enable(&mds_idle_clear); + + /* + * Check if the system has the right microcode. + * + * CPU Fill buffer clear mitigation is enumerated by either an explicit + * FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS + * affected systems. + */ + if ((ia32_cap & ARCH_CAP_FB_CLEAR) || + (boot_cpu_has(X86_FEATURE_MD_CLEAR) && + boot_cpu_has(X86_FEATURE_FLUSH_L1D) && + !(ia32_cap & ARCH_CAP_MDS_NO))) + mmio_mitigation = MMIO_MITIGATION_VERW; + else + mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED; + + if (mmio_nosmt || cpu_mitigations_auto_nosmt()) + cpu_smt_disable(false); +} + +static int __init mmio_stale_data_parse_cmdline(char *str) +{ + if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) + return 0; + + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) { + mmio_mitigation = MMIO_MITIGATION_OFF; + } else if (!strcmp(str, "full")) { + mmio_mitigation = MMIO_MITIGATION_VERW; + } else if (!strcmp(str, "full,nosmt")) { + mmio_mitigation = MMIO_MITIGATION_VERW; + mmio_nosmt = true; + } + + return 0; +} +early_param("mmio_stale_data", mmio_stale_data_parse_cmdline); + +#undef pr_fmt +#define pr_fmt(fmt) "" fmt + +static void __init md_clear_update_mitigation(void) +{ + if (cpu_mitigations_off()) + return; + + if (!static_key_enabled(&mds_user_clear)) + goto out; + + /* + * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data + * mitigation, if necessary. + */ + if (mds_mitigation == MDS_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_MDS)) { + mds_mitigation = MDS_MITIGATION_FULL; + mds_select_mitigation(); + } + if (taa_mitigation == TAA_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_TAA)) { + taa_mitigation = TAA_MITIGATION_VERW; + taa_select_mitigation(); + } + if (mmio_mitigation == MMIO_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) { + mmio_mitigation = MMIO_MITIGATION_VERW; + mmio_select_mitigation(); + } +out: + if (boot_cpu_has_bug(X86_BUG_MDS)) + pr_info("MDS: %s\n", mds_strings[mds_mitigation]); + if (boot_cpu_has_bug(X86_BUG_TAA)) + pr_info("TAA: %s\n", taa_strings[taa_mitigation]); + if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) + pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]); +} + +static void __init md_clear_select_mitigation(void) +{ + mds_select_mitigation(); + taa_select_mitigation(); + mmio_select_mitigation(); + + /* + * As MDS, TAA and MMIO Stale Data mitigations are inter-related, update + * and print their mitigation after MDS, TAA and MMIO Stale Data + * mitigation selection is done. + */ + md_clear_update_mitigation(); +} + #undef pr_fmt #define pr_fmt(fmt) "SRBDS: " fmt @@ -454,11 +578,13 @@ static void __init srbds_select_mitigation(void) return; /* - * Check to see if this is one of the MDS_NO systems supporting - * TSX that are only exposed to SRBDS when TSX is enabled. + * Check to see if this is one of the MDS_NO systems supporting TSX that + * are only exposed to SRBDS when TSX is enabled or when CPU is affected + * by Processor MMIO Stale Data vulnerability. */ ia32_cap = x86_read_arch_cap_msr(); - if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM)) + if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) && + !boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) srbds_mitigation = SRBDS_MITIGATION_TSX_OFF; else if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) srbds_mitigation = SRBDS_MITIGATION_HYPERVISOR; @@ -917,6 +1043,49 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void) return SPECTRE_V2_RETPOLINE; } +static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode) +{ + /* + * Similar to context switches, there are two types of RSB attacks + * after VM exit: + * + * 1) RSB underflow + * + * 2) Poisoned RSB entry + * + * When retpoline is enabled, both are mitigated by filling/clearing + * the RSB. + * + * When IBRS is enabled, while #1 would be mitigated by the IBRS branch + * prediction isolation protections, RSB still needs to be cleared + * because of #2. Note that SMEP provides no protection here, unlike + * user-space-poisoned RSB entries. + * + * eIBRS should protect against RSB poisoning, but if the EIBRS_PBRSB + * bug is present then a LITE version of RSB protection is required, + * just a single call needs to retire before a RET is executed. + */ + switch (mode) { + case SPECTRE_V2_NONE: + /* These modes already fill RSB at vmexit */ + case SPECTRE_V2_LFENCE: + case SPECTRE_V2_RETPOLINE: + case SPECTRE_V2_EIBRS_RETPOLINE: + return; + + case SPECTRE_V2_EIBRS_LFENCE: + case SPECTRE_V2_EIBRS: + if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) { + setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE); + pr_info("Spectre v2 / PBRSB-eIBRS: Retire a single CALL on VMEXIT\n"); + } + return; + } + + pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation at VM exit"); + dump_stack(); +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -1009,6 +1178,8 @@ static void __init spectre_v2_select_mitigation(void) setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); + spectre_v2_determine_rsb_fill_type_at_vmexit(mode); + /* * Retpoline means the kernel is safe because it has no indirect * branches. Enhanced IBRS protects firmware too, so, enable restricted @@ -1066,6 +1237,8 @@ static void update_indir_branch_cond(void) /* Update the static key controlling the MDS CPU buffer clear in idle */ static void update_mds_branch_idle(void) { + u64 ia32_cap = x86_read_arch_cap_msr(); + /* * Enable the idle clearing if SMT is active on CPUs which are * affected only by MSBDS and not any other MDS variant. @@ -1077,14 +1250,17 @@ static void update_mds_branch_idle(void) if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY)) return; - if (sched_smt_active()) + if (sched_smt_active()) { static_branch_enable(&mds_idle_clear); - else + } else if (mmio_mitigation == MMIO_MITIGATION_OFF || + (ia32_cap & ARCH_CAP_FBSDP_NO)) { static_branch_disable(&mds_idle_clear); + } } #define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" #define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n" +#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n" void cpu_bugs_smt_update(void) { @@ -1129,6 +1305,16 @@ void cpu_bugs_smt_update(void) break; } + switch (mmio_mitigation) { + case MMIO_MITIGATION_VERW: + case MMIO_MITIGATION_UCODE_NEEDED: + if (sched_smt_active()) + pr_warn_once(MMIO_MSG_SMT); + break; + case MMIO_MITIGATION_OFF: + break; + } + mutex_unlock(&spec_ctrl_mutex); } @@ -1692,6 +1878,20 @@ static ssize_t tsx_async_abort_show_state(char *buf) sched_smt_active() ? "vulnerable" : "disabled"); } +static ssize_t mmio_stale_data_show_state(char *buf) +{ + if (mmio_mitigation == MMIO_MITIGATION_OFF) + return sysfs_emit(buf, "%s\n", mmio_strings[mmio_mitigation]); + + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { + return sysfs_emit(buf, "%s; SMT Host state unknown\n", + mmio_strings[mmio_mitigation]); + } + + return sysfs_emit(buf, "%s; SMT %s\n", mmio_strings[mmio_mitigation], + sched_smt_active() ? "vulnerable" : "disabled"); +} + static char *stibp_state(void) { if (spectre_v2_in_eibrs_mode(spectre_v2_enabled)) @@ -1724,6 +1924,19 @@ static char *ibpb_state(void) return ""; } +static char *pbrsb_eibrs_state(void) +{ + if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) { + if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) || + boot_cpu_has(X86_FEATURE_RETPOLINE)) + return ", PBRSB-eIBRS: SW sequence"; + else + return ", PBRSB-eIBRS: Vulnerable"; + } else { + return ", PBRSB-eIBRS: Not affected"; + } +} + static ssize_t spectre_v2_show_state(char *buf) { if (spectre_v2_enabled == SPECTRE_V2_LFENCE) @@ -1736,12 +1949,13 @@ static ssize_t spectre_v2_show_state(char *buf) spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) return sprintf(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n"); - return sprintf(buf, "%s%s%s%s%s%s\n", + return sprintf(buf, "%s%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], ibpb_state(), boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", stibp_state(), boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", + pbrsb_eibrs_state(), spectre_v2_module_string()); } @@ -1792,6 +2006,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_SRBDS: return srbds_show_state(buf); + case X86_BUG_MMIO_STALE_DATA: + return mmio_stale_data_show_state(buf); + default: break; } @@ -1843,4 +2060,9 @@ ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char * { return cpu_show_common(dev, attr, buf, X86_BUG_SRBDS); } + +ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); +} #endif diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index a9a08c7eadf39..adf41dcca0746 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -985,7 +985,7 @@ static void ci_leaf_init(struct cacheinfo *this_leaf, this_leaf->priv = base->nb; } -static int __init_cache_level(unsigned int cpu) +int init_cache_level(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -1014,7 +1014,7 @@ static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs) id4_regs->id = c->apicid >> index_msb; } -static int __populate_cache_leaves(unsigned int cpu) +int populate_cache_leaves(unsigned int cpu) { unsigned int idx, ret; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -1033,6 +1033,3 @@ static int __populate_cache_leaves(unsigned int cpu) return 0; } - -DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level) -DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f961a56e9da3f..be1484bf3e2ac 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -59,6 +59,7 @@ #endif #include "cpu.h" +#include "resctrl/internal.h" u32 elf_hwcap2 __read_mostly; @@ -877,6 +878,7 @@ static void init_cqm(struct cpuinfo_x86 *c) if (!cpu_has(c, X86_FEATURE_CQM_LLC)) { c->x86_cache_max_rmid = -1; c->x86_cache_occ_scale = -1; + c->x86_cache_mbm_width_offset = -1; return; } @@ -893,6 +895,10 @@ static void init_cqm(struct cpuinfo_x86 *c) c->x86_cache_max_rmid = ecx; c->x86_cache_occ_scale = ebx; + c->x86_cache_mbm_width_offset = eax & 0xff; + + if (c->x86_vendor == X86_VENDOR_AMD && !c->x86_cache_mbm_width_offset) + c->x86_cache_mbm_width_offset = MBM_CNTR_WIDTH_OFFSET_AMD; } } @@ -961,6 +967,9 @@ void get_cpu_cap(struct cpuinfo_x86 *c) if (c->extended_cpuid_level >= 0x8000000a) c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); + if (c->extended_cpuid_level >= 0x8000001f) + c->x86_capability[CPUID_8000_001F_EAX] = cpuid_eax(0x8000001f); + init_scattered_cpuid_features(c); init_speculation_control(c); init_cqm(c); @@ -1025,6 +1034,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #define NO_SWAPGS BIT(6) #define NO_ITLB_MULTIHIT BIT(7) #define NO_SPECTRE_V2 BIT(8) +#define NO_EIBRS_PBRSB BIT(9) #define VULNWL(_vendor, _family, _model, _whitelist) \ { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } @@ -1065,7 +1075,7 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), /* * Technically, swapgs isn't serializing on AMD (despite it previously @@ -1075,7 +1085,9 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { * good enough for our purposes. */ - VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_TREMONT, NO_EIBRS_PBRSB), + VULNWL_INTEL(ATOM_TREMONT_L, NO_EIBRS_PBRSB), + VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), /* AMD Family 0xf - 0x12 */ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), @@ -1099,18 +1111,42 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { X86_FEATURE_ANY, issues) #define SRBDS BIT(0) +/* CPU is affected by X86_BUG_MMIO_STALE_DATA */ +#define MMIO BIT(1) +/* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */ +#define MMIO_SBDS BIT(2) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL_X, BIT(2) | BIT(4), MMIO), + VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x5), MMIO), VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO), VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO), VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(SKYLAKE_X, BIT(3) | BIT(4) | BIT(6) | + BIT(7) | BIT(0xB), MMIO), + VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO), VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0xC), SRBDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0xD), SRBDS), + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO), + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0x8), SRBDS), + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO), + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0x8), SRBDS), + VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x1, 0x1), MMIO), + VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0x6), MMIO), + VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO), + VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS), {} }; @@ -1131,6 +1167,13 @@ u64 x86_read_arch_cap_msr(void) return ia32_cap; } +static bool arch_cap_mmio_immune(u64 ia32_cap) +{ + return (ia32_cap & ARCH_CAP_FBSDP_NO && + ia32_cap & ARCH_CAP_PSDP_NO && + ia32_cap & ARCH_CAP_SBDR_SSDP_NO); +} + static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = x86_read_arch_cap_msr(); @@ -1184,12 +1227,32 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) /* * SRBDS affects CPUs which support RDRAND or RDSEED and are listed * in the vulnerability blacklist. + * + * Some of the implications and mitigation of Shared Buffers Data + * Sampling (SBDS) are similar to SRBDS. Give SBDS same treatment as + * SRBDS. */ if ((cpu_has(c, X86_FEATURE_RDRAND) || cpu_has(c, X86_FEATURE_RDSEED)) && - cpu_matches(cpu_vuln_blacklist, SRBDS)) + cpu_matches(cpu_vuln_blacklist, SRBDS | MMIO_SBDS)) setup_force_cpu_bug(X86_BUG_SRBDS); + /* + * Processor MMIO Stale Data bug enumeration + * + * Affected CPU list is generally enough to enumerate the vulnerability, + * but for virtualization case check for ARCH_CAP MSR bits also, VMM may + * not want the guest to enumerate the bug. + */ + if (cpu_matches(cpu_vuln_blacklist, MMIO) && + !arch_cap_mmio_immune(ia32_cap)) + setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); + + if (cpu_has(c, X86_FEATURE_IBRS_ENHANCED) && + !cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) && + !(ia32_cap & ARCH_CAP_PBRSB_NO)) + setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; @@ -1336,9 +1399,8 @@ void __init early_cpu_init(void) early_identify_cpu(&boot_cpu_data); } -static void detect_null_seg_behavior(struct cpuinfo_x86 *c) +static bool detect_null_seg_behavior(void) { -#ifdef CONFIG_X86_64 /* * Empirically, writing zero to a segment selector on AMD does * not clear the base, whereas writing zero to a segment @@ -1359,10 +1421,43 @@ static void detect_null_seg_behavior(struct cpuinfo_x86 *c) wrmsrl(MSR_FS_BASE, 1); loadsegment(fs, 0); rdmsrl(MSR_FS_BASE, tmp); - if (tmp != 0) - set_cpu_bug(c, X86_BUG_NULL_SEG); wrmsrl(MSR_FS_BASE, old_base); -#endif + return tmp == 0; +} + +void check_null_seg_clears_base(struct cpuinfo_x86 *c) +{ + /* BUG_NULL_SEG is only relevant with 64bit userspace */ + if (!IS_ENABLED(CONFIG_X86_64)) + return; + + /* Zen3 CPUs advertise Null Selector Clears Base in CPUID. */ + if (c->extended_cpuid_level >= 0x80000021 && + cpuid_eax(0x80000021) & BIT(6)) + return; + + /* + * CPUID bit above wasn't set. If this kernel is still running + * as a HV guest, then the HV has decided not to advertize + * that CPUID bit for whatever reason. For example, one + * member of the migration pool might be vulnerable. Which + * means, the bug is present: set the BUG flag and return. + */ + if (cpu_has(c, X86_FEATURE_HYPERVISOR)) { + set_cpu_bug(c, X86_BUG_NULL_SEG); + return; + } + + /* + * Zen2 CPUs also have this behaviour, but no CPUID bit. + * 0x18 is the respective family for Hygon. + */ + if ((c->x86 == 0x17 || c->x86 == 0x18) && + detect_null_seg_behavior()) + return; + + /* All the remaining ones are affected */ + set_cpu_bug(c, X86_BUG_NULL_SEG); } static void generic_identify(struct cpuinfo_x86 *c) @@ -1398,8 +1493,6 @@ static void generic_identify(struct cpuinfo_x86 *c) get_model_name(c); /* Default name */ - detect_null_seg_behavior(c); - /* * ESPFIX is a strange bug. All real CPUs have it. Paravirt * systems that run Linux at CPL > 0 may or may not have the diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 9d033693519aa..4d04c127c4a79 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -73,6 +73,7 @@ extern int detect_extended_topology_early(struct cpuinfo_x86 *c); extern int detect_extended_topology(struct cpuinfo_x86 *c); extern int detect_ht_early(struct cpuinfo_x86 *c); extern void detect_ht(struct cpuinfo_x86 *c); +extern void check_null_seg_clears_base(struct cpuinfo_x86 *c); unsigned int aperfmperf_get_khz(int cpu); diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index 62e9a982adaf9..b232bd0be78d6 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -350,6 +350,8 @@ static void init_hygon(struct cpuinfo_x86 *c) /* Hygon CPUs don't reset SS attributes on SYSRET, Xen does. */ if (!cpu_has(c, X86_FEATURE_XENPV)) set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); + + check_null_seg_clears_base(c); } static void cpu_detect_tlb_hygon(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 11d5c5950e2d3..44688917d51fd 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -97,7 +97,7 @@ static bool ring3mwait_disabled __read_mostly; static int __init ring3mwait_disable(char *__unused) { ring3mwait_disabled = true; - return 0; + return 1; } __setup("ring3mwait=disable", ring3mwait_disable); diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 05f48779fcedc..640b40583e76e 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -71,33 +71,58 @@ static const char * const smca_umc_block_names[] = { "misc_umc" }; +#define HWID_MCATYPE(hwid, mcatype) (((hwid) << 16) | (mcatype)) + +struct smca_hwid { + unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */ + u32 hwid_mcatype; /* (hwid,mcatype) tuple */ +}; + +struct smca_bank { + const struct smca_hwid *hwid; + u32 id; /* Value of MCA_IPID[InstanceId]. */ + u8 sysfs_id; /* Value used for sysfs name. */ +}; + +static DEFINE_PER_CPU_READ_MOSTLY(struct smca_bank[MAX_NR_BANKS], smca_banks); +static DEFINE_PER_CPU_READ_MOSTLY(u8[N_SMCA_BANK_TYPES], smca_bank_counts); + struct smca_bank_name { const char *name; /* Short name for sysfs */ const char *long_name; /* Long name for pretty-printing */ }; static struct smca_bank_name smca_names[] = { - [SMCA_LS] = { "load_store", "Load Store Unit" }, - [SMCA_LS_V2] = { "load_store", "Load Store Unit" }, - [SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" }, - [SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" }, - [SMCA_DE] = { "decode_unit", "Decode Unit" }, - [SMCA_RESERVED] = { "reserved", "Reserved" }, - [SMCA_EX] = { "execution_unit", "Execution Unit" }, - [SMCA_FP] = { "floating_point", "Floating Point Unit" }, - [SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" }, - [SMCA_CS] = { "coherent_slave", "Coherent Slave" }, - [SMCA_CS_V2] = { "coherent_slave", "Coherent Slave" }, - [SMCA_PIE] = { "pie", "Power, Interrupts, etc." }, - [SMCA_UMC] = { "umc", "Unified Memory Controller" }, - [SMCA_PB] = { "param_block", "Parameter Block" }, - [SMCA_PSP] = { "psp", "Platform Security Processor" }, - [SMCA_PSP_V2] = { "psp", "Platform Security Processor" }, - [SMCA_SMU] = { "smu", "System Management Unit" }, - [SMCA_SMU_V2] = { "smu", "System Management Unit" }, - [SMCA_MP5] = { "mp5", "Microprocessor 5 Unit" }, - [SMCA_NBIO] = { "nbio", "Northbridge IO Unit" }, - [SMCA_PCIE] = { "pcie", "PCI Express Unit" }, + [SMCA_LS ... SMCA_LS_V2] = { "load_store", "Load Store Unit" }, + [SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" }, + [SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" }, + [SMCA_DE] = { "decode_unit", "Decode Unit" }, + [SMCA_RESERVED] = { "reserved", "Reserved" }, + [SMCA_EX] = { "execution_unit", "Execution Unit" }, + [SMCA_FP] = { "floating_point", "Floating Point Unit" }, + [SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" }, + [SMCA_CS ... SMCA_CS_V2] = { "coherent_slave", "Coherent Slave" }, + [SMCA_PIE] = { "pie", "Power, Interrupts, etc." }, + + /* UMC v2 is separate because both of them can exist in a single system. */ + [SMCA_UMC] = { "umc", "Unified Memory Controller" }, + [SMCA_UMC_V2] = { "umc_v2", "Unified Memory Controller v2" }, + [SMCA_PB] = { "param_block", "Parameter Block" }, + [SMCA_PSP ... SMCA_PSP_V2] = { "psp", "Platform Security Processor" }, + [SMCA_SMU ... SMCA_SMU_V2] = { "smu", "System Management Unit" }, + [SMCA_MP5] = { "mp5", "Microprocessor 5 Unit" }, + [SMCA_MPDMA] = { "mpdma", "MPDMA Unit" }, + [SMCA_NBIO] = { "nbio", "Northbridge IO Unit" }, + [SMCA_PCIE ... SMCA_PCIE_V2] = { "pcie", "PCI Express Unit" }, + [SMCA_XGMI_PCS] = { "xgmi_pcs", "Ext Global Memory Interconnect PCS Unit" }, + [SMCA_NBIF] = { "nbif", "NBIF Unit" }, + [SMCA_SHUB] = { "shub", "System Hub Unit" }, + [SMCA_SATA] = { "sata", "SATA Unit" }, + [SMCA_USB] = { "usb", "USB Unit" }, + [SMCA_GMI_PCS] = { "gmi_pcs", "Global Memory Interconnect PCS Unit" }, + [SMCA_XGMI_PHY] = { "xgmi_phy", "Ext Global Memory Interconnect PHY Unit" }, + [SMCA_WAFL_PHY] = { "wafl_phy", "WAFL PHY Unit" }, + [SMCA_GMI_PHY] = { "gmi_phy", "Global Memory Interconnect PHY Unit" }, }; static const char *smca_get_name(enum smca_bank_types t) @@ -117,69 +142,82 @@ const char *smca_get_long_name(enum smca_bank_types t) } EXPORT_SYMBOL_GPL(smca_get_long_name); -static enum smca_bank_types smca_get_bank_type(unsigned int bank) +enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank) { struct smca_bank *b; if (bank >= MAX_NR_BANKS) return N_SMCA_BANK_TYPES; - b = &smca_banks[bank]; + b = &per_cpu(smca_banks, cpu)[bank]; if (!b->hwid) return N_SMCA_BANK_TYPES; return b->hwid->bank_type; } +EXPORT_SYMBOL_GPL(smca_get_bank_type); -static struct smca_hwid smca_hwid_mcatypes[] = { - /* { bank_type, hwid_mcatype, xec_bitmap } */ +static const struct smca_hwid smca_hwid_mcatypes[] = { + /* { bank_type, hwid_mcatype } */ /* Reserved type */ - { SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 }, + { SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0) }, /* ZN Core (HWID=0xB0) MCA types */ - { SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFFF }, - { SMCA_LS_V2, HWID_MCATYPE(0xB0, 0x10), 0xFFFFFF }, - { SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF }, - { SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF }, - { SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF }, + { SMCA_LS, HWID_MCATYPE(0xB0, 0x0) }, + { SMCA_LS_V2, HWID_MCATYPE(0xB0, 0x10) }, + { SMCA_IF, HWID_MCATYPE(0xB0, 0x1) }, + { SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2) }, + { SMCA_DE, HWID_MCATYPE(0xB0, 0x3) }, /* HWID 0xB0 MCATYPE 0x4 is Reserved */ - { SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0xFFF }, - { SMCA_FP, HWID_MCATYPE(0xB0, 0x6), 0x7F }, - { SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF }, + { SMCA_EX, HWID_MCATYPE(0xB0, 0x5) }, + { SMCA_FP, HWID_MCATYPE(0xB0, 0x6) }, + { SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7) }, /* Data Fabric MCA types */ - { SMCA_CS, HWID_MCATYPE(0x2E, 0x0), 0x1FF }, - { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0x1F }, - { SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2), 0x3FFF }, + { SMCA_CS, HWID_MCATYPE(0x2E, 0x0) }, + { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1) }, + { SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2) }, /* Unified Memory Controller MCA type */ - { SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0xFF }, + { SMCA_UMC, HWID_MCATYPE(0x96, 0x0) }, + { SMCA_UMC_V2, HWID_MCATYPE(0x96, 0x1) }, /* Parameter Block MCA type */ - { SMCA_PB, HWID_MCATYPE(0x05, 0x0), 0x1 }, + { SMCA_PB, HWID_MCATYPE(0x05, 0x0) }, /* Platform Security Processor MCA type */ - { SMCA_PSP, HWID_MCATYPE(0xFF, 0x0), 0x1 }, - { SMCA_PSP_V2, HWID_MCATYPE(0xFF, 0x1), 0x3FFFF }, + { SMCA_PSP, HWID_MCATYPE(0xFF, 0x0) }, + { SMCA_PSP_V2, HWID_MCATYPE(0xFF, 0x1) }, /* System Management Unit MCA type */ - { SMCA_SMU, HWID_MCATYPE(0x01, 0x0), 0x1 }, - { SMCA_SMU_V2, HWID_MCATYPE(0x01, 0x1), 0x7FF }, + { SMCA_SMU, HWID_MCATYPE(0x01, 0x0) }, + { SMCA_SMU_V2, HWID_MCATYPE(0x01, 0x1) }, /* Microprocessor 5 Unit MCA type */ - { SMCA_MP5, HWID_MCATYPE(0x01, 0x2), 0x3FF }, + { SMCA_MP5, HWID_MCATYPE(0x01, 0x2) }, + + /* MPDMA MCA type */ + { SMCA_MPDMA, HWID_MCATYPE(0x01, 0x3) }, /* Northbridge IO Unit MCA type */ - { SMCA_NBIO, HWID_MCATYPE(0x18, 0x0), 0x1F }, + { SMCA_NBIO, HWID_MCATYPE(0x18, 0x0) }, /* PCI Express Unit MCA type */ - { SMCA_PCIE, HWID_MCATYPE(0x46, 0x0), 0x1F }, + { SMCA_PCIE, HWID_MCATYPE(0x46, 0x0) }, + { SMCA_PCIE_V2, HWID_MCATYPE(0x46, 0x1) }, + + { SMCA_XGMI_PCS, HWID_MCATYPE(0x50, 0x0) }, + { SMCA_NBIF, HWID_MCATYPE(0x6C, 0x0) }, + { SMCA_SHUB, HWID_MCATYPE(0x80, 0x0) }, + { SMCA_SATA, HWID_MCATYPE(0xA8, 0x0) }, + { SMCA_USB, HWID_MCATYPE(0xAA, 0x0) }, + { SMCA_GMI_PCS, HWID_MCATYPE(0x241, 0x0) }, + { SMCA_XGMI_PHY, HWID_MCATYPE(0x259, 0x0) }, + { SMCA_WAFL_PHY, HWID_MCATYPE(0x267, 0x0) }, + { SMCA_GMI_PHY, HWID_MCATYPE(0x269, 0x0) }, }; -struct smca_bank smca_banks[MAX_NR_BANKS]; -EXPORT_SYMBOL_GPL(smca_banks); - /* * In SMCA enabled processors, we can have multiple banks for a given IP type. * So to define a unique name for each bank, we use a temp c-string to append @@ -230,8 +268,9 @@ static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu) static void smca_configure(unsigned int bank, unsigned int cpu) { + u8 *bank_counts = this_cpu_ptr(smca_bank_counts); + const struct smca_hwid *s_hwid; unsigned int i, hwid_mcatype; - struct smca_hwid *s_hwid; u32 high, low; u32 smca_config = MSR_AMD64_SMCA_MCx_CONFIG(bank); @@ -267,10 +306,6 @@ static void smca_configure(unsigned int bank, unsigned int cpu) smca_set_misc_banks_map(bank, cpu); - /* Return early if this bank was already initialized. */ - if (smca_banks[bank].hwid && smca_banks[bank].hwid->hwid_mcatype != 0) - return; - if (rdmsr_safe(MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) { pr_warn("Failed to read MCA_IPID for bank %d\n", bank); return; @@ -281,10 +316,11 @@ static void smca_configure(unsigned int bank, unsigned int cpu) for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) { s_hwid = &smca_hwid_mcatypes[i]; + if (hwid_mcatype == s_hwid->hwid_mcatype) { - smca_banks[bank].hwid = s_hwid; - smca_banks[bank].id = low; - smca_banks[bank].sysfs_id = s_hwid->count++; + this_cpu_ptr(smca_banks)[bank].hwid = s_hwid; + this_cpu_ptr(smca_banks)[bank].id = low; + this_cpu_ptr(smca_banks)[bank].sysfs_id = bank_counts[s_hwid->bank_type]++; break; } } @@ -566,16 +602,21 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, bool amd_filter_mce(struct mce *m) { - enum smca_bank_types bank_type = smca_get_bank_type(m->bank); + enum smca_bank_types bank_type = smca_get_bank_type(m->extcpu, m->bank); struct cpuinfo_x86 *c = &boot_cpu_data; - u8 xec = (m->status >> 16) & 0x3F; /* See Family 17h Models 10h-2Fh Erratum #1114. */ if (c->x86 == 0x17 && c->x86_model >= 0x10 && c->x86_model <= 0x2F && - bank_type == SMCA_IF && xec == 10) + bank_type == SMCA_IF && XEC(m->status, 0x3f) == 10) return true; + /* NB GART TLB error reporting is disabled by default. */ + if (c->x86 < 0x17) { + if (m->bank == 4 && XEC(m->status, 0x1f) == 0x5) + return true; + } + return false; } @@ -599,7 +640,7 @@ void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank) } else if (c->x86 == 0x17 && (c->x86_model >= 0x10 && c->x86_model <= 0x2F)) { - if (smca_get_bank_type(bank) != SMCA_IF) + if (smca_get_bank_type(smp_processor_id(), bank) != SMCA_IF) return; msrs[0] = MSR_AMD64_SMCA_MCx_MISC(bank); @@ -661,213 +702,13 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) deferred_error_interrupt_enable(c); } -int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) -{ - u64 dram_base_addr, dram_limit_addr, dram_hole_base; - /* We start from the normalized address */ - u64 ret_addr = norm_addr; - - u32 tmp; - - u8 die_id_shift, die_id_mask, socket_id_shift, socket_id_mask; - u8 intlv_num_dies, intlv_num_chan, intlv_num_sockets; - u8 intlv_addr_sel, intlv_addr_bit; - u8 num_intlv_bits, hashed_bit; - u8 lgcy_mmio_hole_en, base = 0; - u8 cs_mask, cs_id = 0; - bool hash_enabled = false; - - /* Read D18F0x1B4 (DramOffset), check if base 1 is used. */ - if (amd_df_indirect_read(nid, 0, 0x1B4, umc, &tmp)) - goto out_err; - - /* Remove HiAddrOffset from normalized address, if enabled: */ - if (tmp & BIT(0)) { - u64 hi_addr_offset = (tmp & GENMASK_ULL(31, 20)) << 8; - - if (norm_addr >= hi_addr_offset) { - ret_addr -= hi_addr_offset; - base = 1; - } - } - - /* Read D18F0x110 (DramBaseAddress). */ - if (amd_df_indirect_read(nid, 0, 0x110 + (8 * base), umc, &tmp)) - goto out_err; - - /* Check if address range is valid. */ - if (!(tmp & BIT(0))) { - pr_err("%s: Invalid DramBaseAddress range: 0x%x.\n", - __func__, tmp); - goto out_err; - } - - lgcy_mmio_hole_en = tmp & BIT(1); - intlv_num_chan = (tmp >> 4) & 0xF; - intlv_addr_sel = (tmp >> 8) & 0x7; - dram_base_addr = (tmp & GENMASK_ULL(31, 12)) << 16; - - /* {0, 1, 2, 3} map to address bits {8, 9, 10, 11} respectively */ - if (intlv_addr_sel > 3) { - pr_err("%s: Invalid interleave address select %d.\n", - __func__, intlv_addr_sel); - goto out_err; - } - - /* Read D18F0x114 (DramLimitAddress). */ - if (amd_df_indirect_read(nid, 0, 0x114 + (8 * base), umc, &tmp)) - goto out_err; - - intlv_num_sockets = (tmp >> 8) & 0x1; - intlv_num_dies = (tmp >> 10) & 0x3; - dram_limit_addr = ((tmp & GENMASK_ULL(31, 12)) << 16) | GENMASK_ULL(27, 0); - - intlv_addr_bit = intlv_addr_sel + 8; - - /* Re-use intlv_num_chan by setting it equal to log2(#channels) */ - switch (intlv_num_chan) { - case 0: intlv_num_chan = 0; break; - case 1: intlv_num_chan = 1; break; - case 3: intlv_num_chan = 2; break; - case 5: intlv_num_chan = 3; break; - case 7: intlv_num_chan = 4; break; - - case 8: intlv_num_chan = 1; - hash_enabled = true; - break; - default: - pr_err("%s: Invalid number of interleaved channels %d.\n", - __func__, intlv_num_chan); - goto out_err; - } - - num_intlv_bits = intlv_num_chan; - - if (intlv_num_dies > 2) { - pr_err("%s: Invalid number of interleaved nodes/dies %d.\n", - __func__, intlv_num_dies); - goto out_err; - } - - num_intlv_bits += intlv_num_dies; - - /* Add a bit if sockets are interleaved. */ - num_intlv_bits += intlv_num_sockets; - - /* Assert num_intlv_bits <= 4 */ - if (num_intlv_bits > 4) { - pr_err("%s: Invalid interleave bits %d.\n", - __func__, num_intlv_bits); - goto out_err; - } - - if (num_intlv_bits > 0) { - u64 temp_addr_x, temp_addr_i, temp_addr_y; - u8 die_id_bit, sock_id_bit, cs_fabric_id; - - /* - * Read FabricBlockInstanceInformation3_CS[BlockFabricID]. - * This is the fabric id for this coherent slave. Use - * umc/channel# as instance id of the coherent slave - * for FICAA. - */ - if (amd_df_indirect_read(nid, 0, 0x50, umc, &tmp)) - goto out_err; - - cs_fabric_id = (tmp >> 8) & 0xFF; - die_id_bit = 0; - - /* If interleaved over more than 1 channel: */ - if (intlv_num_chan) { - die_id_bit = intlv_num_chan; - cs_mask = (1 << die_id_bit) - 1; - cs_id = cs_fabric_id & cs_mask; - } - - sock_id_bit = die_id_bit; - - /* Read D18F1x208 (SystemFabricIdMask). */ - if (intlv_num_dies || intlv_num_sockets) - if (amd_df_indirect_read(nid, 1, 0x208, umc, &tmp)) - goto out_err; - - /* If interleaved over more than 1 die. */ - if (intlv_num_dies) { - sock_id_bit = die_id_bit + intlv_num_dies; - die_id_shift = (tmp >> 24) & 0xF; - die_id_mask = (tmp >> 8) & 0xFF; - - cs_id |= ((cs_fabric_id & die_id_mask) >> die_id_shift) << die_id_bit; - } - - /* If interleaved over more than 1 socket. */ - if (intlv_num_sockets) { - socket_id_shift = (tmp >> 28) & 0xF; - socket_id_mask = (tmp >> 16) & 0xFF; - - cs_id |= ((cs_fabric_id & socket_id_mask) >> socket_id_shift) << sock_id_bit; - } - - /* - * The pre-interleaved address consists of XXXXXXIIIYYYYY - * where III is the ID for this CS, and XXXXXXYYYYY are the - * address bits from the post-interleaved address. - * "num_intlv_bits" has been calculated to tell us how many "I" - * bits there are. "intlv_addr_bit" tells us how many "Y" bits - * there are (where "I" starts). - */ - temp_addr_y = ret_addr & GENMASK_ULL(intlv_addr_bit-1, 0); - temp_addr_i = (cs_id << intlv_addr_bit); - temp_addr_x = (ret_addr & GENMASK_ULL(63, intlv_addr_bit)) << num_intlv_bits; - ret_addr = temp_addr_x | temp_addr_i | temp_addr_y; - } - - /* Add dram base address */ - ret_addr += dram_base_addr; - - /* If legacy MMIO hole enabled */ - if (lgcy_mmio_hole_en) { - if (amd_df_indirect_read(nid, 0, 0x104, umc, &tmp)) - goto out_err; - - dram_hole_base = tmp & GENMASK(31, 24); - if (ret_addr >= dram_hole_base) - ret_addr += (BIT_ULL(32) - dram_hole_base); - } - - if (hash_enabled) { - /* Save some parentheses and grab ls-bit at the end. */ - hashed_bit = (ret_addr >> 12) ^ - (ret_addr >> 18) ^ - (ret_addr >> 21) ^ - (ret_addr >> 30) ^ - cs_id; - - hashed_bit &= BIT(0); - - if (hashed_bit != ((ret_addr >> intlv_addr_bit) & BIT(0))) - ret_addr ^= BIT(intlv_addr_bit); - } - - /* Is calculated system address is above DRAM limit address? */ - if (ret_addr > dram_limit_addr) - goto out_err; - - *sys_addr = ret_addr; - return 0; - -out_err: - return -EINVAL; -} -EXPORT_SYMBOL_GPL(umc_normaddr_to_sysaddr); - bool amd_mce_is_memory_error(struct mce *m) { /* ErrCodeExt[20:16] */ u8 xec = (m->status >> 16) & 0x1f; if (mce_flags.smca) - return smca_get_bank_type(m->bank) == SMCA_UMC && xec == 0x0; + return smca_get_bank_type(m->extcpu, m->bank) == SMCA_UMC && xec == 0x0; return m->bank == 4 && xec == 0x8; } @@ -1171,7 +1012,7 @@ static struct kobj_type threshold_ktype = { .release = threshold_block_release, }; -static const char *get_name(unsigned int bank, struct threshold_block *b) +static const char *get_name(unsigned int cpu, unsigned int bank, struct threshold_block *b) { enum smca_bank_types bank_type; @@ -1182,7 +1023,7 @@ static const char *get_name(unsigned int bank, struct threshold_block *b) return th_names[bank]; } - bank_type = smca_get_bank_type(bank); + bank_type = smca_get_bank_type(cpu, bank); if (bank_type >= N_SMCA_BANK_TYPES) return NULL; @@ -1192,12 +1033,12 @@ static const char *get_name(unsigned int bank, struct threshold_block *b) return NULL; } - if (smca_banks[bank].hwid->count == 1) + if (per_cpu(smca_bank_counts, cpu)[bank_type] == 1) return smca_get_name(bank_type); snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, - "%s_%x", smca_get_name(bank_type), - smca_banks[bank].sysfs_id); + "%s_%u", smca_get_name(bank_type), + per_cpu(smca_banks, cpu)[bank].sysfs_id); return buf_mcatype; } @@ -1209,10 +1050,10 @@ static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb u32 low, high; int err; - if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS)) + if ((bank >= this_cpu_read(mce_num_banks)) || (block >= NR_BLOCKS)) return 0; - if (rdmsr_safe_on_cpu(cpu, address, &low, &high)) + if (rdmsr_safe(address, &low, &high)) return 0; if (!(high & MASK_VALID_HI)) { @@ -1252,7 +1093,7 @@ static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb else tb->blocks = b; - err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(bank, b)); + err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(cpu, bank, b)); if (err) goto out_free; recurse: @@ -1267,13 +1108,12 @@ static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb if (b) kobject_uevent(&b->kobj, KOBJ_ADD); - return err; + return 0; out_free: if (b) { - kobject_put(&b->kobj); list_del(&b->miscj); - kfree(b); + kobject_put(&b->kobj); } return err; } @@ -1302,12 +1142,13 @@ static int __threshold_add_blocks(struct threshold_bank *b) return err; } -static int threshold_create_bank(unsigned int cpu, unsigned int bank) +static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu, + unsigned int bank) { - struct device *dev = per_cpu(mce_device, cpu); + struct device *dev = this_cpu_read(mce_device); struct amd_northbridge *nb = NULL; struct threshold_bank *b = NULL; - const char *name = get_name(bank, NULL); + const char *name = get_name(cpu, bank, NULL); int err = 0; if (!dev) @@ -1324,7 +1165,7 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) if (err) goto out; - per_cpu(threshold_banks, cpu)[bank] = b; + bp[bank] = b; refcount_inc(&b->cpus); err = __threshold_add_blocks(b); @@ -1339,6 +1180,7 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) goto out; } + /* Associate the bank with the per-CPU MCE device */ b->kobj = kobject_create_and_add(name, &dev->kobj); if (!b->kobj) { err = -EINVAL; @@ -1346,6 +1188,7 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) } if (is_shared_bank(bank)) { + b->shared = 1; refcount_set(&b->cpus, 1); /* nb is already initialized, see above */ @@ -1357,16 +1200,16 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) err = allocate_threshold_blocks(cpu, b, bank, 0, msr_ops.misc(bank)); if (err) - goto out_free; - - per_cpu(threshold_banks, cpu)[bank] = b; + goto out_kobj; + bp[bank] = b; return 0; - out_free: +out_kobj: + kobject_put(b->kobj); +out_free: kfree(b); - - out: +out: return err; } @@ -1375,21 +1218,16 @@ static void threshold_block_release(struct kobject *kobj) kfree(to_block(kobj)); } -static void deallocate_threshold_block(unsigned int cpu, unsigned int bank) +static void deallocate_threshold_blocks(struct threshold_bank *bank) { - struct threshold_block *pos = NULL; - struct threshold_block *tmp = NULL; - struct threshold_bank *head = per_cpu(threshold_banks, cpu)[bank]; - - if (!head) - return; + struct threshold_block *pos, *tmp; - list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) { + list_for_each_entry_safe(pos, tmp, &bank->blocks->miscj, miscj) { list_del(&pos->miscj); kobject_put(&pos->kobj); } - kobject_put(&head->blocks->kobj); + kobject_put(&bank->blocks->kobj); } static void __threshold_remove_blocks(struct threshold_bank *b) @@ -1403,122 +1241,108 @@ static void __threshold_remove_blocks(struct threshold_bank *b) kobject_del(&pos->kobj); } -static void threshold_remove_bank(unsigned int cpu, int bank) +static void threshold_remove_bank(struct threshold_bank *bank) { struct amd_northbridge *nb; - struct threshold_bank *b; - b = per_cpu(threshold_banks, cpu)[bank]; - if (!b) - return; + if (!bank->blocks) + goto out_free; - if (!b->blocks) - goto free_out; + if (!bank->shared) + goto out_dealloc; - if (is_shared_bank(bank)) { - if (!refcount_dec_and_test(&b->cpus)) { - __threshold_remove_blocks(b); - per_cpu(threshold_banks, cpu)[bank] = NULL; - return; - } else { - /* - * the last CPU on this node using the shared bank is - * going away, remove that bank now. - */ - nb = node_to_amd_nb(topology_die_id(smp_processor_id())); - nb->bank4 = NULL; - } + if (!refcount_dec_and_test(&bank->cpus)) { + __threshold_remove_blocks(bank); + return; + } else { + /* + * The last CPU on this node using the shared bank is going + * away, remove that bank now. + */ + nb = node_to_amd_nb(topology_die_id(smp_processor_id())); + nb->bank4 = NULL; } - deallocate_threshold_block(cpu, bank); +out_dealloc: + deallocate_threshold_blocks(bank); -free_out: - kobject_del(b->kobj); - kobject_put(b->kobj); - kfree(b); - per_cpu(threshold_banks, cpu)[bank] = NULL; +out_free: + kobject_put(bank->kobj); + kfree(bank); } -int mce_threshold_remove_device(unsigned int cpu) +static void __threshold_remove_device(struct threshold_bank **bp) { - unsigned int bank; + unsigned int bank, numbanks = this_cpu_read(mce_num_banks); - for (bank = 0; bank < per_cpu(mce_num_banks, cpu); ++bank) { - if (!(per_cpu(bank_map, cpu) & (1 << bank))) + for (bank = 0; bank < numbanks; bank++) { + if (!bp[bank]) continue; - threshold_remove_bank(cpu, bank); + + threshold_remove_bank(bp[bank]); + bp[bank] = NULL; } - kfree(per_cpu(threshold_banks, cpu)); - per_cpu(threshold_banks, cpu) = NULL; + kfree(bp); +} + +int mce_threshold_remove_device(unsigned int cpu) +{ + struct threshold_bank **bp = this_cpu_read(threshold_banks); + + if (!bp) + return 0; + + /* + * Clear the pointer before cleaning up, so that the interrupt won't + * touch anything of this. + */ + this_cpu_write(threshold_banks, NULL); + + __threshold_remove_device(bp); return 0; } -/* create dir/files for all valid threshold banks */ +/** + * mce_threshold_create_device - Create the per-CPU MCE threshold device + * @cpu: The plugged in CPU + * + * Create directories and files for all valid threshold banks. + * + * This is invoked from the CPU hotplug callback which was installed in + * mcheck_init_device(). The invocation happens in context of the hotplug + * thread running on @cpu. The callback is invoked on all CPUs which are + * online when the callback is installed or during a real hotplug event. + */ int mce_threshold_create_device(unsigned int cpu) { - unsigned int bank; + unsigned int numbanks, bank; struct threshold_bank **bp; - int err = 0; + int err; - bp = per_cpu(threshold_banks, cpu); + if (!mce_flags.amd_threshold) + return 0; + + bp = this_cpu_read(threshold_banks); if (bp) return 0; - bp = kcalloc(per_cpu(mce_num_banks, cpu), sizeof(struct threshold_bank *), - GFP_KERNEL); + numbanks = this_cpu_read(mce_num_banks); + bp = kcalloc(numbanks, sizeof(*bp), GFP_KERNEL); if (!bp) return -ENOMEM; - per_cpu(threshold_banks, cpu) = bp; - - for (bank = 0; bank < per_cpu(mce_num_banks, cpu); ++bank) { - if (!(per_cpu(bank_map, cpu) & (1 << bank))) + for (bank = 0; bank < numbanks; ++bank) { + if (!(this_cpu_read(bank_map) & (1 << bank))) continue; - err = threshold_create_bank(cpu, bank); - if (err) - goto err; - } - return err; -err: - mce_threshold_remove_device(cpu); - return err; -} - -static __init int threshold_init_device(void) -{ - unsigned lcpu = 0; - - /* to hit CPUs online before the notifier is up */ - for_each_online_cpu(lcpu) { - int err = mce_threshold_create_device(lcpu); - - if (err) + err = threshold_create_bank(bp, cpu, bank); + if (err) { + __threshold_remove_device(bp); return err; + } } + this_cpu_write(threshold_banks, bp); if (thresholding_irq_en) mce_threshold_vector = amd_threshold_interrupt; - return 0; } -/* - * there are 3 funcs which need to be _initcalled in a logic sequence: - * 1. xen_late_init_mcelog - * 2. mcheck_init_device - * 3. threshold_init_device - * - * xen_late_init_mcelog must register xen_mce_chrdev_device before - * native mce_chrdev_device registration if running under xen platform; - * - * mcheck_init_device should be inited before threshold_init_device to - * initialize mce_device, otherwise a NULL ptr dereference will cause panic. - * - * so we use following _initcalls - * 1. device_initcall(xen_late_init_mcelog); - * 2. device_initcall_sync(mcheck_init_device); - * 3. late_initcall(threshold_init_device); - * - * when running under xen, the initcall order is 1,2,3; - * on baremetal, we skip 1 and we do only 2 and 3. - */ -late_initcall(threshold_init_device); diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 6f55ed1cab6fc..d08c9ad26d77b 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -171,7 +171,8 @@ EXPORT_SYMBOL_GPL(mce_inject_log); void mce_register_decode_chain(struct notifier_block *nb) { - if (WARN_ON(nb->priority > MCE_PRIO_MCELOG && nb->priority < MCE_PRIO_EDAC)) + if (WARN_ON(nb->priority < MCE_PRIO_LOWEST || + nb->priority > MCE_PRIO_HIGHEST)) return; blocking_notifier_chain_register(&x86_mce_decoder_chain, nb); @@ -301,11 +302,17 @@ static void wait_for_panic(void) panic("Panicing machine check CPU died"); } -static void mce_panic(const char *msg, struct mce *final, char *exp) +static noinstr void mce_panic(const char *msg, struct mce *final, char *exp) { - int apei_err = 0; struct llist_node *pending; struct mce_evt_llist *l; + int apei_err = 0; + + /* + * Allow instrumentation around external facilities usage. Not that it + * matters a whole lot since the machine is going to panic anyway. + */ + instrumentation_begin(); if (!fake_panic) { /* @@ -320,7 +327,7 @@ static void mce_panic(const char *msg, struct mce *final, char *exp) } else { /* Don't log too much for fake panic */ if (atomic_inc_return(&mce_fake_panicked) > 1) - return; + goto out; } pending = mce_gen_pool_prepare_records(); /* First print corrected ones that are still unlogged */ @@ -358,6 +365,9 @@ static void mce_panic(const char *msg, struct mce *final, char *exp) panic(msg); } else pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg); + +out: + instrumentation_end(); } /* Support code for software error injection */ @@ -379,13 +389,16 @@ static int msr_to_offset(u32 msr) return -1; } -__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr, - unsigned long error_code, - unsigned long fault_addr) +static void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr) { - pr_emerg("MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n", - (unsigned int)regs->cx, regs->ip, (void *)regs->ip); + if (wrmsr) { + pr_emerg("MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n", + (unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax, + regs->ip, (void *)regs->ip); + } else { + pr_emerg("MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n", + (unsigned int)regs->cx, regs->ip, (void *)regs->ip); + } show_stack_regs(regs); @@ -393,7 +406,14 @@ __visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup, while (true) cpu_relax(); +} +__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr, + unsigned long error_code, + unsigned long fault_addr) +{ + ex_handler_msr_mce(regs, false); return true; } @@ -429,17 +449,7 @@ __visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup, unsigned long error_code, unsigned long fault_addr) { - pr_emerg("MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n", - (unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax, - regs->ip, (void *)regs->ip); - - show_stack_regs(regs); - - panic("MCA architectural violation!\n"); - - while (true) - cpu_relax(); - + ex_handler_msr_mce(regs, true); return true; } @@ -667,7 +677,7 @@ static struct notifier_block mce_default_nb = { /* * Read ADDR and MISC registers. */ -static void mce_read_aux(struct mce *m, int i) +static noinstr void mce_read_aux(struct mce *m, int i) { if (m->status & MCI_STATUS_MISCV) m->misc = mce_rdmsrl(msr_ops.misc(i)); @@ -1045,10 +1055,13 @@ static int mce_start(int *no_way_out) * Synchronize between CPUs after main scanning loop. * This invokes the bulk of the Monarch processing. */ -static int mce_end(int order) +static noinstr int mce_end(int order) { - int ret = -1; u64 timeout = (u64)mca_cfg.monarch_timeout * NSEC_PER_USEC; + int ret = -1; + + /* Allow instrumentation around external facilities. */ + instrumentation_begin(); if (!timeout) goto reset; @@ -1092,7 +1105,8 @@ static int mce_end(int order) /* * Don't reset anything. That's done by the Monarch. */ - return 0; + ret = 0; + goto out; } /* @@ -1107,6 +1121,10 @@ static int mce_end(int order) * Let others run again. */ atomic_set(&mce_executing, 0); + +out: + instrumentation_end(); + return ret; } @@ -1242,10 +1260,12 @@ static void kill_me_maybe(struct callback_head *cb) /* * -EHWPOISON from memory_failure() means that it already sent SIGBUS - * to the current process with the proper error info, so no need to - * send SIGBUS here again. + * to the current process with the proper error info, + * -EOPNOTSUPP means hwpoison_filter() filtered the error event, + * + * In both cases, no further processing is required. */ - if (ret == -EHWPOISON) + if (ret == -EHWPOISON || ret == -EOPNOTSUPP) return; if (p->mce_vaddr != (void __user *)-1l) { @@ -1834,6 +1854,7 @@ static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c) mce_flags.overflow_recov = !!cpu_has(c, X86_FEATURE_OVERFLOW_RECOV); mce_flags.succor = !!cpu_has(c, X86_FEATURE_SUCCOR); mce_flags.smca = !!cpu_has(c, X86_FEATURE_SMCA); + mce_flags.amd_threshold = 1; if (mce_flags.smca) { msr_ops.ctl = smca_ctl_reg; @@ -2521,6 +2542,13 @@ static __init void mce_init_banks(void) } } +/* + * When running on XEN, this initcall is ordered against the XEN mcelog + * initcall: + * + * device_initcall(xen_late_init_mcelog); + * device_initcall_sync(mcheck_init_device); + */ static __init int mcheck_init_device(void) { int err; @@ -2552,6 +2580,10 @@ static __init int mcheck_init_device(void) if (err) goto err_out_mem; + /* + * Invokes mce_cpu_online() on all CPUs which are online when + * the state is installed. + */ err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/mce:online", mce_cpu_online, mce_cpu_pre_down); if (err < 0) diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 39ef73e008f1d..e20f0ad0cc471 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -347,7 +347,7 @@ static ssize_t flags_write(struct file *filp, const char __user *ubuf, char buf[MAX_FLAG_OPT_SIZE], *__buf; int err; - if (cnt > MAX_FLAG_OPT_SIZE) + if (!cnt || cnt > MAX_FLAG_OPT_SIZE) return -EINVAL; if (copy_from_user(&buf, ubuf, cnt)) diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index 4f90f720b2815..6b6ca1c36235a 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -141,7 +141,7 @@ struct mce_vendor_flags { * Recovery. It indicates support for data poisoning in HW and deferred * error interrupts. */ - succor : 1, + succor : 1, /* * (AMD) SMCA: This bit indicates support for Scalable MCA which expands @@ -149,9 +149,12 @@ struct mce_vendor_flags { * banks. Also, to accommodate the new banks and registers, the MCA * register space is moved to a new MSR range. */ - smca : 1, + smca : 1, - __reserved_0 : 61; + /* AMD-style error thresholding banks present. */ + amd_threshold : 1, + + __reserved_0 : 60; }; extern struct mce_vendor_flags mce_flags; diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 4a4198b806b4e..c95a27513a30c 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -772,9 +772,9 @@ static struct subsys_interface mc_cpu_interface = { }; /** - * mc_bp_resume - Update boot CPU microcode during resume. + * microcode_bsp_resume - Update boot CPU microcode during resume. */ -static void mc_bp_resume(void) +void microcode_bsp_resume(void) { int cpu = smp_processor_id(); struct ucode_cpu_info *uci = ucode_cpu_info + cpu; @@ -786,7 +786,7 @@ static void mc_bp_resume(void) } static struct syscore_ops mc_syscore_ops = { - .resume = mc_bp_resume, + .resume = microcode_bsp_resume, }; static int mc_cpu_starting(unsigned int cpu) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 1c2f9baf84832..51d95c4b692c3 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -82,7 +82,7 @@ __visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs) inc_irq_stat(hyperv_stimer0_count); if (hv_stimer0_handler) hv_stimer0_handler(); - add_interrupt_randomness(HYPERV_STIMER0_VECTOR, 0); + add_interrupt_randomness(HYPERV_STIMER0_VECTOR); ack_APIC_irq(); exiting_irq(); diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 21f61fafa974d..cffa38c80c6a4 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -260,7 +260,6 @@ static bool __get_mem_config_intel(struct rdt_resource *r) r->num_closid = edx.split.cos_max + 1; r->membw.max_delay = eax.split.max_delay + 1; r->default_ctrl = MAX_MBA_BW; - r->membw.mbm_width = MBM_CNTR_WIDTH; if (ecx & MBA_IS_LINEAR) { r->membw.delay_linear = true; r->membw.min_bw = MAX_MBA_BW - r->membw.max_delay; @@ -290,7 +289,6 @@ static bool __rdt_get_mem_config_amd(struct rdt_resource *r) /* AMD does not use delay */ r->membw.delay_linear = false; - r->membw.mbm_width = MBM_CNTR_WIDTH_AMD; r->membw.min_bw = 0; r->membw.bw_gran = 1; /* Max value is 2048, Data width should be 4 in decimal */ diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index 055c8613b5317..934c8fb8a64a7 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -495,14 +495,16 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of, return ret; } -void mon_event_read(struct rmid_read *rr, struct rdt_domain *d, - struct rdtgroup *rdtgrp, int evtid, int first) +void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, + struct rdt_domain *d, struct rdtgroup *rdtgrp, + int evtid, int first) { /* * setup the parameters to send to the IPI to read the data. */ rr->rgrp = rdtgrp; rr->evtid = evtid; + rr->r = r; rr->d = d; rr->val = 0; rr->first = first; @@ -539,7 +541,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) goto out; } - mon_event_read(&rr, d, rdtgrp, evtid, false); + mon_event_read(&rr, r, d, rdtgrp, evtid, false); if (rr.val & RMID_VAL_ERROR) seq_puts(m, "Error\n"); diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 1665ac4df9060..5dcbb1a3ee00b 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -31,16 +31,22 @@ #define CQM_LIMBOCHECK_INTERVAL 1000 -#define MBM_CNTR_WIDTH 24 -#define MBM_CNTR_WIDTH_AMD 44 +#define MBM_CNTR_WIDTH_BASE 24 #define MBM_OVERFLOW_INTERVAL 1000 #define MAX_MBA_BW 100u #define MBA_IS_LINEAR 0x4 #define MBA_MAX_MBPS U32_MAX #define MAX_MBA_BW_AMD 0x800 +#define MBM_CNTR_WIDTH_OFFSET_AMD 20 #define RMID_VAL_ERROR BIT_ULL(63) #define RMID_VAL_UNAVAIL BIT_ULL(62) +/* + * With the above fields in use 62 bits remain in MSR_IA32_QM_CTR for + * data to be returned. The counter width is discovered from the hardware + * as an offset from MBM_CNTR_WIDTH_BASE. + */ +#define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE) struct rdt_fs_context { @@ -88,6 +94,7 @@ union mon_data_bits { struct rmid_read { struct rdtgroup *rgrp; + struct rdt_resource *r; struct rdt_domain *d; int evtid; bool first; @@ -367,7 +374,6 @@ struct rdt_cache { * @min_bw: Minimum memory bandwidth percentage user can request * @bw_gran: Granularity at which the memory bandwidth is allocated * @delay_linear: True if memory B/W delay is in linear scale - * @mbm_width: memory B/W monitor counter width * @mba_sc: True if MBA software controller(mba_sc) is enabled * @mb_map: Mapping of memory B/W percentage to memory B/W delay */ @@ -376,7 +382,6 @@ struct rdt_membw { u32 min_bw; u32 bw_gran; u32 delay_linear; - u32 mbm_width; bool mba_sc; u32 *mb_map; }; @@ -461,6 +466,7 @@ struct rdt_resource { struct list_head evt_list; int num_rmid; unsigned int mon_scale; + unsigned int mbm_width; unsigned long fflags; }; @@ -588,8 +594,9 @@ void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, unsigned int dom_id); void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, struct rdt_domain *d); -void mon_event_read(struct rmid_read *rr, struct rdt_domain *d, - struct rdtgroup *rdtgrp, int evtid, int first); +void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, + struct rdt_domain *d, struct rdtgroup *rdtgrp, + int evtid, int first); void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms); void mbm_handle_overflow(struct work_struct *work); diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 8c7a6f31e9329..1e57d83e01f29 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -278,11 +278,10 @@ void free_rmid(u32 rmid) list_add_tail(&entry->list, &rmid_free_lru); } -static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr) +static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width) { - u64 shift, chunks; + u64 shift = 64 - width, chunks; - shift = 64 - rdt_resources_all[RDT_RESOURCE_MBA].membw.mbm_width; chunks = (cur_msr << shift) - (prev_msr << shift); return chunks >>= shift; } @@ -320,7 +319,7 @@ static u64 __mon_event_count(u32 rmid, struct rmid_read *rr) return 0; } - chunks = mbm_overflow_count(m->prev_msr, tval); + chunks = mbm_overflow_count(m->prev_msr, tval, rr->r->mbm_width); m->chunks += chunks; m->prev_msr = tval; @@ -343,7 +342,7 @@ static void mbm_bw_count(u32 rmid, struct rmid_read *rr) if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) return; - chunks = mbm_overflow_count(m->prev_bw_msr, tval); + chunks = mbm_overflow_count(m->prev_bw_msr, tval, rr->r->mbm_width); cur_bw = (get_corrected_mbm_count(rmid, chunks) * r->mon_scale) >> 20; if (m->delta_comp) @@ -502,11 +501,12 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) } } -static void mbm_update(struct rdt_domain *d, int rmid) +static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid) { struct rmid_read rr; rr.first = false; + rr.r = r; rr.d = d; /* @@ -578,6 +578,7 @@ void mbm_handle_overflow(struct work_struct *work) struct rdtgroup *prgrp, *crgrp; int cpu = smp_processor_id(); struct list_head *head; + struct rdt_resource *r; struct rdt_domain *d; mutex_lock(&rdtgroup_mutex); @@ -585,16 +586,18 @@ void mbm_handle_overflow(struct work_struct *work) if (!static_branch_likely(&rdt_mon_enable_key)) goto out_unlock; - d = get_domain_from_cpu(cpu, &rdt_resources_all[RDT_RESOURCE_L3]); + r = &rdt_resources_all[RDT_RESOURCE_L3]; + + d = get_domain_from_cpu(cpu, r); if (!d) goto out_unlock; list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { - mbm_update(d, prgrp->mon.rmid); + mbm_update(r, d, prgrp->mon.rmid); head = &prgrp->mon.crdtgrp_list; list_for_each_entry(crgrp, head, mon.crdtgrp_list) - mbm_update(d, crgrp->mon.rmid); + mbm_update(r, d, crgrp->mon.rmid); if (is_mba_sc(NULL)) update_mba_bw(prgrp, d); @@ -682,11 +685,18 @@ static void l3_mon_evt_init(struct rdt_resource *r) int rdt_get_mon_l3_config(struct rdt_resource *r) { + unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset; unsigned int cl_size = boot_cpu_data.x86_cache_size; int ret; r->mon_scale = boot_cpu_data.x86_cache_occ_scale; r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1; + r->mbm_width = MBM_CNTR_WIDTH_BASE; + + if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX) + r->mbm_width += mbm_offset; + else if (mbm_offset > MBM_CNTR_WIDTH_OFFSET_MAX) + pr_warn("Ignoring impossible MBM counter offset\n"); /* * A reasonable upper limit on the max threshold is the number diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index a910cf12180f6..0472ccd37324c 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -2365,7 +2365,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, goto out_destroy; if (is_mbm_event(mevt->evtid)) - mon_event_read(&rr, d, prgrp, mevt->evtid, true); + mon_event_read(&rr, r, d, prgrp, mevt->evtid, true); } kernfs_activate(kn); return 0; diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index adf9b71386eff..a6a67707978fb 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -39,8 +39,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 }, { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, - { X86_FEATURE_SME, CPUID_EAX, 0, 0x8000001f, 0 }, - { X86_FEATURE_SEV, CPUID_EAX, 1, 0x8000001f, 0 }, + { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, { 0, 0, 0, 0, 0 } }; diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 2f9ec14be3b11..50225bc0383b9 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -515,6 +515,7 @@ static const struct intel_early_ops gen11_early_ops __initconst = { .stolen_size = gen9_stolen_size, }; +/* Intel integrated GPUs for which we need to reserve "stolen memory" */ static const struct pci_device_id intel_early_ids[] __initconst = { INTEL_I830_IDS(&i830_early_ops), INTEL_I845G_IDS(&i845_early_ops), @@ -587,6 +588,13 @@ static void __init intel_graphics_quirks(int num, int slot, int func) u16 device; int i; + /* + * Reserve "stolen memory" for an integrated GPU. If we've already + * found one, there's nothing to do for other (discrete) GPUs. + */ + if (resource_size(&intel_graphics_stolen_res)) + return; + device = read_pci_config_16(num, slot, func, PCI_DEVICE_ID); for (i = 0; i < ARRAY_SIZE(intel_early_ids); i++) { @@ -699,7 +707,7 @@ static struct chipset early_qrk[] __initdata = { { PCI_VENDOR_ID_INTEL, 0x3406, PCI_CLASS_BRIDGE_HOST, PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check }, { PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA, PCI_ANY_ID, - QFLAG_APPLY_ONCE, intel_graphics_quirks }, + 0, intel_graphics_quirks }, /* * HPET on the current version of the Baytrail platform has accuracy * problems: it will halt in deep idle state - so we disable it. @@ -710,12 +718,6 @@ static struct chipset early_qrk[] __initdata = { */ { PCI_VENDOR_ID_INTEL, 0x0f00, PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, - { PCI_VENDOR_ID_INTEL, 0x3e20, - PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, - { PCI_VENDOR_ID_INTEL, 0x3ec4, - PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, - { PCI_VENDOR_ID_INTEL, 0x8a12, - PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, { PCI_VENDOR_ID_BROADCOM, 0x4331, PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset}, {} diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 206a4b6144c2e..950286016f63c 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -383,6 +383,8 @@ static void __init clear_bss(void) { memset(__bss_start, 0, (unsigned long) __bss_stop - (unsigned long) __bss_start); + memset(__brk_base, 0, + (unsigned long) __brk_limit - (unsigned long) __brk_base); } static unsigned long get_cmd_line_ptr(void) diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index c6f791bc481eb..9834d221e390f 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -9,6 +9,7 @@ #include #include +#include #undef pr_fmt #define pr_fmt(fmt) "hpet: " fmt @@ -806,6 +807,83 @@ static bool __init hpet_counting(void) return false; } +static bool __init mwait_pc10_supported(void) +{ + unsigned int eax, ebx, ecx, mwait_substates; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return false; + + if (!cpu_feature_enabled(X86_FEATURE_MWAIT)) + return false; + + if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) + return false; + + cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates); + + return (ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) && + (ecx & CPUID5_ECX_INTERRUPT_BREAK) && + (mwait_substates & (0xF << 28)); +} + +/* + * Check whether the system supports PC10. If so force disable HPET as that + * stops counting in PC10. This check is overbroad as it does not take any + * of the following into account: + * + * - ACPI tables + * - Enablement of intel_idle + * - Command line arguments which limit intel_idle C-state support + * + * That's perfectly fine. HPET is a piece of hardware designed by committee + * and the only reasons why it is still in use on modern systems is the + * fact that it is impossible to reliably query TSC and CPU frequency via + * CPUID or firmware. + * + * If HPET is functional it is useful for calibrating TSC, but this can be + * done via PMTIMER as well which seems to be the last remaining timer on + * X86/INTEL platforms that has not been completely wreckaged by feature + * creep. + * + * In theory HPET support should be removed altogether, but there are older + * systems out there which depend on it because TSC and APIC timer are + * dysfunctional in deeper C-states. + * + * It's only 20 years now that hardware people have been asked to provide + * reliable and discoverable facilities which can be used for timekeeping + * and per CPU timer interrupts. + * + * The probability that this problem is going to be solved in the + * forseeable future is close to zero, so the kernel has to be cluttered + * with heuristics to keep up with the ever growing amount of hardware and + * firmware trainwrecks. Hopefully some day hardware people will understand + * that the approach of "This can be fixed in software" is not sustainable. + * Hope dies last... + */ +static bool __init hpet_is_pc10_damaged(void) +{ + unsigned long long pcfg; + + /* Check whether PC10 substates are supported */ + if (!mwait_pc10_supported()) + return false; + + /* Check whether PC10 is enabled in PKG C-state limit */ + rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, pcfg); + if ((pcfg & 0xF) < 8) + return false; + + if (hpet_force_user) { + pr_warn("HPET force enabled via command line, but dysfunctional in PC10.\n"); + return false; + } + + pr_info("HPET dysfunctional in PC10. Force disabled.\n"); + boot_hpet_disable = true; + return true; +} + /** * hpet_enable - Try to setup the HPET timer. Returns 1 on success. */ @@ -819,6 +897,9 @@ int __init hpet_enable(void) if (!is_hpet_capable()) return 0; + if (hpet_is_pc10_damaged()) + return 0; + hpet_set_mapping(); if (!hpet_virt_address) return 0; diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 21efee32e2b12..7dfd0185767cc 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -295,8 +295,10 @@ void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)) { if (handler) kvm_posted_intr_wakeup_handler = handler; - else + else { kvm_posted_intr_wakeup_handler = dummy_handler; + synchronize_rcu(); + } } EXPORT_SYMBOL_GPL(kvm_set_posted_intr_wakeup_handler); diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index b2510c585e7aa..17bf7febf7a97 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -358,6 +358,10 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn) kernel_insn_init(insn, dest, MAX_INSN_SIZE); insn_get_length(insn); + /* We can not probe force emulate prefixed instruction */ + if (insn_has_emulate_prefix(insn)) + return 0; + /* Another subsystem puts a breakpoint, failed to recover */ if (insn->opcode.bytes[0] == BREAKPOINT_INSTRUCTION) return 0; diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 6ff2c7cac4c46..f582dda8dd34f 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -59,6 +59,7 @@ static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __align DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible; static int has_steal_clock = 0; +static int has_guest_poll = 0; /* * No need for any "IO delay" on KVM */ @@ -487,7 +488,7 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector) } else if (apic_id < min && max - apic_id < KVM_IPI_CLUSTER_SIZE) { ipi_bitmap <<= min - apic_id; min = apic_id; - } else if (apic_id < min + KVM_IPI_CLUSTER_SIZE) { + } else if (apic_id > min && apic_id < min + KVM_IPI_CLUSTER_SIZE) { max = apic_id < max ? max : apic_id; } else { ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap, @@ -584,14 +585,26 @@ static int kvm_cpu_down_prepare(unsigned int cpu) static int kvm_suspend(void) { + u64 val = 0; + kvm_guest_cpu_offline(false); +#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL + if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) + rdmsrl(MSR_KVM_POLL_CONTROL, val); + has_guest_poll = !(val & 1); +#endif return 0; } static void kvm_resume(void) { kvm_cpu_online(raw_smp_processor_id()); + +#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL + if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL) && has_guest_poll) + wrmsrl(MSR_KVM_POLL_CONTROL, 0); +#endif } static struct syscore_ops kvm_syscore_ops = { diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 4a0802af2e3e0..d81e34e614e00 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -50,18 +50,9 @@ early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall); static struct pvclock_vsyscall_time_info hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __bss_decrypted __aligned(PAGE_SIZE); static struct pvclock_wall_clock wall_clock __bss_decrypted; -static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu); static struct pvclock_vsyscall_time_info *hvclock_mem; - -static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void) -{ - return &this_cpu_read(hv_clock_per_cpu)->pvti; -} - -static inline struct pvclock_vsyscall_time_info *this_cpu_hvclock(void) -{ - return this_cpu_read(hv_clock_per_cpu); -} +DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu); +EXPORT_PER_CPU_SYMBOL_GPL(hv_clock_per_cpu); /* * The wallclock is the time of day when we booted. Since then, some time may diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index b8ceec4974fe3..352f876950ab3 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -229,14 +229,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread, *next = &next_p->thread; - struct fpu *prev_fpu = &prev->fpu; - struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ if (!test_thread_flag(TIF_NEED_FPU_LOAD)) - switch_fpu_prepare(prev_fpu, cpu); + switch_fpu_prepare(prev_p, cpu); /* * Save away %gs. No need to save %fs, as it was saved on the @@ -292,7 +290,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) this_cpu_write(current_task, next_p); - switch_fpu_finish(next_fpu); + switch_fpu_finish(next_p); /* Load the Intel cache allocation PQR MSR. */ resctrl_sched_in(); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index da3cc3a10d63f..633788362906a 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -505,15 +505,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread; struct thread_struct *next = &next_p->thread; - struct fpu *prev_fpu = &prev->fpu; - struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && this_cpu_read(irq_count) != -1); if (!test_thread_flag(TIF_NEED_FPU_LOAD)) - switch_fpu_prepare(prev_fpu, cpu); + switch_fpu_prepare(prev_p, cpu); /* We must save %fs and %gs before load_TLS() because * %fs and %gs may be cleared by load_TLS(). @@ -565,7 +563,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) this_cpu_write(current_task, next_p); this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p)); - switch_fpu_finish(next_fpu); + switch_fpu_finish(next_p); /* Reload sp0. */ update_task_stack(next_p); diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index b1b96d461bc76..fdef27a84d713 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -113,17 +113,9 @@ void __noreturn machine_real_restart(unsigned int type) spin_unlock(&rtc_lock); /* - * Switch back to the initial page table. + * Switch to the trampoline page table. */ -#ifdef CONFIG_X86_32 - load_cr3(initial_page_table); -#else - write_cr3(real_mode_header->trampoline_pgd); - - /* Exiting long mode will fail if CR4.PCIDE is set. */ - if (boot_cpu_has(X86_FEATURE_PCID)) - cr4_clear_bits(X86_CR4_PCIDE); -#endif + load_trampoline_pgtable(); /* Jump to the identity-mapped low memory code */ #ifdef CONFIG_X86_32 @@ -388,10 +380,11 @@ static const struct dmi_system_id reboot_dmi_table[] __initconst = { }, { /* Handle problems with rebooting on the OptiPlex 990. */ .callback = set_pci_reboot, - .ident = "Dell OptiPlex 990", + .ident = "Dell OptiPlex 990 BIOS A0x", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 990"), + DMI_MATCH(DMI_BIOS_VERSION, "A0"), }, }, { /* Handle problems with rebooting on Dell 300's */ diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 8367bd7a9a810..1132b50714602 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -102,7 +102,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); /* Logical package management. We might want to allocate that dynamically */ unsigned int __max_logical_packages __read_mostly; EXPORT_SYMBOL(__max_logical_packages); -static unsigned int logical_packages __read_mostly; +unsigned int logical_packages __read_mostly; static unsigned int logical_die __read_mostly; /* Maximum number of SMT threads on any online core */ diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 60d2c3798ba28..2f97d1a1032f3 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -175,8 +175,7 @@ void set_task_blockstep(struct task_struct *task, bool on) * * NOTE: this means that set/clear TIF_BLOCKSTEP is only safe if * task is current or it can't be running, otherwise we can race - * with __switch_to_xtra(). We rely on ptrace_freeze_traced() but - * PTRACE_KILL is not safe. + * with __switch_to_xtra(). We rely on ptrace_freeze_traced(). */ local_irq_disable(); debugctl = get_debugctlmsr(); diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index f7476ce23b6e0..42e31358a9d32 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -70,9 +70,6 @@ static int __init control_va_addr_alignment(char *str) if (*str == 0) return 1; - if (*str == '=') - str++; - if (!strcmp(str, "32")) va_align.flags = ALIGN_VA_32; else if (!strcmp(str, "64")) @@ -82,11 +79,11 @@ static int __init control_va_addr_alignment(char *str) else if (!strcmp(str, "on")) va_align.flags = ALIGN_VA_32 | ALIGN_VA_64; else - return 0; + pr_warn("invalid option value: 'align_va_addr=%s'\n", str); return 1; } -__setup("align_va_addr", control_va_addr_alignment); +__setup("align_va_addr=", control_va_addr_alignment); SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 7e322e2daaf59..4896ad6ea0b63 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1116,8 +1116,7 @@ static struct clocksource clocksource_tsc_early = { .rating = 299, .read = read_tsc, .mask = CLOCKSOURCE_MASK(64), - .flags = CLOCK_SOURCE_IS_CONTINUOUS | - CLOCK_SOURCE_MUST_VERIFY, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, .archdata = { .vclock_mode = VCLOCK_TSC }, .resume = tsc_resume, .mark_unstable = tsc_cs_mark_unstable, @@ -1369,12 +1368,23 @@ static int __init init_tsc_clocksource(void) if (tsc_unstable) goto unreg; - if (tsc_clocksource_reliable || no_tsc_watchdog) - clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; - if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3)) clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP; + /* + * Disable the clocksource watchdog when the system has: + * - TSC running at constant frequency + * - TSC which does not stop in C-States + * - the TSC_ADJUST register which allows to detect even minimal + * modifications + * - not more than two sockets. + */ + if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && + boot_cpu_has(X86_FEATURE_NONSTOP_TSC) && + boot_cpu_has(X86_FEATURE_TSC_ADJUST) && + logical_packages <= 2) + clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; + /* * When TSC frequency is known (retrieved via MSR or CPUID), we skip * the refined calibration and directly register it as a clocksource. @@ -1506,7 +1516,7 @@ void __init tsc_init(void) } if (tsc_clocksource_reliable || no_tsc_watchdog) - clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY; + clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; clocksource_register_khz(&clocksource_tsc_early, tsc_khz); detect_art(); diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index ec534f978867d..59b1143063003 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -30,6 +30,7 @@ struct tsc_adjust { }; static DEFINE_PER_CPU(struct tsc_adjust, tsc_adjust); +static struct timer_list tsc_sync_check_timer; /* * TSC's on different sockets may be reset asynchronously. @@ -77,6 +78,46 @@ void tsc_verify_tsc_adjust(bool resume) } } +/* + * Normally the tsc_sync will be checked every time system enters idle + * state, but there is still caveat that a system won't enter idle, + * either because it's too busy or configured purposely to not enter + * idle. + * + * So setup a periodic timer (every 10 minutes) to make sure the check + * is always on. + */ + +#define SYNC_CHECK_INTERVAL (HZ * 600) + +static void tsc_sync_check_timer_fn(struct timer_list *unused) +{ + int next_cpu; + + tsc_verify_tsc_adjust(false); + + /* Run the check for all onlined CPUs in turn */ + next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask); + if (next_cpu >= nr_cpu_ids) + next_cpu = cpumask_first(cpu_online_mask); + + tsc_sync_check_timer.expires += SYNC_CHECK_INTERVAL; + add_timer_on(&tsc_sync_check_timer, next_cpu); +} + +static int __init start_sync_check_timer(void) +{ + if (!cpu_feature_enabled(X86_FEATURE_TSC_ADJUST) || tsc_clocksource_reliable) + return 0; + + timer_setup(&tsc_sync_check_timer, tsc_sync_check_timer_fn, 0); + tsc_sync_check_timer.expires = jiffies + SYNC_CHECK_INTERVAL; + add_timer(&tsc_sync_check_timer); + + return 0; +} +late_initcall(start_sync_check_timer); + static void tsc_sanitize_first_cpu(struct tsc_adjust *cur, s64 bootval, unsigned int cpu, bool bootcpu) { diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 48301210961ad..1871a2b30ccd7 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -592,6 +592,11 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function, union cpuid10_eax eax; union cpuid10_edx edx; + if (!static_cpu_has(X86_FEATURE_ARCH_PERFMON)) { + entry->eax = entry->ebx = entry->ecx = entry->edx = 0; + break; + } + perf_get_x86_pmu_capability(&cap); /* diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 60c8dcb907a50..ea48a2fb1677d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1714,11 +1714,6 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, goto exception; } - if (!seg_desc.p) { - err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; - goto exception; - } - dpl = seg_desc.dpl; switch (seg) { @@ -1758,6 +1753,10 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, case VCPU_SREG_TR: if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9)) goto exception; + if (!seg_desc.p) { + err_vec = NP_VECTOR; + goto exception; + } old_desc = seg_desc; seg_desc.type |= 2; /* busy */ ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc, @@ -1782,6 +1781,11 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, break; } + if (!seg_desc.p) { + err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; + goto exception; + } + if (seg_desc.s) { /* mark segment as accessed */ if (!(seg_desc.type & 1)) { diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 78e67569bcac5..fa63ca39ed61a 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -205,7 +205,7 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, struct kvm_vcpu *vcpu = synic_to_vcpu(synic); int ret; - if (!synic->active && !host) + if (!synic->active && (!host || data)) return 1; trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host); @@ -251,6 +251,9 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, case HV_X64_MSR_EOM: { int i; + if (!synic->active) + break; + for (i = 0; i < ARRAY_SIZE(synic->sint); i++) kvm_hv_notify_acked_sint(vcpu, i); break; @@ -514,6 +517,11 @@ static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config, { union hv_stimer_config new_config = {.as_uint64 = config}, old_config = {.as_uint64 = stimer->config.as_uint64}; + struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); + struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); + + if (!synic->active && (!host || config)) + return 1; trace_kvm_hv_stimer_set_config(stimer_to_vcpu(stimer)->vcpu_id, stimer->index, config, host); @@ -533,6 +541,12 @@ static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config, static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count, bool host) { + struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); + struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); + + if (!synic->active && (!host || count)) + return 1; + trace_kvm_hv_stimer_set_count(stimer_to_vcpu(stimer)->vcpu_id, stimer->index, count, host); @@ -1502,11 +1516,13 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *current_vcpu, u64 ingpa, u64 outgpa, all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL; + if (all_cpus) + goto check_and_send_ipi; + if (!sparse_banks_len) goto ret_success; - if (!all_cpus && - kvm_read_guest(kvm, + if (kvm_read_guest(kvm, ingpa + offsetof(struct hv_send_ipi_ex, vp_set.bank_contents), sparse_banks, @@ -1514,6 +1530,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *current_vcpu, u64 ingpa, u64 outgpa, return HV_STATUS_INVALID_HYPERCALL_INPUT; } +check_and_send_ipi: if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) return HV_STATUS_INVALID_HYPERCALL_INPUT; diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 642031b896f64..24a6905d60ee2 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -91,7 +91,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic) { ioapic->rtc_status.pending_eoi = 0; - bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID + 1); + bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID); } static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic); diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index 283f1f489bcac..ea1a4e0297dae 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -43,13 +43,13 @@ struct kvm_vcpu; struct dest_map { /* vcpu bitmap where IRQ has been sent */ - DECLARE_BITMAP(map, KVM_MAX_VCPU_ID + 1); + DECLARE_BITMAP(map, KVM_MAX_VCPU_ID); /* * Vector sent to a given vcpu, only valid when * the vcpu's bit in map is set */ - u8 vectors[KVM_MAX_VCPU_ID + 1]; + u8 vectors[KVM_MAX_VCPU_ID]; }; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index a9bdcbea2f58e..168ce66e5d40b 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -118,7 +118,8 @@ static inline u32 kvm_x2apic_id(struct kvm_lapic *apic) bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu) { - return pi_inject_timer && kvm_vcpu_apicv_active(vcpu); + return pi_inject_timer && kvm_vcpu_apicv_active(vcpu) && + (kvm_mwait_in_guest(vcpu->kvm) || kvm_hlt_in_guest(vcpu->kvm)); } EXPORT_SYMBOL_GPL(kvm_can_post_timer_interrupt); @@ -2116,10 +2117,7 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data) void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) { - struct kvm_lapic *apic = vcpu->arch.apic; - - apic_set_tpr(apic, ((cr8 & 0x0f) << 4) - | (kvm_lapic_get_reg(apic, APIC_TASKPRI) & 4)); + apic_set_tpr(vcpu->arch.apic, (cr8 & 0x0f) << 4); } u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 260c64c205b8c..015da62e4ad75 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4666,7 +4666,15 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) { - bool uses_nx = context->nx || + /* + * KVM uses NX when TDP is disabled to handle a variety of scenarios, + * notably for huge SPTEs if iTLB multi-hit mitigation is enabled and + * to generate correct permissions for CR0.WP=0/CR4.SMEP=1/EFER.NX=0. + * The iTLB multi-hit workaround can be toggled at any time, so assume + * NX can be used by any non-nested shadow MMU to avoid having to reset + * MMU contexts. Note, KVM forces EFER.NX=1 when TDP is disabled. + */ + bool uses_nx = context->nx || !tdp_enabled || context->mmu_role.base.smep_andnot_wp; struct rsvd_bits_validate *shadow_zero_check; int i; @@ -5813,6 +5821,7 @@ static void kvm_zap_obsolete_pages(struct kvm *kvm) { struct kvm_mmu_page *sp, *node; int nr_zapped, batch = 0; + bool unstable; restart: list_for_each_entry_safe_reverse(sp, node, @@ -5845,11 +5854,12 @@ static void kvm_zap_obsolete_pages(struct kvm *kvm) goto restart; } - if (__kvm_mmu_prepare_zap_page(kvm, sp, - &kvm->arch.zapped_obsolete_pages, &nr_zapped)) { - batch += nr_zapped; + unstable = __kvm_mmu_prepare_zap_page(kvm, sp, + &kvm->arch.zapped_obsolete_pages, &nr_zapped); + batch += nr_zapped; + + if (unstable) goto restart; - } } /* diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 8bba9c48bee76..1a1d2b5e7b357 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -34,9 +34,8 @@ #define PT_HAVE_ACCESSED_DIRTY(mmu) true #ifdef CONFIG_X86_64 #define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL - #define CMPXCHG cmpxchg + #define CMPXCHG "cmpxchgq" #else - #define CMPXCHG cmpxchg64 #define PT_MAX_FULL_LEVELS 2 #endif #elif PTTYPE == 32 @@ -52,7 +51,7 @@ #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT #define PT_HAVE_ACCESSED_DIRTY(mmu) true - #define CMPXCHG cmpxchg + #define CMPXCHG "cmpxchgl" #elif PTTYPE == PTTYPE_EPT #define pt_element_t u64 #define guest_walker guest_walkerEPT @@ -65,8 +64,10 @@ #define PT_GUEST_DIRTY_SHIFT 9 #define PT_GUEST_ACCESSED_SHIFT 8 #define PT_HAVE_ACCESSED_DIRTY(mmu) ((mmu)->ept_ad) - #define CMPXCHG cmpxchg64 #define PT_MAX_FULL_LEVELS 4 + #ifdef CONFIG_X86_64 + #define CMPXCHG "cmpxchgq" + #endif #else #error Invalid PTTYPE value #endif @@ -132,43 +133,39 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, pt_element_t __user *ptep_user, unsigned index, pt_element_t orig_pte, pt_element_t new_pte) { - int npages; - pt_element_t ret; - pt_element_t *table; - struct page *page; - - npages = get_user_pages_fast((unsigned long)ptep_user, 1, FOLL_WRITE, &page); - if (likely(npages == 1)) { - table = kmap_atomic(page); - ret = CMPXCHG(&table[index], orig_pte, new_pte); - kunmap_atomic(table); - - kvm_release_page_dirty(page); - } else { - struct vm_area_struct *vma; - unsigned long vaddr = (unsigned long)ptep_user & PAGE_MASK; - unsigned long pfn; - unsigned long paddr; - - mmap_read_lock(current->mm); - vma = find_vma_intersection(current->mm, vaddr, vaddr + PAGE_SIZE); - if (!vma || !(vma->vm_flags & VM_PFNMAP)) { - mmap_read_unlock(current->mm); - return -EFAULT; - } - pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - paddr = pfn << PAGE_SHIFT; - table = memremap(paddr, PAGE_SIZE, MEMREMAP_WB); - if (!table) { - mmap_read_unlock(current->mm); - return -EFAULT; - } - ret = CMPXCHG(&table[index], orig_pte, new_pte); - memunmap(table); - mmap_read_unlock(current->mm); - } + int r = -EFAULT; + + if (!user_access_begin(ptep_user, sizeof(pt_element_t))) + return -EFAULT; + +#ifdef CMPXCHG + asm volatile("1:" LOCK_PREFIX CMPXCHG " %[new], %[ptr]\n" + "mov $0, %[r]\n" + "setnz %b[r]\n" + "2:" + _ASM_EXTABLE_UA(1b, 2b) + : [ptr] "+m" (*ptep_user), + [old] "+a" (orig_pte), + [r] "+q" (r) + : [new] "r" (new_pte) + : "memory"); +#else + asm volatile("1:" LOCK_PREFIX "cmpxchg8b %[ptr]\n" + "movl $0, %[r]\n" + "jz 2f\n" + "incl %[r]\n" + "2:" + _ASM_EXTABLE_UA(1b, 2b) + : [ptr] "+m" (*ptep_user), + [old] "+A" (orig_pte), + [r] "+rm" (r) + : [new_lo] "b" ((u32)new_pte), + [new_hi] "c" ((u32)(new_pte >> 32)) + : "memory"); +#endif - return (ret != orig_pte); + user_access_end(); + return r; } static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index aacf99a696cd0..f264b6b082e72 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -143,7 +143,6 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) { unsigned config, type = PERF_TYPE_RAW; - u8 event_select, unit_mask; struct kvm *kvm = pmc->vcpu->kvm; struct kvm_pmu_event_filter *filter; int i; @@ -175,23 +174,18 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) if (!allow_event) return; - event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT; - unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; - if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE | ARCH_PERFMON_EVENTSEL_INV | ARCH_PERFMON_EVENTSEL_CMASK | HSW_IN_TX | HSW_IN_TX_CHECKPOINTED))) { - config = kvm_x86_ops->pmu_ops->find_arch_event(pmc_to_pmu(pmc), - event_select, - unit_mask); + config = kvm_x86_ops->pmu_ops->pmc_perf_hw_id(pmc); if (config != PERF_COUNT_HW_MAX) type = PERF_TYPE_HARDWARE; } if (type == PERF_TYPE_RAW) - config = eventsel & X86_RAW_EVENT_MASK; + config = eventsel & AMD64_RAW_EVENT_MASK; pmc_reprogram_counter(pmc, type, config, !(eventsel & ARCH_PERFMON_EVENTSEL_USR), diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 3fc98afd72a82..b63859e340e4e 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -22,8 +22,7 @@ struct kvm_event_hw_type_mapping { }; struct kvm_pmu_ops { - unsigned (*find_arch_event)(struct kvm_pmu *pmu, u8 event_select, - u8 unit_mask); + unsigned int (*pmc_perf_hw_id)(struct kvm_pmc *pmc); unsigned (*find_fixed_event)(int idx); bool (*pmc_is_enabled)(struct kvm_pmc *pmc); struct kvm_pmc *(*pmc_idx_to_pmc)(struct kvm_pmu *pmu, int pmc_idx); diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c index c8388389a3b05..799b9a3144e3b 100644 --- a/arch/x86/kvm/pmu_amd.c +++ b/arch/x86/kvm/pmu_amd.c @@ -44,6 +44,22 @@ static struct kvm_event_hw_type_mapping amd_event_mapping[] = { [7] = { 0xd1, 0x00, PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, }; +/* duplicated from amd_f17h_perfmon_event_map. */ +static struct kvm_event_hw_type_mapping amd_f17h_event_mapping[] = { + [0] = { 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES }, + [1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS }, + [2] = { 0x60, 0xff, PERF_COUNT_HW_CACHE_REFERENCES }, + [3] = { 0x64, 0x09, PERF_COUNT_HW_CACHE_MISSES }, + [4] = { 0xc2, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, + [5] = { 0xc3, 0x00, PERF_COUNT_HW_BRANCH_MISSES }, + [6] = { 0x87, 0x02, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, + [7] = { 0x87, 0x01, PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, +}; + +/* amd_pmc_perf_hw_id depends on these being the same size */ +static_assert(ARRAY_SIZE(amd_event_mapping) == + ARRAY_SIZE(amd_f17h_event_mapping)); + static unsigned int get_msr_base(struct kvm_pmu *pmu, enum pmu_type type) { struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu); @@ -126,21 +142,27 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr, return &pmu->gp_counters[msr_to_index(msr)]; } -static unsigned amd_find_arch_event(struct kvm_pmu *pmu, - u8 event_select, - u8 unit_mask) +static unsigned int amd_pmc_perf_hw_id(struct kvm_pmc *pmc) { + struct kvm_event_hw_type_mapping *event_mapping; + u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT; + u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; int i; + if (guest_cpuid_family(pmc->vcpu) >= 0x17) + event_mapping = amd_f17h_event_mapping; + else + event_mapping = amd_event_mapping; + for (i = 0; i < ARRAY_SIZE(amd_event_mapping); i++) - if (amd_event_mapping[i].eventsel == event_select - && amd_event_mapping[i].unit_mask == unit_mask) + if (event_mapping[i].eventsel == event_select + && event_mapping[i].unit_mask == unit_mask) break; if (i == ARRAY_SIZE(amd_event_mapping)) return PERF_COUNT_HW_MAX; - return amd_event_mapping[i].event_type; + return event_mapping[i].event_type; } /* return PERF_COUNT_HW_MAX as AMD doesn't have fixed events */ @@ -245,12 +267,10 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* MSR_EVNTSELn */ pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL); if (pmc) { - if (data == pmc->eventsel) - return 0; - if (!(data & pmu->reserved_bits)) { + data &= ~pmu->reserved_bits; + if (data != pmc->eventsel) reprogram_gp_counter(pmc, data); - return 0; - } + return 0; } return 1; @@ -266,7 +286,7 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu) pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS; pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1; - pmu->reserved_bits = 0xffffffff00200000ull; + pmu->reserved_bits = 0xfffffff000280000ull; pmu->version = 1; /* not applicable to AMD; but clean them to prevent any fall out */ pmu->counter_bitmask[KVM_PMC_FIXED] = 0; @@ -302,7 +322,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu) } struct kvm_pmu_ops amd_pmu_ops = { - .find_arch_event = amd_find_arch_event, + .pmc_perf_hw_id = amd_pmc_perf_hw_id, .find_fixed_event = amd_find_fixed_event, .pmc_is_enabled = amd_pmc_is_enabled, .pmc_idx_to_pmc = amd_pmc_idx_to_pmc, diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d0d3757dc1dfc..ad8614e5402ac 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1917,7 +1917,8 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages) uint8_t *page_virtual; unsigned long i; - if (npages == 0 || pages == NULL) + if (this_cpu_has(X86_FEATURE_SME_COHERENT) || npages == 0 || + pages == NULL) return; for (i = 0; i < npages; i++) { @@ -4649,8 +4650,6 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm) break; } case AVIC_IPI_FAILURE_INVALID_TARGET: - WARN_ONCE(1, "Invalid IPI target: index=%u, vcpu=%d, icr=%#0x:%#0x\n", - index, svm->vcpu.vcpu_id, icrh, icrl); break; case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE: WARN_ONCE(1, "Invalid backing page\n"); diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h index 07ebf6882a458..632bed227152e 100644 --- a/arch/x86/kvm/vmx/evmcs.h +++ b/arch/x86/kvm/vmx/evmcs.h @@ -58,7 +58,9 @@ DECLARE_STATIC_KEY_FALSE(enable_evmcs); SECONDARY_EXEC_SHADOW_VMCS | \ SECONDARY_EXEC_TSC_SCALING | \ SECONDARY_EXEC_PAUSE_LOOP_EXITING) -#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) +#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL \ + (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | \ + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) #define EVMCS1_UNSUPPORTED_VMENTRY_CTRL (VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) #define EVMCS1_UNSUPPORTED_VMFUNC (VMX_VMFUNC_EPTP_SWITCHING) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 023bd3e1aa0d2..9f61ae64b7277 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2057,12 +2057,11 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) ~PIN_BASED_VMX_PREEMPTION_TIMER); /* Posted interrupts setting is only taken from vmcs12. */ - if (nested_cpu_has_posted_intr(vmcs12)) { + vmx->nested.pi_pending = false; + if (nested_cpu_has_posted_intr(vmcs12)) vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv; - vmx->nested.pi_pending = false; - } else { + else exec_control &= ~PIN_BASED_POSTED_INTR; - } pin_controls_set(vmx, exec_control); /* @@ -3747,12 +3746,12 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, /* update exit information fields: */ vmcs12->vm_exit_reason = exit_reason; vmcs12->exit_qualification = exit_qualification; - vmcs12->vm_exit_intr_info = exit_intr_info; - - vmcs12->idt_vectoring_info_field = 0; - vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); - vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); + /* + * On VM-Exit due to a failed VM-Entry, the VMCS isn't marked launched + * and only EXIT_REASON and EXIT_QUALIFICATION are updated, all other + * exit info fields are unmodified. + */ if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) { vmcs12->launch_state = 1; @@ -3764,8 +3763,13 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, * Transfer the event that L0 or L1 may wanted to inject into * L2 to IDT_VECTORING_INFO_FIELD. */ + vmcs12->idt_vectoring_info_field = 0; vmcs12_save_pending_event(vcpu, vmcs12); + vmcs12->vm_exit_intr_info = exit_intr_info; + vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); + vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); + /* * According to spec, there's no need to store the guest's * MSRs if the exit is due to a VM-entry failure that occurs diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 181e352d38de4..395566018ba95 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -64,10 +64,11 @@ static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data) reprogram_counter(pmu, bit); } -static unsigned intel_find_arch_event(struct kvm_pmu *pmu, - u8 event_select, - u8 unit_mask) +static unsigned int intel_pmc_perf_hw_id(struct kvm_pmc *pmc) { + struct kvm_pmu *pmu = pmc_to_pmu(pmc); + u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT; + u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; int i; for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++) @@ -374,7 +375,7 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu) } struct kvm_pmu_ops intel_pmu_ops = { - .find_arch_event = intel_find_arch_event, + .pmc_perf_hw_id = intel_pmc_perf_hw_id, .find_fixed_event = intel_find_fixed_event, .pmc_is_enabled = intel_pmc_is_enabled, .pmc_idx_to_pmc = intel_pmc_idx_to_pmc, diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index ca4252f81bf81..946d9205c3b6d 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -92,6 +92,7 @@ ENTRY(vmx_vmexit) pop %_ASM_AX .Lvmexit_skip_rsb: #endif + ISSUE_UNBALANCED_RET_GUARD X86_FEATURE_RSB_VMEXIT_LITE ret ENDPROC(vmx_vmexit) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d32bd16bb3203..df21962b2d7da 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -204,6 +204,9 @@ static const struct { #define L1D_CACHE_ORDER 4 static void *vmx_l1d_flush_pages; +/* Control for disabling CPU Fill buffer clear */ +static bool __read_mostly vmx_fb_clear_ctrl_available; + static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) { struct page *page; @@ -335,6 +338,60 @@ static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp) return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option); } +static void vmx_setup_fb_clear_ctrl(void) +{ + u64 msr; + + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES) && + !boot_cpu_has_bug(X86_BUG_MDS) && + !boot_cpu_has_bug(X86_BUG_TAA)) { + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr); + if (msr & ARCH_CAP_FB_CLEAR_CTRL) + vmx_fb_clear_ctrl_available = true; + } +} + +static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx) +{ + u64 msr; + + if (!vmx->disable_fb_clear) + return; + + rdmsrl(MSR_IA32_MCU_OPT_CTRL, msr); + msr |= FB_CLEAR_DIS; + wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr); + /* Cache the MSR value to avoid reading it later */ + vmx->msr_ia32_mcu_opt_ctrl = msr; +} + +static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx) +{ + if (!vmx->disable_fb_clear) + return; + + vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS; + wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl); +} + +static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) +{ + vmx->disable_fb_clear = vmx_fb_clear_ctrl_available; + + /* + * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS + * at VMEntry. Skip the MSR read/write when a guest has no use case to + * execute VERW. + */ + if ((vcpu->arch.arch_capabilities & ARCH_CAP_FB_CLEAR) || + ((vcpu->arch.arch_capabilities & ARCH_CAP_MDS_NO) && + (vcpu->arch.arch_capabilities & ARCH_CAP_TAA_NO) && + (vcpu->arch.arch_capabilities & ARCH_CAP_PSDP_NO) && + (vcpu->arch.arch_capabilities & ARCH_CAP_FBSDP_NO) && + (vcpu->arch.arch_capabilities & ARCH_CAP_SBDR_SSDP_NO))) + vmx->disable_fb_clear = false; +} + static const struct kernel_param_ops vmentry_l1d_flush_ops = { .set = vmentry_l1d_flush_set, .get = vmentry_l1d_flush_get, @@ -785,15 +842,15 @@ void update_exception_bitmap(struct kvm_vcpu *vcpu) /* * Check if MSR is intercepted for currently loaded MSR bitmap. */ -static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) +static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr) { unsigned long *msr_bitmap; int f = sizeof(unsigned long); - if (!cpu_has_vmx_msr_bitmap()) + if (!(exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS)) return true; - msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap; + msr_bitmap = vmx->loaded_vmcs->msr_bitmap; if (msr <= 0x1fff) { return !!test_bit(msr, msr_bitmap + 0x800 / f); @@ -2140,9 +2197,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } break; } - ret = kvm_set_msr_common(vcpu, msr_info); + ret = kvm_set_msr_common(vcpu, msr_info); } + /* FB_CLEAR may have changed, also update the FB_CLEAR_DIS behavior */ + if (msr_index == MSR_IA32_ARCH_CAPABILITIES) + vmx_update_fb_clear_dis(vcpu, vmx); + return ret; } @@ -4330,6 +4391,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vpid_sync_context(vmx->vpid); if (init_event) vmx_clear_hlt(vcpu); + + vmx_update_fb_clear_dis(vcpu, vmx); } static void enable_irq_window(struct kvm_vcpu *vcpu) @@ -6548,6 +6611,11 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx_l1d_flush(vcpu); else if (static_branch_unlikely(&mds_user_clear)) mds_clear_cpu_buffers(); + else if (static_branch_unlikely(&mmio_stale_data_clear) && + kvm_arch_has_assigned_device(vcpu->kvm)) + mds_clear_cpu_buffers(); + + vmx_disable_fb_clear(vmx); if (vcpu->arch.cr2 != read_cr2()) write_cr2(vcpu->arch.cr2); @@ -6557,6 +6625,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) vcpu->arch.cr2 = read_cr2(); + vmx_enable_fb_clear(vmx); + /* * We do not use IBRS in the kernel. If this vCPU has used the * SPEC_CTRL MSR it may have left it on; save the value and @@ -6572,7 +6642,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) * If the L02 MSR bitmap does not intercept the MSR, then we need to * save it. */ - if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) + if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))) vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); @@ -8040,8 +8110,11 @@ static int __init vmx_init(void) return r; } + vmx_setup_fb_clear_ctrl(); + for_each_possible_cpu(cpu) { INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); + INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu)); spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); } diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 55731dd0096f2..7a3362ab59867 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -280,8 +280,11 @@ struct vcpu_vmx { u64 msr_ia32_feature_control; u64 msr_ia32_feature_control_valid_bits; u64 ept_pointer; + u64 msr_ia32_mcu_opt_ctrl; + bool disable_fb_clear; struct pt_desc pt_desc; + }; enum ept_pointers_status { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a816763f97d5f..bc1f90f41f958 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -69,6 +69,7 @@ #include #include #include +#include #include #define CREATE_TRACE_POINTS @@ -1280,7 +1281,7 @@ static const u32 msrs_to_save_all[] = { MSR_IA32_UMWAIT_CONTROL, MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1, - MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_ARCH_PERFMON_FIXED_CTR0 + 3, + MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS, MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL, MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1, @@ -1301,6 +1302,13 @@ static const u32 msrs_to_save_all[] = { MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13, MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15, MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17, + + MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3, + MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3, + MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2, + MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5, + MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2, + MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5, }; static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)]; @@ -1457,6 +1465,10 @@ static u64 kvm_get_arch_capabilities(void) /* KVM does not emulate MSR_IA32_TSX_CTRL. */ data &= ~ARCH_CAP_TSX_CTRL_MSR; + + /* Guests don't need to know "Fill buffer clear control" exists */ + data &= ~ARCH_CAP_FB_CLEAR_CTRL; + return data; } @@ -2827,6 +2839,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!msr_info->host_initiated) { s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr; adjust_tsc_offset_guest(vcpu, adj); + /* Before back to guest, tsc_timestamp must be adjusted + * as well, otherwise guest's percpu pvclock time could jump. + */ + kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); } vcpu->arch.ia32_tsc_adjust_msr = data; } @@ -5658,6 +5674,7 @@ EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system); int handle_ud(struct kvm_vcpu *vcpu) { + static const char kvm_emulate_prefix[] = { __KVM_EMULATE_PREFIX }; int emul_type = EMULTYPE_TRAP_UD; char sig[5]; /* ud2; .ascii "kvm" */ struct x86_exception e; @@ -5665,7 +5682,7 @@ int handle_ud(struct kvm_vcpu *vcpu) if (force_emulation_prefix && kvm_read_guest_virt(vcpu, kvm_get_linear_rip(vcpu), sig, sizeof(sig), &e) == 0 && - memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) { + memcmp(sig, kvm_emulate_prefix, sizeof(sig)) == 0) { kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig)); emul_type = EMULTYPE_TRAP_UD_FORCED; } diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index c126571e5e2ee..3d1cfad36ba21 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -43,8 +43,8 @@ static void delay_loop(unsigned long loops) " jnz 2b \n" "3: dec %0 \n" - : /* we don't need output */ - :"a" (loops) + : "+a" (loops) + : ); } diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 0b5862ba6a75e..404279563891a 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -13,6 +13,8 @@ #include #include +#include + /* Verify next sizeof(t) bytes can be on the same instruction */ #define validate_next(t, insn, n) \ ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) @@ -58,6 +60,36 @@ void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64) insn->addr_bytes = 4; } +static const insn_byte_t xen_prefix[] = { __XEN_EMULATE_PREFIX }; +static const insn_byte_t kvm_prefix[] = { __KVM_EMULATE_PREFIX }; + +static int __insn_get_emulate_prefix(struct insn *insn, + const insn_byte_t *prefix, size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) { + if (peek_nbyte_next(insn_byte_t, insn, i) != prefix[i]) + goto err_out; + } + + insn->emulate_prefix_size = len; + insn->next_byte += len; + + return 1; + +err_out: + return 0; +} + +static void insn_get_emulate_prefix(struct insn *insn) +{ + if (__insn_get_emulate_prefix(insn, xen_prefix, sizeof(xen_prefix))) + return; + + __insn_get_emulate_prefix(insn, kvm_prefix, sizeof(kvm_prefix)); +} + /** * insn_get_prefixes - scan x86 instruction prefix bytes * @insn: &struct insn containing instruction @@ -76,6 +108,8 @@ void insn_get_prefixes(struct insn *insn) if (prefixes->got) return; + insn_get_emulate_prefix(insn); + nb = 0; lb = 0; b = peek_next(insn_byte_t, insn); diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 1847e993ac63a..f3f7f4cb15a6b 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -142,7 +142,7 @@ void __memcpy_flushcache(void *_dst, const void *_src, size_t size) /* cache copy and flush to align dest */ if (!IS_ALIGNED(dest, 8)) { - unsigned len = min_t(unsigned, size, ALIGN(dest, 8) - dest); + size_t len = min_t(size_t, size, ALIGN(dest, 8) - dest); memcpy((void *) dest, (void *) source, len); clean_cache_range((void *) dest, len); diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 296f40b5f2cc5..0bea0e5425d93 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -44,55 +44,6 @@ __visible bool ex_handler_fault(const struct exception_table_entry *fixup, } EXPORT_SYMBOL_GPL(ex_handler_fault); -/* - * Handler for UD0 exception following a failed test against the - * result of a refcount inc/dec/add/sub. - */ -__visible bool ex_handler_refcount(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr, - unsigned long error_code, - unsigned long fault_addr) -{ - /* First unconditionally saturate the refcount. */ - *(int *)regs->cx = INT_MIN / 2; - - /* - * Strictly speaking, this reports the fixup destination, not - * the fault location, and not the actually overflowing - * instruction, which is the instruction before the "js", but - * since that instruction could be a variety of lengths, just - * report the location after the overflow, which should be close - * enough for finding the overflow, as it's at least back in - * the function, having returned from .text.unlikely. - */ - regs->ip = ex_fixup_addr(fixup); - - /* - * This function has been called because either a negative refcount - * value was seen by any of the refcount functions, or a zero - * refcount value was seen by refcount_dec(). - * - * If we crossed from INT_MAX to INT_MIN, OF (Overflow Flag: result - * wrapped around) will be set. Additionally, seeing the refcount - * reach 0 will set ZF (Zero Flag: result was zero). In each of - * these cases we want a report, since it's a boundary condition. - * The SF case is not reported since it indicates post-boundary - * manipulations below zero or above INT_MAX. And if none of the - * flags are set, something has gone very wrong, so report it. - */ - if (regs->flags & (X86_EFLAGS_OF | X86_EFLAGS_ZF)) { - bool zero = regs->flags & X86_EFLAGS_ZF; - - refcount_error_report(regs, zero ? "hit zero" : "overflow"); - } else if ((regs->flags & X86_EFLAGS_SF) == 0) { - /* Report if none of OF, ZF, nor SF are set. */ - refcount_error_report(regs, "unexpected saturation"); - } - - return true; -} -EXPORT_SYMBOL(ex_handler_refcount); - /* * Handler for when we fail to restore a task's FPU state. We should never get * here because the FPU state of a task using the FPU (task->thread.fpu.state) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index b7e8bf4a2ce96..02cde3026f846 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1223,7 +1223,7 @@ static struct kcore_list kcore_vsyscall; static void __init register_page_bootmem_info(void) { -#if defined(CONFIG_NUMA) || defined(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP) +#if defined(CONFIG_NUMA) || defined(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP) int i; for_each_online_node(i) diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index 84cda5dc03870..ebc8e3af1c675 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -27,6 +27,15 @@ #undef CONFIG_PARAVIRT_XXL #undef CONFIG_PARAVIRT_SPINLOCKS +/* + * This code runs before CPU feature bits are set. By default, the + * pgtable_l5_enabled() function uses bit X86_FEATURE_LA57 to determine if + * 5-level paging is active, so that won't work here. USE_EARLY_PGTABLE_L5 + * is provided to handle this situation and, instead, use a variable that + * has been set by the early boot code. + */ +#define USE_EARLY_PGTABLE_L5 + #include #include #include diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 281e584cfe39e..d61313f5c5b98 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1967,7 +1967,7 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc) /* * Before changing the encryption attribute, we need to flush caches. */ - cpa_flush(&cpa, 1); + cpa_flush(&cpa, !this_cpu_has(X86_FEATURE_SME_COHERENT)); ret = __change_page_attr_set_clr(&cpa, 1); diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 35b2e35c22035..c7c4e2f8c6a5c 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -75,7 +75,7 @@ int pat_debug_enable; static int __init pat_debug_setup(char *str) { pat_debug_enable = 1; - return 0; + return 1; } __setup("debugpat", pat_debug_setup); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 6e884f17634fe..55f62dca28aa2 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -728,6 +728,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, } break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + if (boot_cpu_has(X86_FEATURE_XMM2)) + /* Emit 'lfence' */ + EMIT3(0x0F, 0xAE, 0xE8); + break; + /* ST: *(u8*)(dst_reg + off) = imm */ case BPF_ST | BPF_MEM | BPF_B: if (is_ereg(dst_reg)) diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index 0fcba32077c87..2914f900034e0 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@ -1705,6 +1705,12 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, i++; break; } + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + if (boot_cpu_has(X86_FEATURE_XMM2)) + /* Emit 'lfence' */ + EMIT3(0x0F, 0xAE, 0xE8); + break; /* ST: *(u8*)(dst_reg + off) = imm */ case BPF_ST | BPF_MEM | BPF_H: case BPF_ST | BPF_MEM | BPF_B: diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 5c11ae66b5d8e..9cf8f5417e7f4 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -442,6 +442,11 @@ void __init xen_msi_init(void) x86_msi.setup_msi_irqs = xen_hvm_setup_msi_irqs; x86_msi.teardown_msi_irq = xen_teardown_msi_irq; + /* + * With XEN PIRQ/Eventchannels in use PCI/MSI[-X] masking is solely + * controlled by the hypervisor. + */ + pci_msi_ignore_mask = 1; } #endif diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index aefe845dff596..6ca88fbc009cd 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -279,7 +279,8 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) return; } - new = early_memremap(new_phys, new_size); + new = early_memremap_prot(new_phys, new_size, + pgprot_val(pgprot_encrypted(FIXMAP_PAGE_NORMAL))); if (!new) { pr_err("Failed to map new boot services memmap\n"); return; diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c index ee2beda590d0d..1d4a00e767ece 100644 --- a/arch/x86/platform/olpc/olpc.c +++ b/arch/x86/platform/olpc/olpc.c @@ -274,7 +274,7 @@ static struct olpc_ec_driver ec_xo1_driver = { static struct olpc_ec_driver ec_xo1_5_driver = { .ec_cmd = olpc_xo1_ec_cmd, -#ifdef CONFIG_OLPC_XO1_5_SCI +#ifdef CONFIG_OLPC_XO15_SCI /* * XO-1.5 EC wakeups are available when olpc-xo15-sci driver is * compiled in diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 915bb16397632..20dd7023132fb 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -25,6 +25,7 @@ #include #include #include +#include #ifdef CONFIG_X86_32 __visible unsigned long saved_context_ebx; @@ -40,7 +41,8 @@ static void msr_save_context(struct saved_context *ctxt) struct saved_msr *end = msr + ctxt->saved_msrs.num; while (msr < end) { - msr->valid = !rdmsrl_safe(msr->info.msr_no, &msr->info.reg.q); + if (msr->valid) + rdmsrl(msr->info.msr_no, msr->info.reg.q); msr++; } } @@ -262,6 +264,13 @@ static void notrace __restore_processor_state(struct saved_context *ctxt) x86_platform.restore_sched_clock_state(); mtrr_bp_restore(); perf_restore_debug_store(); + + microcode_bsp_resume(); + + /* + * This needs to happen after the microcode has been updated upon resume + * because some of the MSRs are "emulated" in microcode. + */ msr_restore_context(ctxt); } @@ -421,8 +430,10 @@ static int msr_build_context(const u32 *msr_id, const int num) } for (i = saved_msrs->num, j = 0; i < total_num; i++, j++) { + u64 dummy; + msr_array[i].info.msr_no = msr_id[j]; - msr_array[i].valid = false; + msr_array[i].valid = !rdmsrl_safe(msr_id[j], &dummy); msr_array[i].info.reg.q = 0; } saved_msrs->num = total_num; @@ -509,10 +520,24 @@ static int pm_cpu_check(const struct x86_cpu_id *c) return ret; } +static void pm_save_spec_msr(void) +{ + u32 spec_msr_id[] = { + MSR_IA32_SPEC_CTRL, + MSR_IA32_TSX_CTRL, + MSR_TSX_FORCE_ABORT, + MSR_IA32_MCU_OPT_CTRL, + MSR_AMD64_LS_CFG, + }; + + msr_build_context(spec_msr_id, ARRAY_SIZE(spec_msr_id)); +} + static int pm_check_save_msr(void) { dmi_check_system(msr_save_dmi_table); pm_cpu_check(msr_save_cpu_table); + pm_save_spec_msr(); return 0; } diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index de371e52cfa85..fac50ebb122b5 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -16,6 +16,32 @@ u32 *trampoline_cr4_features; /* Hold the pgd entry used on booting additional CPUs */ pgd_t trampoline_pgd_entry; +void load_trampoline_pgtable(void) +{ +#ifdef CONFIG_X86_32 + load_cr3(initial_page_table); +#else + /* + * This function is called before exiting to real-mode and that will + * fail with CR4.PCIDE still set. + */ + if (boot_cpu_has(X86_FEATURE_PCID)) + cr4_clear_bits(X86_CR4_PCIDE); + + write_cr3(real_mode_header->trampoline_pgd); +#endif + + /* + * The CR3 write above will not flush global TLB entries. + * Stale, global entries from previous page tables may still be + * present. Flush those stale entries. + * + * This ensures that memory accessed while running with + * trampoline_pgd is *actually* mapped into trampoline_pgd. + */ + __flush_tlb_all(); +} + void __init reserve_real_mode(void) { phys_addr_t mem; diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c index 3ee234b6234dd..255a44dd415a9 100644 --- a/arch/x86/um/ldt.c +++ b/arch/x86/um/ldt.c @@ -23,9 +23,11 @@ static long write_ldt_entry(struct mm_id *mm_idp, int func, { long res; void *stub_addr; + + BUILD_BUG_ON(sizeof(*desc) % sizeof(long)); + res = syscall_stub_data(mm_idp, (unsigned long *)desc, - (sizeof(*desc) + sizeof(long) - 1) & - ~(sizeof(long) - 1), + sizeof(*desc) / sizeof(long), addr, &stub_addr); if (!res) { unsigned long args[] = { func, diff --git a/arch/x86/um/shared/sysdep/syscalls_64.h b/arch/x86/um/shared/sysdep/syscalls_64.h index 8a7d5e1da98e5..1e6875b4ffd83 100644 --- a/arch/x86/um/shared/sysdep/syscalls_64.h +++ b/arch/x86/um/shared/sysdep/syscalls_64.h @@ -10,13 +10,12 @@ #include #include -typedef long syscall_handler_t(void); +typedef long syscall_handler_t(long, long, long, long, long, long); extern syscall_handler_t *sys_call_table[]; #define EXECUTE_SYSCALL(syscall, regs) \ - (((long (*)(long, long, long, long, long, long)) \ - (*sys_call_table[syscall]))(UPT_SYSCALL_ARG1(®s->regs), \ + (((*sys_call_table[syscall]))(UPT_SYSCALL_ARG1(®s->regs), \ UPT_SYSCALL_ARG2(®s->regs), \ UPT_SYSCALL_ARG3(®s->regs), \ UPT_SYSCALL_ARG4(®s->regs), \ diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c index 58f51667e2e4b..8249685b40960 100644 --- a/arch/x86/um/syscalls_64.c +++ b/arch/x86/um/syscalls_64.c @@ -11,6 +11,7 @@ #include #include /* XXX This should get the constants from libc */ #include +#include long arch_prctl(struct task_struct *task, int option, unsigned long __user *arg2) @@ -35,7 +36,7 @@ long arch_prctl(struct task_struct *task, int option, switch (option) { case ARCH_SET_FS: case ARCH_SET_GS: - ret = restore_registers(pid, ¤t->thread.regs.regs); + ret = restore_pid_registers(pid, ¤t->thread.regs.regs); if (ret) return ret; break; diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 3e66feff524a8..65cf405cd9753 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -727,8 +727,8 @@ static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g) preempt_enable(); } -static void xen_convert_trap_info(const struct desc_ptr *desc, - struct trap_info *traps) +static unsigned xen_convert_trap_info(const struct desc_ptr *desc, + struct trap_info *traps, bool full) { unsigned in, out, count; @@ -738,17 +738,18 @@ static void xen_convert_trap_info(const struct desc_ptr *desc, for (in = out = 0; in < count; in++) { gate_desc *entry = (gate_desc *)(desc->address) + in; - if (cvt_gate_to_trap(in, entry, &traps[out])) + if (cvt_gate_to_trap(in, entry, &traps[out]) || full) out++; } - traps[out].address = 0; + + return out; } void xen_copy_trap_info(struct trap_info *traps) { const struct desc_ptr *desc = this_cpu_ptr(&idt_desc); - xen_convert_trap_info(desc, traps); + xen_convert_trap_info(desc, traps, true); } /* Load a new IDT into Xen. In principle this can be per-CPU, so we @@ -758,6 +759,7 @@ static void xen_load_idt(const struct desc_ptr *desc) { static DEFINE_SPINLOCK(lock); static struct trap_info traps[257]; + unsigned out; trace_xen_cpu_load_idt(desc); @@ -765,7 +767,8 @@ static void xen_load_idt(const struct desc_ptr *desc) memcpy(this_cpu_ptr(&idt_desc), desc, sizeof(idt_desc)); - xen_convert_trap_info(desc, traps); + out = xen_convert_trap_info(desc, traps, false); + memset(&traps[out], 0, sizeof(traps[0])); xen_mc_flush(); if (HYPERVISOR_set_trap_table(traps)) @@ -1183,6 +1186,11 @@ static void __init xen_dom0_set_legacy_features(void) x86_platform.legacy.rtc = 1; } +static void __init xen_domu_set_legacy_features(void) +{ + x86_platform.legacy.rtc = 0; +} + /* First C function to be called on Xen boot */ asmlinkage __visible void __init xen_start_kernel(void) { @@ -1353,6 +1361,8 @@ asmlinkage __visible void __init xen_start_kernel(void) add_preferred_console("xenboot", 0, NULL); if (pci_xen) x86_init.pci.arch_init = pci_xen_init; + x86_platform.set_legacy_features = + xen_domu_set_legacy_features; } else { const struct dom0_vga_console_info *info = (void *)((char *)xen_start_info + diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 12fcb3858303a..8b1e40ec58f65 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -622,8 +622,8 @@ int xen_alloc_p2m_entry(unsigned long pfn) } /* Expanded the p2m? */ - if (pfn > xen_p2m_last_pfn) { - xen_p2m_last_pfn = pfn; + if (pfn >= xen_p2m_last_pfn) { + xen_p2m_last_pfn = ALIGN(pfn + 1, P2M_PER_PAGE); HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn; } diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c index e13b0b49fcdfc..d7249f4c90f1b 100644 --- a/arch/x86/xen/pmu.c +++ b/arch/x86/xen/pmu.c @@ -512,10 +512,7 @@ irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id) return ret; } -bool is_xen_pmu(int cpu) -{ - return (get_xenpmu_data() != NULL); -} +bool is_xen_pmu; void xen_pmu_init(int cpu) { @@ -526,7 +523,7 @@ void xen_pmu_init(int cpu) BUILD_BUG_ON(sizeof(struct xen_pmu_data) > PAGE_SIZE); - if (xen_hvm_domain()) + if (xen_hvm_domain() || (cpu != 0 && !is_xen_pmu)) return; xenpmu_data = (struct xen_pmu_data *)get_zeroed_page(GFP_KERNEL); @@ -547,7 +544,8 @@ void xen_pmu_init(int cpu) per_cpu(xenpmu_shared, cpu).xenpmu_data = xenpmu_data; per_cpu(xenpmu_shared, cpu).flags = 0; - if (cpu == 0) { + if (!is_xen_pmu) { + is_xen_pmu = true; perf_register_guest_info_callbacks(&xen_guest_cbs); xen_pmu_arch_init(); } diff --git a/arch/x86/xen/pmu.h b/arch/x86/xen/pmu.h index 0e83a160589bc..65c58894fc79f 100644 --- a/arch/x86/xen/pmu.h +++ b/arch/x86/xen/pmu.h @@ -4,6 +4,8 @@ #include +extern bool is_xen_pmu; + irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id); #ifdef CONFIG_XEN_HAVE_VPMU void xen_pmu_init(int cpu); @@ -12,7 +14,6 @@ void xen_pmu_finish(int cpu); static inline void xen_pmu_init(int cpu) {} static inline void xen_pmu_finish(int cpu) {} #endif -bool is_xen_pmu(int cpu); bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err); bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err); int pmu_apic_update(uint32_t reg); diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c index f8d39440b2923..e5bd9eb421915 100644 --- a/arch/x86/xen/smp_hvm.c +++ b/arch/x86/xen/smp_hvm.c @@ -18,6 +18,12 @@ static void __init xen_hvm_smp_prepare_boot_cpu(void) */ xen_vcpu_setup(0); + /* + * Called again in case the kernel boots on vcpu >= MAX_VIRT_CPUS. + * Refer to comments in xen_hvm_init_time_ops(). + */ + xen_hvm_init_time_ops(); + /* * The alternative logic (which patches the unlock/lock) runs before * the smp bootup up code is activated. Hence we need to set this up diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 0cebe5db691d9..9d9777ded5f7b 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -53,6 +53,7 @@ static DEFINE_PER_CPU(struct xen_common_irq, xen_irq_work) = { .irq = -1 }; static DEFINE_PER_CPU(struct xen_common_irq, xen_pmu_irq) = { .irq = -1 }; static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id); +void asm_cpu_bringup_and_idle(void); static void cpu_bringup(void) { @@ -129,7 +130,7 @@ int xen_smp_intr_init_pv(unsigned int cpu) per_cpu(xen_irq_work, cpu).irq = rc; per_cpu(xen_irq_work, cpu).name = callfunc_name; - if (is_xen_pmu(cpu)) { + if (is_xen_pmu) { pmu_name = kasprintf(GFP_KERNEL, "pmu%d", cpu); rc = bind_virq_to_irqhandler(VIRQ_XENPMU, cpu, xen_pmu_irq_handler, @@ -310,7 +311,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) * pointing just below where pt_regs would be if it were a normal * kernel entry. */ - ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; + ctxt->user_regs.eip = (unsigned long)asm_cpu_bringup_and_idle; ctxt->flags = VGCF_IN_KERNEL; ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ ctxt->user_regs.ds = __USER_DS; diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index befbdd8b17f01..4ec6c80f76872 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -547,6 +547,11 @@ static void xen_hvm_setup_cpu_clockevents(void) void __init xen_hvm_init_time_ops(void) { + static bool hvm_time_initialized; + + if (hvm_time_initialized) + return; + /* * vector callback is needed otherwise we cannot receive interrupts * on cpu > 0 and at this point we don't know how many cpus are @@ -556,7 +561,22 @@ void __init xen_hvm_init_time_ops(void) return; if (!xen_feature(XENFEAT_hvm_safe_pvclock)) { - pr_info("Xen doesn't support pvclock on HVM, disable pv timer"); + pr_info_once("Xen doesn't support pvclock on HVM, disable pv timer"); + return; + } + + /* + * Only MAX_VIRT_CPUS 'vcpu_info' are embedded inside 'shared_info'. + * The __this_cpu_read(xen_vcpu) is still NULL when Xen HVM guest + * boots on vcpu >= MAX_VIRT_CPUS (e.g., kexec), To access + * __this_cpu_read(xen_vcpu) via xen_clocksource_read() will panic. + * + * The xen_hvm_init_time_ops() should be called again later after + * __this_cpu_read(xen_vcpu) is available. + */ + if (!__this_cpu_read(xen_vcpu)) { + pr_info("Delay xen_init_time_common() as kernel is running on vcpu=%d\n", + xen_vcpu_nr(0)); return; } @@ -568,6 +588,8 @@ void __init xen_hvm_init_time_ops(void) x86_platform.calibrate_tsc = xen_tsc_khz; x86_platform.get_wallclock = xen_get_wallclock; x86_platform.set_wallclock = xen_set_wallclock; + + hvm_time_initialized = true; } #endif diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index c1d8b90aa4e2d..142da85474801 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -35,7 +35,11 @@ ENTRY(startup_xen) rep __ASM_SIZE(stos) mov %_ASM_SI, xen_start_info - mov $init_thread_union+THREAD_SIZE, %_ASM_SP +#ifdef CONFIG_X86_64 + mov initial_stack(%rip), %rsp +#else + mov initial_stack, %esp +#endif #ifdef CONFIG_X86_64 /* Set up %gs. @@ -51,9 +55,19 @@ ENTRY(startup_xen) wrmsr #endif - jmp xen_start_kernel + call xen_start_kernel END(startup_xen) __FINIT + +#ifdef CONFIG_XEN_PV_SMP +.pushsection .text +SYM_CODE_START(asm_cpu_bringup_and_idle) + UNWIND_HINT_EMPTY + + call cpu_bringup_and_idle +SYM_CODE_END(asm_cpu_bringup_and_idle) +.popsection +#endif #endif .pushsection .text diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 8352037322dfb..499bdd1f0c715 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -27,7 +27,7 @@ config XTENSA select HAVE_DMA_CONTIGUOUS select HAVE_EXIT_THREAD select HAVE_FUNCTION_TRACER - select HAVE_FUTEX_CMPXCHG if !MMU + select HAVE_FUTEX_CMPXCHG if !MMU && FUTEX select HAVE_HW_BREAKPOINT if PERF_EVENTS select HAVE_IRQ_TIME_ACCOUNTING select HAVE_OPROFILE diff --git a/arch/xtensa/boot/dts/xtfpga-flash-128m.dtsi b/arch/xtensa/boot/dts/xtfpga-flash-128m.dtsi index 9bf8bad1dd18a..c33932568aa73 100644 --- a/arch/xtensa/boot/dts/xtfpga-flash-128m.dtsi +++ b/arch/xtensa/boot/dts/xtfpga-flash-128m.dtsi @@ -8,19 +8,19 @@ reg = <0x00000000 0x08000000>; bank-width = <2>; device-width = <2>; - partition@0x0 { + partition@0 { label = "data"; reg = <0x00000000 0x06000000>; }; - partition@0x6000000 { + partition@6000000 { label = "boot loader area"; reg = <0x06000000 0x00800000>; }; - partition@0x6800000 { + partition@6800000 { label = "kernel image"; reg = <0x06800000 0x017e0000>; }; - partition@0x7fe0000 { + partition@7fe0000 { label = "boot environment"; reg = <0x07fe0000 0x00020000>; }; diff --git a/arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi b/arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi index 40c2f81f7cb66..7bde2ab2d6fb5 100644 --- a/arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi +++ b/arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi @@ -8,19 +8,19 @@ reg = <0x08000000 0x01000000>; bank-width = <2>; device-width = <2>; - partition@0x0 { + partition@0 { label = "boot loader area"; reg = <0x00000000 0x00400000>; }; - partition@0x400000 { + partition@400000 { label = "kernel image"; reg = <0x00400000 0x00600000>; }; - partition@0xa00000 { + partition@a00000 { label = "data"; reg = <0x00a00000 0x005e0000>; }; - partition@0xfe0000 { + partition@fe0000 { label = "boot environment"; reg = <0x00fe0000 0x00020000>; }; diff --git a/arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi b/arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi index fb8d3a9f33c23..0655b868749a4 100644 --- a/arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi +++ b/arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi @@ -8,11 +8,11 @@ reg = <0x08000000 0x00400000>; bank-width = <2>; device-width = <2>; - partition@0x0 { + partition@0 { label = "boot loader area"; reg = <0x00000000 0x003f0000>; }; - partition@0x3f0000 { + partition@3f0000 { label = "boot environment"; reg = <0x003f0000 0x00010000>; }; diff --git a/arch/xtensa/include/asm/kmem_layout.h b/arch/xtensa/include/asm/kmem_layout.h index 9c12babc016cd..6fc05cba61a27 100644 --- a/arch/xtensa/include/asm/kmem_layout.h +++ b/arch/xtensa/include/asm/kmem_layout.h @@ -11,6 +11,7 @@ #ifndef _XTENSA_KMEM_LAYOUT_H #define _XTENSA_KMEM_LAYOUT_H +#include #include #ifdef CONFIG_MMU @@ -65,6 +66,34 @@ #endif +/* KIO definition */ + +#if XCHAL_HAVE_PTP_MMU +#define XCHAL_KIO_CACHED_VADDR 0xe0000000 +#define XCHAL_KIO_BYPASS_VADDR 0xf0000000 +#define XCHAL_KIO_DEFAULT_PADDR 0xf0000000 +#else +#define XCHAL_KIO_BYPASS_VADDR XCHAL_KIO_PADDR +#define XCHAL_KIO_DEFAULT_PADDR 0x90000000 +#endif +#define XCHAL_KIO_SIZE 0x10000000 + +#if (!XCHAL_HAVE_PTP_MMU || XCHAL_HAVE_SPANNING_WAY) && defined(CONFIG_USE_OF) +#define XCHAL_KIO_PADDR xtensa_get_kio_paddr() +#ifndef __ASSEMBLY__ +extern unsigned long xtensa_kio_paddr; + +static inline unsigned long xtensa_get_kio_paddr(void) +{ + return xtensa_kio_paddr; +} +#endif +#else +#define XCHAL_KIO_PADDR XCHAL_KIO_DEFAULT_PADDR +#endif + +/* KERNEL_STACK definition */ + #ifndef CONFIG_KASAN #define KERNEL_STACK_SHIFT 13 #else diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index 7495520d7a3e9..fd1161de45356 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -225,8 +225,8 @@ extern unsigned long get_wchan(struct task_struct *p); #define xtensa_set_sr(x, sr) \ ({ \ - unsigned int v = (unsigned int)(x); \ - __asm__ __volatile__ ("wsr %0, "__stringify(sr) :: "a"(v)); \ + __asm__ __volatile__ ("wsr %0, "__stringify(sr) :: \ + "a"((unsigned int)(x))); \ }) #define xtensa_get_sr(sr) \ diff --git a/arch/xtensa/include/asm/timex.h b/arch/xtensa/include/asm/timex.h index 233ec75e60c69..3f2462f2d0270 100644 --- a/arch/xtensa/include/asm/timex.h +++ b/arch/xtensa/include/asm/timex.h @@ -29,10 +29,6 @@ extern unsigned long ccount_freq; -typedef unsigned long long cycles_t; - -#define get_cycles() (0) - void local_timer_setup(unsigned cpu); /* @@ -59,4 +55,6 @@ static inline void set_linux_timer (unsigned long ccompare) xtensa_set_sr(ccompare, SREG_CCOMPARE + LINUX_TIMER); } +#include + #endif /* _XTENSA_TIMEX_H */ diff --git a/arch/xtensa/include/asm/vectors.h b/arch/xtensa/include/asm/vectors.h index 79fe3007919eb..4220c6dac44f4 100644 --- a/arch/xtensa/include/asm/vectors.h +++ b/arch/xtensa/include/asm/vectors.h @@ -21,50 +21,14 @@ #include #include -#if XCHAL_HAVE_PTP_MMU -#define XCHAL_KIO_CACHED_VADDR 0xe0000000 -#define XCHAL_KIO_BYPASS_VADDR 0xf0000000 -#define XCHAL_KIO_DEFAULT_PADDR 0xf0000000 -#else -#define XCHAL_KIO_BYPASS_VADDR XCHAL_KIO_PADDR -#define XCHAL_KIO_DEFAULT_PADDR 0x90000000 -#endif -#define XCHAL_KIO_SIZE 0x10000000 - -#if (!XCHAL_HAVE_PTP_MMU || XCHAL_HAVE_SPANNING_WAY) && defined(CONFIG_OF) -#define XCHAL_KIO_PADDR xtensa_get_kio_paddr() -#ifndef __ASSEMBLY__ -extern unsigned long xtensa_kio_paddr; - -static inline unsigned long xtensa_get_kio_paddr(void) -{ - return xtensa_kio_paddr; -} -#endif -#else -#define XCHAL_KIO_PADDR XCHAL_KIO_DEFAULT_PADDR -#endif - -#if defined(CONFIG_MMU) - -#if XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY -/* Image Virtual Start Address */ -#define KERNELOFFSET (XCHAL_KSEG_CACHED_VADDR + \ - CONFIG_KERNEL_LOAD_ADDRESS - \ +#if defined(CONFIG_MMU) && XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY +#define KERNELOFFSET (CONFIG_KERNEL_LOAD_ADDRESS + \ + XCHAL_KSEG_CACHED_VADDR - \ XCHAL_KSEG_PADDR) #else #define KERNELOFFSET CONFIG_KERNEL_LOAD_ADDRESS #endif -#else /* !defined(CONFIG_MMU) */ - /* MMU Not being used - Virtual == Physical */ - -/* Location of the start of the kernel text, _start */ -#define KERNELOFFSET CONFIG_KERNEL_LOAD_ADDRESS - - -#endif /* CONFIG_MMU */ - #define RESET_VECTOR1_VADDR (XCHAL_RESET_VECTOR1_VADDR) #ifdef CONFIG_VECTORS_OFFSET #define VECBASE_VADDR (KERNELOFFSET - CONFIG_VECTORS_OFFSET) diff --git a/arch/xtensa/kernel/coprocessor.S b/arch/xtensa/kernel/coprocessor.S index d956f87fcb095..6a6cc8dae5653 100644 --- a/arch/xtensa/kernel/coprocessor.S +++ b/arch/xtensa/kernel/coprocessor.S @@ -37,7 +37,7 @@ .if XTENSA_HAVE_COPROCESSOR(x); \ .align 4; \ .Lsave_cp_regs_cp##x: \ - xchal_cp##x##_store a2 a4 a5 a6 a7; \ + xchal_cp##x##_store a2 a3 a4 a5 a6; \ jx a0; \ .endif @@ -54,7 +54,7 @@ .if XTENSA_HAVE_COPROCESSOR(x); \ .align 4; \ .Lload_cp_regs_cp##x: \ - xchal_cp##x##_load a2 a4 a5 a6 a7; \ + xchal_cp##x##_load a2 a3 a4 a5 a6; \ jx a0; \ .endif diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c index a48bf2d10ac2d..80cc9770a8d2d 100644 --- a/arch/xtensa/kernel/irq.c +++ b/arch/xtensa/kernel/irq.c @@ -145,7 +145,7 @@ unsigned xtensa_get_ext_irq_no(unsigned irq) void __init init_IRQ(void) { -#ifdef CONFIG_OF +#ifdef CONFIG_USE_OF irqchip_init(); #else #ifdef CONFIG_HAVE_SMP diff --git a/arch/xtensa/kernel/jump_label.c b/arch/xtensa/kernel/jump_label.c index 61cf6497a646b..ad1841cecdfb7 100644 --- a/arch/xtensa/kernel/jump_label.c +++ b/arch/xtensa/kernel/jump_label.c @@ -40,7 +40,7 @@ static int patch_text_stop_machine(void *data) { struct patch *patch = data; - if (atomic_inc_return(&patch->cpu_count) == 1) { + if (atomic_inc_return(&patch->cpu_count) == num_online_cpus()) { local_patch_text(patch->addr, patch->data, patch->sz); atomic_inc(&patch->cpu_count); } else { @@ -61,7 +61,7 @@ static void patch_text(unsigned long addr, const void *data, size_t sz) .data = data, }; stop_machine_cpuslocked(patch_text_stop_machine, - &patch, NULL); + &patch, cpu_online_mask); } else { unsigned long flags; diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c index 145742d70a9f2..998b4249065a6 100644 --- a/arch/xtensa/kernel/ptrace.c +++ b/arch/xtensa/kernel/ptrace.c @@ -225,12 +225,12 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) void user_enable_single_step(struct task_struct *child) { - child->ptrace |= PT_SINGLESTEP; + set_tsk_thread_flag(child, TIF_SINGLESTEP); } void user_disable_single_step(struct task_struct *child) { - child->ptrace &= ~PT_SINGLESTEP; + clear_tsk_thread_flag(child, TIF_SINGLESTEP); } /* diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index d08172138369b..5a25bc2b80521 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -64,7 +64,7 @@ extern unsigned long initrd_end; extern int initrd_below_start_ok; #endif -#ifdef CONFIG_OF +#ifdef CONFIG_USE_OF void *dtb_start = __dtb_start; #endif @@ -126,7 +126,7 @@ __tagtable(BP_TAG_INITRD, parse_tag_initrd); #endif /* CONFIG_BLK_DEV_INITRD */ -#ifdef CONFIG_OF +#ifdef CONFIG_USE_OF static int __init parse_tag_fdt(const bp_tag_t *tag) { @@ -136,7 +136,7 @@ static int __init parse_tag_fdt(const bp_tag_t *tag) __tagtable(BP_TAG_FDT, parse_tag_fdt); -#endif /* CONFIG_OF */ +#endif /* CONFIG_USE_OF */ static int __init parse_tag_cmdline(const bp_tag_t* tag) { @@ -184,7 +184,7 @@ static int __init parse_bootparam(const bp_tag_t *tag) } #endif -#ifdef CONFIG_OF +#ifdef CONFIG_USE_OF #if !XCHAL_HAVE_PTP_MMU || XCHAL_HAVE_SPANNING_WAY unsigned long xtensa_kio_paddr = XCHAL_KIO_DEFAULT_PADDR; @@ -233,7 +233,7 @@ void __init early_init_devtree(void *params) strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); } -#endif /* CONFIG_OF */ +#endif /* CONFIG_USE_OF */ /* * Initialize architecture. (Early stage) @@ -254,7 +254,7 @@ void __init init_arch(bp_tag_t *bp_start) if (bp_start) parse_bootparam(bp_start); -#ifdef CONFIG_OF +#ifdef CONFIG_USE_OF early_init_devtree(dtb_start); #endif diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c index f6ab7b8c628fd..0042084508479 100644 --- a/arch/xtensa/kernel/signal.c +++ b/arch/xtensa/kernel/signal.c @@ -465,7 +465,7 @@ static void do_signal(struct pt_regs *regs) /* Set up the stack frame */ ret = setup_frame(&ksig, sigmask_to_save(), regs); signal_setup_done(ret, &ksig, 0); - if (current->ptrace & PT_SINGLESTEP) + if (test_thread_flag(TIF_SINGLESTEP)) task_pt_regs(current)->icountlevel = 1; return; @@ -491,7 +491,7 @@ static void do_signal(struct pt_regs *regs) /* If there's no signal to deliver, we just restore the saved mask. */ restore_saved_sigmask(); - if (current->ptrace & PT_SINGLESTEP) + if (test_thread_flag(TIF_SINGLESTEP)) task_pt_regs(current)->icountlevel = 1; return; } diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index 69db8c93c1f99..9b87a9b98727b 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -160,6 +160,7 @@ static void __init calibrate_ccount(void) cpu = of_find_compatible_node(NULL, NULL, "cdns,xtensa-cpu"); if (cpu) { clk = of_clk_get(cpu, 0); + of_node_put(cpu); if (!IS_ERR(clk)) { ccount_freq = clk_get_rate(clk); return; diff --git a/arch/xtensa/mm/mmu.c b/arch/xtensa/mm/mmu.c index 03678c4afc39b..bc858a7f98ba4 100644 --- a/arch/xtensa/mm/mmu.c +++ b/arch/xtensa/mm/mmu.c @@ -101,7 +101,7 @@ void init_mmu(void) void init_kio(void) { -#if XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY && defined(CONFIG_OF) +#if XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY && defined(CONFIG_USE_OF) /* * Update the IO area mapping in case xtensa_kio_paddr has changed */ diff --git a/arch/xtensa/platforms/iss/console.c b/arch/xtensa/platforms/iss/console.c index af81a62faba64..e7faea3d73d3b 100644 --- a/arch/xtensa/platforms/iss/console.c +++ b/arch/xtensa/platforms/iss/console.c @@ -168,9 +168,13 @@ static const struct tty_operations serial_ops = { int __init rs_init(void) { - tty_port_init(&serial_port); + int ret; serial_driver = alloc_tty_driver(SERIAL_MAX_NUM_LINES); + if (!serial_driver) + return -ENOMEM; + + tty_port_init(&serial_port); pr_info("%s %s\n", serial_name, serial_version); @@ -190,8 +194,15 @@ int __init rs_init(void) tty_set_operations(serial_driver, &serial_ops); tty_port_link_device(&serial_port, serial_driver, 0); - if (tty_register_driver(serial_driver)) - panic("Couldn't register serial driver\n"); + ret = tty_register_driver(serial_driver); + if (ret) { + pr_err("Couldn't register serial driver\n"); + tty_driver_kref_put(serial_driver); + tty_port_destroy(&serial_port); + + return ret; + } + return 0; } diff --git a/arch/xtensa/platforms/xtfpga/setup.c b/arch/xtensa/platforms/xtfpga/setup.c index 829115bb381f8..340479a328dc3 100644 --- a/arch/xtensa/platforms/xtfpga/setup.c +++ b/arch/xtensa/platforms/xtfpga/setup.c @@ -50,8 +50,12 @@ void platform_power_off(void) void platform_restart(void) { - /* Flush and reset the mmu, simulate a processor reset, and - * jump to the reset vector. */ + /* Try software reset first. */ + WRITE_ONCE(*(u32 *)XTFPGA_SWRST_VADDR, 0xdead); + + /* If software reset did not work, flush and reset the mmu, + * simulate a processor reset, and jump to the reset vector. + */ cpu_reset(); /* control never gets here */ } @@ -81,7 +85,7 @@ void __init platform_calibrate_ccount(void) #endif -#ifdef CONFIG_OF +#ifdef CONFIG_USE_OF static void __init xtfpga_clk_setup(struct device_node *np) { @@ -144,6 +148,7 @@ static int __init machine_setup(void) if ((eth = of_find_compatible_node(eth, NULL, "opencores,ethoc"))) update_local_mac(eth); + of_node_put(eth); return 0; } arch_initcall(machine_setup); @@ -299,4 +304,4 @@ static int __init xtavnet_init(void) */ arch_initcall(xtavnet_init); -#endif /* CONFIG_OF */ +#endif /* CONFIG_USE_OF */ diff --git a/block/Kconfig b/block/Kconfig index 41c0917ce6223..7e1cff1cfc1a9 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -32,6 +32,9 @@ config BLK_RQ_ALLOC_TIME config BLK_SCSI_REQUEST bool +config BLK_CGROUP_RWSTAT + bool + config BLK_DEV_BSG bool "Block layer SG support v4" default y @@ -86,6 +89,7 @@ config BLK_DEV_ZONED config BLK_DEV_THROTTLING bool "Block layer bio throttling support" depends on BLK_CGROUP=y + select BLK_CGROUP_RWSTAT ---help--- Block layer bio throttling support. It can be used to limit the IO rate to a device. IO rate policies are per cgroup and @@ -138,6 +142,7 @@ config BLK_CGROUP_IOLATENCY config BLK_CGROUP_IOCOST bool "Enable support for cost model based cgroup IO controller" depends on BLK_CGROUP=y + select BLK_RQ_IO_DATA_LEN select BLK_RQ_ALLOC_TIME ---help--- Enabling this option enables the .weight interface for cost @@ -145,6 +150,17 @@ config BLK_CGROUP_IOCOST distributes IO capacity between different groups based on their share of the overall weight distribution. +config BYTEDANCE_BLK_CGROUP_IOTRACE + default n + bool "Enable support for io stat trace for cgroup IO controller" + depends on BLK_CGROUP=y + help + Enabling this option enable iotrace interface for iotrace. + The iotrace collects io metrics for io cgroup which can + be a complement to current io stat traces. + + The code of iotrace is in block/blk-iotrace.c. + config BLK_WBT_MQ bool "Multiqueue writeback throttling" default y diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index b89310a022ad4..7df14133adc80 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -31,6 +31,7 @@ config IOSCHED_BFQ config BFQ_GROUP_IOSCHED bool "BFQ hierarchical scheduling support" depends on IOSCHED_BFQ && BLK_CGROUP + select BLK_CGROUP_RWSTAT ---help--- Enable hierarchical scheduling in BFQ, using the blkio diff --git a/block/Makefile b/block/Makefile index 9ef57ace90d46..f2165c766aaec 100644 --- a/block/Makefile +++ b/block/Makefile @@ -16,9 +16,11 @@ obj-$(CONFIG_BLK_SCSI_REQUEST) += scsi_ioctl.o obj-$(CONFIG_BLK_DEV_BSG) += bsg.o obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o +obj-$(CONFIG_BLK_CGROUP_RWSTAT) += blk-cgroup-rwstat.o obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o obj-$(CONFIG_BLK_CGROUP_IOLATENCY) += blk-iolatency.o obj-$(CONFIG_BLK_CGROUP_IOCOST) += blk-iocost.o +obj-$(CONFIG_BYTEDANCE_BLK_CGROUP_IOTRACE) += blk-iotrace.o obj-$(CONFIG_MQ_IOSCHED_DEADLINE) += mq-deadline.o obj-$(CONFIG_MQ_IOSCHED_KYBER) += kyber-iosched.o bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 342a1cfa48c57..3ad3537acbf9e 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -347,6 +347,17 @@ void bfqg_and_blkg_put(struct bfq_group *bfqg) bfqg_put(bfqg); } +void bfqg_stats_update_legacy_io(struct request_queue *q, struct request *rq) +{ + struct bfq_group *bfqg = blkg_to_bfqg(rq->bio->bi_blkg); + + if (!bfqg) + return; + + blkg_rwstat_add(&bfqg->stats.bytes, rq->cmd_flags, blk_rq_bytes(rq)); + blkg_rwstat_add(&bfqg->stats.ios, rq->cmd_flags, 1); +} + /* @stats = 0 */ static void bfqg_stats_reset(struct bfqg_stats *stats) { @@ -431,6 +442,8 @@ void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg) static void bfqg_stats_exit(struct bfqg_stats *stats) { + blkg_rwstat_exit(&stats->bytes); + blkg_rwstat_exit(&stats->ios); #ifdef CONFIG_BFQ_CGROUP_DEBUG blkg_rwstat_exit(&stats->merged); blkg_rwstat_exit(&stats->service_time); @@ -448,6 +461,10 @@ static void bfqg_stats_exit(struct bfqg_stats *stats) static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp) { + if (blkg_rwstat_init(&stats->bytes, gfp) || + blkg_rwstat_init(&stats->ios, gfp)) + return -ENOMEM; + #ifdef CONFIG_BFQ_CGROUP_DEBUG if (blkg_rwstat_init(&stats->merged, gfp) || blkg_rwstat_init(&stats->service_time, gfp) || @@ -536,6 +553,7 @@ static void bfq_pd_init(struct blkg_policy_data *pd) */ bfqg->bfqd = bfqd; bfqg->active_entities = 0; + bfqg->online = true; bfqg->rq_pos_tree = RB_ROOT; } @@ -564,28 +582,11 @@ static void bfq_group_set_parent(struct bfq_group *bfqg, entity->sched_data = &parent->sched_data; } -static struct bfq_group *bfq_lookup_bfqg(struct bfq_data *bfqd, - struct blkcg *blkcg) -{ - struct blkcg_gq *blkg; - - blkg = blkg_lookup(blkcg, bfqd->queue); - if (likely(blkg)) - return blkg_to_bfqg(blkg); - return NULL; -} - -struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, - struct blkcg *blkcg) +static void bfq_link_bfqg(struct bfq_data *bfqd, struct bfq_group *bfqg) { - struct bfq_group *bfqg, *parent; + struct bfq_group *parent; struct bfq_entity *entity; - bfqg = bfq_lookup_bfqg(bfqd, blkcg); - - if (unlikely(!bfqg)) - return NULL; - /* * Update chain of bfq_groups as we might be handling a leaf group * which, along with some of its relatives, has not been hooked yet @@ -602,8 +603,24 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, bfq_group_set_parent(curr_bfqg, parent); } } +} - return bfqg; +struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio) +{ + struct blkcg_gq *blkg = bio->bi_blkg; + struct bfq_group *bfqg; + + while (blkg) { + bfqg = blkg_to_bfqg(blkg); + if (bfqg->online) { + bio_associate_blkg_from_css(bio, &blkg->blkcg->css); + return bfqg; + } + blkg = blkg->parent; + } + bio_associate_blkg_from_css(bio, + &bfqg_to_blkg(bfqd->root_group)->blkcg->css); + return bfqd->root_group; } /** @@ -625,6 +642,12 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, { struct bfq_entity *entity = &bfqq->entity; + /* + * oom_bfqq is not allowed to move, oom_bfqq will hold ref to root_group + * until elevator exit. + */ + if (bfqq == &bfqd->oom_bfqq) + return; /* * Get extra reference to prevent bfqq from being freed in * next possible expire or deactivate. @@ -673,25 +696,15 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, * Move bic to blkcg, assuming that bfqd->lock is held; which makes * sure that the reference to cgroup is valid across the call (see * comments in bfq_bic_update_cgroup on this issue) - * - * NOTE: an alternative approach might have been to store the current - * cgroup in bfqq and getting a reference to it, reducing the lookup - * time here, at the price of slightly more complex code. */ -static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, - struct bfq_io_cq *bic, - struct blkcg *blkcg) +static void *__bfq_bic_change_cgroup(struct bfq_data *bfqd, + struct bfq_io_cq *bic, + struct bfq_group *bfqg) { struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0); struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1); - struct bfq_group *bfqg; struct bfq_entity *entity; - bfqg = bfq_find_set_group(bfqd, blkcg); - - if (unlikely(!bfqg)) - bfqg = bfqd->root_group; - if (async_bfqq) { entity = &async_bfqq->entity; @@ -702,9 +715,39 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, } if (sync_bfqq) { - entity = &sync_bfqq->entity; - if (entity->sched_data != &bfqg->sched_data) - bfq_bfqq_move(bfqd, sync_bfqq, bfqg); + if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) { + /* We are the only user of this bfqq, just move it */ + if (sync_bfqq->entity.sched_data != &bfqg->sched_data) + bfq_bfqq_move(bfqd, sync_bfqq, bfqg); + } else { + struct bfq_queue *bfqq; + + /* + * The queue was merged to a different queue. Check + * that the merge chain still belongs to the same + * cgroup. + */ + for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq) + if (bfqq->entity.sched_data != + &bfqg->sched_data) + break; + if (bfqq) { + /* + * Some queue changed cgroup so the merge is + * not valid anymore. We cannot easily just + * cancel the merge (by clearing new_bfqq) as + * there may be other processes using this + * queue and holding refs to all queues below + * sync_bfqq->new_bfqq. Similarly if the merge + * already happened, we need to detach from + * bfqq now so that we cannot merge bio to a + * request from the old cgroup. + */ + bfq_put_cooperator(sync_bfqq); + bfq_release_process_ref(bfqd, sync_bfqq); + bic_set_bfqq(bic, NULL, 1); + } + } } return bfqg; @@ -713,20 +756,24 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) { struct bfq_data *bfqd = bic_to_bfqd(bic); - struct bfq_group *bfqg = NULL; + struct bfq_group *bfqg = bfq_bio_bfqg(bfqd, bio); uint64_t serial_nr; - rcu_read_lock(); - serial_nr = __bio_blkcg(bio)->css.serial_nr; + serial_nr = bfqg_to_blkg(bfqg)->blkcg->css.serial_nr; /* * Check whether blkcg has changed. The condition may trigger * spuriously on a newly created cic but there's no harm. */ if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr)) - goto out; + return; - bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio)); + /* + * New cgroup for this process. Make sure it is linked to bfq internal + * cgroup hierarchy. + */ + bfq_link_bfqg(bfqd, bfqg); + __bfq_bic_change_cgroup(bfqd, bic, bfqg); /* * Update blkg_path for bfq_log_* functions. We cache this * path, and update it here, for the following @@ -779,8 +826,6 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) */ blkg_path(bfqg_to_blkg(bfqg), bfqg->blkg_path, sizeof(bfqg->blkg_path)); bic->blkcg_serial_nr = serial_nr; -out: - rcu_read_unlock(); } /** @@ -908,6 +953,7 @@ static void bfq_pd_offline(struct blkg_policy_data *pd) put_async_queues: bfq_put_async_queues(bfqd, bfqg); + bfqg->online = false; spin_unlock_irqrestore(&bfqd->lock, flags); /* @@ -1083,18 +1129,35 @@ static ssize_t bfq_io_set_weight(struct kernfs_open_file *of, return bfq_io_set_device_weight(of, buf, nbytes, off); } -#ifdef CONFIG_BFQ_CGROUP_DEBUG -static int bfqg_print_stat(struct seq_file *sf, void *v) +static int bfqg_print_rwstat(struct seq_file *sf, void *v) { - blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat, - &blkcg_policy_bfq, seq_cft(sf)->private, false); + blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat, + &blkcg_policy_bfq, seq_cft(sf)->private, true); return 0; } -static int bfqg_print_rwstat(struct seq_file *sf, void *v) +static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf, + struct blkg_policy_data *pd, int off) { - blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat, - &blkcg_policy_bfq, seq_cft(sf)->private, true); + struct blkg_rwstat_sample sum; + + blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off, &sum); + return __blkg_prfill_rwstat(sf, pd, &sum); +} + +static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v) +{ + blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), + bfqg_prfill_rwstat_recursive, &blkcg_policy_bfq, + seq_cft(sf)->private, true); + return 0; +} + +#ifdef CONFIG_BFQ_CGROUP_DEBUG +static int bfqg_print_stat(struct seq_file *sf, void *v) +{ + blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat, + &blkcg_policy_bfq, seq_cft(sf)->private, false); return 0; } @@ -1123,15 +1186,6 @@ static u64 bfqg_prfill_stat_recursive(struct seq_file *sf, return __blkg_prfill_u64(sf, pd, sum); } -static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf, - struct blkg_policy_data *pd, int off) -{ - struct blkg_rwstat_sample sum; - - blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off, &sum); - return __blkg_prfill_rwstat(sf, pd, &sum); -} - static int bfqg_print_stat_recursive(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), @@ -1140,18 +1194,11 @@ static int bfqg_print_stat_recursive(struct seq_file *sf, void *v) return 0; } -static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v) -{ - blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), - bfqg_prfill_rwstat_recursive, &blkcg_policy_bfq, - seq_cft(sf)->private, true); - return 0; -} - static u64 bfqg_prfill_sectors(struct seq_file *sf, struct blkg_policy_data *pd, int off) { - u64 sum = blkg_rwstat_total(&pd->blkg->stat_bytes); + struct bfq_group *bfqg = blkg_to_bfqg(pd->blkg); + u64 sum = blkg_rwstat_total(&bfqg->stats.bytes); return __blkg_prfill_u64(sf, pd, sum >> 9); } @@ -1168,8 +1215,8 @@ static u64 bfqg_prfill_sectors_recursive(struct seq_file *sf, { struct blkg_rwstat_sample tmp; - blkg_rwstat_recursive_sum(pd->blkg, NULL, - offsetof(struct blkcg_gq, stat_bytes), &tmp); + blkg_rwstat_recursive_sum(pd->blkg, &blkcg_policy_bfq, + offsetof(struct bfq_group, stats.bytes), &tmp); return __blkg_prfill_u64(sf, pd, (tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]) >> 9); @@ -1252,13 +1299,13 @@ struct cftype bfq_blkcg_legacy_files[] = { /* statistics, covers only the tasks in the bfqg */ { .name = "bfq.io_service_bytes", - .private = (unsigned long)&blkcg_policy_bfq, - .seq_show = blkg_print_stat_bytes, + .private = offsetof(struct bfq_group, stats.bytes), + .seq_show = bfqg_print_rwstat, }, { .name = "bfq.io_serviced", - .private = (unsigned long)&blkcg_policy_bfq, - .seq_show = blkg_print_stat_ios, + .private = offsetof(struct bfq_group, stats.ios), + .seq_show = bfqg_print_rwstat, }, #ifdef CONFIG_BFQ_CGROUP_DEBUG { @@ -1295,13 +1342,13 @@ struct cftype bfq_blkcg_legacy_files[] = { /* the same statistics which cover the bfqg and its descendants */ { .name = "bfq.io_service_bytes_recursive", - .private = (unsigned long)&blkcg_policy_bfq, - .seq_show = blkg_print_stat_bytes_recursive, + .private = offsetof(struct bfq_group, stats.bytes), + .seq_show = bfqg_print_rwstat_recursive, }, { .name = "bfq.io_serviced_recursive", - .private = (unsigned long)&blkcg_policy_bfq, - .seq_show = blkg_print_stat_ios_recursive, + .private = offsetof(struct bfq_group, stats.ios), + .seq_show = bfqg_print_rwstat_recursive, }, #ifdef CONFIG_BFQ_CGROUP_DEBUG { @@ -1396,7 +1443,7 @@ void bfq_end_wr_async(struct bfq_data *bfqd) bfq_end_wr_async_queues(bfqd, bfqd->root_group); } -struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, struct blkcg *blkcg) +struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio) { return bfqd->root_group; } diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index afe73f0b49ebf..50317c848d7d3 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -2227,10 +2227,17 @@ static bool bfq_bio_merge(struct request_queue *q, struct bio *bio, spin_lock_irq(&bfqd->lock); - if (bic) + if (bic) { + /* + * Make sure cgroup info is uptodate for current process before + * considering the merge. + */ + bfq_bic_update_cgroup(bic, bio); + bfqd->bio_bfqq = bic_to_bfqq(bic, op_is_sync(bio->bi_opf)); - else + } else { bfqd->bio_bfqq = NULL; + } bfqd->bio_bic = bic; ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free); @@ -2260,8 +2267,6 @@ static int bfq_request_merge(struct request_queue *q, struct request **req, return ELEVATOR_NO_MERGE; } -static struct bfq_queue *bfq_init_rq(struct request *rq); - static void bfq_request_merged(struct request_queue *q, struct request *req, enum elv_merge type) { @@ -2270,7 +2275,7 @@ static void bfq_request_merged(struct request_queue *q, struct request *req, blk_rq_pos(req) < blk_rq_pos(container_of(rb_prev(&req->rb_node), struct request, rb_node))) { - struct bfq_queue *bfqq = bfq_init_rq(req); + struct bfq_queue *bfqq = RQ_BFQQ(req); struct bfq_data *bfqd; struct request *prev, *next_rq; @@ -2322,8 +2327,8 @@ static void bfq_request_merged(struct request_queue *q, struct request *req, static void bfq_requests_merged(struct request_queue *q, struct request *rq, struct request *next) { - struct bfq_queue *bfqq = bfq_init_rq(rq), - *next_bfqq = bfq_init_rq(next); + struct bfq_queue *bfqq = RQ_BFQQ(rq), + *next_bfqq = RQ_BFQQ(next); if (!bfqq) return; @@ -2502,6 +2507,14 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) if (process_refs == 0 || new_process_refs == 0) return NULL; + /* + * Make sure merged queues belong to the same parent. Parents could + * have changed since the time we decided the two queues are suitable + * for merging. + */ + if (new_bfqq->entity.parent != bfqq->entity.parent) + return NULL; + bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d", new_bfqq->pid); @@ -2526,6 +2539,15 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) * are likely to increase the throughput. */ bfqq->new_bfqq = new_bfqq; + /* + * The above assignment schedules the following redirections: + * each time some I/O for bfqq arrives, the process that + * generated that I/O is disassociated from bfqq and + * associated with new_bfqq. Here we increases new_bfqq->ref + * in advance, adding the number of processes that are + * expected to be associated with new_bfqq as they happen to + * issue I/O. + */ new_bfqq->ref += process_refs; return new_bfqq; } @@ -2585,6 +2607,10 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, { struct bfq_queue *in_service_bfqq, *new_bfqq; + /* if a merge has already been setup, then proceed with that first */ + if (bfqq->new_bfqq) + return bfqq->new_bfqq; + /* * Do not perform queue merging if the device is non * rotational and performs internal queueing. In fact, such a @@ -2639,9 +2665,6 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, if (bfq_too_late_for_merging(bfqq)) return NULL; - if (bfqq->new_bfqq) - return bfqq->new_bfqq; - if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq)) return NULL; @@ -4796,7 +4819,7 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) struct bfq_data *bfqd = hctx->queue->elevator->elevator_data; struct request *rq; struct bfq_queue *in_serv_queue; - bool waiting_rq, idle_timer_disabled; + bool waiting_rq, idle_timer_disabled = false; spin_lock_irq(&bfqd->lock); @@ -4804,14 +4827,15 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) waiting_rq = in_serv_queue && bfq_bfqq_wait_request(in_serv_queue); rq = __bfq_dispatch_request(hctx); - - idle_timer_disabled = - waiting_rq && !bfq_bfqq_wait_request(in_serv_queue); + if (in_serv_queue == bfqd->in_service_queue) { + idle_timer_disabled = + waiting_rq && !bfq_bfqq_wait_request(in_serv_queue); + } spin_unlock_irq(&bfqd->lock); - - bfq_update_dispatch_stats(hctx->queue, rq, in_serv_queue, - idle_timer_disabled); + bfq_update_dispatch_stats(hctx->queue, rq, + idle_timer_disabled ? in_serv_queue : NULL, + idle_timer_disabled); return rq; } @@ -4903,7 +4927,7 @@ void bfq_put_queue(struct bfq_queue *bfqq) bfqg_and_blkg_put(bfqg); } -static void bfq_put_cooperator(struct bfq_queue *bfqq) +void bfq_put_cooperator(struct bfq_queue *bfqq) { struct bfq_queue *__bfqq, *next; @@ -5007,7 +5031,7 @@ bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic) if (bfqq->new_ioprio >= IOPRIO_BE_NR) { pr_crit("bfq_set_next_ioprio_data: new_ioprio %d\n", bfqq->new_ioprio); - bfqq->new_ioprio = IOPRIO_BE_NR; + bfqq->new_ioprio = IOPRIO_BE_NR - 1; } bfqq->entity.new_weight = bfq_ioprio_to_weight(bfqq->new_ioprio); @@ -5134,14 +5158,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, struct bfq_queue *bfqq; struct bfq_group *bfqg; - rcu_read_lock(); - - bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio)); - if (!bfqg) { - bfqq = &bfqd->oom_bfqq; - goto out; - } - + bfqg = bfq_bio_bfqg(bfqd, bio); if (!is_sync) { async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class, ioprio); @@ -5185,7 +5202,6 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, out: bfqq->ref++; /* get a process reference to this queue */ bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq, bfqq->ref); - rcu_read_unlock(); return bfqq; } @@ -5488,6 +5504,8 @@ static inline void bfq_update_insert_stats(struct request_queue *q, unsigned int cmd_flags) {} #endif /* CONFIG_BFQ_CGROUP_DEBUG */ +static struct bfq_queue *bfq_init_rq(struct request *rq); + static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, bool at_head) { @@ -5497,18 +5515,19 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, bool idle_timer_disabled = false; unsigned int cmd_flags; +#ifdef CONFIG_BFQ_GROUP_IOSCHED + if (!cgroup_subsys_on_dfl(io_cgrp_subsys) && rq->bio) + bfqg_stats_update_legacy_io(q, rq); +#endif spin_lock_irq(&bfqd->lock); + bfqq = bfq_init_rq(rq); if (blk_mq_sched_try_insert_merge(q, rq)) { spin_unlock_irq(&bfqd->lock); return; } - spin_unlock_irq(&bfqd->lock); - blk_mq_sched_request_inserted(rq); - spin_lock_irq(&bfqd->lock); - bfqq = bfq_init_rq(rq); if (!bfqq || at_head || blk_rq_is_passthrough(rq)) { if (at_head) list_add(&rq->queuelist, &bfqd->dispatch); @@ -6392,6 +6411,8 @@ static void bfq_exit_queue(struct elevator_queue *e) spin_unlock_irq(&bfqd->lock); #endif + wbt_enable_default(bfqd->queue); + kfree(bfqd); } diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index de98fdfe9ea17..4b2e77db0cf02 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -10,6 +10,8 @@ #include #include +#include "blk-cgroup-rwstat.h" + #define BFQ_IOPRIO_CLASSES 3 #define BFQ_CL_IDLE_TIMEOUT (HZ/5) @@ -809,6 +811,9 @@ struct bfq_stat { }; struct bfqg_stats { + /* basic stats */ + struct blkg_rwstat bytes; + struct blkg_rwstat ios; #ifdef CONFIG_BFQ_CGROUP_DEBUG /* number of ios merged */ struct blkg_rwstat merged; @@ -896,6 +901,8 @@ struct bfq_group { /* reference counter (see comments in bfq_bic_update_cgroup) */ int ref; + /* Is bfq_group still online? */ + bool online; struct bfq_entity entity; struct bfq_sched_data sched_data; @@ -949,6 +956,7 @@ void bfq_weights_tree_remove(struct bfq_data *bfqd, void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq, bool compensate, enum bfqq_expiration reason); void bfq_put_queue(struct bfq_queue *bfqq); +void bfq_put_cooperator(struct bfq_queue *bfqq); void bfq_end_wr_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq); void bfq_schedule_dispatch(struct bfq_data *bfqd); @@ -958,6 +966,7 @@ void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); /* ---------------- cgroups-support interface ---------------- */ +void bfqg_stats_update_legacy_io(struct request_queue *q, struct request *rq); void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq, unsigned int op); void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op); @@ -975,8 +984,7 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg); void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio); void bfq_end_wr_async(struct bfq_data *bfqd); -struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, - struct blkcg *blkcg); +struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio); struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg); struct bfq_group *bfqq_group(struct bfq_queue *bfqq); struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node); diff --git a/block/bio-integrity.c b/block/bio-integrity.c index c9dc2b17ce251..ec295be93ca0d 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -380,7 +380,7 @@ void bio_integrity_advance(struct bio *bio, unsigned int bytes_done) struct blk_integrity *bi = blk_get_integrity(bio->bi_disk); unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9); - bip->bip_iter.bi_sector += bytes_done >> 9; + bip->bip_iter.bi_sector += bio_integrity_intervals(bi, bytes_done >> 9); bvec_iter_advance(bip->bip_vec, &bip->bip_iter, bytes); } diff --git a/block/bio.c b/block/bio.c index 4f28cff50afd7..fdee7a6410520 100644 --- a/block/bio.c +++ b/block/bio.c @@ -569,7 +569,8 @@ void bio_truncate(struct bio *bio, unsigned new_size) offset = new_size - done; else offset = 0; - zero_user(bv.bv_page, offset, bv.bv_len - offset); + zero_user(bv.bv_page, bv.bv_offset + offset, + bv.bv_len - offset); truncated = true; } done += bv.bv_len; @@ -1626,7 +1627,7 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, if (bytes > len) bytes = len; - page = alloc_page(q->bounce_gfp | gfp_mask); + page = alloc_page(q->bounce_gfp | __GFP_ZERO | gfp_mask); if (!page) goto cleanup; @@ -1907,6 +1908,9 @@ struct bio *bio_split(struct bio *bio, int sectors, bio_integrity_trim(split); bio_advance(bio, split->bi_iter.bi_size); +#ifdef CONFIG_BYTEDANCE_BLK_CGROUP_IOTRACE + bio_issue_init_sector(&split->bi_issue, bio_sectors(split)); +#endif if (bio_flagged(bio, BIO_TRACE_COMPLETION)) bio_set_flag(split, BIO_TRACE_COMPLETION); @@ -2180,7 +2184,7 @@ void bio_clone_blkg_association(struct bio *dst, struct bio *src) rcu_read_lock(); if (src->bi_blkg) - __bio_associate_blkg(dst, src->bi_blkg); + bio_associate_blkg_from_css(dst, &bio_blkcg(src)->css); rcu_read_unlock(); } diff --git a/block/blk-cgroup-rwstat.c b/block/blk-cgroup-rwstat.c new file mode 100644 index 0000000000000..85d5790ac49b0 --- /dev/null +++ b/block/blk-cgroup-rwstat.c @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Legacy blkg rwstat helpers enabled by CONFIG_BLK_CGROUP_RWSTAT. + * Do not use in new code. + */ +#include "blk-cgroup-rwstat.h" + +int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp) +{ + int i, ret; + + for (i = 0; i < BLKG_RWSTAT_NR; i++) { + ret = percpu_counter_init(&rwstat->cpu_cnt[i], 0, gfp); + if (ret) { + while (--i >= 0) + percpu_counter_destroy(&rwstat->cpu_cnt[i]); + return ret; + } + atomic64_set(&rwstat->aux_cnt[i], 0); + } + return 0; +} +EXPORT_SYMBOL_GPL(blkg_rwstat_init); + +void blkg_rwstat_exit(struct blkg_rwstat *rwstat) +{ + int i; + + for (i = 0; i < BLKG_RWSTAT_NR; i++) + percpu_counter_destroy(&rwstat->cpu_cnt[i]); +} +EXPORT_SYMBOL_GPL(blkg_rwstat_exit); + +/** + * __blkg_prfill_rwstat - prfill helper for a blkg_rwstat + * @sf: seq_file to print to + * @pd: policy private data of interest + * @rwstat: rwstat to print + * + * Print @rwstat to @sf for the device assocaited with @pd. + */ +u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, + const struct blkg_rwstat_sample *rwstat) +{ + static const char *rwstr[] = { + [BLKG_RWSTAT_READ] = "Read", + [BLKG_RWSTAT_WRITE] = "Write", + [BLKG_RWSTAT_SYNC] = "Sync", + [BLKG_RWSTAT_ASYNC] = "Async", + [BLKG_RWSTAT_DISCARD] = "Discard", + }; + const char *dname = blkg_dev_name(pd->blkg); + u64 v; + int i; + + if (!dname) + return 0; + + for (i = 0; i < BLKG_RWSTAT_NR; i++) + seq_printf(sf, "%s %s %llu\n", dname, rwstr[i], + rwstat->cnt[i]); + + v = rwstat->cnt[BLKG_RWSTAT_READ] + + rwstat->cnt[BLKG_RWSTAT_WRITE] + + rwstat->cnt[BLKG_RWSTAT_DISCARD]; + seq_printf(sf, "%s Total %llu\n", dname, v); + return v; +} +EXPORT_SYMBOL_GPL(__blkg_prfill_rwstat); + +/** + * blkg_prfill_rwstat - prfill callback for blkg_rwstat + * @sf: seq_file to print to + * @pd: policy private data of interest + * @off: offset to the blkg_rwstat in @pd + * + * prfill callback for printing a blkg_rwstat. + */ +u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, + int off) +{ + struct blkg_rwstat_sample rwstat = { }; + + blkg_rwstat_read((void *)pd + off, &rwstat); + return __blkg_prfill_rwstat(sf, pd, &rwstat); +} +EXPORT_SYMBOL_GPL(blkg_prfill_rwstat); + +/** + * blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat + * @blkg: blkg of interest + * @pol: blkcg_policy which contains the blkg_rwstat + * @off: offset to the blkg_rwstat in blkg_policy_data or @blkg + * @sum: blkg_rwstat_sample structure containing the results + * + * Collect the blkg_rwstat specified by @blkg, @pol and @off and all its + * online descendants and their aux counts. The caller must be holding the + * queue lock for online tests. + * + * If @pol is NULL, blkg_rwstat is at @off bytes into @blkg; otherwise, it + * is at @off bytes into @blkg's blkg_policy_data of the policy. + */ +void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, + int off, struct blkg_rwstat_sample *sum) +{ + struct blkcg_gq *pos_blkg; + struct cgroup_subsys_state *pos_css; + unsigned int i; + + lockdep_assert_held(&blkg->q->queue_lock); + + rcu_read_lock(); + blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) { + struct blkg_rwstat *rwstat; + + if (!pos_blkg->online) + continue; + + if (pol) + rwstat = (void *)blkg_to_pd(pos_blkg, pol) + off; + else + rwstat = (void *)pos_blkg + off; + + for (i = 0; i < BLKG_RWSTAT_NR; i++) + sum->cnt[i] = blkg_rwstat_read_counter(rwstat, i); + } + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum); diff --git a/block/blk-cgroup-rwstat.h b/block/blk-cgroup-rwstat.h new file mode 100644 index 0000000000000..ee746919c41fc --- /dev/null +++ b/block/blk-cgroup-rwstat.h @@ -0,0 +1,149 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Legacy blkg rwstat helpers enabled by CONFIG_BLK_CGROUP_RWSTAT. + * Do not use in new code. + */ +#ifndef _BLK_CGROUP_RWSTAT_H +#define _BLK_CGROUP_RWSTAT_H + +#include + +enum blkg_rwstat_type { + BLKG_RWSTAT_READ, + BLKG_RWSTAT_WRITE, + BLKG_RWSTAT_SYNC, + BLKG_RWSTAT_ASYNC, + BLKG_RWSTAT_DISCARD, + + BLKG_RWSTAT_NR, + BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR, +}; + +/* + * blkg_[rw]stat->aux_cnt is excluded for local stats but included for + * recursive. Used to carry stats of dead children. + */ +struct blkg_rwstat { + struct percpu_counter cpu_cnt[BLKG_RWSTAT_NR]; + atomic64_t aux_cnt[BLKG_RWSTAT_NR]; +}; + +struct blkg_rwstat_sample { + u64 cnt[BLKG_RWSTAT_NR]; +}; + +static inline u64 blkg_rwstat_read_counter(struct blkg_rwstat *rwstat, + unsigned int idx) +{ + return atomic64_read(&rwstat->aux_cnt[idx]) + + percpu_counter_sum_positive(&rwstat->cpu_cnt[idx]); +} + +int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp); +void blkg_rwstat_exit(struct blkg_rwstat *rwstat); +u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, + const struct blkg_rwstat_sample *rwstat); +u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, + int off); +void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, + int off, struct blkg_rwstat_sample *sum); + + +/** + * blkg_rwstat_add - add a value to a blkg_rwstat + * @rwstat: target blkg_rwstat + * @op: REQ_OP and flags + * @val: value to add + * + * Add @val to @rwstat. The counters are chosen according to @rw. The + * caller is responsible for synchronizing calls to this function. + */ +static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, + unsigned int op, uint64_t val) +{ + struct percpu_counter *cnt; + + if (op_is_discard(op)) + cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD]; + else if (op_is_write(op)) + cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE]; + else + cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ]; + + percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH); + + if (op_is_sync(op)) + cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC]; + else + cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC]; + + percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH); +} + +/** + * blkg_rwstat_read - read the current values of a blkg_rwstat + * @rwstat: blkg_rwstat to read + * + * Read the current snapshot of @rwstat and return it in the aux counts. + */ +static inline void blkg_rwstat_read(struct blkg_rwstat *rwstat, + struct blkg_rwstat_sample *result) +{ + int i; + + for (i = 0; i < BLKG_RWSTAT_NR; i++) + result->cnt[i] = + percpu_counter_sum_positive(&rwstat->cpu_cnt[i]); +} + +/** + * blkg_rwstat_total - read the total count of a blkg_rwstat + * @rwstat: blkg_rwstat to read + * + * Return the total count of @rwstat regardless of the IO direction. This + * function can be called without synchronization and takes care of u64 + * atomicity. + */ +static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat) +{ + struct blkg_rwstat_sample tmp = { }; + + blkg_rwstat_read(rwstat, &tmp); + return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]; +} + +/** + * blkg_rwstat_reset - reset a blkg_rwstat + * @rwstat: blkg_rwstat to reset + */ +static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat) +{ + int i; + + for (i = 0; i < BLKG_RWSTAT_NR; i++) { + percpu_counter_set(&rwstat->cpu_cnt[i], 0); + atomic64_set(&rwstat->aux_cnt[i], 0); + } +} + +/** + * blkg_rwstat_add_aux - add a blkg_rwstat into another's aux count + * @to: the destination blkg_rwstat + * @from: the source + * + * Add @from's count including the aux one to @to's aux count. + */ +static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to, + struct blkg_rwstat *from) +{ + u64 sum[BLKG_RWSTAT_NR]; + int i; + + for (i = 0; i < BLKG_RWSTAT_NR; i++) + sum[i] = percpu_counter_sum_positive(&from->cpu_cnt[i]); + + for (i = 0; i < BLKG_RWSTAT_NR; i++) + atomic64_add(sum[i] + atomic64_read(&from->aux_cnt[i]), + &to->aux_cnt[i]); +} +#endif /* _BLK_CGROUP_RWSTAT_H */ diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index dde8d0acfb34f..1611fdc8b900b 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -80,8 +80,7 @@ static void blkg_free(struct blkcg_gq *blkg) if (blkg->pd[i]) blkcg_policy[i]->pd_free_fn(blkg->pd[i]); - blkg_rwstat_exit(&blkg->stat_ios); - blkg_rwstat_exit(&blkg->stat_bytes); + free_percpu(blkg->iostat_cpu); percpu_ref_exit(&blkg->refcnt); kfree(blkg); } @@ -146,7 +145,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q, gfp_t gfp_mask) { struct blkcg_gq *blkg; - int i; + int i, cpu; /* alloc and init base part */ blkg = kzalloc_node(sizeof(*blkg), gfp_mask, q->node); @@ -156,8 +155,8 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q, if (percpu_ref_init(&blkg->refcnt, blkg_release, 0, gfp_mask)) goto err_free; - if (blkg_rwstat_init(&blkg->stat_bytes, gfp_mask) || - blkg_rwstat_init(&blkg->stat_ios, gfp_mask)) + blkg->iostat_cpu = alloc_percpu_gfp(struct blkg_iostat_set, gfp_mask); + if (!blkg->iostat_cpu) goto err_free; blkg->q = q; @@ -167,6 +166,10 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q, INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn); blkg->blkcg = blkcg; + u64_stats_init(&blkg->iostat.sync); + for_each_possible_cpu(cpu) + u64_stats_init(&per_cpu_ptr(blkg->iostat_cpu, cpu)->sync); + for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkcg_policy *pol = blkcg_policy[i]; struct blkg_policy_data *pd; @@ -393,7 +396,6 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, static void blkg_destroy(struct blkcg_gq *blkg) { struct blkcg *blkcg = blkg->blkcg; - struct blkcg_gq *parent = blkg->parent; int i; lockdep_assert_held(&blkg->q->queue_lock); @@ -410,11 +412,6 @@ static void blkg_destroy(struct blkcg_gq *blkg) pol->pd_offline_fn(blkg->pd[i]); } - if (parent) { - blkg_rwstat_add_aux(&parent->stat_bytes, &blkg->stat_bytes); - blkg_rwstat_add_aux(&parent->stat_ios, &blkg->stat_ios); - } - blkg->online = false; radix_tree_delete(&blkcg->blkg_tree, blkg->q->id); @@ -464,7 +461,7 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css, { struct blkcg *blkcg = css_to_blkcg(css); struct blkcg_gq *blkg; - int i; + int i, cpu; mutex_lock(&blkcg_pol_mutex); spin_lock_irq(&blkcg->lock); @@ -475,8 +472,12 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css, * anyway. If you get hit by a race, retry. */ hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) { - blkg_rwstat_reset(&blkg->stat_bytes); - blkg_rwstat_reset(&blkg->stat_ios); + for_each_possible_cpu(cpu) { + struct blkg_iostat_set *bis = + per_cpu_ptr(blkg->iostat_cpu, cpu); + memset(bis, 0, sizeof(*bis)); + } + memset(&blkg->iostat, 0, sizeof(blkg->iostat)); for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkcg_policy *pol = blkcg_policy[i]; @@ -560,186 +561,6 @@ u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v) } EXPORT_SYMBOL_GPL(__blkg_prfill_u64); -/** - * __blkg_prfill_rwstat - prfill helper for a blkg_rwstat - * @sf: seq_file to print to - * @pd: policy private data of interest - * @rwstat: rwstat to print - * - * Print @rwstat to @sf for the device assocaited with @pd. - */ -u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, - const struct blkg_rwstat_sample *rwstat) -{ - static const char *rwstr[] = { - [BLKG_RWSTAT_READ] = "Read", - [BLKG_RWSTAT_WRITE] = "Write", - [BLKG_RWSTAT_SYNC] = "Sync", - [BLKG_RWSTAT_ASYNC] = "Async", - [BLKG_RWSTAT_DISCARD] = "Discard", - }; - const char *dname = blkg_dev_name(pd->blkg); - u64 v; - int i; - - if (!dname) - return 0; - - for (i = 0; i < BLKG_RWSTAT_NR; i++) - seq_printf(sf, "%s %s %llu\n", dname, rwstr[i], - rwstat->cnt[i]); - - v = rwstat->cnt[BLKG_RWSTAT_READ] + - rwstat->cnt[BLKG_RWSTAT_WRITE] + - rwstat->cnt[BLKG_RWSTAT_DISCARD]; - seq_printf(sf, "%s Total %llu\n", dname, v); - return v; -} -EXPORT_SYMBOL_GPL(__blkg_prfill_rwstat); - -/** - * blkg_prfill_rwstat - prfill callback for blkg_rwstat - * @sf: seq_file to print to - * @pd: policy private data of interest - * @off: offset to the blkg_rwstat in @pd - * - * prfill callback for printing a blkg_rwstat. - */ -u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, - int off) -{ - struct blkg_rwstat_sample rwstat = { }; - - blkg_rwstat_read((void *)pd + off, &rwstat); - return __blkg_prfill_rwstat(sf, pd, &rwstat); -} -EXPORT_SYMBOL_GPL(blkg_prfill_rwstat); - -static u64 blkg_prfill_rwstat_field(struct seq_file *sf, - struct blkg_policy_data *pd, int off) -{ - struct blkg_rwstat_sample rwstat = { }; - - blkg_rwstat_read((void *)pd->blkg + off, &rwstat); - return __blkg_prfill_rwstat(sf, pd, &rwstat); -} - -/** - * blkg_print_stat_bytes - seq_show callback for blkg->stat_bytes - * @sf: seq_file to print to - * @v: unused - * - * To be used as cftype->seq_show to print blkg->stat_bytes. - * cftype->private must be set to the blkcg_policy. - */ -int blkg_print_stat_bytes(struct seq_file *sf, void *v) -{ - blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), - blkg_prfill_rwstat_field, (void *)seq_cft(sf)->private, - offsetof(struct blkcg_gq, stat_bytes), true); - return 0; -} -EXPORT_SYMBOL_GPL(blkg_print_stat_bytes); - -/** - * blkg_print_stat_bytes - seq_show callback for blkg->stat_ios - * @sf: seq_file to print to - * @v: unused - * - * To be used as cftype->seq_show to print blkg->stat_ios. cftype->private - * must be set to the blkcg_policy. - */ -int blkg_print_stat_ios(struct seq_file *sf, void *v) -{ - blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), - blkg_prfill_rwstat_field, (void *)seq_cft(sf)->private, - offsetof(struct blkcg_gq, stat_ios), true); - return 0; -} -EXPORT_SYMBOL_GPL(blkg_print_stat_ios); - -static u64 blkg_prfill_rwstat_field_recursive(struct seq_file *sf, - struct blkg_policy_data *pd, - int off) -{ - struct blkg_rwstat_sample rwstat; - - blkg_rwstat_recursive_sum(pd->blkg, NULL, off, &rwstat); - return __blkg_prfill_rwstat(sf, pd, &rwstat); -} - -/** - * blkg_print_stat_bytes_recursive - recursive version of blkg_print_stat_bytes - * @sf: seq_file to print to - * @v: unused - */ -int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v) -{ - blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), - blkg_prfill_rwstat_field_recursive, - (void *)seq_cft(sf)->private, - offsetof(struct blkcg_gq, stat_bytes), true); - return 0; -} -EXPORT_SYMBOL_GPL(blkg_print_stat_bytes_recursive); - -/** - * blkg_print_stat_ios_recursive - recursive version of blkg_print_stat_ios - * @sf: seq_file to print to - * @v: unused - */ -int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v) -{ - blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), - blkg_prfill_rwstat_field_recursive, - (void *)seq_cft(sf)->private, - offsetof(struct blkcg_gq, stat_ios), true); - return 0; -} -EXPORT_SYMBOL_GPL(blkg_print_stat_ios_recursive); - -/** - * blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat - * @blkg: blkg of interest - * @pol: blkcg_policy which contains the blkg_rwstat - * @off: offset to the blkg_rwstat in blkg_policy_data or @blkg - * @sum: blkg_rwstat_sample structure containing the results - * - * Collect the blkg_rwstat specified by @blkg, @pol and @off and all its - * online descendants and their aux counts. The caller must be holding the - * queue lock for online tests. - * - * If @pol is NULL, blkg_rwstat is at @off bytes into @blkg; otherwise, it - * is at @off bytes into @blkg's blkg_policy_data of the policy. - */ -void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, - int off, struct blkg_rwstat_sample *sum) -{ - struct blkcg_gq *pos_blkg; - struct cgroup_subsys_state *pos_css; - unsigned int i; - - lockdep_assert_held(&blkg->q->queue_lock); - - rcu_read_lock(); - blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) { - struct blkg_rwstat *rwstat; - - if (!pos_blkg->online) - continue; - - if (pol) - rwstat = (void *)blkg_to_pd(pos_blkg, pol) + off; - else - rwstat = (void *)pos_blkg + off; - - for (i = 0; i < BLKG_RWSTAT_NR; i++) - sum->cnt[i] = blkg_rwstat_read_counter(rwstat, i); - } - rcu_read_unlock(); -} -EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum); - /* Performs queue bypass and policy enabled checks then looks up blkg. */ static struct blkcg_gq *blkg_lookup_check(struct blkcg *blkcg, const struct blkcg_policy *pol, @@ -929,21 +750,155 @@ void blkg_conf_finish(struct blkg_conf_ctx *ctx) } EXPORT_SYMBOL_GPL(blkg_conf_finish); +static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src) +{ + int i; + + for (i = 0; i < BLKG_IOSTAT_NR; i++) { + dst->bytes[i] = src->bytes[i]; + dst->ios[i] = src->ios[i]; + } +} + +static void blkg_iostat_add(struct blkg_iostat *dst, struct blkg_iostat *src) +{ + int i; + + for (i = 0; i < BLKG_IOSTAT_NR; i++) { + dst->bytes[i] += src->bytes[i]; + dst->ios[i] += src->ios[i]; + } +} + +static void blkg_iostat_sub(struct blkg_iostat *dst, struct blkg_iostat *src) +{ + int i; + + for (i = 0; i < BLKG_IOSTAT_NR; i++) { + dst->bytes[i] -= src->bytes[i]; + dst->ios[i] -= src->ios[i]; + } +} + +static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) +{ + struct blkcg *blkcg = css_to_blkcg(css); + struct blkcg_gq *blkg; + + /* Root-level stats are sourced from system-wide IO stats */ + if (!cgroup_parent(css->cgroup)) + return; + + rcu_read_lock(); + + hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) { + struct blkcg_gq *parent = blkg->parent; + struct blkg_iostat_set *bisc = per_cpu_ptr(blkg->iostat_cpu, cpu); + struct blkg_iostat cur, delta; + unsigned long flags; + unsigned int seq; + + /* fetch the current per-cpu values */ + do { + seq = u64_stats_fetch_begin(&bisc->sync); + blkg_iostat_set(&cur, &bisc->cur); + } while (u64_stats_fetch_retry(&bisc->sync, seq)); + + /* propagate percpu delta to global */ + flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync); + blkg_iostat_set(&delta, &cur); + blkg_iostat_sub(&delta, &bisc->last); + blkg_iostat_add(&blkg->iostat.cur, &delta); + blkg_iostat_add(&bisc->last, &delta); + u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); + + /* propagate global delta to parent (unless that's root) */ + if (parent && parent->parent) { + flags = u64_stats_update_begin_irqsave(&parent->iostat.sync); + blkg_iostat_set(&delta, &blkg->iostat.cur); + blkg_iostat_sub(&delta, &blkg->iostat.last); + blkg_iostat_add(&parent->iostat.cur, &delta); + blkg_iostat_add(&blkg->iostat.last, &delta); + u64_stats_update_end_irqrestore(&parent->iostat.sync, flags); + } + } + + rcu_read_unlock(); +} + +/* + * We source root cgroup stats from the system-wide stats to avoid + * tracking the same information twice and incurring overhead when no + * cgroups are defined. For that reason, cgroup_rstat_flush in + * blkcg_print_stat does not actually fill out the iostat in the root + * cgroup's blkcg_gq. + * + * However, we would like to re-use the printing code between the root and + * non-root cgroups to the extent possible. For that reason, we simulate + * flushing the root cgroup's stats by explicitly filling in the iostat + * with disk level statistics. + */ +static void blkcg_fill_root_iostats(void) +{ + struct class_dev_iter iter; + struct device *dev; + + class_dev_iter_init(&iter, &block_class, NULL, &disk_type); + while ((dev = class_dev_iter_next(&iter))) { + struct gendisk *disk = dev_to_disk(dev); + struct hd_struct *part = disk_get_part(disk, 0); + struct blkcg_gq *blkg = blk_queue_root_blkg(disk->queue); + struct blkg_iostat tmp; + int cpu; + unsigned long flags; + + memset(&tmp, 0, sizeof(tmp)); + for_each_possible_cpu(cpu) { + struct disk_stats *cpu_dkstats; + + cpu_dkstats = per_cpu_ptr(part->dkstats, cpu); + tmp.ios[BLKG_IOSTAT_READ] += + cpu_dkstats->ios[STAT_READ]; + tmp.ios[BLKG_IOSTAT_WRITE] += + cpu_dkstats->ios[STAT_WRITE]; + tmp.ios[BLKG_IOSTAT_DISCARD] += + cpu_dkstats->ios[STAT_DISCARD]; + // convert sectors to bytes + tmp.bytes[BLKG_IOSTAT_READ] += + cpu_dkstats->sectors[STAT_READ] << 9; + tmp.bytes[BLKG_IOSTAT_WRITE] += + cpu_dkstats->sectors[STAT_WRITE] << 9; + tmp.bytes[BLKG_IOSTAT_DISCARD] += + cpu_dkstats->sectors[STAT_DISCARD] << 9; + } + + flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync); + blkg_iostat_set(&blkg->iostat.cur, &tmp); + u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); + } +} + static int blkcg_print_stat(struct seq_file *sf, void *v) { struct blkcg *blkcg = css_to_blkcg(seq_css(sf)); struct blkcg_gq *blkg; + if (!seq_css(sf)->parent) + blkcg_fill_root_iostats(); + else + cgroup_rstat_flush(blkcg->css.cgroup); + rcu_read_lock(); hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) { + struct blkg_iostat_set *bis = &blkg->iostat; const char *dname; char *buf; - struct blkg_rwstat_sample rwstat; u64 rbytes, wbytes, rios, wios, dbytes, dios; size_t size = seq_get_buf(sf, &buf), off = 0; int i; bool has_stats = false; + unsigned seq; spin_lock_irq(&blkg->q->queue_lock); @@ -962,17 +917,16 @@ static int blkcg_print_stat(struct seq_file *sf, void *v) */ off += scnprintf(buf+off, size-off, "%s ", dname); - blkg_rwstat_recursive_sum(blkg, NULL, - offsetof(struct blkcg_gq, stat_bytes), &rwstat); - rbytes = rwstat.cnt[BLKG_RWSTAT_READ]; - wbytes = rwstat.cnt[BLKG_RWSTAT_WRITE]; - dbytes = rwstat.cnt[BLKG_RWSTAT_DISCARD]; + do { + seq = u64_stats_fetch_begin(&bis->sync); - blkg_rwstat_recursive_sum(blkg, NULL, - offsetof(struct blkcg_gq, stat_ios), &rwstat); - rios = rwstat.cnt[BLKG_RWSTAT_READ]; - wios = rwstat.cnt[BLKG_RWSTAT_WRITE]; - dios = rwstat.cnt[BLKG_RWSTAT_DISCARD]; + rbytes = bis->cur.bytes[BLKG_IOSTAT_READ]; + wbytes = bis->cur.bytes[BLKG_IOSTAT_WRITE]; + dbytes = bis->cur.bytes[BLKG_IOSTAT_DISCARD]; + rios = bis->cur.ios[BLKG_IOSTAT_READ]; + wios = bis->cur.ios[BLKG_IOSTAT_WRITE]; + dios = bis->cur.ios[BLKG_IOSTAT_DISCARD]; + } while (u64_stats_fetch_retry(&bis->sync, seq)); if (rbytes || wbytes || rios || wios) { has_stats = true; @@ -1022,7 +976,6 @@ static int blkcg_print_stat(struct seq_file *sf, void *v) static struct cftype blkcg_files[] = { { .name = "stat", - .flags = CFTYPE_NOT_ON_ROOT, .seq_show = blkcg_print_stat, }, { } /* terminate */ @@ -1072,8 +1025,8 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css) /* this prevents anyone from attaching or migrating to this blkcg */ wb_blkcg_offline(blkcg); - /* put the base cgwb reference allowing step 2 to be triggered */ - blkcg_cgwb_put(blkcg); + /* put the base online pin allowing step 2 to be triggered */ + blkcg_unpin_online(blkcg); } /** @@ -1180,11 +1133,11 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) } spin_lock_init(&blkcg->lock); + refcount_set(&blkcg->online_pin, 1); INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_NOWAIT | __GFP_NOWARN); INIT_HLIST_HEAD(&blkcg->blkg_list); #ifdef CONFIG_CGROUP_WRITEBACK INIT_LIST_HEAD(&blkcg->cgwb_list); - refcount_set(&blkcg->cgwb_refcnt, 1); #endif list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs); @@ -1203,6 +1156,21 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) return ret; } +static int blkcg_css_online(struct cgroup_subsys_state *css) +{ + struct blkcg *blkcg = css_to_blkcg(css); + struct blkcg *parent = blkcg_parent(blkcg); + + /* + * blkcg_pin_online() is used to delay blkcg offline so that blkgs + * don't go offline while cgwbs are still active on them. Pin the + * parent so that offline always happens towards the root. + */ + if (parent) + blkcg_pin_online(parent); + return 0; +} + /** * blkcg_init_queue - initialize blkcg part of request queue * @q: request_queue to initialize @@ -1238,6 +1206,10 @@ int blkcg_init_queue(struct request_queue *q) if (preloaded) radix_tree_preload_end(); + ret = blk_iotrace_init(q); + if (ret) + goto err_destroy_all; + ret = blk_throtl_init(q); if (ret) goto err_destroy_all; @@ -1347,9 +1319,11 @@ static void blkcg_exit(struct task_struct *tsk) struct cgroup_subsys io_cgrp_subsys = { .css_alloc = blkcg_css_alloc, + .css_online = blkcg_css_online, .css_offline = blkcg_css_offline, .css_free = blkcg_css_free, .can_attach = blkcg_can_attach, + .css_rstat_flush = blkcg_rstat_flush, .bind = blkcg_bind, .dfl_cftypes = blkcg_files, .legacy_cftypes = blkcg_legacy_files, @@ -1669,6 +1643,10 @@ static void blkcg_scale_delay(struct blkcg_gq *blkg, u64 now) { u64 old = atomic64_read(&blkg->delay_start); + /* negative use_delay means no scaling, see blkcg_set_delay() */ + if (atomic_read(&blkg->use_delay) < 0) + return; + /* * We only want to scale down every second. The idea here is that we * want to delay people for min(delay_nsec, NSEC_PER_SEC) in a certain @@ -1721,16 +1699,24 @@ static void blkcg_scale_delay(struct blkcg_gq *blkg, u64 now) static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay) { unsigned long pflags; + bool clamp; u64 now = ktime_to_ns(ktime_get()); u64 exp; u64 delay_nsec = 0; int tok; while (blkg->parent) { - if (atomic_read(&blkg->use_delay)) { + int use_delay = atomic_read(&blkg->use_delay); + + if (use_delay) { + u64 this_delay; + blkcg_scale_delay(blkg, now); - delay_nsec = max_t(u64, delay_nsec, - atomic64_read(&blkg->delay_nsec)); + this_delay = atomic64_read(&blkg->delay_nsec); + if (this_delay > delay_nsec) { + delay_nsec = this_delay; + clamp = use_delay > 0; + } } blkg = blkg->parent; } @@ -1742,10 +1728,13 @@ static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay) * Let's not sleep for all eternity if we've amassed a huge delay. * Swapping or metadata IO can accumulate 10's of seconds worth of * delay, and we want userspace to be able to do _something_ so cap the - * delays at 1 second. If there's 10's of seconds worth of delay then - * the tasks will be delayed for 1 second for every syscall. + * delays at 0.25s. If there's 10's of seconds worth of delay then the + * tasks will be delayed for 0.25 second for every syscall. If + * blkcg_set_delay() was used as indicated by negative use_delay, the + * caller is responsible for regulating the range. */ - delay_nsec = min_t(u64, delay_nsec, 250 * NSEC_PER_MSEC); + if (clamp) + delay_nsec = min_t(u64, delay_nsec, 250 * NSEC_PER_MSEC); if (use_memdelay) psi_memstall_enter(&pflags); @@ -1856,6 +1845,8 @@ void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) */ void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta) { + if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0)) + return; blkcg_scale_delay(blkg, now); atomic64_add(delta, &blkg->delay_nsec); } diff --git a/block/blk-flush.c b/block/blk-flush.c index 5aa6fada22598..f66ff16855310 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -222,8 +222,10 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error) return; } - if (fq->rq_status != BLK_STS_OK) + if (fq->rq_status != BLK_STS_OK) { error = fq->rq_status; + fq->rq_status = BLK_STS_OK; + } hctx = flush_rq->mq_hctx; if (!q->elevator) { diff --git a/block/blk-iocost.c b/block/blk-iocost.c index ef287c33d6d97..682041cf03911 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -39,7 +39,7 @@ * On top of that, a size cost proportional to the length of the IO is * added. While simple, this model captures the operational * characteristics of a wide varienty of devices well enough. Default - * paramters for several different classes of devices are provided and the + * parameters for several different classes of devices are provided and the * parameters can be configured from userspace via * /sys/fs/cgroup/io.cost.model. * @@ -71,7 +71,7 @@ * gets 300/(100+300) or 75% share, and A0 and A1 equally splits the rest, * 12.5% each. The distribution mechanism only cares about these flattened * shares. They're called hweights (hierarchical weights) and always add - * upto 1 (HWEIGHT_WHOLE). + * upto 1 (WEIGHT_ONE). * * A given cgroup's vtime runs slower in inverse proportion to its hweight. * For example, with 12.5% weight, A0's time runs 8 times slower (100/12.5) @@ -80,7 +80,7 @@ * * This constitutes the basis of IO capacity distribution. Each cgroup's * vtime is running at a rate determined by its hweight. A cgroup tracks - * the vtime consumed by past IOs and can issue a new IO iff doing so + * the vtime consumed by past IOs and can issue a new IO if doing so * wouldn't outrun the current device vtime. Otherwise, the IO is * suspended until the vtime has progressed enough to cover it. * @@ -158,7 +158,7 @@ * Instead of debugfs or other clumsy monitoring mechanisms, this * controller uses a drgn based monitoring script - * tools/cgroup/iocost_monitor.py. For details on drgn, please see - * https://github.com/osandov/drgn. The ouput looks like the following. + * https://github.com/osandov/drgn. The output looks like the following. * * sdb RUN per=300ms cur_per=234.218:v203.695 busy= +1 vrate= 62.12% * active weight hweight% inflt% dbt delay usages% @@ -182,6 +182,8 @@ #include #include #include +#include +#include #include "blk-rq-qos.h" #include "blk-stat.h" #include "blk-wbt.h" @@ -218,36 +220,21 @@ enum { MAX_PERIOD = USEC_PER_SEC, /* - * A cgroup's vtime can run 50% behind the device vtime, which + * iocg->vtime is targeted at 50% behind the device vtime, which * serves as its IO credit buffer. Surplus weight adjustment is * immediately canceled if the vtime margin runs below 10%. */ - MARGIN_PCT = 50, - INUSE_MARGIN_PCT = 10, + MARGIN_MIN_PCT = 10, + MARGIN_LOW_PCT = 20, + MARGIN_TARGET_PCT = 50, - /* Have some play in waitq timer operations */ - WAITQ_TIMER_MARGIN_PCT = 5, + INUSE_ADJ_STEP_PCT = 25, - /* - * vtime can wrap well within a reasonable uptime when vrate is - * consistently raised. Don't trust recorded cgroup vtime if the - * period counter indicates that it's older than 5mins. - */ - VTIME_VALID_DUR = 300 * USEC_PER_SEC, - - /* - * Remember the past three non-zero usages and use the max for - * surplus calculation. Three slots guarantee that we remember one - * full period usage from the last active stretch even after - * partial deactivation and re-activation periods. Don't start - * giving away weight before collecting two data points to prevent - * hweight adjustments based on one partial activation period. - */ - NR_USAGE_SLOTS = 3, - MIN_VALID_USAGES = 2, + /* Have some play in timer operations */ + TIMER_SLACK_PCT = 1, /* 1/64k is granular enough and can easily be handled w/ u32 */ - HWEIGHT_WHOLE = 1 << 16, + WEIGHT_ONE = 1 << 16, /* * As vtime is used to calculate the cost of each IO, it needs to @@ -263,6 +250,7 @@ enum { VTIME_PER_SEC_SHIFT = 37, VTIME_PER_SEC = 1LLU << VTIME_PER_SEC_SHIFT, VTIME_PER_USEC = VTIME_PER_SEC / USEC_PER_SEC, + VTIME_PER_NSEC = VTIME_PER_SEC / NSEC_PER_SEC, /* bound vrate adjustments within two orders of magnitude */ VRATE_MIN_PPM = 10000, /* 1% */ @@ -277,16 +265,37 @@ enum { /* unbusy hysterisis */ UNBUSY_THR_PCT = 75, - /* don't let cmds which take a very long time pin lagging for too long */ - MAX_LAGGING_PERIODS = 10, - /* - * If usage% * 1.25 + 2% is lower than hweight% by more than 3%, - * donate the surplus. + * The effect of delay is indirect and non-linear and a huge amount of + * future debt can accumulate abruptly while unthrottled. Linearly scale + * up delay as debt is going up and then let it decay exponentially. + * This gives us quick ramp ups while delay is accumulating and long + * tails which can help reducing the frequency of debt explosions on + * unthrottle. The parameters are experimentally determined. + * + * The delay mechanism provides adequate protection and behavior in many + * cases. However, this is far from ideal and falls shorts on both + * fronts. The debtors are often throttled too harshly costing a + * significant level of fairness and possibly total work while the + * protection against their impacts on the system can be choppy and + * unreliable. + * + * The shortcoming primarily stems from the fact that, unlike for page + * cache, the kernel doesn't have well-defined back-pressure propagation + * mechanism and policies for anonymous memory. Fully addressing this + * issue will likely require substantial improvements in the area. */ - SURPLUS_SCALE_PCT = 125, /* * 125% */ - SURPLUS_SCALE_ABS = HWEIGHT_WHOLE / 50, /* + 2% */ - SURPLUS_MIN_ADJ_DELTA = HWEIGHT_WHOLE / 33, /* 3% */ + MIN_DELAY_THR_PCT = 500, + MAX_DELAY_THR_PCT = 25000, + MIN_DELAY = 250, + MAX_DELAY = 250 * USEC_PER_MSEC, + + /* halve debts if avg usage over 100ms is under 50% */ + DFGV_USAGE_PCT = 50, + DFGV_PERIOD = 100 * USEC_PER_MSEC, + + /* don't let cmds which take a very long time pin lagging for too long */ + MAX_LAGGING_PERIODS = 10, /* switch iff the conditions are met for longer than this */ AUTOP_CYCLE_NSEC = 10LLU * NSEC_PER_SEC, @@ -364,8 +373,6 @@ enum { AUTOP_SSD_FAST, }; -struct ioc_gq; - struct ioc_params { u32 qos[NR_QOS_PARAMS]; u64 i_lcoefs[NR_I_LCOEFS]; @@ -374,9 +381,15 @@ struct ioc_params { u32 too_slow_vrate_pct; }; +struct ioc_margins { + s64 min; + s64 low; + s64 target; +}; + struct ioc_missed { - u32 nr_met; - u32 nr_missed; + local_t nr_met; + local_t nr_missed; u32 last_met; u32 last_missed; }; @@ -384,7 +397,7 @@ struct ioc_missed { struct ioc_pcpu_stat { struct ioc_missed missed[2]; - u64 rq_wait_ns; + local64_t rq_wait_ns; u64 last_rq_wait_ns; }; @@ -395,8 +408,9 @@ struct ioc { bool enabled; struct ioc_params params; + struct ioc_margins margins; u32 period_us; - u32 margin_us; + u32 timer_slack_ns; u64 vrate_min; u64 vrate_max; @@ -407,18 +421,24 @@ struct ioc { enum ioc_running running; atomic64_t vtime_rate; + u64 vtime_base_rate; + s64 vtime_err; seqcount_t period_seqcount; - u32 period_at; /* wallclock starttime */ + u64 period_at; /* wallclock starttime */ u64 period_at_vtime; /* vtime starttime */ atomic64_t cur_period; /* inc'd each period */ int busy_level; /* saturation history */ - u64 inuse_margin_vtime; bool weights_updated; atomic_t hweight_gen; /* for lazy hweights */ + /* debt forgivness */ + u64 dfgv_period_at; + u64 dfgv_period_rem; + u64 dfgv_usage_us_sum; + u64 autop_too_fast_at; u64 autop_too_slow_at; int autop_idx; @@ -426,6 +446,17 @@ struct ioc { bool user_cost_model:1; }; +struct iocg_pcpu_stat { + local64_t abs_vusage; +}; + +struct iocg_stat { + u64 usage_us; + u64 wait_us; + u64 indebt_us; + u64 indelay_us; +}; + /* per device-cgroup pair */ struct ioc_gq { struct blkg_policy_data pd; @@ -445,32 +476,37 @@ struct ioc_gq { * * `last_inuse` remembers `inuse` while an iocg is idle to persist * surplus adjustments. + * + * `inuse` may be adjusted dynamically during period. `saved_*` are used + * to determine and track adjustments. */ u32 cfg_weight; u32 weight; u32 active; u32 inuse; + u32 last_inuse; + s64 saved_margin; sector_t cursor; /* to detect randio */ /* * `vtime` is this iocg's vtime cursor which progresses as IOs are * issued. If lagging behind device vtime, the delta represents - * the currently available IO budget. If runnning ahead, the + * the currently available IO budget. If running ahead, the * overage. * * `vtime_done` is the same but progressed on completion rather * than issue. The delta behind `vtime` represents the cost of * currently in-flight IOs. - * - * `last_vtime` is used to remember `vtime` at the end of the last - * period to calculate utilization. */ atomic64_t vtime; atomic64_t done_vtime; u64 abs_vdebt; - u64 last_vtime; + + /* current delay in effect and when it started */ + u64 delay; + u64 delay_at; /* * The period this iocg was last active in. Used for deactivation @@ -479,21 +515,34 @@ struct ioc_gq { atomic64_t active_period; struct list_head active_list; - /* see __propagate_active_weight() and current_hweight() for details */ + /* see __propagate_weights() and current_hweight() for details */ u64 child_active_sum; u64 child_inuse_sum; + u64 child_adjusted_sum; int hweight_gen; u32 hweight_active; u32 hweight_inuse; - bool has_surplus; + u32 hweight_donating; + u32 hweight_after_donation; + + struct list_head walk_list; + struct list_head surplus_list; struct wait_queue_head waitq; struct hrtimer waitq_timer; - struct hrtimer delay_timer; - /* usage is recorded as fractions of HWEIGHT_WHOLE */ - int usage_idx; - u32 usages[NR_USAGE_SLOTS]; + /* timestamp at the latest activation */ + u64 activated_at; + + /* statistics */ + struct iocg_pcpu_stat __percpu *pcpu_stat; + struct iocg_stat stat; + struct iocg_stat last_stat; + u64 last_stat_abs_vusage; + u64 usage_delta_us; + u64 wait_since; + u64 indebt_since; + u64 indelay_since; /* this iocg's depth in the hierarchy and ancestors including self */ int level; @@ -508,7 +557,7 @@ struct ioc_cgrp { struct ioc_now { u64 now_ns; - u32 now; + u64 now; u64 vnow; u64 vrate; }; @@ -658,7 +707,7 @@ static struct ioc_cgrp *blkcg_to_iocc(struct blkcg *blkcg) */ static u64 abs_cost_to_cost(u64 abs_cost, u32 hw_inuse) { - return DIV64_U64_ROUND_UP(abs_cost * HWEIGHT_WHOLE, hw_inuse); + return DIV64_U64_ROUND_UP(abs_cost * WEIGHT_ONE, hw_inuse); } /* @@ -666,18 +715,56 @@ static u64 abs_cost_to_cost(u64 abs_cost, u32 hw_inuse) */ static u64 cost_to_abs_cost(u64 cost, u32 hw_inuse) { - return DIV64_U64_ROUND_UP(cost * hw_inuse, HWEIGHT_WHOLE); + return DIV64_U64_ROUND_UP(cost * hw_inuse, WEIGHT_ONE); } -static void iocg_commit_bio(struct ioc_gq *iocg, struct bio *bio, u64 cost) +static void iocg_commit_bio(struct ioc_gq *iocg, struct bio *bio, + u64 abs_cost, u64 cost) { + struct iocg_pcpu_stat *gcs; + bio->bi_iocost_cost = cost; atomic64_add(cost, &iocg->vtime); + + gcs = get_cpu_ptr(iocg->pcpu_stat); + local64_add(abs_cost, &gcs->abs_vusage); + put_cpu_ptr(gcs); +} + +static void iocg_lock(struct ioc_gq *iocg, bool lock_ioc, unsigned long *flags) +{ + if (lock_ioc) { + spin_lock_irqsave(&iocg->ioc->lock, *flags); + spin_lock(&iocg->waitq.lock); + } else { + spin_lock_irqsave(&iocg->waitq.lock, *flags); + } +} + +static void iocg_unlock(struct ioc_gq *iocg, bool unlock_ioc, unsigned long *flags) +{ + if (unlock_ioc) { + spin_unlock(&iocg->waitq.lock); + spin_unlock_irqrestore(&iocg->ioc->lock, *flags); + } else { + spin_unlock_irqrestore(&iocg->waitq.lock, *flags); + } } #define CREATE_TRACE_POINTS #include +static void ioc_refresh_margins(struct ioc *ioc) +{ + struct ioc_margins *margins = &ioc->margins; + u32 period_us = ioc->period_us; + u64 vrate = ioc->vtime_base_rate; + + margins->min = (period_us * MARGIN_MIN_PCT / 100) * vrate; + margins->low = (period_us * MARGIN_LOW_PCT / 100) * vrate; + margins->target = (period_us * MARGIN_TARGET_PCT / 100) * vrate; +} + /* latency Qos params changed, update period_us and all the dependent params */ static void ioc_refresh_period_us(struct ioc *ioc) { @@ -711,9 +798,10 @@ static void ioc_refresh_period_us(struct ioc *ioc) /* calculate dependent params */ ioc->period_us = period_us; - ioc->margin_us = period_us * MARGIN_PCT / 100; - ioc->inuse_margin_vtime = DIV64_U64_ROUND_UP( - period_us * VTIME_PER_USEC * INUSE_MARGIN_PCT, 100); + ioc->timer_slack_ns = div64_u64( + (u64)period_us * NSEC_PER_USEC * TIMER_SLACK_PCT, + 100); + ioc_refresh_margins(ioc); } static int ioc_autop_idx(struct ioc *ioc) @@ -740,8 +828,7 @@ static int ioc_autop_idx(struct ioc *ioc) return idx; /* step up/down based on the vrate */ - vrate_pct = div64_u64(atomic64_read(&ioc->vtime_rate) * 100, - VTIME_PER_USEC); + vrate_pct = div64_u64(ioc->vtime_base_rate * 100, VTIME_PER_USEC); now_ns = ktime_get_ns(); if (p->too_fast_vrate_pct && p->too_fast_vrate_pct <= vrate_pct) { @@ -849,6 +936,91 @@ static bool ioc_refresh_params(struct ioc *ioc, bool force) return true; } +/* + * When an iocg accumulates too much vtime or gets deactivated, we throw away + * some vtime, which lowers the overall device utilization. As the exact amount + * which is being thrown away is known, we can compensate by accelerating the + * vrate accordingly so that the extra vtime generated in the current period + * matches what got lost. + */ +static void ioc_refresh_vrate(struct ioc *ioc, struct ioc_now *now) +{ + s64 pleft = ioc->period_at + ioc->period_us - now->now; + s64 vperiod = ioc->period_us * ioc->vtime_base_rate; + s64 vcomp, vcomp_min, vcomp_max; + + lockdep_assert_held(&ioc->lock); + + /* we need some time left in this period */ + if (pleft <= 0) + goto done; + + /* + * Calculate how much vrate should be adjusted to offset the error. + * Limit the amount of adjustment and deduct the adjusted amount from + * the error. + */ + vcomp = -div64_s64(ioc->vtime_err, pleft); + vcomp_min = -(ioc->vtime_base_rate >> 1); + vcomp_max = ioc->vtime_base_rate; + vcomp = clamp(vcomp, vcomp_min, vcomp_max); + + ioc->vtime_err += vcomp * pleft; + + atomic64_set(&ioc->vtime_rate, ioc->vtime_base_rate + vcomp); +done: + /* bound how much error can accumulate */ + ioc->vtime_err = clamp(ioc->vtime_err, -vperiod, vperiod); +} + +static void ioc_adjust_base_vrate(struct ioc *ioc, u32 rq_wait_pct, + int nr_lagging, int nr_shortages, + int prev_busy_level, u32 *missed_ppm) +{ + u64 vrate = ioc->vtime_base_rate; + u64 vrate_min = ioc->vrate_min, vrate_max = ioc->vrate_max; + + if (!ioc->busy_level || (ioc->busy_level < 0 && nr_lagging)) { + if (ioc->busy_level != prev_busy_level || nr_lagging) + trace_iocost_ioc_vrate_adj(ioc, atomic64_read(&ioc->vtime_rate), + missed_ppm, rq_wait_pct, + nr_lagging, nr_shortages); + + return; + } + + /* + * If vrate is out of bounds, apply clamp gradually as the + * bounds can change abruptly. Otherwise, apply busy_level + * based adjustment. + */ + if (vrate < vrate_min) { + vrate = div64_u64(vrate * (100 + VRATE_CLAMP_ADJ_PCT), 100); + vrate = min(vrate, vrate_min); + } else if (vrate > vrate_max) { + vrate = div64_u64(vrate * (100 - VRATE_CLAMP_ADJ_PCT), 100); + vrate = max(vrate, vrate_max); + } else { + int idx = min_t(int, abs(ioc->busy_level), + ARRAY_SIZE(vrate_adj_pct) - 1); + u32 adj_pct = vrate_adj_pct[idx]; + + if (ioc->busy_level > 0) + adj_pct = 100 - adj_pct; + else + adj_pct = 100 + adj_pct; + + vrate = clamp(DIV64_U64_ROUND_UP(vrate * adj_pct, 100), + vrate_min, vrate_max); + } + + trace_iocost_ioc_vrate_adj(ioc, vrate, missed_ppm, rq_wait_pct, + nr_lagging, nr_shortages); + + ioc->vtime_base_rate = vrate; + ioc_refresh_margins(ioc); +} + /* take a snapshot of the current [v]time and vrate */ static void ioc_now(struct ioc *ioc, struct ioc_now *now) { @@ -889,16 +1061,35 @@ static void ioc_start_period(struct ioc *ioc, struct ioc_now *now) /* * Update @iocg's `active` and `inuse` to @active and @inuse, update level - * weight sums and propagate upwards accordingly. + * weight sums and propagate upwards accordingly. If @save, the current margin + * is saved to be used as reference for later inuse in-period adjustments. */ -static void __propagate_active_weight(struct ioc_gq *iocg, u32 active, u32 inuse) +static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse, + bool save, struct ioc_now *now) { struct ioc *ioc = iocg->ioc; int lvl; lockdep_assert_held(&ioc->lock); - inuse = min(active, inuse); + /* + * For an active leaf node, its inuse shouldn't be zero or exceed + * @active. An active internal node's inuse is solely determined by the + * inuse to active ratio of its children regardless of @inuse. + */ + if (list_empty(&iocg->active_list) && iocg->child_active_sum) { + inuse = DIV64_U64_ROUND_UP(active * iocg->child_inuse_sum, + iocg->child_active_sum); + } else { + inuse = clamp_t(u32, inuse, 1, active); + } + + iocg->last_inuse = iocg->inuse; + if (save) + iocg->saved_margin = now->vnow - atomic64_read(&iocg->vtime); + + if (active == iocg->active && inuse == iocg->inuse) + return; for (lvl = iocg->level - 1; lvl >= 0; lvl--) { struct ioc_gq *parent = iocg->ancestors[lvl]; @@ -908,13 +1099,13 @@ static void __propagate_active_weight(struct ioc_gq *iocg, u32 active, u32 inuse /* update the level sums */ parent->child_active_sum += (s32)(active - child->active); parent->child_inuse_sum += (s32)(inuse - child->inuse); - /* apply the udpates */ + /* apply the updates */ child->active = active; child->inuse = inuse; /* * The delta between inuse and active sums indicates that - * that much of weight is being given away. Parent's inuse + * much of weight is being given away. Parent's inuse * and active should reflect the ratio. */ if (parent->child_active_sum) { @@ -936,7 +1127,7 @@ static void __propagate_active_weight(struct ioc_gq *iocg, u32 active, u32 inuse ioc->weights_updated = true; } -static void commit_active_weights(struct ioc *ioc) +static void commit_weights(struct ioc *ioc) { lockdep_assert_held(&ioc->lock); @@ -948,10 +1139,11 @@ static void commit_active_weights(struct ioc *ioc) } } -static void propagate_active_weight(struct ioc_gq *iocg, u32 active, u32 inuse) +static void propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse, + bool save, struct ioc_now *now) { - __propagate_active_weight(iocg, active, inuse); - commit_active_weights(iocg->ioc); + __propagate_weights(iocg, active, inuse, save, now); + commit_weights(iocg->ioc); } static void current_hweight(struct ioc_gq *iocg, u32 *hw_activep, u32 *hw_inusep) @@ -967,9 +1159,9 @@ static void current_hweight(struct ioc_gq *iocg, u32 *hw_activep, u32 *hw_inusep goto out; /* - * Paired with wmb in commit_active_weights(). If we saw the - * updated hweight_gen, all the weight updates from - * __propagate_active_weight() are visible too. + * Paired with wmb in commit_weights(). If we saw the updated + * hweight_gen, all the weight updates from __propagate_weights() are + * visible too. * * We can race with weight updates during calculation and get it * wrong. However, hweight_gen would have changed and a future @@ -978,12 +1170,12 @@ static void current_hweight(struct ioc_gq *iocg, u32 *hw_activep, u32 *hw_inusep */ smp_rmb(); - hwa = hwi = HWEIGHT_WHOLE; + hwa = hwi = WEIGHT_ONE; for (lvl = 0; lvl <= iocg->level - 1; lvl++) { struct ioc_gq *parent = iocg->ancestors[lvl]; struct ioc_gq *child = iocg->ancestors[lvl + 1]; - u32 active_sum = READ_ONCE(parent->child_active_sum); - u32 inuse_sum = READ_ONCE(parent->child_inuse_sum); + u64 active_sum = READ_ONCE(parent->child_active_sum); + u64 inuse_sum = READ_ONCE(parent->child_inuse_sum); u32 active = READ_ONCE(child->active); u32 inuse = READ_ONCE(child->inuse); @@ -991,11 +1183,11 @@ static void current_hweight(struct ioc_gq *iocg, u32 *hw_activep, u32 *hw_inusep if (!active_sum || !inuse_sum) continue; - active_sum = max(active, active_sum); - hwa = hwa * active / active_sum; /* max 16bits * 10000 */ + active_sum = max_t(u64, active, active_sum); + hwa = div64_u64((u64)hwa * active, active_sum); - inuse_sum = max(inuse, inuse_sum); - hwi = hwi * inuse / inuse_sum; /* max 16bits * 10000 */ + inuse_sum = max_t(u64, inuse, inuse_sum); + hwi = div64_u64((u64)hwi * inuse, inuse_sum); } iocg->hweight_active = max_t(u32, hwa, 1); @@ -1008,7 +1200,33 @@ static void current_hweight(struct ioc_gq *iocg, u32 *hw_activep, u32 *hw_inusep *hw_inusep = iocg->hweight_inuse; } -static void weight_updated(struct ioc_gq *iocg) +/* + * Calculate the hweight_inuse @iocg would get with max @inuse assuming all the + * other weights stay unchanged. + */ +static u32 current_hweight_max(struct ioc_gq *iocg) +{ + u32 hwm = WEIGHT_ONE; + u32 inuse = iocg->active; + u64 child_inuse_sum; + int lvl; + + lockdep_assert_held(&iocg->ioc->lock); + + for (lvl = iocg->level - 1; lvl >= 0; lvl--) { + struct ioc_gq *parent = iocg->ancestors[lvl]; + struct ioc_gq *child = iocg->ancestors[lvl + 1]; + + child_inuse_sum = parent->child_inuse_sum + inuse - child->inuse; + hwm = div64_u64((u64)hwm * inuse, child_inuse_sum); + inuse = DIV64_U64_ROUND_UP(parent->active * child_inuse_sum, + parent->child_active_sum); + } + + return max_t(u32, hwm, 1); +} + +static void weight_updated(struct ioc_gq *iocg, struct ioc_now *now) { struct ioc *ioc = iocg->ioc; struct blkcg_gq *blkg = iocg_to_blkg(iocg); @@ -1019,16 +1237,15 @@ static void weight_updated(struct ioc_gq *iocg) weight = iocg->cfg_weight ?: iocc->dfl_weight; if (weight != iocg->weight && iocg->active) - propagate_active_weight(iocg, weight, - DIV64_U64_ROUND_UP(iocg->inuse * weight, iocg->weight)); + propagate_weights(iocg, weight, iocg->inuse, true, now); iocg->weight = weight; } static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now) { struct ioc *ioc = iocg->ioc; - u64 last_period, cur_period, max_period_delta; - u64 vtime, vmargin, vmin; + u64 last_period, cur_period; + u64 vtime, vtarget; int i; /* @@ -1067,22 +1284,15 @@ static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now) goto fail_unlock; /* - * vtime may wrap when vrate is raised substantially due to - * underestimated IO costs. Look at the period and ignore its - * vtime if the iocg has been idle for too long. Also, cap the - * budget it can start with to the margin. + * Always start with the target budget. On deactivation, we throw away + * anything above it. */ - max_period_delta = DIV64_U64_ROUND_UP(VTIME_VALID_DUR, ioc->period_us); + vtarget = now->vnow - ioc->margins.target; vtime = atomic64_read(&iocg->vtime); - vmargin = ioc->margin_us * now->vrate; - vmin = now->vnow - vmargin; - if (last_period + max_period_delta < cur_period || - time_before64(vtime, vmin)) { - atomic64_add(vmin - vtime, &iocg->vtime); - atomic64_add(vmin - vtime, &iocg->done_vtime); - vtime = vmin; - } + atomic64_add(vtarget - vtime, &iocg->vtime); + atomic64_add(vtarget - vtime, &iocg->done_vtime); + vtime = vtarget; /* * Activate, propagate weight and start period timer if not @@ -1091,16 +1301,19 @@ static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now) */ iocg->hweight_gen = atomic_read(&ioc->hweight_gen) - 1; list_add(&iocg->active_list, &ioc->active_iocgs); - propagate_active_weight(iocg, iocg->weight, - iocg->last_inuse ?: iocg->weight); + + propagate_weights(iocg, iocg->weight, + iocg->last_inuse ?: iocg->weight, true, now); TRACE_IOCG_PATH(iocg_activate, iocg, now, last_period, cur_period, vtime); - iocg->last_vtime = vtime; + iocg->activated_at = now->now; if (ioc->running == IOC_IDLE) { ioc->running = IOC_RUNNING; + ioc->dfgv_period_at = now->now; + ioc->dfgv_period_rem = 0; ioc_start_period(ioc, now); } @@ -1113,6 +1326,110 @@ static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now) return false; } +static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now) +{ + struct ioc *ioc = iocg->ioc; + struct blkcg_gq *blkg = iocg_to_blkg(iocg); + u64 tdelta, delay, new_delay; + s64 vover, vover_pct; + u32 hwa; + + lockdep_assert_held(&iocg->waitq.lock); + + /* calculate the current delay in effect - 1/2 every second */ + tdelta = now->now - iocg->delay_at; + if (iocg->delay) + delay = iocg->delay >> div64_u64(tdelta, USEC_PER_SEC); + else + delay = 0; + + /* calculate the new delay from the debt amount */ + current_hweight(iocg, &hwa, NULL); + vover = atomic64_read(&iocg->vtime) + + abs_cost_to_cost(iocg->abs_vdebt, hwa) - now->vnow; + vover_pct = div64_s64(100 * vover, + ioc->period_us * ioc->vtime_base_rate); + + if (vover_pct <= MIN_DELAY_THR_PCT) + new_delay = 0; + else if (vover_pct >= MAX_DELAY_THR_PCT) + new_delay = MAX_DELAY; + else + new_delay = MIN_DELAY + + div_u64((MAX_DELAY - MIN_DELAY) * + (vover_pct - MIN_DELAY_THR_PCT), + MAX_DELAY_THR_PCT - MIN_DELAY_THR_PCT); + + /* pick the higher one and apply */ + if (new_delay > delay) { + iocg->delay = new_delay; + iocg->delay_at = now->now; + delay = new_delay; + } + + if (delay >= MIN_DELAY) { + if (!iocg->indelay_since) + iocg->indelay_since = now->now; + blkcg_set_delay(blkg, delay * NSEC_PER_USEC); + return true; + } else { + if (iocg->indelay_since) { + iocg->stat.indelay_us += now->now - iocg->indelay_since; + iocg->indelay_since = 0; + } + iocg->delay = 0; + blkcg_clear_delay(blkg); + return false; + } +} + +static void iocg_incur_debt(struct ioc_gq *iocg, u64 abs_cost, + struct ioc_now *now) +{ + struct iocg_pcpu_stat *gcs; + + lockdep_assert_held(&iocg->ioc->lock); + lockdep_assert_held(&iocg->waitq.lock); + WARN_ON_ONCE(list_empty(&iocg->active_list)); + + /* + * Once in debt, debt handling owns inuse. @iocg stays at the minimum + * inuse donating all of it share to others until its debt is paid off. + */ + if (!iocg->abs_vdebt && abs_cost) { + iocg->indebt_since = now->now; + propagate_weights(iocg, iocg->active, 0, false, now); + } + + iocg->abs_vdebt += abs_cost; + + gcs = get_cpu_ptr(iocg->pcpu_stat); + local64_add(abs_cost, &gcs->abs_vusage); + put_cpu_ptr(gcs); +} + +static void iocg_pay_debt(struct ioc_gq *iocg, u64 abs_vpay, + struct ioc_now *now) +{ + lockdep_assert_held(&iocg->ioc->lock); + lockdep_assert_held(&iocg->waitq.lock); + + /* make sure that nobody messed with @iocg */ + WARN_ON_ONCE(list_empty(&iocg->active_list)); + WARN_ON_ONCE(iocg->inuse > 1); + + iocg->abs_vdebt -= min(abs_vpay, iocg->abs_vdebt); + + /* if debt is paid in full, restore inuse */ + if (!iocg->abs_vdebt) { + iocg->stat.indebt_us += now->now - iocg->indebt_since; + iocg->indebt_since = 0; + + propagate_weights(iocg, iocg->active, iocg->last_inuse, + false, now); + } +} + static int iocg_wake_fn(struct wait_queue_entry *wq_entry, unsigned mode, int flags, void *key) { @@ -1125,151 +1442,121 @@ static int iocg_wake_fn(struct wait_queue_entry *wq_entry, unsigned mode, if (ctx->vbudget < 0) return -1; - iocg_commit_bio(ctx->iocg, wait->bio, cost); + iocg_commit_bio(ctx->iocg, wait->bio, wait->abs_cost, cost); + wait->committed = true; /* * autoremove_wake_function() removes the wait entry only when it - * actually changed the task state. We want the wait always - * removed. Remove explicitly and use default_wake_function(). + * actually changed the task state. We want the wait always removed. + * Remove explicitly and use default_wake_function(). Note that the + * order of operations is important as finish_wait() tests whether + * @wq_entry is removed without grabbing the lock. */ - list_del_init(&wq_entry->entry); - wait->committed = true; - default_wake_function(wq_entry, mode, flags, key); + list_del_init_careful(&wq_entry->entry); return 0; } -static void iocg_kick_waitq(struct ioc_gq *iocg, struct ioc_now *now) +/* + * Calculate the accumulated budget, pay debt if @pay_debt and wake up waiters + * accordingly. When @pay_debt is %true, the caller must be holding ioc->lock in + * addition to iocg->waitq.lock. + */ +static void iocg_kick_waitq(struct ioc_gq *iocg, bool pay_debt, + struct ioc_now *now) { struct ioc *ioc = iocg->ioc; struct iocg_wake_ctx ctx = { .iocg = iocg }; - u64 margin_ns = (u64)(ioc->period_us * - WAITQ_TIMER_MARGIN_PCT / 100) * NSEC_PER_USEC; - u64 vdebt, vshortage, expires, oexpires; + u64 vshortage, expires, oexpires; s64 vbudget; - u32 hw_inuse; + u32 hwa; lockdep_assert_held(&iocg->waitq.lock); - current_hweight(iocg, NULL, &hw_inuse); + current_hweight(iocg, &hwa, NULL); vbudget = now->vnow - atomic64_read(&iocg->vtime); /* pay off debt */ - vdebt = abs_cost_to_cost(iocg->abs_vdebt, hw_inuse); - if (vdebt && vbudget > 0) { - u64 delta = min_t(u64, vbudget, vdebt); - u64 abs_delta = min(cost_to_abs_cost(delta, hw_inuse), - iocg->abs_vdebt); + if (pay_debt && iocg->abs_vdebt && vbudget > 0) { + u64 abs_vbudget = cost_to_abs_cost(vbudget, hwa); + u64 abs_vpay = min_t(u64, abs_vbudget, iocg->abs_vdebt); + u64 vpay = abs_cost_to_cost(abs_vpay, hwa); + + lockdep_assert_held(&ioc->lock); + + atomic64_add(vpay, &iocg->vtime); + atomic64_add(vpay, &iocg->done_vtime); + iocg_pay_debt(iocg, abs_vpay, now); + vbudget -= vpay; + } + + if (iocg->abs_vdebt || iocg->delay) + iocg_kick_delay(iocg, now); - atomic64_add(delta, &iocg->vtime); - atomic64_add(delta, &iocg->done_vtime); - iocg->abs_vdebt -= abs_delta; + /* + * Debt can still be outstanding if we haven't paid all yet or the + * caller raced and called without @pay_debt. Shouldn't wake up waiters + * under debt. Make sure @vbudget reflects the outstanding amount and is + * not positive. + */ + if (iocg->abs_vdebt) { + s64 vdebt = abs_cost_to_cost(iocg->abs_vdebt, hwa); + vbudget = min_t(s64, 0, vbudget - vdebt); } /* - * Wake up the ones which are due and see how much vtime we'll need - * for the next one. + * Wake up the ones which are due and see how much vtime we'll need for + * the next one. As paying off debt restores hw_inuse, it must be read + * after the above debt payment. */ - ctx.hw_inuse = hw_inuse; - ctx.vbudget = vbudget - vdebt; + ctx.vbudget = vbudget; + current_hweight(iocg, NULL, &ctx.hw_inuse); + __wake_up_locked_key(&iocg->waitq, TASK_NORMAL, &ctx); - if (!waitqueue_active(&iocg->waitq)) + + if (!waitqueue_active(&iocg->waitq)) { + if (iocg->wait_since) { + iocg->stat.wait_us += now->now - iocg->wait_since; + iocg->wait_since = 0; + } return; + } + + if (!iocg->wait_since) + iocg->wait_since = now->now; + if (WARN_ON_ONCE(ctx.vbudget >= 0)) return; - /* determine next wakeup, add a quarter margin to guarantee chunking */ + /* determine next wakeup, add a timer margin to guarantee chunking */ vshortage = -ctx.vbudget; expires = now->now_ns + - DIV64_U64_ROUND_UP(vshortage, now->vrate) * NSEC_PER_USEC; - expires += margin_ns / 4; + DIV64_U64_ROUND_UP(vshortage, ioc->vtime_base_rate) * + NSEC_PER_USEC; + expires += ioc->timer_slack_ns; /* if already active and close enough, don't bother */ oexpires = ktime_to_ns(hrtimer_get_softexpires(&iocg->waitq_timer)); if (hrtimer_is_queued(&iocg->waitq_timer) && - abs(oexpires - expires) <= margin_ns / 4) + abs(oexpires - expires) <= ioc->timer_slack_ns) return; hrtimer_start_range_ns(&iocg->waitq_timer, ns_to_ktime(expires), - margin_ns / 4, HRTIMER_MODE_ABS); + ioc->timer_slack_ns, HRTIMER_MODE_ABS); } static enum hrtimer_restart iocg_waitq_timer_fn(struct hrtimer *timer) { struct ioc_gq *iocg = container_of(timer, struct ioc_gq, waitq_timer); + bool pay_debt = READ_ONCE(iocg->abs_vdebt); struct ioc_now now; unsigned long flags; ioc_now(iocg->ioc, &now); - spin_lock_irqsave(&iocg->waitq.lock, flags); - iocg_kick_waitq(iocg, &now); - spin_unlock_irqrestore(&iocg->waitq.lock, flags); - - return HRTIMER_NORESTART; -} - -static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now, u64 cost) -{ - struct ioc *ioc = iocg->ioc; - struct blkcg_gq *blkg = iocg_to_blkg(iocg); - u64 vtime = atomic64_read(&iocg->vtime); - u64 vmargin = ioc->margin_us * now->vrate; - u64 margin_ns = ioc->margin_us * NSEC_PER_USEC; - u64 expires, oexpires; - u32 hw_inuse; - - lockdep_assert_held(&iocg->waitq.lock); - - /* debt-adjust vtime */ - current_hweight(iocg, NULL, &hw_inuse); - vtime += abs_cost_to_cost(iocg->abs_vdebt, hw_inuse); - - /* - * Clear or maintain depending on the overage. Non-zero vdebt is what - * guarantees that @iocg is online and future iocg_kick_delay() will - * clear use_delay. Don't leave it on when there's no vdebt. - */ - if (!iocg->abs_vdebt || time_before_eq64(vtime, now->vnow)) { - blkcg_clear_delay(blkg); - return false; - } - if (!atomic_read(&blkg->use_delay) && - time_before_eq64(vtime, now->vnow + vmargin)) - return false; - - /* use delay */ - if (cost) { - u64 cost_ns = DIV64_U64_ROUND_UP(cost * NSEC_PER_USEC, - now->vrate); - blkcg_add_delay(blkg, now->now_ns, cost_ns); - } - blkcg_use_delay(blkg); - - expires = now->now_ns + DIV64_U64_ROUND_UP(vtime - now->vnow, - now->vrate) * NSEC_PER_USEC; - - /* if already active and close enough, don't bother */ - oexpires = ktime_to_ns(hrtimer_get_softexpires(&iocg->delay_timer)); - if (hrtimer_is_queued(&iocg->delay_timer) && - abs(oexpires - expires) <= margin_ns / 4) - return true; - - hrtimer_start_range_ns(&iocg->delay_timer, ns_to_ktime(expires), - margin_ns / 4, HRTIMER_MODE_ABS); - return true; -} - -static enum hrtimer_restart iocg_delay_timer_fn(struct hrtimer *timer) -{ - struct ioc_gq *iocg = container_of(timer, struct ioc_gq, delay_timer); - struct ioc_now now; - unsigned long flags; - - spin_lock_irqsave(&iocg->waitq.lock, flags); - ioc_now(iocg->ioc, &now); - iocg_kick_delay(iocg, &now, 0); - spin_unlock_irqrestore(&iocg->waitq.lock, flags); + iocg_lock(iocg, pay_debt, &flags); + iocg_kick_waitq(iocg, pay_debt, &now); + iocg_unlock(iocg, pay_debt, &flags); return HRTIMER_NORESTART; } @@ -1286,8 +1573,8 @@ static void ioc_lat_stat(struct ioc *ioc, u32 *missed_ppm_ar, u32 *rq_wait_pct_p u64 this_rq_wait_ns; for (rw = READ; rw <= WRITE; rw++) { - u32 this_met = READ_ONCE(stat->missed[rw].nr_met); - u32 this_missed = READ_ONCE(stat->missed[rw].nr_missed); + u32 this_met = local_read(&stat->missed[rw].nr_met); + u32 this_missed = local_read(&stat->missed[rw].nr_missed); nr_met[rw] += this_met - stat->missed[rw].last_met; nr_missed[rw] += this_missed - stat->missed[rw].last_missed; @@ -1295,7 +1582,7 @@ static void ioc_lat_stat(struct ioc *ioc, u32 *missed_ppm_ar, u32 *rq_wait_pct_p stat->missed[rw].last_missed = this_missed; } - this_rq_wait_ns = READ_ONCE(stat->rq_wait_ns); + this_rq_wait_ns = local64_read(&stat->rq_wait_ns); rq_wait_ns += this_rq_wait_ns - stat->last_rq_wait_ns; stat->last_rq_wait_ns = this_rq_wait_ns; } @@ -1330,211 +1617,741 @@ static bool iocg_is_idle(struct ioc_gq *iocg) return true; } -/* returns usage with margin added if surplus is large enough */ -static u32 surplus_adjusted_hweight_inuse(u32 usage, u32 hw_inuse) +/* + * Call this function on the target leaf @iocg's to build pre-order traversal + * list of all the ancestors in @inner_walk. The inner nodes are linked through + * ->walk_list and the caller is responsible for dissolving the list after use. + */ +static void iocg_build_inner_walk(struct ioc_gq *iocg, + struct list_head *inner_walk) { - /* add margin */ - usage = DIV_ROUND_UP(usage * SURPLUS_SCALE_PCT, 100); - usage += SURPLUS_SCALE_ABS; + int lvl; - /* don't bother if the surplus is too small */ - if (usage + SURPLUS_MIN_ADJ_DELTA > hw_inuse) - return 0; + WARN_ON_ONCE(!list_empty(&iocg->walk_list)); + + /* find the first ancestor which hasn't been visited yet */ + for (lvl = iocg->level - 1; lvl >= 0; lvl--) { + if (!list_empty(&iocg->ancestors[lvl]->walk_list)) + break; + } + + /* walk down and visit the inner nodes to get pre-order traversal */ + while (++lvl <= iocg->level - 1) { + struct ioc_gq *inner = iocg->ancestors[lvl]; - return usage; + /* record traversal order */ + list_add_tail(&inner->walk_list, inner_walk); + } } -static void ioc_timer_fn(struct timer_list *timer) +/* propagate the deltas to the parent */ +static void iocg_flush_stat_upward(struct ioc_gq *iocg) { - struct ioc *ioc = container_of(timer, struct ioc, timer); - struct ioc_gq *iocg, *tiocg; - struct ioc_now now; - int nr_surpluses = 0, nr_shortages = 0, nr_lagging = 0; - u32 ppm_rthr = MILLION - ioc->params.qos[QOS_RPPM]; - u32 ppm_wthr = MILLION - ioc->params.qos[QOS_WPPM]; - u32 missed_ppm[2], rq_wait_pct; - u64 period_vtime; - int prev_busy_level, i; + if (iocg->level > 0) { + struct iocg_stat *parent_stat = + &iocg->ancestors[iocg->level - 1]->stat; + + parent_stat->usage_us += + iocg->stat.usage_us - iocg->last_stat.usage_us; + parent_stat->wait_us += + iocg->stat.wait_us - iocg->last_stat.wait_us; + parent_stat->indebt_us += + iocg->stat.indebt_us - iocg->last_stat.indebt_us; + parent_stat->indelay_us += + iocg->stat.indelay_us - iocg->last_stat.indelay_us; + } - /* how were the latencies during the period? */ - ioc_lat_stat(ioc, missed_ppm, &rq_wait_pct); + iocg->last_stat = iocg->stat; +} - /* take care of active iocgs */ - spin_lock_irq(&ioc->lock); +/* collect per-cpu counters and propagate the deltas to the parent */ +static void iocg_flush_stat_leaf(struct ioc_gq *iocg, struct ioc_now *now) +{ + struct ioc *ioc = iocg->ioc; + u64 abs_vusage = 0; + u64 vusage_delta; + int cpu; - ioc_now(ioc, &now); + lockdep_assert_held(&iocg->ioc->lock); - period_vtime = now.vnow - ioc->period_at_vtime; - if (WARN_ON_ONCE(!period_vtime)) { - spin_unlock_irq(&ioc->lock); - return; + /* collect per-cpu counters */ + for_each_possible_cpu(cpu) { + abs_vusage += local64_read( + per_cpu_ptr(&iocg->pcpu_stat->abs_vusage, cpu)); } + vusage_delta = abs_vusage - iocg->last_stat_abs_vusage; + iocg->last_stat_abs_vusage = abs_vusage; - /* - * Waiters determine the sleep durations based on the vrate they - * saw at the time of sleep. If vrate has increased, some waiters - * could be sleeping for too long. Wake up tardy waiters which - * should have woken up in the last period and expire idle iocgs. - */ - list_for_each_entry_safe(iocg, tiocg, &ioc->active_iocgs, active_list) { - if (!waitqueue_active(&iocg->waitq) && !iocg->abs_vdebt && - !iocg_is_idle(iocg)) - continue; + iocg->usage_delta_us = div64_u64(vusage_delta, ioc->vtime_base_rate); + iocg->stat.usage_us += iocg->usage_delta_us; - spin_lock(&iocg->waitq.lock); + iocg_flush_stat_upward(iocg); +} - if (waitqueue_active(&iocg->waitq) || iocg->abs_vdebt) { - /* might be oversleeping vtime / hweight changes, kick */ - iocg_kick_waitq(iocg, &now); - iocg_kick_delay(iocg, &now, 0); - } else if (iocg_is_idle(iocg)) { - /* no waiter and idle, deactivate */ - iocg->last_inuse = iocg->inuse; - __propagate_active_weight(iocg, 0, 0); - list_del_init(&iocg->active_list); - } +/* get stat counters ready for reading on all active iocgs */ +static void iocg_flush_stat(struct list_head *target_iocgs, struct ioc_now *now) +{ + LIST_HEAD(inner_walk); + struct ioc_gq *iocg, *tiocg; - spin_unlock(&iocg->waitq.lock); + /* flush leaves and build inner node walk list */ + list_for_each_entry(iocg, target_iocgs, active_list) { + iocg_flush_stat_leaf(iocg, now); + iocg_build_inner_walk(iocg, &inner_walk); } - commit_active_weights(ioc); - /* calc usages and see whether some weights need to be moved around */ - list_for_each_entry(iocg, &ioc->active_iocgs, active_list) { - u64 vdone, vtime, vusage, vmargin, vmin; - u32 hw_active, hw_inuse, usage; + /* keep flushing upwards by walking the inner list backwards */ + list_for_each_entry_safe_reverse(iocg, tiocg, &inner_walk, walk_list) { + iocg_flush_stat_upward(iocg); + list_del_init(&iocg->walk_list); + } +} - /* - * Collect unused and wind vtime closer to vnow to prevent - * iocgs from accumulating a large amount of budget. - */ - vdone = atomic64_read(&iocg->done_vtime); - vtime = atomic64_read(&iocg->vtime); - current_hweight(iocg, &hw_active, &hw_inuse); +/* + * Determine what @iocg's hweight_inuse should be after donating unused + * capacity. @hwm is the upper bound and used to signal no donation. This + * function also throws away @iocg's excess budget. + */ +static u32 hweight_after_donation(struct ioc_gq *iocg, u32 old_hwi, u32 hwm, + u32 usage, struct ioc_now *now) +{ + struct ioc *ioc = iocg->ioc; + u64 vtime = atomic64_read(&iocg->vtime); + s64 excess, delta, target, new_hwi; + + /* debt handling owns inuse for debtors */ + if (iocg->abs_vdebt) + return 1; + + /* see whether minimum margin requirement is met */ + if (waitqueue_active(&iocg->waitq) || + time_after64(vtime, now->vnow - ioc->margins.min)) + return hwm; + + /* throw away excess above target */ + excess = now->vnow - vtime - ioc->margins.target; + if (excess > 0) { + atomic64_add(excess, &iocg->vtime); + atomic64_add(excess, &iocg->done_vtime); + vtime += excess; + ioc->vtime_err -= div64_u64(excess * old_hwi, WEIGHT_ONE); + } - /* - * Latency QoS detection doesn't account for IOs which are - * in-flight for longer than a period. Detect them by - * comparing vdone against period start. If lagging behind - * IOs from past periods, don't increase vrate. - */ - if ((ppm_rthr != MILLION || ppm_wthr != MILLION) && - !atomic_read(&iocg_to_blkg(iocg)->use_delay) && - time_after64(vtime, vdone) && - time_after64(vtime, now.vnow - - MAX_LAGGING_PERIODS * period_vtime) && - time_before64(vdone, now.vnow - period_vtime)) - nr_lagging++; + /* + * Let's say the distance between iocg's and device's vtimes as a + * fraction of period duration is delta. Assuming that the iocg will + * consume the usage determined above, we want to determine new_hwi so + * that delta equals MARGIN_TARGET at the end of the next period. + * + * We need to execute usage worth of IOs while spending the sum of the + * new budget (1 - MARGIN_TARGET) and the leftover from the last period + * (delta): + * + * usage = (1 - MARGIN_TARGET + delta) * new_hwi + * + * Therefore, the new_hwi is: + * + * new_hwi = usage / (1 - MARGIN_TARGET + delta) + */ + delta = div64_s64(WEIGHT_ONE * (now->vnow - vtime), + now->vnow - ioc->period_at_vtime); + target = WEIGHT_ONE * MARGIN_TARGET_PCT / 100; + new_hwi = div64_s64(WEIGHT_ONE * usage, WEIGHT_ONE - target + delta); - if (waitqueue_active(&iocg->waitq)) - vusage = now.vnow - iocg->last_vtime; - else if (time_before64(iocg->last_vtime, vtime)) - vusage = vtime - iocg->last_vtime; - else - vusage = 0; + return clamp_t(s64, new_hwi, 1, hwm); +} - iocg->last_vtime += vusage; +/* + * For work-conservation, an iocg which isn't using all of its share should + * donate the leftover to other iocgs. There are two ways to achieve this - 1. + * bumping up vrate accordingly 2. lowering the donating iocg's inuse weight. + * + * #1 is mathematically simpler but has the drawback of requiring synchronous + * global hweight_inuse updates when idle iocg's get activated or inuse weights + * change due to donation snapbacks as it has the possibility of grossly + * overshooting what's allowed by the model and vrate. + * + * #2 is inherently safe with local operations. The donating iocg can easily + * snap back to higher weights when needed without worrying about impacts on + * other nodes as the impacts will be inherently correct. This also makes idle + * iocg activations safe. The only effect activations have is decreasing + * hweight_inuse of others, the right solution to which is for those iocgs to + * snap back to higher weights. + * + * So, we go with #2. The challenge is calculating how each donating iocg's + * inuse should be adjusted to achieve the target donation amounts. This is done + * using Andy's method described in the following pdf. + * + * https://drive.google.com/file/d/1PsJwxPFtjUnwOY1QJ5AeICCcsL7BM3bo + * + * Given the weights and target after-donation hweight_inuse values, Andy's + * method determines how the proportional distribution should look like at each + * sibling level to maintain the relative relationship between all non-donating + * pairs. To roughly summarize, it divides the tree into donating and + * non-donating parts, calculates global donation rate which is used to + * determine the target hweight_inuse for each node, and then derives per-level + * proportions. + * + * The following pdf shows that global distribution calculated this way can be + * achieved by scaling inuse weights of donating leaves and propagating the + * adjustments upwards proportionally. + * + * https://drive.google.com/file/d/1vONz1-fzVO7oY5DXXsLjSxEtYYQbOvsE + * + * Combining the above two, we can determine how each leaf iocg's inuse should + * be adjusted to achieve the target donation. + * + * https://drive.google.com/file/d/1WcrltBOSPN0qXVdBgnKm4mdp9FhuEFQN + * + * The inline comments use symbols from the last pdf. + * + * b is the sum of the absolute budgets in the subtree. 1 for the root node. + * f is the sum of the absolute budgets of non-donating nodes in the subtree. + * t is the sum of the absolute budgets of donating nodes in the subtree. + * w is the weight of the node. w = w_f + w_t + * w_f is the non-donating portion of w. w_f = w * f / b + * w_b is the donating portion of w. w_t = w * t / b + * s is the sum of all sibling weights. s = Sum(w) for siblings + * s_f and s_t are the non-donating and donating portions of s. + * + * Subscript p denotes the parent's counterpart and ' the adjusted value - e.g. + * w_pt is the donating portion of the parent's weight and w'_pt the same value + * after adjustments. Subscript r denotes the root node's values. + */ +static void transfer_surpluses(struct list_head *surpluses, struct ioc_now *now) +{ + LIST_HEAD(over_hwa); + LIST_HEAD(inner_walk); + struct ioc_gq *iocg, *tiocg, *root_iocg; + u32 after_sum, over_sum, over_target, gamma; + + /* + * It's pretty unlikely but possible for the total sum of + * hweight_after_donation's to be higher than WEIGHT_ONE, which will + * confuse the following calculations. If such condition is detected, + * scale down everyone over its full share equally to keep the sum below + * WEIGHT_ONE. + */ + after_sum = 0; + over_sum = 0; + list_for_each_entry(iocg, surpluses, surplus_list) { + u32 hwa; + + current_hweight(iocg, &hwa, NULL); + after_sum += iocg->hweight_after_donation; + + if (iocg->hweight_after_donation > hwa) { + over_sum += iocg->hweight_after_donation; + list_add(&iocg->walk_list, &over_hwa); + } + } + + if (after_sum >= WEIGHT_ONE) { /* - * Factor in in-flight vtime into vusage to avoid - * high-latency completions appearing as idle. This should - * be done after the above ->last_time adjustment. + * The delta should be deducted from the over_sum, calculate + * target over_sum value. */ - vusage = max(vusage, vtime - vdone); - - /* calculate hweight based usage ratio and record */ - if (vusage) { - usage = DIV64_U64_ROUND_UP(vusage * hw_inuse, - period_vtime); - iocg->usage_idx = (iocg->usage_idx + 1) % NR_USAGE_SLOTS; - iocg->usages[iocg->usage_idx] = usage; - } else { - usage = 0; + u32 over_delta = after_sum - (WEIGHT_ONE - 1); + WARN_ON_ONCE(over_sum <= over_delta); + over_target = over_sum - over_delta; + } else { + over_target = 0; + } + + list_for_each_entry_safe(iocg, tiocg, &over_hwa, walk_list) { + if (over_target) + iocg->hweight_after_donation = + div_u64((u64)iocg->hweight_after_donation * + over_target, over_sum); + list_del_init(&iocg->walk_list); + } + + /* + * Build pre-order inner node walk list and prepare for donation + * adjustment calculations. + */ + list_for_each_entry(iocg, surpluses, surplus_list) { + iocg_build_inner_walk(iocg, &inner_walk); + } + + root_iocg = list_first_entry(&inner_walk, struct ioc_gq, walk_list); + WARN_ON_ONCE(root_iocg->level > 0); + + list_for_each_entry(iocg, &inner_walk, walk_list) { + iocg->child_adjusted_sum = 0; + iocg->hweight_donating = 0; + iocg->hweight_after_donation = 0; + } + + /* + * Propagate the donating budget (b_t) and after donation budget (b'_t) + * up the hierarchy. + */ + list_for_each_entry(iocg, surpluses, surplus_list) { + struct ioc_gq *parent = iocg->ancestors[iocg->level - 1]; + + parent->hweight_donating += iocg->hweight_donating; + parent->hweight_after_donation += iocg->hweight_after_donation; + } + + list_for_each_entry_reverse(iocg, &inner_walk, walk_list) { + if (iocg->level > 0) { + struct ioc_gq *parent = iocg->ancestors[iocg->level - 1]; + + parent->hweight_donating += iocg->hweight_donating; + parent->hweight_after_donation += iocg->hweight_after_donation; } + } - /* see whether there's surplus vtime */ - vmargin = ioc->margin_us * now.vrate; - vmin = now.vnow - vmargin; - - iocg->has_surplus = false; - - if (!waitqueue_active(&iocg->waitq) && - time_before64(vtime, vmin)) { - u64 delta = vmin - vtime; - - /* throw away surplus vtime */ - atomic64_add(delta, &iocg->vtime); - atomic64_add(delta, &iocg->done_vtime); - iocg->last_vtime += delta; - /* if usage is sufficiently low, maybe it can donate */ - if (surplus_adjusted_hweight_inuse(usage, hw_inuse)) { - iocg->has_surplus = true; - nr_surpluses++; - } - } else if (hw_inuse < hw_active) { - u32 new_hwi, new_inuse; - - /* was donating but might need to take back some */ - if (waitqueue_active(&iocg->waitq)) { - new_hwi = hw_active; - } else { - new_hwi = max(hw_inuse, - usage * SURPLUS_SCALE_PCT / 100 + - SURPLUS_SCALE_ABS); - } + /* + * Calculate inner hwa's (b) and make sure the donation values are + * within the accepted ranges as we're doing low res calculations with + * roundups. + */ + list_for_each_entry(iocg, &inner_walk, walk_list) { + if (iocg->level) { + struct ioc_gq *parent = iocg->ancestors[iocg->level - 1]; - new_inuse = div64_u64((u64)iocg->inuse * new_hwi, - hw_inuse); - new_inuse = clamp_t(u32, new_inuse, 1, iocg->active); + iocg->hweight_active = DIV64_U64_ROUND_UP( + (u64)parent->hweight_active * iocg->active, + parent->child_active_sum); - if (new_inuse > iocg->inuse) { - TRACE_IOCG_PATH(inuse_takeback, iocg, &now, - iocg->inuse, new_inuse, - hw_inuse, new_hwi); - __propagate_active_weight(iocg, iocg->weight, - new_inuse); - } - } else { - /* genuninely out of vtime */ - nr_shortages++; } + + iocg->hweight_donating = min(iocg->hweight_donating, + iocg->hweight_active); + iocg->hweight_after_donation = min(iocg->hweight_after_donation, + iocg->hweight_donating - 1); + if (WARN_ON_ONCE(iocg->hweight_active <= 1 || + iocg->hweight_donating <= 1 || + iocg->hweight_after_donation == 0)) { + pr_warn("iocg: invalid donation weights in "); + pr_cont_cgroup_path(iocg_to_blkg(iocg)->blkcg->css.cgroup); + pr_cont(": active=%u donating=%u after=%u\n", + iocg->hweight_active, iocg->hweight_donating, + iocg->hweight_after_donation); + } + } + + /* + * Calculate the global donation rate (gamma) - the rate to adjust + * non-donating budgets by. + * + * No need to use 64bit multiplication here as the first operand is + * guaranteed to be smaller than WEIGHT_ONE (1<<16). + * + * We know that there are beneficiary nodes and the sum of the donating + * hweights can't be whole; however, due to the round-ups during hweight + * calculations, root_iocg->hweight_donating might still end up equal to + * or greater than whole. Limit the range when calculating the divider. + * + * gamma = (1 - t_r') / (1 - t_r) + */ + gamma = DIV_ROUND_UP( + (WEIGHT_ONE - root_iocg->hweight_after_donation) * WEIGHT_ONE, + WEIGHT_ONE - min_t(u32, root_iocg->hweight_donating, WEIGHT_ONE - 1)); + + /* + * Calculate adjusted hwi, child_adjusted_sum and inuse for the inner + * nodes. + */ + list_for_each_entry(iocg, &inner_walk, walk_list) { + struct ioc_gq *parent; + u32 inuse, wpt, wptp; + u64 st, sf; + + if (iocg->level == 0) { + /* adjusted weight sum for 1st level: s' = s * b_pf / b'_pf */ + iocg->child_adjusted_sum = DIV64_U64_ROUND_UP( + iocg->child_active_sum * (WEIGHT_ONE - iocg->hweight_donating), + WEIGHT_ONE - iocg->hweight_after_donation); + continue; + } + + parent = iocg->ancestors[iocg->level - 1]; + + /* b' = gamma * b_f + b_t' */ + iocg->hweight_inuse = DIV64_U64_ROUND_UP( + (u64)gamma * (iocg->hweight_active - iocg->hweight_donating), + WEIGHT_ONE) + iocg->hweight_after_donation; + + /* w' = s' * b' / b'_p */ + inuse = DIV64_U64_ROUND_UP( + (u64)parent->child_adjusted_sum * iocg->hweight_inuse, + parent->hweight_inuse); + + /* adjusted weight sum for children: s' = s_f + s_t * w'_pt / w_pt */ + st = DIV64_U64_ROUND_UP( + iocg->child_active_sum * iocg->hweight_donating, + iocg->hweight_active); + sf = iocg->child_active_sum - st; + wpt = DIV64_U64_ROUND_UP( + (u64)iocg->active * iocg->hweight_donating, + iocg->hweight_active); + wptp = DIV64_U64_ROUND_UP( + (u64)inuse * iocg->hweight_after_donation, + iocg->hweight_inuse); + + iocg->child_adjusted_sum = sf + DIV64_U64_ROUND_UP(st * wptp, wpt); + } + + /* + * All inner nodes now have ->hweight_inuse and ->child_adjusted_sum and + * we can finally determine leaf adjustments. + */ + list_for_each_entry(iocg, surpluses, surplus_list) { + struct ioc_gq *parent = iocg->ancestors[iocg->level - 1]; + u32 inuse; + + /* + * In-debt iocgs participated in the donation calculation with + * the minimum target hweight_inuse. Configuring inuse + * accordingly would work fine but debt handling expects + * @iocg->inuse stay at the minimum and we don't wanna + * interfere. + */ + if (iocg->abs_vdebt) { + WARN_ON_ONCE(iocg->inuse > 1); + continue; + } + + /* w' = s' * b' / b'_p, note that b' == b'_t for donating leaves */ + inuse = DIV64_U64_ROUND_UP( + parent->child_adjusted_sum * iocg->hweight_after_donation, + parent->hweight_inuse); + + TRACE_IOCG_PATH(inuse_transfer, iocg, now, + iocg->inuse, inuse, + iocg->hweight_inuse, + iocg->hweight_after_donation); + + __propagate_weights(iocg, iocg->active, inuse, true, now); + } + + /* walk list should be dissolved after use */ + list_for_each_entry_safe(iocg, tiocg, &inner_walk, walk_list) + list_del_init(&iocg->walk_list); +} + +/* + * A low weight iocg can amass a large amount of debt, for example, when + * anonymous memory gets reclaimed aggressively. If the system has a lot of + * memory paired with a slow IO device, the debt can span multiple seconds or + * more. If there are no other subsequent IO issuers, the in-debt iocg may end + * up blocked paying its debt while the IO device is idle. + * + * The following protects against such cases. If the device has been + * sufficiently idle for a while, the debts are halved and delays are + * recalculated. + */ +static void ioc_forgive_debts(struct ioc *ioc, u64 usage_us_sum, int nr_debtors, + struct ioc_now *now) +{ + struct ioc_gq *iocg; + u64 dur, usage_pct, nr_cycles; + + /* if no debtor, reset the cycle */ + if (!nr_debtors) { + ioc->dfgv_period_at = now->now; + ioc->dfgv_period_rem = 0; + ioc->dfgv_usage_us_sum = 0; + return; } - if (!nr_shortages || !nr_surpluses) - goto skip_surplus_transfers; + /* + * Debtors can pass through a lot of writes choking the device and we + * don't want to be forgiving debts while the device is struggling from + * write bursts. If we're missing latency targets, consider the device + * fully utilized. + */ + if (ioc->busy_level > 0) + usage_us_sum = max_t(u64, usage_us_sum, ioc->period_us); + + ioc->dfgv_usage_us_sum += usage_us_sum; + if (time_before64(now->now, ioc->dfgv_period_at + DFGV_PERIOD)) + return; + + /* + * At least DFGV_PERIOD has passed since the last period. Calculate the + * average usage and reset the period counters. + */ + dur = now->now - ioc->dfgv_period_at; + usage_pct = div64_u64(100 * ioc->dfgv_usage_us_sum, dur); + + ioc->dfgv_period_at = now->now; + ioc->dfgv_usage_us_sum = 0; + + /* if was too busy, reset everything */ + if (usage_pct > DFGV_USAGE_PCT) { + ioc->dfgv_period_rem = 0; + return; + } + + /* + * Usage is lower than threshold. Let's forgive some debts. Debt + * forgiveness runs off of the usual ioc timer but its period usually + * doesn't match ioc's. Compensate the difference by performing the + * reduction as many times as would fit in the duration since the last + * run and carrying over the left-over duration in @ioc->dfgv_period_rem + * - if ioc period is 75% of DFGV_PERIOD, one out of three consecutive + * reductions is doubled. + */ + nr_cycles = dur + ioc->dfgv_period_rem; + ioc->dfgv_period_rem = do_div(nr_cycles, DFGV_PERIOD); - /* there are both shortages and surpluses, transfer surpluses */ list_for_each_entry(iocg, &ioc->active_iocgs, active_list) { - u32 usage, hw_active, hw_inuse, new_hwi, new_inuse; - int nr_valid = 0; + u64 __maybe_unused old_debt, __maybe_unused old_delay; - if (!iocg->has_surplus) + if (!iocg->abs_vdebt && !iocg->delay) continue; - /* base the decision on max historical usage */ - for (i = 0, usage = 0; i < NR_USAGE_SLOTS; i++) { - if (iocg->usages[i]) { - usage = max(usage, iocg->usages[i]); - nr_valid++; + spin_lock(&iocg->waitq.lock); + + old_debt = iocg->abs_vdebt; + old_delay = iocg->delay; + + if (iocg->abs_vdebt) + iocg->abs_vdebt = iocg->abs_vdebt >> nr_cycles ?: 1; + if (iocg->delay) + iocg->delay = iocg->delay >> nr_cycles ?: 1; + + iocg_kick_waitq(iocg, true, now); + + TRACE_IOCG_PATH(iocg_forgive_debt, iocg, now, usage_pct, + old_debt, iocg->abs_vdebt, + old_delay, iocg->delay); + + spin_unlock(&iocg->waitq.lock); + } +} + +/* + * Check the active iocgs' state to avoid oversleeping and deactive + * idle iocgs. + * + * Since waiters determine the sleep durations based on the vrate + * they saw at the time of sleep, if vrate has increased, some + * waiters could be sleeping for too long. Wake up tardy waiters + * which should have woken up in the last period and expire idle + * iocgs. + */ +static int ioc_check_iocgs(struct ioc *ioc, struct ioc_now *now) +{ + int nr_debtors = 0; + struct ioc_gq *iocg, *tiocg; + + list_for_each_entry_safe(iocg, tiocg, &ioc->active_iocgs, active_list) { + if (!waitqueue_active(&iocg->waitq) && !iocg->abs_vdebt && + !iocg->delay && !iocg_is_idle(iocg)) + continue; + + spin_lock(&iocg->waitq.lock); + + /* flush wait and indebt stat deltas */ + if (iocg->wait_since) { + iocg->stat.wait_us += now->now - iocg->wait_since; + iocg->wait_since = now->now; + } + if (iocg->indebt_since) { + iocg->stat.indebt_us += + now->now - iocg->indebt_since; + iocg->indebt_since = now->now; + } + if (iocg->indelay_since) { + iocg->stat.indelay_us += + now->now - iocg->indelay_since; + iocg->indelay_since = now->now; + } + + if (waitqueue_active(&iocg->waitq) || iocg->abs_vdebt || + iocg->delay) { + /* might be oversleeping vtime / hweight changes, kick */ + iocg_kick_waitq(iocg, true, now); + if (iocg->abs_vdebt || iocg->delay) + nr_debtors++; + } else if (iocg_is_idle(iocg)) { + /* no waiter and idle, deactivate */ + u64 vtime = atomic64_read(&iocg->vtime); + s64 excess; + + /* + * @iocg has been inactive for a full duration and will + * have a high budget. Account anything above target as + * error and throw away. On reactivation, it'll start + * with the target budget. + */ + excess = now->vnow - vtime - ioc->margins.target; + if (excess > 0) { + u32 old_hwi; + + current_hweight(iocg, NULL, &old_hwi); + ioc->vtime_err -= div64_u64(excess * old_hwi, + WEIGHT_ONE); } + + TRACE_IOCG_PATH(iocg_idle, iocg, now, + atomic64_read(&iocg->active_period), + atomic64_read(&ioc->cur_period), vtime); + __propagate_weights(iocg, 0, 0, false, now); + list_del_init(&iocg->active_list); } - if (nr_valid < MIN_VALID_USAGES) - continue; + spin_unlock(&iocg->waitq.lock); + } + + commit_weights(ioc); + return nr_debtors; +} + +static void ioc_timer_fn(struct timer_list *timer) +{ + struct ioc *ioc = container_of(timer, struct ioc, timer); + struct ioc_gq *iocg, *tiocg; + struct ioc_now now; + LIST_HEAD(surpluses); + int nr_debtors, nr_shortages = 0, nr_lagging = 0; + u64 usage_us_sum = 0; + u32 ppm_rthr = MILLION - ioc->params.qos[QOS_RPPM]; + u32 ppm_wthr = MILLION - ioc->params.qos[QOS_WPPM]; + u32 missed_ppm[2], rq_wait_pct; + u64 period_vtime; + int prev_busy_level; + + /* how were the latencies during the period? */ + ioc_lat_stat(ioc, missed_ppm, &rq_wait_pct); + + /* take care of active iocgs */ + spin_lock_irq(&ioc->lock); + + ioc_now(ioc, &now); + + period_vtime = now.vnow - ioc->period_at_vtime; + if (WARN_ON_ONCE(!period_vtime)) { + spin_unlock_irq(&ioc->lock); + return; + } + + nr_debtors = ioc_check_iocgs(ioc, &now); + + /* + * Wait and indebt stat are flushed above and the donation calculation + * below needs updated usage stat. Let's bring stat up-to-date. + */ + iocg_flush_stat(&ioc->active_iocgs, &now); + + /* calc usage and see whether some weights need to be moved around */ + list_for_each_entry(iocg, &ioc->active_iocgs, active_list) { + u64 vdone, vtime, usage_us; + u32 hw_active, hw_inuse; + + /* + * Collect unused and wind vtime closer to vnow to prevent + * iocgs from accumulating a large amount of budget. + */ + vdone = atomic64_read(&iocg->done_vtime); + vtime = atomic64_read(&iocg->vtime); current_hweight(iocg, &hw_active, &hw_inuse); - new_hwi = surplus_adjusted_hweight_inuse(usage, hw_inuse); - if (!new_hwi) - continue; - new_inuse = DIV64_U64_ROUND_UP((u64)iocg->inuse * new_hwi, - hw_inuse); - if (new_inuse < iocg->inuse) { - TRACE_IOCG_PATH(inuse_giveaway, iocg, &now, - iocg->inuse, new_inuse, - hw_inuse, new_hwi); - __propagate_active_weight(iocg, iocg->weight, new_inuse); + /* + * Latency QoS detection doesn't account for IOs which are + * in-flight for longer than a period. Detect them by + * comparing vdone against period start. If lagging behind + * IOs from past periods, don't increase vrate. + */ + if ((ppm_rthr != MILLION || ppm_wthr != MILLION) && + !atomic_read(&iocg_to_blkg(iocg)->use_delay) && + time_after64(vtime, vdone) && + time_after64(vtime, now.vnow - + MAX_LAGGING_PERIODS * period_vtime) && + time_before64(vdone, now.vnow - period_vtime)) + nr_lagging++; + + /* + * Determine absolute usage factoring in in-flight IOs to avoid + * high-latency completions appearing as idle. + */ + usage_us = iocg->usage_delta_us; + usage_us_sum += usage_us; + + /* see whether there's surplus vtime */ + WARN_ON_ONCE(!list_empty(&iocg->surplus_list)); + if (hw_inuse < hw_active || + (!waitqueue_active(&iocg->waitq) && + time_before64(vtime, now.vnow - ioc->margins.low))) { + u32 hwa, old_hwi, hwm, new_hwi, usage; + u64 usage_dur; + + if (vdone != vtime) { + u64 inflight_us = DIV64_U64_ROUND_UP( + cost_to_abs_cost(vtime - vdone, hw_inuse), + ioc->vtime_base_rate); + + usage_us = max(usage_us, inflight_us); + } + + /* convert to hweight based usage ratio */ + if (time_after64(iocg->activated_at, ioc->period_at)) + usage_dur = max_t(u64, now.now - iocg->activated_at, 1); + else + usage_dur = max_t(u64, now.now - ioc->period_at, 1); + + usage = clamp_t(u32, + DIV64_U64_ROUND_UP(usage_us * WEIGHT_ONE, + usage_dur), + 1, WEIGHT_ONE); + + /* + * Already donating or accumulated enough to start. + * Determine the donation amount. + */ + current_hweight(iocg, &hwa, &old_hwi); + hwm = current_hweight_max(iocg); + new_hwi = hweight_after_donation(iocg, old_hwi, hwm, + usage, &now); + /* + * Donation calculation assumes hweight_after_donation + * to be positive, a condition that a donor w/ hwa < 2 + * can't meet. Don't bother with donation if hwa is + * below 2. It's not gonna make a meaningful difference + * anyway. + */ + if (new_hwi < hwm && hwa >= 2) { + iocg->hweight_donating = hwa; + iocg->hweight_after_donation = new_hwi; + list_add(&iocg->surplus_list, &surpluses); + } else if (!iocg->abs_vdebt) { + /* + * @iocg doesn't have enough to donate. Reset + * its inuse to active. + * + * Don't reset debtors as their inuse's are + * owned by debt handling. This shouldn't affect + * donation calculuation in any meaningful way + * as @iocg doesn't have a meaningful amount of + * share anyway. + */ + TRACE_IOCG_PATH(inuse_shortage, iocg, &now, + iocg->inuse, iocg->active, + iocg->hweight_inuse, new_hwi); + + __propagate_weights(iocg, iocg->active, + iocg->active, true, &now); + nr_shortages++; + } + } else { + /* genuinely short on vtime */ + nr_shortages++; } } -skip_surplus_transfers: - commit_active_weights(ioc); + + if (!list_empty(&surpluses) && nr_shortages) + transfer_surpluses(&surpluses, &now); + + commit_weights(ioc); + + /* surplus list should be dissolved after use */ + list_for_each_entry_safe(iocg, tiocg, &surpluses, surplus_list) + list_del_init(&iocg->surplus_list); /* * If q is getting clogged or we're missing too much, we're issuing @@ -1562,11 +2379,9 @@ static void ioc_timer_fn(struct timer_list *timer) /* * If there are IOs spanning multiple periods, wait - * them out before pushing the device harder. If - * there are surpluses, let redistribution work it - * out first. + * them out before pushing the device harder. */ - if (!nr_lagging && !nr_surpluses) + if (!nr_lagging) ioc->busy_level--; } else { /* @@ -1584,56 +2399,13 @@ static void ioc_timer_fn(struct timer_list *timer) ioc->busy_level = clamp(ioc->busy_level, -1000, 1000); - if (ioc->busy_level > 0 || (ioc->busy_level < 0 && !nr_lagging)) { - u64 vrate = atomic64_read(&ioc->vtime_rate); - u64 vrate_min = ioc->vrate_min, vrate_max = ioc->vrate_max; - - /* rq_wait signal is always reliable, ignore user vrate_min */ - if (rq_wait_pct > RQ_WAIT_BUSY_PCT) - vrate_min = VRATE_MIN; - - /* - * If vrate is out of bounds, apply clamp gradually as the - * bounds can change abruptly. Otherwise, apply busy_level - * based adjustment. - */ - if (vrate < vrate_min) { - vrate = div64_u64(vrate * (100 + VRATE_CLAMP_ADJ_PCT), - 100); - vrate = min(vrate, vrate_min); - } else if (vrate > vrate_max) { - vrate = div64_u64(vrate * (100 - VRATE_CLAMP_ADJ_PCT), - 100); - vrate = max(vrate, vrate_max); - } else { - int idx = min_t(int, abs(ioc->busy_level), - ARRAY_SIZE(vrate_adj_pct) - 1); - u32 adj_pct = vrate_adj_pct[idx]; - - if (ioc->busy_level > 0) - adj_pct = 100 - adj_pct; - else - adj_pct = 100 + adj_pct; - - vrate = clamp(DIV64_U64_ROUND_UP(vrate * adj_pct, 100), - vrate_min, vrate_max); - } - - trace_iocost_ioc_vrate_adj(ioc, vrate, missed_ppm, rq_wait_pct, - nr_lagging, nr_shortages, - nr_surpluses); - - atomic64_set(&ioc->vtime_rate, vrate); - ioc->inuse_margin_vtime = DIV64_U64_ROUND_UP( - ioc->period_us * vrate * INUSE_MARGIN_PCT, 100); - } else if (ioc->busy_level != prev_busy_level || nr_lagging) { - trace_iocost_ioc_vrate_adj(ioc, atomic64_read(&ioc->vtime_rate), - missed_ppm, rq_wait_pct, nr_lagging, - nr_shortages, nr_surpluses); - } + ioc_adjust_base_vrate(ioc, rq_wait_pct, nr_lagging, nr_shortages, + prev_busy_level, missed_ppm); ioc_refresh_params(ioc, false); + ioc_forgive_debts(ioc, usage_us_sum, nr_debtors, &now); + /* * This period is done. Move onto the next one. If nothing's * going on with the device, stop the timer. @@ -1645,13 +2417,76 @@ static void ioc_timer_fn(struct timer_list *timer) ioc_start_period(ioc, &now); } else { ioc->busy_level = 0; + ioc->vtime_err = 0; ioc->running = IOC_IDLE; } + + ioc_refresh_vrate(ioc, &now); } spin_unlock_irq(&ioc->lock); } +static u64 adjust_inuse_and_calc_cost(struct ioc_gq *iocg, u64 vtime, + u64 abs_cost, struct ioc_now *now) +{ + struct ioc *ioc = iocg->ioc; + struct ioc_margins *margins = &ioc->margins; + u32 __maybe_unused old_inuse = iocg->inuse, __maybe_unused old_hwi; + u32 hwi, adj_step; + s64 margin; + u64 cost, new_inuse; + + current_hweight(iocg, NULL, &hwi); + old_hwi = hwi; + cost = abs_cost_to_cost(abs_cost, hwi); + margin = now->vnow - vtime - cost; + + /* debt handling owns inuse for debtors */ + if (iocg->abs_vdebt) + return cost; + + /* + * We only increase inuse during period and do so if the margin has + * deteriorated since the previous adjustment. + */ + if (margin >= iocg->saved_margin || margin >= margins->low || + iocg->inuse == iocg->active) + return cost; + + spin_lock_irq(&ioc->lock); + + /* we own inuse only when @iocg is in the normal active state */ + if (iocg->abs_vdebt || list_empty(&iocg->active_list)) { + spin_unlock_irq(&ioc->lock); + return cost; + } + + /* + * Bump up inuse till @abs_cost fits in the existing budget. + * adj_step must be determined after acquiring ioc->lock - we might + * have raced and lost to another thread for activation and could + * be reading 0 iocg->active before ioc->lock which will lead to + * infinite loop. + */ + new_inuse = iocg->inuse; + adj_step = DIV_ROUND_UP(iocg->active * INUSE_ADJ_STEP_PCT, 100); + do { + new_inuse = new_inuse + adj_step; + propagate_weights(iocg, iocg->active, new_inuse, true, now); + current_hweight(iocg, NULL, &hwi); + cost = abs_cost_to_cost(abs_cost, hwi); + } while (time_after64(vtime + cost, now->vnow) && + iocg->inuse != iocg->active); + + spin_unlock_irq(&ioc->lock); + + TRACE_IOCG_PATH(inuse_adjust, iocg, now, + old_inuse, iocg->inuse, old_hwi, hwi); + + return cost; +} + static void calc_vtime_cost_builtin(struct bio *bio, struct ioc_gq *iocg, bool is_merge, u64 *costp) { @@ -1701,6 +2536,31 @@ static u64 calc_vtime_cost(struct bio *bio, struct ioc_gq *iocg, bool is_merge) return cost; } +static void calc_size_vtime_cost_builtin(struct request *rq, struct ioc *ioc, + u64 *costp) +{ + unsigned int pages = blk_rq_stats_sectors(rq) >> IOC_SECT_TO_PAGE_SHIFT; + + switch (req_op(rq)) { + case REQ_OP_READ: + *costp = pages * ioc->params.lcoefs[LCOEF_RPAGE]; + break; + case REQ_OP_WRITE: + *costp = pages * ioc->params.lcoefs[LCOEF_WPAGE]; + break; + default: + *costp = 0; + } +} + +static u64 calc_size_vtime_cost(struct request *rq, struct ioc *ioc) +{ + u64 cost; + + calc_size_vtime_cost_builtin(rq, ioc, &cost); + return cost; +} + static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio) { struct blkcg_gq *blkg = bio->bi_blkg; @@ -1708,15 +2568,12 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio) struct ioc_gq *iocg = blkg_to_iocg(blkg); struct ioc_now now; struct iocg_wait wait; - u32 hw_active, hw_inuse; u64 abs_cost, cost, vtime; + bool use_debt, ioc_locked; + unsigned long flags; - /* bypass IOs if disabled or for root cgroup */ - if (!ioc->enabled || !iocg->level) - return; - - /* always activate so that even 0 cost IOs get protected to some level */ - if (!iocg_activate(iocg, &now)) + /* bypass IOs if disabled, still initializing, or for root cgroup */ + if (!ioc->enabled || !iocg || !iocg->level) return; /* calculate the absolute vtime cost */ @@ -1724,22 +2581,12 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio) if (!abs_cost) return; - iocg->cursor = bio_end_sector(bio); + if (!iocg_activate(iocg, &now)) + return; + iocg->cursor = bio_end_sector(bio); vtime = atomic64_read(&iocg->vtime); - current_hweight(iocg, &hw_active, &hw_inuse); - - if (hw_inuse < hw_active && - time_after_eq64(vtime + ioc->inuse_margin_vtime, now.vnow)) { - TRACE_IOCG_PATH(inuse_reset, iocg, &now, - iocg->inuse, iocg->weight, hw_inuse, hw_active); - spin_lock_irq(&ioc->lock); - propagate_active_weight(iocg, iocg->weight, iocg->weight); - spin_unlock_irq(&ioc->lock); - current_hweight(iocg, &hw_active, &hw_inuse); - } - - cost = abs_cost_to_cost(abs_cost, hw_inuse); + cost = adjust_inuse_and_calc_cost(iocg, vtime, abs_cost, &now); /* * If no one's waiting and within budget, issue right away. The @@ -1748,21 +2595,32 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio) */ if (!waitqueue_active(&iocg->waitq) && !iocg->abs_vdebt && time_before_eq64(vtime + cost, now.vnow)) { - iocg_commit_bio(iocg, bio, cost); + iocg_commit_bio(iocg, bio, abs_cost, cost); return; } /* - * We activated above but w/o any synchronization. Deactivation is - * synchronized with waitq.lock and we won't get deactivated as long - * as we're waiting or has debt, so we're good if we're activated - * here. In the unlikely case that we aren't, just issue the IO. + * We're over budget. This can be handled in two ways. IOs which may + * cause priority inversions are punted to @ioc->aux_iocg and charged as + * debt. Otherwise, the issuer is blocked on @iocg->waitq. Debt handling + * requires @ioc->lock, waitq handling @iocg->waitq.lock. Determine + * whether debt handling is needed and acquire locks accordingly. */ - spin_lock_irq(&iocg->waitq.lock); + use_debt = bio_issue_as_root_blkg(bio) || fatal_signal_pending(current); + ioc_locked = use_debt || READ_ONCE(iocg->abs_vdebt); +retry_lock: + iocg_lock(iocg, ioc_locked, &flags); + /* + * @iocg must stay activated for debt and waitq handling. Deactivation + * is synchronized against both ioc->lock and waitq.lock and we won't + * get deactivated as long as we're waiting or has debt, so we're good + * if we're activated here. In the unlikely cases that we aren't, just + * issue the IO. + */ if (unlikely(list_empty(&iocg->active_list))) { - spin_unlock_irq(&iocg->waitq.lock); - iocg_commit_bio(iocg, bio, cost); + iocg_unlock(iocg, ioc_locked, &flags); + iocg_commit_bio(iocg, bio, abs_cost, cost); return; } @@ -1783,15 +2641,26 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio) * clear them and leave @iocg inactive w/ dangling use_delay heavily * penalizing the cgroup and its descendants. */ - if (bio_issue_as_root_blkg(bio) || fatal_signal_pending(current)) { - iocg->abs_vdebt += abs_cost; - if (iocg_kick_delay(iocg, &now, cost)) + if (use_debt) { + iocg_incur_debt(iocg, abs_cost, &now); + if (iocg_kick_delay(iocg, &now)) blkcg_schedule_throttle(rqos->q, (bio->bi_opf & REQ_SWAP) == REQ_SWAP); - spin_unlock_irq(&iocg->waitq.lock); + iocg_unlock(iocg, ioc_locked, &flags); return; } + /* guarantee that iocgs w/ waiters have maximum inuse */ + if (!iocg->abs_vdebt && iocg->inuse != iocg->active) { + if (!ioc_locked) { + iocg_unlock(iocg, false, &flags); + ioc_locked = true; + goto retry_lock; + } + propagate_weights(iocg, iocg->active, iocg->active, true, + &now); + } + /* * Append self to the waitq and schedule the wakeup timer if we're * the first waiter. The timer duration is calculated based on the @@ -1812,9 +2681,9 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio) wait.committed = false; /* will be set true by waker */ __add_wait_queue_entry_tail(&iocg->waitq, &wait.wait); - iocg_kick_waitq(iocg, &now); + iocg_kick_waitq(iocg, ioc_locked, &now); - spin_unlock_irq(&iocg->waitq.lock); + iocg_unlock(iocg, ioc_locked, &flags); while (true) { set_current_state(TASK_UNINTERRUPTIBLE); @@ -1831,15 +2700,14 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq, struct bio *bio) { struct ioc_gq *iocg = blkg_to_iocg(bio->bi_blkg); - struct ioc *ioc = iocg->ioc; + struct ioc *ioc = rqos_to_ioc(rqos); sector_t bio_end = bio_end_sector(bio); struct ioc_now now; - u32 hw_inuse; - u64 abs_cost, cost; + u64 vtime, abs_cost, cost; unsigned long flags; - /* bypass if disabled or for root cgroup */ - if (!ioc->enabled || !iocg->level) + /* bypass if disabled, still initializing, or for root cgroup */ + if (!ioc->enabled || !iocg || !iocg->level) return; abs_cost = calc_vtime_cost(bio, iocg, true); @@ -1847,8 +2715,9 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq, return; ioc_now(ioc, &now); - current_hweight(iocg, NULL, &hw_inuse); - cost = abs_cost_to_cost(abs_cost, hw_inuse); + + vtime = atomic64_read(&iocg->vtime); + cost = adjust_inuse_and_calc_cost(iocg, vtime, abs_cost, &now); /* update cursor if backmerging into the request at the cursor */ if (blk_rq_pos(rq) < bio_end && @@ -1861,7 +2730,7 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq, */ if (rq->bio && rq->bio->bi_iocost_cost && time_before_eq64(atomic64_read(&iocg->vtime) + cost, now.vnow)) { - iocg_commit_bio(iocg, bio, cost); + iocg_commit_bio(iocg, bio, abs_cost, cost); return; } @@ -1870,14 +2739,20 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq, * be for the vast majority of cases. See debt handling in * ioc_rqos_throttle() for details. */ - spin_lock_irqsave(&iocg->waitq.lock, flags); + spin_lock_irqsave(&ioc->lock, flags); + spin_lock(&iocg->waitq.lock); + if (likely(!list_empty(&iocg->active_list))) { - iocg->abs_vdebt += abs_cost; - iocg_kick_delay(iocg, &now, cost); + iocg_incur_debt(iocg, abs_cost, &now); + if (iocg_kick_delay(iocg, &now)) + blkcg_schedule_throttle(rqos->q, + (bio->bi_opf & REQ_SWAP) == REQ_SWAP); } else { - iocg_commit_bio(iocg, bio, cost); + iocg_commit_bio(iocg, bio, abs_cost, cost); } - spin_unlock_irqrestore(&iocg->waitq.lock, flags); + + spin_unlock(&iocg->waitq.lock); + spin_unlock_irqrestore(&ioc->lock, flags); } static void ioc_rqos_done_bio(struct rq_qos *rqos, struct bio *bio) @@ -1891,7 +2766,8 @@ static void ioc_rqos_done_bio(struct rq_qos *rqos, struct bio *bio) static void ioc_rqos_done(struct rq_qos *rqos, struct request *rq) { struct ioc *ioc = rqos_to_ioc(rqos); - u64 on_q_ns, rq_wait_ns; + struct ioc_pcpu_stat *ccs; + u64 on_q_ns, rq_wait_ns, size_nsec; int pidx, rw; if (!ioc->enabled || !rq->alloc_time_ns || !rq->start_time_ns) @@ -1912,13 +2788,19 @@ static void ioc_rqos_done(struct rq_qos *rqos, struct request *rq) on_q_ns = ktime_get_ns() - rq->alloc_time_ns; rq_wait_ns = rq->start_time_ns - rq->alloc_time_ns; + size_nsec = div64_u64(calc_size_vtime_cost(rq, ioc), VTIME_PER_NSEC); + + ccs = get_cpu_ptr(ioc->pcpu_stat); - if (on_q_ns <= ioc->params.qos[pidx] * NSEC_PER_USEC) - this_cpu_inc(ioc->pcpu_stat->missed[rw].nr_met); + if (on_q_ns <= size_nsec || + on_q_ns - size_nsec <= ioc->params.qos[pidx] * NSEC_PER_USEC) + local_inc(&ccs->missed[rw].nr_met); else - this_cpu_inc(ioc->pcpu_stat->missed[rw].nr_missed); + local_inc(&ccs->missed[rw].nr_missed); + + local64_add(rq_wait_ns, &ccs->rq_wait_ns); - this_cpu_add(ioc->pcpu_stat->rq_wait_ns, rq_wait_ns); + put_cpu_ptr(ccs); } static void ioc_rqos_queue_depth_changed(struct rq_qos *rqos) @@ -1958,7 +2840,7 @@ static int blk_iocost_init(struct request_queue *q) { struct ioc *ioc; struct rq_qos *rqos; - int ret; + int i, cpu, ret; ioc = kzalloc(sizeof(*ioc), GFP_KERNEL); if (!ioc) @@ -1970,6 +2852,16 @@ static int blk_iocost_init(struct request_queue *q) return -ENOMEM; } + for_each_possible_cpu(cpu) { + struct ioc_pcpu_stat *ccs = per_cpu_ptr(ioc->pcpu_stat, cpu); + + for (i = 0; i < ARRAY_SIZE(ccs->missed); i++) { + local_set(&ccs->missed[i].nr_met, 0); + local_set(&ccs->missed[i].nr_missed, 0); + } + local64_set(&ccs->rq_wait_ns, 0); + } + rqos = &ioc->rqos; rqos->id = RQ_QOS_COST; rqos->ops = &ioc_rqos_ops; @@ -1980,6 +2872,7 @@ static int blk_iocost_init(struct request_queue *q) INIT_LIST_HEAD(&ioc->active_iocgs); ioc->running = IOC_IDLE; + ioc->vtime_base_rate = VTIME_PER_USEC; atomic64_set(&ioc->vtime_rate, VTIME_PER_USEC); seqcount_init(&ioc->period_seqcount); ioc->period_at = ktime_to_us(ktime_get()); @@ -1991,6 +2884,12 @@ static int blk_iocost_init(struct request_queue *q) ioc_refresh_params(ioc, true); spin_unlock_irq(&ioc->lock); + /* + * rqos must be added before activation to allow iocg_pd_init() to + * lookup the ioc from q. This means that the rqos methods may get + * called before policy activation completion, can't assume that the + * target bio has an iocg associated and need to test for NULL iocg. + */ rq_qos_add(q, rqos); ret = blkcg_activate_policy(q, &blkcg_policy_iocost); if (ret) { @@ -2010,7 +2909,7 @@ static struct blkcg_policy_data *ioc_cpd_alloc(gfp_t gfp) if (!iocc) return NULL; - iocc->dfl_weight = CGROUP_WEIGHT_DFL; + iocc->dfl_weight = CGROUP_WEIGHT_DFL * WEIGHT_ONE; return &iocc->cpd; } @@ -2025,11 +2924,16 @@ static struct blkg_policy_data *ioc_pd_alloc(gfp_t gfp, struct request_queue *q, int levels = blkcg->css.cgroup->level + 1; struct ioc_gq *iocg; - iocg = kzalloc_node(sizeof(*iocg) + levels * sizeof(iocg->ancestors[0]), - gfp, q->node); + iocg = kzalloc_node(struct_size(iocg, ancestors, levels), gfp, q->node); if (!iocg) return NULL; + iocg->pcpu_stat = alloc_percpu_gfp(struct iocg_pcpu_stat, gfp); + if (!iocg->pcpu_stat) { + kfree(iocg); + return NULL; + } + return &iocg->pd; } @@ -2049,14 +2953,14 @@ static void ioc_pd_init(struct blkg_policy_data *pd) atomic64_set(&iocg->done_vtime, now.vnow); atomic64_set(&iocg->active_period, atomic64_read(&ioc->cur_period)); INIT_LIST_HEAD(&iocg->active_list); - iocg->hweight_active = HWEIGHT_WHOLE; - iocg->hweight_inuse = HWEIGHT_WHOLE; + INIT_LIST_HEAD(&iocg->walk_list); + INIT_LIST_HEAD(&iocg->surplus_list); + iocg->hweight_active = WEIGHT_ONE; + iocg->hweight_inuse = WEIGHT_ONE; init_waitqueue_head(&iocg->waitq); hrtimer_init(&iocg->waitq_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); iocg->waitq_timer.function = iocg_waitq_timer_fn; - hrtimer_init(&iocg->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - iocg->delay_timer.function = iocg_delay_timer_fn; iocg->level = blkg->blkcg->css.cgroup->level; @@ -2066,7 +2970,7 @@ static void ioc_pd_init(struct blkg_policy_data *pd) } spin_lock_irqsave(&ioc->lock, flags); - weight_updated(iocg); + weight_updated(iocg, &now); spin_unlock_irqrestore(&ioc->lock, flags); } @@ -2078,18 +2982,56 @@ static void ioc_pd_free(struct blkg_policy_data *pd) if (ioc) { spin_lock_irqsave(&ioc->lock, flags); + if (!list_empty(&iocg->active_list)) { - propagate_active_weight(iocg, 0, 0); + struct ioc_now now; + + ioc_now(ioc, &now); + propagate_weights(iocg, 0, 0, false, &now); list_del_init(&iocg->active_list); } + + WARN_ON_ONCE(!list_empty(&iocg->walk_list)); + WARN_ON_ONCE(!list_empty(&iocg->surplus_list)); + spin_unlock_irqrestore(&ioc->lock, flags); hrtimer_cancel(&iocg->waitq_timer); - hrtimer_cancel(&iocg->delay_timer); } + free_percpu(iocg->pcpu_stat); kfree(iocg); } +static size_t ioc_pd_stat(struct blkg_policy_data *pd, char *buf, size_t size) +{ + struct ioc_gq *iocg = pd_to_iocg(pd); + struct ioc *ioc = iocg->ioc; + size_t pos = 0; + + if (!ioc->enabled) + return 0; + + if (iocg->level == 0) { + unsigned vp10k = DIV64_U64_ROUND_CLOSEST( + ioc->vtime_base_rate * 10000, + VTIME_PER_USEC); + pos += scnprintf(buf + pos, size - pos, " cost.vrate=%u.%02u", + vp10k / 100, vp10k % 100); + } + + pos += scnprintf(buf + pos, size - pos, " cost.usage=%llu", + iocg->last_stat.usage_us); + + if (blkcg_debug_stats) + pos += scnprintf(buf + pos, size - pos, + " cost.wait=%llu cost.indebt=%llu cost.indelay=%llu", + iocg->last_stat.wait_us, + iocg->last_stat.indebt_us, + iocg->last_stat.indelay_us); + + return pos; +} + static u64 ioc_weight_prfill(struct seq_file *sf, struct blkg_policy_data *pd, int off) { @@ -2097,7 +3039,7 @@ static u64 ioc_weight_prfill(struct seq_file *sf, struct blkg_policy_data *pd, struct ioc_gq *iocg = pd_to_iocg(pd); if (dname && iocg->cfg_weight) - seq_printf(sf, "%s %u\n", dname, iocg->cfg_weight); + seq_printf(sf, "%s %u\n", dname, iocg->cfg_weight / WEIGHT_ONE); return 0; } @@ -2107,7 +3049,7 @@ static int ioc_weight_show(struct seq_file *sf, void *v) struct blkcg *blkcg = css_to_blkcg(seq_css(sf)); struct ioc_cgrp *iocc = blkcg_to_iocc(blkcg); - seq_printf(sf, "default %u\n", iocc->dfl_weight); + seq_printf(sf, "default %u\n", iocc->dfl_weight / WEIGHT_ONE); blkcg_print_blkgs(sf, blkcg, ioc_weight_prfill, &blkcg_policy_iocost, seq_cft(sf)->private, false); return 0; @@ -2119,6 +3061,7 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf, struct blkcg *blkcg = css_to_blkcg(of_css(of)); struct ioc_cgrp *iocc = blkcg_to_iocc(blkcg); struct blkg_conf_ctx ctx; + struct ioc_now now; struct ioc_gq *iocg; u32 v; int ret; @@ -2132,18 +3075,19 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf, if (v < CGROUP_WEIGHT_MIN || v > CGROUP_WEIGHT_MAX) return -EINVAL; - spin_lock(&blkcg->lock); - iocc->dfl_weight = v; + spin_lock_irq(&blkcg->lock); + iocc->dfl_weight = v * WEIGHT_ONE; hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) { struct ioc_gq *iocg = blkg_to_iocg(blkg); if (iocg) { - spin_lock_irq(&iocg->ioc->lock); - weight_updated(iocg); - spin_unlock_irq(&iocg->ioc->lock); + spin_lock(&iocg->ioc->lock); + ioc_now(iocg->ioc, &now); + weight_updated(iocg, &now); + spin_unlock(&iocg->ioc->lock); } } - spin_unlock(&blkcg->lock); + spin_unlock_irq(&blkcg->lock); return nbytes; } @@ -2164,8 +3108,9 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf, } spin_lock(&iocg->ioc->lock); - iocg->cfg_weight = v; - weight_updated(iocg); + iocg->cfg_weight = v * WEIGHT_ONE; + ioc_now(iocg->ioc, &now); + weight_updated(iocg, &now); spin_unlock(&iocg->ioc->lock); blkg_conf_finish(&ctx); @@ -2321,6 +3266,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input, spin_lock_irq(&ioc->lock); if (enable) { + blk_stat_enable_accounting(ioc->rqos.q); blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, ioc->rqos.q); ioc->enabled = true; } else { @@ -2502,6 +3448,7 @@ static struct blkcg_policy blkcg_policy_iocost = { .pd_alloc_fn = ioc_pd_alloc, .pd_init_fn = ioc_pd_init, .pd_free_fn = ioc_pd_free, + .pd_stat_fn = ioc_pd_stat, }; static int __init ioc_init(void) diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 71a82528d4bfe..a4156b3b33c31 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -86,7 +86,17 @@ struct iolatency_grp; struct blk_iolatency { struct rq_qos rqos; struct timer_list timer; - atomic_t enabled; + + /* + * ->enabled is the master enable switch gating the throttling logic and + * inflight tracking. The number of cgroups which have iolat enabled is + * tracked in ->enable_cnt, and ->enable is flipped on/off accordingly + * from ->enable_work with the request_queue frozen. For details, See + * blkiolatency_enable_work_fn(). + */ + bool enabled; + atomic_t enable_cnt; + struct work_struct enable_work; }; static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos) @@ -94,11 +104,6 @@ static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos) return container_of(rqos, struct blk_iolatency, rqos); } -static inline bool blk_iolatency_enabled(struct blk_iolatency *blkiolat) -{ - return atomic_read(&blkiolat->enabled) > 0; -} - struct child_latency_info { spinlock_t lock; @@ -463,7 +468,7 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio) struct blkcg_gq *blkg = bio->bi_blkg; bool issue_as_root = bio_issue_as_root_blkg(bio); - if (!blk_iolatency_enabled(blkiolat)) + if (!blkiolat->enabled) return; while (blkg && blkg->parent) { @@ -593,7 +598,6 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio) u64 window_start; u64 now = ktime_to_ns(ktime_get()); bool issue_as_root = bio_issue_as_root_blkg(bio); - bool enabled = false; int inflight = 0; blkg = bio->bi_blkg; @@ -604,8 +608,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio) if (!iolat) return; - enabled = blk_iolatency_enabled(iolat->blkiolat); - if (!enabled) + if (!iolat->blkiolat->enabled) return; while (blkg && blkg->parent) { @@ -643,6 +646,7 @@ static void blkcg_iolatency_exit(struct rq_qos *rqos) struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos); del_timer_sync(&blkiolat->timer); + flush_work(&blkiolat->enable_work); blkcg_deactivate_policy(rqos->q, &blkcg_policy_iolatency); kfree(blkiolat); } @@ -714,6 +718,44 @@ static void blkiolatency_timer_fn(struct timer_list *t) rcu_read_unlock(); } +/** + * blkiolatency_enable_work_fn - Enable or disable iolatency on the device + * @work: enable_work of the blk_iolatency of interest + * + * iolatency needs to keep track of the number of in-flight IOs per cgroup. This + * is relatively expensive as it involves walking up the hierarchy twice for + * every IO. Thus, if iolatency is not enabled in any cgroup for the device, we + * want to disable the in-flight tracking. + * + * We have to make sure that the counting is balanced - we don't want to leak + * the in-flight counts by disabling accounting in the completion path while IOs + * are in flight. This is achieved by ensuring that no IO is in flight by + * freezing the queue while flipping ->enabled. As this requires a sleepable + * context, ->enabled flipping is punted to this work function. + */ +static void blkiolatency_enable_work_fn(struct work_struct *work) +{ + struct blk_iolatency *blkiolat = container_of(work, struct blk_iolatency, + enable_work); + bool enabled; + + /* + * There can only be one instance of this function running for @blkiolat + * and it's guaranteed to be executed at least once after the latest + * ->enabled_cnt modification. Acting on the latest ->enable_cnt is + * sufficient. + * + * Also, we know @blkiolat is safe to access as ->enable_work is flushed + * in blkcg_iolatency_exit(). + */ + enabled = atomic_read(&blkiolat->enable_cnt); + if (enabled != blkiolat->enabled) { + blk_mq_freeze_queue(blkiolat->rqos.q); + blkiolat->enabled = enabled; + blk_mq_unfreeze_queue(blkiolat->rqos.q); + } +} + int blk_iolatency_init(struct request_queue *q) { struct blk_iolatency *blkiolat; @@ -739,17 +781,15 @@ int blk_iolatency_init(struct request_queue *q) } timer_setup(&blkiolat->timer, blkiolatency_timer_fn, 0); + INIT_WORK(&blkiolat->enable_work, blkiolatency_enable_work_fn); return 0; } -/* - * return 1 for enabling iolatency, return -1 for disabling iolatency, otherwise - * return 0. - */ -static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) +static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) { struct iolatency_grp *iolat = blkg_to_lat(blkg); + struct blk_iolatency *blkiolat = iolat->blkiolat; u64 oldval = iolat->min_lat_nsec; iolat->min_lat_nsec = val; @@ -757,13 +797,15 @@ static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) iolat->cur_win_nsec = min_t(u64, iolat->cur_win_nsec, BLKIOLATENCY_MAX_WIN_SIZE); - if (!oldval && val) - return 1; + if (!oldval && val) { + if (atomic_inc_return(&blkiolat->enable_cnt) == 1) + schedule_work(&blkiolat->enable_work); + } if (oldval && !val) { blkcg_clear_delay(blkg); - return -1; + if (atomic_dec_return(&blkiolat->enable_cnt) == 0) + schedule_work(&blkiolat->enable_work); } - return 0; } static void iolatency_clear_scaling(struct blkcg_gq *blkg) @@ -795,7 +837,6 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, u64 lat_val = 0; u64 oldval; int ret; - int enable = 0; ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx); if (ret) @@ -830,41 +871,12 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, blkg = ctx.blkg; oldval = iolat->min_lat_nsec; - enable = iolatency_set_min_lat_nsec(blkg, lat_val); - if (enable) { - if (!blk_get_queue(blkg->q)) { - ret = -ENODEV; - goto out; - } - - blkg_get(blkg); - } - - if (oldval != iolat->min_lat_nsec) { + iolatency_set_min_lat_nsec(blkg, lat_val); + if (oldval != iolat->min_lat_nsec) iolatency_clear_scaling(blkg); - } - ret = 0; out: blkg_conf_finish(&ctx); - if (ret == 0 && enable) { - struct iolatency_grp *tmp = blkg_to_lat(blkg); - struct blk_iolatency *blkiolat = tmp->blkiolat; - - blk_mq_freeze_queue(blkg->q); - - if (enable == 1) - atomic_inc(&blkiolat->enabled); - else if (enable == -1) - atomic_dec(&blkiolat->enabled); - else - WARN_ON_ONCE(1); - - blk_mq_unfreeze_queue(blkg->q); - - blkg_put(blkg); - blk_put_queue(blkg->q); - } return ret ?: nbytes; } @@ -1005,14 +1017,8 @@ static void iolatency_pd_offline(struct blkg_policy_data *pd) { struct iolatency_grp *iolat = pd_to_lat(pd); struct blkcg_gq *blkg = lat_to_blkg(iolat); - struct blk_iolatency *blkiolat = iolat->blkiolat; - int ret; - ret = iolatency_set_min_lat_nsec(blkg, 0); - if (ret == 1) - atomic_inc(&blkiolat->enabled); - if (ret == -1) - atomic_dec(&blkiolat->enabled); + iolatency_set_min_lat_nsec(blkg, 0); iolatency_clear_scaling(blkg); } diff --git a/block/blk-iotrace.c b/block/blk-iotrace.c new file mode 100644 index 0000000000000..4de4adf958b5a --- /dev/null +++ b/block/blk-iotrace.c @@ -0,0 +1,557 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Block iotrace code + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "blk-rq-qos.h" +#include "blk-stat.h" + +static struct blkcg_policy blkcg_policy_iotrace; + +struct blk_iotrace { + struct rq_qos rqos; +}; + +#define LAT_BUCKET_NR ARRAY_SIZE(def_latb_thresh) +/* default latency bucket(ns) */ +static uint64_t def_latb_thresh[] = { + 5000000, /* 5 ms */ + 10000000, /* 10 ms */ + 30000000, /* 30 ms */ + 50000000, /* 50 ms */ + 100000000, /* 100 ms */ +}; + +enum { + IOT_READ, + IOT_WRITE, + IOT_OTHER, + IOT_NR, +}; + +struct iotrace_stat { + struct blk_rq_stat rqs; + uint64_t ios[IOT_NR]; + uint64_t sts[IOT_NR]; + uint64_t tms[IOT_NR]; + uint64_t dtms[IOT_NR]; + uint64_t hit[IOT_NR][LAT_BUCKET_NR + 1]; + uint64_t dhit[IOT_NR][LAT_BUCKET_NR + 1]; +}; + +struct iotrace_grp { + struct blkg_policy_data pd; + struct iotrace_stat __percpu *stat_pcpu; + uint64_t thresh_ns[LAT_BUCKET_NR]; + struct iotrace_stat stat; +}; + +/* iotrace_mode */ +#define IOTRACE_DISABLE 0 /* iotrace disable */ +#define IOTRACE_ENABLE 1 /* iotrace enable without q2c */ +#define IOTRACE_ENABLE_ALL 2 /* iotrace enable all */ + +static int __read_mostly iotrace_mode; + +static inline struct blk_iotrace *BLKIOTRACE(struct rq_qos *rqos) +{ + return container_of(rqos, struct blk_iotrace, rqos); +} + +static inline struct iotrace_grp *pd_to_iot(struct blkg_policy_data *pd) +{ + return pd ? container_of(pd, struct iotrace_grp, pd) : NULL; +} + +static inline struct iotrace_grp *blkg_to_iot(struct blkcg_gq *blkg) +{ + return pd_to_iot(blkg_to_pd(blkg, &blkcg_policy_iotrace)); +} + +static inline struct blkcg_gq *iot_to_blkg(struct iotrace_grp *iot) +{ + return pd_to_blkg(&iot->pd); +} + +static struct blkg_policy_data *iotrace_pd_alloc(gfp_t gfp, struct request_queue *q, + struct blkcg *blkcg) +{ + struct iotrace_grp *iot; + + iot = kzalloc_node(sizeof(*iot), gfp, q->node); + if (!iot) + return NULL; + + iot->stat_pcpu = alloc_percpu_gfp(struct iotrace_stat, gfp); + if (!iot->stat_pcpu) { + kfree(iot); + return NULL; + } + + return &iot->pd; +} + +static void iotrace_pd_init(struct blkg_policy_data *pd) +{ + struct iotrace_grp *iot = pd_to_iot(pd); + int i, j, cpu; + + for_each_possible_cpu(cpu) { + struct iotrace_stat *stat; + + stat = per_cpu_ptr(iot->stat_pcpu, cpu); + blk_rq_stat_init(&stat->rqs); + for (i = 0; i < IOT_NR; i++) { + stat->ios[i] = stat->sts[i] = 0; + stat->tms[i] = stat->dtms[i] = 0; + for (j = 0; j < LAT_BUCKET_NR + 1; j++) + stat->hit[i][j] = 0; + } + } + + blk_rq_stat_init(&iot->stat.rqs); + for (i = 0; i < IOT_NR; i++) { + iot->stat.ios[i] = iot->stat.sts[i] = 0; + iot->stat.tms[i] = iot->stat.dtms[i] = 0; + for (j = 0; j < LAT_BUCKET_NR + 1; j++) + iot->stat.hit[i][j] = 0; + } + + for (i = 0; i < LAT_BUCKET_NR; i++) + iot->thresh_ns[i] = def_latb_thresh[i]; +} + +static void iotrace_pd_free(struct blkg_policy_data *pd) +{ + struct iotrace_grp *iot = pd_to_iot(pd); + + free_percpu(iot->stat_pcpu); + kfree(iot); +} + +static inline u64 iotrace_prfill_formal(struct seq_file *sf, struct blkg_policy_data *pd, + int off) +{ + struct iotrace_grp *iot = pd_to_iot(pd); + struct iotrace_stat *stat = &iot->stat; + const char *dname = blkg_dev_name(pd->blkg); + int cpu, i; + + if (!dname) + return 0; + memset(stat, 0, sizeof(struct iotrace_stat)); + + /* collect per cpu data */ + for_each_online_cpu(cpu) { + struct iotrace_stat *s; + + s = per_cpu_ptr(iot->stat_pcpu, cpu); + for (i = 0; i < IOT_NR; i++) { + int j; + + stat->ios[i] += s->ios[i]; + stat->sts[i] += s->sts[i]; + stat->tms[i] += s->tms[i]; + for (j = 0; j < LAT_BUCKET_NR + 1; j++) + stat->hit[i][j] += s->hit[i][j]; + } + } + + seq_printf(sf, "%s rios: %llu wios: %llu oios: %llu rsts: %llu wsts: %llu osts: %llu rtms: %llu wtms: %llu otms: %llu ", + dname, + stat->ios[IOT_READ], stat->ios[IOT_WRITE], stat->ios[IOT_OTHER], + stat->sts[IOT_READ], stat->sts[IOT_WRITE], stat->sts[IOT_OTHER], + stat->tms[IOT_READ], stat->tms[IOT_WRITE], stat->tms[IOT_OTHER]); + + /* read hit */ + seq_puts(sf, " rhit:"); + for (i = 0; i < LAT_BUCKET_NR + 1; i++) + seq_printf(sf, " %llu", stat->hit[IOT_READ][i]); + + /* write hit */ + seq_puts(sf, " whit:"); + for (i = 0; i < LAT_BUCKET_NR + 1; i++) + seq_printf(sf, " %llu", stat->hit[IOT_WRITE][i]); + + /* other hit */ + seq_puts(sf, " ohit:"); + for (i = 0; i < LAT_BUCKET_NR + 1; i++) + seq_printf(sf, " %llu", stat->hit[IOT_OTHER][i]); + + seq_putc(sf, '\n'); + + return 0; +} + +static inline u64 iotrace_prfill_extend(struct seq_file *sf, struct blkg_policy_data *pd, + int off) +{ + struct iotrace_grp *iot = pd_to_iot(pd); + struct iotrace_stat *stat = &iot->stat; + const char *dname = blkg_dev_name(pd->blkg); + int cpu, i, j; + + if (!dname) + return 0; + + memset(stat, 0, sizeof(struct iotrace_stat)); + + /* collect per cpu data */ + for_each_online_cpu(cpu) { + struct iotrace_stat *s; + + s = per_cpu_ptr(iot->stat_pcpu, cpu); + for (i = 0; i < IOT_NR; i++) { + stat->ios[i] += s->ios[i]; + stat->sts[i] += s->sts[i]; + stat->tms[i] += s->tms[i]; + stat->dtms[i] += s->dtms[i]; + for (j = 0; j < LAT_BUCKET_NR + 1; j++) { + stat->hit[i][j] += s->hit[i][j]; + stat->dhit[i][j] += s->dhit[i][j]; + } + } + } + + seq_printf(sf, "%s rios: %llu wios: %llu oios: %llu rsts: %llu wsts: %llu osts: %llu rtms: %llu wtms: %llu otms: %llu rdtms: %llu wdtms: %llu odtms: %llu", + dname, + stat->ios[IOT_READ], stat->ios[IOT_WRITE], stat->ios[IOT_OTHER], + stat->sts[IOT_READ], stat->sts[IOT_WRITE], stat->sts[IOT_OTHER], + stat->tms[IOT_READ], stat->tms[IOT_WRITE], stat->tms[IOT_OTHER], + stat->dtms[IOT_READ], stat->dtms[IOT_WRITE], stat->dtms[IOT_OTHER]); + + /* read hit */ + seq_puts(sf, " rhit:"); + for (i = 0; i < LAT_BUCKET_NR + 1; i++) + seq_printf(sf, " %llu", stat->hit[IOT_READ][i]); + + /* write hit */ + seq_puts(sf, " whit:"); + for (i = 0; i < LAT_BUCKET_NR + 1; i++) + seq_printf(sf, " %llu", stat->hit[IOT_WRITE][i]); + + /* other hit */ + seq_puts(sf, " ohit:"); + for (i = 0; i < LAT_BUCKET_NR + 1; i++) + seq_printf(sf, " %llu", stat->hit[IOT_OTHER][i]); + + /* read dhit */ + seq_puts(sf, " rdhit:"); + for (i = 0; i < LAT_BUCKET_NR + 1; i++) + seq_printf(sf, " %llu", stat->dhit[IOT_READ][i]); + + /* write dhit */ + seq_puts(sf, " wdhit:"); + for (i = 0; i < LAT_BUCKET_NR + 1; i++) + seq_printf(sf, " %llu", stat->dhit[IOT_WRITE][i]); + + /* other dhit */ + seq_puts(sf, " odhit:"); + for (i = 0; i < LAT_BUCKET_NR + 1; i++) + seq_printf(sf, " %llu", stat->dhit[IOT_OTHER][i]); + + seq_putc(sf, '\n'); + + return 0; +} + +static u64 iotrace_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, + int off) +{ + if (iotrace_mode == IOTRACE_ENABLE) + return iotrace_prfill_formal(sf, pd, off); + + if (iotrace_mode == IOTRACE_ENABLE_ALL) + return iotrace_prfill_extend(sf, pd, off); + + return 0; +} + + +static int iotrace_print_stat(struct seq_file *sf, void *v) +{ + blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), iotrace_prfill_stat, + &blkcg_policy_iotrace, seq_cft(sf)->private, false); + return 0; +} + +static u64 iotrace_prfill_lat_thresh(struct seq_file *sf, + struct blkg_policy_data *pd, int off) +{ + struct iotrace_grp *iot = pd_to_iot(pd); + const char *dname = blkg_dev_name(pd->blkg); + int i; + + if (!dname) + return 0; + + seq_puts(sf, dname); + for (i = 0; i < LAT_BUCKET_NR; i++) + seq_printf(sf, " %llu", iot->thresh_ns[i]); + + seq_putc(sf, '\n'); + + return 0; +} + +static int iotrace_print_lat_thresh(struct seq_file *sf, void *v) +{ + blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), + iotrace_prfill_lat_thresh, &blkcg_policy_iotrace, + seq_cft(sf)->private, false); + return 0; +} + +static ssize_t iotrace_set_lat_thresh(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) +{ + struct blkcg *blkcg = css_to_blkcg(of_css(of)); + struct blkg_conf_ctx ctx; + struct iotrace_grp *iot; + uint64_t tmp[LAT_BUCKET_NR]; + int i, ret; + char *p; + + ret = blkg_conf_prep(blkcg, &blkcg_policy_iotrace, buf, &ctx); + if (ret) + return ret; + + iot = blkg_to_iot(ctx.blkg); + p = ctx.body; + + ret = -EINVAL; + if (sscanf(p, "%llu %llu %llu %llu %llu", + &tmp[0], &tmp[1], &tmp[2], &tmp[3], &tmp[4]) != LAT_BUCKET_NR) + goto out; + + /* make sure threshold in order */ + for (i = 0; i < LAT_BUCKET_NR - 1; i++) { + if (tmp[i] >= tmp[i + 1]) + goto out; + } + + /* update threshold for each bucket */ + for (i = 0; i < LAT_BUCKET_NR; i++) + iot->thresh_ns[i] = tmp[i]; + + ret = 0; +out: + blkg_conf_finish(&ctx); + return ret ? : nbytes; +} + +static int iotrace_print_enable(struct seq_file *sf, void *v) +{ + seq_printf(sf, "%d\n", iotrace_mode); + return 0; +} + +static ssize_t iotrace_set_mode(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) +{ + int mode; + + if (kstrtoint(buf, 10, &mode)) + return -EINVAL; + if ((mode < IOTRACE_DISABLE) || (mode > IOTRACE_ENABLE_ALL)) + return -EINVAL; + + iotrace_mode = mode; + + return nbytes; +} + +/* cgroup v2: io.iotrace.stat */ +static struct cftype iotrace_def_files[] = { + { + .name = "iotrace.stat", + .seq_show = iotrace_print_stat, + }, + { + .name = "iotrace.lat_thresh", + .seq_show = iotrace_print_lat_thresh, + .write = iotrace_set_lat_thresh, + }, + { + .name = "iotrace.enable", + .flags = CFTYPE_ONLY_ON_ROOT, + .seq_show = iotrace_print_enable, + .write = iotrace_set_mode, + }, + {} +}; + +static struct blkcg_policy blkcg_policy_iotrace = { + .dfl_cftypes = iotrace_def_files, + .pd_alloc_fn = iotrace_pd_alloc, + .pd_init_fn = iotrace_pd_init, + .pd_free_fn = iotrace_pd_free, +}; + +static void iotrace_account_bio(struct iotrace_grp *iot, struct bio *bio, + u64 now) +{ + u64 delta, start = bio_issue_time(&bio->bi_issue); + u64 delta_disk, start_disk = bio_issue_time(&bio->bi_start); + struct iotrace_stat *stat; + int i, t; + + now = __bio_issue_time(now); + + if (now <= start) + return; + + switch (bio_op(bio)) { + case REQ_OP_READ: + t = IOT_READ; + break; + case REQ_OP_WRITE: + t = IOT_WRITE; + break; + default: + t = IOT_OTHER; + break; + } + + if (start) + delta = now - start; + else + delta = 0; + stat = get_cpu_ptr(iot->stat_pcpu); + stat->ios[t]++; + stat->sts[t] += bio_issue_size(&bio->bi_issue); + stat->tms[t] += delta; + if (start_disk && (start_disk > start) && (now > start_disk)) + delta_disk = now - start_disk; + else + delta_disk = 0; + stat->dtms[t] += delta_disk; + if (delta >= iot->thresh_ns[LAT_BUCKET_NR - 1]) { + stat->hit[t][LAT_BUCKET_NR]++; + } else { + for (i = 0; i < LAT_BUCKET_NR; i++) { + if (delta < iot->thresh_ns[i]) { + stat->hit[t][i]++; + break; + } + } + } + + if (iotrace_mode != IOTRACE_ENABLE_ALL) { + put_cpu_ptr(stat); + return; + } + + if (delta_disk >= iot->thresh_ns[LAT_BUCKET_NR - 1]) + stat->dhit[t][LAT_BUCKET_NR]++; + else { + for (i = 0; i < LAT_BUCKET_NR; i++) { + if (delta_disk < iot->thresh_ns[i]) { + stat->dhit[t][i]++; + break; + } + } + } + + put_cpu_ptr(stat); +} + +static void blkcg_iotrace_done_bio(struct rq_qos *rqos, struct bio *bio) +{ + struct blkcg_gq *blkg; + struct iotrace_grp *iot; + u64 now; + + if (iotrace_mode == IOTRACE_DISABLE) + return; + + now = ktime_to_ns(ktime_get()); + blkg = bio->bi_blkg; + /* account io statistics */ + while (blkg) { + iot = blkg_to_iot(blkg); + if (!iot) { + WARN_ONCE(1, "struct iotrace_grp is not sync with blkg"); + blkg = blkg->parent; + continue; + } + + iotrace_account_bio(iot, bio, now); + blkg = blkg->parent; + } +} + +static void blkcg_iotrace_issue(struct rq_qos *rqos, struct request *rq) +{ + struct bio *bio; + + if (iotrace_mode != IOTRACE_ENABLE_ALL) + return; + + __rq_for_each_bio(bio, rq) + bio_issue_init_time(&bio->bi_start, rq->io_start_time_ns); +} + +static void blkcg_iotrace_exit(struct rq_qos *rqos) +{ + struct blk_iotrace *blkiotrace = BLKIOTRACE(rqos); + + blkcg_deactivate_policy(rqos->q, &blkcg_policy_iotrace); + kfree(blkiotrace); +} + +static struct rq_qos_ops blkcg_iotrace_ops = { + .issue = blkcg_iotrace_issue, + .done_bio = blkcg_iotrace_done_bio, + .exit = blkcg_iotrace_exit, +}; + +int blk_iotrace_init(struct request_queue *q) +{ + struct blk_iotrace *blkiotrace; + struct rq_qos *rqos; + int ret; + + blkiotrace = kzalloc(sizeof(*blkiotrace), GFP_KERNEL); + if (!blkiotrace) + return -ENOMEM; + + rqos = &blkiotrace->rqos; + rqos->id = RQ_QOS_IOTRACE; + rqos->ops = &blkcg_iotrace_ops; + rqos->q = q; + + rq_qos_add(q, rqos); + + ret = blkcg_activate_policy(q, &blkcg_policy_iotrace); + if (ret) { + rq_qos_del(q, rqos); + kfree(blkiotrace); + return ret; + } + + return 0; +} + +static int __init iotrace_init(void) +{ + return blkcg_policy_register(&blkcg_policy_iotrace); +} + +static void __exit iotrace_exit(void) +{ + return blkcg_policy_unregister(&blkcg_policy_iotrace); +} + +module_init(iotrace_init); +module_exit(iotrace_exit); +MODULE_LICENSE("GPL"); diff --git a/block/blk-merge.c b/block/blk-merge.c index 49f2311cf1148..5219064cd72bb 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -7,6 +7,7 @@ #include #include #include +#include #include @@ -571,6 +572,9 @@ static inline unsigned int blk_rq_get_max_segments(struct request *rq) static inline int ll_new_hw_segment(struct request *req, struct bio *bio, unsigned int nr_phys_segs) { + if (!blk_cgroup_mergeable(req, bio)) + goto no_merge; + if (blk_integrity_merge_bio(req->q, req, bio) == false) goto no_merge; @@ -662,6 +666,9 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, if (total_phys_segments > blk_rq_get_max_segments(req)) return 0; + if (!blk_cgroup_mergeable(req, next->bio)) + return 0; + if (blk_integrity_merge_rq(q, req, next) == false) return 0; @@ -870,6 +877,10 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) if (rq->rq_disk != bio->bi_disk) return false; + /* don't merge across cgroup boundaries */ + if (!blk_cgroup_mergeable(rq, bio)) + return false; + /* only merge integrity protected bio into ditto rq */ if (blk_integrity_merge_bio(rq->q, rq, bio) == false) return false; diff --git a/block/blk-mq.c b/block/blk-mq.c index 0674f53c60528..84798d09ca464 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -733,7 +733,6 @@ void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list) /* this request will be re-inserted to io scheduler queue */ blk_mq_sched_requeue_request(rq); - BUG_ON(!list_empty(&rq->queuelist)); blk_mq_add_to_requeue_list(rq, true, kick_requeue_list); } EXPORT_SYMBOL(blk_mq_requeue_request); diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h index 2bcb3495e376b..caecf5a99e11f 100644 --- a/block/blk-rq-qos.h +++ b/block/blk-rq-qos.h @@ -17,6 +17,7 @@ enum rq_qos_id { RQ_QOS_WBT, RQ_QOS_LATENCY, RQ_QOS_COST, + RQ_QOS_IOTRACE, }; struct rq_wait { @@ -88,6 +89,8 @@ static inline const char *rq_qos_id_to_name(enum rq_qos_id id) return "latency"; case RQ_QOS_COST: return "cost"; + case RQ_QOS_IOTRACE: + return "iotrace"; } return "unknown"; } diff --git a/block/blk-stat.c b/block/blk-stat.c index 940f15d600f8a..a381a52149098 100644 --- a/block/blk-stat.c +++ b/block/blk-stat.c @@ -136,6 +136,7 @@ void blk_stat_add_callback(struct request_queue *q, struct blk_stat_callback *cb) { unsigned int bucket; + unsigned long flags; int cpu; for_each_possible_cpu(cpu) { @@ -146,20 +147,22 @@ void blk_stat_add_callback(struct request_queue *q, blk_rq_stat_init(&cpu_stat[bucket]); } - spin_lock(&q->stats->lock); + spin_lock_irqsave(&q->stats->lock, flags); list_add_tail_rcu(&cb->list, &q->stats->callbacks); blk_queue_flag_set(QUEUE_FLAG_STATS, q); - spin_unlock(&q->stats->lock); + spin_unlock_irqrestore(&q->stats->lock, flags); } void blk_stat_remove_callback(struct request_queue *q, struct blk_stat_callback *cb) { - spin_lock(&q->stats->lock); + unsigned long flags; + + spin_lock_irqsave(&q->stats->lock, flags); list_del_rcu(&cb->list); if (list_empty(&q->stats->callbacks) && !q->stats->enable_accounting) blk_queue_flag_clear(QUEUE_FLAG_STATS, q); - spin_unlock(&q->stats->lock); + spin_unlock_irqrestore(&q->stats->lock, flags); del_timer_sync(&cb->timer); } @@ -182,10 +185,12 @@ void blk_stat_free_callback(struct blk_stat_callback *cb) void blk_stat_enable_accounting(struct request_queue *q) { - spin_lock(&q->stats->lock); + unsigned long flags; + + spin_lock_irqsave(&q->stats->lock, flags); q->stats->enable_accounting = true; blk_queue_flag_set(QUEUE_FLAG_STATS, q); - spin_unlock(&q->stats->lock); + spin_unlock_irqrestore(&q->stats->lock, flags); } EXPORT_SYMBOL_GPL(blk_stat_enable_accounting); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index bf33570da5ac7..3fb37135264b2 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -1060,15 +1060,17 @@ void blk_unregister_queue(struct gendisk *disk) */ if (queue_is_mq(q)) blk_mq_unregister_dev(disk_to_dev(disk), q); - - kobject_uevent(&q->kobj, KOBJ_REMOVE); - kobject_del(&q->kobj); blk_trace_remove_sysfs(disk_to_dev(disk)); mutex_lock(&q->sysfs_lock); if (q->elevator) elv_unregister_queue(q); mutex_unlock(&q->sysfs_lock); + + /* Now that we've deleted all child objects, we can delete the queue. */ + kobject_uevent(&q->kobj, KOBJ_REMOVE); + kobject_del(&q->kobj); + mutex_unlock(&q->sysfs_dir_lock); kobject_put(&disk_to_dev(disk)->kobj); diff --git a/block/blk-throttle.c b/block/blk-throttle.c index bd870f9ae4586..b1729d07f1722 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -12,6 +12,7 @@ #include #include #include "blk.h" +#include "blk-cgroup-rwstat.h" /* Max dispatch from a group in 1 round */ static int throtl_grp_quantum = 8; @@ -176,6 +177,9 @@ struct throtl_grp { unsigned int bio_cnt; /* total bios */ unsigned int bad_bio_cnt; /* bios exceeding latency threshold */ unsigned long bio_cnt_reset_time; + + struct blkg_rwstat stat_bytes; + struct blkg_rwstat stat_ios; }; /* We measure latency for request size from <= 4k to >= 1M */ @@ -489,6 +493,12 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, if (!tg) return NULL; + if (blkg_rwstat_init(&tg->stat_bytes, gfp)) + goto err_free_tg; + + if (blkg_rwstat_init(&tg->stat_ios, gfp)) + goto err_exit_stat_bytes; + throtl_service_queue_init(&tg->service_queue); for (rw = READ; rw <= WRITE; rw++) { @@ -513,6 +523,12 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, tg->idletime_threshold_conf = DFL_IDLE_THRESHOLD; return &tg->pd; + +err_exit_stat_bytes: + blkg_rwstat_exit(&tg->stat_bytes); +err_free_tg: + kfree(tg); + return NULL; } static void throtl_pd_init(struct blkg_policy_data *pd) @@ -611,6 +627,8 @@ static void throtl_pd_free(struct blkg_policy_data *pd) struct throtl_grp *tg = pd_to_tg(pd); del_timer_sync(&tg->service_queue.pending_timer); + blkg_rwstat_exit(&tg->stat_bytes); + blkg_rwstat_exit(&tg->stat_ios); kfree(tg); } @@ -1464,6 +1482,32 @@ static ssize_t tg_set_conf_uint(struct kernfs_open_file *of, return tg_set_conf(of, buf, nbytes, off, false); } +static int tg_print_rwstat(struct seq_file *sf, void *v) +{ + blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), + blkg_prfill_rwstat, &blkcg_policy_throtl, + seq_cft(sf)->private, true); + return 0; +} + +static u64 tg_prfill_rwstat_recursive(struct seq_file *sf, + struct blkg_policy_data *pd, int off) +{ + struct blkg_rwstat_sample sum; + + blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_throtl, off, + &sum); + return __blkg_prfill_rwstat(sf, pd, &sum); +} + +static int tg_print_rwstat_recursive(struct seq_file *sf, void *v) +{ + blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), + tg_prfill_rwstat_recursive, &blkcg_policy_throtl, + seq_cft(sf)->private, true); + return 0; +} + static struct cftype throtl_legacy_files[] = { { .name = "throttle.read_bps_device", @@ -1491,23 +1535,23 @@ static struct cftype throtl_legacy_files[] = { }, { .name = "throttle.io_service_bytes", - .private = (unsigned long)&blkcg_policy_throtl, - .seq_show = blkg_print_stat_bytes, + .private = offsetof(struct throtl_grp, stat_bytes), + .seq_show = tg_print_rwstat, }, { .name = "throttle.io_service_bytes_recursive", - .private = (unsigned long)&blkcg_policy_throtl, - .seq_show = blkg_print_stat_bytes_recursive, + .private = offsetof(struct throtl_grp, stat_bytes), + .seq_show = tg_print_rwstat_recursive, }, { .name = "throttle.io_serviced", - .private = (unsigned long)&blkcg_policy_throtl, - .seq_show = blkg_print_stat_ios, + .private = offsetof(struct throtl_grp, stat_ios), + .seq_show = tg_print_rwstat, }, { .name = "throttle.io_serviced_recursive", - .private = (unsigned long)&blkcg_policy_throtl, - .seq_show = blkg_print_stat_ios_recursive, + .private = offsetof(struct throtl_grp, stat_ios), + .seq_show = tg_print_rwstat_recursive, }, { } /* terminate */ }; @@ -2127,7 +2171,16 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, WARN_ON_ONCE(!rcu_read_lock_held()); /* see throtl_charge_bio() */ - if (bio_flagged(bio, BIO_THROTTLED) || !tg->has_rules[rw]) + if (bio_flagged(bio, BIO_THROTTLED)) + goto out; + + if (!cgroup_subsys_on_dfl(io_cgrp_subsys)) { + blkg_rwstat_add(&tg->stat_bytes, bio->bi_opf, + bio->bi_iter.bi_size); + blkg_rwstat_add(&tg->stat_ios, bio->bi_opf, 1); + } + + if (!tg->has_rules[rw]) goto out; spin_lock_irq(&q->queue_lock); diff --git a/block/blk-zoned.c b/block/blk-zoned.c index b17c094cb977c..a85d0a06a6ff2 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -316,9 +316,6 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, if (!blk_queue_is_zoned(q)) return -ENOTTY; - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report))) return -EFAULT; @@ -374,9 +371,6 @@ int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode, if (!blk_queue_is_zoned(q)) return -ENOTTY; - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - if (!(mode & FMODE_WRITE)) return -EBADF; diff --git a/block/blk.h b/block/blk.h index 0b8884353f6bf..363d9449979ba 100644 --- a/block/blk.h +++ b/block/blk.h @@ -173,6 +173,12 @@ static inline void bio_integrity_free(struct bio *bio) } #endif /* CONFIG_BLK_DEV_INTEGRITY */ +#ifdef CONFIG_BYTEDANCE_BLK_CGROUP_IOTRACE +int blk_iotrace_init(struct request_queue *q); +#else +static inline int blk_iotrace_init(struct request_queue *q) { return 0; } +#endif + unsigned long blk_rq_timeout(unsigned long timeout); void blk_add_timer(struct request *req); diff --git a/block/bsg.c b/block/bsg.c index 0d012efef5274..c8b9714e69232 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -371,10 +371,13 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case SG_GET_RESERVED_SIZE: case SG_SET_RESERVED_SIZE: case SG_EMULATED_HOST: - case SCSI_IOCTL_SEND_COMMAND: return scsi_cmd_ioctl(bd->queue, NULL, file->f_mode, cmd, uarg); case SG_IO: return bsg_sg_io(bd->queue, file->f_mode, uarg); + case SCSI_IOCTL_SEND_COMMAND: + pr_warn_ratelimited("%s: calling unsupported SCSI_IOCTL_SEND_COMMAND\n", + current->comm); + return -EINVAL; default: return -ENOTTY; } diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 7f053468b50d7..d490ac220ba86 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -393,7 +393,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) return 0; case BLKGETSIZE: size = i_size_read(bdev->bd_inode); - if ((size >> 9) > ~0UL) + if ((size >> 9) > ~(compat_ulong_t)0) return -EFBIG; return compat_put_ulong(arg, size >> 9); diff --git a/block/elevator.c b/block/elevator.c index 30c934efacb4c..3ba826230c578 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -525,8 +525,6 @@ void elv_unregister_queue(struct request_queue *q) kobject_del(&e->kobj); e->registered = 0; - /* Re-enable throttling in case elevator disabled it */ - wbt_enable_default(q); } } diff --git a/block/genhd.c b/block/genhd.c index ad2ac24b05e53..431fd567312f6 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -37,8 +37,6 @@ struct kobject *block_depr; static DEFINE_SPINLOCK(ext_devt_lock); static DEFINE_IDR(ext_devt_idr); -static const struct device_type disk_type; - static void disk_check_events(struct disk_events *ev, unsigned int *clearing_ptr); static void disk_alloc_events(struct gendisk *disk); @@ -1368,7 +1366,7 @@ static char *block_devnode(struct device *dev, umode_t *mode, return NULL; } -static const struct device_type disk_type = { +const struct device_type disk_type = { .name = "disk", .groups = disk_attr_groups, .release = disk_release, diff --git a/block/ioprio.c b/block/ioprio.c index 77bcab11dce57..d70980d85c551 100644 --- a/block/ioprio.c +++ b/block/ioprio.c @@ -207,6 +207,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who) pgrp = task_pgrp(current); else pgrp = find_vpid(who); + read_lock(&tasklist_lock); do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { tmpio = get_task_ioprio(p); if (tmpio < 0) @@ -216,6 +217,8 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who) else ret = ioprio_best(ret, tmpio); } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); + read_unlock(&tasklist_lock); + break; case IOPRIO_WHO_USER: uid = make_kuid(current_user_ns(), who); diff --git a/bytedance/build.sh b/bytedance/build.sh index b450f01b4d3fe..174f54bcc2922 100755 --- a/bytedance/build.sh +++ b/bytedance/build.sh @@ -2,30 +2,102 @@ set -x set -e -declare -A archs=( - ["x86_64"]="amd64" - ["aarch64"]="arm64" -) +declare -A distributions=( ["8"]="jessie" ["9"]="stretch" ["10"]="buster" ["jessie"]="jessie" ["stretch"]="stretch" ["buster"]="buster" ) +declare -A archs=( ["x86_64"]="amd64" ["aarch64"]="arm64" ) + +function usage { + echo "$0 [-d jessie|stretch|buster|8|9|10] [-o OUTPUT] [-p PATTERN]" + echo "" + echo "Build Linux kernel based on distribution of the current machine or specified by the option of -d" + echo "" + echo "OPTIONS:" + echo " -d distribution. The valid parameter is 'jessie'(aka '8') or 'stretch'(aka '9') or 'buster'(aka '10')" + echo " -o output. The location to store deb packages (defaults to current directory)." + echo " -p specify a tag (git) pattern to search for a specific tag to build kernel. Defaults to v[0-9].[0-9]*.[0-9]*.bsk.[0-9]*" + echo " -h help" + echo "" +} + +while getopts ":d:hj:o:p:" opt +do + case $opt in + o) output=$OPTARG + ;; + d) distribution=$OPTARG + ;; + h) usage + exit + ;; + j) + # Ignore + ;; + p) pattern=$OPTARG + ;; + \?) echo "Unknown option: -$OPTARG" >&2 + usage + exit 1 + ;; + esac + + if [ "$opt" != "d" ]; then + parameter="$parameter -$opt $OPTARG" + fi +done + +if [ ! -z $(uname -m) ]; then + arch=${archs[$(uname -m)]} +fi -arch=${archs[$(uname -m)]} if [ -z $arch ]; then echo "Unknown arch: $(uname -m)" exit 1 fi -pattern="v[0-9].[0-9]*.[0-9]*.bsk.[0-9]*" -version=$(git describe --dirty --tags --match "$pattern" | cut -c 2-) +toplevel=$(git rev-parse --show-toplevel) + +if [ ! -z $distribution ]; then + declare -A docker_suffix=( ["amd64"]="" ["arm64"]="-aarch64" ) + + image=${distributions[$distribution]} + if [ -z $image ]; then + usage + exit 1 + fi + + tmp=$(mktemp -d /var/tmp/linux-build.XXXXXX) + build="$tmp/$image" + trap "rm -rf $tmp" SIGHUP SIGINT SIGQUIT SIGTERM ERR EXIT + mkdir $build + + # Do docker build + docker run --network=host -ti --rm -v $build:/linux -v $toplevel:/linux/src \ + gaea-hub.byted.org/kernel-compile/kernel-compile:$image${docker_suffix[$arch]}-latest \ + bash -c "apt install rsync -y; \ + cd /linux/src/$(git rev-parse --show-prefix); \ + $0 $parameter" + + output="$toplevel/$output/$image" + if test -e $output; then + rm -rf $output + fi + mkdir -p $output + mv $tmp/$image/*.deb $output + exit 0 +fi + +pattern=${pattern:="v[0-9].[0-9]*.[0-9]*.bsk.[0-9]*"} +version=$(git describe --dirty --tags --match $pattern | cut -c 2-) if [ -z $version ]; then - echo "Unknown tag pattern: "$pattern"" + echo "Unknown tag pattern: $pattern" exit 1 fi timestamp="Debian $version $(date)" -cp config.$(uname -m) .config -if ! lsb_release -c | grep -q jessie; then - CFLAGS="-Werror" -fi +cd $toplevel +# The source tree should be clean +git clean -idfxq +cp config.$(uname -m) .config make deb-pkg \ BUILD_TOOLS=y \ @@ -35,7 +107,4 @@ make deb-pkg \ KBUILD_BUILD_TIMESTAMP="$timestamp" \ KBUILD_BUILD_USER="STE-Kernel" \ KBUILD_BUILD_HOST="ByteDance" \ - DPKG_FLAGS="-sn" \ - CFLAGS_KERNEL="$CFLAGS" \ - CFLAGS_MODULE="$CFLAGS" \ - "$@" + -j$(nproc) diff --git a/certs/Makefile b/certs/Makefile index f4b90bad8690a..2baef6fba029e 100644 --- a/certs/Makefile +++ b/certs/Makefile @@ -46,11 +46,19 @@ endif redirect_openssl = 2>&1 quiet_redirect_openssl = 2>&1 silent_redirect_openssl = 2>/dev/null +openssl_available = $(shell openssl help 2>/dev/null && echo yes) # We do it this way rather than having a boolean option for enabling an # external private key, because 'make randconfig' might enable such a # boolean option and we unfortunately can't make it depend on !RANDCONFIG. ifeq ($(CONFIG_MODULE_SIG_KEY),"certs/signing_key.pem") + +ifeq ($(openssl_available),yes) +X509TEXT=$(shell openssl x509 -in "certs/signing_key.pem" -text 2>/dev/null) + +$(if $(findstring rsaEncryption,$(X509TEXT)),,$(shell rm -f "certs/signing_key.pem")) +endif + $(obj)/signing_key.pem: $(obj)/x509.genkey @$(kecho) "###" @$(kecho) "### Now generating an X.509 key pair to be used for signing modules." diff --git a/certs/blacklist_hashes.c b/certs/blacklist_hashes.c index 344892337be07..d5961aa3d3380 100644 --- a/certs/blacklist_hashes.c +++ b/certs/blacklist_hashes.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "blacklist.h" -const char __initdata *const blacklist_hashes[] = { +const char __initconst *const blacklist_hashes[] = { #include CONFIG_SYSTEM_BLACKLIST_HASH_LIST , NULL }; diff --git a/config.aarch64 b/config.aarch64 index 63c378fddf701..20f4ef208e794 100644 --- a/config.aarch64 +++ b/config.aarch64 @@ -116,6 +116,7 @@ CONFIG_GENERIC_SCHED_CLOCK=y # # Scheduler features # +CONFIG_BYTEDANCE_BURST_SCHED=y # CONFIG_UCLAMP_TASK is not set # end of Scheduler features @@ -138,7 +139,7 @@ CONFIG_CFS_BANDWIDTH=y CONFIG_CGROUP_PIDS=y CONFIG_CGROUP_RDMA=y CONFIG_CGROUP_FREEZER=y -# CONFIG_CGROUP_HUGETLB is not set +CONFIG_CGROUP_HUGETLB=y CONFIG_CPUSETS=y CONFIG_PROC_PID_CPUSET=y CONFIG_CGROUP_DEVICE=y @@ -419,7 +420,7 @@ CONFIG_ARM64_PTR_AUTH=y CONFIG_ARM64_SVE=y CONFIG_ARM64_MODULE_PLTS=y -# CONFIG_ARM64_PSEUDO_NMI is not set +CONFIG_ARM64_PSEUDO_NMI=y CONFIG_RELOCATABLE=y CONFIG_RANDOMIZE_BASE=y CONFIG_RANDOMIZE_MODULE_REGION_FULL=y @@ -803,6 +804,7 @@ CONFIG_BLK_DEV_THROTTLING=y # CONFIG_BLK_CMDLINE_PARSER is not set CONFIG_BLK_WBT=y # CONFIG_BLK_CGROUP_IOLATENCY is not set +CONFIG_BYTEDANCE_BLK_CGROUP_IOTRACE=y CONFIG_BLK_CGROUP_IOCOST=y CONFIG_BLK_WBT_MQ=y CONFIG_BLK_DEBUG_FS=y @@ -7163,21 +7165,30 @@ CONFIG_VFIO_PCI_INTX=y # CONFIG_VFIO_MDEV is not set CONFIG_VIRT_DRIVERS=y CONFIG_VIRTIO=m +CONFIG_VIRTIO_PCI_LIB=m CONFIG_VIRTIO_MENU=y CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_PCI_LEGACY=y +CONFIG_VIRTIO_VDPA=m # CONFIG_VIRTIO_PMEM is not set CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=m CONFIG_VIRTIO_MMIO=m # CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES is not set -# CONFIG_VDPA is not set -CONFIG_VHOST_IOTLB=m +CONFIG_VDPA=m +CONFIG_VDPA_SIM=m +CONFIG_VDPA_SIM_NET=m +CONFIG_VDPA_SIM_BLOCK=m +CONFIG_VDPA_USER=m + CONFIG_VHOST=m +CONFIG_VHOST_IOTLB=m +CONFIG_VHOST_RING=m CONFIG_VHOST_MENU=y CONFIG_VHOST_NET=m CONFIG_VHOST_SCSI=m CONFIG_VHOST_VSOCK=m +CONFIG_VHOST_VDPA=m # CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set # @@ -8384,7 +8395,7 @@ CONFIG_QUOTACTL=y CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=m CONFIG_CUSE=m -# CONFIG_VIRTIO_FS is not set +CONFIG_VIRTIO_FS=m CONFIG_OVERLAY_FS=m # CONFIG_OVERLAY_FS_REDIRECT_DIR is not set CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW=y @@ -9079,7 +9090,7 @@ CONFIG_SBITMAP=y # printk and dmesg options # CONFIG_PRINTK_TIME=y -# CONFIG_PRINTK_CALLER is not set +CONFIG_PRINTK_CALLER=y CONFIG_CONSOLE_LOGLEVEL_DEFAULT=7 CONFIG_CONSOLE_LOGLEVEL_QUIET=4 CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 @@ -9094,7 +9105,7 @@ CONFIG_DEBUG_INFO=y # CONFIG_DEBUG_INFO_REDUCED is not set # CONFIG_DEBUG_INFO_SPLIT is not set # CONFIG_DEBUG_INFO_DWARF4 is not set -# CONFIG_DEBUG_INFO_BTF is not set +CONFIG_DEBUG_INFO_BTF=y # CONFIG_GDB_SCRIPTS is not set CONFIG_ENABLE_MUST_CHECK=y CONFIG_FRAME_WARN=2048 diff --git a/config.aarch64-64k b/config.aarch64-64k new file mode 100644 index 0000000000000..d8e36eac4dd4f --- /dev/null +++ b/config.aarch64-64k @@ -0,0 +1,9345 @@ +# +# General setup +# +CONFIG_INIT_ENV_ARG_LIMIT=32 +# CONFIG_COMPILE_TEST is not set +CONFIG_LOCALVERSION="" +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_BUILD_SALT="5.4.y.bsk-arm64" +CONFIG_DEFAULT_HOSTNAME="(none)" +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +CONFIG_SYSVIPC_SYSCTL=y +CONFIG_POSIX_MQUEUE=y +CONFIG_POSIX_MQUEUE_SYSCTL=y +CONFIG_CROSS_MEMORY_ATTACH=y +# CONFIG_USELIB is not set +CONFIG_AUDIT=y +CONFIG_HAVE_ARCH_AUDITSYSCALL=y +CONFIG_AUDITSYSCALL=y + +# +# IRQ subsystem +# +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_GENERIC_IRQ_SHOW=y +CONFIG_GENERIC_IRQ_SHOW_LEVEL=y +CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK=y +CONFIG_GENERIC_IRQ_MIGRATION=y +CONFIG_GENERIC_IRQ_INJECTION=y +CONFIG_HARDIRQS_SW_RESEND=y +CONFIG_GENERIC_IRQ_CHIP=y +CONFIG_IRQ_DOMAIN=y +CONFIG_IRQ_DOMAIN_HIERARCHY=y +CONFIG_GENERIC_MSI_IRQ=y +CONFIG_GENERIC_MSI_IRQ_DOMAIN=y +CONFIG_IRQ_MSI_IOMMU=y +CONFIG_HANDLE_DOMAIN_IRQ=y +CONFIG_IRQ_FORCED_THREADING=y +CONFIG_SPARSE_IRQ=y +# CONFIG_GENERIC_IRQ_DEBUGFS is not set +# end of IRQ subsystem + +CONFIG_GENERIC_IRQ_MULTI_HANDLER=y +CONFIG_ARCH_CLOCKSOURCE_DATA=y +CONFIG_GENERIC_TIME_VSYSCALL=y +CONFIG_GENERIC_CLOCKEVENTS=y +CONFIG_ARCH_HAS_TICK_BROADCAST=y +CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y + +# +# Timers subsystem +# +CONFIG_TICK_ONESHOT=y +CONFIG_NO_HZ_COMMON=y +# CONFIG_HZ_PERIODIC is not set +CONFIG_NO_HZ_IDLE=y +# CONFIG_NO_HZ_FULL is not set +# CONFIG_NO_HZ is not set +CONFIG_HIGH_RES_TIMERS=y +# end of Timers subsystem + +CONFIG_BPF=y +CONFIG_HAVE_EBPF_JIT=y + +# +# BPF subsystem +# +CONFIG_BPF_SYSCALL=y +CONFIG_BPF_JIT=y +# CONFIG_BPF_JIT_ALWAYS_ON is not set +# CONFIG_BPF_UNPRIV_DEFAULT_OFF is not set +CONFIG_BPF_UNSAFE_HELPER=y +# end of BPF subsystem + +# CONFIG_PREEMPT_NONE is not set +CONFIG_PREEMPT_VOLUNTARY=y +# CONFIG_PREEMPT is not set + +# +# CPU/Task time and stats accounting +# +CONFIG_TICK_CPU_ACCOUNTING=y +# CONFIG_VIRT_CPU_ACCOUNTING_GEN is not set +# CONFIG_IRQ_TIME_ACCOUNTING is not set +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BSD_PROCESS_ACCT_V3=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_PSI=y +# CONFIG_PSI_DEFAULT_DISABLED is not set +# end of CPU/Task time and stats accounting + +CONFIG_CPU_ISOLATION=y + +# +# RCU Subsystem +# +CONFIG_TREE_RCU=y +# CONFIG_RCU_EXPERT is not set +CONFIG_SRCU=y +CONFIG_TREE_SRCU=y +CONFIG_RCU_STALL_COMMON=y +CONFIG_RCU_NEED_SEGCBLIST=y +# end of RCU Subsystem + +CONFIG_BUILD_BIN2C=y +# CONFIG_IKCONFIG is not set +# CONFIG_IKHEADERS is not set +CONFIG_LOG_BUF_SHIFT=17 +CONFIG_LOG_CPU_MAX_BUF_SHIFT=12 +CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=13 +CONFIG_GENERIC_SCHED_CLOCK=y + +# +# Scheduler features +# +CONFIG_BYTEDANCE_BURST_SCHED=y +# CONFIG_UCLAMP_TASK is not set +# end of Scheduler features + +CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y +CONFIG_ARCH_SUPPORTS_INT128=y +CONFIG_NUMA_BALANCING=y +CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y +CONFIG_CGROUPS=y +CONFIG_PAGE_COUNTER=y +CONFIG_MEMCG=y +CONFIG_MEMCG_SWAP=y +# CONFIG_MEMCG_BGD_RECLAIM is not set +CONFIG_MEMCG_KMEM=y +CONFIG_BLK_CGROUP=y +CONFIG_CGROUP_WRITEBACK=y +CONFIG_CGROUP_SCHED=y +CONFIG_FAIR_GROUP_SCHED=y +CONFIG_CFS_BANDWIDTH=y +# CONFIG_RT_GROUP_SCHED is not set +CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_RDMA=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CPUSETS=y +CONFIG_PROC_PID_CPUSET=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_PERF=y +CONFIG_CGROUP_BPF=y +# CONFIG_CGROUP_DEBUG is not set +CONFIG_SOCK_CGROUP_DATA=y +CONFIG_NAMESPACES=y +CONFIG_UTS_NS=y +CONFIG_IPC_NS=y +CONFIG_USER_NS=y +CONFIG_PID_NS=y +CONFIG_NET_NS=y +CONFIG_CHECKPOINT_RESTORE=y +CONFIG_SCHED_AUTOGROUP=y +# CONFIG_SYSFS_DEPRECATED is not set +CONFIG_RELAY=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +CONFIG_RD_GZIP=y +CONFIG_RD_BZIP2=y +CONFIG_RD_LZMA=y +CONFIG_RD_XZ=y +CONFIG_RD_LZO=y +CONFIG_RD_LZ4=y +CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_SYSCTL=y +CONFIG_HAVE_UID16=y +CONFIG_SYSCTL_EXCEPTION_TRACE=y +CONFIG_EXPERT=y +CONFIG_UID16=y +CONFIG_MULTIUSER=y +# CONFIG_SGETMASK_SYSCALL is not set +# CONFIG_SYSFS_SYSCALL is not set +# CONFIG_SYSCTL_SYSCALL is not set +CONFIG_FHANDLE=y +CONFIG_POSIX_TIMERS=y +CONFIG_PRINTK=y +CONFIG_PRINTK_NMI=y +CONFIG_BUG=y +CONFIG_ELF_CORE=y +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_FUTEX_PI=y +CONFIG_EPOLL=y +CONFIG_SIGNALFD=y +CONFIG_TIMERFD=y +CONFIG_EVENTFD=y +CONFIG_SHMEM=y +CONFIG_AIO=y +CONFIG_IO_URING=y +CONFIG_ADVISE_SYSCALLS=y +CONFIG_MEMBARRIER=y +CONFIG_KALLSYMS=y +CONFIG_KALLSYMS_ALL=y +CONFIG_KALLSYMS_BASE_RELATIVE=y +CONFIG_USERFAULTFD=y +CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE=y +CONFIG_RSEQ=y +# CONFIG_DEBUG_RSEQ is not set +# CONFIG_EMBEDDED is not set +CONFIG_HAVE_PERF_EVENTS=y +# CONFIG_PC104 is not set + +# +# Kernel Performance Events And Counters +# +CONFIG_PERF_EVENTS=y +# CONFIG_DEBUG_PERF_USE_VMALLOC is not set +# end of Kernel Performance Events And Counters + +CONFIG_VM_EVENT_COUNTERS=y +CONFIG_SLUB_DEBUG=y +# CONFIG_SLUB_MEMCG_SYSFS_ON is not set +# CONFIG_COMPAT_BRK is not set +# CONFIG_SLAB is not set +CONFIG_SLUB=y +# CONFIG_SLOB is not set +CONFIG_SLAB_MERGE_DEFAULT=y +CONFIG_SLAB_FREELIST_RANDOM=y +CONFIG_SLAB_FREELIST_HARDENED=y +CONFIG_SHUFFLE_PAGE_ALLOCATOR=y +CONFIG_SLUB_CPU_PARTIAL=y +CONFIG_SYSTEM_DATA_VERIFICATION=y +CONFIG_PROFILING=y +CONFIG_TRACEPOINTS=y +# end of General setup + +CONFIG_ARM64=y +CONFIG_64BIT=y +CONFIG_MMU=y +CONFIG_ARM64_PAGE_SHIFT=16 +CONFIG_ARM64_CONT_SHIFT=5 +CONFIG_ARCH_MMAP_RND_BITS_MIN=18 +CONFIG_ARCH_MMAP_RND_BITS_MAX=33 +CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=11 +CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX=16 +CONFIG_STACKTRACE_SUPPORT=y +CONFIG_ILLEGAL_POINTER_VALUE=0xdead000000000000 +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_TRACE_IRQFLAGS_SUPPORT=y +CONFIG_GENERIC_BUG=y +CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y +CONFIG_GENERIC_HWEIGHT=y +CONFIG_GENERIC_CSUM=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_ZONE_DMA32=y +CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y +CONFIG_SMP=y +CONFIG_KERNEL_MODE_NEON=y +CONFIG_FIX_EARLYCON_MEM=y +CONFIG_PGTABLE_LEVELS=4 +CONFIG_ARCH_SUPPORTS_UPROBES=y +CONFIG_ARCH_PROC_KCORE_TEXT=y + +# +# Platform selection +# +# CONFIG_ARCH_ACTIONS is not set +# CONFIG_ARCH_AGILEX is not set +CONFIG_ARCH_SUNXI=y +# CONFIG_ARCH_ALPINE is not set +CONFIG_ARCH_BCM2835=y +# CONFIG_ARCH_BCM_IPROC is not set +# CONFIG_ARCH_BERLIN is not set +# CONFIG_ARCH_BITMAIN is not set +# CONFIG_ARCH_BRCMSTB is not set +# CONFIG_ARCH_EXYNOS is not set +# CONFIG_ARCH_K3 is not set +# CONFIG_ARCH_LAYERSCAPE is not set +# CONFIG_ARCH_LG1K is not set +CONFIG_ARCH_HISI=y +# CONFIG_ARCH_MEDIATEK is not set +CONFIG_ARCH_MESON=y +CONFIG_ARCH_MVEBU=y +# CONFIG_ARCH_MXC is not set +CONFIG_ARCH_QCOM=y +# CONFIG_ARCH_REALTEK is not set +# CONFIG_ARCH_RENESAS is not set +CONFIG_ARCH_ROCKCHIP=y +CONFIG_ARCH_SEATTLE=y +# CONFIG_ARCH_STRATIX10 is not set +CONFIG_ARCH_SYNQUACER=y +CONFIG_ARCH_TEGRA=y +# CONFIG_ARCH_SPRD is not set +CONFIG_ARCH_THUNDER=y +CONFIG_ARCH_THUNDER2=y +# CONFIG_ARCH_UNIPHIER is not set +CONFIG_ARCH_VEXPRESS=y +CONFIG_ARCH_XGENE=y +# CONFIG_ARCH_ZX is not set +CONFIG_ARCH_ZYNQMP=y +# end of Platform selection + +# +# Kernel Features +# + +# +# ARM errata workarounds via the alternatives framework +# +CONFIG_ARM64_WORKAROUND_CLEAN_CACHE=y +CONFIG_ARM64_ERRATUM_826319=y +CONFIG_ARM64_ERRATUM_827319=y +CONFIG_ARM64_ERRATUM_824069=y +CONFIG_ARM64_ERRATUM_819472=y +CONFIG_ARM64_ERRATUM_832075=y +CONFIG_ARM64_ERRATUM_834220=y +CONFIG_ARM64_ERRATUM_845719=y +CONFIG_ARM64_ERRATUM_843419=y +CONFIG_ARM64_ERRATUM_1024718=y +CONFIG_ARM64_ERRATUM_1418040=y +CONFIG_ARM64_ERRATUM_1165522=y +CONFIG_ARM64_ERRATUM_1286807=y +CONFIG_ARM64_ERRATUM_1463225=y +CONFIG_ARM64_ERRATUM_1542419=y +CONFIG_CAVIUM_ERRATUM_22375=y +CONFIG_CAVIUM_ERRATUM_23144=y +CONFIG_CAVIUM_ERRATUM_23154=y +CONFIG_CAVIUM_ERRATUM_27456=y +CONFIG_CAVIUM_ERRATUM_30115=y +CONFIG_CAVIUM_TX2_ERRATUM_219=y +CONFIG_QCOM_FALKOR_ERRATUM_1003=y +CONFIG_ARM64_WORKAROUND_REPEAT_TLBI=y +CONFIG_QCOM_FALKOR_ERRATUM_1009=y +CONFIG_QCOM_QDF2400_ERRATUM_0065=y +CONFIG_SOCIONEXT_SYNQUACER_PREITS=y +CONFIG_HISILICON_ERRATUM_161600802=y +CONFIG_QCOM_FALKOR_ERRATUM_E1041=y +CONFIG_FUJITSU_ERRATUM_010001=y +# end of ARM errata workarounds via the alternatives framework + +# CONFIG_ARM64_4K_PAGES is not set +# CONFIG_ARM64_16K_PAGES is not set +CONFIG_ARM64_64K_PAGES=y +# CONFIG_ARM64_VA_BITS_39 is not set +CONFIG_ARM64_VA_BITS_48=y +CONFIG_ARM64_VA_BITS=48 +CONFIG_ARM64_PA_BITS_48=y +CONFIG_ARM64_PA_BITS=48 +# CONFIG_CPU_BIG_ENDIAN is not set +CONFIG_SCHED_MC=y +CONFIG_SCHED_SMT=y +CONFIG_NR_CPUS=256 +CONFIG_HOTPLUG_CPU=y +CONFIG_NUMA=y +CONFIG_NODES_SHIFT=6 +CONFIG_USE_PERCPU_NUMA_NODE_ID=y +CONFIG_HAVE_SETUP_PER_CPU_AREA=y +CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y +CONFIG_HOLES_IN_ZONE=y +CONFIG_NODES_SPAN_OTHER_NODES=y +# CONFIG_HZ_100 is not set +CONFIG_HZ_250=y +# CONFIG_HZ_300 is not set +# CONFIG_HZ_1000 is not set +CONFIG_HZ=250 +CONFIG_SCHED_HRTICK=y +CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y +CONFIG_ARCH_SPARSEMEM_ENABLE=y +CONFIG_ARCH_SPARSEMEM_DEFAULT=y +CONFIG_ARCH_SELECT_MEMORY_MODEL=y +CONFIG_HAVE_ARCH_PFN_VALID=y +CONFIG_HW_PERF_EVENTS=y +CONFIG_SYS_SUPPORTS_HUGETLBFS=y +CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y +CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y +CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK=y +CONFIG_SECCOMP=y +CONFIG_PARAVIRT=y +# CONFIG_PARAVIRT_TIME_ACCOUNTING is not set +CONFIG_KEXEC=y +# CONFIG_KEXEC_FILE is not set +CONFIG_CRASH_DUMP=y +CONFIG_XEN_DOM0=y +CONFIG_XEN=y +CONFIG_TANGO_BT=y +CONFIG_FORCE_MAX_ZONEORDER=11 +CONFIG_UNMAP_KERNEL_AT_EL0=y +CONFIG_HARDEN_BRANCH_PREDICTOR=y +CONFIG_HARDEN_EL2_VECTORS=y +CONFIG_ARM64_SSBD=y +CONFIG_RODATA_FULL_DEFAULT_ENABLED=y +# CONFIG_ARM64_SW_TTBR0_PAN is not set +CONFIG_ARM64_TAGGED_ADDR_ABI=y +CONFIG_COMPAT=y +CONFIG_KUSER_HELPERS=y +CONFIG_ARMV8_DEPRECATED=y +CONFIG_SWP_EMULATION=y +CONFIG_CP15_BARRIER_EMULATION=y +CONFIG_SETEND_EMULATION=y + +# +# ARMv8.1 architectural features +# +CONFIG_ARM64_HW_AFDBM=y +CONFIG_ARM64_PAN=y +CONFIG_ARM64_LSE_ATOMICS=y +CONFIG_ARM64_VHE=y +# end of ARMv8.1 architectural features + +# +# ARMv8.2 architectural features +# +CONFIG_ARM64_UAO=y +CONFIG_ARM64_PMEM=y +CONFIG_ARM64_RAS_EXTN=y +CONFIG_ARM64_CNP=y +# end of ARMv8.2 architectural features + +# +# ARMv8.3 architectural features +# +CONFIG_ARM64_PTR_AUTH=y +# end of ARMv8.3 architectural features + +CONFIG_ARM64_SVE=y +CONFIG_ARM64_MODULE_PLTS=y +CONFIG_ARM64_PSEUDO_NMI=y +CONFIG_RELOCATABLE=y +CONFIG_RANDOMIZE_BASE=y +CONFIG_RANDOMIZE_MODULE_REGION_FULL=y +# end of Kernel Features + +# +# Boot options +# +CONFIG_ARM64_ACPI_PARKING_PROTOCOL=y +CONFIG_CMDLINE="" +# CONFIG_CMDLINE_FORCE is not set +CONFIG_EFI_STUB=y +CONFIG_EFI=y +CONFIG_DMI=y +# end of Boot options + +CONFIG_SYSVIPC_COMPAT=y +CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION=y + +# +# Power management options +# +CONFIG_SUSPEND=y +CONFIG_SUSPEND_FREEZER=y +# CONFIG_SUSPEND_SKIP_SYNC is not set +CONFIG_HIBERNATE_CALLBACKS=y +CONFIG_HIBERNATION=y +CONFIG_PM_STD_PARTITION="" +CONFIG_PM_SLEEP=y +CONFIG_PM_SLEEP_SMP=y +# CONFIG_PM_AUTOSLEEP is not set +# CONFIG_PM_WAKELOCKS is not set +CONFIG_PM=y +CONFIG_PM_DEBUG=y +CONFIG_PM_ADVANCED_DEBUG=y +# CONFIG_PM_TEST_SUSPEND is not set +CONFIG_PM_SLEEP_DEBUG=y +# CONFIG_DPM_WATCHDOG is not set +CONFIG_PM_CLK=y +CONFIG_PM_GENERIC_DOMAINS=y +# CONFIG_WQ_POWER_EFFICIENT_DEFAULT is not set +CONFIG_PM_GENERIC_DOMAINS_SLEEP=y +CONFIG_PM_GENERIC_DOMAINS_OF=y +CONFIG_CPU_PM=y +CONFIG_ENERGY_MODEL=y +CONFIG_ARCH_HIBERNATION_POSSIBLE=y +CONFIG_ARCH_HIBERNATION_HEADER=y +CONFIG_ARCH_SUSPEND_POSSIBLE=y +# end of Power management options + +# +# CPU Power Management +# + +# +# CPU Idle +# +CONFIG_CPU_IDLE=y +CONFIG_CPU_IDLE_MULTIPLE_DRIVERS=y +CONFIG_CPU_IDLE_GOV_LADDER=y +CONFIG_CPU_IDLE_GOV_MENU=y +# CONFIG_CPU_IDLE_GOV_TEO is not set +CONFIG_DT_IDLE_STATES=y + +# +# ARM CPU Idle Drivers +# +CONFIG_ARM_CPUIDLE=y +# CONFIG_ARM_PSCI_CPUIDLE is not set +# end of ARM CPU Idle Drivers +# end of CPU Idle + +# +# CPU Frequency scaling +# +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_GOV_ATTR_SET=y +CONFIG_CPU_FREQ_GOV_COMMON=y +CONFIG_CPU_FREQ_STAT=y +# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y +CONFIG_CPU_FREQ_GOV_PERFORMANCE=y +CONFIG_CPU_FREQ_GOV_POWERSAVE=m +CONFIG_CPU_FREQ_GOV_USERSPACE=m +CONFIG_CPU_FREQ_GOV_ONDEMAND=m +CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m +CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y + +# +# CPU frequency scaling drivers +# +CONFIG_CPUFREQ_DT=m +CONFIG_CPUFREQ_DT_PLATDEV=y +CONFIG_ACPI_CPPC_CPUFREQ=m +CONFIG_ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM=m +CONFIG_ARM_ARMADA_37XX_CPUFREQ=m +# CONFIG_ARM_ARMADA_8K_CPUFREQ is not set +# CONFIG_ARM_QCOM_CPUFREQ_HW is not set +CONFIG_ARM_TEGRA20_CPUFREQ=y +CONFIG_ARM_TEGRA124_CPUFREQ=y +# CONFIG_QORIQ_CPUFREQ is not set +# end of CPU Frequency scaling +# end of CPU Power Management + +# +# Firmware Drivers +# +# CONFIG_ARM_SCMI_PROTOCOL is not set +# CONFIG_ARM_SCPI_PROTOCOL is not set +# CONFIG_ARM_SDE_INTERFACE is not set +# CONFIG_FIRMWARE_MEMMAP is not set +CONFIG_DMIID=y +CONFIG_DMI_SYSFS=y +# CONFIG_ISCSI_IBFT is not set +CONFIG_RASPBERRYPI_FIRMWARE=y +CONFIG_FW_CFG_SYSFS=m +# CONFIG_FW_CFG_SYSFS_CMDLINE is not set +CONFIG_QCOM_SCM=y +CONFIG_QCOM_SCM_64=y +# CONFIG_QCOM_SCM_DOWNLOAD_MODE_DEFAULT is not set +CONFIG_TURRIS_MOX_RWTM=m +CONFIG_HAVE_ARM_SMCCC=y +CONFIG_ARM_PSCI_FW=y +# CONFIG_ARM_PSCI_CHECKER is not set +CONFIG_GOOGLE_FIRMWARE=y +CONFIG_GOOGLE_COREBOOT_TABLE=m +CONFIG_GOOGLE_MEMCONSOLE=m +# CONFIG_GOOGLE_FRAMEBUFFER_COREBOOT is not set +CONFIG_GOOGLE_MEMCONSOLE_COREBOOT=m +# CONFIG_GOOGLE_VPD is not set + +# +# EFI (Extensible Firmware Interface) Support +# +CONFIG_EFI_VARS=m +CONFIG_EFI_ESRT=y +CONFIG_EFI_VARS_PSTORE=m +# CONFIG_EFI_VARS_PSTORE_DEFAULT_DISABLE is not set +CONFIG_EFI_PARAMS_FROM_FDT=y +CONFIG_EFI_RUNTIME_WRAPPERS=y +CONFIG_EFI_ARMSTUB=y +CONFIG_EFI_ARMSTUB_DTB_LOADER=y +CONFIG_EFI_BOOTLOADER_CONTROL=m +CONFIG_EFI_CAPSULE_LOADER=m +# CONFIG_EFI_TEST is not set +# CONFIG_RESET_ATTACK_MITIGATION is not set +# end of EFI (Extensible Firmware Interface) Support + +CONFIG_UEFI_CPER=y +CONFIG_UEFI_CPER_ARM=y +CONFIG_EFI_EARLYCON=y +CONFIG_EFI_CUSTOM_SSDT_OVERLAYS=y +CONFIG_MESON_SM=y + +# +# Tegra firmware driver +# +# CONFIG_TEGRA_IVC is not set +# end of Tegra firmware driver + +# +# Zynq MPSoC Firmware Drivers +# +CONFIG_ZYNQMP_FIRMWARE=y +# CONFIG_ZYNQMP_FIRMWARE_DEBUG is not set +# end of Zynq MPSoC Firmware Drivers +# end of Firmware Drivers + +CONFIG_ARCH_SUPPORTS_ACPI=y +CONFIG_ACPI=y +CONFIG_ACPI_GENERIC_GSI=y +CONFIG_ACPI_CCA_REQUIRED=y +# CONFIG_ACPI_DEBUGGER is not set +CONFIG_ACPI_SPCR_TABLE=y +# CONFIG_ACPI_EC_DEBUGFS is not set +CONFIG_ACPI_AC=y +CONFIG_ACPI_BATTERY=y +CONFIG_ACPI_BUTTON=y +CONFIG_ACPI_FAN=y +CONFIG_ACPI_TAD=m +# CONFIG_ACPI_DOCK is not set +CONFIG_ACPI_PROCESSOR_IDLE=y +CONFIG_ACPI_MCFG=y +CONFIG_ACPI_CPPC_LIB=y +CONFIG_ACPI_PROCESSOR=y +CONFIG_ACPI_IPMI=m +CONFIG_ACPI_HOTPLUG_CPU=y +CONFIG_ACPI_THERMAL=y +CONFIG_ACPI_NUMA=y +CONFIG_ARCH_HAS_ACPI_TABLE_UPGRADE=y +CONFIG_ACPI_TABLE_UPGRADE=y +# CONFIG_ACPI_DEBUG is not set +CONFIG_ACPI_PCI_SLOT=y +CONFIG_ACPI_CONTAINER=y +# CONFIG_ACPI_HOTPLUG_MEMORY is not set +CONFIG_ACPI_HED=y +# CONFIG_ACPI_CUSTOM_METHOD is not set +CONFIG_ACPI_BGRT=y +CONFIG_ACPI_REDUCED_HARDWARE_ONLY=y +CONFIG_ACPI_NFIT=m +# CONFIG_NFIT_SECURITY_DEBUG is not set +# CONFIG_ACPI_HMAT is not set +CONFIG_HAVE_ACPI_APEI=y +CONFIG_ACPI_APEI=y +CONFIG_ACPI_APEI_GHES=y +CONFIG_ACPI_APEI_PCIEAER=y +CONFIG_ACPI_APEI_SEA=y +CONFIG_ACPI_APEI_MEMORY_FAILURE=y +CONFIG_ACPI_APEI_EINJ=m +# CONFIG_ACPI_APEI_ERST_DEBUG is not set +CONFIG_ACPI_WATCHDOG=y +# CONFIG_PMIC_OPREGION is not set +# CONFIG_ACPI_CONFIGFS is not set +CONFIG_ACPI_IORT=y +CONFIG_ACPI_GTDT=y +CONFIG_ACPI_PPTT=y +CONFIG_HAVE_KVM_IRQCHIP=y +CONFIG_HAVE_KVM_IRQFD=y +CONFIG_HAVE_KVM_IRQ_ROUTING=y +CONFIG_HAVE_KVM_EVENTFD=y +CONFIG_KVM_MMIO=y +CONFIG_HAVE_KVM_MSI=y +CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT=y +CONFIG_KVM_VFIO=y +CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL=y +CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT=y +CONFIG_HAVE_KVM_IRQ_BYPASS=y +CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE=y +CONFIG_IRQ_BYPASS_MANAGER=y +CONFIG_VIRTUALIZATION=y +CONFIG_KVM=y +CONFIG_KVM_ARM_HOST=y +CONFIG_KVM_ARM_PMU=y +CONFIG_KVM_INDIRECT_VECTORS=y +CONFIG_ARM64_CRYPTO=y +CONFIG_CRYPTO_SHA256_ARM64=m +# CONFIG_CRYPTO_SHA512_ARM64 is not set +CONFIG_CRYPTO_SHA1_ARM64_CE=m +CONFIG_CRYPTO_SHA2_ARM64_CE=m +# CONFIG_CRYPTO_SHA512_ARM64_CE is not set +# CONFIG_CRYPTO_SHA3_ARM64 is not set +# CONFIG_CRYPTO_SM3_ARM64_CE is not set +# CONFIG_CRYPTO_SM4_ARM64_CE is not set +CONFIG_CRYPTO_GHASH_ARM64_CE=m +# CONFIG_CRYPTO_CRCT10DIF_ARM64_CE is not set +CONFIG_CRYPTO_AES_ARM64=m +CONFIG_CRYPTO_AES_ARM64_CE=m +CONFIG_CRYPTO_AES_ARM64_CE_CCM=m +CONFIG_CRYPTO_AES_ARM64_CE_BLK=m +# CONFIG_CRYPTO_AES_ARM64_NEON_BLK is not set +# CONFIG_CRYPTO_CHACHA20_NEON is not set +# CONFIG_CRYPTO_NHPOLY1305_NEON is not set +# CONFIG_CRYPTO_AES_ARM64_BS is not set + +# +# General architecture-dependent options +# +CONFIG_CRASH_CORE=y +CONFIG_KEXEC_CORE=y +CONFIG_KPROBES=y +CONFIG_JUMP_LABEL=y +# CONFIG_STATIC_KEYS_SELFTEST is not set +CONFIG_UPROBES=y +CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y +CONFIG_KRETPROBES=y +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KRETPROBES=y +CONFIG_HAVE_FUNCTION_ERROR_INJECTION=y +CONFIG_HAVE_NMI=y +CONFIG_HAVE_ARCH_TRACEHOOK=y +CONFIG_HAVE_DMA_CONTIGUOUS=y +CONFIG_GENERIC_SMP_IDLE_THREAD=y +CONFIG_GENERIC_IDLE_POLL_SETUP=y +CONFIG_ARCH_HAS_FORTIFY_SOURCE=y +CONFIG_ARCH_HAS_KEEPINITRD=y +CONFIG_ARCH_HAS_SET_MEMORY=y +CONFIG_ARCH_HAS_SET_DIRECT_MAP=y +CONFIG_HAVE_ARCH_THREAD_STRUCT_WHITELIST=y +CONFIG_HAVE_ASM_MODVERSIONS=y +CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y +CONFIG_HAVE_RSEQ=y +CONFIG_HAVE_FUNCTION_ARG_ACCESS_API=y +CONFIG_HAVE_CLK=y +CONFIG_HAVE_HW_BREAKPOINT=y +CONFIG_HAVE_PERF_REGS=y +CONFIG_HAVE_PERF_USER_STACK_DUMP=y +CONFIG_HAVE_ARCH_JUMP_LABEL=y +CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE=y +CONFIG_HAVE_RCU_TABLE_FREE=y +CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG=y +CONFIG_HAVE_ALIGNED_STRUCT_PAGE=y +CONFIG_HAVE_CMPXCHG_LOCAL=y +CONFIG_HAVE_CMPXCHG_DOUBLE=y +CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION=y +CONFIG_HAVE_ARCH_SECCOMP_FILTER=y +CONFIG_SECCOMP_FILTER=y +CONFIG_HAVE_ARCH_STACKLEAK=y +CONFIG_HAVE_STACKPROTECTOR=y +CONFIG_CC_HAS_STACKPROTECTOR_NONE=y +CONFIG_STACKPROTECTOR=y +CONFIG_STACKPROTECTOR_STRONG=y +CONFIG_HAVE_CONTEXT_TRACKING=y +CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y +CONFIG_HAVE_IRQ_TIME_ACCOUNTING=y +CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE=y +CONFIG_HAVE_ARCH_HUGE_VMAP=y +CONFIG_HAVE_MOD_ARCH_SPECIFIC=y +CONFIG_MODULES_USE_ELF_RELA=y +CONFIG_ARCH_HAS_ELF_RANDOMIZE=y +CONFIG_HAVE_ARCH_MMAP_RND_BITS=y +CONFIG_ARCH_MMAP_RND_BITS=18 +CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS=y +CONFIG_ARCH_MMAP_RND_COMPAT_BITS=11 +CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT=y +CONFIG_HAVE_COPY_THREAD_TLS=y +CONFIG_CLONE_BACKWARDS=y +CONFIG_OLD_SIGSUSPEND3=y +CONFIG_COMPAT_OLD_SIGACTION=y +CONFIG_64BIT_TIME=y +CONFIG_COMPAT_32BIT_TIME=y +CONFIG_HAVE_ARCH_VMAP_STACK=y +CONFIG_VMAP_STACK=y +CONFIG_ARCH_HAS_STRICT_KERNEL_RWX=y +CONFIG_STRICT_KERNEL_RWX=y +CONFIG_ARCH_HAS_STRICT_MODULE_RWX=y +CONFIG_STRICT_MODULE_RWX=y +CONFIG_REFCOUNT_FULL=y +CONFIG_HAVE_ARCH_PREL32_RELOCATIONS=y +CONFIG_ARCH_USE_MEMREMAP_PROT=y +# CONFIG_LOCK_EVENT_COUNTS is not set +CONFIG_ARCH_HAS_RELR=y + +# +# GCOV-based kernel profiling +# +# CONFIG_GCOV_KERNEL is not set +CONFIG_ARCH_HAS_GCOV_PROFILE_ALL=y +# end of GCOV-based kernel profiling + +CONFIG_PLUGIN_HOSTCC="" +CONFIG_HAVE_GCC_PLUGINS=y +# end of General architecture-dependent options + +CONFIG_RT_MUTEXES=y +CONFIG_BASE_SMALL=0 +CONFIG_MODULE_SIG_FORMAT=y +CONFIG_MODULES=y +CONFIG_MODULE_FORCE_LOAD=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_ASM_MODVERSIONS=y +# CONFIG_MODULE_SRCVERSION_ALL is not set +CONFIG_MODULE_SIG=y +# CONFIG_MODULE_SIG_FORCE is not set +# CONFIG_MODULE_SIG_ALL is not set +# CONFIG_MODULE_SIG_SHA1 is not set +# CONFIG_MODULE_SIG_SHA224 is not set +CONFIG_MODULE_SIG_SHA256=y +# CONFIG_MODULE_SIG_SHA384 is not set +# CONFIG_MODULE_SIG_SHA512 is not set +CONFIG_MODULE_SIG_HASH="sha256" +# CONFIG_MODULE_COMPRESS is not set +# CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS is not set +# CONFIG_UNUSED_SYMBOLS is not set +# CONFIG_TRIM_UNUSED_KSYMS is not set +CONFIG_MODULES_TREE_LOOKUP=y +CONFIG_BLOCK=y +CONFIG_BLK_RQ_ALLOC_TIME=y +CONFIG_BLK_SCSI_REQUEST=y +CONFIG_BLK_DEV_BSG=y +CONFIG_BLK_DEV_BSGLIB=y +CONFIG_BLK_DEV_INTEGRITY=y +CONFIG_BLK_DEV_ZONED=y +CONFIG_BLK_DEV_THROTTLING=y +# CONFIG_BLK_DEV_THROTTLING_LOW is not set +# CONFIG_BLK_CMDLINE_PARSER is not set +CONFIG_BLK_WBT=y +# CONFIG_BLK_CGROUP_IOLATENCY is not set +CONFIG_BYTEDANCE_BLK_CGROUP_IOTRACE=y +CONFIG_BLK_CGROUP_IOCOST=y +CONFIG_BLK_WBT_MQ=y +CONFIG_BLK_DEBUG_FS=y +CONFIG_BLK_DEBUG_FS_ZONED=y +CONFIG_BLK_SED_OPAL=y + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +# CONFIG_AIX_PARTITION is not set +# CONFIG_OSF_PARTITION is not set +# CONFIG_AMIGA_PARTITION is not set +# CONFIG_ATARI_PARTITION is not set +# CONFIG_MAC_PARTITION is not set +CONFIG_MSDOS_PARTITION=y +# CONFIG_BSD_DISKLABEL is not set +# CONFIG_MINIX_SUBPARTITION is not set +# CONFIG_SOLARIS_X86_PARTITION is not set +# CONFIG_UNIXWARE_DISKLABEL is not set +# CONFIG_LDM_PARTITION is not set +# CONFIG_SGI_PARTITION is not set +# CONFIG_ULTRIX_PARTITION is not set +# CONFIG_SUN_PARTITION is not set +CONFIG_KARMA_PARTITION=y +CONFIG_EFI_PARTITION=y +# CONFIG_SYSV68_PARTITION is not set +# CONFIG_CMDLINE_PARTITION is not set +# end of Partition Types + +CONFIG_BLOCK_COMPAT=y +CONFIG_BLK_MQ_PCI=y +CONFIG_BLK_MQ_VIRTIO=y +CONFIG_BLK_MQ_RDMA=y +CONFIG_BLK_PM=y + +# +# IO Schedulers +# +CONFIG_MQ_IOSCHED_DEADLINE=y +CONFIG_MQ_IOSCHED_KYBER=m +CONFIG_IOSCHED_BFQ=m +CONFIG_BFQ_GROUP_IOSCHED=y +# CONFIG_BFQ_CGROUP_DEBUG is not set +# end of IO Schedulers + +CONFIG_PREEMPT_NOTIFIERS=y +CONFIG_PADATA=y +CONFIG_ASN1=y +CONFIG_ARCH_INLINE_SPIN_TRYLOCK=y +CONFIG_ARCH_INLINE_SPIN_TRYLOCK_BH=y +CONFIG_ARCH_INLINE_SPIN_LOCK=y +CONFIG_ARCH_INLINE_SPIN_LOCK_BH=y +CONFIG_ARCH_INLINE_SPIN_LOCK_IRQ=y +CONFIG_ARCH_INLINE_SPIN_LOCK_IRQSAVE=y +CONFIG_ARCH_INLINE_SPIN_UNLOCK=y +CONFIG_ARCH_INLINE_SPIN_UNLOCK_BH=y +CONFIG_ARCH_INLINE_SPIN_UNLOCK_IRQ=y +CONFIG_ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE=y +CONFIG_ARCH_INLINE_READ_LOCK=y +CONFIG_ARCH_INLINE_READ_LOCK_BH=y +CONFIG_ARCH_INLINE_READ_LOCK_IRQ=y +CONFIG_ARCH_INLINE_READ_LOCK_IRQSAVE=y +CONFIG_ARCH_INLINE_READ_UNLOCK=y +CONFIG_ARCH_INLINE_READ_UNLOCK_BH=y +CONFIG_ARCH_INLINE_READ_UNLOCK_IRQ=y +CONFIG_ARCH_INLINE_READ_UNLOCK_IRQRESTORE=y +CONFIG_ARCH_INLINE_WRITE_LOCK=y +CONFIG_ARCH_INLINE_WRITE_LOCK_BH=y +CONFIG_ARCH_INLINE_WRITE_LOCK_IRQ=y +CONFIG_ARCH_INLINE_WRITE_LOCK_IRQSAVE=y +CONFIG_ARCH_INLINE_WRITE_UNLOCK=y +CONFIG_ARCH_INLINE_WRITE_UNLOCK_BH=y +CONFIG_ARCH_INLINE_WRITE_UNLOCK_IRQ=y +CONFIG_ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE=y +CONFIG_INLINE_SPIN_TRYLOCK=y +CONFIG_INLINE_SPIN_TRYLOCK_BH=y +CONFIG_INLINE_SPIN_LOCK=y +CONFIG_INLINE_SPIN_LOCK_BH=y +CONFIG_INLINE_SPIN_LOCK_IRQ=y +CONFIG_INLINE_SPIN_LOCK_IRQSAVE=y +CONFIG_INLINE_SPIN_UNLOCK_BH=y +CONFIG_INLINE_SPIN_UNLOCK_IRQ=y +CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE=y +CONFIG_INLINE_READ_LOCK=y +CONFIG_INLINE_READ_LOCK_BH=y +CONFIG_INLINE_READ_LOCK_IRQ=y +CONFIG_INLINE_READ_LOCK_IRQSAVE=y +CONFIG_INLINE_READ_UNLOCK=y +CONFIG_INLINE_READ_UNLOCK_BH=y +CONFIG_INLINE_READ_UNLOCK_IRQ=y +CONFIG_INLINE_READ_UNLOCK_IRQRESTORE=y +CONFIG_INLINE_WRITE_LOCK=y +CONFIG_INLINE_WRITE_LOCK_BH=y +CONFIG_INLINE_WRITE_LOCK_IRQ=y +CONFIG_INLINE_WRITE_LOCK_IRQSAVE=y +CONFIG_INLINE_WRITE_UNLOCK=y +CONFIG_INLINE_WRITE_UNLOCK_BH=y +CONFIG_INLINE_WRITE_UNLOCK_IRQ=y +CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE=y +CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=y +CONFIG_MUTEX_SPIN_ON_OWNER=y +CONFIG_RWSEM_SPIN_ON_OWNER=y +CONFIG_LOCK_SPIN_ON_OWNER=y +CONFIG_ARCH_USE_QUEUED_SPINLOCKS=y +CONFIG_QUEUED_SPINLOCKS=y +CONFIG_ARCH_USE_QUEUED_RWLOCKS=y +CONFIG_QUEUED_RWLOCKS=y +CONFIG_ARCH_HAS_SYSCALL_WRAPPER=y +CONFIG_FREEZER=y + +# +# Executable file formats +# +CONFIG_BINFMT_ELF=y +CONFIG_COMPAT_BINFMT_ELF=y +CONFIG_ELFCORE=y +CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y +CONFIG_BINFMT_SCRIPT=y +CONFIG_BINFMT_MISC=y +CONFIG_COREDUMP=y +# end of Executable file formats + +# +# Memory Management options +# +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_SPARSEMEM_MANUAL=y +CONFIG_SPARSEMEM=y +CONFIG_NEED_MULTIPLE_NODES=y +CONFIG_HAVE_MEMORY_PRESENT=y +CONFIG_SPARSEMEM_EXTREME=y +CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y +CONFIG_SPARSEMEM_VMEMMAP=y +CONFIG_HAVE_MEMBLOCK_NODE_MAP=y +CONFIG_HAVE_FAST_GUP=y +CONFIG_ARCH_KEEP_MEMBLOCK=y +CONFIG_MEMORY_ISOLATION=y +CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTPLUG_SPARSE=y +# CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE is not set +CONFIG_SPLIT_PTLOCK_CPUS=4 +CONFIG_MEMORY_BALLOON=y +CONFIG_BALLOON_COMPACTION=y +CONFIG_COMPACTION=y +CONFIG_MIGRATION=y +CONFIG_CONTIG_ALLOC=y +CONFIG_PHYS_ADDR_T_64BIT=y +CONFIG_MMU_NOTIFIER=y +CONFIG_KSM=y +CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 +CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE=y +CONFIG_MEMORY_FAILURE=y +CONFIG_HWPOISON_INJECT=m +CONFIG_TRANSPARENT_HUGEPAGE=y +# CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS is not set +CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y +CONFIG_TRANSPARENT_HUGE_PAGECACHE=y +# CONFIG_CLEANCACHE is not set +CONFIG_FRONTSWAP=y +# CONFIG_CMA is not set +CONFIG_ZSWAP=y +CONFIG_ZPOOL=y +CONFIG_ZBUD=y +CONFIG_Z3FOLD=m +CONFIG_ZSMALLOC=m +# CONFIG_ZSMALLOC_STAT is not set +CONFIG_GENERIC_EARLY_IOREMAP=y +# CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set +# CONFIG_IDLE_PAGE_TRACKING is not set +CONFIG_ARCH_HAS_PTE_DEVMAP=y +CONFIG_FRAME_VECTOR=y +# CONFIG_PERCPU_STATS is not set +# CONFIG_GUP_BENCHMARK is not set +# CONFIG_READ_ONLY_THP_FOR_FS is not set +CONFIG_ARCH_HAS_PTE_SPECIAL=y + +# +# Data Access Monitoring +# +# CONFIG_DAMON is not set +# end of Data Access Monitoring +# end of Memory Management options + +CONFIG_NET=y +CONFIG_COMPAT_NETLINK_MESSAGES=y +CONFIG_NET_INGRESS=y +CONFIG_NET_EGRESS=y +CONFIG_NET_REDIRECT=y +CONFIG_SKB_EXTENSIONS=y + +# +# Networking options +# +CONFIG_PACKET=y +CONFIG_PACKET_DIAG=m +CONFIG_UNIX=y +CONFIG_UNIX_SCM=y +CONFIG_UNIX_DIAG=m +# CONFIG_TLS is not set +CONFIG_XFRM=y +CONFIG_XFRM_OFFLOAD=y +CONFIG_XFRM_ALGO=m +CONFIG_XFRM_USER=m +CONFIG_XFRM_INTERFACE=m +CONFIG_XFRM_SUB_POLICY=y +CONFIG_XFRM_MIGRATE=y +CONFIG_XFRM_STATISTICS=y +CONFIG_XFRM_IPCOMP=m +CONFIG_NET_KEY=m +CONFIG_NET_KEY_MIGRATE=y +CONFIG_SMC=m +CONFIG_SMC_DIAG=m +CONFIG_XDP_SOCKETS=y +# CONFIG_XDP_SOCKETS_DIAG is not set +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_FIB_TRIE_STATS=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IP_ROUTE_CLASSID=y +# CONFIG_IP_PNP is not set +CONFIG_NET_IPIP=m +CONFIG_NET_IPGRE_DEMUX=m +CONFIG_NET_IP_TUNNEL=m +CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_BROADCAST=y +CONFIG_IP_MROUTE_COMMON=y +CONFIG_IP_MROUTE=y +CONFIG_IP_MROUTE_MULTIPLE_TABLES=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_SYN_COOKIES=y +CONFIG_NET_IPVTI=m +CONFIG_NET_UDP_TUNNEL=m +CONFIG_NET_FOU=m +CONFIG_NET_FOU_IP_TUNNELS=y +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +CONFIG_INET_ESP_OFFLOAD=m +CONFIG_INET_IPCOMP=m +CONFIG_INET_XFRM_TUNNEL=m +CONFIG_INET_TUNNEL=m +CONFIG_INET_DIAG=m +CONFIG_INET_TCP_DIAG=m +CONFIG_INET_UDP_DIAG=m +CONFIG_INET_RAW_DIAG=m +CONFIG_INET_DIAG_DESTROY=y +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_BIC=m +CONFIG_TCP_CONG_CUBIC=y +CONFIG_TCP_CONG_WESTWOOD=m +CONFIG_TCP_CONG_HTCP=m +CONFIG_TCP_CONG_HSTCP=m +CONFIG_TCP_CONG_HYBLA=m +CONFIG_TCP_CONG_VEGAS=m +CONFIG_TCP_CONG_NV=m +CONFIG_TCP_CONG_SCALABLE=m +CONFIG_TCP_CONG_LP=m +CONFIG_TCP_CONG_VENO=m +CONFIG_TCP_CONG_YEAH=m +CONFIG_TCP_CONG_ILLINOIS=m +CONFIG_TCP_CONG_DCTCP=m +CONFIG_TCP_CONG_CDG=m +CONFIG_TCP_CONG_BBR=m +# CONFIG_TCP_CONG_BBR2 is not set +CONFIG_DEFAULT_CUBIC=y +# CONFIG_DEFAULT_RENO is not set +CONFIG_DEFAULT_TCP_CONG="cubic" +CONFIG_TCP_MD5SIG=y +CONFIG_TCP_SKB_TRACE=y +CONFIG_IPV6=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=m +CONFIG_INET6_ESP=m +CONFIG_INET6_ESP_OFFLOAD=m +CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_ILA=m +CONFIG_INET6_XFRM_TUNNEL=m +CONFIG_INET6_TUNNEL=m +CONFIG_IPV6_VTI=m +CONFIG_IPV6_SIT=m +CONFIG_IPV6_SIT_6RD=y +CONFIG_IPV6_NDISC_NODETYPE=y +CONFIG_IPV6_TUNNEL=m +CONFIG_IPV6_GRE=m +CONFIG_IPV6_FOU=m +CONFIG_IPV6_FOU_TUNNEL=m +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_SUBTREES=y +CONFIG_IPV6_MROUTE=y +CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y +CONFIG_IPV6_PIMSM_V2=y +CONFIG_IPV6_SEG6_LWTUNNEL=y +CONFIG_IPV6_SEG6_HMAC=y +CONFIG_IPV6_SEG6_BPF=y +# CONFIG_NETLABEL is not set +CONFIG_NETWORK_SECMARK=y +CONFIG_NET_PTP_CLASSIFY=y +# CONFIG_NETWORK_PHY_TIMESTAMPING is not set +CONFIG_NETFILTER=y +CONFIG_NETFILTER_ADVANCED=y +CONFIG_BRIDGE_NETFILTER=m + +# +# Core Netfilter Configuration +# +CONFIG_NETFILTER_INGRESS=y +CONFIG_NETFILTER_NETLINK=m +CONFIG_NETFILTER_FAMILY_BRIDGE=y +CONFIG_NETFILTER_FAMILY_ARP=y +CONFIG_NETFILTER_NETLINK_ACCT=m +CONFIG_NETFILTER_NETLINK_QUEUE=m +CONFIG_NETFILTER_NETLINK_LOG=m +CONFIG_NETFILTER_NETLINK_OSF=m +CONFIG_NF_CONNTRACK=m +CONFIG_NF_LOG_COMMON=m +CONFIG_NF_LOG_NETDEV=m +CONFIG_NETFILTER_CONNCOUNT=m +CONFIG_NF_CONNTRACK_MARK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_ZONES=y +CONFIG_NF_CONNTRACK_PROCFS=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CONNTRACK_TIMEOUT=y +CONFIG_NF_CONNTRACK_TIMESTAMP=y +CONFIG_NF_CONNTRACK_LABELS=y +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_GRE=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y +CONFIG_NF_CONNTRACK_AMANDA=m +CONFIG_NF_CONNTRACK_FTP=m +CONFIG_NF_CONNTRACK_H323=m +CONFIG_NF_CONNTRACK_IRC=m +CONFIG_NF_CONNTRACK_BROADCAST=m +CONFIG_NF_CONNTRACK_NETBIOS_NS=m +CONFIG_NF_CONNTRACK_SNMP=m +CONFIG_NF_CONNTRACK_PPTP=m +CONFIG_NF_CONNTRACK_SANE=m +CONFIG_NF_CONNTRACK_SIP=m +CONFIG_NF_CONNTRACK_TFTP=m +CONFIG_NF_CT_NETLINK=m +CONFIG_NF_CT_NETLINK_TIMEOUT=m +CONFIG_NF_CT_NETLINK_HELPER=m +CONFIG_NETFILTER_NETLINK_GLUE_CT=y +CONFIG_NF_NAT=m +CONFIG_NF_NAT_AMANDA=m +CONFIG_NF_NAT_FTP=m +CONFIG_NF_NAT_IRC=m +CONFIG_NF_NAT_SIP=m +CONFIG_NF_NAT_TFTP=m +CONFIG_NF_NAT_REDIRECT=y +CONFIG_NF_NAT_MASQUERADE=y +CONFIG_NETFILTER_SYNPROXY=m +CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_SET=m +CONFIG_NF_TABLES_INET=y +CONFIG_NF_TABLES_NETDEV=y +CONFIG_NFT_NUMGEN=m +CONFIG_NFT_CT=m +CONFIG_NFT_FLOW_OFFLOAD=m +CONFIG_NFT_COUNTER=m +CONFIG_NFT_CONNLIMIT=m +CONFIG_NFT_LOG=m +CONFIG_NFT_LIMIT=m +CONFIG_NFT_MASQ=m +CONFIG_NFT_REDIR=m +CONFIG_NFT_NAT=m +CONFIG_NFT_TUNNEL=m +CONFIG_NFT_OBJREF=m +CONFIG_NFT_QUEUE=m +CONFIG_NFT_QUOTA=m +CONFIG_NFT_REJECT=m +CONFIG_NFT_REJECT_INET=m +CONFIG_NFT_COMPAT=m +CONFIG_NFT_HASH=m +CONFIG_NFT_FIB=m +CONFIG_NFT_FIB_INET=m +# CONFIG_NFT_XFRM is not set +CONFIG_NFT_SOCKET=m +CONFIG_NFT_OSF=m +CONFIG_NFT_TPROXY=m +# CONFIG_NFT_SYNPROXY is not set +CONFIG_NF_DUP_NETDEV=m +CONFIG_NFT_DUP_NETDEV=m +CONFIG_NFT_FWD_NETDEV=m +CONFIG_NFT_FIB_NETDEV=m +CONFIG_NF_FLOW_TABLE_INET=m +CONFIG_NF_FLOW_TABLE=m +CONFIG_NETFILTER_XTABLES=m + +# +# Xtables combined modules +# +CONFIG_NETFILTER_XT_MARK=m +CONFIG_NETFILTER_XT_CONNMARK=m +CONFIG_NETFILTER_XT_SET=m + +# +# Xtables targets +# +CONFIG_NETFILTER_XT_TARGET_AUDIT=m +CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m +CONFIG_NETFILTER_XT_TARGET_CONNMARK=m +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m +CONFIG_NETFILTER_XT_TARGET_CT=m +CONFIG_NETFILTER_XT_TARGET_DSCP=m +CONFIG_NETFILTER_XT_TARGET_HL=m +CONFIG_NETFILTER_XT_TARGET_HMARK=m +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m +CONFIG_NETFILTER_XT_TARGET_LED=m +CONFIG_NETFILTER_XT_TARGET_LOG=m +CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_NAT=m +CONFIG_NETFILTER_XT_TARGET_NETMAP=m +CONFIG_NETFILTER_XT_TARGET_NFLOG=m +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m +# CONFIG_NETFILTER_XT_TARGET_NOTRACK is not set +CONFIG_NETFILTER_XT_TARGET_RATEEST=m +CONFIG_NETFILTER_XT_TARGET_REDIRECT=m +CONFIG_NETFILTER_XT_TARGET_MASQUERADE=m +CONFIG_NETFILTER_XT_TARGET_TEE=m +CONFIG_NETFILTER_XT_TARGET_TPROXY=m +CONFIG_NETFILTER_XT_TARGET_TRACE=m +CONFIG_NETFILTER_XT_TARGET_SECMARK=m +CONFIG_NETFILTER_XT_TARGET_TCPMSS=m +CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m + +# +# Xtables matches +# +CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m +CONFIG_NETFILTER_XT_MATCH_BPF=m +CONFIG_NETFILTER_XT_MATCH_CGROUP=m +CONFIG_NETFILTER_XT_MATCH_CLUSTER=m +CONFIG_NETFILTER_XT_MATCH_COMMENT=m +CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m +CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m +CONFIG_NETFILTER_XT_MATCH_CONNMARK=m +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +CONFIG_NETFILTER_XT_MATCH_CPU=m +CONFIG_NETFILTER_XT_MATCH_DCCP=m +CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m +CONFIG_NETFILTER_XT_MATCH_DSCP=m +CONFIG_NETFILTER_XT_MATCH_ECN=m +CONFIG_NETFILTER_XT_MATCH_ESP=m +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m +CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_HL=m +CONFIG_NETFILTER_XT_MATCH_IPCOMP=m +CONFIG_NETFILTER_XT_MATCH_IPRANGE=m +CONFIG_NETFILTER_XT_MATCH_IPVS=m +CONFIG_NETFILTER_XT_MATCH_L2TP=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_LIMIT=m +CONFIG_NETFILTER_XT_MATCH_MAC=m +CONFIG_NETFILTER_XT_MATCH_MARK=m +CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m +CONFIG_NETFILTER_XT_MATCH_NFACCT=m +CONFIG_NETFILTER_XT_MATCH_OSF=m +CONFIG_NETFILTER_XT_MATCH_OWNER=m +CONFIG_NETFILTER_XT_MATCH_POLICY=m +CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m +CONFIG_NETFILTER_XT_MATCH_QUOTA=m +CONFIG_NETFILTER_XT_MATCH_RATEEST=m +CONFIG_NETFILTER_XT_MATCH_REALM=m +CONFIG_NETFILTER_XT_MATCH_RECENT=m +CONFIG_NETFILTER_XT_MATCH_SCTP=m +CONFIG_NETFILTER_XT_MATCH_SOCKET=m +CONFIG_NETFILTER_XT_MATCH_STATE=m +CONFIG_NETFILTER_XT_MATCH_STATISTIC=m +CONFIG_NETFILTER_XT_MATCH_STRING=m +CONFIG_NETFILTER_XT_MATCH_TCPMSS=m +CONFIG_NETFILTER_XT_MATCH_TIME=m +CONFIG_NETFILTER_XT_MATCH_U32=m +# end of Core Netfilter Configuration + +CONFIG_IP_SET=m +CONFIG_IP_SET_MAX=256 +CONFIG_IP_SET_BITMAP_IP=m +CONFIG_IP_SET_BITMAP_IPMAC=m +CONFIG_IP_SET_BITMAP_PORT=m +CONFIG_IP_SET_HASH_IP=m +CONFIG_IP_SET_HASH_IPMARK=m +CONFIG_IP_SET_HASH_IPPORT=m +CONFIG_IP_SET_HASH_IPPORTIP=m +CONFIG_IP_SET_HASH_IPPORTNET=m +CONFIG_IP_SET_HASH_IPMAC=m +CONFIG_IP_SET_HASH_MAC=m +CONFIG_IP_SET_HASH_NETPORTNET=m +CONFIG_IP_SET_HASH_NET=m +CONFIG_IP_SET_HASH_NETNET=m +CONFIG_IP_SET_HASH_NETPORT=m +CONFIG_IP_SET_HASH_NETIFACE=m +CONFIG_IP_SET_LIST_SET=m +CONFIG_IP_VS=m +CONFIG_IP_VS_IPV6=y +# CONFIG_IP_VS_DEBUG is not set +CONFIG_IP_VS_TAB_BITS=12 + +# +# IPVS transport protocol load balancing support +# +CONFIG_IP_VS_PROTO_TCP=y +CONFIG_IP_VS_PROTO_UDP=y +CONFIG_IP_VS_PROTO_AH_ESP=y +CONFIG_IP_VS_PROTO_ESP=y +CONFIG_IP_VS_PROTO_AH=y +CONFIG_IP_VS_PROTO_SCTP=y + +# +# IPVS scheduler +# +CONFIG_IP_VS_RR=m +CONFIG_IP_VS_WRR=m +CONFIG_IP_VS_LC=m +CONFIG_IP_VS_WLC=m +CONFIG_IP_VS_FO=m +CONFIG_IP_VS_OVF=m +CONFIG_IP_VS_LBLC=m +CONFIG_IP_VS_LBLCR=m +CONFIG_IP_VS_DH=m +CONFIG_IP_VS_SH=m +CONFIG_IP_VS_MH=m +CONFIG_IP_VS_SED=m +CONFIG_IP_VS_NQ=m + +# +# IPVS SH scheduler +# +CONFIG_IP_VS_SH_TAB_BITS=8 + +# +# IPVS MH scheduler +# +CONFIG_IP_VS_MH_TAB_INDEX=12 + +# +# IPVS application helper +# +CONFIG_IP_VS_FTP=m +CONFIG_IP_VS_NFCT=y +CONFIG_IP_VS_PE_SIP=m + +# +# IP: Netfilter Configuration +# +CONFIG_NF_DEFRAG_IPV4=m +CONFIG_NF_SOCKET_IPV4=m +CONFIG_NF_TPROXY_IPV4=m +CONFIG_NF_TABLES_IPV4=y +CONFIG_NFT_REJECT_IPV4=m +CONFIG_NFT_DUP_IPV4=m +CONFIG_NFT_FIB_IPV4=m +CONFIG_NF_TABLES_ARP=y +CONFIG_NF_FLOW_TABLE_IPV4=m +CONFIG_NF_DUP_IPV4=m +CONFIG_NF_LOG_ARP=m +CONFIG_NF_LOG_IPV4=m +CONFIG_NF_REJECT_IPV4=m +CONFIG_NF_NAT_SNMP_BASIC=m +CONFIG_NF_NAT_PPTP=m +CONFIG_NF_NAT_H323=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_MATCH_AH=m +CONFIG_IP_NF_MATCH_ECN=m +CONFIG_IP_NF_MATCH_RPFILTER=m +CONFIG_IP_NF_MATCH_TTL=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_TARGET_SYNPROXY=m +CONFIG_IP_NF_NAT=m +CONFIG_IP_NF_TARGET_MASQUERADE=m +CONFIG_IP_NF_TARGET_NETMAP=m +CONFIG_IP_NF_TARGET_REDIRECT=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_CLUSTERIP=m +CONFIG_IP_NF_TARGET_ECN=m +CONFIG_IP_NF_TARGET_TTL=m +CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_SECURITY=m +CONFIG_IP_NF_ARPTABLES=m +CONFIG_IP_NF_ARPFILTER=m +CONFIG_IP_NF_ARP_MANGLE=m +# end of IP: Netfilter Configuration + +# +# IPv6: Netfilter Configuration +# +CONFIG_NF_SOCKET_IPV6=m +CONFIG_NF_TPROXY_IPV6=m +CONFIG_NF_TABLES_IPV6=y +CONFIG_NFT_REJECT_IPV6=m +CONFIG_NFT_DUP_IPV6=m +CONFIG_NFT_FIB_IPV6=m +CONFIG_NF_FLOW_TABLE_IPV6=m +CONFIG_NF_DUP_IPV6=m +CONFIG_NF_REJECT_IPV6=m +CONFIG_NF_LOG_IPV6=m +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_MATCH_AH=m +CONFIG_IP6_NF_MATCH_EUI64=m +CONFIG_IP6_NF_MATCH_FRAG=m +CONFIG_IP6_NF_MATCH_OPTS=m +CONFIG_IP6_NF_MATCH_HL=m +CONFIG_IP6_NF_MATCH_IPV6HEADER=m +CONFIG_IP6_NF_MATCH_MH=m +CONFIG_IP6_NF_MATCH_RPFILTER=m +CONFIG_IP6_NF_MATCH_RT=m +# CONFIG_IP6_NF_MATCH_SRH is not set +CONFIG_IP6_NF_TARGET_HL=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_IP6_NF_TARGET_SYNPROXY=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_RAW=m +CONFIG_IP6_NF_SECURITY=m +CONFIG_IP6_NF_NAT=m +CONFIG_IP6_NF_TARGET_MASQUERADE=m +CONFIG_IP6_NF_TARGET_NPT=m +# end of IPv6: Netfilter Configuration + +CONFIG_NF_DEFRAG_IPV6=m +CONFIG_NF_TABLES_BRIDGE=m +# CONFIG_NFT_BRIDGE_META is not set +CONFIG_NFT_BRIDGE_REJECT=m +CONFIG_NF_LOG_BRIDGE=m +# CONFIG_NF_CONNTRACK_BRIDGE is not set +CONFIG_BRIDGE_NF_EBTABLES=m +CONFIG_BRIDGE_EBT_BROUTE=m +CONFIG_BRIDGE_EBT_T_FILTER=m +CONFIG_BRIDGE_EBT_T_NAT=m +CONFIG_BRIDGE_EBT_802_3=m +CONFIG_BRIDGE_EBT_AMONG=m +CONFIG_BRIDGE_EBT_ARP=m +CONFIG_BRIDGE_EBT_IP=m +CONFIG_BRIDGE_EBT_IP6=m +CONFIG_BRIDGE_EBT_LIMIT=m +CONFIG_BRIDGE_EBT_MARK=m +CONFIG_BRIDGE_EBT_PKTTYPE=m +CONFIG_BRIDGE_EBT_STP=m +CONFIG_BRIDGE_EBT_VLAN=m +CONFIG_BRIDGE_EBT_ARPREPLY=m +CONFIG_BRIDGE_EBT_DNAT=m +CONFIG_BRIDGE_EBT_MARK_T=m +CONFIG_BRIDGE_EBT_REDIRECT=m +CONFIG_BRIDGE_EBT_SNAT=m +CONFIG_BRIDGE_EBT_LOG=m +CONFIG_BRIDGE_EBT_NFLOG=m +# CONFIG_BPFILTER is not set +CONFIG_IP_DCCP=m +CONFIG_INET_DCCP_DIAG=m + +# +# DCCP CCIDs Configuration +# +# CONFIG_IP_DCCP_CCID2_DEBUG is not set +CONFIG_IP_DCCP_CCID3=y +# CONFIG_IP_DCCP_CCID3_DEBUG is not set +CONFIG_IP_DCCP_TFRC_LIB=y +# end of DCCP CCIDs Configuration + +# +# DCCP Kernel Hacking +# +# CONFIG_IP_DCCP_DEBUG is not set +# end of DCCP Kernel Hacking + +CONFIG_IP_SCTP=m +# CONFIG_SCTP_DBG_OBJCNT is not set +CONFIG_SCTP_DEFAULT_COOKIE_HMAC_MD5=y +# CONFIG_SCTP_DEFAULT_COOKIE_HMAC_SHA1 is not set +# CONFIG_SCTP_DEFAULT_COOKIE_HMAC_NONE is not set +CONFIG_SCTP_COOKIE_HMAC_MD5=y +CONFIG_SCTP_COOKIE_HMAC_SHA1=y +CONFIG_INET_SCTP_DIAG=m +CONFIG_RDS=m +CONFIG_RDS_RDMA=m +CONFIG_RDS_TCP=m +# CONFIG_RDS_DEBUG is not set +CONFIG_TIPC=m +CONFIG_TIPC_MEDIA_IB=y +CONFIG_TIPC_MEDIA_UDP=y +CONFIG_TIPC_DIAG=m +CONFIG_ATM=m +CONFIG_ATM_CLIP=m +# CONFIG_ATM_CLIP_NO_ICMP is not set +CONFIG_ATM_LANE=m +CONFIG_ATM_MPOA=m +CONFIG_ATM_BR2684=m +# CONFIG_ATM_BR2684_IPFILTER is not set +CONFIG_L2TP=m +CONFIG_L2TP_DEBUGFS=m +CONFIG_L2TP_V3=y +CONFIG_L2TP_IP=m +CONFIG_L2TP_ETH=m +CONFIG_STP=m +CONFIG_GARP=m +CONFIG_MRP=m +CONFIG_BRIDGE=m +CONFIG_BRIDGE_IGMP_SNOOPING=y +CONFIG_BRIDGE_VLAN_FILTERING=y +CONFIG_HAVE_NET_DSA=y +CONFIG_NET_DSA=m +# CONFIG_NET_DSA_TAG_8021Q is not set +# CONFIG_NET_DSA_TAG_BRCM is not set +# CONFIG_NET_DSA_TAG_BRCM_PREPEND is not set +# CONFIG_NET_DSA_TAG_GSWIP is not set +CONFIG_NET_DSA_TAG_DSA=m +CONFIG_NET_DSA_TAG_EDSA=m +# CONFIG_NET_DSA_TAG_MTK is not set +# CONFIG_NET_DSA_TAG_KSZ is not set +# CONFIG_NET_DSA_TAG_QCA is not set +# CONFIG_NET_DSA_TAG_LAN9303 is not set +# CONFIG_NET_DSA_TAG_SJA1105 is not set +CONFIG_NET_DSA_TAG_TRAILER=m +CONFIG_VLAN_8021Q=m +CONFIG_VLAN_8021Q_GVRP=y +CONFIG_VLAN_8021Q_MVRP=y +# CONFIG_DECNET is not set +CONFIG_LLC=m +CONFIG_LLC2=m +CONFIG_ATALK=m +CONFIG_DEV_APPLETALK=m +CONFIG_IPDDP=m +CONFIG_IPDDP_ENCAP=y +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +CONFIG_PHONET=m +CONFIG_6LOWPAN=m +# CONFIG_6LOWPAN_DEBUGFS is not set +CONFIG_6LOWPAN_NHC=m +CONFIG_6LOWPAN_NHC_DEST=m +CONFIG_6LOWPAN_NHC_FRAGMENT=m +CONFIG_6LOWPAN_NHC_HOP=m +CONFIG_6LOWPAN_NHC_IPV6=m +CONFIG_6LOWPAN_NHC_MOBILITY=m +CONFIG_6LOWPAN_NHC_ROUTING=m +CONFIG_6LOWPAN_NHC_UDP=m +CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m +CONFIG_6LOWPAN_GHC_UDP=m +CONFIG_6LOWPAN_GHC_ICMPV6=m +CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m +CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m +CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m +CONFIG_IEEE802154=m +# CONFIG_IEEE802154_NL802154_EXPERIMENTAL is not set +CONFIG_IEEE802154_SOCKET=m +CONFIG_IEEE802154_6LOWPAN=m +CONFIG_MAC802154=m +CONFIG_NET_SCHED=y + +# +# Queueing/Scheduling +# +CONFIG_NET_SCH_CBQ=m +CONFIG_NET_SCH_HTB=m +CONFIG_NET_SCH_HFSC=m +CONFIG_NET_SCH_ATM=m +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_MULTIQ=m +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_SFB=m +CONFIG_NET_SCH_SFQ=m +CONFIG_NET_SCH_TEQL=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_SCH_CBS=m +CONFIG_NET_SCH_ETF=m +# CONFIG_NET_SCH_TAPRIO is not set +CONFIG_NET_SCH_GRED=m +CONFIG_NET_SCH_DSMARK=m +CONFIG_NET_SCH_NETEM=m +CONFIG_NET_SCH_DRR=m +CONFIG_NET_SCH_MQPRIO=m +CONFIG_NET_SCH_SKBPRIO=m +CONFIG_NET_SCH_CHOKE=m +CONFIG_NET_SCH_QFQ=m +CONFIG_NET_SCH_CODEL=m +CONFIG_NET_SCH_FQ_CODEL=m +CONFIG_NET_SCH_CAKE=m +CONFIG_NET_SCH_FQ=m +CONFIG_NET_SCH_HHF=m +CONFIG_NET_SCH_PIE=m +CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_SCH_PLUG=m +# CONFIG_NET_SCH_DEFAULT is not set + +# +# Classification +# +CONFIG_NET_CLS=y +CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_TCINDEX=m +CONFIG_NET_CLS_ROUTE4=m +CONFIG_NET_CLS_FW=m +CONFIG_NET_CLS_U32=m +CONFIG_CLS_U32_PERF=y +CONFIG_CLS_U32_MARK=y +CONFIG_NET_CLS_RSVP=m +CONFIG_NET_CLS_RSVP6=m +CONFIG_NET_CLS_FLOW=m +CONFIG_NET_CLS_CGROUP=m +CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_FLOWER=m +CONFIG_NET_CLS_MATCHALL=m +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_STACK=32 +CONFIG_NET_EMATCH_CMP=m +CONFIG_NET_EMATCH_NBYTE=m +CONFIG_NET_EMATCH_U32=m +CONFIG_NET_EMATCH_META=m +CONFIG_NET_EMATCH_TEXT=m +CONFIG_NET_EMATCH_CANID=m +CONFIG_NET_EMATCH_IPSET=m +CONFIG_NET_EMATCH_IPT=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_ACT_POLICE=m +CONFIG_NET_ACT_GACT=m +CONFIG_GACT_PROB=y +CONFIG_NET_ACT_MIRRED=m +CONFIG_NET_ACT_SAMPLE=m +CONFIG_NET_ACT_IPT=m +CONFIG_NET_ACT_NAT=m +CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_ACT_SIMP=m +CONFIG_NET_ACT_SKBEDIT=m +CONFIG_NET_ACT_CSUM=m +# CONFIG_NET_ACT_MPLS is not set +CONFIG_NET_ACT_VLAN=m +CONFIG_NET_ACT_BPF=m +CONFIG_NET_ACT_CONNMARK=m +# CONFIG_NET_ACT_CTINFO is not set +CONFIG_NET_ACT_SKBMOD=m +CONFIG_NET_ACT_IFE=m +CONFIG_NET_ACT_TUNNEL_KEY=m +CONFIG_NET_ACT_CT=m +CONFIG_NET_IFE_SKBMARK=m +CONFIG_NET_IFE_SKBPRIO=m +CONFIG_NET_IFE_SKBTCINDEX=m +# CONFIG_NET_TC_SKB_EXT is not set +CONFIG_NET_SCH_FIFO=y +CONFIG_DCB=y +CONFIG_DNS_RESOLVER=m +CONFIG_BATMAN_ADV=m +# CONFIG_BATMAN_ADV_BATMAN_V is not set +CONFIG_BATMAN_ADV_BLA=y +CONFIG_BATMAN_ADV_DAT=y +CONFIG_BATMAN_ADV_NC=y +CONFIG_BATMAN_ADV_MCAST=y +CONFIG_BATMAN_ADV_DEBUGFS=y +# CONFIG_BATMAN_ADV_DEBUG is not set +CONFIG_BATMAN_ADV_SYSFS=y +# CONFIG_BATMAN_ADV_TRACING is not set +CONFIG_OPENVSWITCH=m +CONFIG_OPENVSWITCH_GRE=m +CONFIG_OPENVSWITCH_VXLAN=m +CONFIG_OPENVSWITCH_GENEVE=m +CONFIG_VSOCKETS=m +CONFIG_VSOCKETS_DIAG=m +CONFIG_VIRTIO_VSOCKETS=m +CONFIG_VIRTIO_VSOCKETS_COMMON=m +CONFIG_NETLINK_DIAG=m +CONFIG_MPLS=y +CONFIG_NET_MPLS_GSO=y +CONFIG_MPLS_ROUTING=m +CONFIG_MPLS_IPTUNNEL=m +CONFIG_NET_NSH=m +# CONFIG_HSR is not set +CONFIG_NET_SWITCHDEV=y +CONFIG_NET_L3_MASTER_DEV=y +# CONFIG_QRTR is not set +# CONFIG_NET_NCSI is not set +CONFIG_RPS=y +CONFIG_RFS_ACCEL=y +CONFIG_XPS=y +CONFIG_CGROUP_NET_PRIO=y +CONFIG_CGROUP_NET_CLASSID=y +CONFIG_NET_RX_BUSY_POLL=y +CONFIG_BQL=y +CONFIG_BPF_STREAM_PARSER=y +CONFIG_NET_FLOW_LIMIT=y + +# +# Network testing +# +CONFIG_NET_PKTGEN=m +CONFIG_NET_DROP_MONITOR=y +# end of Network testing +# end of Networking options + +CONFIG_HAMRADIO=y + +# +# Packet Radio protocols +# +CONFIG_AX25=m +CONFIG_AX25_DAMA_SLAVE=y +CONFIG_NETROM=m +CONFIG_ROSE=m + +# +# AX.25 network device drivers +# +CONFIG_MKISS=m +CONFIG_6PACK=m +CONFIG_BPQETHER=m +CONFIG_BAYCOM_SER_FDX=m +CONFIG_BAYCOM_SER_HDX=m +CONFIG_BAYCOM_PAR=m +CONFIG_YAM=m +# end of AX.25 network device drivers + +CONFIG_CAN=m +CONFIG_CAN_RAW=m +CONFIG_CAN_BCM=m +CONFIG_CAN_GW=m +# CONFIG_CAN_J1939 is not set + +# +# CAN Device Drivers +# +CONFIG_CAN_VCAN=m +CONFIG_CAN_VXCAN=m +CONFIG_CAN_SLCAN=m +CONFIG_CAN_DEV=m +CONFIG_CAN_CALC_BITTIMING=y +# CONFIG_CAN_FLEXCAN is not set +# CONFIG_CAN_GRCAN is not set +# CONFIG_CAN_KVASER_PCIEFD is not set +# CONFIG_CAN_XILINXCAN is not set +# CONFIG_CAN_C_CAN is not set +# CONFIG_CAN_CC770 is not set +# CONFIG_CAN_IFI_CANFD is not set +# CONFIG_CAN_M_CAN is not set +CONFIG_CAN_PEAK_PCIEFD=m +CONFIG_CAN_SJA1000=m +CONFIG_CAN_EMS_PCI=m +# CONFIG_CAN_F81601 is not set +CONFIG_CAN_KVASER_PCI=m +CONFIG_CAN_PEAK_PCI=m +CONFIG_CAN_PEAK_PCIEC=y +CONFIG_CAN_PLX_PCI=m +CONFIG_CAN_SJA1000_ISA=m +# CONFIG_CAN_SJA1000_PLATFORM is not set +CONFIG_CAN_SOFTING=m + +# +# CAN SPI interfaces +# +# CONFIG_CAN_HI311X is not set +# CONFIG_CAN_MCP251X is not set +# end of CAN SPI interfaces + +# +# CAN USB interfaces +# +CONFIG_CAN_8DEV_USB=m +CONFIG_CAN_EMS_USB=m +CONFIG_CAN_ESD_USB2=m +CONFIG_CAN_GS_USB=m +CONFIG_CAN_KVASER_USB=m +CONFIG_CAN_MCBA_USB=m +CONFIG_CAN_PEAK_USB=m +CONFIG_CAN_UCAN=m +# end of CAN USB interfaces + +# CONFIG_CAN_DEBUG_DEVICES is not set +# end of CAN Device Drivers + +CONFIG_BT=m +CONFIG_BT_BREDR=y +CONFIG_BT_RFCOMM=m +CONFIG_BT_RFCOMM_TTY=y +CONFIG_BT_BNEP=m +CONFIG_BT_BNEP_MC_FILTER=y +CONFIG_BT_BNEP_PROTO_FILTER=y +CONFIG_BT_HIDP=m +CONFIG_BT_HS=y +CONFIG_BT_LE=y +CONFIG_BT_6LOWPAN=m +CONFIG_BT_LEDS=y +# CONFIG_BT_SELFTEST is not set +CONFIG_BT_DEBUGFS=y + +# +# Bluetooth device drivers +# +CONFIG_BT_INTEL=m +CONFIG_BT_BCM=m +CONFIG_BT_RTL=m +CONFIG_BT_QCA=m +CONFIG_BT_HCIBTUSB=m +CONFIG_BT_HCIBTUSB_AUTOSUSPEND=y +CONFIG_BT_HCIBTUSB_BCM=y +# CONFIG_BT_HCIBTUSB_MTK is not set +CONFIG_BT_HCIBTUSB_RTL=y +CONFIG_BT_HCIBTSDIO=m +CONFIG_BT_HCIUART=m +CONFIG_BT_HCIUART_SERDEV=y +CONFIG_BT_HCIUART_H4=y +CONFIG_BT_HCIUART_NOKIA=m +# CONFIG_BT_HCIUART_BCSP is not set +CONFIG_BT_HCIUART_ATH3K=y +CONFIG_BT_HCIUART_LL=y +CONFIG_BT_HCIUART_3WIRE=y +CONFIG_BT_HCIUART_INTEL=y +CONFIG_BT_HCIUART_RTL=y +CONFIG_BT_HCIUART_QCA=y +CONFIG_BT_HCIUART_AG6XX=y +CONFIG_BT_HCIUART_MRVL=y +# CONFIG_BT_HCIBCM203X is not set +# CONFIG_BT_HCIBPA10X is not set +# CONFIG_BT_HCIBFUSB is not set +# CONFIG_BT_HCIVHCI is not set +CONFIG_BT_MRVL=m +CONFIG_BT_MRVL_SDIO=m +CONFIG_BT_ATH3K=m +CONFIG_BT_WILINK=m +# CONFIG_BT_MTKSDIO is not set +CONFIG_BT_MTKUART=m +CONFIG_BT_QCOMSMD=m +CONFIG_BT_HCIRSI=m +# end of Bluetooth device drivers + +CONFIG_AF_RXRPC=m +CONFIG_AF_RXRPC_IPV6=y +# CONFIG_AF_RXRPC_INJECT_LOSS is not set +# CONFIG_AF_RXRPC_DEBUG is not set +CONFIG_RXKAD=y +# CONFIG_AF_KCM is not set +CONFIG_STREAM_PARSER=y +CONFIG_FIB_RULES=y +CONFIG_WIRELESS=y +CONFIG_WIRELESS_EXT=y +CONFIG_WEXT_CORE=y +CONFIG_WEXT_PROC=y +CONFIG_WEXT_SPY=y +CONFIG_WEXT_PRIV=y +CONFIG_CFG80211=m +# CONFIG_NL80211_TESTMODE is not set +# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set +# CONFIG_CFG80211_CERTIFICATION_ONUS is not set +CONFIG_CFG80211_REQUIRE_SIGNED_REGDB=y +CONFIG_CFG80211_USE_KERNEL_REGDB_KEYS=y +CONFIG_CFG80211_DEFAULT_PS=y +# CONFIG_CFG80211_DEBUGFS is not set +CONFIG_CFG80211_CRDA_SUPPORT=y +CONFIG_CFG80211_WEXT=y +CONFIG_CFG80211_WEXT_EXPORT=y +CONFIG_LIB80211=m +CONFIG_LIB80211_CRYPT_WEP=m +CONFIG_LIB80211_CRYPT_CCMP=m +CONFIG_LIB80211_CRYPT_TKIP=m +# CONFIG_LIB80211_DEBUG is not set +CONFIG_MAC80211=m +CONFIG_MAC80211_HAS_RC=y +CONFIG_MAC80211_RC_MINSTREL=y +CONFIG_MAC80211_RC_DEFAULT_MINSTREL=y +CONFIG_MAC80211_RC_DEFAULT="minstrel_ht" +CONFIG_MAC80211_MESH=y +CONFIG_MAC80211_LEDS=y +# CONFIG_MAC80211_DEBUGFS is not set +# CONFIG_MAC80211_MESSAGE_TRACING is not set +# CONFIG_MAC80211_DEBUG_MENU is not set +CONFIG_MAC80211_STA_HASH_MAX_SIZE=0 +CONFIG_WIMAX=m +CONFIG_WIMAX_DEBUG_LEVEL=8 +CONFIG_RFKILL=m +CONFIG_RFKILL_LEDS=y +CONFIG_RFKILL_INPUT=y +# CONFIG_RFKILL_GPIO is not set +CONFIG_NET_9P=m +CONFIG_NET_9P_VIRTIO=m +CONFIG_NET_9P_XEN=m +CONFIG_NET_9P_RDMA=m +# CONFIG_NET_9P_DEBUG is not set +# CONFIG_CAIF is not set +CONFIG_CEPH_LIB=m +# CONFIG_CEPH_LIB_PRETTYDEBUG is not set +# CONFIG_CEPH_LIB_USE_DNS_RESOLVER is not set +CONFIG_NFC=m +CONFIG_NFC_DIGITAL=m +# CONFIG_NFC_NCI is not set +# CONFIG_NFC_HCI is not set + +# +# Near Field Communication (NFC) devices +# +# CONFIG_NFC_TRF7970A is not set +CONFIG_NFC_SIM=m +CONFIG_NFC_PORT100=m +CONFIG_NFC_PN533=m +CONFIG_NFC_PN533_USB=m +# CONFIG_NFC_PN533_I2C is not set +# CONFIG_NFC_ST95HF is not set +# end of Near Field Communication (NFC) devices + +CONFIG_PSAMPLE=m +CONFIG_NET_IFE=m +CONFIG_LWTUNNEL=y +CONFIG_LWTUNNEL_BPF=y +CONFIG_DST_CACHE=y +CONFIG_GRO_CELLS=y +CONFIG_NET_SOCK_MSG=y +CONFIG_NET_DEVLINK=y +CONFIG_PAGE_POOL=y +CONFIG_FAILOVER=m + +# +# Device Drivers +# +CONFIG_ARM_AMBA=y +CONFIG_TEGRA_AHB=y +CONFIG_HAVE_PCI=y +CONFIG_PCI=y +CONFIG_PCI_DOMAINS=y +CONFIG_PCI_DOMAINS_GENERIC=y +CONFIG_PCI_SYSCALL=y +CONFIG_PCIEPORTBUS=y +CONFIG_HOTPLUG_PCI_PCIE=y +CONFIG_PCIEAER=y +CONFIG_PCIEAER_INJECT=m +# CONFIG_PCIE_ECRC is not set +CONFIG_PCIEASPM=y +# CONFIG_PCIEASPM_DEBUG is not set +CONFIG_PCIEASPM_DEFAULT=y +# CONFIG_PCIEASPM_POWERSAVE is not set +# CONFIG_PCIEASPM_POWER_SUPERSAVE is not set +# CONFIG_PCIEASPM_PERFORMANCE is not set +CONFIG_PCIE_PME=y +CONFIG_PCIE_DPC=y +CONFIG_PCIE_PTM=y +# CONFIG_PCIE_BW is not set +# CONFIG_PCIE_EDR is not set +CONFIG_PCI_MSI=y +CONFIG_PCI_MSI_IRQ_DOMAIN=y +CONFIG_PCI_QUIRKS=y +# CONFIG_PCI_DEBUG is not set +CONFIG_PCI_REALLOC_ENABLE_AUTO=y +CONFIG_PCI_STUB=m +CONFIG_PCI_PF_STUB=m +CONFIG_PCI_ATS=y +CONFIG_PCI_ECAM=y +CONFIG_PCI_BRIDGE_EMUL=y +CONFIG_PCI_IOV=y +CONFIG_PCI_PRI=y +CONFIG_PCI_PASID=y +CONFIG_PCI_LABEL=y +CONFIG_HOTPLUG_PCI=y +CONFIG_HOTPLUG_PCI_ACPI=y +CONFIG_HOTPLUG_PCI_ACPI_IBM=m +CONFIG_HOTPLUG_PCI_CPCI=y +CONFIG_HOTPLUG_PCI_SHPC=y + +# +# PCI controller drivers +# +CONFIG_PCI_AARDVARK=y + +# +# Cadence PCIe controllers support +# +# CONFIG_PCIE_CADENCE_HOST is not set +# end of Cadence PCIe controllers support + +CONFIG_PCIE_XILINX_NWL=y +# CONFIG_PCI_FTPCI100 is not set +CONFIG_PCI_TEGRA=y +CONFIG_PCI_HOST_COMMON=y +CONFIG_PCI_HOST_GENERIC=y +# CONFIG_PCIE_XILINX is not set +CONFIG_PCI_XGENE=y +CONFIG_PCI_XGENE_MSI=y +# CONFIG_PCIE_ALTERA is not set +CONFIG_PCI_HOST_THUNDER_PEM=y +CONFIG_PCI_HOST_THUNDER_ECAM=y +CONFIG_PCIE_ROCKCHIP=y +CONFIG_PCIE_ROCKCHIP_HOST=y +# CONFIG_PCIE_MOBIVEIL is not set + +# +# DesignWare PCI Core Support +# +CONFIG_PCIE_DW=y +CONFIG_PCIE_DW_HOST=y +# CONFIG_PCIE_DW_PLAT_HOST is not set +CONFIG_PCI_HISI=y +CONFIG_PCIE_QCOM=y +CONFIG_PCIE_ARMADA_8K=y +CONFIG_PCIE_KIRIN=y +# CONFIG_PCIE_HISI_STB is not set +# CONFIG_PCI_MESON is not set +# CONFIG_PCIE_AL is not set +# end of DesignWare PCI Core Support +# end of PCI controller drivers + +# +# PCI Endpoint +# +# CONFIG_PCI_ENDPOINT is not set +# end of PCI Endpoint + +# +# PCI switch controller drivers +# +# CONFIG_PCI_SW_SWITCHTEC is not set +# end of PCI switch controller drivers + +# CONFIG_PCCARD is not set +# CONFIG_RAPIDIO is not set + +# +# Generic Driver Options +# +# CONFIG_UEVENT_HELPER is not set +CONFIG_DEVTMPFS=y +# CONFIG_DEVTMPFS_MOUNT is not set +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y + +# +# Firmware loader +# +CONFIG_FW_LOADER=y +CONFIG_EXTRA_FIRMWARE="" +# CONFIG_FW_LOADER_USER_HELPER is not set +# CONFIG_FW_LOADER_COMPRESS is not set +# end of Firmware loader + +CONFIG_WANT_DEV_COREDUMP=y +CONFIG_ALLOW_DEV_COREDUMP=y +CONFIG_DEV_COREDUMP=y +# CONFIG_DEBUG_DRIVER is not set +# CONFIG_DEBUG_DEVRES is not set +# CONFIG_DEBUG_TEST_DRIVER_REMOVE is not set +# CONFIG_TEST_ASYNC_DRIVER_PROBE is not set +CONFIG_SYS_HYPERVISOR=y +CONFIG_GENERIC_CPU_AUTOPROBE=y +CONFIG_GENERIC_CPU_VULNERABILITIES=y +CONFIG_SOC_BUS=y +CONFIG_REGMAP=y +CONFIG_REGMAP_I2C=y +CONFIG_REGMAP_SPI=m +CONFIG_REGMAP_SPMI=y +CONFIG_REGMAP_MMIO=y +CONFIG_REGMAP_IRQ=y +CONFIG_DMA_SHARED_BUFFER=y +# CONFIG_DMA_FENCE_TRACE is not set +CONFIG_GENERIC_ARCH_TOPOLOGY=y +# end of Generic Driver Options + +# +# Bus devices +# +CONFIG_ARM_CCI=y +CONFIG_ARM_CCI400_COMMON=y +# CONFIG_BRCMSTB_GISB_ARB is not set +# CONFIG_MOXTET is not set +CONFIG_HISILICON_LPC=y +CONFIG_QCOM_EBI2=y +# CONFIG_SIMPLE_PM_BUS is not set +CONFIG_SUN50I_DE2_BUS=y +CONFIG_SUNXI_RSB=y +CONFIG_TEGRA_ACONNECT=y +# CONFIG_TEGRA_GMI is not set +CONFIG_VEXPRESS_CONFIG=y +# end of Bus devices + +CONFIG_CONNECTOR=y +CONFIG_PROC_EVENTS=y +CONFIG_GNSS=m +CONFIG_GNSS_SERIAL=m +# CONFIG_GNSS_MTK_SERIAL is not set +CONFIG_GNSS_SIRF_SERIAL=m +CONFIG_GNSS_UBX_SERIAL=m +CONFIG_MTD=m +# CONFIG_MTD_TESTS is not set + +# +# Partition parsers +# +CONFIG_MTD_AR7_PARTS=m +# CONFIG_MTD_CMDLINE_PARTS is not set +CONFIG_MTD_OF_PARTS=m +# CONFIG_MTD_AFS_PARTS is not set +# CONFIG_MTD_REDBOOT_PARTS is not set +# end of Partition parsers + +# +# User Modules And Translation Layers +# +CONFIG_MTD_BLKDEVS=m +CONFIG_MTD_BLOCK=m +CONFIG_MTD_BLOCK_RO=m +# CONFIG_FTL is not set +# CONFIG_NFTL is not set +# CONFIG_INFTL is not set +CONFIG_RFD_FTL=m +CONFIG_SSFDC=m +# CONFIG_SM_FTL is not set +CONFIG_MTD_OOPS=m +CONFIG_MTD_SWAP=m +# CONFIG_MTD_PARTITIONED_MASTER is not set + +# +# RAM/ROM/Flash chip drivers +# +# CONFIG_MTD_CFI is not set +# CONFIG_MTD_JEDECPROBE is not set +CONFIG_MTD_MAP_BANK_WIDTH_1=y +CONFIG_MTD_MAP_BANK_WIDTH_2=y +CONFIG_MTD_MAP_BANK_WIDTH_4=y +CONFIG_MTD_CFI_I1=y +CONFIG_MTD_CFI_I2=y +CONFIG_MTD_RAM=m +# CONFIG_MTD_ROM is not set +# CONFIG_MTD_ABSENT is not set +# end of RAM/ROM/Flash chip drivers + +# +# Mapping drivers for chip access +# +# CONFIG_MTD_COMPLEX_MAPPINGS is not set +# CONFIG_MTD_PHYSMAP is not set +CONFIG_MTD_INTEL_VR_NOR=m +CONFIG_MTD_PLATRAM=m +# end of Mapping drivers for chip access + +# +# Self-contained MTD device drivers +# +# CONFIG_MTD_PMC551 is not set +CONFIG_MTD_DATAFLASH=m +# CONFIG_MTD_DATAFLASH_WRITE_VERIFY is not set +# CONFIG_MTD_DATAFLASH_OTP is not set +# CONFIG_MTD_MCHP23K256 is not set +CONFIG_MTD_SST25L=m +# CONFIG_MTD_SLRAM is not set +# CONFIG_MTD_PHRAM is not set +# CONFIG_MTD_MTDRAM is not set +# CONFIG_MTD_BLOCK2MTD is not set + +# +# Disk-On-Chip Device Drivers +# +# CONFIG_MTD_DOCG3 is not set +# end of Self-contained MTD device drivers + +CONFIG_MTD_ONENAND=m +CONFIG_MTD_ONENAND_VERIFY_WRITE=y +# CONFIG_MTD_ONENAND_GENERIC is not set +# CONFIG_MTD_ONENAND_OTP is not set +CONFIG_MTD_ONENAND_2X_PROGRAM=y +# CONFIG_MTD_RAW_NAND is not set +# CONFIG_MTD_SPI_NAND is not set + +# +# LPDDR & LPDDR2 PCM memory drivers +# +CONFIG_MTD_LPDDR=m +CONFIG_MTD_QINFO_PROBE=m +# end of LPDDR & LPDDR2 PCM memory drivers + +CONFIG_MTD_SPI_NOR=m +CONFIG_MTD_SPI_NOR_USE_4K_SECTORS=y +# CONFIG_SPI_CADENCE_QUADSPI is not set +CONFIG_SPI_HISI_SFC=m +# CONFIG_SPI_MTK_QUADSPI is not set +CONFIG_MTD_UBI=m +CONFIG_MTD_UBI_WL_THRESHOLD=4096 +CONFIG_MTD_UBI_BEB_LIMIT=20 +# CONFIG_MTD_UBI_FASTMAP is not set +# CONFIG_MTD_UBI_GLUEBI is not set +CONFIG_MTD_UBI_BLOCK=y +# CONFIG_MTD_HYPERBUS is not set +CONFIG_DTC=y +CONFIG_OF=y +# CONFIG_OF_UNITTEST is not set +CONFIG_OF_FLATTREE=y +CONFIG_OF_EARLY_FLATTREE=y +CONFIG_OF_KOBJ=y +CONFIG_OF_ADDRESS=y +CONFIG_OF_IRQ=y +CONFIG_OF_NET=y +CONFIG_OF_MDIO=m +CONFIG_OF_RESERVED_MEM=y +# CONFIG_OF_OVERLAY is not set +CONFIG_OF_NUMA=y +CONFIG_PARPORT=m +# CONFIG_PARPORT_AX88796 is not set +CONFIG_PARPORT_1284=y +CONFIG_PARPORT_NOT_PC=y +CONFIG_PNP=y +# CONFIG_PNP_DEBUG_MESSAGES is not set + +# +# Protocols +# +CONFIG_PNPACPI=y +CONFIG_BLK_DEV=y +CONFIG_BLK_DEV_NULL_BLK=m +# CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION is not set +CONFIG_CDROM=m +CONFIG_BLK_DEV_PCIESSD_MTIP32XX=m +CONFIG_ZRAM=m +CONFIG_ZRAM_WRITEBACK=y +CONFIG_ZRAM_MEMORY_TRACKING=y +# CONFIG_BLK_DEV_UMEM is not set +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_LOOP_MIN_COUNT=8 +# CONFIG_BLK_DEV_CRYPTOLOOP is not set +CONFIG_BLK_DEV_DRBD=m +# CONFIG_DRBD_FAULT_INJECTION is not set +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_SKD=m +# CONFIG_BLK_DEV_SX8 is not set +CONFIG_BLK_DEV_RAM=m +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=16384 +# CONFIG_CDROM_PKTCDVD is not set +CONFIG_ATA_OVER_ETH=m +CONFIG_XEN_BLKDEV_FRONTEND=m +CONFIG_XEN_BLKDEV_BACKEND=m +CONFIG_VIRTIO_BLK=m +# CONFIG_VIRTIO_BLK_SCSI is not set +CONFIG_BLK_DEV_RBD=m +# CONFIG_BLK_DEV_RSXX is not set + +# +# NVME Support +# +CONFIG_NVME_CORE=m +CONFIG_BLK_DEV_NVME=m +CONFIG_NVME_MULTIPATH=y +CONFIG_NVME_FABRICS=m +CONFIG_NVME_RDMA=m +CONFIG_NVME_FC=m +CONFIG_NVME_TCP=m +CONFIG_NVME_TARGET=m +# CONFIG_NVME_TARGET_LOOP is not set +CONFIG_NVME_TARGET_RDMA=m +CONFIG_NVME_TARGET_FC=m +# CONFIG_NVME_TARGET_FCLOOP is not set +CONFIG_NVME_TARGET_TCP=m +# end of NVME Support + +# +# Misc devices +# +CONFIG_SENSORS_LIS3LV02D=m +CONFIG_AD525X_DPOT=m +CONFIG_AD525X_DPOT_I2C=m +CONFIG_AD525X_DPOT_SPI=m +# CONFIG_DUMMY_IRQ is not set +# CONFIG_PHANTOM is not set +CONFIG_TIFM_CORE=m +CONFIG_TIFM_7XX1=m +CONFIG_ICS932S401=m +CONFIG_ENCLOSURE_SERVICES=m +# CONFIG_HP_ILO is not set +CONFIG_QCOM_COINCELL=m +# CONFIG_QCOM_FASTRPC is not set +CONFIG_APDS9802ALS=m +CONFIG_ISL29003=m +CONFIG_ISL29020=m +CONFIG_SENSORS_TSL2550=m +CONFIG_SENSORS_BH1770=m +CONFIG_SENSORS_APDS990X=m +CONFIG_HMC6352=m +CONFIG_DS1682=m +# CONFIG_LATTICE_ECP3_CONFIG is not set +CONFIG_SRAM=y +CONFIG_VEXPRESS_SYSCFG=y +# CONFIG_PCI_ENDPOINT_TEST is not set +# CONFIG_XILINX_SDFEC is not set +CONFIG_MISC_RTSX=m +# CONFIG_PVPANIC is not set +CONFIG_C2PORT=m + +# +# EEPROM support +# +CONFIG_EEPROM_AT24=m +CONFIG_EEPROM_AT25=m +CONFIG_EEPROM_LEGACY=m +CONFIG_EEPROM_MAX6875=m +CONFIG_EEPROM_93CX6=m +# CONFIG_EEPROM_93XX46 is not set +# CONFIG_EEPROM_IDT_89HPESX is not set +# CONFIG_EEPROM_EE1004 is not set +# end of EEPROM support + +CONFIG_CB710_CORE=m +# CONFIG_CB710_DEBUG is not set +CONFIG_CB710_DEBUG_ASSUMPTIONS=y + +# +# Texas Instruments shared transport line discipline +# +CONFIG_TI_ST=m +# end of Texas Instruments shared transport line discipline + +CONFIG_SENSORS_LIS3_I2C=m +CONFIG_ALTERA_STAPL=m + +# +# Intel MIC & related support +# + +# +# Intel MIC Bus Driver +# + +# +# SCIF Bus Driver +# + +# +# VOP Bus Driver +# +# CONFIG_VOP_BUS is not set + +# +# Intel MIC Host Driver +# + +# +# Intel MIC Card Driver +# + +# +# SCIF Driver +# + +# +# Intel MIC Coprocessor State Management (COSM) Drivers +# + +# +# VOP Driver +# +# end of Intel MIC & related support + +# CONFIG_GENWQE is not set +# CONFIG_ECHO is not set +# CONFIG_MISC_ALCOR_PCI is not set +CONFIG_MISC_RTSX_PCI=m +CONFIG_MISC_RTSX_USB=m +# CONFIG_HABANA_AI is not set +# end of Misc devices + +# +# SCSI device support +# +CONFIG_SCSI_MOD=m +CONFIG_RAID_ATTRS=m +CONFIG_SCSI=m +CONFIG_SCSI_DMA=y +CONFIG_SCSI_NETLINK=y +# CONFIG_SCSI_PROC_FS is not set + +# +# SCSI support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=m +CONFIG_CHR_DEV_ST=m +CONFIG_BLK_DEV_SR=m +CONFIG_CHR_DEV_SG=m +CONFIG_CHR_DEV_SCH=m +CONFIG_SCSI_ENCLOSURE=m +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y +CONFIG_SCSI_SCAN_ASYNC=y + +# +# SCSI Transports +# +CONFIG_SCSI_SPI_ATTRS=m +CONFIG_SCSI_FC_ATTRS=m +CONFIG_SCSI_ISCSI_ATTRS=m +CONFIG_SCSI_SAS_ATTRS=m +CONFIG_SCSI_SAS_LIBSAS=m +CONFIG_SCSI_SAS_ATA=y +CONFIG_SCSI_SAS_HOST_SMP=y +CONFIG_SCSI_SRP_ATTRS=m +# end of SCSI Transports + +CONFIG_SCSI_LOWLEVEL=y +CONFIG_ISCSI_TCP=m +CONFIG_ISCSI_BOOT_SYSFS=m +CONFIG_SCSI_CXGB3_ISCSI=m +CONFIG_SCSI_CXGB4_ISCSI=m +CONFIG_SCSI_BNX2_ISCSI=m +CONFIG_SCSI_BNX2X_FCOE=m +CONFIG_BE2ISCSI=m +CONFIG_BLK_DEV_3W_XXXX_RAID=m +CONFIG_SCSI_HPSA=m +CONFIG_SCSI_3W_9XXX=m +CONFIG_SCSI_3W_SAS=m +CONFIG_SCSI_ACARD=m +CONFIG_SCSI_AACRAID=m +CONFIG_SCSI_AIC7XXX=m +CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 +CONFIG_AIC7XXX_RESET_DELAY_MS=15000 +CONFIG_AIC7XXX_DEBUG_ENABLE=y +CONFIG_AIC7XXX_DEBUG_MASK=0 +CONFIG_AIC7XXX_REG_PRETTY_PRINT=y +CONFIG_SCSI_AIC79XX=m +CONFIG_AIC79XX_CMDS_PER_DEVICE=32 +CONFIG_AIC79XX_RESET_DELAY_MS=15000 +CONFIG_AIC79XX_DEBUG_ENABLE=y +CONFIG_AIC79XX_DEBUG_MASK=0 +CONFIG_AIC79XX_REG_PRETTY_PRINT=y +CONFIG_SCSI_AIC94XX=m +# CONFIG_AIC94XX_DEBUG is not set +CONFIG_SCSI_HISI_SAS=m +CONFIG_SCSI_HISI_SAS_PCI=m +CONFIG_SCSI_MVSAS=m +# CONFIG_SCSI_MVSAS_DEBUG is not set +# CONFIG_SCSI_MVSAS_TASKLET is not set +CONFIG_SCSI_MVUMI=m +CONFIG_SCSI_ADVANSYS=m +# CONFIG_SCSI_ARCMSR is not set +CONFIG_SCSI_ESAS2R=m +# CONFIG_MEGARAID_NEWGEN is not set +# CONFIG_MEGARAID_LEGACY is not set +CONFIG_MEGARAID_SAS=m +CONFIG_SCSI_MPT3SAS=m +CONFIG_SCSI_MPT2SAS_MAX_SGE=128 +CONFIG_SCSI_MPT3SAS_MAX_SGE=128 +CONFIG_SCSI_MPT2SAS=m +CONFIG_SCSI_SMARTPQI=m +CONFIG_SCSI_UFSHCD=m +CONFIG_SCSI_UFSHCD_PCI=m +# CONFIG_SCSI_UFS_DWC_TC_PCI is not set +# CONFIG_SCSI_UFSHCD_PLATFORM is not set +# CONFIG_SCSI_UFS_BSG is not set +CONFIG_SCSI_HPTIOP=m +# CONFIG_SCSI_MYRB is not set +# CONFIG_SCSI_MYRS is not set +CONFIG_XEN_SCSI_FRONTEND=m +CONFIG_LIBFC=m +CONFIG_LIBFCOE=m +CONFIG_FCOE=m +CONFIG_SCSI_SNIC=m +# CONFIG_SCSI_SNIC_DEBUG_FS is not set +CONFIG_SCSI_DMX3191D=m +# CONFIG_SCSI_FDOMAIN_PCI is not set +# CONFIG_SCSI_GDTH is not set +# CONFIG_SCSI_IPS is not set +# CONFIG_SCSI_INITIO is not set +# CONFIG_SCSI_INIA100 is not set +CONFIG_SCSI_STEX=m +CONFIG_SCSI_SYM53C8XX_2=m +CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 +CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 +CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 +CONFIG_SCSI_SYM53C8XX_MMIO=y +# CONFIG_SCSI_IPR is not set +# CONFIG_SCSI_QLOGIC_1280 is not set +CONFIG_SCSI_QLA_FC=m +CONFIG_TCM_QLA2XXX=m +# CONFIG_TCM_QLA2XXX_DEBUG is not set +CONFIG_SCSI_QLA_ISCSI=m +CONFIG_QEDI=m +CONFIG_QEDF=m +CONFIG_SCSI_LPFC=m +# CONFIG_SCSI_LPFC_DEBUG_FS is not set +# CONFIG_SCSI_DC395x is not set +# CONFIG_SCSI_AM53C974 is not set +CONFIG_SCSI_WD719X=m +# CONFIG_SCSI_DEBUG is not set +CONFIG_SCSI_PMCRAID=m +CONFIG_SCSI_PM8001=m +CONFIG_SCSI_BFA_FC=m +CONFIG_SCSI_VIRTIO=m +CONFIG_SCSI_CHELSIO_FCOE=m +CONFIG_SCSI_DH=y +CONFIG_SCSI_DH_RDAC=m +CONFIG_SCSI_DH_HP_SW=m +CONFIG_SCSI_DH_EMC=m +CONFIG_SCSI_DH_ALUA=m +# end of SCSI device support + +CONFIG_HAVE_PATA_PLATFORM=y +CONFIG_ATA=m +CONFIG_ATA_VERBOSE_ERROR=y +CONFIG_ATA_ACPI=y +CONFIG_SATA_ZPODD=y +CONFIG_SATA_PMP=y + +# +# Controllers with non-SFF native interface +# +CONFIG_SATA_AHCI=m +CONFIG_SATA_MOBILE_LPM_POLICY=3 +CONFIG_SATA_AHCI_PLATFORM=m +CONFIG_AHCI_CEVA=m +CONFIG_AHCI_MVEBU=m +# CONFIG_AHCI_SUNXI is not set +CONFIG_AHCI_TEGRA=m +CONFIG_AHCI_XGENE=m +# CONFIG_AHCI_QORIQ is not set +CONFIG_SATA_AHCI_SEATTLE=m +# CONFIG_SATA_INIC162X is not set +CONFIG_SATA_ACARD_AHCI=m +CONFIG_SATA_SIL24=m +CONFIG_ATA_SFF=y + +# +# SFF controllers with custom DMA interface +# +CONFIG_PDC_ADMA=m +CONFIG_SATA_QSTOR=m +CONFIG_SATA_SX4=m +CONFIG_ATA_BMDMA=y + +# +# SATA SFF controllers with BMDMA +# +CONFIG_ATA_PIIX=m +# CONFIG_SATA_DWC is not set +CONFIG_SATA_MV=m +CONFIG_SATA_NV=m +CONFIG_SATA_PROMISE=m +CONFIG_SATA_SIL=m +CONFIG_SATA_SIS=m +CONFIG_SATA_SVW=m +CONFIG_SATA_ULI=m +CONFIG_SATA_VIA=m +CONFIG_SATA_VITESSE=m + +# +# PATA SFF controllers with BMDMA +# +# CONFIG_PATA_ALI is not set +# CONFIG_PATA_AMD is not set +CONFIG_PATA_ARTOP=m +# CONFIG_PATA_ATIIXP is not set +CONFIG_PATA_ATP867X=m +CONFIG_PATA_CMD64X=m +# CONFIG_PATA_CYPRESS is not set +# CONFIG_PATA_EFAR is not set +# CONFIG_PATA_HPT366 is not set +# CONFIG_PATA_HPT37X is not set +# CONFIG_PATA_HPT3X2N is not set +# CONFIG_PATA_HPT3X3 is not set +CONFIG_PATA_IT8213=m +CONFIG_PATA_IT821X=m +CONFIG_PATA_JMICRON=m +CONFIG_PATA_MARVELL=m +# CONFIG_PATA_NETCELL is not set +CONFIG_PATA_NINJA32=m +# CONFIG_PATA_NS87415 is not set +# CONFIG_PATA_OLDPIIX is not set +# CONFIG_PATA_OPTIDMA is not set +# CONFIG_PATA_PDC2027X is not set +# CONFIG_PATA_PDC_OLD is not set +# CONFIG_PATA_RADISYS is not set +CONFIG_PATA_RDC=m +CONFIG_PATA_SCH=m +# CONFIG_PATA_SERVERWORKS is not set +# CONFIG_PATA_SIL680 is not set +CONFIG_PATA_SIS=m +CONFIG_PATA_TOSHIBA=m +# CONFIG_PATA_TRIFLEX is not set +# CONFIG_PATA_VIA is not set +# CONFIG_PATA_WINBOND is not set + +# +# PIO-only SFF controllers +# +# CONFIG_PATA_CMD640_PCI is not set +# CONFIG_PATA_MPIIX is not set +# CONFIG_PATA_NS87410 is not set +# CONFIG_PATA_OPTI is not set +# CONFIG_PATA_PLATFORM is not set +# CONFIG_PATA_RZ1000 is not set + +# +# Generic fallback / legacy drivers +# +# CONFIG_PATA_ACPI is not set +CONFIG_ATA_GENERIC=m +# CONFIG_PATA_LEGACY is not set +CONFIG_MD=y +CONFIG_BLK_DEV_MD=m +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_RAID1=m +CONFIG_MD_RAID10=m +CONFIG_MD_RAID456=m +CONFIG_MD_MULTIPATH=m +CONFIG_MD_FAULTY=m +# CONFIG_MD_CLUSTER is not set +CONFIG_BCACHE=m +# CONFIG_BCACHE_DEBUG is not set +# CONFIG_BCACHE_CLOSURES_DEBUG is not set +CONFIG_BLK_DEV_DM_BUILTIN=y +CONFIG_BLK_DEV_DM=m +# CONFIG_DM_DEBUG is not set +CONFIG_DM_BUFIO=m +# CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING is not set +CONFIG_DM_BIO_PRISON=m +CONFIG_DM_PERSISTENT_DATA=m +CONFIG_DM_UNSTRIPED=m +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_THIN_PROVISIONING=m +CONFIG_DM_CACHE=m +CONFIG_DM_CACHE_SMQ=m +CONFIG_DM_WRITECACHE=m +CONFIG_DM_ERA=m +# CONFIG_DM_CLONE is not set +CONFIG_DM_MIRROR=m +CONFIG_DM_LOG_USERSPACE=m +CONFIG_DM_RAID=m +CONFIG_DM_ZERO=m +CONFIG_DM_MULTIPATH=m +CONFIG_DM_MULTIPATH_QL=m +CONFIG_DM_MULTIPATH_ST=m +CONFIG_DM_DELAY=m +# CONFIG_DM_DUST is not set +CONFIG_DM_UEVENT=y +CONFIG_DM_FLAKEY=m +CONFIG_DM_VERITY=m +# CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG is not set +# CONFIG_DM_VERITY_FEC is not set +CONFIG_DM_SWITCH=m +CONFIG_DM_LOG_WRITES=m +CONFIG_DM_INTEGRITY=m +CONFIG_DM_ZONED=m +CONFIG_TARGET_CORE=m +CONFIG_TCM_IBLOCK=m +CONFIG_TCM_FILEIO=m +CONFIG_TCM_PSCSI=m +CONFIG_TCM_USER2=m +CONFIG_LOOPBACK_TARGET=m +CONFIG_TCM_FC=m +CONFIG_ISCSI_TARGET=m +CONFIG_ISCSI_TARGET_CXGB4=m +CONFIG_SBP_TARGET=m +CONFIG_FUSION=y +CONFIG_FUSION_SPI=m +CONFIG_FUSION_FC=m +CONFIG_FUSION_SAS=m +CONFIG_FUSION_MAX_SGE=128 +CONFIG_FUSION_CTL=m +# CONFIG_FUSION_LOGGING is not set + +# +# IEEE 1394 (FireWire) support +# +CONFIG_FIREWIRE=m +CONFIG_FIREWIRE_OHCI=m +CONFIG_FIREWIRE_SBP2=m +CONFIG_FIREWIRE_NET=m +CONFIG_FIREWIRE_NOSY=m +# end of IEEE 1394 (FireWire) support + +CONFIG_NETDEVICES=y +CONFIG_MII=m +CONFIG_NET_CORE=y +CONFIG_BONDING=m +CONFIG_DUMMY=m +CONFIG_EQUALIZER=m +# CONFIG_NET_FC is not set +CONFIG_IFB=m +CONFIG_NET_TEAM=m +CONFIG_NET_TEAM_MODE_BROADCAST=m +CONFIG_NET_TEAM_MODE_ROUNDROBIN=m +CONFIG_NET_TEAM_MODE_RANDOM=m +CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=m +CONFIG_NET_TEAM_MODE_LOADBALANCE=m +CONFIG_MACVLAN=m +CONFIG_MACVTAP=m +CONFIG_IPVLAN_L3S=y +CONFIG_IPVLAN=m +CONFIG_IPVTAP=m +CONFIG_VXLAN=m +CONFIG_GENEVE=m +CONFIG_GTP=m +CONFIG_MACSEC=m +CONFIG_NETCONSOLE=m +CONFIG_NETCONSOLE_DYNAMIC=y +CONFIG_NETPOLL=y +CONFIG_NET_POLL_CONTROLLER=y +CONFIG_TUN=m +CONFIG_TAP=m +# CONFIG_TUN_VNET_CROSS_LE is not set +CONFIG_VETH=m +CONFIG_VIRTIO_NET=m +CONFIG_NLMON=m +CONFIG_NET_VRF=m +CONFIG_VSOCKMON=m +# CONFIG_ARCNET is not set +CONFIG_ATM_DRIVERS=y +CONFIG_ATM_DUMMY=m +# CONFIG_ATM_TCP is not set +# CONFIG_ATM_LANAI is not set +# CONFIG_ATM_ENI is not set +CONFIG_ATM_NICSTAR=m +CONFIG_ATM_NICSTAR_USE_SUNI=y +CONFIG_ATM_NICSTAR_USE_IDT77105=y +# CONFIG_ATM_IDT77252 is not set +CONFIG_ATM_IA=m +# CONFIG_ATM_IA_DEBUG is not set +CONFIG_ATM_FORE200E=m +# CONFIG_ATM_FORE200E_USE_TASKLET is not set +CONFIG_ATM_FORE200E_TX_RETRY=16 +CONFIG_ATM_FORE200E_DEBUG=0 +# CONFIG_ATM_HE is not set +CONFIG_ATM_SOLOS=m + +# +# CAIF transport drivers +# + +# +# Distributed Switch Architecture drivers +# +# CONFIG_B53 is not set +# CONFIG_NET_DSA_BCM_SF2 is not set +# CONFIG_NET_DSA_LOOP is not set +# CONFIG_NET_DSA_LANTIQ_GSWIP is not set +# CONFIG_NET_DSA_MT7530 is not set +CONFIG_NET_DSA_MV88E6060=m +# CONFIG_NET_DSA_MICROCHIP_KSZ9477 is not set +# CONFIG_NET_DSA_MICROCHIP_KSZ8795 is not set +CONFIG_NET_DSA_MV88E6XXX=m +CONFIG_NET_DSA_MV88E6XXX_GLOBAL2=y +# CONFIG_NET_DSA_MV88E6XXX_PTP is not set +# CONFIG_NET_DSA_SJA1105 is not set +# CONFIG_NET_DSA_QCA8K is not set +# CONFIG_NET_DSA_REALTEK_SMI is not set +# CONFIG_NET_DSA_SMSC_LAN9303_I2C is not set +# CONFIG_NET_DSA_SMSC_LAN9303_MDIO is not set +# CONFIG_NET_DSA_VITESSE_VSC73XX_SPI is not set +# CONFIG_NET_DSA_VITESSE_VSC73XX_PLATFORM is not set +# end of Distributed Switch Architecture drivers + +CONFIG_ETHERNET=y +CONFIG_MDIO=m +CONFIG_NET_VENDOR_3COM=y +CONFIG_VORTEX=m +CONFIG_TYPHOON=m +CONFIG_NET_VENDOR_ADAPTEC=y +CONFIG_ADAPTEC_STARFIRE=m +CONFIG_NET_VENDOR_AGERE=y +CONFIG_ET131X=m +CONFIG_NET_VENDOR_ALACRITECH=y +# CONFIG_SLICOSS is not set +CONFIG_NET_VENDOR_ALLWINNER=y +# CONFIG_SUN4I_EMAC is not set +CONFIG_NET_VENDOR_ALTEON=y +CONFIG_ACENIC=m +# CONFIG_ACENIC_OMIT_TIGON_I is not set +# CONFIG_ALTERA_TSE is not set +CONFIG_NET_VENDOR_AMAZON=y +CONFIG_ENA_ETHERNET=m +CONFIG_NET_VENDOR_AMD=y +# CONFIG_AMD8111_ETH is not set +CONFIG_PCNET32=m +CONFIG_AMD_XGBE=m +CONFIG_AMD_XGBE_DCB=y +CONFIG_NET_XGENE=m +CONFIG_NET_XGENE_V2=m +CONFIG_NET_VENDOR_AQUANTIA=y +CONFIG_AQTION=m +# CONFIG_NET_VENDOR_ARC is not set +CONFIG_NET_VENDOR_ATHEROS=y +CONFIG_ATL2=m +CONFIG_ATL1=m +CONFIG_ATL1E=m +CONFIG_ATL1C=m +CONFIG_ALX=m +# CONFIG_NET_VENDOR_AURORA is not set +CONFIG_NET_VENDOR_BROADCOM=y +# CONFIG_B44 is not set +# CONFIG_BCMGENET is not set +CONFIG_BNX2=m +CONFIG_CNIC=m +CONFIG_TIGON3=m +CONFIG_TIGON3_HWMON=y +CONFIG_BNX2X=m +CONFIG_BNX2X_SRIOV=y +# CONFIG_SYSTEMPORT is not set +CONFIG_BNXT=m +CONFIG_BNXT_SRIOV=y +CONFIG_BNXT_FLOWER_OFFLOAD=y +CONFIG_BNXT_DCB=y +CONFIG_BNXT_HWMON=y +CONFIG_NET_VENDOR_BROCADE=y +CONFIG_BNA=m +CONFIG_NET_VENDOR_CADENCE=y +CONFIG_MACB=m +CONFIG_MACB_USE_HWSTAMP=y +# CONFIG_MACB_PCI is not set +CONFIG_NET_VENDOR_CAVIUM=y +CONFIG_THUNDER_NIC_PF=m +CONFIG_THUNDER_NIC_VF=m +CONFIG_THUNDER_NIC_BGX=m +CONFIG_THUNDER_NIC_RGX=m +CONFIG_CAVIUM_PTP=m +CONFIG_LIQUIDIO=m +CONFIG_LIQUIDIO_VF=m +CONFIG_NET_VENDOR_CHELSIO=y +CONFIG_CHELSIO_T1=m +CONFIG_CHELSIO_T1_1G=y +CONFIG_CHELSIO_T3=m +CONFIG_CHELSIO_T4=m +CONFIG_CHELSIO_T4_DCB=y +CONFIG_CHELSIO_T4_FCOE=y +CONFIG_CHELSIO_T4VF=m +CONFIG_CHELSIO_LIB=m +CONFIG_NET_VENDOR_CISCO=y +CONFIG_ENIC=m +CONFIG_NET_VENDOR_CORTINA=y +# CONFIG_GEMINI_ETHERNET is not set +# CONFIG_DNET is not set +CONFIG_NET_VENDOR_DEC=y +CONFIG_NET_TULIP=y +CONFIG_DE2104X=m +CONFIG_DE2104X_DSL=0 +CONFIG_TULIP=m +# CONFIG_TULIP_MWI is not set +# CONFIG_TULIP_MMIO is not set +CONFIG_TULIP_NAPI=y +CONFIG_TULIP_NAPI_HW_MITIGATION=y +CONFIG_WINBOND_840=m +CONFIG_DM9102=m +CONFIG_ULI526X=m +CONFIG_NET_VENDOR_DLINK=y +CONFIG_DL2K=m +CONFIG_SUNDANCE=m +# CONFIG_SUNDANCE_MMIO is not set +CONFIG_NET_VENDOR_EMULEX=y +CONFIG_BE2NET=m +CONFIG_BE2NET_HWMON=y +CONFIG_BE2NET_BE2=y +CONFIG_BE2NET_BE3=y +CONFIG_BE2NET_LANCER=y +CONFIG_BE2NET_SKYHAWK=y +CONFIG_NET_VENDOR_EZCHIP=y +# CONFIG_EZCHIP_NPS_MANAGEMENT_ENET is not set +CONFIG_NET_VENDOR_GOOGLE=y +# CONFIG_GVE is not set +CONFIG_NET_VENDOR_HISILICON=y +CONFIG_HIX5HD2_GMAC=m +CONFIG_HISI_FEMAC=m +CONFIG_HIP04_ETH=m +# CONFIG_HI13X1_GMAC is not set +CONFIG_HNS_MDIO=m +# CONFIG_HNS is not set +# CONFIG_HNS_DSAF is not set +# CONFIG_HNS_ENET is not set +# CONFIG_HNS3 is not set +CONFIG_NET_VENDOR_HP=y +# CONFIG_HP100 is not set +CONFIG_NET_VENDOR_HUAWEI=y +CONFIG_HINIC=m +CONFIG_NET_VENDOR_I825XX=y +CONFIG_NET_VENDOR_INTEL=y +CONFIG_E100=m +CONFIG_E1000=m +CONFIG_E1000E=m +CONFIG_IGB=m +CONFIG_IGB_HWMON=y +CONFIG_IGBVF=m +CONFIG_IXGB=m +CONFIG_IXGBE=m +CONFIG_IXGBE_HWMON=y +CONFIG_IXGBE_DCB=y +CONFIG_IXGBE_IPSEC=y +CONFIG_IXGBEVF=m +CONFIG_IXGBEVF_IPSEC=y +CONFIG_I40E=m +CONFIG_I40E_DCB=y +CONFIG_IAVF=m +CONFIG_I40EVF=m +CONFIG_ICE=m +# CONFIG_FM10K is not set +# CONFIG_IGC is not set +CONFIG_JME=m +CONFIG_NET_VENDOR_MARVELL=y +CONFIG_MVMDIO=m +CONFIG_MVNETA=m +CONFIG_MVPP2=m +CONFIG_SKGE=m +# CONFIG_SKGE_DEBUG is not set +CONFIG_SKGE_GENESIS=y +CONFIG_SKY2=m +# CONFIG_SKY2_DEBUG is not set +# CONFIG_OCTEONTX2_AF is not set +CONFIG_NET_VENDOR_MELLANOX=y +CONFIG_MLX4_EN=m +CONFIG_MLX4_EN_DCB=y +CONFIG_MLX4_CORE=m +CONFIG_MLX4_DEBUG=y +CONFIG_MLX4_CORE_GEN2=y +CONFIG_MLX5_CORE=m +CONFIG_MLX5_ACCEL=y +CONFIG_MLX5_FPGA=y +CONFIG_MLX5_CORE_EN=y +CONFIG_MLX5_EN_ARFS=y +CONFIG_MLX5_EN_RXNFC=y +CONFIG_MLX5_MPFS=y +CONFIG_MLX5_ESWITCH=y +CONFIG_MLX5_CORE_EN_DCB=y +CONFIG_MLX5_CORE_IPOIB=y +# CONFIG_MLX5_FPGA_IPSEC is not set +CONFIG_MLX5_SW_STEERING=y +# CONFIG_MLXSW_CORE is not set +CONFIG_MLXFW=m +CONFIG_NET_VENDOR_MICREL=y +# CONFIG_KS8842 is not set +# CONFIG_KS8851 is not set +# CONFIG_KS8851_MLL is not set +CONFIG_KSZ884X_PCI=m +CONFIG_NET_VENDOR_MICROCHIP=y +# CONFIG_ENC28J60 is not set +# CONFIG_ENCX24J600 is not set +CONFIG_LAN743X=m +CONFIG_NET_VENDOR_MICROSEMI=y +# CONFIG_MSCC_OCELOT_SWITCH is not set +CONFIG_NET_VENDOR_MYRI=y +CONFIG_MYRI10GE=m +CONFIG_FEALNX=m +CONFIG_NET_VENDOR_NATSEMI=y +CONFIG_NATSEMI=m +CONFIG_NS83820=m +CONFIG_NET_VENDOR_NETERION=y +CONFIG_S2IO=m +CONFIG_VXGE=m +# CONFIG_VXGE_DEBUG_TRACE_ALL is not set +CONFIG_NET_VENDOR_NETRONOME=y +CONFIG_NFP=m +CONFIG_NFP_APP_FLOWER=y +CONFIG_NFP_APP_ABM_NIC=y +# CONFIG_NFP_DEBUG is not set +CONFIG_NET_VENDOR_NI=y +# CONFIG_NI_XGE_MANAGEMENT_ENET is not set +CONFIG_NET_VENDOR_8390=y +CONFIG_NE2K_PCI=m +CONFIG_NET_VENDOR_NVIDIA=y +# CONFIG_FORCEDETH is not set +CONFIG_NET_VENDOR_OKI=y +# CONFIG_ETHOC is not set +CONFIG_NET_VENDOR_PACKET_ENGINES=y +CONFIG_HAMACHI=m +CONFIG_YELLOWFIN=m +CONFIG_NET_VENDOR_PENSANDO=y +# CONFIG_IONIC is not set +CONFIG_NET_VENDOR_QLOGIC=y +CONFIG_QLA3XXX=m +CONFIG_QLCNIC=m +CONFIG_QLCNIC_SRIOV=y +CONFIG_QLCNIC_DCB=y +CONFIG_QLCNIC_HWMON=y +CONFIG_NETXEN_NIC=m +CONFIG_QED=m +CONFIG_QED_LL2=y +CONFIG_QED_SRIOV=y +CONFIG_QEDE=m +CONFIG_QED_RDMA=y +CONFIG_QED_ISCSI=y +CONFIG_QED_FCOE=y +CONFIG_QED_OOO=y +CONFIG_NET_VENDOR_QUALCOMM=y +# CONFIG_QCA7000_SPI is not set +# CONFIG_QCA7000_UART is not set +CONFIG_QCOM_EMAC=m +# CONFIG_RMNET is not set +CONFIG_NET_VENDOR_RDC=y +CONFIG_R6040=m +CONFIG_NET_VENDOR_REALTEK=y +CONFIG_8139CP=m +CONFIG_8139TOO=m +# CONFIG_8139TOO_PIO is not set +CONFIG_8139TOO_TUNE_TWISTER=y +CONFIG_8139TOO_8129=y +# CONFIG_8139_OLD_RX_RESET is not set +CONFIG_R8169=m +CONFIG_NET_VENDOR_RENESAS=y +CONFIG_NET_VENDOR_ROCKER=y +# CONFIG_ROCKER is not set +CONFIG_NET_VENDOR_SAMSUNG=y +# CONFIG_SXGBE_ETH is not set +# CONFIG_NET_VENDOR_SEEQ is not set +CONFIG_NET_VENDOR_SOLARFLARE=y +CONFIG_SFC=m +CONFIG_SFC_MTD=y +CONFIG_SFC_MCDI_MON=y +CONFIG_SFC_SRIOV=y +CONFIG_SFC_MCDI_LOGGING=y +CONFIG_SFC_FALCON=m +CONFIG_SFC_FALCON_MTD=y +CONFIG_NET_VENDOR_SILAN=y +CONFIG_SC92031=m +CONFIG_NET_VENDOR_SIS=y +# CONFIG_SIS900 is not set +CONFIG_SIS190=m +CONFIG_NET_VENDOR_SMSC=y +CONFIG_SMC91X=m +CONFIG_EPIC100=m +CONFIG_SMSC911X=m +CONFIG_SMSC9420=m +CONFIG_NET_VENDOR_SOCIONEXT=y +CONFIG_SNI_NETSEC=m +CONFIG_NET_VENDOR_STMICRO=y +CONFIG_STMMAC_ETH=m +# CONFIG_STMMAC_SELFTESTS is not set +CONFIG_STMMAC_PLATFORM=m +# CONFIG_DWMAC_DWC_QOS_ETH is not set +CONFIG_DWMAC_GENERIC=m +CONFIG_DWMAC_IPQ806X=m +CONFIG_DWMAC_MESON=m +CONFIG_DWMAC_QCOM_ETHQOS=m +CONFIG_DWMAC_ROCKCHIP=m +CONFIG_DWMAC_SUNXI=m +CONFIG_DWMAC_SUN8I=m +# CONFIG_STMMAC_PCI is not set +CONFIG_NET_VENDOR_SUN=y +# CONFIG_HAPPYMEAL is not set +# CONFIG_SUNGEM is not set +CONFIG_CASSINI=m +CONFIG_NIU=m +CONFIG_NET_VENDOR_SYNOPSYS=y +# CONFIG_DWC_XLGMAC is not set +CONFIG_NET_VENDOR_TEHUTI=y +CONFIG_TEHUTI=m +CONFIG_NET_VENDOR_TI=y +# CONFIG_TI_CPSW_PHY_SEL is not set +CONFIG_TLAN=m +CONFIG_NET_VENDOR_VIA=y +# CONFIG_VIA_RHINE is not set +CONFIG_VIA_VELOCITY=m +CONFIG_NET_VENDOR_WIZNET=y +# CONFIG_WIZNET_W5100 is not set +# CONFIG_WIZNET_W5300 is not set +CONFIG_FDDI=y +CONFIG_DEFXX=m +# CONFIG_DEFXX_MMIO is not set +CONFIG_SKFP=m +# CONFIG_HIPPI is not set +# CONFIG_NET_SB1000 is not set +CONFIG_MDIO_DEVICE=m +CONFIG_MDIO_BUS=m +# CONFIG_MDIO_BCM_UNIMAC is not set +# CONFIG_MDIO_BITBANG is not set +CONFIG_MDIO_BUS_MUX=m +# CONFIG_MDIO_BUS_MUX_GPIO is not set +CONFIG_MDIO_BUS_MUX_MESON_G12A=m +CONFIG_MDIO_BUS_MUX_MMIOREG=m +# CONFIG_MDIO_BUS_MUX_MULTIPLEXER is not set +CONFIG_MDIO_CAVIUM=m +CONFIG_MDIO_HISI_FEMAC=m +CONFIG_MDIO_I2C=m +# CONFIG_MDIO_MSCC_MIIM is not set +# CONFIG_MDIO_OCTEON is not set +# CONFIG_MDIO_SUN4I is not set +CONFIG_MDIO_THUNDER=m +CONFIG_MDIO_XGENE=m +CONFIG_PHYLINK=m +CONFIG_PHYLIB=m +CONFIG_SWPHY=y +CONFIG_LED_TRIGGER_PHY=y + +# +# MII PHY device drivers +# +CONFIG_SFP=m +CONFIG_ADIN_PHY=m +CONFIG_AMD_PHY=m +CONFIG_AQUANTIA_PHY=m +# CONFIG_AX88796B_PHY is not set +CONFIG_AT803X_PHY=m +# CONFIG_BCM7XXX_PHY is not set +CONFIG_BCM87XX_PHY=m +CONFIG_BCM_NET_PHYLIB=m +CONFIG_BROADCOM_PHY=m +CONFIG_CICADA_PHY=m +CONFIG_CORTINA_PHY=m +CONFIG_DAVICOM_PHY=m +CONFIG_DP83822_PHY=m +CONFIG_DP83TC811_PHY=m +CONFIG_DP83848_PHY=m +CONFIG_DP83867_PHY=m +CONFIG_FIXED_PHY=m +CONFIG_ICPLUS_PHY=m +# CONFIG_INTEL_XWAY_PHY is not set +CONFIG_LSI_ET1011C_PHY=m +CONFIG_LXT_PHY=m +CONFIG_MARVELL_PHY=m +CONFIG_MARVELL_10G_PHY=m +CONFIG_MESON_GXL_PHY=m +CONFIG_MICREL_PHY=m +CONFIG_MICROCHIP_PHY=m +CONFIG_MICROCHIP_T1_PHY=m +CONFIG_MICROSEMI_PHY=m +CONFIG_NATIONAL_PHY=m +# CONFIG_NXP_TJA11XX_PHY is not set +CONFIG_QSEMI_PHY=m +CONFIG_REALTEK_PHY=m +CONFIG_RENESAS_PHY=m +CONFIG_ROCKCHIP_PHY=m +CONFIG_SMSC_PHY=m +CONFIG_STE10XP=m +CONFIG_TERANETICS_PHY=m +CONFIG_VITESSE_PHY=m +# CONFIG_XILINX_GMII2RGMII is not set +# CONFIG_MICREL_KS8995MA is not set +# CONFIG_PLIP is not set +CONFIG_PPP=m +CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_FILTER=y +CONFIG_PPP_MPPE=m +CONFIG_PPP_MULTILINK=y +CONFIG_PPPOATM=m +CONFIG_PPPOE=m +CONFIG_PPTP=m +CONFIG_PPPOL2TP=m +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m +CONFIG_SLIP=m +CONFIG_SLHC=m +CONFIG_SLIP_COMPRESSED=y +CONFIG_SLIP_SMART=y +CONFIG_SLIP_MODE_SLIP6=y + +# +# Host-side USB support is needed for USB Network Adapter support +# +CONFIG_USB_NET_DRIVERS=m +CONFIG_USB_CATC=m +CONFIG_USB_KAWETH=m +CONFIG_USB_PEGASUS=m +CONFIG_USB_RTL8150=m +CONFIG_USB_RTL8152=m +CONFIG_USB_LAN78XX=m +CONFIG_USB_USBNET=m +CONFIG_USB_NET_AX8817X=m +CONFIG_USB_NET_AX88179_178A=m +CONFIG_USB_NET_CDCETHER=m +CONFIG_USB_NET_CDC_EEM=m +CONFIG_USB_NET_CDC_NCM=m +CONFIG_USB_NET_HUAWEI_CDC_NCM=m +CONFIG_USB_NET_CDC_MBIM=m +CONFIG_USB_NET_DM9601=m +CONFIG_USB_NET_SR9700=m +CONFIG_USB_NET_SR9800=m +CONFIG_USB_NET_SMSC75XX=m +CONFIG_USB_NET_SMSC95XX=m +CONFIG_USB_NET_GL620A=m +CONFIG_USB_NET_NET1080=m +CONFIG_USB_NET_PLUSB=m +CONFIG_USB_NET_MCS7830=m +CONFIG_USB_NET_RNDIS_HOST=m +CONFIG_USB_NET_CDC_SUBSET_ENABLE=m +CONFIG_USB_NET_CDC_SUBSET=m +CONFIG_USB_ALI_M5632=y +CONFIG_USB_AN2720=y +CONFIG_USB_BELKIN=y +CONFIG_USB_ARMLINUX=y +CONFIG_USB_EPSON2888=y +CONFIG_USB_KC2190=y +CONFIG_USB_NET_ZAURUS=m +CONFIG_USB_NET_CX82310_ETH=m +CONFIG_USB_NET_KALMIA=m +CONFIG_USB_NET_QMI_WWAN=m +CONFIG_USB_HSO=m +CONFIG_USB_NET_INT51X1=m +CONFIG_USB_CDC_PHONET=m +CONFIG_USB_IPHETH=m +CONFIG_USB_SIERRA_NET=m +CONFIG_USB_VL600=m +CONFIG_USB_NET_CH9200=m +# CONFIG_USB_NET_AQC111 is not set +CONFIG_WLAN=y +# CONFIG_WIRELESS_WDS is not set +CONFIG_WLAN_VENDOR_ADMTEK=y +CONFIG_ADM8211=m +CONFIG_ATH_COMMON=m +CONFIG_WLAN_VENDOR_ATH=y +# CONFIG_ATH_DEBUG is not set +CONFIG_ATH5K=m +# CONFIG_ATH5K_DEBUG is not set +# CONFIG_ATH5K_TRACER is not set +CONFIG_ATH5K_PCI=y +CONFIG_ATH9K_HW=m +CONFIG_ATH9K_COMMON=m +CONFIG_ATH9K_BTCOEX_SUPPORT=y +CONFIG_ATH9K=m +CONFIG_ATH9K_PCI=y +# CONFIG_ATH9K_AHB is not set +# CONFIG_ATH9K_DEBUGFS is not set +# CONFIG_ATH9K_DYNACK is not set +# CONFIG_ATH9K_WOW is not set +CONFIG_ATH9K_RFKILL=y +CONFIG_ATH9K_CHANNEL_CONTEXT=y +CONFIG_ATH9K_PCOEM=y +CONFIG_ATH9K_PCI_NO_EEPROM=m +CONFIG_ATH9K_HTC=m +# CONFIG_ATH9K_HTC_DEBUGFS is not set +# CONFIG_ATH9K_HWRNG is not set +CONFIG_CARL9170=m +CONFIG_CARL9170_LEDS=y +CONFIG_CARL9170_WPC=y +# CONFIG_CARL9170_HWRNG is not set +CONFIG_ATH6KL=m +CONFIG_ATH6KL_SDIO=m +CONFIG_ATH6KL_USB=m +# CONFIG_ATH6KL_DEBUG is not set +# CONFIG_ATH6KL_TRACING is not set +CONFIG_AR5523=m +CONFIG_WIL6210=m +CONFIG_WIL6210_ISR_COR=y +CONFIG_WIL6210_TRACING=y +CONFIG_WIL6210_DEBUGFS=y +CONFIG_ATH10K=m +CONFIG_ATH10K_CE=y +CONFIG_ATH10K_PCI=m +# CONFIG_ATH10K_AHB is not set +# CONFIG_ATH10K_SDIO is not set +CONFIG_ATH10K_USB=m +# CONFIG_ATH10K_SNOC is not set +# CONFIG_ATH10K_DEBUG is not set +# CONFIG_ATH10K_DEBUGFS is not set +# CONFIG_ATH10K_TRACING is not set +CONFIG_WCN36XX=m +# CONFIG_WCN36XX_DEBUGFS is not set +CONFIG_WLAN_VENDOR_ATMEL=y +# CONFIG_ATMEL is not set +CONFIG_AT76C50X_USB=m +CONFIG_WLAN_VENDOR_BROADCOM=y +CONFIG_B43=m +CONFIG_B43_BCMA=y +CONFIG_B43_SSB=y +CONFIG_B43_BUSES_BCMA_AND_SSB=y +# CONFIG_B43_BUSES_BCMA is not set +# CONFIG_B43_BUSES_SSB is not set +CONFIG_B43_PCI_AUTOSELECT=y +CONFIG_B43_PCICORE_AUTOSELECT=y +CONFIG_B43_SDIO=y +CONFIG_B43_BCMA_PIO=y +CONFIG_B43_PIO=y +CONFIG_B43_PHY_G=y +CONFIG_B43_PHY_N=y +CONFIG_B43_PHY_LP=y +CONFIG_B43_PHY_HT=y +CONFIG_B43_LEDS=y +CONFIG_B43_HWRNG=y +# CONFIG_B43_DEBUG is not set +CONFIG_B43LEGACY=m +CONFIG_B43LEGACY_PCI_AUTOSELECT=y +CONFIG_B43LEGACY_PCICORE_AUTOSELECT=y +CONFIG_B43LEGACY_LEDS=y +CONFIG_B43LEGACY_HWRNG=y +CONFIG_B43LEGACY_DEBUG=y +CONFIG_B43LEGACY_DMA=y +CONFIG_B43LEGACY_PIO=y +CONFIG_B43LEGACY_DMA_AND_PIO_MODE=y +# CONFIG_B43LEGACY_DMA_MODE is not set +# CONFIG_B43LEGACY_PIO_MODE is not set +CONFIG_BRCMUTIL=m +CONFIG_BRCMSMAC=m +CONFIG_BRCMFMAC=m +CONFIG_BRCMFMAC_PROTO_BCDC=y +CONFIG_BRCMFMAC_PROTO_MSGBUF=y +CONFIG_BRCMFMAC_SDIO=y +CONFIG_BRCMFMAC_USB=y +CONFIG_BRCMFMAC_PCIE=y +# CONFIG_BRCM_TRACING is not set +# CONFIG_BRCMDBG is not set +CONFIG_WLAN_VENDOR_CISCO=y +CONFIG_WLAN_VENDOR_INTEL=y +# CONFIG_IPW2100 is not set +CONFIG_IPW2200=m +CONFIG_IPW2200_MONITOR=y +CONFIG_IPW2200_RADIOTAP=y +CONFIG_IPW2200_PROMISCUOUS=y +CONFIG_IPW2200_QOS=y +# CONFIG_IPW2200_DEBUG is not set +CONFIG_LIBIPW=m +# CONFIG_LIBIPW_DEBUG is not set +CONFIG_IWLEGACY=m +CONFIG_IWL4965=m +CONFIG_IWL3945=m + +# +# iwl3945 / iwl4965 Debugging Options +# +# CONFIG_IWLEGACY_DEBUG is not set +# end of iwl3945 / iwl4965 Debugging Options + +CONFIG_IWLWIFI=m +CONFIG_IWLWIFI_LEDS=y +CONFIG_IWLDVM=m +CONFIG_IWLMVM=m +CONFIG_IWLWIFI_OPMODE_MODULAR=y +# CONFIG_IWLWIFI_BCAST_FILTERING is not set + +# +# Debugging Options +# +# CONFIG_IWLWIFI_DEBUG is not set +# CONFIG_IWLWIFI_DEVICE_TRACING is not set +# end of Debugging Options + +CONFIG_WLAN_VENDOR_INTERSIL=y +CONFIG_HOSTAP=m +CONFIG_HOSTAP_FIRMWARE=y +# CONFIG_HOSTAP_FIRMWARE_NVRAM is not set +CONFIG_HOSTAP_PLX=m +CONFIG_HOSTAP_PCI=m +# CONFIG_HERMES is not set +CONFIG_P54_COMMON=m +CONFIG_P54_USB=m +CONFIG_P54_PCI=m +# CONFIG_P54_SPI is not set +CONFIG_P54_LEDS=y +# CONFIG_PRISM54 is not set +CONFIG_WLAN_VENDOR_MARVELL=y +CONFIG_LIBERTAS=m +CONFIG_LIBERTAS_USB=m +CONFIG_LIBERTAS_SDIO=m +# CONFIG_LIBERTAS_SPI is not set +# CONFIG_LIBERTAS_DEBUG is not set +CONFIG_LIBERTAS_MESH=y +CONFIG_LIBERTAS_THINFIRM=m +# CONFIG_LIBERTAS_THINFIRM_DEBUG is not set +CONFIG_LIBERTAS_THINFIRM_USB=m +CONFIG_MWIFIEX=m +# CONFIG_MWIFIEX_SDIO is not set +CONFIG_MWIFIEX_PCIE=m +# CONFIG_MWIFIEX_USB is not set +CONFIG_MWL8K=m +CONFIG_WLAN_VENDOR_MEDIATEK=y +CONFIG_MT7601U=m +CONFIG_MT76_CORE=m +CONFIG_MT76_LEDS=y +CONFIG_MT76_USB=m +CONFIG_MT76x02_LIB=m +CONFIG_MT76x02_USB=m +CONFIG_MT76x0_COMMON=m +CONFIG_MT76x0U=m +# CONFIG_MT76x0E is not set +CONFIG_MT76x2_COMMON=m +CONFIG_MT76x2E=m +CONFIG_MT76x2U=m +# CONFIG_MT7603E is not set +# CONFIG_MT7615E is not set +CONFIG_WLAN_VENDOR_RALINK=y +CONFIG_RT2X00=m +CONFIG_RT2400PCI=m +CONFIG_RT2500PCI=m +CONFIG_RT61PCI=m +CONFIG_RT2800PCI=m +CONFIG_RT2800PCI_RT33XX=y +CONFIG_RT2800PCI_RT35XX=y +CONFIG_RT2800PCI_RT53XX=y +CONFIG_RT2800PCI_RT3290=y +CONFIG_RT2500USB=m +CONFIG_RT73USB=m +CONFIG_RT2800USB=m +CONFIG_RT2800USB_RT33XX=y +CONFIG_RT2800USB_RT35XX=y +CONFIG_RT2800USB_RT3573=y +CONFIG_RT2800USB_RT53XX=y +CONFIG_RT2800USB_RT55XX=y +# CONFIG_RT2800USB_UNKNOWN is not set +CONFIG_RT2800_LIB=m +CONFIG_RT2800_LIB_MMIO=m +CONFIG_RT2X00_LIB_MMIO=m +CONFIG_RT2X00_LIB_PCI=m +CONFIG_RT2X00_LIB_USB=m +CONFIG_RT2X00_LIB=m +CONFIG_RT2X00_LIB_FIRMWARE=y +CONFIG_RT2X00_LIB_CRYPTO=y +CONFIG_RT2X00_LIB_LEDS=y +# CONFIG_RT2X00_DEBUG is not set +CONFIG_WLAN_VENDOR_REALTEK=y +CONFIG_RTL8180=m +CONFIG_RTL8187=m +CONFIG_RTL8187_LEDS=y +CONFIG_RTL_CARDS=m +CONFIG_RTL8192CE=m +CONFIG_RTL8192SE=m +CONFIG_RTL8192DE=m +CONFIG_RTL8723AE=m +CONFIG_RTL8723BE=m +CONFIG_RTL8188EE=m +CONFIG_RTL8192EE=m +CONFIG_RTL8821AE=m +CONFIG_RTL8192CU=m +CONFIG_RTLWIFI=m +CONFIG_RTLWIFI_PCI=m +CONFIG_RTLWIFI_USB=m +# CONFIG_RTLWIFI_DEBUG is not set +CONFIG_RTL8192C_COMMON=m +CONFIG_RTL8723_COMMON=m +CONFIG_RTLBTCOEXIST=m +CONFIG_RTL8XXXU=m +# CONFIG_RTL8XXXU_UNTESTED is not set +CONFIG_RTW88=m +CONFIG_RTW88_CORE=m +CONFIG_RTW88_PCI=m +CONFIG_RTW88_8822BE=y +CONFIG_RTW88_8822CE=y +# CONFIG_RTW88_DEBUG is not set +# CONFIG_RTW88_DEBUGFS is not set +CONFIG_WLAN_VENDOR_RSI=y +CONFIG_RSI_91X=m +CONFIG_RSI_DEBUGFS=y +# CONFIG_RSI_SDIO is not set +CONFIG_RSI_USB=m +CONFIG_RSI_COEX=y +CONFIG_WLAN_VENDOR_ST=y +# CONFIG_CW1200 is not set +CONFIG_WLAN_VENDOR_TI=y +CONFIG_WL1251=m +CONFIG_WL1251_SPI=m +CONFIG_WL1251_SDIO=m +CONFIG_WL12XX=m +CONFIG_WL18XX=m +CONFIG_WLCORE=m +CONFIG_WLCORE_SPI=m +CONFIG_WLCORE_SDIO=m +CONFIG_WILINK_PLATFORM_DATA=y +CONFIG_WLAN_VENDOR_ZYDAS=y +# CONFIG_USB_ZD1201 is not set +CONFIG_ZD1211RW=m +# CONFIG_ZD1211RW_DEBUG is not set +CONFIG_WLAN_VENDOR_QUANTENNA=y +# CONFIG_QTNFMAC_PCIE is not set +CONFIG_MAC80211_HWSIM=m +CONFIG_USB_NET_RNDIS_WLAN=m +# CONFIG_VIRT_WIFI is not set + +# +# WiMAX Wireless Broadband devices +# +CONFIG_WIMAX_I2400M=m +CONFIG_WIMAX_I2400M_USB=m +CONFIG_WIMAX_I2400M_DEBUG_LEVEL=8 +# end of WiMAX Wireless Broadband devices + +# CONFIG_WAN is not set +CONFIG_IEEE802154_DRIVERS=m +CONFIG_IEEE802154_FAKELB=m +CONFIG_IEEE802154_AT86RF230=m +# CONFIG_IEEE802154_AT86RF230_DEBUGFS is not set +CONFIG_IEEE802154_MRF24J40=m +CONFIG_IEEE802154_CC2520=m +CONFIG_IEEE802154_ATUSB=m +CONFIG_IEEE802154_ADF7242=m +# CONFIG_IEEE802154_CA8210 is not set +# CONFIG_IEEE802154_MCR20A is not set +CONFIG_IEEE802154_HWSIM=m +CONFIG_XEN_NETDEV_FRONTEND=m +CONFIG_XEN_NETDEV_BACKEND=m +# CONFIG_VMXNET3 is not set +# CONFIG_FUJITSU_ES is not set +# CONFIG_NETDEVSIM is not set +CONFIG_NET_FAILOVER=m +# CONFIG_ISDN is not set +# CONFIG_NVM is not set + +# +# Input device support +# +CONFIG_INPUT=y +CONFIG_INPUT_LEDS=y +CONFIG_INPUT_FF_MEMLESS=m +CONFIG_INPUT_POLLDEV=m +CONFIG_INPUT_SPARSEKMAP=m +CONFIG_INPUT_MATRIXKMAP=m + +# +# Userland interfaces +# +CONFIG_INPUT_MOUSEDEV=y +CONFIG_INPUT_MOUSEDEV_PSAUX=y +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 +CONFIG_INPUT_JOYDEV=m +CONFIG_INPUT_EVDEV=m +# CONFIG_INPUT_EVBUG is not set + +# +# Input Device Drivers +# +CONFIG_INPUT_KEYBOARD=y +# CONFIG_KEYBOARD_ADC is not set +CONFIG_KEYBOARD_ADP5588=m +# CONFIG_KEYBOARD_ADP5589 is not set +CONFIG_KEYBOARD_ATKBD=y +# CONFIG_KEYBOARD_QT1050 is not set +# CONFIG_KEYBOARD_QT1070 is not set +CONFIG_KEYBOARD_QT2160=m +# CONFIG_KEYBOARD_DLINK_DIR685 is not set +# CONFIG_KEYBOARD_LKKBD is not set +CONFIG_KEYBOARD_GPIO=m +# CONFIG_KEYBOARD_GPIO_POLLED is not set +# CONFIG_KEYBOARD_TCA6416 is not set +# CONFIG_KEYBOARD_TCA8418 is not set +# CONFIG_KEYBOARD_MATRIX is not set +CONFIG_KEYBOARD_LM8323=m +# CONFIG_KEYBOARD_LM8333 is not set +CONFIG_KEYBOARD_MAX7359=m +# CONFIG_KEYBOARD_MCS is not set +# CONFIG_KEYBOARD_MPR121 is not set +# CONFIG_KEYBOARD_NEWTON is not set +CONFIG_KEYBOARD_TEGRA=m +CONFIG_KEYBOARD_OPENCORES=m +# CONFIG_KEYBOARD_SAMSUNG is not set +CONFIG_KEYBOARD_STOWAWAY=m +# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_SUN4I_LRADC is not set +# CONFIG_KEYBOARD_OMAP4 is not set +# CONFIG_KEYBOARD_TM2_TOUCHKEY is not set +# CONFIG_KEYBOARD_XTKBD is not set +CONFIG_KEYBOARD_CROS_EC=m +# CONFIG_KEYBOARD_CAP11XX is not set +# CONFIG_KEYBOARD_BCM is not set +CONFIG_INPUT_MOUSE=y +CONFIG_MOUSE_PS2=y +CONFIG_MOUSE_PS2_ALPS=y +CONFIG_MOUSE_PS2_BYD=y +CONFIG_MOUSE_PS2_LOGIPS2PP=y +CONFIG_MOUSE_PS2_SYNAPTICS=y +CONFIG_MOUSE_PS2_SYNAPTICS_SMBUS=y +CONFIG_MOUSE_PS2_CYPRESS=y +CONFIG_MOUSE_PS2_TRACKPOINT=y +CONFIG_MOUSE_PS2_ELANTECH=y +CONFIG_MOUSE_PS2_ELANTECH_SMBUS=y +CONFIG_MOUSE_PS2_SENTELIC=y +# CONFIG_MOUSE_PS2_TOUCHKIT is not set +CONFIG_MOUSE_PS2_FOCALTECH=y +CONFIG_MOUSE_PS2_SMBUS=y +# CONFIG_MOUSE_SERIAL is not set +# CONFIG_MOUSE_APPLETOUCH is not set +# CONFIG_MOUSE_BCM5974 is not set +# CONFIG_MOUSE_CYAPA is not set +CONFIG_MOUSE_ELAN_I2C=m +CONFIG_MOUSE_ELAN_I2C_I2C=y +# CONFIG_MOUSE_ELAN_I2C_SMBUS is not set +# CONFIG_MOUSE_VSXXXAA is not set +# CONFIG_MOUSE_GPIO is not set +CONFIG_MOUSE_SYNAPTICS_I2C=m +CONFIG_MOUSE_SYNAPTICS_USB=m +# CONFIG_INPUT_JOYSTICK is not set +CONFIG_INPUT_TABLET=y +CONFIG_TABLET_USB_ACECAD=m +CONFIG_TABLET_USB_AIPTEK=m +CONFIG_TABLET_USB_GTCO=m +CONFIG_TABLET_USB_HANWANG=m +CONFIG_TABLET_USB_KBTAB=m +CONFIG_TABLET_USB_PEGASUS=m +CONFIG_TABLET_SERIAL_WACOM4=m +CONFIG_INPUT_TOUCHSCREEN=y +CONFIG_TOUCHSCREEN_PROPERTIES=y +CONFIG_TOUCHSCREEN_ADS7846=m +CONFIG_TOUCHSCREEN_AD7877=m +CONFIG_TOUCHSCREEN_AD7879=m +CONFIG_TOUCHSCREEN_AD7879_I2C=m +# CONFIG_TOUCHSCREEN_AD7879_SPI is not set +# CONFIG_TOUCHSCREEN_ADC is not set +# CONFIG_TOUCHSCREEN_AR1021_I2C is not set +CONFIG_TOUCHSCREEN_ATMEL_MXT=m +# CONFIG_TOUCHSCREEN_ATMEL_MXT_T37 is not set +# CONFIG_TOUCHSCREEN_AUO_PIXCIR is not set +# CONFIG_TOUCHSCREEN_BU21013 is not set +# CONFIG_TOUCHSCREEN_BU21029 is not set +# CONFIG_TOUCHSCREEN_CHIPONE_ICN8318 is not set +# CONFIG_TOUCHSCREEN_CHIPONE_ICN8505 is not set +# CONFIG_TOUCHSCREEN_CY8CTMG110 is not set +# CONFIG_TOUCHSCREEN_CYTTSP_CORE is not set +# CONFIG_TOUCHSCREEN_CYTTSP4_CORE is not set +CONFIG_TOUCHSCREEN_DYNAPRO=m +CONFIG_TOUCHSCREEN_HAMPSHIRE=m +CONFIG_TOUCHSCREEN_EETI=m +# CONFIG_TOUCHSCREEN_EGALAX is not set +# CONFIG_TOUCHSCREEN_EGALAX_SERIAL is not set +# CONFIG_TOUCHSCREEN_EXC3000 is not set +CONFIG_TOUCHSCREEN_FUJITSU=m +CONFIG_TOUCHSCREEN_GOODIX=m +# CONFIG_TOUCHSCREEN_HIDEEP is not set +# CONFIG_TOUCHSCREEN_ILI210X is not set +# CONFIG_TOUCHSCREEN_S6SY761 is not set +CONFIG_TOUCHSCREEN_GUNZE=m +# CONFIG_TOUCHSCREEN_EKTF2127 is not set +CONFIG_TOUCHSCREEN_ELAN=m +CONFIG_TOUCHSCREEN_ELO=m +CONFIG_TOUCHSCREEN_WACOM_W8001=m +# CONFIG_TOUCHSCREEN_WACOM_I2C is not set +# CONFIG_TOUCHSCREEN_MAX11801 is not set +CONFIG_TOUCHSCREEN_MCS5000=m +# CONFIG_TOUCHSCREEN_MMS114 is not set +# CONFIG_TOUCHSCREEN_MELFAS_MIP4 is not set +CONFIG_TOUCHSCREEN_MTOUCH=m +# CONFIG_TOUCHSCREEN_IMX6UL_TSC is not set +CONFIG_TOUCHSCREEN_INEXIO=m +CONFIG_TOUCHSCREEN_MK712=m +CONFIG_TOUCHSCREEN_PENMOUNT=m +# CONFIG_TOUCHSCREEN_EDT_FT5X06 is not set +# CONFIG_TOUCHSCREEN_RASPBERRYPI_FW is not set +CONFIG_TOUCHSCREEN_TOUCHRIGHT=m +CONFIG_TOUCHSCREEN_TOUCHWIN=m +# CONFIG_TOUCHSCREEN_PIXCIR is not set +# CONFIG_TOUCHSCREEN_WDT87XX_I2C is not set +CONFIG_TOUCHSCREEN_WM97XX=m +CONFIG_TOUCHSCREEN_WM9705=y +CONFIG_TOUCHSCREEN_WM9712=y +CONFIG_TOUCHSCREEN_WM9713=y +CONFIG_TOUCHSCREEN_USB_COMPOSITE=m +CONFIG_TOUCHSCREEN_USB_EGALAX=y +CONFIG_TOUCHSCREEN_USB_PANJIT=y +CONFIG_TOUCHSCREEN_USB_3M=y +CONFIG_TOUCHSCREEN_USB_ITM=y +CONFIG_TOUCHSCREEN_USB_ETURBO=y +CONFIG_TOUCHSCREEN_USB_GUNZE=y +CONFIG_TOUCHSCREEN_USB_DMC_TSC10=y +CONFIG_TOUCHSCREEN_USB_IRTOUCH=y +CONFIG_TOUCHSCREEN_USB_IDEALTEK=y +CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH=y +CONFIG_TOUCHSCREEN_USB_GOTOP=y +CONFIG_TOUCHSCREEN_USB_JASTEC=y +CONFIG_TOUCHSCREEN_USB_ELO=y +CONFIG_TOUCHSCREEN_USB_E2I=y +CONFIG_TOUCHSCREEN_USB_ZYTRONIC=y +CONFIG_TOUCHSCREEN_USB_ETT_TC45USB=y +CONFIG_TOUCHSCREEN_USB_NEXIO=y +CONFIG_TOUCHSCREEN_USB_EASYTOUCH=y +CONFIG_TOUCHSCREEN_TOUCHIT213=m +# CONFIG_TOUCHSCREEN_TSC_SERIO is not set +# CONFIG_TOUCHSCREEN_TSC2004 is not set +# CONFIG_TOUCHSCREEN_TSC2005 is not set +CONFIG_TOUCHSCREEN_TSC2007=m +# CONFIG_TOUCHSCREEN_TSC2007_IIO is not set +# CONFIG_TOUCHSCREEN_RM_TS is not set +# CONFIG_TOUCHSCREEN_SILEAD is not set +# CONFIG_TOUCHSCREEN_SIS_I2C is not set +# CONFIG_TOUCHSCREEN_ST1232 is not set +# CONFIG_TOUCHSCREEN_STMFTS is not set +# CONFIG_TOUCHSCREEN_SUN4I is not set +CONFIG_TOUCHSCREEN_SUR40=m +# CONFIG_TOUCHSCREEN_SURFACE3_SPI is not set +# CONFIG_TOUCHSCREEN_SX8654 is not set +CONFIG_TOUCHSCREEN_TPS6507X=m +# CONFIG_TOUCHSCREEN_ZET6223 is not set +# CONFIG_TOUCHSCREEN_ZFORCE is not set +# CONFIG_TOUCHSCREEN_ROHM_BU21023 is not set +# CONFIG_TOUCHSCREEN_IQS5XX is not set +CONFIG_INPUT_MISC=y +# CONFIG_INPUT_AD714X is not set +# CONFIG_INPUT_ATMEL_CAPTOUCH is not set +# CONFIG_INPUT_BMA150 is not set +# CONFIG_INPUT_E3X0_BUTTON is not set +# CONFIG_INPUT_MSM_VIBRATOR is not set +CONFIG_INPUT_PM8941_PWRKEY=m +# CONFIG_INPUT_PM8XXX_VIBRATOR is not set +# CONFIG_INPUT_MMA8450 is not set +# CONFIG_INPUT_GP2A is not set +# CONFIG_INPUT_GPIO_BEEPER is not set +# CONFIG_INPUT_GPIO_DECODER is not set +# CONFIG_INPUT_GPIO_VIBRA is not set +CONFIG_INPUT_ATI_REMOTE2=m +CONFIG_INPUT_KEYSPAN_REMOTE=m +# CONFIG_INPUT_KXTJ9 is not set +CONFIG_INPUT_POWERMATE=m +CONFIG_INPUT_YEALINK=m +CONFIG_INPUT_CM109=m +# CONFIG_INPUT_REGULATOR_HAPTIC is not set +CONFIG_INPUT_AXP20X_PEK=m +CONFIG_INPUT_UINPUT=m +# CONFIG_INPUT_PCF8574 is not set +# CONFIG_INPUT_PWM_BEEPER is not set +# CONFIG_INPUT_PWM_VIBRA is not set +# CONFIG_INPUT_RK805_PWRKEY is not set +# CONFIG_INPUT_GPIO_ROTARY_ENCODER is not set +# CONFIG_INPUT_ADXL34X is not set +# CONFIG_INPUT_IMS_PCU is not set +# CONFIG_INPUT_CMA3000 is not set +CONFIG_INPUT_XEN_KBDDEV_FRONTEND=y +# CONFIG_INPUT_SOC_BUTTON_ARRAY is not set +# CONFIG_INPUT_DRV260X_HAPTICS is not set +# CONFIG_INPUT_DRV2665_HAPTICS is not set +# CONFIG_INPUT_DRV2667_HAPTICS is not set +CONFIG_INPUT_HISI_POWERKEY=m +CONFIG_RMI4_CORE=m +# CONFIG_RMI4_I2C is not set +# CONFIG_RMI4_SPI is not set +# CONFIG_RMI4_SMB is not set +CONFIG_RMI4_F03=y +CONFIG_RMI4_F03_SERIO=m +CONFIG_RMI4_2D_SENSOR=y +CONFIG_RMI4_F11=y +CONFIG_RMI4_F12=y +CONFIG_RMI4_F30=y +CONFIG_RMI4_F34=y +# CONFIG_RMI4_F54 is not set +CONFIG_RMI4_F55=y + +# +# Hardware I/O ports +# +CONFIG_SERIO=y +CONFIG_SERIO_SERPORT=y +# CONFIG_SERIO_PARKBD is not set +# CONFIG_SERIO_AMBAKMI is not set +# CONFIG_SERIO_PCIPS2 is not set +CONFIG_SERIO_LIBPS2=y +# CONFIG_SERIO_RAW is not set +CONFIG_SERIO_ALTERA_PS2=m +# CONFIG_SERIO_PS2MULT is not set +# CONFIG_SERIO_ARC_PS2 is not set +# CONFIG_SERIO_APBPS2 is not set +# CONFIG_SERIO_SUN4I_PS2 is not set +# CONFIG_SERIO_GPIO_PS2 is not set +# CONFIG_USERIO is not set +# CONFIG_GAMEPORT is not set +# end of Hardware I/O ports +# end of Input device support + +# +# Character devices +# +CONFIG_TTY=y +CONFIG_VT=y +CONFIG_CONSOLE_TRANSLATIONS=y +CONFIG_VT_CONSOLE=y +CONFIG_VT_CONSOLE_SLEEP=y +CONFIG_HW_CONSOLE=y +CONFIG_VT_HW_CONSOLE_BINDING=y +CONFIG_UNIX98_PTYS=y +# CONFIG_LEGACY_PTYS is not set +# CONFIG_SERIAL_NONSTANDARD is not set +CONFIG_NOZOMI=m +CONFIG_N_GSM=m +# CONFIG_TRACE_SINK is not set +# CONFIG_NULL_TTY is not set +CONFIG_LDISC_AUTOLOAD=y +CONFIG_DEVMEM=y + +# +# Serial drivers +# +CONFIG_SERIAL_EARLYCON=y +CONFIG_SERIAL_8250=y +# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set +CONFIG_SERIAL_8250_PNP=y +# CONFIG_SERIAL_8250_FINTEK is not set +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_DMA=y +CONFIG_SERIAL_8250_PCI=y +CONFIG_SERIAL_8250_EXAR=m +CONFIG_SERIAL_8250_NR_UARTS=4 +CONFIG_SERIAL_8250_RUNTIME_UARTS=4 +CONFIG_SERIAL_8250_EXTENDED=y +# CONFIG_SERIAL_8250_MANY_PORTS is not set +# CONFIG_SERIAL_8250_ASPEED_VUART is not set +CONFIG_SERIAL_8250_SHARE_IRQ=y +# CONFIG_SERIAL_8250_DETECT_IRQ is not set +# CONFIG_SERIAL_8250_RSA is not set +CONFIG_SERIAL_8250_DWLIB=y +CONFIG_SERIAL_8250_BCM2835AUX=y +CONFIG_SERIAL_8250_FSL=y +CONFIG_SERIAL_8250_DW=y +# CONFIG_SERIAL_8250_RT288X is not set +CONFIG_SERIAL_OF_PLATFORM=y + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_AMBA_PL010=y +CONFIG_SERIAL_AMBA_PL010_CONSOLE=y +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +# CONFIG_SERIAL_EARLYCON_ARM_SEMIHOST is not set +CONFIG_SERIAL_MESON=y +CONFIG_SERIAL_MESON_CONSOLE=y +CONFIG_SERIAL_TEGRA=y +# CONFIG_SERIAL_MAX3100 is not set +# CONFIG_SERIAL_MAX310X is not set +# CONFIG_SERIAL_UARTLITE is not set +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set +CONFIG_SERIAL_MSM=y +CONFIG_SERIAL_MSM_CONSOLE=y +# CONFIG_SERIAL_SIFIVE is not set +# CONFIG_SERIAL_SCCNXP is not set +# CONFIG_SERIAL_SC16IS7XX is not set +# CONFIG_SERIAL_ALTERA_JTAGUART is not set +# CONFIG_SERIAL_ALTERA_UART is not set +# CONFIG_SERIAL_IFX6X60 is not set +CONFIG_SERIAL_XILINX_PS_UART=y +CONFIG_SERIAL_XILINX_PS_UART_CONSOLE=y +# CONFIG_SERIAL_ARC is not set +CONFIG_SERIAL_RP2=m +CONFIG_SERIAL_RP2_NR_UARTS=32 +# CONFIG_SERIAL_FSL_LPUART is not set +# CONFIG_SERIAL_FSL_LINFLEXUART is not set +# CONFIG_SERIAL_CONEXANT_DIGICOLOR is not set +CONFIG_SERIAL_MVEBU_UART=y +CONFIG_SERIAL_MVEBU_CONSOLE=y +# end of Serial drivers + +CONFIG_SERIAL_MCTRL_GPIO=y +CONFIG_SERIAL_DEV_BUS=m +CONFIG_TTY_PRINTK=m +CONFIG_TTY_PRINTK_LEVEL=6 +# CONFIG_PRINTER is not set +# CONFIG_PPDEV is not set +CONFIG_HVC_DRIVER=y +CONFIG_HVC_IRQ=y +CONFIG_HVC_XEN=y +CONFIG_HVC_XEN_FRONTEND=y +# CONFIG_HVC_DCC is not set +CONFIG_VIRTIO_CONSOLE=m +CONFIG_IPMI_HANDLER=m +CONFIG_IPMI_DMI_DECODE=y +CONFIG_IPMI_PLAT_DATA=y +# CONFIG_IPMI_PANIC_EVENT is not set +CONFIG_IPMI_DEVICE_INTERFACE=m +CONFIG_IPMI_SI=m +CONFIG_IPMI_SSIF=m +CONFIG_IPMI_WATCHDOG=m +CONFIG_IPMI_POWEROFF=m +CONFIG_HW_RANDOM=m +# CONFIG_HW_RANDOM_TIMERIOMEM is not set +CONFIG_HW_RANDOM_BCM2835=m +CONFIG_HW_RANDOM_OMAP=m +CONFIG_HW_RANDOM_VIRTIO=m +CONFIG_HW_RANDOM_HISI=m +CONFIG_HW_RANDOM_XGENE=m +CONFIG_HW_RANDOM_MESON=m +CONFIG_HW_RANDOM_CAVIUM=m +CONFIG_HW_RANDOM_OPTEE=m +# CONFIG_APPLICOM is not set +# CONFIG_RAW_DRIVER is not set +CONFIG_TCG_TPM=m +CONFIG_HW_RANDOM_TPM=y +CONFIG_TCG_TIS_CORE=m +CONFIG_TCG_TIS=m +CONFIG_TCG_TIS_SPI=m +# CONFIG_TCG_TIS_I2C_ATMEL is not set +CONFIG_TCG_TIS_I2C_INFINEON=m +# CONFIG_TCG_TIS_I2C_NUVOTON is not set +CONFIG_TCG_ATMEL=m +# CONFIG_TCG_INFINEON is not set +# CONFIG_TCG_XEN is not set +CONFIG_TCG_CRB=m +CONFIG_TCG_VTPM_PROXY=m +# CONFIG_TCG_FTPM_TEE is not set +# CONFIG_TCG_TIS_ST33ZP24_I2C is not set +# CONFIG_TCG_TIS_ST33ZP24_SPI is not set +CONFIG_DEVPORT=y +# CONFIG_XILLYBUS is not set +# end of Character devices + +# CONFIG_RANDOM_TRUST_BOOTLOADER is not set + +# +# I2C support +# +CONFIG_I2C=y +CONFIG_ACPI_I2C_OPREGION=y +CONFIG_I2C_BOARDINFO=y +CONFIG_I2C_COMPAT=y +CONFIG_I2C_CHARDEV=m +CONFIG_I2C_MUX=m + +# +# Multiplexer I2C Chip support +# +# CONFIG_I2C_ARB_GPIO_CHALLENGE is not set +# CONFIG_I2C_MUX_GPIO is not set +# CONFIG_I2C_MUX_GPMUX is not set +# CONFIG_I2C_MUX_LTC4306 is not set +# CONFIG_I2C_MUX_PCA9541 is not set +# CONFIG_I2C_MUX_PCA954x is not set +# CONFIG_I2C_MUX_PINCTRL is not set +# CONFIG_I2C_MUX_REG is not set +# CONFIG_I2C_DEMUX_PINCTRL is not set +# CONFIG_I2C_MUX_MLXCPLD is not set +# end of Multiplexer I2C Chip support + +CONFIG_I2C_HELPER_AUTO=y +CONFIG_I2C_SMBUS=m +CONFIG_I2C_ALGOBIT=m +CONFIG_I2C_ALGOPCA=m + +# +# I2C Hardware Bus support +# + +# +# PC SMBus host controller drivers +# +# CONFIG_I2C_ALI1535 is not set +# CONFIG_I2C_ALI1563 is not set +# CONFIG_I2C_ALI15X3 is not set +# CONFIG_I2C_AMD756 is not set +# CONFIG_I2C_AMD8111 is not set +# CONFIG_I2C_AMD_MP2 is not set +# CONFIG_I2C_HIX5HD2 is not set +# CONFIG_I2C_I801 is not set +CONFIG_I2C_ISCH=m +# CONFIG_I2C_PIIX4 is not set +# CONFIG_I2C_NFORCE2 is not set +# CONFIG_I2C_NVIDIA_GPU is not set +# CONFIG_I2C_SIS5595 is not set +# CONFIG_I2C_SIS630 is not set +# CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_VIA is not set +# CONFIG_I2C_VIAPRO is not set + +# +# ACPI drivers +# +# CONFIG_I2C_SCMI is not set + +# +# I2C system bus drivers (mostly embedded / system-on-chip) +# +CONFIG_I2C_BCM2835=m +# CONFIG_I2C_CADENCE is not set +# CONFIG_I2C_CBUS_GPIO is not set +CONFIG_I2C_DESIGNWARE_CORE=m +CONFIG_I2C_DESIGNWARE_PLATFORM=m +# CONFIG_I2C_DESIGNWARE_SLAVE is not set +# CONFIG_I2C_DESIGNWARE_PCI is not set +# CONFIG_I2C_EMEV2 is not set +CONFIG_I2C_GPIO=m +# CONFIG_I2C_GPIO_FAULT_INJECTOR is not set +CONFIG_I2C_MESON=m +CONFIG_I2C_MV64XXX=m +# CONFIG_I2C_NOMADIK is not set +CONFIG_I2C_OCORES=m +CONFIG_I2C_PCA_PLATFORM=m +CONFIG_I2C_PXA=m +# CONFIG_I2C_PXA_SLAVE is not set +CONFIG_I2C_QUP=m +CONFIG_I2C_RK3X=m +CONFIG_I2C_SIMTEC=m +# CONFIG_I2C_SYNQUACER is not set +CONFIG_I2C_TEGRA=m +# CONFIG_I2C_VERSATILE is not set +CONFIG_I2C_THUNDERX=m +# CONFIG_I2C_XILINX is not set +CONFIG_I2C_XLP9XX=m + +# +# External I2C/SMBus adapter drivers +# +CONFIG_I2C_DIOLAN_U2C=m +# CONFIG_I2C_PARPORT is not set +# CONFIG_I2C_PARPORT_LIGHT is not set +CONFIG_I2C_ROBOTFUZZ_OSIF=m +CONFIG_I2C_TAOS_EVM=m +CONFIG_I2C_TINY_USB=m +CONFIG_I2C_VIPERBOARD=m + +# +# Other I2C/SMBus bus drivers +# +CONFIG_I2C_CROS_EC_TUNNEL=m +CONFIG_I2C_XGENE_SLIMPRO=m +# end of I2C Hardware Bus support + +# CONFIG_I2C_STUB is not set +# CONFIG_I2C_SLAVE is not set +# CONFIG_I2C_DEBUG_CORE is not set +# CONFIG_I2C_DEBUG_ALGO is not set +# CONFIG_I2C_DEBUG_BUS is not set +# end of I2C support + +# CONFIG_I3C is not set +CONFIG_SPI=y +# CONFIG_SPI_DEBUG is not set +CONFIG_SPI_MASTER=y +CONFIG_SPI_MEM=y + +# +# SPI Master Controller Drivers +# +# CONFIG_SPI_ALTERA is not set +CONFIG_SPI_ARMADA_3700=m +# CONFIG_SPI_AXI_SPI_ENGINE is not set +CONFIG_SPI_BCM2835=m +CONFIG_SPI_BCM2835AUX=m +CONFIG_SPI_BITBANG=m +CONFIG_SPI_BUTTERFLY=m +# CONFIG_SPI_CADENCE is not set +# CONFIG_SPI_DESIGNWARE is not set +# CONFIG_SPI_NXP_FLEXSPI is not set +# CONFIG_SPI_GPIO is not set +CONFIG_SPI_LM70_LLP=m +# CONFIG_SPI_FSL_SPI is not set +# CONFIG_SPI_MESON_SPICC is not set +CONFIG_SPI_MESON_SPIFC=m +# CONFIG_SPI_OC_TINY is not set +# CONFIG_SPI_ORION is not set +# CONFIG_SPI_PL022 is not set +# CONFIG_SPI_PXA2XX is not set +CONFIG_SPI_ROCKCHIP=m +# CONFIG_SPI_QCOM_QSPI is not set +CONFIG_SPI_QUP=m +# CONFIG_SPI_SC18IS602 is not set +# CONFIG_SPI_SIFIVE is not set +# CONFIG_SPI_SUN4I is not set +# CONFIG_SPI_SUN6I is not set +# CONFIG_SPI_SYNQUACER is not set +# CONFIG_SPI_MXIC is not set +CONFIG_SPI_TEGRA114=m +CONFIG_SPI_TEGRA20_SFLASH=m +CONFIG_SPI_TEGRA20_SLINK=m +CONFIG_SPI_THUNDERX=m +# CONFIG_SPI_XCOMM is not set +# CONFIG_SPI_XILINX is not set +CONFIG_SPI_XLP=m +# CONFIG_SPI_ZYNQMP_GQSPI is not set + +# +# SPI Protocol Masters +# +CONFIG_SPI_SPIDEV=y +# CONFIG_SPI_LOOPBACK_TEST is not set +# CONFIG_SPI_TLE62X0 is not set +# CONFIG_SPI_SLAVE is not set +CONFIG_SPI_DYNAMIC=y +CONFIG_SPMI=y +CONFIG_SPMI_MSM_PMIC_ARB=y +# CONFIG_HSI is not set +CONFIG_PPS=m +# CONFIG_PPS_DEBUG is not set + +# +# PPS clients support +# +# CONFIG_PPS_CLIENT_KTIMER is not set +CONFIG_PPS_CLIENT_LDISC=m +CONFIG_PPS_CLIENT_PARPORT=m +# CONFIG_PPS_CLIENT_GPIO is not set + +# +# PPS generators support +# + +# +# PTP clock support +# +CONFIG_PTP_1588_CLOCK=m + +# +# Enable PHYLIB and NETWORK_PHY_TIMESTAMPING to see the additional clocks. +# +# end of PTP clock support + +CONFIG_PINCTRL=y +CONFIG_GENERIC_PINCTRL_GROUPS=y +CONFIG_PINMUX=y +CONFIG_GENERIC_PINMUX_FUNCTIONS=y +CONFIG_PINCONF=y +CONFIG_GENERIC_PINCONF=y +# CONFIG_DEBUG_PINCTRL is not set +CONFIG_PINCTRL_AXP209=m +CONFIG_PINCTRL_AMD=y +# CONFIG_PINCTRL_MCP23S08 is not set +CONFIG_PINCTRL_ROCKCHIP=y +CONFIG_PINCTRL_SINGLE=y +# CONFIG_PINCTRL_SX150X is not set +# CONFIG_PINCTRL_STMFX is not set +CONFIG_PINCTRL_MAX77620=y +# CONFIG_PINCTRL_RK805 is not set +# CONFIG_PINCTRL_OCELOT is not set +CONFIG_PINCTRL_BCM2835=y +CONFIG_PINCTRL_MVEBU=y +CONFIG_PINCTRL_ARMADA_AP806=y +CONFIG_PINCTRL_ARMADA_CP110=y +CONFIG_PINCTRL_ARMADA_37XX=y +CONFIG_PINCTRL_MSM=y +# CONFIG_PINCTRL_APQ8064 is not set +# CONFIG_PINCTRL_APQ8084 is not set +# CONFIG_PINCTRL_IPQ4019 is not set +# CONFIG_PINCTRL_IPQ8064 is not set +# CONFIG_PINCTRL_IPQ8074 is not set +# CONFIG_PINCTRL_MSM8660 is not set +# CONFIG_PINCTRL_MSM8960 is not set +# CONFIG_PINCTRL_MDM9615 is not set +# CONFIG_PINCTRL_MSM8X74 is not set +CONFIG_PINCTRL_MSM8916=y +# CONFIG_PINCTRL_MSM8994 is not set +CONFIG_PINCTRL_MSM8996=y +# CONFIG_PINCTRL_MSM8998 is not set +# CONFIG_PINCTRL_QCS404 is not set +# CONFIG_PINCTRL_QDF2XXX is not set +CONFIG_PINCTRL_QCOM_SPMI_PMIC=y +CONFIG_PINCTRL_QCOM_SSBI_PMIC=y +# CONFIG_PINCTRL_SC7180 is not set +# CONFIG_PINCTRL_SDM660 is not set +# CONFIG_PINCTRL_SDM845 is not set +# CONFIG_PINCTRL_SM8150 is not set +CONFIG_PINCTRL_SUNXI=y +# CONFIG_PINCTRL_SUN4I_A10 is not set +# CONFIG_PINCTRL_SUN5I is not set +# CONFIG_PINCTRL_SUN6I_A31 is not set +# CONFIG_PINCTRL_SUN6I_A31_R is not set +# CONFIG_PINCTRL_SUN8I_A23 is not set +# CONFIG_PINCTRL_SUN8I_A33 is not set +# CONFIG_PINCTRL_SUN8I_A83T is not set +# CONFIG_PINCTRL_SUN8I_A83T_R is not set +# CONFIG_PINCTRL_SUN8I_A23_R is not set +# CONFIG_PINCTRL_SUN8I_H3 is not set +CONFIG_PINCTRL_SUN8I_H3_R=y +# CONFIG_PINCTRL_SUN8I_V3S is not set +# CONFIG_PINCTRL_SUN9I_A80 is not set +# CONFIG_PINCTRL_SUN9I_A80_R is not set +CONFIG_PINCTRL_SUN50I_A64=y +CONFIG_PINCTRL_SUN50I_A64_R=y +CONFIG_PINCTRL_SUN50I_H5=y +CONFIG_PINCTRL_SUN50I_H6=y +CONFIG_PINCTRL_SUN50I_H6_R=y +CONFIG_PINCTRL_TEGRA=y +CONFIG_PINCTRL_TEGRA124=y +CONFIG_PINCTRL_TEGRA210=y +CONFIG_PINCTRL_TEGRA_XUSB=y +CONFIG_PINCTRL_MESON=y +CONFIG_PINCTRL_MESON_GXBB=y +CONFIG_PINCTRL_MESON_GXL=y +CONFIG_PINCTRL_MESON8_PMX=y +CONFIG_PINCTRL_MESON_AXG=y +CONFIG_PINCTRL_MESON_AXG_PMX=y +CONFIG_PINCTRL_MESON_G12A=y +CONFIG_GPIOLIB=y +CONFIG_GPIOLIB_FASTPATH_LIMIT=512 +CONFIG_OF_GPIO=y +CONFIG_GPIO_ACPI=y +CONFIG_GPIOLIB_IRQCHIP=y +# CONFIG_DEBUG_GPIO is not set +CONFIG_GPIO_SYSFS=y +CONFIG_GPIO_GENERIC=y + +# +# Memory mapped GPIO drivers +# +# CONFIG_GPIO_74XX_MMIO is not set +# CONFIG_GPIO_ALTERA is not set +# CONFIG_GPIO_AMDPT is not set +CONFIG_GPIO_RASPBERRYPI_EXP=y +# CONFIG_GPIO_CADENCE is not set +# CONFIG_GPIO_DWAPB is not set +CONFIG_GPIO_EXAR=m +# CONFIG_GPIO_FTGPIO010 is not set +CONFIG_GPIO_GENERIC_PLATFORM=y +# CONFIG_GPIO_GRGPIO is not set +# CONFIG_GPIO_HLWD is not set +CONFIG_GPIO_MB86S7X=m +CONFIG_GPIO_MVEBU=y +CONFIG_GPIO_PL061=y +# CONFIG_GPIO_SAMA5D2_PIOBU is not set +# CONFIG_GPIO_SYSCON is not set +CONFIG_GPIO_TEGRA=y +# CONFIG_GPIO_THUNDERX is not set +CONFIG_GPIO_XGENE=y +CONFIG_GPIO_XGENE_SB=m +# CONFIG_GPIO_XILINX is not set +CONFIG_GPIO_XLP=y +CONFIG_GPIO_ZYNQ=m +# CONFIG_GPIO_AMD_FCH is not set +# end of Memory mapped GPIO drivers + +# +# I2C GPIO expanders +# +# CONFIG_GPIO_ADP5588 is not set +# CONFIG_GPIO_ADNP is not set +# CONFIG_GPIO_GW_PLD is not set +# CONFIG_GPIO_MAX7300 is not set +# CONFIG_GPIO_MAX732X is not set +CONFIG_GPIO_PCA953X=y +CONFIG_GPIO_PCA953X_IRQ=y +# CONFIG_GPIO_PCF857X is not set +# CONFIG_GPIO_TPIC2810 is not set +# end of I2C GPIO expanders + +# +# MFD GPIO expanders +# +CONFIG_GPIO_MAX77620=y +# end of MFD GPIO expanders + +# +# PCI GPIO expanders +# +CONFIG_GPIO_PCI_IDIO_16=m +CONFIG_GPIO_PCIE_IDIO_24=m +# CONFIG_GPIO_RDC321X is not set +# end of PCI GPIO expanders + +# +# SPI GPIO expanders +# +# CONFIG_GPIO_74X164 is not set +# CONFIG_GPIO_MAX3191X is not set +# CONFIG_GPIO_MAX7301 is not set +# CONFIG_GPIO_MC33880 is not set +# CONFIG_GPIO_PISOSR is not set +# CONFIG_GPIO_XRA1403 is not set +# end of SPI GPIO expanders + +# +# USB GPIO expanders +# +CONFIG_GPIO_VIPERBOARD=m +# end of USB GPIO expanders + +# CONFIG_GPIO_MOCKUP is not set +CONFIG_W1=m +CONFIG_W1_CON=y + +# +# 1-wire Bus Masters +# +# CONFIG_W1_MASTER_MATROX is not set +CONFIG_W1_MASTER_DS2490=m +CONFIG_W1_MASTER_DS2482=m +# CONFIG_W1_MASTER_DS1WM is not set +CONFIG_W1_MASTER_GPIO=m +# CONFIG_W1_MASTER_SGI is not set +# end of 1-wire Bus Masters + +# +# 1-wire Slaves +# +CONFIG_W1_SLAVE_THERM=m +CONFIG_W1_SLAVE_SMEM=m +CONFIG_W1_SLAVE_DS2405=m +CONFIG_W1_SLAVE_DS2408=m +CONFIG_W1_SLAVE_DS2408_READBACK=y +CONFIG_W1_SLAVE_DS2413=m +CONFIG_W1_SLAVE_DS2406=m +CONFIG_W1_SLAVE_DS2423=m +CONFIG_W1_SLAVE_DS2805=m +CONFIG_W1_SLAVE_DS2431=m +CONFIG_W1_SLAVE_DS2433=m +# CONFIG_W1_SLAVE_DS2433_CRC is not set +CONFIG_W1_SLAVE_DS2438=m +# CONFIG_W1_SLAVE_DS250X is not set +CONFIG_W1_SLAVE_DS2780=m +CONFIG_W1_SLAVE_DS2781=m +CONFIG_W1_SLAVE_DS28E04=m +CONFIG_W1_SLAVE_DS28E17=m +# end of 1-wire Slaves + +CONFIG_POWER_AVS=y +CONFIG_ROCKCHIP_IODOMAIN=m +CONFIG_POWER_RESET=y +# CONFIG_POWER_RESET_BRCMSTB is not set +# CONFIG_POWER_RESET_GPIO is not set +# CONFIG_POWER_RESET_GPIO_RESTART is not set +CONFIG_POWER_RESET_HISI=y +CONFIG_POWER_RESET_MSM=y +# CONFIG_POWER_RESET_QCOM_PON is not set +# CONFIG_POWER_RESET_LTC2952 is not set +# CONFIG_POWER_RESET_RESTART is not set +CONFIG_POWER_RESET_VEXPRESS=y +CONFIG_POWER_RESET_XGENE=y +CONFIG_POWER_RESET_SYSCON=y +CONFIG_POWER_RESET_SYSCON_POWEROFF=y +# CONFIG_SYSCON_REBOOT_MODE is not set +# CONFIG_NVMEM_REBOOT_MODE is not set +CONFIG_POWER_SUPPLY=y +# CONFIG_POWER_SUPPLY_DEBUG is not set +CONFIG_POWER_SUPPLY_HWMON=y +# CONFIG_PDA_POWER is not set +# CONFIG_GENERIC_ADC_BATTERY is not set +# CONFIG_TEST_POWER is not set +# CONFIG_CHARGER_ADP5061 is not set +CONFIG_BATTERY_DS2760=m +# CONFIG_BATTERY_DS2780 is not set +# CONFIG_BATTERY_DS2781 is not set +# CONFIG_BATTERY_DS2782 is not set +# CONFIG_BATTERY_LEGO_EV3 is not set +CONFIG_BATTERY_SBS=m +# CONFIG_CHARGER_SBS is not set +# CONFIG_MANAGER_SBS is not set +CONFIG_BATTERY_BQ27XXX=m +# CONFIG_BATTERY_BQ27XXX_I2C is not set +CONFIG_BATTERY_BQ27XXX_HDQ=m +CONFIG_CHARGER_AXP20X=m +CONFIG_BATTERY_AXP20X=m +CONFIG_AXP20X_POWER=m +CONFIG_AXP288_FUEL_GAUGE=m +# CONFIG_BATTERY_MAX17040 is not set +# CONFIG_BATTERY_MAX17042 is not set +# CONFIG_BATTERY_MAX1721X is not set +# CONFIG_CHARGER_ISP1704 is not set +# CONFIG_CHARGER_MAX8903 is not set +# CONFIG_CHARGER_LP8727 is not set +# CONFIG_CHARGER_GPIO is not set +# CONFIG_CHARGER_MANAGER is not set +# CONFIG_CHARGER_LT3651 is not set +# CONFIG_CHARGER_DETECTOR_MAX14656 is not set +CONFIG_CHARGER_QCOM_SMBB=m +# CONFIG_CHARGER_BQ2415X is not set +# CONFIG_CHARGER_BQ24190 is not set +# CONFIG_CHARGER_BQ24257 is not set +# CONFIG_CHARGER_BQ24735 is not set +# CONFIG_CHARGER_BQ25890 is not set +# CONFIG_CHARGER_SMB347 is not set +# CONFIG_BATTERY_GAUGE_LTC2941 is not set +# CONFIG_BATTERY_RT5033 is not set +# CONFIG_CHARGER_RT9455 is not set +CONFIG_CHARGER_CROS_USBPD=m +# CONFIG_CHARGER_UCS1002 is not set +CONFIG_HWMON=y +CONFIG_HWMON_VID=m +# CONFIG_HWMON_DEBUG_CHIP is not set + +# +# Native drivers +# +# CONFIG_SENSORS_AD7314 is not set +CONFIG_SENSORS_AD7414=m +CONFIG_SENSORS_AD7418=m +# CONFIG_SENSORS_ADM1021 is not set +# CONFIG_SENSORS_ADM1025 is not set +# CONFIG_SENSORS_ADM1026 is not set +CONFIG_SENSORS_ADM1029=m +# CONFIG_SENSORS_ADM1031 is not set +CONFIG_SENSORS_ADM9240=m +# CONFIG_SENSORS_ADT7310 is not set +# CONFIG_SENSORS_ADT7410 is not set +CONFIG_SENSORS_ADT7411=m +CONFIG_SENSORS_ADT7462=m +CONFIG_SENSORS_ADT7470=m +CONFIG_SENSORS_ADT7475=m +# CONFIG_SENSORS_AS370 is not set +CONFIG_SENSORS_ASC7621=m +CONFIG_SENSORS_ALTRA=m +CONFIG_SENSORS_ASPEED=m +CONFIG_SENSORS_ATXP1=m +CONFIG_SENSORS_DS620=m +# CONFIG_SENSORS_DS1621 is not set +CONFIG_SENSORS_I5K_AMB=m +# CONFIG_SENSORS_F71805F is not set +CONFIG_SENSORS_F71882FG=m +CONFIG_SENSORS_F75375S=m +CONFIG_SENSORS_FTSTEUTATES=m +# CONFIG_SENSORS_GL518SM is not set +# CONFIG_SENSORS_GL520SM is not set +CONFIG_SENSORS_G760A=m +# CONFIG_SENSORS_G762 is not set +# CONFIG_SENSORS_GPIO_FAN is not set +# CONFIG_SENSORS_HIH6130 is not set +CONFIG_SENSORS_IBMAEM=m +CONFIG_SENSORS_IBMPEX=m +# CONFIG_SENSORS_IIO_HWMON is not set +# CONFIG_SENSORS_IT87 is not set +CONFIG_SENSORS_JC42=m +# CONFIG_SENSORS_POWR1220 is not set +CONFIG_SENSORS_LINEAGE=m +# CONFIG_SENSORS_LTC2945 is not set +# CONFIG_SENSORS_LTC2990 is not set +CONFIG_SENSORS_LTC4151=m +CONFIG_SENSORS_LTC4215=m +# CONFIG_SENSORS_LTC4222 is not set +CONFIG_SENSORS_LTC4245=m +# CONFIG_SENSORS_LTC4260 is not set +CONFIG_SENSORS_LTC4261=m +CONFIG_SENSORS_MAX1111=m +CONFIG_SENSORS_MAX16065=m +# CONFIG_SENSORS_MAX1619 is not set +CONFIG_SENSORS_MAX1668=m +# CONFIG_SENSORS_MAX197 is not set +# CONFIG_SENSORS_MAX31722 is not set +# CONFIG_SENSORS_MAX6621 is not set +CONFIG_SENSORS_MAX6639=m +CONFIG_SENSORS_MAX6642=m +CONFIG_SENSORS_MAX6650=m +# CONFIG_SENSORS_MAX6697 is not set +# CONFIG_SENSORS_MAX31790 is not set +# CONFIG_SENSORS_MCP3021 is not set +# CONFIG_SENSORS_TC654 is not set +CONFIG_SENSORS_ADCXX=m +# CONFIG_SENSORS_LM63 is not set +CONFIG_SENSORS_LM70=m +CONFIG_SENSORS_LM73=m +# CONFIG_SENSORS_LM75 is not set +# CONFIG_SENSORS_LM77 is not set +# CONFIG_SENSORS_LM78 is not set +# CONFIG_SENSORS_LM80 is not set +# CONFIG_SENSORS_LM83 is not set +# CONFIG_SENSORS_LM85 is not set +# CONFIG_SENSORS_LM87 is not set +# CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_LM92 is not set +CONFIG_SENSORS_LM93=m +# CONFIG_SENSORS_LM95234 is not set +CONFIG_SENSORS_LM95241=m +CONFIG_SENSORS_LM95245=m +# CONFIG_SENSORS_PC87360 is not set +CONFIG_SENSORS_PC87427=m +CONFIG_SENSORS_NTC_THERMISTOR=m +CONFIG_SENSORS_NCT6683=m +CONFIG_SENSORS_NCT6775=m +CONFIG_SENSORS_NCT7802=m +CONFIG_SENSORS_NCT7904=m +CONFIG_SENSORS_NPCM7XX=m +# CONFIG_SENSORS_OCC_P8_I2C is not set +# CONFIG_SENSORS_PCF8591 is not set +# CONFIG_PMBUS is not set +# CONFIG_SENSORS_PWM_FAN is not set +# CONFIG_SENSORS_RASPBERRYPI_HWMON is not set +# CONFIG_SENSORS_SHT15 is not set +CONFIG_SENSORS_SHT21=m +# CONFIG_SENSORS_SHT3x is not set +# CONFIG_SENSORS_SHTC1 is not set +# CONFIG_SENSORS_SIS5595 is not set +CONFIG_SENSORS_DME1737=m +CONFIG_SENSORS_EMC1403=m +CONFIG_SENSORS_EMC2103=m +CONFIG_SENSORS_EMC6W201=m +# CONFIG_SENSORS_SMSC47M1 is not set +CONFIG_SENSORS_SMSC47M192=m +# CONFIG_SENSORS_SMSC47B397 is not set +CONFIG_SENSORS_SCH56XX_COMMON=m +CONFIG_SENSORS_SCH5627=m +# CONFIG_SENSORS_SCH5636 is not set +# CONFIG_SENSORS_STTS751 is not set +CONFIG_SENSORS_SMM665=m +# CONFIG_SENSORS_ADC128D818 is not set +CONFIG_SENSORS_ADS7828=m +CONFIG_SENSORS_ADS7871=m +CONFIG_SENSORS_AMC6821=m +# CONFIG_SENSORS_INA209 is not set +# CONFIG_SENSORS_INA2XX is not set +# CONFIG_SENSORS_INA3221 is not set +# CONFIG_SENSORS_TC74 is not set +CONFIG_SENSORS_THMC50=m +CONFIG_SENSORS_TMP102=m +# CONFIG_SENSORS_TMP103 is not set +# CONFIG_SENSORS_TMP108 is not set +CONFIG_SENSORS_TMP401=m +CONFIG_SENSORS_TMP421=m +# CONFIG_SENSORS_VEXPRESS is not set +# CONFIG_SENSORS_VIA686A is not set +CONFIG_SENSORS_VT1211=m +CONFIG_SENSORS_VT8231=m +CONFIG_SENSORS_W83773G=m +# CONFIG_SENSORS_W83781D is not set +CONFIG_SENSORS_W83791D=m +CONFIG_SENSORS_W83792D=m +CONFIG_SENSORS_W83793=m +CONFIG_SENSORS_W83795=m +# CONFIG_SENSORS_W83795_FANCTRL is not set +# CONFIG_SENSORS_W83L785TS is not set +CONFIG_SENSORS_W83L786NG=m +# CONFIG_SENSORS_W83627HF is not set +CONFIG_SENSORS_W83627EHF=m +CONFIG_SENSORS_XGENE=m + +# +# ACPI drivers +# +# CONFIG_SENSORS_ACPI_POWER is not set +CONFIG_THERMAL=y +CONFIG_THERMAL_STATISTICS=y +CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS=0 +CONFIG_THERMAL_HWMON=y +CONFIG_THERMAL_OF=y +# CONFIG_THERMAL_WRITABLE_TRIPS is not set +CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE=y +# CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE is not set +# CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE is not set +# CONFIG_THERMAL_DEFAULT_GOV_POWER_ALLOCATOR is not set +CONFIG_THERMAL_GOV_FAIR_SHARE=y +CONFIG_THERMAL_GOV_STEP_WISE=y +# CONFIG_THERMAL_GOV_BANG_BANG is not set +# CONFIG_THERMAL_GOV_USER_SPACE is not set +# CONFIG_THERMAL_GOV_POWER_ALLOCATOR is not set +CONFIG_CPU_THERMAL=y +# CONFIG_CLOCK_THERMAL is not set +CONFIG_DEVFREQ_THERMAL=y +# CONFIG_THERMAL_EMULATION is not set +# CONFIG_THERMAL_MMIO is not set +CONFIG_HISI_THERMAL=m +# CONFIG_MAX77620_THERMAL is not set +# CONFIG_QORIQ_THERMAL is not set +CONFIG_ROCKCHIP_THERMAL=m +CONFIG_ARMADA_THERMAL=m + +# +# Broadcom thermal drivers +# +CONFIG_BCM2835_THERMAL=m +# end of Broadcom thermal drivers + +# +# NVIDIA Tegra thermal drivers +# +CONFIG_TEGRA_SOCTHERM=y +# end of NVIDIA Tegra thermal drivers + +# CONFIG_GENERIC_ADC_THERMAL is not set + +# +# Qualcomm thermal drivers +# +CONFIG_QCOM_TSENS=m +CONFIG_QCOM_SPMI_TEMP_ALARM=m +# end of Qualcomm thermal drivers + +CONFIG_WATCHDOG=y +CONFIG_WATCHDOG_CORE=m +# CONFIG_WATCHDOG_NOWAYOUT is not set +CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED=y +CONFIG_WATCHDOG_OPEN_TIMEOUT=0 +CONFIG_WATCHDOG_SYSFS=y + +# +# Watchdog Pretimeout Governors +# +CONFIG_WATCHDOG_PRETIMEOUT_GOV=y +CONFIG_WATCHDOG_PRETIMEOUT_GOV_SEL=m +CONFIG_WATCHDOG_PRETIMEOUT_GOV_NOOP=m +CONFIG_WATCHDOG_PRETIMEOUT_GOV_PANIC=m +CONFIG_WATCHDOG_PRETIMEOUT_DEFAULT_GOV_NOOP=y +# CONFIG_WATCHDOG_PRETIMEOUT_DEFAULT_GOV_PANIC is not set + +# +# Watchdog Device Drivers +# +CONFIG_SOFT_WATCHDOG=m +# CONFIG_SOFT_WATCHDOG_PRETIMEOUT is not set +CONFIG_GPIO_WATCHDOG=m +CONFIG_WDAT_WDT=m +# CONFIG_XILINX_WATCHDOG is not set +# CONFIG_ZIIRAVE_WATCHDOG is not set +CONFIG_ARM_SP805_WATCHDOG=m +CONFIG_ARM_SBSA_WATCHDOG=m +# CONFIG_ARMADA_37XX_WATCHDOG is not set +# CONFIG_CADENCE_WATCHDOG is not set +CONFIG_DW_WATCHDOG=m +CONFIG_SUNXI_WATCHDOG=m +# CONFIG_MAX63XX_WATCHDOG is not set +# CONFIG_MAX77620_WATCHDOG is not set +CONFIG_TEGRA_WATCHDOG=m +CONFIG_QCOM_WDT=m +CONFIG_MESON_GXBB_WATCHDOG=m +CONFIG_MESON_WATCHDOG=m +# CONFIG_PM8916_WATCHDOG is not set +# CONFIG_ALIM7101_WDT is not set +# CONFIG_I6300ESB_WDT is not set +CONFIG_BCM2835_WDT=m +# CONFIG_MEN_A21_WDT is not set +CONFIG_XEN_WDT=m + +# +# PCI-based Watchdog Cards +# +# CONFIG_PCIPCWATCHDOG is not set +# CONFIG_WDTPCI is not set + +# +# USB-based Watchdog Cards +# +# CONFIG_USBPCWATCHDOG is not set +CONFIG_SSB_POSSIBLE=y +CONFIG_SSB=m +CONFIG_SSB_SPROM=y +CONFIG_SSB_BLOCKIO=y +CONFIG_SSB_PCIHOST_POSSIBLE=y +CONFIG_SSB_PCIHOST=y +CONFIG_SSB_B43_PCI_BRIDGE=y +CONFIG_SSB_SDIOHOST_POSSIBLE=y +CONFIG_SSB_SDIOHOST=y +CONFIG_SSB_DRIVER_PCICORE_POSSIBLE=y +CONFIG_SSB_DRIVER_PCICORE=y +# CONFIG_SSB_DRIVER_GPIO is not set +CONFIG_BCMA_POSSIBLE=y +CONFIG_BCMA=m +CONFIG_BCMA_BLOCKIO=y +CONFIG_BCMA_HOST_PCI_POSSIBLE=y +CONFIG_BCMA_HOST_PCI=y +# CONFIG_BCMA_HOST_SOC is not set +CONFIG_BCMA_DRIVER_PCI=y +# CONFIG_BCMA_DRIVER_GMAC_CMN is not set +# CONFIG_BCMA_DRIVER_GPIO is not set +# CONFIG_BCMA_DEBUG is not set + +# +# Multifunction device drivers +# +CONFIG_MFD_CORE=y +# CONFIG_MFD_ACT8945A is not set +# CONFIG_MFD_SUN4I_GPADC is not set +# CONFIG_MFD_AS3711 is not set +# CONFIG_MFD_AS3722 is not set +# CONFIG_PMIC_ADP5520 is not set +# CONFIG_MFD_AAT2870_CORE is not set +# CONFIG_MFD_ATMEL_FLEXCOM is not set +# CONFIG_MFD_ATMEL_HLCDC is not set +# CONFIG_MFD_BCM590XX is not set +# CONFIG_MFD_BD9571MWV is not set +# CONFIG_MFD_AC100 is not set +CONFIG_MFD_AXP20X=m +# CONFIG_MFD_AXP20X_I2C is not set +CONFIG_MFD_AXP20X_RSB=m +CONFIG_MFD_CROS_EC_DEV=y +# CONFIG_MFD_MADERA is not set +# CONFIG_PMIC_DA903X is not set +# CONFIG_MFD_DA9052_SPI is not set +# CONFIG_MFD_DA9052_I2C is not set +# CONFIG_MFD_DA9055 is not set +# CONFIG_MFD_DA9062 is not set +# CONFIG_MFD_DA9063 is not set +# CONFIG_MFD_DA9150 is not set +# CONFIG_MFD_DLN2 is not set +# CONFIG_MFD_MC13XXX_SPI is not set +# CONFIG_MFD_MC13XXX_I2C is not set +# CONFIG_MFD_HI6421_PMIC is not set +CONFIG_MFD_HI655X_PMIC=m +# CONFIG_HTC_PASIC3 is not set +# CONFIG_HTC_I2CPLD is not set +# CONFIG_LPC_ICH is not set +CONFIG_LPC_SCH=m +# CONFIG_MFD_JANZ_CMODIO is not set +# CONFIG_MFD_KEMPLD is not set +# CONFIG_MFD_88PM800 is not set +# CONFIG_MFD_88PM805 is not set +# CONFIG_MFD_88PM860X is not set +# CONFIG_MFD_MAX14577 is not set +CONFIG_MFD_MAX77620=y +# CONFIG_MFD_MAX77650 is not set +# CONFIG_MFD_MAX77686 is not set +# CONFIG_MFD_MAX77693 is not set +# CONFIG_MFD_MAX77843 is not set +# CONFIG_MFD_MAX8907 is not set +# CONFIG_MFD_MAX8925 is not set +# CONFIG_MFD_MAX8997 is not set +# CONFIG_MFD_MAX8998 is not set +# CONFIG_MFD_MT6397 is not set +# CONFIG_MFD_MENF21BMC is not set +# CONFIG_EZX_PCAP is not set +# CONFIG_MFD_CPCAP is not set +CONFIG_MFD_VIPERBOARD=m +# CONFIG_MFD_RETU is not set +# CONFIG_MFD_PCF50633 is not set +# CONFIG_UCB1400_CORE is not set +CONFIG_MFD_QCOM_RPM=m +CONFIG_MFD_SPMI_PMIC=m +# CONFIG_MFD_RDC321X is not set +# CONFIG_MFD_RT5033 is not set +# CONFIG_MFD_RC5T583 is not set +CONFIG_MFD_RK808=m +# CONFIG_MFD_RN5T618 is not set +# CONFIG_MFD_SEC_CORE is not set +# CONFIG_MFD_SI476X_CORE is not set +# CONFIG_MFD_SM501 is not set +# CONFIG_MFD_SKY81452 is not set +# CONFIG_MFD_SMSC is not set +# CONFIG_ABX500_CORE is not set +# CONFIG_MFD_STMPE is not set +CONFIG_MFD_SUN6I_PRCM=y +CONFIG_MFD_SYSCON=y +# CONFIG_MFD_TI_AM335X_TSCADC is not set +# CONFIG_MFD_LP3943 is not set +# CONFIG_MFD_LP8788 is not set +# CONFIG_MFD_TI_LMU is not set +# CONFIG_MFD_PALMAS is not set +# CONFIG_TPS6105X is not set +# CONFIG_TPS65010 is not set +# CONFIG_TPS6507X is not set +# CONFIG_MFD_TPS65086 is not set +# CONFIG_MFD_TPS65090 is not set +# CONFIG_MFD_TPS65217 is not set +# CONFIG_MFD_TI_LP873X is not set +# CONFIG_MFD_TI_LP87565 is not set +# CONFIG_MFD_TPS65218 is not set +# CONFIG_MFD_TPS6586X is not set +# CONFIG_MFD_TPS65910 is not set +# CONFIG_MFD_TPS65912_I2C is not set +# CONFIG_MFD_TPS65912_SPI is not set +# CONFIG_MFD_TPS80031 is not set +# CONFIG_TWL4030_CORE is not set +# CONFIG_TWL6040_CORE is not set +# CONFIG_MFD_WL1273_CORE is not set +# CONFIG_MFD_LM3533 is not set +# CONFIG_MFD_TC3589X is not set +# CONFIG_MFD_TQMX86 is not set +# CONFIG_MFD_VX855 is not set +# CONFIG_MFD_LOCHNAGAR is not set +# CONFIG_MFD_ARIZONA_I2C is not set +# CONFIG_MFD_ARIZONA_SPI is not set +# CONFIG_MFD_WM8400 is not set +# CONFIG_MFD_WM831X_I2C is not set +# CONFIG_MFD_WM831X_SPI is not set +# CONFIG_MFD_WM8350_I2C is not set +# CONFIG_MFD_WM8994 is not set +# CONFIG_MFD_ROHM_BD718XX is not set +# CONFIG_MFD_ROHM_BD70528 is not set +# CONFIG_MFD_STPMIC1 is not set +# CONFIG_MFD_STMFX is not set +CONFIG_MFD_VEXPRESS_SYSREG=y +# CONFIG_RAVE_SP_CORE is not set +# end of Multifunction device drivers + +CONFIG_REGULATOR=y +# CONFIG_REGULATOR_DEBUG is not set +CONFIG_REGULATOR_FIXED_VOLTAGE=m +# CONFIG_REGULATOR_VIRTUAL_CONSUMER is not set +# CONFIG_REGULATOR_USERSPACE_CONSUMER is not set +# CONFIG_REGULATOR_88PG86X is not set +# CONFIG_REGULATOR_ACT8865 is not set +# CONFIG_REGULATOR_AD5398 is not set +# CONFIG_REGULATOR_ANATOP is not set +CONFIG_REGULATOR_AXP20X=m +# CONFIG_REGULATOR_DA9210 is not set +# CONFIG_REGULATOR_DA9211 is not set +CONFIG_REGULATOR_FAN53555=m +CONFIG_REGULATOR_GPIO=m +CONFIG_REGULATOR_HI655X=m +# CONFIG_REGULATOR_ISL9305 is not set +# CONFIG_REGULATOR_ISL6271A is not set +# CONFIG_REGULATOR_LP3971 is not set +# CONFIG_REGULATOR_LP3972 is not set +# CONFIG_REGULATOR_LP872X is not set +# CONFIG_REGULATOR_LP8755 is not set +# CONFIG_REGULATOR_LTC3589 is not set +# CONFIG_REGULATOR_LTC3676 is not set +# CONFIG_REGULATOR_MAX1586 is not set +CONFIG_REGULATOR_MAX77620=m +# CONFIG_REGULATOR_MAX8649 is not set +# CONFIG_REGULATOR_MAX8660 is not set +# CONFIG_REGULATOR_MAX8952 is not set +# CONFIG_REGULATOR_MAX8973 is not set +# CONFIG_REGULATOR_MCP16502 is not set +# CONFIG_REGULATOR_MT6311 is not set +# CONFIG_REGULATOR_PFUZE100 is not set +# CONFIG_REGULATOR_PV88060 is not set +# CONFIG_REGULATOR_PV88080 is not set +# CONFIG_REGULATOR_PV88090 is not set +CONFIG_REGULATOR_PWM=m +CONFIG_REGULATOR_QCOM_RPM=m +CONFIG_REGULATOR_QCOM_SMD_RPM=m +CONFIG_REGULATOR_QCOM_SPMI=m +CONFIG_REGULATOR_RK808=m +# CONFIG_REGULATOR_SLG51000 is not set +# CONFIG_REGULATOR_SY8106A is not set +# CONFIG_REGULATOR_SY8824X is not set +# CONFIG_REGULATOR_TPS51632 is not set +# CONFIG_REGULATOR_TPS62360 is not set +# CONFIG_REGULATOR_TPS65023 is not set +# CONFIG_REGULATOR_TPS6507X is not set +# CONFIG_REGULATOR_TPS65132 is not set +# CONFIG_REGULATOR_TPS6524X is not set +CONFIG_REGULATOR_VCTRL=m +# CONFIG_REGULATOR_VEXPRESS is not set +CONFIG_CEC_CORE=m +CONFIG_RC_CORE=m +CONFIG_RC_MAP=m +CONFIG_LIRC=y +CONFIG_RC_DECODERS=y +CONFIG_IR_NEC_DECODER=m +CONFIG_IR_RC5_DECODER=m +CONFIG_IR_RC6_DECODER=m +CONFIG_IR_JVC_DECODER=m +CONFIG_IR_SONY_DECODER=m +CONFIG_IR_SANYO_DECODER=m +CONFIG_IR_SHARP_DECODER=m +CONFIG_IR_MCE_KBD_DECODER=m +CONFIG_IR_XMP_DECODER=m +CONFIG_IR_IMON_DECODER=m +# CONFIG_IR_RCMM_DECODER is not set +CONFIG_RC_DEVICES=y +CONFIG_RC_ATI_REMOTE=m +CONFIG_IR_ENE=m +# CONFIG_IR_HIX5HD2 is not set +CONFIG_IR_IMON=m +CONFIG_IR_IMON_RAW=m +CONFIG_IR_MCEUSB=m +# CONFIG_IR_ITE_CIR is not set +# CONFIG_IR_FINTEK is not set +# CONFIG_IR_MESON is not set +# CONFIG_IR_NUVOTON is not set +CONFIG_IR_REDRAT3=m +# CONFIG_IR_SPI is not set +CONFIG_IR_STREAMZAP=m +CONFIG_IR_IGORPLUGUSB=m +CONFIG_IR_IGUANA=m +CONFIG_IR_TTUSBIR=m +CONFIG_RC_LOOPBACK=m +# CONFIG_IR_GPIO_CIR is not set +# CONFIG_IR_GPIO_TX is not set +# CONFIG_IR_PWM_TX is not set +# CONFIG_IR_SUNXI is not set +# CONFIG_IR_SERIAL is not set +# CONFIG_IR_SIR is not set +# CONFIG_RC_XBOX_DVD is not set +CONFIG_MEDIA_SUPPORT=m + +# +# Multimedia core support +# +CONFIG_MEDIA_CAMERA_SUPPORT=y +CONFIG_MEDIA_ANALOG_TV_SUPPORT=y +CONFIG_MEDIA_DIGITAL_TV_SUPPORT=y +CONFIG_MEDIA_RADIO_SUPPORT=y +CONFIG_MEDIA_SDR_SUPPORT=y +CONFIG_MEDIA_CEC_SUPPORT=y +# CONFIG_MEDIA_CEC_RC is not set +CONFIG_MEDIA_CONTROLLER=y +CONFIG_MEDIA_CONTROLLER_DVB=y +CONFIG_VIDEO_DEV=m +# CONFIG_VIDEO_V4L2_SUBDEV_API is not set +CONFIG_VIDEO_V4L2=m +CONFIG_VIDEO_V4L2_I2C=y +# CONFIG_VIDEO_ADV_DEBUG is not set +# CONFIG_VIDEO_FIXED_MINOR_RANGES is not set +CONFIG_VIDEO_TUNER=m +CONFIG_V4L2_MEM2MEM_DEV=m +CONFIG_V4L2_FWNODE=m +CONFIG_VIDEOBUF_GEN=m +CONFIG_VIDEOBUF_DMA_SG=m +CONFIG_VIDEOBUF_VMALLOC=m +CONFIG_DVB_CORE=m +# CONFIG_DVB_MMAP is not set +CONFIG_DVB_NET=y +CONFIG_TTPCI_EEPROM=m +CONFIG_DVB_MAX_ADAPTERS=16 +CONFIG_DVB_DYNAMIC_MINORS=y +# CONFIG_DVB_DEMUX_SECTION_LOSS_LOG is not set +# CONFIG_DVB_ULE_DEBUG is not set + +# +# Media drivers +# +CONFIG_MEDIA_USB_SUPPORT=y + +# +# Webcam devices +# +CONFIG_USB_VIDEO_CLASS=m +CONFIG_USB_VIDEO_CLASS_INPUT_EVDEV=y +CONFIG_USB_GSPCA=m +CONFIG_USB_M5602=m +CONFIG_USB_STV06XX=m +CONFIG_USB_GL860=m +CONFIG_USB_GSPCA_BENQ=m +CONFIG_USB_GSPCA_CONEX=m +CONFIG_USB_GSPCA_CPIA1=m +CONFIG_USB_GSPCA_DTCS033=m +CONFIG_USB_GSPCA_ETOMS=m +CONFIG_USB_GSPCA_FINEPIX=m +CONFIG_USB_GSPCA_JEILINJ=m +CONFIG_USB_GSPCA_JL2005BCD=m +CONFIG_USB_GSPCA_KINECT=m +CONFIG_USB_GSPCA_KONICA=m +CONFIG_USB_GSPCA_MARS=m +CONFIG_USB_GSPCA_MR97310A=m +CONFIG_USB_GSPCA_NW80X=m +CONFIG_USB_GSPCA_OV519=m +CONFIG_USB_GSPCA_OV534=m +CONFIG_USB_GSPCA_OV534_9=m +CONFIG_USB_GSPCA_PAC207=m +CONFIG_USB_GSPCA_PAC7302=m +CONFIG_USB_GSPCA_PAC7311=m +CONFIG_USB_GSPCA_SE401=m +CONFIG_USB_GSPCA_SN9C2028=m +CONFIG_USB_GSPCA_SN9C20X=m +CONFIG_USB_GSPCA_SONIXB=m +CONFIG_USB_GSPCA_SONIXJ=m +CONFIG_USB_GSPCA_SPCA500=m +CONFIG_USB_GSPCA_SPCA501=m +CONFIG_USB_GSPCA_SPCA505=m +CONFIG_USB_GSPCA_SPCA506=m +CONFIG_USB_GSPCA_SPCA508=m +CONFIG_USB_GSPCA_SPCA561=m +CONFIG_USB_GSPCA_SPCA1528=m +CONFIG_USB_GSPCA_SQ905=m +CONFIG_USB_GSPCA_SQ905C=m +CONFIG_USB_GSPCA_SQ930X=m +CONFIG_USB_GSPCA_STK014=m +CONFIG_USB_GSPCA_STK1135=m +CONFIG_USB_GSPCA_STV0680=m +CONFIG_USB_GSPCA_SUNPLUS=m +CONFIG_USB_GSPCA_T613=m +CONFIG_USB_GSPCA_TOPRO=m +CONFIG_USB_GSPCA_TOUPTEK=m +CONFIG_USB_GSPCA_TV8532=m +CONFIG_USB_GSPCA_VC032X=m +CONFIG_USB_GSPCA_VICAM=m +CONFIG_USB_GSPCA_XIRLINK_CIT=m +CONFIG_USB_GSPCA_ZC3XX=m +CONFIG_USB_PWC=m +# CONFIG_USB_PWC_DEBUG is not set +CONFIG_USB_PWC_INPUT_EVDEV=y +CONFIG_VIDEO_CPIA2=m +CONFIG_USB_ZR364XX=m +CONFIG_USB_STKWEBCAM=m +CONFIG_USB_S2255=m +CONFIG_VIDEO_USBTV=m + +# +# Analog TV USB devices +# +CONFIG_VIDEO_PVRUSB2=m +CONFIG_VIDEO_PVRUSB2_SYSFS=y +CONFIG_VIDEO_PVRUSB2_DVB=y +# CONFIG_VIDEO_PVRUSB2_DEBUGIFC is not set +CONFIG_VIDEO_HDPVR=m +CONFIG_VIDEO_USBVISION=m +CONFIG_VIDEO_STK1160_COMMON=m +CONFIG_VIDEO_STK1160=m +CONFIG_VIDEO_GO7007=m +CONFIG_VIDEO_GO7007_USB=m +CONFIG_VIDEO_GO7007_LOADER=m +CONFIG_VIDEO_GO7007_USB_S2250_BOARD=m + +# +# Analog/digital TV USB devices +# +CONFIG_VIDEO_AU0828=m +CONFIG_VIDEO_AU0828_V4L2=y +CONFIG_VIDEO_AU0828_RC=y +CONFIG_VIDEO_CX231XX=m +CONFIG_VIDEO_CX231XX_RC=y +CONFIG_VIDEO_CX231XX_ALSA=m +CONFIG_VIDEO_CX231XX_DVB=m +# CONFIG_VIDEO_TM6000 is not set + +# +# Digital TV USB devices +# +CONFIG_DVB_USB=m +# CONFIG_DVB_USB_DEBUG is not set +CONFIG_DVB_USB_DIB3000MC=m +CONFIG_DVB_USB_A800=m +CONFIG_DVB_USB_DIBUSB_MB=m +CONFIG_DVB_USB_DIBUSB_MB_FAULTY=y +CONFIG_DVB_USB_DIBUSB_MC=m +CONFIG_DVB_USB_DIB0700=m +CONFIG_DVB_USB_UMT_010=m +CONFIG_DVB_USB_CXUSB=m +# CONFIG_DVB_USB_CXUSB_ANALOG is not set +CONFIG_DVB_USB_M920X=m +CONFIG_DVB_USB_DIGITV=m +CONFIG_DVB_USB_VP7045=m +CONFIG_DVB_USB_VP702X=m +CONFIG_DVB_USB_GP8PSK=m +CONFIG_DVB_USB_NOVA_T_USB2=m +CONFIG_DVB_USB_TTUSB2=m +CONFIG_DVB_USB_DTT200U=m +CONFIG_DVB_USB_OPERA1=m +CONFIG_DVB_USB_AF9005=m +CONFIG_DVB_USB_AF9005_REMOTE=m +CONFIG_DVB_USB_PCTV452E=m +CONFIG_DVB_USB_DW2102=m +CONFIG_DVB_USB_CINERGY_T2=m +CONFIG_DVB_USB_DTV5100=m +CONFIG_DVB_USB_AZ6027=m +CONFIG_DVB_USB_TECHNISAT_USB2=m +CONFIG_DVB_USB_V2=m +CONFIG_DVB_USB_AF9015=m +CONFIG_DVB_USB_AF9035=m +CONFIG_DVB_USB_ANYSEE=m +CONFIG_DVB_USB_AU6610=m +CONFIG_DVB_USB_AZ6007=m +CONFIG_DVB_USB_CE6230=m +CONFIG_DVB_USB_EC168=m +CONFIG_DVB_USB_GL861=m +CONFIG_DVB_USB_LME2510=m +CONFIG_DVB_USB_MXL111SF=m +CONFIG_DVB_USB_RTL28XXU=m +CONFIG_DVB_USB_DVBSKY=m +CONFIG_DVB_USB_ZD1301=m +CONFIG_DVB_TTUSB_BUDGET=m +CONFIG_DVB_TTUSB_DEC=m +CONFIG_SMS_USB_DRV=m +CONFIG_DVB_B2C2_FLEXCOP_USB=m +# CONFIG_DVB_B2C2_FLEXCOP_USB_DEBUG is not set +CONFIG_DVB_AS102=m + +# +# Webcam, TV (analog/digital) USB devices +# +CONFIG_VIDEO_EM28XX=m +CONFIG_VIDEO_EM28XX_V4L2=m +CONFIG_VIDEO_EM28XX_ALSA=m +CONFIG_VIDEO_EM28XX_DVB=m +CONFIG_VIDEO_EM28XX_RC=m + +# +# Software defined radio USB devices +# +CONFIG_USB_AIRSPY=m +CONFIG_USB_HACKRF=m +CONFIG_USB_MSI2500=m + +# +# USB HDMI CEC adapters +# +CONFIG_USB_PULSE8_CEC=m +CONFIG_USB_RAINSHADOW_CEC=m +CONFIG_MEDIA_PCI_SUPPORT=y + +# +# Media capture support +# +CONFIG_VIDEO_SOLO6X10=m +CONFIG_VIDEO_TW5864=m +CONFIG_VIDEO_TW68=m +CONFIG_VIDEO_TW686X=m + +# +# Media capture/analog TV support +# +CONFIG_VIDEO_IVTV=m +# CONFIG_VIDEO_IVTV_DEPRECATED_IOCTLS is not set +CONFIG_VIDEO_IVTV_ALSA=m +CONFIG_VIDEO_FB_IVTV=m +CONFIG_VIDEO_HEXIUM_GEMINI=m +CONFIG_VIDEO_HEXIUM_ORION=m +CONFIG_VIDEO_MXB=m +CONFIG_VIDEO_DT3155=m + +# +# Media capture/analog/hybrid TV support +# +CONFIG_VIDEO_CX18=m +CONFIG_VIDEO_CX18_ALSA=m +CONFIG_VIDEO_CX23885=m +CONFIG_MEDIA_ALTERA_CI=m +# CONFIG_VIDEO_CX25821 is not set +CONFIG_VIDEO_CX88=m +CONFIG_VIDEO_CX88_ALSA=m +CONFIG_VIDEO_CX88_BLACKBIRD=m +CONFIG_VIDEO_CX88_DVB=m +CONFIG_VIDEO_CX88_ENABLE_VP3054=y +CONFIG_VIDEO_CX88_VP3054=m +CONFIG_VIDEO_CX88_MPEG=m +CONFIG_VIDEO_BT848=m +CONFIG_DVB_BT8XX=m +CONFIG_VIDEO_SAA7134=m +CONFIG_VIDEO_SAA7134_ALSA=m +CONFIG_VIDEO_SAA7134_RC=y +CONFIG_VIDEO_SAA7134_DVB=m +# CONFIG_VIDEO_SAA7134_GO7007 is not set +CONFIG_VIDEO_SAA7164=m + +# +# Media digital TV PCI Adapters +# +CONFIG_DVB_AV7110_IR=y +CONFIG_DVB_AV7110=m +CONFIG_DVB_AV7110_OSD=y +CONFIG_DVB_BUDGET_CORE=m +CONFIG_DVB_BUDGET=m +CONFIG_DVB_BUDGET_CI=m +CONFIG_DVB_BUDGET_AV=m +CONFIG_DVB_BUDGET_PATCH=m +CONFIG_DVB_B2C2_FLEXCOP_PCI=m +# CONFIG_DVB_B2C2_FLEXCOP_PCI_DEBUG is not set +CONFIG_DVB_PLUTO2=m +CONFIG_DVB_DM1105=m +CONFIG_DVB_PT1=m +CONFIG_DVB_PT3=m +CONFIG_MANTIS_CORE=m +CONFIG_DVB_MANTIS=m +CONFIG_DVB_HOPPER=m +CONFIG_DVB_NGENE=m +CONFIG_DVB_DDBRIDGE=m +# CONFIG_DVB_DDBRIDGE_MSIENABLE is not set +CONFIG_DVB_SMIPCIE=m +CONFIG_DVB_NETUP_UNIDVB=m +CONFIG_V4L_PLATFORM_DRIVERS=y +CONFIG_VIDEO_CAFE_CCIC=m +# CONFIG_VIDEO_CADENCE is not set +# CONFIG_VIDEO_ASPEED is not set +CONFIG_V4L_MEM2MEM_DRIVERS=y +# CONFIG_VIDEO_MEM2MEM_DEINTERLACE is not set +# CONFIG_VIDEO_SH_VEU is not set +# CONFIG_VIDEO_ROCKCHIP_RGA is not set +CONFIG_VIDEO_QCOM_VENUS=m +CONFIG_V4L_TEST_DRIVERS=y +CONFIG_VIDEO_VIVID=m +CONFIG_VIDEO_VIVID_CEC=y +CONFIG_VIDEO_VIVID_MAX_DEVS=64 +# CONFIG_VIDEO_VIM2M is not set +# CONFIG_VIDEO_VICODEC is not set +# CONFIG_DVB_PLATFORM_DRIVERS is not set +# CONFIG_CEC_PLATFORM_DRIVERS is not set +# CONFIG_SDR_PLATFORM_DRIVERS is not set + +# +# Supported MMC/SDIO adapters +# +CONFIG_SMS_SDIO_DRV=m +CONFIG_RADIO_ADAPTERS=y +CONFIG_RADIO_TEA575X=m +CONFIG_RADIO_SI470X=m +CONFIG_USB_SI470X=m +# CONFIG_I2C_SI470X is not set +# CONFIG_RADIO_SI4713 is not set +CONFIG_USB_MR800=m +# CONFIG_USB_DSBR is not set +# CONFIG_RADIO_MAXIRADIO is not set +CONFIG_RADIO_SHARK=m +CONFIG_RADIO_SHARK2=m +CONFIG_USB_KEENE=m +CONFIG_USB_RAREMONO=m +CONFIG_USB_MA901=m +# CONFIG_RADIO_TEA5764 is not set +# CONFIG_RADIO_SAA7706H is not set +# CONFIG_RADIO_TEF6862 is not set +# CONFIG_RADIO_WL1273 is not set + +# +# Texas Instruments WL128x FM driver (ST based) +# +# CONFIG_RADIO_WL128X is not set +# end of Texas Instruments WL128x FM driver (ST based) + +# +# Supported FireWire (IEEE 1394) Adapters +# +CONFIG_DVB_FIREDTV=m +CONFIG_DVB_FIREDTV_INPUT=y +CONFIG_MEDIA_COMMON_OPTIONS=y + +# +# common driver options +# +CONFIG_VIDEO_CX2341X=m +CONFIG_VIDEO_TVEEPROM=m +CONFIG_CYPRESS_FIRMWARE=m +CONFIG_VIDEOBUF2_CORE=m +CONFIG_VIDEOBUF2_V4L2=m +CONFIG_VIDEOBUF2_MEMOPS=m +CONFIG_VIDEOBUF2_DMA_CONTIG=m +CONFIG_VIDEOBUF2_VMALLOC=m +CONFIG_VIDEOBUF2_DMA_SG=m +CONFIG_VIDEOBUF2_DVB=m +CONFIG_DVB_B2C2_FLEXCOP=m +CONFIG_VIDEO_SAA7146=m +CONFIG_VIDEO_SAA7146_VV=m +CONFIG_SMS_SIANO_MDTV=m +CONFIG_SMS_SIANO_RC=y +# CONFIG_SMS_SIANO_DEBUGFS is not set +CONFIG_VIDEO_V4L2_TPG=m + +# +# Media ancillary drivers (tuners, sensors, i2c, spi, frontends) +# +CONFIG_MEDIA_SUBDRV_AUTOSELECT=y +CONFIG_MEDIA_ATTACH=y +CONFIG_VIDEO_IR_I2C=m + +# +# I2C Encoders, decoders, sensors and other helper chips +# + +# +# Audio decoders, processors and mixers +# +CONFIG_VIDEO_TVAUDIO=m +CONFIG_VIDEO_TDA7432=m +CONFIG_VIDEO_TDA9840=m +CONFIG_VIDEO_TEA6415C=m +CONFIG_VIDEO_TEA6420=m +CONFIG_VIDEO_MSP3400=m +CONFIG_VIDEO_CS3308=m +CONFIG_VIDEO_CS5345=m +CONFIG_VIDEO_CS53L32A=m +CONFIG_VIDEO_TLV320AIC23B=m +CONFIG_VIDEO_UDA1342=m +CONFIG_VIDEO_WM8775=m +CONFIG_VIDEO_WM8739=m +CONFIG_VIDEO_VP27SMPX=m +CONFIG_VIDEO_SONY_BTF_MPX=m + +# +# RDS decoders +# +CONFIG_VIDEO_SAA6588=m + +# +# Video decoders +# +# CONFIG_VIDEO_ADV7183 is not set +CONFIG_VIDEO_BT819=m +CONFIG_VIDEO_BT856=m +# CONFIG_VIDEO_BT866 is not set +CONFIG_VIDEO_KS0127=m +# CONFIG_VIDEO_ML86V7667 is not set +CONFIG_VIDEO_SAA7110=m +CONFIG_VIDEO_SAA711X=m +# CONFIG_VIDEO_TVP514X is not set +CONFIG_VIDEO_TVP5150=m +# CONFIG_VIDEO_TVP7002 is not set +CONFIG_VIDEO_TW2804=m +CONFIG_VIDEO_TW9903=m +CONFIG_VIDEO_TW9906=m +# CONFIG_VIDEO_TW9910 is not set +CONFIG_VIDEO_VPX3220=m + +# +# Video and audio decoders +# +CONFIG_VIDEO_SAA717X=m +CONFIG_VIDEO_CX25840=m + +# +# Video encoders +# +CONFIG_VIDEO_SAA7127=m +CONFIG_VIDEO_SAA7185=m +CONFIG_VIDEO_ADV7170=m +CONFIG_VIDEO_ADV7175=m +# CONFIG_VIDEO_ADV7343 is not set +# CONFIG_VIDEO_ADV7393 is not set +# CONFIG_VIDEO_AK881X is not set +# CONFIG_VIDEO_THS8200 is not set + +# +# Camera sensor devices +# +CONFIG_VIDEO_OV2640=m +# CONFIG_VIDEO_OV2659 is not set +# CONFIG_VIDEO_OV2680 is not set +# CONFIG_VIDEO_OV2685 is not set +# CONFIG_VIDEO_OV6650 is not set +# CONFIG_VIDEO_OV5695 is not set +# CONFIG_VIDEO_OV772X is not set +CONFIG_VIDEO_OV7640=m +CONFIG_VIDEO_OV7670=m +# CONFIG_VIDEO_OV7740 is not set +# CONFIG_VIDEO_OV9640 is not set +# CONFIG_VIDEO_VS6624 is not set +# CONFIG_VIDEO_MT9M111 is not set +# CONFIG_VIDEO_MT9T112 is not set +CONFIG_VIDEO_MT9V011=m +# CONFIG_VIDEO_MT9V111 is not set +CONFIG_VIDEO_SR030PC30=m +# CONFIG_VIDEO_RJ54N1 is not set + +# +# Lens drivers +# +# CONFIG_VIDEO_AD5820 is not set + +# +# Flash devices +# +# CONFIG_VIDEO_ADP1653 is not set +# CONFIG_VIDEO_LM3560 is not set +# CONFIG_VIDEO_LM3646 is not set + +# +# Video improvement chips +# +CONFIG_VIDEO_UPD64031A=m +CONFIG_VIDEO_UPD64083=m + +# +# Audio/Video compression chips +# +CONFIG_VIDEO_SAA6752HS=m + +# +# SDR tuner chips +# +# CONFIG_SDR_MAX2175 is not set + +# +# Miscellaneous helper chips +# +# CONFIG_VIDEO_THS7303 is not set +CONFIG_VIDEO_M52790=m +# CONFIG_VIDEO_I2C is not set +# end of I2C Encoders, decoders, sensors and other helper chips + +# +# SPI helper chips +# +# end of SPI helper chips + +# +# Media SPI Adapters +# +# CONFIG_CXD2880_SPI_DRV is not set +# end of Media SPI Adapters + +CONFIG_MEDIA_TUNER=m + +# +# Customize TV tuners +# +CONFIG_MEDIA_TUNER_SIMPLE=m +CONFIG_MEDIA_TUNER_TDA18250=m +CONFIG_MEDIA_TUNER_TDA8290=m +CONFIG_MEDIA_TUNER_TDA827X=m +CONFIG_MEDIA_TUNER_TDA18271=m +CONFIG_MEDIA_TUNER_TDA9887=m +CONFIG_MEDIA_TUNER_TEA5761=m +CONFIG_MEDIA_TUNER_TEA5767=m +CONFIG_MEDIA_TUNER_MSI001=m +CONFIG_MEDIA_TUNER_MT20XX=m +CONFIG_MEDIA_TUNER_MT2060=m +CONFIG_MEDIA_TUNER_MT2063=m +CONFIG_MEDIA_TUNER_MT2266=m +CONFIG_MEDIA_TUNER_MT2131=m +CONFIG_MEDIA_TUNER_QT1010=m +CONFIG_MEDIA_TUNER_XC2028=m +CONFIG_MEDIA_TUNER_XC5000=m +CONFIG_MEDIA_TUNER_XC4000=m +CONFIG_MEDIA_TUNER_MXL5005S=m +CONFIG_MEDIA_TUNER_MXL5007T=m +CONFIG_MEDIA_TUNER_MC44S803=m +CONFIG_MEDIA_TUNER_MAX2165=m +CONFIG_MEDIA_TUNER_TDA18218=m +CONFIG_MEDIA_TUNER_FC0011=m +CONFIG_MEDIA_TUNER_FC0012=m +CONFIG_MEDIA_TUNER_FC0013=m +CONFIG_MEDIA_TUNER_TDA18212=m +CONFIG_MEDIA_TUNER_E4000=m +CONFIG_MEDIA_TUNER_FC2580=m +CONFIG_MEDIA_TUNER_M88RS6000T=m +CONFIG_MEDIA_TUNER_TUA9001=m +CONFIG_MEDIA_TUNER_SI2157=m +CONFIG_MEDIA_TUNER_IT913X=m +CONFIG_MEDIA_TUNER_R820T=m +CONFIG_MEDIA_TUNER_MXL301RF=m +CONFIG_MEDIA_TUNER_QM1D1C0042=m +CONFIG_MEDIA_TUNER_QM1D1B0004=m +# end of Customize TV tuners + +# +# Customise DVB Frontends +# + +# +# Multistandard (satellite) frontends +# +CONFIG_DVB_STB0899=m +CONFIG_DVB_STB6100=m +CONFIG_DVB_STV090x=m +CONFIG_DVB_STV0910=m +CONFIG_DVB_STV6110x=m +CONFIG_DVB_STV6111=m +CONFIG_DVB_MXL5XX=m +CONFIG_DVB_M88DS3103=m + +# +# Multistandard (cable + terrestrial) frontends +# +CONFIG_DVB_DRXK=m +CONFIG_DVB_TDA18271C2DD=m +CONFIG_DVB_SI2165=m +CONFIG_DVB_MN88472=m +CONFIG_DVB_MN88473=m + +# +# DVB-S (satellite) frontends +# +CONFIG_DVB_CX24110=m +CONFIG_DVB_CX24123=m +CONFIG_DVB_MT312=m +CONFIG_DVB_ZL10036=m +CONFIG_DVB_ZL10039=m +CONFIG_DVB_S5H1420=m +CONFIG_DVB_STV0288=m +CONFIG_DVB_STB6000=m +CONFIG_DVB_STV0299=m +CONFIG_DVB_STV6110=m +CONFIG_DVB_STV0900=m +CONFIG_DVB_TDA8083=m +CONFIG_DVB_TDA10086=m +CONFIG_DVB_TDA8261=m +CONFIG_DVB_VES1X93=m +CONFIG_DVB_TUNER_ITD1000=m +CONFIG_DVB_TUNER_CX24113=m +CONFIG_DVB_TDA826X=m +CONFIG_DVB_TUA6100=m +CONFIG_DVB_CX24116=m +CONFIG_DVB_CX24117=m +CONFIG_DVB_CX24120=m +CONFIG_DVB_SI21XX=m +CONFIG_DVB_TS2020=m +CONFIG_DVB_DS3000=m +CONFIG_DVB_MB86A16=m +CONFIG_DVB_TDA10071=m + +# +# DVB-T (terrestrial) frontends +# +CONFIG_DVB_SP8870=m +CONFIG_DVB_SP887X=m +CONFIG_DVB_CX22700=m +CONFIG_DVB_CX22702=m +# CONFIG_DVB_S5H1432 is not set +CONFIG_DVB_DRXD=m +CONFIG_DVB_L64781=m +CONFIG_DVB_TDA1004X=m +CONFIG_DVB_NXT6000=m +CONFIG_DVB_MT352=m +CONFIG_DVB_ZL10353=m +CONFIG_DVB_DIB3000MB=m +CONFIG_DVB_DIB3000MC=m +CONFIG_DVB_DIB7000M=m +CONFIG_DVB_DIB7000P=m +# CONFIG_DVB_DIB9000 is not set +CONFIG_DVB_TDA10048=m +CONFIG_DVB_AF9013=m +CONFIG_DVB_EC100=m +CONFIG_DVB_STV0367=m +CONFIG_DVB_CXD2820R=m +CONFIG_DVB_CXD2841ER=m +CONFIG_DVB_RTL2830=m +CONFIG_DVB_RTL2832=m +CONFIG_DVB_RTL2832_SDR=m +CONFIG_DVB_SI2168=m +CONFIG_DVB_AS102_FE=m +CONFIG_DVB_ZD1301_DEMOD=m +CONFIG_DVB_GP8PSK_FE=m +# CONFIG_DVB_CXD2880 is not set + +# +# DVB-C (cable) frontends +# +CONFIG_DVB_VES1820=m +CONFIG_DVB_TDA10021=m +CONFIG_DVB_TDA10023=m +CONFIG_DVB_STV0297=m + +# +# ATSC (North American/Korean Terrestrial/Cable DTV) frontends +# +CONFIG_DVB_NXT200X=m +CONFIG_DVB_OR51211=m +CONFIG_DVB_OR51132=m +CONFIG_DVB_BCM3510=m +CONFIG_DVB_LGDT330X=m +CONFIG_DVB_LGDT3305=m +CONFIG_DVB_LGDT3306A=m +CONFIG_DVB_LG2160=m +CONFIG_DVB_S5H1409=m +CONFIG_DVB_AU8522=m +CONFIG_DVB_AU8522_DTV=m +CONFIG_DVB_AU8522_V4L=m +CONFIG_DVB_S5H1411=m + +# +# ISDB-T (terrestrial) frontends +# +CONFIG_DVB_S921=m +CONFIG_DVB_DIB8000=m +CONFIG_DVB_MB86A20S=m + +# +# ISDB-S (satellite) & ISDB-T (terrestrial) frontends +# +CONFIG_DVB_TC90522=m +# CONFIG_DVB_MN88443X is not set + +# +# Digital terrestrial only tuners/PLL +# +CONFIG_DVB_PLL=m +CONFIG_DVB_TUNER_DIB0070=m +CONFIG_DVB_TUNER_DIB0090=m + +# +# SEC control devices for DVB-S +# +CONFIG_DVB_DRX39XYJ=m +CONFIG_DVB_LNBH25=m +# CONFIG_DVB_LNBH29 is not set +CONFIG_DVB_LNBP21=m +CONFIG_DVB_LNBP22=m +CONFIG_DVB_ISL6405=m +CONFIG_DVB_ISL6421=m +CONFIG_DVB_ISL6423=m +CONFIG_DVB_A8293=m +# CONFIG_DVB_LGS8GL5 is not set +CONFIG_DVB_LGS8GXX=m +CONFIG_DVB_ATBM8830=m +CONFIG_DVB_TDA665x=m +CONFIG_DVB_IX2505V=m +CONFIG_DVB_M88RS2000=m +CONFIG_DVB_AF9033=m +CONFIG_DVB_HORUS3A=m +CONFIG_DVB_ASCOT2E=m +CONFIG_DVB_HELENE=m + +# +# Common Interface (EN50221) controller drivers +# +CONFIG_DVB_CXD2099=m +CONFIG_DVB_SP2=m + +# +# Tools to develop new frontends +# +CONFIG_DVB_DUMMY_FE=m +# end of Customise DVB Frontends + +# +# Graphics support +# +CONFIG_VGA_ARB=y +CONFIG_VGA_ARB_MAX_GPUS=16 +CONFIG_TEGRA_HOST1X=m +CONFIG_TEGRA_HOST1X_FIREWALL=y +CONFIG_DRM=m +CONFIG_DRM_MIPI_DSI=y +CONFIG_DRM_DP_AUX_CHARDEV=y +# CONFIG_DRM_DEBUG_SELFTEST is not set +CONFIG_DRM_KMS_HELPER=m +CONFIG_DRM_KMS_FB_HELPER=y +CONFIG_DRM_FBDEV_EMULATION=y +CONFIG_DRM_FBDEV_OVERALLOC=100 +# CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM is not set +CONFIG_DRM_LOAD_EDID_FIRMWARE=y +# CONFIG_DRM_DP_CEC is not set +CONFIG_DRM_TTM=m +CONFIG_DRM_VRAM_HELPER=m +CONFIG_DRM_GEM_CMA_HELPER=y +CONFIG_DRM_KMS_CMA_HELPER=y +CONFIG_DRM_GEM_SHMEM_HELPER=y +CONFIG_DRM_VM=y +CONFIG_DRM_SCHED=m + +# +# I2C encoder or helper chips +# +# CONFIG_DRM_I2C_CH7006 is not set +# CONFIG_DRM_I2C_SIL164 is not set +# CONFIG_DRM_I2C_NXP_TDA998X is not set +# CONFIG_DRM_I2C_NXP_TDA9950 is not set +# end of I2C encoder or helper chips + +# +# ARM devices +# +CONFIG_DRM_HDLCD=m +# CONFIG_DRM_HDLCD_SHOW_UNDERRUN is not set +CONFIG_DRM_MALI_DISPLAY=m +# CONFIG_DRM_KOMEDA is not set +# end of ARM devices + +CONFIG_DRM_RADEON=m +# CONFIG_DRM_RADEON_USERPTR is not set +CONFIG_DRM_AMDGPU=m +CONFIG_DRM_AMDGPU_SI=y +CONFIG_DRM_AMDGPU_CIK=y +# CONFIG_DRM_AMDGPU_USERPTR is not set +# CONFIG_DRM_AMDGPU_GART_DEBUGFS is not set + +# +# ACP (Audio CoProcessor) Configuration +# +# CONFIG_DRM_AMD_ACP is not set +# end of ACP (Audio CoProcessor) Configuration + +# +# Display Engine Configuration +# +CONFIG_DRM_AMD_DC=y +# CONFIG_DEBUG_KERNEL_DC is not set +# end of Display Engine Configuration + +# CONFIG_HSA_AMD is not set +CONFIG_DRM_NOUVEAU=m +CONFIG_NOUVEAU_LEGACY_CTX_SUPPORT=y +CONFIG_NOUVEAU_PLATFORM_DRIVER=y +CONFIG_NOUVEAU_DEBUG=5 +CONFIG_NOUVEAU_DEBUG_DEFAULT=3 +# CONFIG_NOUVEAU_DEBUG_MMU is not set +CONFIG_DRM_NOUVEAU_BACKLIGHT=y +CONFIG_DRM_VGEM=m +# CONFIG_DRM_VKMS is not set +CONFIG_DRM_ATI_PCIGART=y +CONFIG_DRM_ROCKCHIP=m +CONFIG_ROCKCHIP_ANALOGIX_DP=y +CONFIG_ROCKCHIP_CDN_DP=y +CONFIG_ROCKCHIP_DW_HDMI=y +CONFIG_ROCKCHIP_DW_MIPI_DSI=y +# CONFIG_ROCKCHIP_INNO_HDMI is not set +# CONFIG_ROCKCHIP_LVDS is not set +# CONFIG_ROCKCHIP_RGB is not set +# CONFIG_ROCKCHIP_RK3066_HDMI is not set +CONFIG_DRM_UDL=m +CONFIG_DRM_AST=m +# CONFIG_DRM_MGAG200 is not set +CONFIG_DRM_CIRRUS_QEMU=m +# CONFIG_DRM_RCAR_DW_HDMI is not set +# CONFIG_DRM_RCAR_LVDS is not set +CONFIG_DRM_RCAR_WRITEBACK=y +CONFIG_DRM_SUN4I=m +CONFIG_DRM_SUN4I_HDMI=m +# CONFIG_DRM_SUN4I_HDMI_CEC is not set +CONFIG_DRM_SUN4I_BACKEND=m +# CONFIG_DRM_SUN6I_DSI is not set +CONFIG_DRM_SUN8I_DW_HDMI=m +# CONFIG_DRM_SUN8I_MIXER is not set +CONFIG_DRM_QXL=m +CONFIG_DRM_BOCHS=m +CONFIG_DRM_VIRTIO_GPU=m +CONFIG_DRM_MSM=m +CONFIG_DRM_MSM_GPU_STATE=y +# CONFIG_DRM_MSM_REGISTER_LOGGING is not set +# CONFIG_DRM_MSM_GPU_SUDO is not set +CONFIG_DRM_MSM_HDMI_HDCP=y +CONFIG_DRM_MSM_DSI=y +CONFIG_DRM_MSM_DSI_PLL=y +CONFIG_DRM_MSM_DSI_28NM_PHY=y +CONFIG_DRM_MSM_DSI_20NM_PHY=y +CONFIG_DRM_MSM_DSI_28NM_8960_PHY=y +CONFIG_DRM_MSM_DSI_14NM_PHY=y +CONFIG_DRM_MSM_DSI_10NM_PHY=y +CONFIG_DRM_TEGRA=m +# CONFIG_DRM_TEGRA_DEBUG is not set +CONFIG_DRM_TEGRA_STAGING=y +CONFIG_DRM_PANEL=y + +# +# Display Panels +# +# CONFIG_DRM_PANEL_ARM_VERSATILE is not set +# CONFIG_DRM_PANEL_LVDS is not set +CONFIG_DRM_PANEL_SIMPLE=m +# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set +# CONFIG_DRM_PANEL_ILITEK_IL9322 is not set +# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set +# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set +# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set +# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set +# CONFIG_DRM_PANEL_SAMSUNG_LD9040 is not set +# CONFIG_DRM_PANEL_LG_LB035Q02 is not set +# CONFIG_DRM_PANEL_LG_LG4573 is not set +# CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set +# CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set +# CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set +# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set +# CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00 is not set +CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN=m +# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set +# CONFIG_DRM_PANEL_ROCKTECH_JH057N00900 is not set +# CONFIG_DRM_PANEL_RONBO_RB070D30 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set +# CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set +# CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set +# CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set +# CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set +# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set +# CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set +# CONFIG_DRM_PANEL_SONY_ACX565AKM is not set +# CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set +# CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set +# CONFIG_DRM_PANEL_TPO_TPG110 is not set +# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set +# end of Display Panels + +CONFIG_DRM_BRIDGE=y +CONFIG_DRM_PANEL_BRIDGE=y + +# +# Display Interface Bridges +# +# CONFIG_DRM_ANALOGIX_ANX78XX is not set +# CONFIG_DRM_CDNS_DSI is not set +# CONFIG_DRM_DUMB_VGA_DAC is not set +# CONFIG_DRM_LVDS_ENCODER is not set +# CONFIG_DRM_MEGACHIPS_STDPXXXX_GE_B850V3_FW is not set +# CONFIG_DRM_NXP_PTN3460 is not set +# CONFIG_DRM_PARADE_PS8622 is not set +# CONFIG_DRM_SIL_SII8620 is not set +# CONFIG_DRM_SII902X is not set +# CONFIG_DRM_SII9234 is not set +# CONFIG_DRM_THINE_THC63LVD1024 is not set +# CONFIG_DRM_TOSHIBA_TC358764 is not set +# CONFIG_DRM_TOSHIBA_TC358767 is not set +# CONFIG_DRM_TI_TFP410 is not set +# CONFIG_DRM_TI_SN65DSI86 is not set +CONFIG_DRM_ANALOGIX_DP=m +CONFIG_DRM_I2C_ADV7511=m +CONFIG_DRM_I2C_ADV7511_AUDIO=y +CONFIG_DRM_I2C_ADV7533=y +CONFIG_DRM_I2C_ADV7511_CEC=y +CONFIG_DRM_DW_HDMI=m +# CONFIG_DRM_DW_HDMI_AHB_AUDIO is not set +CONFIG_DRM_DW_HDMI_I2S_AUDIO=m +# CONFIG_DRM_DW_HDMI_CEC is not set +CONFIG_DRM_DW_MIPI_DSI=m +# end of Display Interface Bridges + +CONFIG_DRM_VC4=m +# CONFIG_DRM_VC4_HDMI_CEC is not set +# CONFIG_DRM_ETNAVIV is not set +# CONFIG_DRM_ARCPGU is not set +CONFIG_DRM_HISI_HIBMC=m +CONFIG_DRM_HISI_KIRIN=m +# CONFIG_DRM_MXSFB is not set +CONFIG_DRM_MESON=m +CONFIG_DRM_MESON_DW_HDMI=m +# CONFIG_DRM_GM12U320 is not set +# CONFIG_TINYDRM_HX8357D is not set +# CONFIG_TINYDRM_ILI9225 is not set +# CONFIG_TINYDRM_ILI9341 is not set +# CONFIG_TINYDRM_MI0283QT is not set +# CONFIG_TINYDRM_REPAPER is not set +# CONFIG_TINYDRM_ST7586 is not set +# CONFIG_TINYDRM_ST7735R is not set +# CONFIG_DRM_PL111 is not set +# CONFIG_DRM_XEN is not set +CONFIG_DRM_LIMA=m +CONFIG_DRM_PANFROST=m +CONFIG_DRM_LEGACY=y +# CONFIG_DRM_TDFX is not set +# CONFIG_DRM_R128 is not set +# CONFIG_DRM_MGA is not set +CONFIG_DRM_VIA=m +CONFIG_DRM_SAVAGE=m +CONFIG_DRM_PANEL_ORIENTATION_QUIRKS=y + +# +# Frame buffer Devices +# +CONFIG_FB_CMDLINE=y +CONFIG_FB_NOTIFY=y +CONFIG_FB=y +CONFIG_FIRMWARE_EDID=y +CONFIG_FB_DDC=m +CONFIG_FB_CFB_FILLRECT=y +CONFIG_FB_CFB_COPYAREA=y +CONFIG_FB_CFB_IMAGEBLIT=y +CONFIG_FB_SYS_FILLRECT=y +CONFIG_FB_SYS_COPYAREA=y +CONFIG_FB_SYS_IMAGEBLIT=y +# CONFIG_FB_FOREIGN_ENDIAN is not set +CONFIG_FB_SYS_FOPS=y +CONFIG_FB_DEFERRED_IO=y +CONFIG_FB_SVGALIB=m +CONFIG_FB_MODE_HELPERS=y +CONFIG_FB_TILEBLITTING=y + +# +# Frame buffer hardware drivers +# +# CONFIG_FB_CIRRUS is not set +# CONFIG_FB_PM2 is not set +CONFIG_FB_ARMCLCD=y +# CONFIG_FB_CYBER2000 is not set +# CONFIG_FB_ASILIANT is not set +# CONFIG_FB_IMSTT is not set +# CONFIG_FB_UVESA is not set +CONFIG_FB_EFI=y +# CONFIG_FB_OPENCORES is not set +# CONFIG_FB_S1D13XXX is not set +# CONFIG_FB_NVIDIA is not set +# CONFIG_FB_RIVA is not set +# CONFIG_FB_I740 is not set +# CONFIG_FB_MATROX is not set +# CONFIG_FB_RADEON is not set +# CONFIG_FB_ATY128 is not set +# CONFIG_FB_ATY is not set +CONFIG_FB_S3=m +CONFIG_FB_S3_DDC=y +# CONFIG_FB_SAVAGE is not set +# CONFIG_FB_SIS is not set +# CONFIG_FB_NEOMAGIC is not set +# CONFIG_FB_KYRO is not set +CONFIG_FB_3DFX=m +# CONFIG_FB_3DFX_ACCEL is not set +CONFIG_FB_3DFX_I2C=y +# CONFIG_FB_VOODOO1 is not set +CONFIG_FB_VT8623=m +# CONFIG_FB_TRIDENT is not set +CONFIG_FB_ARK=m +CONFIG_FB_PM3=m +# CONFIG_FB_CARMINE is not set +CONFIG_FB_SMSCUFX=m +CONFIG_FB_UDL=m +# CONFIG_FB_IBM_GXT4500 is not set +# CONFIG_FB_XILINX is not set +# CONFIG_FB_VIRTUAL is not set +CONFIG_XEN_FBDEV_FRONTEND=y +# CONFIG_FB_METRONOME is not set +CONFIG_FB_MB862XX=m +CONFIG_FB_MB862XX_PCI_GDC=y +CONFIG_FB_MB862XX_I2C=y +CONFIG_FB_SIMPLE=y +# CONFIG_FB_SSD1307 is not set +# CONFIG_FB_SM712 is not set +# end of Frame buffer Devices + +# +# Backlight & LCD device support +# +# CONFIG_LCD_CLASS_DEVICE is not set +CONFIG_BACKLIGHT_CLASS_DEVICE=y +CONFIG_BACKLIGHT_GENERIC=m +CONFIG_BACKLIGHT_PWM=m +# CONFIG_BACKLIGHT_PM8941_WLED is not set +# CONFIG_BACKLIGHT_ADP8860 is not set +# CONFIG_BACKLIGHT_ADP8870 is not set +# CONFIG_BACKLIGHT_LM3630A is not set +# CONFIG_BACKLIGHT_LM3639 is not set +CONFIG_BACKLIGHT_LP855X=m +# CONFIG_BACKLIGHT_GPIO is not set +# CONFIG_BACKLIGHT_LV5207LP is not set +# CONFIG_BACKLIGHT_BD6107 is not set +# CONFIG_BACKLIGHT_ARCXCNN is not set +# end of Backlight & LCD device support + +CONFIG_VGASTATE=m +CONFIG_VIDEOMODE_HELPERS=y +CONFIG_HDMI=y + +# +# Console display driver support +# +CONFIG_DUMMY_CONSOLE=y +CONFIG_DUMMY_CONSOLE_COLUMNS=80 +CONFIG_DUMMY_CONSOLE_ROWS=25 +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y +CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y +# CONFIG_FRAMEBUFFER_CONSOLE_DEFERRED_TAKEOVER is not set +# end of Console display driver support + +# CONFIG_LOGO is not set +# end of Graphics support + +CONFIG_SOUND=m +CONFIG_SOUND_OSS_CORE=y +# CONFIG_SOUND_OSS_CORE_PRECLAIM is not set +CONFIG_SND=m +CONFIG_SND_TIMER=m +CONFIG_SND_PCM=m +CONFIG_SND_PCM_ELD=y +CONFIG_SND_PCM_IEC958=y +CONFIG_SND_DMAENGINE_PCM=m +CONFIG_SND_HWDEP=m +CONFIG_SND_SEQ_DEVICE=m +CONFIG_SND_RAWMIDI=m +CONFIG_SND_JACK=y +CONFIG_SND_JACK_INPUT_DEV=y +CONFIG_SND_OSSEMUL=y +CONFIG_SND_MIXER_OSS=m +CONFIG_SND_PCM_OSS=m +CONFIG_SND_PCM_OSS_PLUGINS=y +CONFIG_SND_PCM_TIMER=y +CONFIG_SND_HRTIMER=m +CONFIG_SND_DYNAMIC_MINORS=y +CONFIG_SND_MAX_CARDS=32 +CONFIG_SND_SUPPORT_OLD_API=y +CONFIG_SND_PROC_FS=y +CONFIG_SND_VERBOSE_PROCFS=y +# CONFIG_SND_VERBOSE_PRINTK is not set +# CONFIG_SND_DEBUG is not set +CONFIG_SND_VMASTER=y +CONFIG_SND_SEQUENCER=m +CONFIG_SND_SEQ_DUMMY=m +# CONFIG_SND_SEQUENCER_OSS is not set +CONFIG_SND_SEQ_HRTIMER_DEFAULT=y +CONFIG_SND_SEQ_MIDI_EVENT=m +CONFIG_SND_SEQ_MIDI=m +CONFIG_SND_SEQ_MIDI_EMUL=m +CONFIG_SND_MPU401_UART=m +CONFIG_SND_OPL3_LIB=m +CONFIG_SND_OPL3_LIB_SEQ=m +CONFIG_SND_AC97_CODEC=m +CONFIG_SND_DRIVERS=y +# CONFIG_SND_DUMMY is not set +CONFIG_SND_ALOOP=m +# CONFIG_SND_VIRMIDI is not set +# CONFIG_SND_MTPAV is not set +CONFIG_SND_MTS64=m +# CONFIG_SND_SERIAL_U16550 is not set +# CONFIG_SND_MPU401 is not set +CONFIG_SND_PORTMAN2X4=m +CONFIG_SND_AC97_POWER_SAVE=y +CONFIG_SND_AC97_POWER_SAVE_DEFAULT=0 +CONFIG_SND_PCI=y +CONFIG_SND_AD1889=m +# CONFIG_SND_ATIIXP is not set +# CONFIG_SND_ATIIXP_MODEM is not set +# CONFIG_SND_AU8810 is not set +# CONFIG_SND_AU8820 is not set +# CONFIG_SND_AU8830 is not set +# CONFIG_SND_AW2 is not set +# CONFIG_SND_BT87X is not set +# CONFIG_SND_CA0106 is not set +# CONFIG_SND_CMIPCI is not set +CONFIG_SND_OXYGEN_LIB=m +CONFIG_SND_OXYGEN=m +# CONFIG_SND_CS4281 is not set +# CONFIG_SND_CS46XX is not set +CONFIG_SND_CTXFI=m +CONFIG_SND_DARLA20=m +CONFIG_SND_GINA20=m +CONFIG_SND_LAYLA20=m +CONFIG_SND_DARLA24=m +CONFIG_SND_GINA24=m +CONFIG_SND_LAYLA24=m +CONFIG_SND_MONA=m +CONFIG_SND_MIA=m +CONFIG_SND_ECHO3G=m +CONFIG_SND_INDIGO=m +CONFIG_SND_INDIGOIO=m +CONFIG_SND_INDIGODJ=m +CONFIG_SND_INDIGOIOX=m +CONFIG_SND_INDIGODJX=m +# CONFIG_SND_ENS1370 is not set +# CONFIG_SND_ENS1371 is not set +# CONFIG_SND_FM801 is not set +# CONFIG_SND_HDSP is not set +CONFIG_SND_HDSPM=m +# CONFIG_SND_ICE1724 is not set +# CONFIG_SND_INTEL8X0 is not set +# CONFIG_SND_INTEL8X0M is not set +# CONFIG_SND_KORG1212 is not set +CONFIG_SND_LOLA=m +CONFIG_SND_LX6464ES=m +# CONFIG_SND_MIXART is not set +# CONFIG_SND_NM256 is not set +CONFIG_SND_PCXHR=m +CONFIG_SND_RIPTIDE=m +# CONFIG_SND_RME32 is not set +# CONFIG_SND_RME96 is not set +# CONFIG_SND_RME9652 is not set +# CONFIG_SND_VIA82XX is not set +# CONFIG_SND_VIA82XX_MODEM is not set +CONFIG_SND_VIRTUOSO=m +# CONFIG_SND_VX222 is not set +# CONFIG_SND_YMFPCI is not set + +# +# HD-Audio +# +CONFIG_SND_HDA=m +CONFIG_SND_HDA_INTEL=m +# CONFIG_SND_HDA_INTEL_DETECT_DMIC is not set +CONFIG_SND_HDA_TEGRA=m +CONFIG_SND_HDA_HWDEP=y +CONFIG_SND_HDA_RECONFIG=y +CONFIG_SND_HDA_INPUT_BEEP=y +CONFIG_SND_HDA_INPUT_BEEP_MODE=1 +CONFIG_SND_HDA_PATCH_LOADER=y +CONFIG_SND_HDA_CODEC_REALTEK=m +CONFIG_SND_HDA_CODEC_ANALOG=m +CONFIG_SND_HDA_CODEC_SIGMATEL=m +CONFIG_SND_HDA_CODEC_VIA=m +CONFIG_SND_HDA_CODEC_HDMI=m +CONFIG_SND_HDA_CODEC_CIRRUS=m +CONFIG_SND_HDA_CODEC_CONEXANT=m +CONFIG_SND_HDA_CODEC_CA0110=m +CONFIG_SND_HDA_CODEC_CA0132=m +CONFIG_SND_HDA_CODEC_CA0132_DSP=y +CONFIG_SND_HDA_CODEC_CMEDIA=m +CONFIG_SND_HDA_CODEC_SI3054=m +CONFIG_SND_HDA_GENERIC=m +CONFIG_SND_HDA_POWER_SAVE_DEFAULT=1 +# end of HD-Audio + +CONFIG_SND_HDA_CORE=m +CONFIG_SND_HDA_DSP_LOADER=y +CONFIG_SND_HDA_ALIGNED_MMIO=y +CONFIG_SND_HDA_COMPONENT=y +CONFIG_SND_HDA_PREALLOC_SIZE=2048 +CONFIG_SND_INTEL_NHLT=m +CONFIG_SND_SPI=y +CONFIG_SND_USB=y +CONFIG_SND_USB_AUDIO=m +CONFIG_SND_USB_AUDIO_USE_MEDIA_CONTROLLER=y +CONFIG_SND_USB_UA101=m +CONFIG_SND_USB_CAIAQ=m +CONFIG_SND_USB_CAIAQ_INPUT=y +CONFIG_SND_USB_6FIRE=m +CONFIG_SND_USB_HIFACE=m +CONFIG_SND_BCD2000=m +CONFIG_SND_USB_LINE6=m +CONFIG_SND_USB_POD=m +CONFIG_SND_USB_PODHD=m +CONFIG_SND_USB_TONEPORT=m +CONFIG_SND_USB_VARIAX=m +CONFIG_SND_FIREWIRE=y +CONFIG_SND_FIREWIRE_LIB=m +CONFIG_SND_DICE=m +CONFIG_SND_OXFW=m +CONFIG_SND_ISIGHT=m +CONFIG_SND_FIREWORKS=m +CONFIG_SND_BEBOB=m +CONFIG_SND_FIREWIRE_DIGI00X=m +CONFIG_SND_FIREWIRE_TASCAM=m +CONFIG_SND_FIREWIRE_MOTU=m +CONFIG_SND_FIREFACE=m +CONFIG_SND_SOC=m +CONFIG_SND_SOC_GENERIC_DMAENGINE_PCM=y +# CONFIG_SND_SOC_AMD_ACP is not set +# CONFIG_SND_ATMEL_SOC is not set +CONFIG_SND_BCM2835_SOC_I2S=m +# CONFIG_SND_DESIGNWARE_I2S is not set + +# +# SoC Audio for Freescale CPUs +# + +# +# Common SoC Audio options for Freescale CPUs: +# +# CONFIG_SND_SOC_FSL_ASRC is not set +# CONFIG_SND_SOC_FSL_SAI is not set +# CONFIG_SND_SOC_FSL_AUDMIX is not set +# CONFIG_SND_SOC_FSL_SSI is not set +# CONFIG_SND_SOC_FSL_SPDIF is not set +# CONFIG_SND_SOC_FSL_ESAI is not set +# CONFIG_SND_SOC_FSL_MICFIL is not set +# CONFIG_SND_SOC_IMX_AUDMUX is not set +# end of SoC Audio for Freescale CPUs + +CONFIG_SND_I2S_HI6210_I2S=m +# CONFIG_SND_KIRKWOOD_SOC is not set +# CONFIG_SND_SOC_IMG is not set +# CONFIG_SND_SOC_MTK_BTCVSD is not set + +# +# ASoC support for Amlogic platforms +# +# CONFIG_SND_MESON_AXG_FRDDR is not set +# CONFIG_SND_MESON_AXG_TODDR is not set +# CONFIG_SND_MESON_AXG_TDMIN is not set +# CONFIG_SND_MESON_AXG_TDMOUT is not set +# CONFIG_SND_MESON_AXG_SOUND_CARD is not set +# CONFIG_SND_MESON_AXG_SPDIFOUT is not set +# CONFIG_SND_MESON_AXG_SPDIFIN is not set +# CONFIG_SND_MESON_AXG_PDM is not set +# CONFIG_SND_MESON_G12A_TOHDMITX is not set +# end of ASoC support for Amlogic platforms + +CONFIG_SND_SOC_QCOM=m +CONFIG_SND_SOC_LPASS_CPU=m +CONFIG_SND_SOC_LPASS_PLATFORM=m +CONFIG_SND_SOC_LPASS_APQ8016=m +# CONFIG_SND_SOC_STORM is not set +CONFIG_SND_SOC_APQ8016_SBC=m +CONFIG_SND_SOC_ROCKCHIP=m +CONFIG_SND_SOC_ROCKCHIP_I2S=m +# CONFIG_SND_SOC_ROCKCHIP_PDM is not set +CONFIG_SND_SOC_ROCKCHIP_SPDIF=m +# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set +CONFIG_SND_SOC_ROCKCHIP_RT5645=m +# CONFIG_SND_SOC_RK3288_HDMI_ANALOG is not set +CONFIG_SND_SOC_RK3399_GRU_SOUND=m +# CONFIG_SND_SOC_SOF_TOPLEVEL is not set + +# +# STMicroelectronics STM32 SOC audio support +# +# end of STMicroelectronics STM32 SOC audio support + +# +# Allwinner SoC Audio support +# +# CONFIG_SND_SUN4I_CODEC is not set +CONFIG_SND_SUN8I_CODEC=m +# CONFIG_SND_SUN8I_CODEC_ANALOG is not set +CONFIG_SND_SUN50I_CODEC_ANALOG=m +CONFIG_SND_SUN4I_I2S=m +# CONFIG_SND_SUN4I_SPDIF is not set +CONFIG_SND_SUN8I_ADDA_PR_REGMAP=m +# end of Allwinner SoC Audio support + +CONFIG_SND_SOC_TEGRA=m +# CONFIG_SND_SOC_TEGRA20_AC97 is not set +# CONFIG_SND_SOC_TEGRA20_DAS is not set +# CONFIG_SND_SOC_TEGRA20_I2S is not set +CONFIG_SND_SOC_TEGRA20_SPDIF=m +# CONFIG_SND_SOC_TEGRA30_AHUB is not set +# CONFIG_SND_SOC_TEGRA30_I2S is not set +CONFIG_SND_SOC_TEGRA_RT5640=m +CONFIG_SND_SOC_TEGRA_WM8753=m +CONFIG_SND_SOC_TEGRA_WM8903=m +# CONFIG_SND_SOC_TEGRA_WM9712 is not set +CONFIG_SND_SOC_TEGRA_TRIMSLICE=m +CONFIG_SND_SOC_TEGRA_ALC5632=m +CONFIG_SND_SOC_TEGRA_MAX98090=m +CONFIG_SND_SOC_TEGRA_RT5677=m +# CONFIG_SND_SOC_TEGRA_SGTL5000 is not set +# CONFIG_SND_SOC_XILINX_I2S is not set +# CONFIG_SND_SOC_XILINX_AUDIO_FORMATTER is not set +# CONFIG_SND_SOC_XILINX_SPDIF is not set +# CONFIG_SND_SOC_XTFPGA_I2S is not set +# CONFIG_ZX_TDM is not set +CONFIG_SND_SOC_I2C_AND_SPI=m + +# +# CODEC drivers +# +# CONFIG_SND_SOC_AC97_CODEC is not set +# CONFIG_SND_SOC_ADAU1701 is not set +# CONFIG_SND_SOC_ADAU1761_I2C is not set +# CONFIG_SND_SOC_ADAU1761_SPI is not set +# CONFIG_SND_SOC_ADAU7002 is not set +# CONFIG_SND_SOC_AK4104 is not set +# CONFIG_SND_SOC_AK4118 is not set +# CONFIG_SND_SOC_AK4458 is not set +# CONFIG_SND_SOC_AK4554 is not set +# CONFIG_SND_SOC_AK4613 is not set +# CONFIG_SND_SOC_AK4642 is not set +# CONFIG_SND_SOC_AK5386 is not set +# CONFIG_SND_SOC_AK5558 is not set +# CONFIG_SND_SOC_ALC5623 is not set +CONFIG_SND_SOC_ALC5632=m +# CONFIG_SND_SOC_BD28623 is not set +# CONFIG_SND_SOC_BT_SCO is not set +# CONFIG_SND_SOC_CROS_EC_CODEC is not set +# CONFIG_SND_SOC_CS35L32 is not set +# CONFIG_SND_SOC_CS35L33 is not set +# CONFIG_SND_SOC_CS35L34 is not set +# CONFIG_SND_SOC_CS35L35 is not set +# CONFIG_SND_SOC_CS35L36 is not set +# CONFIG_SND_SOC_CS42L42 is not set +# CONFIG_SND_SOC_CS42L51_I2C is not set +# CONFIG_SND_SOC_CS42L52 is not set +# CONFIG_SND_SOC_CS42L56 is not set +# CONFIG_SND_SOC_CS42L73 is not set +# CONFIG_SND_SOC_CS4265 is not set +# CONFIG_SND_SOC_CS4270 is not set +# CONFIG_SND_SOC_CS4271_I2C is not set +# CONFIG_SND_SOC_CS4271_SPI is not set +# CONFIG_SND_SOC_CS42XX8_I2C is not set +# CONFIG_SND_SOC_CS43130 is not set +# CONFIG_SND_SOC_CS4341 is not set +# CONFIG_SND_SOC_CS4349 is not set +# CONFIG_SND_SOC_CS53L30 is not set +# CONFIG_SND_SOC_CX2072X is not set +CONFIG_SND_SOC_DA7219=m +CONFIG_SND_SOC_DMIC=m +CONFIG_SND_SOC_HDMI_CODEC=m +# CONFIG_SND_SOC_ES7134 is not set +# CONFIG_SND_SOC_ES7241 is not set +# CONFIG_SND_SOC_ES8316 is not set +# CONFIG_SND_SOC_ES8328_I2C is not set +# CONFIG_SND_SOC_ES8328_SPI is not set +# CONFIG_SND_SOC_GTM601 is not set +# CONFIG_SND_SOC_INNO_RK3036 is not set +# CONFIG_SND_SOC_MAX98088 is not set +CONFIG_SND_SOC_MAX98090=m +CONFIG_SND_SOC_MAX98357A=m +# CONFIG_SND_SOC_MAX98504 is not set +# CONFIG_SND_SOC_MAX9867 is not set +# CONFIG_SND_SOC_MAX98927 is not set +# CONFIG_SND_SOC_MAX98373 is not set +# CONFIG_SND_SOC_MAX9860 is not set +# CONFIG_SND_SOC_MSM8916_WCD_ANALOG is not set +# CONFIG_SND_SOC_MSM8916_WCD_DIGITAL is not set +# CONFIG_SND_SOC_PCM1681 is not set +# CONFIG_SND_SOC_PCM1789_I2C is not set +# CONFIG_SND_SOC_PCM179X_I2C is not set +# CONFIG_SND_SOC_PCM179X_SPI is not set +# CONFIG_SND_SOC_PCM186X_I2C is not set +# CONFIG_SND_SOC_PCM186X_SPI is not set +# CONFIG_SND_SOC_PCM3060_I2C is not set +# CONFIG_SND_SOC_PCM3060_SPI is not set +# CONFIG_SND_SOC_PCM3168A_I2C is not set +# CONFIG_SND_SOC_PCM3168A_SPI is not set +# CONFIG_SND_SOC_PCM512x_I2C is not set +# CONFIG_SND_SOC_PCM512x_SPI is not set +# CONFIG_SND_SOC_RK3328 is not set +CONFIG_SND_SOC_RL6231=m +CONFIG_SND_SOC_RT5514=m +CONFIG_SND_SOC_RT5514_SPI=m +# CONFIG_SND_SOC_RT5616 is not set +# CONFIG_SND_SOC_RT5631 is not set +CONFIG_SND_SOC_RT5640=m +CONFIG_SND_SOC_RT5645=m +CONFIG_SND_SOC_RT5677=m +CONFIG_SND_SOC_RT5677_SPI=m +# CONFIG_SND_SOC_SGTL5000 is not set +CONFIG_SND_SOC_SIMPLE_AMPLIFIER=m +# CONFIG_SND_SOC_SIRF_AUDIO_CODEC is not set +# CONFIG_SND_SOC_SPDIF is not set +# CONFIG_SND_SOC_SSM2305 is not set +# CONFIG_SND_SOC_SSM2602_SPI is not set +# CONFIG_SND_SOC_SSM2602_I2C is not set +# CONFIG_SND_SOC_SSM4567 is not set +# CONFIG_SND_SOC_STA32X is not set +# CONFIG_SND_SOC_STA350 is not set +# CONFIG_SND_SOC_STI_SAS is not set +# CONFIG_SND_SOC_TAS2552 is not set +# CONFIG_SND_SOC_TAS5086 is not set +# CONFIG_SND_SOC_TAS571X is not set +# CONFIG_SND_SOC_TAS5720 is not set +# CONFIG_SND_SOC_TAS6424 is not set +# CONFIG_SND_SOC_TDA7419 is not set +# CONFIG_SND_SOC_TFA9879 is not set +CONFIG_SND_SOC_TLV320AIC23=m +CONFIG_SND_SOC_TLV320AIC23_I2C=m +# CONFIG_SND_SOC_TLV320AIC23_SPI is not set +# CONFIG_SND_SOC_TLV320AIC31XX is not set +# CONFIG_SND_SOC_TLV320AIC32X4_I2C is not set +# CONFIG_SND_SOC_TLV320AIC32X4_SPI is not set +# CONFIG_SND_SOC_TLV320AIC3X is not set +# CONFIG_SND_SOC_TS3A227E is not set +# CONFIG_SND_SOC_TSCS42XX is not set +# CONFIG_SND_SOC_TSCS454 is not set +# CONFIG_SND_SOC_UDA1334 is not set +# CONFIG_SND_SOC_WM8510 is not set +# CONFIG_SND_SOC_WM8523 is not set +# CONFIG_SND_SOC_WM8524 is not set +# CONFIG_SND_SOC_WM8580 is not set +# CONFIG_SND_SOC_WM8711 is not set +# CONFIG_SND_SOC_WM8728 is not set +# CONFIG_SND_SOC_WM8731 is not set +# CONFIG_SND_SOC_WM8737 is not set +# CONFIG_SND_SOC_WM8741 is not set +# CONFIG_SND_SOC_WM8750 is not set +CONFIG_SND_SOC_WM8753=m +# CONFIG_SND_SOC_WM8770 is not set +# CONFIG_SND_SOC_WM8776 is not set +# CONFIG_SND_SOC_WM8782 is not set +# CONFIG_SND_SOC_WM8804_I2C is not set +# CONFIG_SND_SOC_WM8804_SPI is not set +CONFIG_SND_SOC_WM8903=m +# CONFIG_SND_SOC_WM8904 is not set +# CONFIG_SND_SOC_WM8960 is not set +# CONFIG_SND_SOC_WM8962 is not set +# CONFIG_SND_SOC_WM8974 is not set +# CONFIG_SND_SOC_WM8978 is not set +# CONFIG_SND_SOC_WM8985 is not set +# CONFIG_SND_SOC_ZX_AUD96P22 is not set +# CONFIG_SND_SOC_MAX9759 is not set +# CONFIG_SND_SOC_MT6351 is not set +# CONFIG_SND_SOC_MT6358 is not set +# CONFIG_SND_SOC_NAU8540 is not set +# CONFIG_SND_SOC_NAU8810 is not set +# CONFIG_SND_SOC_NAU8822 is not set +# CONFIG_SND_SOC_NAU8824 is not set +# CONFIG_SND_SOC_TPA6130A2 is not set +# end of CODEC drivers + +CONFIG_SND_SIMPLE_CARD_UTILS=m +CONFIG_SND_SIMPLE_CARD=m +# CONFIG_SND_AUDIO_GRAPH_CARD is not set +CONFIG_SND_XEN_FRONTEND=m +CONFIG_AC97_BUS=m + +# +# HID support +# +CONFIG_HID=m +CONFIG_HID_BATTERY_STRENGTH=y +CONFIG_HIDRAW=y +CONFIG_UHID=m +CONFIG_HID_GENERIC=m + +# +# Special HID drivers +# +CONFIG_HID_A4TECH=m +CONFIG_HID_ACCUTOUCH=m +CONFIG_HID_ACRUX=m +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=m +# CONFIG_HID_APPLEIR is not set +CONFIG_HID_ASUS=m +CONFIG_HID_AUREAL=m +CONFIG_HID_BELKIN=m +CONFIG_HID_BETOP_FF=m +CONFIG_HID_BIGBEN_FF=m +CONFIG_HID_CHERRY=m +CONFIG_HID_CHICONY=m +CONFIG_HID_CORSAIR=m +CONFIG_HID_COUGAR=m +CONFIG_HID_MACALLY=m +CONFIG_HID_PRODIKEYS=m +CONFIG_HID_CMEDIA=m +CONFIG_HID_CP2112=m +# CONFIG_HID_CREATIVE_SB0540 is not set +CONFIG_HID_CYPRESS=m +CONFIG_HID_DRAGONRISE=m +CONFIG_DRAGONRISE_FF=y +CONFIG_HID_EMS_FF=m +CONFIG_HID_ELAN=m +CONFIG_HID_ELECOM=m +CONFIG_HID_ELO=m +CONFIG_HID_EZKEY=m +CONFIG_HID_GEMBIRD=m +CONFIG_HID_GFRM=m +CONFIG_HID_HOLTEK=m +CONFIG_HOLTEK_FF=y +# CONFIG_HID_GOOGLE_HAMMER is not set +CONFIG_HID_GT683R=m +CONFIG_HID_KEYTOUCH=m +CONFIG_HID_KYE=m +CONFIG_HID_UCLOGIC=m +CONFIG_HID_WALTOP=m +CONFIG_HID_VIEWSONIC=m +CONFIG_HID_GYRATION=m +CONFIG_HID_ICADE=m +CONFIG_HID_ITE=m +CONFIG_HID_JABRA=m +CONFIG_HID_TWINHAN=m +CONFIG_HID_KENSINGTON=m +CONFIG_HID_LCPOWER=m +CONFIG_HID_LED=m +CONFIG_HID_LENOVO=m +CONFIG_HID_LOGITECH=m +CONFIG_HID_LOGITECH_DJ=m +CONFIG_HID_LOGITECH_HIDPP=m +CONFIG_LOGITECH_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGIG940_FF=y +CONFIG_LOGIWHEELS_FF=y +CONFIG_HID_MAGICMOUSE=m +CONFIG_HID_MALTRON=m +CONFIG_HID_MAYFLASH=m +CONFIG_HID_REDRAGON=m +CONFIG_HID_MICROSOFT=m +CONFIG_HID_MONTEREY=m +CONFIG_HID_MULTITOUCH=m +CONFIG_HID_NTI=m +CONFIG_HID_NTRIG=m +CONFIG_HID_ORTEK=m +CONFIG_HID_PANTHERLORD=m +CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PENMOUNT=m +CONFIG_HID_PETALYNX=m +CONFIG_HID_PICOLCD=m +CONFIG_HID_PICOLCD_FB=y +CONFIG_HID_PICOLCD_BACKLIGHT=y +CONFIG_HID_PICOLCD_LEDS=y +CONFIG_HID_PICOLCD_CIR=y +CONFIG_HID_PLANTRONICS=m +CONFIG_HID_PRIMAX=m +CONFIG_HID_RETRODE=m +CONFIG_HID_ROCCAT=m +CONFIG_HID_SAITEK=m +CONFIG_HID_SAMSUNG=m +CONFIG_HID_SONY=m +CONFIG_SONY_FF=y +CONFIG_HID_SPEEDLINK=m +CONFIG_HID_STEAM=m +CONFIG_HID_STEELSERIES=m +CONFIG_HID_SUNPLUS=m +CONFIG_HID_RMI=m +CONFIG_HID_GREENASIA=m +CONFIG_GREENASIA_FF=y +CONFIG_HID_SMARTJOYPLUS=m +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_HID_TIVO=m +CONFIG_HID_TOPSEED=m +CONFIG_HID_THINGM=m +CONFIG_HID_THRUSTMASTER=m +CONFIG_THRUSTMASTER_FF=y +CONFIG_HID_UDRAW_PS3=m +CONFIG_HID_U2FZERO=m +CONFIG_HID_WACOM=m +CONFIG_HID_WIIMOTE=m +CONFIG_HID_XINMO=m +CONFIG_HID_ZEROPLUS=m +CONFIG_ZEROPLUS_FF=y +CONFIG_HID_ZYDACRON=m +CONFIG_HID_SENSOR_HUB=m +CONFIG_HID_SENSOR_CUSTOM_SENSOR=m +CONFIG_HID_ALPS=m +# end of Special HID drivers + +# +# USB HID support +# +CONFIG_USB_HID=m +CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y + +# +# USB HID Boot Protocol drivers +# +# CONFIG_USB_KBD is not set +# CONFIG_USB_MOUSE is not set +# end of USB HID Boot Protocol drivers +# end of USB HID support + +# +# I2C HID support +# +CONFIG_I2C_HID=m +# end of I2C HID support +# end of HID support + +CONFIG_USB_OHCI_LITTLE_ENDIAN=y +CONFIG_USB_SUPPORT=y +CONFIG_USB_COMMON=y +CONFIG_USB_LED_TRIG=y +CONFIG_USB_ULPI_BUS=m +# CONFIG_USB_CONN_GPIO is not set +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB=m +CONFIG_USB_PCI=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y + +# +# Miscellaneous USB options +# +CONFIG_USB_DEFAULT_PERSIST=y +CONFIG_USB_DYNAMIC_MINORS=y +# CONFIG_USB_OTG is not set +# CONFIG_USB_OTG_WHITELIST is not set +# CONFIG_USB_OTG_BLACKLIST_HUB is not set +CONFIG_USB_LEDS_TRIGGER_USBPORT=m +CONFIG_USB_AUTOSUSPEND_DELAY=2 +CONFIG_USB_MON=m + +# +# USB Host Controller Drivers +# +# CONFIG_USB_C67X00_HCD is not set +CONFIG_USB_XHCI_HCD=m +# CONFIG_USB_XHCI_DBGCAP is not set +CONFIG_USB_XHCI_PCI=m +CONFIG_USB_XHCI_PLATFORM=m +# CONFIG_USB_XHCI_HISTB is not set +# CONFIG_USB_XHCI_MVEBU is not set +CONFIG_USB_XHCI_TEGRA=m +CONFIG_USB_EHCI_HCD=m +CONFIG_USB_EHCI_ROOT_HUB_TT=y +CONFIG_USB_EHCI_TT_NEWSCHED=y +CONFIG_USB_EHCI_PCI=m +# CONFIG_USB_EHCI_FSL is not set +CONFIG_USB_EHCI_HCD_ORION=m +CONFIG_USB_EHCI_TEGRA=m +CONFIG_USB_EHCI_HCD_PLATFORM=m +# CONFIG_USB_OXU210HP_HCD is not set +# CONFIG_USB_ISP116X_HCD is not set +# CONFIG_USB_FOTG210_HCD is not set +# CONFIG_USB_MAX3421_HCD is not set +CONFIG_USB_OHCI_HCD=m +CONFIG_USB_OHCI_HCD_PCI=m +# CONFIG_USB_OHCI_HCD_SSB is not set +CONFIG_USB_OHCI_HCD_PLATFORM=m +# CONFIG_USB_UHCI_HCD is not set +CONFIG_USB_U132_HCD=m +# CONFIG_USB_SL811_HCD is not set +# CONFIG_USB_R8A66597_HCD is not set +# CONFIG_USB_HCD_BCMA is not set +# CONFIG_USB_HCD_SSB is not set +# CONFIG_USB_HCD_TEST_MODE is not set + +# +# USB Device Class drivers +# +CONFIG_USB_ACM=m +CONFIG_USB_PRINTER=m +CONFIG_USB_WDM=m +CONFIG_USB_TMC=m + +# +# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may +# + +# +# also be needed; see USB_STORAGE Help for more info +# +CONFIG_USB_STORAGE=m +# CONFIG_USB_STORAGE_DEBUG is not set +CONFIG_USB_STORAGE_REALTEK=m +CONFIG_REALTEK_AUTOPM=y +CONFIG_USB_STORAGE_DATAFAB=m +CONFIG_USB_STORAGE_FREECOM=m +CONFIG_USB_STORAGE_ISD200=m +CONFIG_USB_STORAGE_USBAT=m +CONFIG_USB_STORAGE_SDDR09=m +CONFIG_USB_STORAGE_SDDR55=m +CONFIG_USB_STORAGE_JUMPSHOT=m +CONFIG_USB_STORAGE_ALAUDA=m +CONFIG_USB_STORAGE_ONETOUCH=m +CONFIG_USB_STORAGE_KARMA=m +CONFIG_USB_STORAGE_CYPRESS_ATACB=m +CONFIG_USB_STORAGE_ENE_UB6250=m +CONFIG_USB_UAS=m + +# +# USB Imaging devices +# +CONFIG_USB_MDC800=m +CONFIG_USB_MICROTEK=m +CONFIG_USBIP_CORE=m +CONFIG_USBIP_VHCI_HCD=m +CONFIG_USBIP_VHCI_HC_PORTS=15 +CONFIG_USBIP_VHCI_NR_HCS=8 +CONFIG_USBIP_HOST=m +CONFIG_USBIP_VUDC=m +# CONFIG_USBIP_DEBUG is not set +# CONFIG_USB_CDNS3 is not set +CONFIG_USB_MUSB_HDRC=m +# CONFIG_USB_MUSB_HOST is not set +# CONFIG_USB_MUSB_GADGET is not set +CONFIG_USB_MUSB_DUAL_ROLE=y + +# +# Platform Glue Layer +# +CONFIG_USB_MUSB_SUNXI=m + +# +# MUSB DMA mode +# +# CONFIG_MUSB_PIO_ONLY is not set +CONFIG_USB_DWC3=m +CONFIG_USB_DWC3_ULPI=y +# CONFIG_USB_DWC3_HOST is not set +# CONFIG_USB_DWC3_GADGET is not set +CONFIG_USB_DWC3_DUAL_ROLE=y + +# +# Platform Glue Driver Support +# +CONFIG_USB_DWC3_PCI=m +CONFIG_USB_DWC3_HAPS=m +CONFIG_USB_DWC3_MESON_G12A=m +CONFIG_USB_DWC3_OF_SIMPLE=m +CONFIG_USB_DWC3_QCOM=m +CONFIG_USB_DWC2=m +# CONFIG_USB_DWC2_HOST is not set + +# +# Gadget/Dual-role mode requires USB Gadget support to be enabled +# +# CONFIG_USB_DWC2_PERIPHERAL is not set +CONFIG_USB_DWC2_DUAL_ROLE=y +# CONFIG_USB_DWC2_PCI is not set +# CONFIG_USB_DWC2_DEBUG is not set +# CONFIG_USB_DWC2_TRACK_MISSED_SOFS is not set +CONFIG_USB_CHIPIDEA=m +CONFIG_USB_CHIPIDEA_OF=m +CONFIG_USB_CHIPIDEA_PCI=m +CONFIG_USB_CHIPIDEA_UDC=y +CONFIG_USB_CHIPIDEA_HOST=y +CONFIG_USB_ISP1760=m +CONFIG_USB_ISP1760_HCD=y +CONFIG_USB_ISP1761_UDC=y +# CONFIG_USB_ISP1760_HOST_ROLE is not set +# CONFIG_USB_ISP1760_GADGET_ROLE is not set +CONFIG_USB_ISP1760_DUAL_ROLE=y + +# +# USB port drivers +# +# CONFIG_USB_USS720 is not set +CONFIG_USB_SERIAL=m +CONFIG_USB_SERIAL_GENERIC=y +CONFIG_USB_SERIAL_SIMPLE=m +CONFIG_USB_SERIAL_AIRCABLE=m +CONFIG_USB_SERIAL_ARK3116=m +CONFIG_USB_SERIAL_BELKIN=m +CONFIG_USB_SERIAL_CH341=m +CONFIG_USB_SERIAL_WHITEHEAT=m +CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m +CONFIG_USB_SERIAL_CP210X=m +CONFIG_USB_SERIAL_CYPRESS_M8=m +CONFIG_USB_SERIAL_EMPEG=m +CONFIG_USB_SERIAL_FTDI_SIO=m +CONFIG_USB_SERIAL_VISOR=m +CONFIG_USB_SERIAL_IPAQ=m +CONFIG_USB_SERIAL_IR=m +CONFIG_USB_SERIAL_EDGEPORT=m +CONFIG_USB_SERIAL_EDGEPORT_TI=m +CONFIG_USB_SERIAL_F81232=m +CONFIG_USB_SERIAL_F8153X=m +CONFIG_USB_SERIAL_GARMIN=m +CONFIG_USB_SERIAL_IPW=m +CONFIG_USB_SERIAL_IUU=m +CONFIG_USB_SERIAL_KEYSPAN_PDA=m +CONFIG_USB_SERIAL_KEYSPAN=m +CONFIG_USB_SERIAL_KLSI=m +CONFIG_USB_SERIAL_KOBIL_SCT=m +CONFIG_USB_SERIAL_MCT_U232=m +CONFIG_USB_SERIAL_METRO=m +CONFIG_USB_SERIAL_MOS7720=m +CONFIG_USB_SERIAL_MOS7715_PARPORT=y +CONFIG_USB_SERIAL_MOS7840=m +CONFIG_USB_SERIAL_MXUPORT=m +CONFIG_USB_SERIAL_NAVMAN=m +CONFIG_USB_SERIAL_PL2303=m +CONFIG_USB_SERIAL_OTI6858=m +CONFIG_USB_SERIAL_QCAUX=m +CONFIG_USB_SERIAL_QUALCOMM=m +CONFIG_USB_SERIAL_SPCP8X5=m +CONFIG_USB_SERIAL_SAFE=m +# CONFIG_USB_SERIAL_SAFE_PADDED is not set +CONFIG_USB_SERIAL_SIERRAWIRELESS=m +CONFIG_USB_SERIAL_SYMBOL=m +CONFIG_USB_SERIAL_TI=m +CONFIG_USB_SERIAL_CYBERJACK=m +CONFIG_USB_SERIAL_XIRCOM=m +CONFIG_USB_SERIAL_WWAN=m +CONFIG_USB_SERIAL_OPTION=m +CONFIG_USB_SERIAL_OMNINET=m +CONFIG_USB_SERIAL_OPTICON=m +CONFIG_USB_SERIAL_XSENS_MT=m +CONFIG_USB_SERIAL_WISHBONE=m +CONFIG_USB_SERIAL_SSU100=m +CONFIG_USB_SERIAL_QT2=m +CONFIG_USB_SERIAL_UPD78F0730=m +CONFIG_USB_SERIAL_DEBUG=m + +# +# USB Miscellaneous drivers +# +CONFIG_USB_EMI62=m +CONFIG_USB_EMI26=m +CONFIG_USB_ADUTUX=m +CONFIG_USB_SEVSEG=m +CONFIG_USB_LEGOTOWER=m +CONFIG_USB_LCD=m +CONFIG_USB_CYPRESS_CY7C63=m +CONFIG_USB_CYTHERM=m +CONFIG_USB_IDMOUSE=m +CONFIG_USB_FTDI_ELAN=m +CONFIG_USB_APPLEDISPLAY=m +CONFIG_USB_SISUSBVGA=m +CONFIG_USB_LD=m +CONFIG_USB_TRANCEVIBRATOR=m +CONFIG_USB_IOWARRIOR=m +CONFIG_USB_TEST=m +CONFIG_USB_EHSET_TEST_FIXTURE=m +CONFIG_USB_ISIGHTFW=m +CONFIG_USB_YUREX=m +CONFIG_USB_EZUSB_FX2=m +# CONFIG_USB_HUB_USB251XB is not set +CONFIG_USB_HSIC_USB3503=m +# CONFIG_USB_HSIC_USB4604 is not set +# CONFIG_USB_LINK_LAYER_TEST is not set +CONFIG_USB_CHAOSKEY=m +# CONFIG_USB_ATM is not set + +# +# USB Physical Layer drivers +# +CONFIG_USB_PHY=y +CONFIG_NOP_USB_XCEIV=m +# CONFIG_USB_GPIO_VBUS is not set +# CONFIG_USB_ISP1301 is not set +CONFIG_USB_TEGRA_PHY=m +CONFIG_USB_ULPI=y +CONFIG_USB_ULPI_VIEWPORT=y +# end of USB Physical Layer drivers + +CONFIG_USB_GADGET=m +# CONFIG_USB_GADGET_DEBUG is not set +# CONFIG_USB_GADGET_DEBUG_FILES is not set +# CONFIG_USB_GADGET_DEBUG_FS is not set +CONFIG_USB_GADGET_VBUS_DRAW=2 +CONFIG_USB_GADGET_STORAGE_NUM_BUFFERS=2 +# CONFIG_U_SERIAL_CONSOLE is not set + +# +# USB Peripheral Controller +# +# CONFIG_USB_FOTG210_UDC is not set +# CONFIG_USB_GR_UDC is not set +# CONFIG_USB_R8A66597 is not set +# CONFIG_USB_PXA27X is not set +# CONFIG_USB_MV_UDC is not set +# CONFIG_USB_MV_U3D is not set +# CONFIG_USB_SNP_UDC_PLAT is not set +# CONFIG_USB_M66592 is not set +# CONFIG_USB_BDC_UDC is not set +# CONFIG_USB_AMD5536UDC is not set +# CONFIG_USB_NET2272 is not set +CONFIG_USB_NET2280=m +# CONFIG_USB_GOKU is not set +# CONFIG_USB_EG20T is not set +# CONFIG_USB_GADGET_XILINX is not set +# CONFIG_USB_DUMMY_HCD is not set +# end of USB Peripheral Controller + +CONFIG_USB_LIBCOMPOSITE=m +CONFIG_USB_F_ACM=m +CONFIG_USB_F_SS_LB=m +CONFIG_USB_U_SERIAL=m +CONFIG_USB_U_ETHER=m +CONFIG_USB_U_AUDIO=m +CONFIG_USB_F_SERIAL=m +CONFIG_USB_F_OBEX=m +CONFIG_USB_F_NCM=m +CONFIG_USB_F_ECM=m +CONFIG_USB_F_PHONET=m +CONFIG_USB_F_EEM=m +CONFIG_USB_F_SUBSET=m +CONFIG_USB_F_RNDIS=m +CONFIG_USB_F_MASS_STORAGE=m +CONFIG_USB_F_FS=m +CONFIG_USB_F_UAC1=m +CONFIG_USB_F_UAC2=m +CONFIG_USB_F_UVC=m +CONFIG_USB_F_MIDI=m +CONFIG_USB_F_HID=m +CONFIG_USB_F_PRINTER=m +CONFIG_USB_CONFIGFS=m +CONFIG_USB_CONFIGFS_SERIAL=y +CONFIG_USB_CONFIGFS_ACM=y +CONFIG_USB_CONFIGFS_OBEX=y +CONFIG_USB_CONFIGFS_NCM=y +CONFIG_USB_CONFIGFS_ECM=y +CONFIG_USB_CONFIGFS_ECM_SUBSET=y +CONFIG_USB_CONFIGFS_RNDIS=y +CONFIG_USB_CONFIGFS_EEM=y +CONFIG_USB_CONFIGFS_PHONET=y +CONFIG_USB_CONFIGFS_MASS_STORAGE=y +CONFIG_USB_CONFIGFS_F_LB_SS=y +CONFIG_USB_CONFIGFS_F_FS=y +CONFIG_USB_CONFIGFS_F_UAC1=y +# CONFIG_USB_CONFIGFS_F_UAC1_LEGACY is not set +CONFIG_USB_CONFIGFS_F_UAC2=y +CONFIG_USB_CONFIGFS_F_MIDI=y +CONFIG_USB_CONFIGFS_F_HID=y +CONFIG_USB_CONFIGFS_F_UVC=y +CONFIG_USB_CONFIGFS_F_PRINTER=y +# CONFIG_USB_CONFIGFS_F_TCM is not set +# CONFIG_USB_ZERO is not set +# CONFIG_USB_AUDIO is not set +CONFIG_USB_ETH=m +CONFIG_USB_ETH_RNDIS=y +# CONFIG_USB_ETH_EEM is not set +# CONFIG_USB_G_NCM is not set +CONFIG_USB_GADGETFS=m +CONFIG_USB_FUNCTIONFS=m +CONFIG_USB_FUNCTIONFS_ETH=y +CONFIG_USB_FUNCTIONFS_RNDIS=y +CONFIG_USB_FUNCTIONFS_GENERIC=y +# CONFIG_USB_MASS_STORAGE is not set +# CONFIG_USB_GADGET_TARGET is not set +CONFIG_USB_G_SERIAL=m +# CONFIG_USB_MIDI_GADGET is not set +# CONFIG_USB_G_PRINTER is not set +# CONFIG_USB_CDC_COMPOSITE is not set +# CONFIG_USB_G_NOKIA is not set +# CONFIG_USB_G_ACM_MS is not set +# CONFIG_USB_G_MULTI is not set +# CONFIG_USB_G_HID is not set +# CONFIG_USB_G_DBGP is not set +# CONFIG_USB_G_WEBCAM is not set +# CONFIG_TYPEC is not set +CONFIG_USB_ROLE_SWITCH=m +CONFIG_MMC=y +CONFIG_PWRSEQ_EMMC=y +# CONFIG_PWRSEQ_SD8787 is not set +CONFIG_PWRSEQ_SIMPLE=y +CONFIG_MMC_BLOCK=y +CONFIG_MMC_BLOCK_MINORS=256 +CONFIG_SDIO_UART=m +# CONFIG_MMC_TEST is not set + +# +# MMC/SD/SDIO Host Controller Drivers +# +# CONFIG_MMC_DEBUG is not set +CONFIG_MMC_ARMMMCI=m +CONFIG_MMC_QCOM_DML=y +CONFIG_MMC_STM32_SDMMC=y +CONFIG_MMC_SDHCI=m +CONFIG_MMC_SDHCI_IO_ACCESSORS=y +CONFIG_MMC_SDHCI_PCI=m +CONFIG_MMC_RICOH_MMC=y +CONFIG_MMC_SDHCI_ACPI=m +CONFIG_MMC_SDHCI_PLTFM=m +CONFIG_MMC_SDHCI_OF_ARASAN=m +# CONFIG_MMC_SDHCI_OF_ASPEED is not set +# CONFIG_MMC_SDHCI_OF_AT91 is not set +# CONFIG_MMC_SDHCI_OF_DWCMSHC is not set +# CONFIG_MMC_SDHCI_CADENCE is not set +CONFIG_MMC_SDHCI_TEGRA=m +# CONFIG_MMC_SDHCI_PXAV3 is not set +CONFIG_MMC_SDHCI_F_SDH30=m +CONFIG_MMC_SDHCI_IPROC=m +CONFIG_MMC_MESON_GX=m +# CONFIG_MMC_MESON_MX_SDIO is not set +CONFIG_MMC_SDHCI_MSM=m +CONFIG_MMC_TIFM_SD=m +CONFIG_MMC_SPI=m +CONFIG_MMC_CB710=m +CONFIG_MMC_VIA_SDMMC=m +CONFIG_MMC_DW=m +CONFIG_MMC_DW_PLTFM=m +# CONFIG_MMC_DW_BLUEFIELD is not set +# CONFIG_MMC_DW_EXYNOS is not set +# CONFIG_MMC_DW_HI3798CV200 is not set +CONFIG_MMC_DW_K3=m +# CONFIG_MMC_DW_PCI is not set +CONFIG_MMC_DW_ROCKCHIP=m +CONFIG_MMC_VUB300=m +CONFIG_MMC_USHC=m +# CONFIG_MMC_USDHI6ROL0 is not set +CONFIG_MMC_REALTEK_PCI=m +CONFIG_MMC_REALTEK_USB=m +CONFIG_MMC_SUNXI=m +CONFIG_MMC_CQHCI=m +CONFIG_MMC_TOSHIBA_PCI=m +CONFIG_MMC_BCM2835=m +# CONFIG_MMC_MTK is not set +CONFIG_MMC_SDHCI_XENON=m +# CONFIG_MMC_SDHCI_OMAP is not set +# CONFIG_MMC_SDHCI_AM654 is not set +CONFIG_MEMSTICK=m +# CONFIG_MEMSTICK_DEBUG is not set + +# +# MemoryStick drivers +# +# CONFIG_MEMSTICK_UNSAFE_RESUME is not set +CONFIG_MSPRO_BLOCK=m +# CONFIG_MS_BLOCK is not set + +# +# MemoryStick Host Controller Drivers +# +CONFIG_MEMSTICK_TIFM_MS=m +CONFIG_MEMSTICK_JMICRON_38X=m +CONFIG_MEMSTICK_R592=m +CONFIG_MEMSTICK_REALTEK_PCI=m +CONFIG_MEMSTICK_REALTEK_USB=m +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=y +# CONFIG_LEDS_CLASS_FLASH is not set +CONFIG_LEDS_BRIGHTNESS_HW_CHANGED=y + +# +# LED drivers +# +# CONFIG_LEDS_AN30259A is not set +# CONFIG_LEDS_BCM6328 is not set +# CONFIG_LEDS_BCM6358 is not set +# CONFIG_LEDS_CR0014114 is not set +# CONFIG_LEDS_LM3530 is not set +# CONFIG_LEDS_LM3532 is not set +# CONFIG_LEDS_LM3642 is not set +# CONFIG_LEDS_LM3692X is not set +# CONFIG_LEDS_PCA9532 is not set +CONFIG_LEDS_GPIO=m +CONFIG_LEDS_LP3944=m +# CONFIG_LEDS_LP3952 is not set +# CONFIG_LEDS_LP5521 is not set +# CONFIG_LEDS_LP5523 is not set +# CONFIG_LEDS_LP5562 is not set +# CONFIG_LEDS_LP8501 is not set +# CONFIG_LEDS_LP8860 is not set +CONFIG_LEDS_PCA955X=m +# CONFIG_LEDS_PCA955X_GPIO is not set +# CONFIG_LEDS_PCA963X is not set +CONFIG_LEDS_DAC124S085=m +CONFIG_LEDS_PWM=m +CONFIG_LEDS_REGULATOR=m +CONFIG_LEDS_BD2802=m +CONFIG_LEDS_LT3593=m +# CONFIG_LEDS_TCA6507 is not set +# CONFIG_LEDS_TLC591XX is not set +# CONFIG_LEDS_LM355x is not set +# CONFIG_LEDS_IS31FL319X is not set +# CONFIG_LEDS_IS31FL32XX is not set + +# +# LED driver for blink(1) USB RGB LED is under Special HID drivers (HID_THINGM) +# +# CONFIG_LEDS_BLINKM is not set +# CONFIG_LEDS_SYSCON is not set +# CONFIG_LEDS_MLXREG is not set +# CONFIG_LEDS_USER is not set +# CONFIG_LEDS_SPI_BYTE is not set +# CONFIG_LEDS_TI_LMU_COMMON is not set +CONFIG_LEDS_ALTRA_SPCI=y + +# +# LED Triggers +# +CONFIG_LEDS_TRIGGERS=y +CONFIG_LEDS_TRIGGER_TIMER=m +CONFIG_LEDS_TRIGGER_ONESHOT=m +CONFIG_LEDS_TRIGGER_DISK=y +CONFIG_LEDS_TRIGGER_MTD=y +CONFIG_LEDS_TRIGGER_HEARTBEAT=m +CONFIG_LEDS_TRIGGER_BACKLIGHT=m +CONFIG_LEDS_TRIGGER_CPU=y +# CONFIG_LEDS_TRIGGER_ACTIVITY is not set +CONFIG_LEDS_TRIGGER_GPIO=m +CONFIG_LEDS_TRIGGER_DEFAULT_ON=m + +# +# iptables trigger is under Netfilter config (LED target) +# +CONFIG_LEDS_TRIGGER_TRANSIENT=m +CONFIG_LEDS_TRIGGER_CAMERA=m +CONFIG_LEDS_TRIGGER_PANIC=y +# CONFIG_LEDS_TRIGGER_NETDEV is not set +# CONFIG_LEDS_TRIGGER_PATTERN is not set +# CONFIG_LEDS_TRIGGER_AUDIO is not set +CONFIG_ACCESSIBILITY=y +CONFIG_A11Y_BRAILLE_CONSOLE=y +CONFIG_INFINIBAND=m +CONFIG_INFINIBAND_USER_MAD=m +CONFIG_INFINIBAND_USER_ACCESS=m +# CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI is not set +CONFIG_INFINIBAND_USER_MEM=y +CONFIG_INFINIBAND_ON_DEMAND_PAGING=y +CONFIG_INFINIBAND_ADDR_TRANS=y +CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS=y +CONFIG_INFINIBAND_VIRT_DMA=y +CONFIG_INFINIBAND_MTHCA=m +CONFIG_INFINIBAND_MTHCA_DEBUG=y +CONFIG_INFINIBAND_CXGB3=m +CONFIG_INFINIBAND_CXGB4=m +# CONFIG_INFINIBAND_EFA is not set +CONFIG_INFINIBAND_I40IW=m +CONFIG_MLX4_INFINIBAND=m +CONFIG_MLX5_INFINIBAND=m +CONFIG_INFINIBAND_OCRDMA=m +# CONFIG_INFINIBAND_BNXT_RE is not set +CONFIG_INFINIBAND_QEDR=m +CONFIG_RDMA_RXE=m +# CONFIG_RDMA_SIW is not set +CONFIG_INFINIBAND_IPOIB=m +CONFIG_INFINIBAND_IPOIB_CM=y +CONFIG_INFINIBAND_IPOIB_DEBUG=y +# CONFIG_INFINIBAND_IPOIB_DEBUG_DATA is not set +CONFIG_INFINIBAND_SRP=m +CONFIG_INFINIBAND_SRPT=m +CONFIG_INFINIBAND_ISER=m +CONFIG_INFINIBAND_ISERT=m +CONFIG_EDAC_SUPPORT=y +CONFIG_EDAC=y +CONFIG_EDAC_LEGACY_SYSFS=y +CONFIG_EDAC_DEBUG=y +CONFIG_EDAC_GHES=y +CONFIG_EDAC_THUNDERX=m +# CONFIG_EDAC_SYNOPSYS is not set +CONFIG_EDAC_XGENE=m +CONFIG_RTC_LIB=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_HCTOSYS=y +CONFIG_RTC_HCTOSYS_DEVICE="rtc0" +CONFIG_RTC_SYSTOHC=y +CONFIG_RTC_SYSTOHC_DEVICE="rtc0" +# CONFIG_RTC_DEBUG is not set +CONFIG_RTC_NVMEM=y + +# +# RTC interfaces +# +CONFIG_RTC_INTF_SYSFS=y +CONFIG_RTC_INTF_PROC=y +CONFIG_RTC_INTF_DEV=y +# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set +# CONFIG_RTC_DRV_TEST is not set + +# +# I2C RTC drivers +# +# CONFIG_RTC_DRV_ABB5ZES3 is not set +# CONFIG_RTC_DRV_ABEOZ9 is not set +# CONFIG_RTC_DRV_ABX80X is not set +CONFIG_RTC_DRV_DS1307=y +# CONFIG_RTC_DRV_DS1307_CENTURY is not set +# CONFIG_RTC_DRV_DS1374 is not set +# CONFIG_RTC_DRV_DS1672 is not set +# CONFIG_RTC_DRV_HYM8563 is not set +# CONFIG_RTC_DRV_MAX6900 is not set +CONFIG_RTC_DRV_MAX77686=y +CONFIG_RTC_DRV_MESON_VRTC=m +CONFIG_RTC_DRV_RK808=m +# CONFIG_RTC_DRV_RS5C372 is not set +# CONFIG_RTC_DRV_ISL1208 is not set +# CONFIG_RTC_DRV_ISL12022 is not set +# CONFIG_RTC_DRV_ISL12026 is not set +# CONFIG_RTC_DRV_X1205 is not set +# CONFIG_RTC_DRV_PCF8523 is not set +# CONFIG_RTC_DRV_PCF85063 is not set +# CONFIG_RTC_DRV_PCF85363 is not set +CONFIG_RTC_DRV_PCF8563=y +# CONFIG_RTC_DRV_PCF8583 is not set +# CONFIG_RTC_DRV_M41T80 is not set +# CONFIG_RTC_DRV_BQ32K is not set +# CONFIG_RTC_DRV_S35390A is not set +# CONFIG_RTC_DRV_FM3130 is not set +# CONFIG_RTC_DRV_RX8010 is not set +# CONFIG_RTC_DRV_RX8581 is not set +# CONFIG_RTC_DRV_RX8025 is not set +# CONFIG_RTC_DRV_EM3027 is not set +# CONFIG_RTC_DRV_RV3028 is not set +# CONFIG_RTC_DRV_RV8803 is not set +# CONFIG_RTC_DRV_SD3078 is not set + +# +# SPI RTC drivers +# +# CONFIG_RTC_DRV_M41T93 is not set +# CONFIG_RTC_DRV_M41T94 is not set +# CONFIG_RTC_DRV_DS1302 is not set +# CONFIG_RTC_DRV_DS1305 is not set +# CONFIG_RTC_DRV_DS1343 is not set +# CONFIG_RTC_DRV_DS1347 is not set +# CONFIG_RTC_DRV_DS1390 is not set +# CONFIG_RTC_DRV_MAX6916 is not set +# CONFIG_RTC_DRV_R9701 is not set +# CONFIG_RTC_DRV_RX4581 is not set +# CONFIG_RTC_DRV_RX6110 is not set +# CONFIG_RTC_DRV_RS5C348 is not set +# CONFIG_RTC_DRV_MAX6902 is not set +# CONFIG_RTC_DRV_PCF2123 is not set +# CONFIG_RTC_DRV_MCP795 is not set +CONFIG_RTC_I2C_AND_SPI=y + +# +# SPI and I2C RTC drivers +# +# CONFIG_RTC_DRV_DS3232 is not set +# CONFIG_RTC_DRV_PCF2127 is not set +# CONFIG_RTC_DRV_RV3029C2 is not set + +# +# Platform RTC drivers +# +# CONFIG_RTC_DRV_DS1286 is not set +# CONFIG_RTC_DRV_DS1511 is not set +# CONFIG_RTC_DRV_DS1553 is not set +# CONFIG_RTC_DRV_DS1685_FAMILY is not set +# CONFIG_RTC_DRV_DS1742 is not set +# CONFIG_RTC_DRV_DS2404 is not set +CONFIG_RTC_DRV_EFI=y +# CONFIG_RTC_DRV_STK17TA8 is not set +# CONFIG_RTC_DRV_M48T86 is not set +# CONFIG_RTC_DRV_M48T35 is not set +# CONFIG_RTC_DRV_M48T59 is not set +# CONFIG_RTC_DRV_MSM6242 is not set +# CONFIG_RTC_DRV_BQ4802 is not set +# CONFIG_RTC_DRV_RP5C01 is not set +# CONFIG_RTC_DRV_V3020 is not set +# CONFIG_RTC_DRV_ZYNQMP is not set +CONFIG_RTC_DRV_CROS_EC=m + +# +# on-CPU RTC drivers +# +# CONFIG_RTC_DRV_PL030 is not set +CONFIG_RTC_DRV_PL031=y +CONFIG_RTC_DRV_SUN6I=y +CONFIG_RTC_DRV_MV=m +CONFIG_RTC_DRV_ARMADA38X=m +# CONFIG_RTC_DRV_CADENCE is not set +# CONFIG_RTC_DRV_FTRTC010 is not set +CONFIG_RTC_DRV_PM8XXX=m +CONFIG_RTC_DRV_TEGRA=y +# CONFIG_RTC_DRV_SNVS is not set +CONFIG_RTC_DRV_XGENE=y +# CONFIG_RTC_DRV_R7301 is not set + +# +# HID Sensor RTC drivers +# +# CONFIG_RTC_DRV_HID_SENSOR_TIME is not set +CONFIG_DMADEVICES=y +# CONFIG_DMADEVICES_DEBUG is not set + +# +# DMA Devices +# +CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH=y +CONFIG_DMA_ENGINE=y +CONFIG_DMA_VIRTUAL_CHANNELS=y +CONFIG_DMA_ACPI=y +CONFIG_DMA_OF=y +# CONFIG_ALTERA_MSGDMA is not set +# CONFIG_AMBA_PL08X is not set +# CONFIG_AXI_DMAC is not set +# CONFIG_BCM_SBA_RAID is not set +CONFIG_DMA_BCM2835=y +CONFIG_DMA_SUN6I=m +# CONFIG_DW_AXI_DMAC is not set +# CONFIG_FSL_EDMA is not set +# CONFIG_FSL_QDMA is not set +# CONFIG_INTEL_IDMA64 is not set +CONFIG_K3_DMA=m +CONFIG_MV_XOR=y +CONFIG_MV_XOR_V2=y +CONFIG_PL330_DMA=m +CONFIG_TEGRA20_APB_DMA=y +CONFIG_TEGRA210_ADMA=y +CONFIG_XGENE_DMA=m +# CONFIG_XILINX_DMA is not set +# CONFIG_XILINX_ZYNQMP_DMA is not set +CONFIG_QCOM_BAM_DMA=m +CONFIG_QCOM_HIDMA_MGMT=m +CONFIG_QCOM_HIDMA=m +# CONFIG_DW_DMAC is not set +# CONFIG_DW_DMAC_PCI is not set +# CONFIG_DW_EDMA is not set +# CONFIG_DW_EDMA_PCIE is not set + +# +# DMA Clients +# +CONFIG_ASYNC_TX_DMA=y +# CONFIG_DMATEST is not set +CONFIG_DMA_ENGINE_RAID=y + +# +# DMABUF options +# +CONFIG_SYNC_FILE=y +# CONFIG_SW_SYNC is not set +# CONFIG_UDMABUF is not set +# CONFIG_DMABUF_SELFTESTS is not set +# end of DMABUF options + +# CONFIG_AUXDISPLAY is not set +# CONFIG_PANEL is not set +CONFIG_UIO=m +CONFIG_UIO_CIF=m +# CONFIG_UIO_PDRV_GENIRQ is not set +# CONFIG_UIO_DMEM_GENIRQ is not set +CONFIG_UIO_AEC=m +CONFIG_UIO_SERCOS3=m +CONFIG_UIO_PCI_GENERIC=m +CONFIG_UIO_NETX=m +# CONFIG_UIO_PRUSS is not set +CONFIG_UIO_MF624=m +CONFIG_VFIO_IOMMU_TYPE1=m +CONFIG_VFIO_VIRQFD=m +CONFIG_VFIO=m +CONFIG_VFIO_NOIOMMU=y +CONFIG_VFIO_PCI=m +CONFIG_VFIO_PCI_MMAP=y +CONFIG_VFIO_PCI_INTX=y +# CONFIG_VFIO_PLATFORM is not set +# CONFIG_VFIO_MDEV is not set +CONFIG_VIRT_DRIVERS=y +CONFIG_VIRTIO=m +CONFIG_VIRTIO_PCI_LIB=m +CONFIG_VIRTIO_MENU=y +CONFIG_VIRTIO_PCI=m +CONFIG_VIRTIO_PCI_LEGACY=y +CONFIG_VIRTIO_VDPA=m +# CONFIG_VIRTIO_PMEM is not set +CONFIG_VIRTIO_BALLOON=m +CONFIG_VIRTIO_INPUT=m +CONFIG_VIRTIO_MMIO=m +# CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES is not set +CONFIG_VDPA=m +CONFIG_VDPA_SIM=m +CONFIG_VDPA_SIM_NET=m +CONFIG_VDPA_SIM_BLOCK=m +CONFIG_VDPA_USER=m + +CONFIG_VHOST=m +CONFIG_VHOST_IOTLB=m +CONFIG_VHOST_RING=m +CONFIG_VHOST_MENU=y +CONFIG_VHOST_NET=m +CONFIG_VHOST_SCSI=m +CONFIG_VHOST_VSOCK=m +CONFIG_VHOST_VDPA=m +# CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set + +# +# Microsoft Hyper-V guest support +# +# end of Microsoft Hyper-V guest support + +# +# Xen driver support +# +CONFIG_XEN_BALLOON=y +CONFIG_XEN_BALLOON_MEMORY_HOTPLUG=y +CONFIG_XEN_SCRUB_PAGES_DEFAULT=y +CONFIG_XEN_DEV_EVTCHN=m +CONFIG_XEN_BACKEND=y +CONFIG_XENFS=m +CONFIG_XEN_COMPAT_XENFS=y +CONFIG_XEN_SYS_HYPERVISOR=y +CONFIG_XEN_XENBUS_FRONTEND=y +CONFIG_XEN_GNTDEV=m +CONFIG_XEN_GRANT_DEV_ALLOC=m +# CONFIG_XEN_GRANT_DMA_ALLOC is not set +CONFIG_SWIOTLB_XEN=y +# CONFIG_XEN_PVCALLS_FRONTEND is not set +# CONFIG_XEN_PVCALLS_BACKEND is not set +CONFIG_XEN_SCSI_BACKEND=m +CONFIG_XEN_PRIVCMD=m +CONFIG_XEN_EFI=y +CONFIG_XEN_AUTO_XLATE=y +CONFIG_XEN_FRONT_PGDIR_SHBUF=m +# end of Xen driver support + +# CONFIG_GREYBUS is not set +CONFIG_STAGING=y +# CONFIG_PRISM2_USB is not set +# CONFIG_COMEDI is not set +# CONFIG_RTL8192U is not set +# CONFIG_RTLLIB is not set +CONFIG_RTL8723BS=m +CONFIG_R8712U=m +CONFIG_R8188EU=m +CONFIG_88EU_AP_MODE=y +# CONFIG_RTS5208 is not set +# CONFIG_VT6655 is not set +# CONFIG_VT6656 is not set + +# +# IIO staging drivers +# + +# +# Accelerometers +# +# CONFIG_ADIS16203 is not set +# CONFIG_ADIS16240 is not set +# end of Accelerometers + +# +# Analog to digital converters +# +# CONFIG_AD7816 is not set +# CONFIG_AD7192 is not set +# CONFIG_AD7280 is not set +# end of Analog to digital converters + +# +# Analog digital bi-direction converters +# +# CONFIG_ADT7316 is not set +# end of Analog digital bi-direction converters + +# +# Capacitance to digital converters +# +# CONFIG_AD7150 is not set +# CONFIG_AD7746 is not set +# end of Capacitance to digital converters + +# +# Direct Digital Synthesis +# +# CONFIG_AD9832 is not set +# CONFIG_AD9834 is not set +# end of Direct Digital Synthesis + +# +# Network Analyzer, Impedance Converters +# +# CONFIG_AD5933 is not set +# end of Network Analyzer, Impedance Converters + +# +# Active energy metering IC +# +# CONFIG_ADE7854 is not set +# end of Active energy metering IC + +# +# Resolver to digital converters +# +# CONFIG_AD2S1210 is not set +# end of Resolver to digital converters +# end of IIO staging drivers + +# CONFIG_FB_SM750 is not set + +# +# Speakup console speech +# +CONFIG_SPEAKUP=m +CONFIG_SPEAKUP_SYNTH_ACNTSA=m +CONFIG_SPEAKUP_SYNTH_APOLLO=m +CONFIG_SPEAKUP_SYNTH_AUDPTR=m +CONFIG_SPEAKUP_SYNTH_BNS=m +CONFIG_SPEAKUP_SYNTH_DECTLK=m +CONFIG_SPEAKUP_SYNTH_DECEXT=m +CONFIG_SPEAKUP_SYNTH_LTLK=m +CONFIG_SPEAKUP_SYNTH_SOFT=m +CONFIG_SPEAKUP_SYNTH_SPKOUT=m +CONFIG_SPEAKUP_SYNTH_TXPRT=m +CONFIG_SPEAKUP_SYNTH_DUMMY=m +# end of Speakup console speech + +# CONFIG_MFD_NVEC is not set +# CONFIG_STAGING_MEDIA is not set + +# +# Android +# +# CONFIG_ASHMEM is not set +# CONFIG_ANDROID_VSOC is not set +# CONFIG_ION is not set +# end of Android + +# CONFIG_STAGING_BOARD is not set +# CONFIG_LTE_GDM724X is not set +# CONFIG_FIREWIRE_SERIAL is not set +# CONFIG_GS_FPGABOOT is not set +# CONFIG_UNISYSSPAR is not set +# CONFIG_COMMON_CLK_XLNX_CLKWZRD is not set +# CONFIG_FB_TFT is not set +# CONFIG_WILC1000_SDIO is not set +# CONFIG_WILC1000_SPI is not set +# CONFIG_MOST is not set +# CONFIG_KS7010 is not set +CONFIG_BCM_VIDEOCORE=y +CONFIG_BCM2835_VCHIQ=m +CONFIG_SND_BCM2835=m +CONFIG_VIDEO_BCM2835=m +# CONFIG_PI433 is not set + +# +# Gasket devices +# +# CONFIG_STAGING_GASKET_FRAMEWORK is not set +# end of Gasket devices + +# CONFIG_XIL_AXIS_FIFO is not set +# CONFIG_FIELDBUS_DEV is not set +# CONFIG_KPC2000 is not set +CONFIG_USB_WUSB=m +CONFIG_USB_WUSB_CBAF=m +# CONFIG_USB_WUSB_CBAF_DEBUG is not set +CONFIG_USB_WHCI_HCD=m +CONFIG_USB_HWA_HCD=m +CONFIG_UWB=m +CONFIG_UWB_HWA=m +CONFIG_UWB_WHCI=m +CONFIG_UWB_I1480U=m +# CONFIG_EXFAT_FS is not set +CONFIG_QLGE=m +# CONFIG_GOLDFISH is not set +CONFIG_MFD_CROS_EC=y +CONFIG_CHROME_PLATFORMS=y +# CONFIG_CHROMEOS_TBMC is not set +CONFIG_CROS_EC=y +CONFIG_CROS_EC_I2C=m +# CONFIG_CROS_EC_RPMSG is not set +CONFIG_CROS_EC_SPI=m +CONFIG_CROS_EC_PROTO=y +CONFIG_CROS_KBD_LED_BACKLIGHT=m +CONFIG_CROS_EC_CHARDEV=y +CONFIG_CROS_EC_LIGHTBAR=m +CONFIG_CROS_EC_VBC=m +CONFIG_CROS_EC_DEBUGFS=m +CONFIG_CROS_EC_SYSFS=m +CONFIG_CROS_USBPD_LOGGER=m +# CONFIG_MELLANOX_PLATFORM is not set +CONFIG_CLKDEV_LOOKUP=y +CONFIG_HAVE_CLK_PREPARE=y +CONFIG_COMMON_CLK=y + +# +# Common Clock Framework +# +CONFIG_COMMON_CLK_VERSATILE=y +CONFIG_CLK_SP810=y +CONFIG_CLK_VEXPRESS_OSC=y +# CONFIG_CLK_HSDK is not set +# CONFIG_COMMON_CLK_MAX77686 is not set +# CONFIG_COMMON_CLK_MAX9485 is not set +CONFIG_COMMON_CLK_RK808=m +CONFIG_COMMON_CLK_HI655X=m +# CONFIG_COMMON_CLK_SI5341 is not set +# CONFIG_COMMON_CLK_SI5351 is not set +# CONFIG_COMMON_CLK_SI514 is not set +# CONFIG_COMMON_CLK_SI544 is not set +# CONFIG_COMMON_CLK_SI570 is not set +# CONFIG_COMMON_CLK_CDCE706 is not set +# CONFIG_COMMON_CLK_CDCE925 is not set +# CONFIG_COMMON_CLK_CS2000_CP is not set +# CONFIG_CLK_QORIQ is not set +CONFIG_COMMON_CLK_XGENE=y +# CONFIG_COMMON_CLK_PWM is not set +# CONFIG_COMMON_CLK_VC5 is not set +# CONFIG_COMMON_CLK_FIXED_MMIO is not set +CONFIG_CLK_BCM2835=y +# CONFIG_CLK_RASPBERRYPI is not set +CONFIG_COMMON_CLK_HI3516CV300=y +CONFIG_COMMON_CLK_HI3519=y +CONFIG_COMMON_CLK_HI3660=y +CONFIG_COMMON_CLK_HI3670=y +CONFIG_COMMON_CLK_HI3798CV200=y +CONFIG_COMMON_CLK_HI6220=y +CONFIG_RESET_HISI=y +CONFIG_STUB_CLK_HI6220=y +CONFIG_STUB_CLK_HI3660=y +CONFIG_COMMON_CLK_MESON_REGMAP=y +CONFIG_COMMON_CLK_MESON_DUALDIV=y +CONFIG_COMMON_CLK_MESON_MPLL=y +CONFIG_COMMON_CLK_MESON_PLL=y +CONFIG_COMMON_CLK_MESON_VID_PLL_DIV=y +CONFIG_COMMON_CLK_MESON_AO_CLKC=y +CONFIG_COMMON_CLK_MESON_EE_CLKC=y +CONFIG_COMMON_CLK_MESON_CPU_DYNDIV=y +CONFIG_COMMON_CLK_GXBB=y +CONFIG_COMMON_CLK_AXG=y +# CONFIG_COMMON_CLK_AXG_AUDIO is not set +CONFIG_COMMON_CLK_G12A=y +CONFIG_ARMADA_AP_CP_HELPER=y +CONFIG_ARMADA_37XX_CLK=y +CONFIG_ARMADA_AP806_SYSCON=y +CONFIG_ARMADA_CP110_SYSCON=y +CONFIG_QCOM_GDSC=y +CONFIG_QCOM_RPMCC=y +CONFIG_COMMON_CLK_QCOM=y +CONFIG_QCOM_A53PLL=y +CONFIG_QCOM_CLK_APCS_MSM8916=m +CONFIG_QCOM_CLK_RPM=m +CONFIG_QCOM_CLK_SMD_RPM=m +# CONFIG_APQ_GCC_8084 is not set +# CONFIG_APQ_MMCC_8084 is not set +# CONFIG_IPQ_GCC_4019 is not set +# CONFIG_IPQ_GCC_806X is not set +# CONFIG_IPQ_LCC_806X is not set +# CONFIG_IPQ_GCC_8074 is not set +# CONFIG_MSM_GCC_8660 is not set +CONFIG_MSM_GCC_8916=y +# CONFIG_MSM_GCC_8960 is not set +# CONFIG_MSM_LCC_8960 is not set +# CONFIG_MDM_GCC_9615 is not set +# CONFIG_MDM_LCC_9615 is not set +# CONFIG_MSM_MMCC_8960 is not set +# CONFIG_MSM_GCC_8974 is not set +# CONFIG_MSM_MMCC_8974 is not set +# CONFIG_MSM_GCC_8994 is not set +CONFIG_MSM_GCC_8996=y +CONFIG_MSM_MMCC_8996=y +# CONFIG_MSM_GCC_8998 is not set +# CONFIG_QCS_GCC_404 is not set +# CONFIG_SDM_CAMCC_845 is not set +# CONFIG_SDM_GCC_660 is not set +# CONFIG_QCS_TURING_404 is not set +# CONFIG_SDM_GCC_845 is not set +# CONFIG_SDM_GPUCC_845 is not set +# CONFIG_SDM_VIDEOCC_845 is not set +# CONFIG_SDM_DISPCC_845 is not set +# CONFIG_SDM_LPASSCC_845 is not set +# CONFIG_SM_GCC_8150 is not set +# CONFIG_SPMI_PMIC_CLKDIV is not set +# CONFIG_QCOM_HFPLL is not set +# CONFIG_KPSS_XCC is not set +CONFIG_CLK_SUNXI=y +CONFIG_CLK_SUNXI_CLOCKS=y +CONFIG_CLK_SUNXI_PRCM_SUN6I=y +CONFIG_CLK_SUNXI_PRCM_SUN8I=y +CONFIG_CLK_SUNXI_PRCM_SUN9I=y +CONFIG_SUNXI_CCU=y +CONFIG_SUN50I_A64_CCU=y +CONFIG_SUN50I_H6_CCU=y +CONFIG_SUN50I_H6_R_CCU=y +# CONFIG_SUN8I_A83T_CCU is not set +CONFIG_SUN8I_H3_CCU=y +CONFIG_SUN8I_DE2_CCU=y +CONFIG_SUN8I_R_CCU=y +CONFIG_TEGRA_CLK_DFLL=y +# CONFIG_COMMON_CLK_ZYNQMP is not set +# end of Common Clock Framework + +# CONFIG_HWSPINLOCK is not set + +# +# Clock Source drivers +# +CONFIG_TIMER_OF=y +CONFIG_TIMER_ACPI=y +CONFIG_TIMER_PROBE=y +CONFIG_CLKSRC_MMIO=y +CONFIG_ROCKCHIP_TIMER=y +CONFIG_TEGRA_TIMER=y +CONFIG_ARM_ARCH_TIMER=y +CONFIG_ARM_ARCH_TIMER_EVTSTREAM=y +CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND=y +CONFIG_FSL_ERRATUM_A008585=y +CONFIG_HISILICON_ERRATUM_161010101=y +CONFIG_ARM64_ERRATUM_858921=y +CONFIG_SUN50I_ERRATUM_UNKNOWN1=y +CONFIG_ARM_TIMER_SP804=y +CONFIG_CLKSRC_VERSATILE=y +# end of Clock Source drivers + +CONFIG_MAILBOX=y +# CONFIG_ARM_MHU is not set +# CONFIG_PLATFORM_MHU is not set +# CONFIG_PL320_MBOX is not set +CONFIG_ARMADA_37XX_RWTM_MBOX=m +# CONFIG_ROCKCHIP_MBOX is not set +CONFIG_PCC=y +# CONFIG_ALTERA_MBOX is not set +CONFIG_BCM2835_MBOX=y +CONFIG_HI3660_MBOX=y +CONFIG_HI6220_MBOX=y +# CONFIG_MAILBOX_TEST is not set +CONFIG_QCOM_APCS_IPC=m +# CONFIG_TEGRA_HSP_MBOX is not set +CONFIG_XGENE_SLIMPRO_MBOX=m +# CONFIG_ZYNQMP_IPI_MBOX is not set +CONFIG_IOMMU_IOVA=y +CONFIG_IOMMU_API=y +CONFIG_IOMMU_SUPPORT=y + +# +# Generic IOMMU Pagetable Support +# +CONFIG_IOMMU_IO_PGTABLE=y +CONFIG_IOMMU_IO_PGTABLE_LPAE=y +# CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set +# CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set +# end of Generic IOMMU Pagetable Support + +# CONFIG_IOMMU_DEBUGFS is not set +CONFIG_IOMMU_DEFAULT_PASSTHROUGH=y +CONFIG_OF_IOMMU=y +CONFIG_IOMMU_DMA=y +CONFIG_ROCKCHIP_IOMMU=y +CONFIG_TEGRA_IOMMU_SMMU=y +CONFIG_ARM_SMMU=y +CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT=y +CONFIG_ARM_SMMU_V3=y +CONFIG_QCOM_IOMMU=y + +# +# Remoteproc drivers +# +# CONFIG_REMOTEPROC is not set +# end of Remoteproc drivers + +# +# Rpmsg drivers +# +CONFIG_RPMSG=m +# CONFIG_RPMSG_CHAR is not set +CONFIG_RPMSG_QCOM_GLINK_NATIVE=m +CONFIG_RPMSG_QCOM_GLINK_RPM=m +# CONFIG_RPMSG_VIRTIO is not set +# end of Rpmsg drivers + +# CONFIG_SOUNDWIRE is not set + +# +# SOC (System On Chip) specific Drivers +# + +# +# Amlogic SoC drivers +# +CONFIG_MESON_CANVAS=m +CONFIG_MESON_CLK_MEASURE=y +CONFIG_MESON_GX_SOCINFO=y +CONFIG_MESON_GX_PM_DOMAINS=y +CONFIG_MESON_EE_PM_DOMAINS=y +CONFIG_MESON_MX_SOCINFO=y +# end of Amlogic SoC drivers + +# +# Aspeed SoC drivers +# +# end of Aspeed SoC drivers + +# +# Broadcom SoC drivers +# +CONFIG_BCM2835_POWER=y +CONFIG_RASPBERRYPI_POWER=y +# CONFIG_SOC_BRCMSTB is not set +# end of Broadcom SoC drivers + +# +# NXP/Freescale QorIQ SoC drivers +# +# end of NXP/Freescale QorIQ SoC drivers + +# +# i.MX SoC drivers +# +# end of i.MX SoC drivers + +# +# Qualcomm SoC drivers +# +# CONFIG_QCOM_AOSS_QMP is not set +CONFIG_QCOM_COMMAND_DB=y +# CONFIG_QCOM_GENI_SE is not set +CONFIG_QCOM_GSBI=m +# CONFIG_QCOM_LLCC is not set +CONFIG_QCOM_MDT_LOADER=m +# CONFIG_QCOM_RMTFS_MEM is not set +# CONFIG_QCOM_RPMH is not set +CONFIG_QCOM_SMD_RPM=m +CONFIG_QCOM_WCNSS_CTRL=m +# CONFIG_QCOM_APR is not set +# end of Qualcomm SoC drivers + +CONFIG_ROCKCHIP_GRF=y +CONFIG_ROCKCHIP_PM_DOMAINS=y +CONFIG_SUNXI_SRAM=y +CONFIG_ARCH_TEGRA_132_SOC=y +CONFIG_ARCH_TEGRA_210_SOC=y +# CONFIG_ARCH_TEGRA_186_SOC is not set +# CONFIG_ARCH_TEGRA_194_SOC is not set +CONFIG_SOC_TEGRA_FUSE=y +CONFIG_SOC_TEGRA_FLOWCTRL=y +CONFIG_SOC_TEGRA_PMC=y +# CONFIG_SOC_TI is not set + +# +# Xilinx SoC drivers +# +# CONFIG_XILINX_VCU is not set +CONFIG_ZYNQMP_POWER=y +CONFIG_ZYNQMP_PM_DOMAINS=y +# end of Xilinx SoC drivers +# end of SOC (System On Chip) specific Drivers + +CONFIG_PM_DEVFREQ=y + +# +# DEVFREQ Governors +# +CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=m +# CONFIG_DEVFREQ_GOV_PERFORMANCE is not set +# CONFIG_DEVFREQ_GOV_POWERSAVE is not set +# CONFIG_DEVFREQ_GOV_USERSPACE is not set +# CONFIG_DEVFREQ_GOV_PASSIVE is not set + +# +# DEVFREQ Drivers +# +# CONFIG_ARM_TEGRA_DEVFREQ is not set +CONFIG_ARM_RK3399_DMC_DEVFREQ=m +CONFIG_PM_DEVFREQ_EVENT=y +CONFIG_DEVFREQ_EVENT_ROCKCHIP_DFI=m +CONFIG_EXTCON=y + +# +# Extcon Device Drivers +# +# CONFIG_EXTCON_ADC_JACK is not set +# CONFIG_EXTCON_FSA9480 is not set +# CONFIG_EXTCON_GPIO is not set +# CONFIG_EXTCON_MAX3355 is not set +# CONFIG_EXTCON_PTN5150 is not set +CONFIG_EXTCON_QCOM_SPMI_MISC=m +# CONFIG_EXTCON_RT8973A is not set +# CONFIG_EXTCON_SM5502 is not set +CONFIG_EXTCON_USB_GPIO=m +CONFIG_EXTCON_USBC_CROS_EC=m +CONFIG_MEMORY=y +# CONFIG_ARM_PL172_MPMC is not set +CONFIG_TEGRA_MC=y +CONFIG_IIO=m +CONFIG_IIO_BUFFER=y +# CONFIG_IIO_BUFFER_CB is not set +# CONFIG_IIO_BUFFER_HW_CONSUMER is not set +CONFIG_IIO_KFIFO_BUF=m +CONFIG_IIO_TRIGGERED_BUFFER=m +# CONFIG_IIO_CONFIGFS is not set +CONFIG_IIO_TRIGGER=y +CONFIG_IIO_CONSUMERS_PER_TRIGGER=2 +# CONFIG_IIO_SW_DEVICE is not set +# CONFIG_IIO_SW_TRIGGER is not set + +# +# Accelerometers +# +# CONFIG_ADIS16201 is not set +# CONFIG_ADIS16209 is not set +# CONFIG_ADXL345_I2C is not set +# CONFIG_ADXL345_SPI is not set +# CONFIG_ADXL372_SPI is not set +# CONFIG_ADXL372_I2C is not set +# CONFIG_BMA180 is not set +# CONFIG_BMA220 is not set +# CONFIG_BMC150_ACCEL is not set +# CONFIG_DA280 is not set +# CONFIG_DA311 is not set +# CONFIG_DMARD06 is not set +# CONFIG_DMARD09 is not set +# CONFIG_DMARD10 is not set +CONFIG_HID_SENSOR_ACCEL_3D=m +CONFIG_IIO_CROS_EC_ACCEL_LEGACY=m +# CONFIG_IIO_ST_ACCEL_3AXIS is not set +# CONFIG_KXSD9 is not set +# CONFIG_KXCJK1013 is not set +# CONFIG_MC3230 is not set +# CONFIG_MMA7455_I2C is not set +# CONFIG_MMA7455_SPI is not set +# CONFIG_MMA7660 is not set +# CONFIG_MMA8452 is not set +# CONFIG_MMA9551 is not set +# CONFIG_MMA9553 is not set +# CONFIG_MXC4005 is not set +# CONFIG_MXC6255 is not set +# CONFIG_SCA3000 is not set +# CONFIG_STK8312 is not set +# CONFIG_STK8BA50 is not set +# end of Accelerometers + +# +# Analog to digital converters +# +# CONFIG_AD7124 is not set +# CONFIG_AD7266 is not set +# CONFIG_AD7291 is not set +# CONFIG_AD7298 is not set +# CONFIG_AD7476 is not set +# CONFIG_AD7606_IFACE_PARALLEL is not set +# CONFIG_AD7606_IFACE_SPI is not set +# CONFIG_AD7766 is not set +# CONFIG_AD7768_1 is not set +# CONFIG_AD7780 is not set +# CONFIG_AD7791 is not set +# CONFIG_AD7793 is not set +# CONFIG_AD7887 is not set +# CONFIG_AD7923 is not set +# CONFIG_AD7949 is not set +# CONFIG_AD799X is not set +CONFIG_AXP20X_ADC=m +CONFIG_AXP288_ADC=m +# CONFIG_CC10001_ADC is not set +# CONFIG_ENVELOPE_DETECTOR is not set +# CONFIG_HI8435 is not set +# CONFIG_HX711 is not set +# CONFIG_INA2XX_ADC is not set +# CONFIG_LTC2471 is not set +# CONFIG_LTC2485 is not set +# CONFIG_LTC2497 is not set +# CONFIG_MAX1027 is not set +# CONFIG_MAX11100 is not set +# CONFIG_MAX1118 is not set +# CONFIG_MAX1363 is not set +# CONFIG_MAX9611 is not set +# CONFIG_MCP320X is not set +# CONFIG_MCP3422 is not set +# CONFIG_MCP3911 is not set +CONFIG_MESON_SARADC=m +# CONFIG_NAU7802 is not set +CONFIG_QCOM_VADC_COMMON=m +CONFIG_QCOM_SPMI_IADC=m +CONFIG_QCOM_SPMI_VADC=m +# CONFIG_QCOM_SPMI_ADC5 is not set +CONFIG_ROCKCHIP_SARADC=m +# CONFIG_SD_ADC_MODULATOR is not set +# CONFIG_TI_ADC081C is not set +# CONFIG_TI_ADC0832 is not set +# CONFIG_TI_ADC084S021 is not set +# CONFIG_TI_ADC12138 is not set +# CONFIG_TI_ADC108S102 is not set +# CONFIG_TI_ADC128S052 is not set +# CONFIG_TI_ADC161S626 is not set +# CONFIG_TI_ADS1015 is not set +# CONFIG_TI_ADS7950 is not set +# CONFIG_TI_ADS8344 is not set +# CONFIG_TI_ADS8688 is not set +# CONFIG_TI_ADS124S08 is not set +# CONFIG_TI_TLC4541 is not set +# CONFIG_VF610_ADC is not set +CONFIG_VIPERBOARD_ADC=m +# CONFIG_XILINX_XADC is not set +# end of Analog to digital converters + +# +# Analog Front Ends +# +# CONFIG_IIO_RESCALE is not set +# end of Analog Front Ends + +# +# Amplifiers +# +# CONFIG_AD8366 is not set +# end of Amplifiers + +# +# Chemical Sensors +# +# CONFIG_ATLAS_PH_SENSOR is not set +# CONFIG_BME680 is not set +# CONFIG_CCS811 is not set +# CONFIG_IAQCORE is not set +# CONFIG_PMS7003 is not set +# CONFIG_SENSIRION_SGP30 is not set +# CONFIG_SPS30 is not set +# CONFIG_VZ89X is not set +# end of Chemical Sensors + +CONFIG_IIO_CROS_EC_SENSORS_CORE=m +CONFIG_IIO_CROS_EC_SENSORS=m +# CONFIG_IIO_CROS_EC_SENSORS_LID_ANGLE is not set + +# +# Hid Sensor IIO Common +# +CONFIG_HID_SENSOR_IIO_COMMON=m +CONFIG_HID_SENSOR_IIO_TRIGGER=m +# end of Hid Sensor IIO Common + +# +# SSP Sensor Common +# +# CONFIG_IIO_SSP_SENSORHUB is not set +# end of SSP Sensor Common + +# +# Digital to analog converters +# +# CONFIG_AD5064 is not set +# CONFIG_AD5360 is not set +# CONFIG_AD5380 is not set +# CONFIG_AD5421 is not set +CONFIG_AD5446=m +# CONFIG_AD5449 is not set +# CONFIG_AD5592R is not set +# CONFIG_AD5593R is not set +# CONFIG_AD5504 is not set +# CONFIG_AD5624R_SPI is not set +# CONFIG_LTC1660 is not set +# CONFIG_LTC2632 is not set +# CONFIG_AD5686_SPI is not set +# CONFIG_AD5696_I2C is not set +# CONFIG_AD5755 is not set +# CONFIG_AD5758 is not set +# CONFIG_AD5761 is not set +# CONFIG_AD5764 is not set +# CONFIG_AD5791 is not set +# CONFIG_AD7303 is not set +# CONFIG_AD8801 is not set +# CONFIG_DPOT_DAC is not set +# CONFIG_DS4424 is not set +# CONFIG_M62332 is not set +# CONFIG_MAX517 is not set +# CONFIG_MAX5821 is not set +# CONFIG_MCP4725 is not set +# CONFIG_MCP4922 is not set +# CONFIG_TI_DAC082S085 is not set +# CONFIG_TI_DAC5571 is not set +# CONFIG_TI_DAC7311 is not set +# CONFIG_TI_DAC7612 is not set +# CONFIG_VF610_DAC is not set +# end of Digital to analog converters + +# +# IIO dummy driver +# +# end of IIO dummy driver + +# +# Frequency Synthesizers DDS/PLL +# + +# +# Clock Generator/Distribution +# +# CONFIG_AD9523 is not set +# end of Clock Generator/Distribution + +# +# Phase-Locked Loop (PLL) frequency synthesizers +# +# CONFIG_ADF4350 is not set +# CONFIG_ADF4371 is not set +# end of Phase-Locked Loop (PLL) frequency synthesizers +# end of Frequency Synthesizers DDS/PLL + +# +# Digital gyroscope sensors +# +# CONFIG_ADIS16080 is not set +# CONFIG_ADIS16130 is not set +# CONFIG_ADIS16136 is not set +# CONFIG_ADIS16260 is not set +# CONFIG_ADXRS450 is not set +# CONFIG_BMG160 is not set +# CONFIG_FXAS21002C is not set +CONFIG_HID_SENSOR_GYRO_3D=m +# CONFIG_MPU3050_I2C is not set +# CONFIG_IIO_ST_GYRO_3AXIS is not set +# CONFIG_ITG3200 is not set +# end of Digital gyroscope sensors + +# +# Health Sensors +# + +# +# Heart Rate Monitors +# +# CONFIG_AFE4403 is not set +# CONFIG_AFE4404 is not set +# CONFIG_MAX30100 is not set +# CONFIG_MAX30102 is not set +# end of Heart Rate Monitors +# end of Health Sensors + +# +# Humidity sensors +# +# CONFIG_AM2315 is not set +CONFIG_DHT11=m +# CONFIG_HDC100X is not set +# CONFIG_HID_SENSOR_HUMIDITY is not set +# CONFIG_HTS221 is not set +# CONFIG_HTU21 is not set +# CONFIG_SI7005 is not set +# CONFIG_SI7020 is not set +# end of Humidity sensors + +# +# Inertial measurement units +# +# CONFIG_ADIS16400 is not set +# CONFIG_ADIS16460 is not set +# CONFIG_ADIS16480 is not set +# CONFIG_BMI160_I2C is not set +# CONFIG_BMI160_SPI is not set +# CONFIG_KMX61 is not set +# CONFIG_INV_MPU6050_I2C is not set +# CONFIG_INV_MPU6050_SPI is not set +# CONFIG_IIO_ST_LSM6DSX is not set +# end of Inertial measurement units + +# +# Light sensors +# +CONFIG_ACPI_ALS=m +# CONFIG_ADJD_S311 is not set +# CONFIG_AL3320A is not set +# CONFIG_APDS9300 is not set +# CONFIG_APDS9960 is not set +# CONFIG_BH1750 is not set +CONFIG_BH1780=m +# CONFIG_CM32181 is not set +# CONFIG_CM3232 is not set +# CONFIG_CM3323 is not set +# CONFIG_CM3605 is not set +# CONFIG_CM36651 is not set +CONFIG_IIO_CROS_EC_LIGHT_PROX=m +# CONFIG_GP2AP020A00F is not set +# CONFIG_SENSORS_ISL29018 is not set +# CONFIG_SENSORS_ISL29028 is not set +# CONFIG_ISL29125 is not set +CONFIG_HID_SENSOR_ALS=m +CONFIG_HID_SENSOR_PROX=m +# CONFIG_JSA1212 is not set +# CONFIG_RPR0521 is not set +# CONFIG_LTR501 is not set +# CONFIG_LV0104CS is not set +# CONFIG_MAX44000 is not set +# CONFIG_MAX44009 is not set +# CONFIG_NOA1305 is not set +# CONFIG_OPT3001 is not set +# CONFIG_PA12203001 is not set +# CONFIG_SI1133 is not set +# CONFIG_SI1145 is not set +# CONFIG_STK3310 is not set +# CONFIG_ST_UVIS25 is not set +# CONFIG_TCS3414 is not set +# CONFIG_TCS3472 is not set +# CONFIG_SENSORS_TSL2563 is not set +# CONFIG_TSL2583 is not set +# CONFIG_TSL2772 is not set +# CONFIG_TSL4531 is not set +# CONFIG_US5182D is not set +# CONFIG_VCNL4000 is not set +# CONFIG_VCNL4035 is not set +# CONFIG_VEML6070 is not set +# CONFIG_VL6180 is not set +# CONFIG_ZOPT2201 is not set +# end of Light sensors + +# +# Magnetometer sensors +# +# CONFIG_AK8974 is not set +# CONFIG_AK8975 is not set +# CONFIG_AK09911 is not set +# CONFIG_BMC150_MAGN_I2C is not set +# CONFIG_BMC150_MAGN_SPI is not set +# CONFIG_MAG3110 is not set +CONFIG_HID_SENSOR_MAGNETOMETER_3D=m +# CONFIG_MMC35240 is not set +# CONFIG_IIO_ST_MAGN_3AXIS is not set +# CONFIG_SENSORS_HMC5843_I2C is not set +# CONFIG_SENSORS_HMC5843_SPI is not set +# CONFIG_SENSORS_RM3100_I2C is not set +# CONFIG_SENSORS_RM3100_SPI is not set +# end of Magnetometer sensors + +# +# Multiplexers +# +# CONFIG_IIO_MUX is not set +# end of Multiplexers + +# +# Inclinometer sensors +# +CONFIG_HID_SENSOR_INCLINOMETER_3D=m +CONFIG_HID_SENSOR_DEVICE_ROTATION=m +# end of Inclinometer sensors + +# +# Triggers - standalone +# +# CONFIG_IIO_INTERRUPT_TRIGGER is not set +# CONFIG_IIO_SYSFS_TRIGGER is not set +# end of Triggers - standalone + +# +# Digital potentiometers +# +# CONFIG_AD5272 is not set +# CONFIG_DS1803 is not set +# CONFIG_MAX5432 is not set +# CONFIG_MAX5481 is not set +# CONFIG_MAX5487 is not set +# CONFIG_MCP4018 is not set +# CONFIG_MCP4131 is not set +# CONFIG_MCP4531 is not set +# CONFIG_MCP41010 is not set +# CONFIG_TPL0102 is not set +# end of Digital potentiometers + +# +# Digital potentiostats +# +# CONFIG_LMP91000 is not set +# end of Digital potentiostats + +# +# Pressure sensors +# +# CONFIG_ABP060MG is not set +# CONFIG_BMP280 is not set +CONFIG_IIO_CROS_EC_BARO=m +# CONFIG_DPS310 is not set +CONFIG_HID_SENSOR_PRESS=m +# CONFIG_HP03 is not set +# CONFIG_MPL115_I2C is not set +# CONFIG_MPL115_SPI is not set +# CONFIG_MPL3115 is not set +# CONFIG_MS5611 is not set +# CONFIG_MS5637 is not set +# CONFIG_IIO_ST_PRESS is not set +# CONFIG_T5403 is not set +# CONFIG_HP206C is not set +# CONFIG_ZPA2326 is not set +# end of Pressure sensors + +# +# Lightning sensors +# +# CONFIG_AS3935 is not set +# end of Lightning sensors + +# +# Proximity and distance sensors +# +# CONFIG_ISL29501 is not set +# CONFIG_LIDAR_LITE_V2 is not set +# CONFIG_MB1232 is not set +# CONFIG_RFD77402 is not set +# CONFIG_SRF04 is not set +# CONFIG_SX9500 is not set +# CONFIG_SRF08 is not set +# CONFIG_VL53L0X_I2C is not set +# end of Proximity and distance sensors + +# +# Resolver to digital converters +# +# CONFIG_AD2S90 is not set +# CONFIG_AD2S1200 is not set +# end of Resolver to digital converters + +# +# Temperature sensors +# +# CONFIG_MAXIM_THERMOCOUPLE is not set +# CONFIG_HID_SENSOR_TEMP is not set +# CONFIG_MLX90614 is not set +# CONFIG_MLX90632 is not set +# CONFIG_TMP006 is not set +# CONFIG_TMP007 is not set +# CONFIG_TSYS01 is not set +# CONFIG_TSYS02D is not set +# CONFIG_MAX31856 is not set +# end of Temperature sensors + +# CONFIG_NTB is not set +# CONFIG_VME_BUS is not set +CONFIG_PWM=y +CONFIG_PWM_SYSFS=y +CONFIG_PWM_BCM2835=m +CONFIG_PWM_CROS_EC=m +# CONFIG_PWM_FSL_FTM is not set +# CONFIG_PWM_HIBVT is not set +CONFIG_PWM_MESON=m +# CONFIG_PWM_PCA9685 is not set +CONFIG_PWM_ROCKCHIP=m +CONFIG_PWM_SUN4I=m +CONFIG_PWM_TEGRA=m + +# +# IRQ chip support +# +CONFIG_IRQCHIP=y +CONFIG_ARM_GIC=y +CONFIG_ARM_GIC_PM=y +CONFIG_ARM_GIC_MAX_NR=1 +CONFIG_ARM_GIC_V2M=y +CONFIG_ARM_GIC_V3=y +CONFIG_ARM_GIC_V3_ITS=y +CONFIG_ARM_GIC_V3_ITS_PCI=y +# CONFIG_AL_FIC is not set +CONFIG_HISILICON_IRQ_MBIGEN=y +CONFIG_MVEBU_GICP=y +CONFIG_MVEBU_ICU=y +CONFIG_MVEBU_ODMI=y +CONFIG_MVEBU_PIC=y +CONFIG_MVEBU_SEI=y +CONFIG_PARTITION_PERCPU=y +CONFIG_QCOM_IRQ_COMBINER=y +CONFIG_MESON_IRQ_GPIO=y +# CONFIG_QCOM_PDC is not set +# end of IRQ chip support + +# CONFIG_IPACK_BUS is not set +CONFIG_ARCH_HAS_RESET_CONTROLLER=y +CONFIG_RESET_CONTROLLER=y +CONFIG_RESET_MESON=y +# CONFIG_RESET_MESON_AUDIO_ARB is not set +# CONFIG_RESET_QCOM_AOSS is not set +# CONFIG_RESET_QCOM_PDC is not set +CONFIG_RESET_SIMPLE=y +CONFIG_RESET_SUNXI=y +# CONFIG_RESET_TI_SYSCON is not set +CONFIG_COMMON_RESET_HI3660=y +CONFIG_COMMON_RESET_HI6220=y + +# +# PHY Subsystem +# +CONFIG_GENERIC_PHY=y +CONFIG_PHY_XGENE=m +CONFIG_PHY_SUN4I_USB=m +# CONFIG_PHY_SUN6I_MIPI_DPHY is not set +# CONFIG_PHY_SUN9I_USB is not set +CONFIG_PHY_MESON8B_USB2=m +CONFIG_PHY_MESON_GXL_USB2=y +CONFIG_PHY_MESON_GXL_USB3=y +CONFIG_PHY_MESON_G12A_USB2=y +CONFIG_PHY_MESON_G12A_USB3_PCIE=y +# CONFIG_BCM_KONA_USB2_PHY is not set +# CONFIG_PHY_CADENCE_DP is not set +# CONFIG_PHY_CADENCE_DPHY is not set +# CONFIG_PHY_CADENCE_SIERRA is not set +# CONFIG_PHY_FSL_IMX8MQ_USB is not set +# CONFIG_PHY_MIXEL_MIPI_DPHY is not set +CONFIG_PHY_HI6220_USB=m +# CONFIG_PHY_HI3660_USB is not set +# CONFIG_PHY_HISTB_COMBPHY is not set +# CONFIG_PHY_HISI_INNO_USB2 is not set +CONFIG_PHY_MVEBU_A3700_COMPHY=y +CONFIG_PHY_MVEBU_A3700_UTMI=y +# CONFIG_PHY_MVEBU_A38X_COMPHY is not set +CONFIG_PHY_MVEBU_CP110_COMPHY=m +# CONFIG_PHY_PXA_28NM_HSIC is not set +# CONFIG_PHY_PXA_28NM_USB2 is not set +# CONFIG_PHY_CPCAP_USB is not set +# CONFIG_PHY_MAPPHONE_MDM6600 is not set +# CONFIG_PHY_OCELOT_SERDES is not set +CONFIG_PHY_QCOM_APQ8064_SATA=m +CONFIG_PHY_QCOM_IPQ806X_SATA=m +# CONFIG_PHY_QCOM_PCIE2 is not set +CONFIG_PHY_QCOM_QMP=m +CONFIG_PHY_QCOM_QUSB2=m +CONFIG_PHY_QCOM_UFS=m +CONFIG_PHY_QCOM_UFS_14NM=m +CONFIG_PHY_QCOM_USB_HS=m +CONFIG_PHY_QCOM_USB_HSIC=m +CONFIG_PHY_ROCKCHIP_DP=m +CONFIG_PHY_ROCKCHIP_EMMC=m +CONFIG_PHY_ROCKCHIP_INNO_HDMI=m +CONFIG_PHY_ROCKCHIP_INNO_USB2=m +CONFIG_PHY_ROCKCHIP_PCIE=m +CONFIG_PHY_ROCKCHIP_TYPEC=m +CONFIG_PHY_ROCKCHIP_USB=m +# CONFIG_PHY_SAMSUNG_USB2 is not set +CONFIG_PHY_TEGRA_XUSB=m +# CONFIG_PHY_TUSB1210 is not set +# end of PHY Subsystem + +# CONFIG_POWERCAP is not set +# CONFIG_MCB is not set + +# +# Performance monitor support +# +CONFIG_ARM_CCI_PMU=y +CONFIG_ARM_CCI400_PMU=y +CONFIG_ARM_CCI5xx_PMU=y +CONFIG_ARM_CCN=y +CONFIG_ARM_CMN=m +CONFIG_ARM_PMU=y +CONFIG_ARM_PMU_ACPI=y +CONFIG_ARM_SMMU_V3_PMU=y +CONFIG_ARM_DSU_PMU=m +CONFIG_HISI_PMU=y +CONFIG_QCOM_L2_PMU=y +CONFIG_QCOM_L3_PMU=y +CONFIG_THUNDERX2_PMU=m +CONFIG_XGENE_PMU=y +CONFIG_ARM_SPE_PMU=m +CONFIG_ARM_DMC620_PMU=y +# end of Performance monitor support + +CONFIG_RAS=y + +# +# Android +# +CONFIG_ANDROID=y +# CONFIG_ANDROID_BINDER_IPC is not set +# end of Android + +CONFIG_LIBNVDIMM=m +CONFIG_BLK_DEV_PMEM=m +CONFIG_ND_BLK=m +CONFIG_ND_CLAIM=y +CONFIG_ND_BTT=m +CONFIG_BTT=y +CONFIG_OF_PMEM=m +CONFIG_DAX_DRIVER=y +CONFIG_DAX=y +CONFIG_DEV_DAX=m +CONFIG_DEV_DAX_KMEM=m +CONFIG_NVMEM=y +CONFIG_NVMEM_SYSFS=y +CONFIG_QCOM_QFPROM=m +CONFIG_ROCKCHIP_EFUSE=m +CONFIG_NVMEM_SUNXI_SID=m +CONFIG_MESON_EFUSE=m +# CONFIG_MESON_MX_EFUSE is not set +# CONFIG_NVMEM_ZYNQMP is not set + +# +# HW tracing support +# +# CONFIG_STM is not set +# CONFIG_INTEL_TH is not set +# end of HW tracing support + +# CONFIG_FPGA is not set +# CONFIG_FSI is not set +CONFIG_TEE=m + +# +# TEE drivers +# +CONFIG_OPTEE=m +CONFIG_OPTEE_SHM_NUM_PRIV_PAGES=1 +# end of TEE drivers + +CONFIG_PM_OPP=y +# CONFIG_SIOX is not set +# CONFIG_SLIMBUS is not set +# CONFIG_INTERCONNECT is not set +# CONFIG_COUNTER is not set + +# +# ByteDance drivers support +# +CONFIG_BYTEDANCE=y +CONFIG_BYTEDANCE_TRACE_IRQOFF=m +# CONFIG_BYTEDANCE_HOT_INSPECTOR is not set +CONFIG_BYTEDANCE_HOOKBIND=m +# CONFIG_BYTEDANCE_BLACKHOLEFS is not set +# CONFIG_TTCP is not set +# end of ByteDance drivers support +# end of Device Drivers + +# +# File systems +# +CONFIG_DCACHE_WORD_ACCESS=y +# CONFIG_VALIDATE_FS_PARSER is not set +CONFIG_FS_IOMAP=y +# CONFIG_EXT2_FS is not set +# CONFIG_EXT3_FS is not set +CONFIG_EXT4_FS=m +CONFIG_EXT4_USE_FOR_EXT2=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +# CONFIG_EXT4_DEBUG is not set +CONFIG_JBD2=m +# CONFIG_JBD2_DEBUG is not set +CONFIG_FS_MBCACHE=m +CONFIG_REISERFS_FS=m +# CONFIG_REISERFS_CHECK is not set +# CONFIG_REISERFS_PROC_INFO is not set +CONFIG_REISERFS_FS_XATTR=y +CONFIG_REISERFS_FS_POSIX_ACL=y +CONFIG_REISERFS_FS_SECURITY=y +CONFIG_JFS_FS=m +CONFIG_JFS_POSIX_ACL=y +CONFIG_JFS_SECURITY=y +# CONFIG_JFS_DEBUG is not set +# CONFIG_JFS_STATISTICS is not set +CONFIG_XFS_FS=m +CONFIG_XFS_QUOTA=y +CONFIG_XFS_POSIX_ACL=y +CONFIG_XFS_RT=y +# CONFIG_XFS_ONLINE_SCRUB is not set +# CONFIG_XFS_WARN is not set +# CONFIG_XFS_DEBUG is not set +CONFIG_GFS2_FS=m +CONFIG_GFS2_FS_LOCKING_DLM=y +CONFIG_OCFS2_FS=m +CONFIG_OCFS2_FS_O2CB=m +CONFIG_OCFS2_FS_USERSPACE_CLUSTER=m +CONFIG_OCFS2_FS_STATS=y +CONFIG_OCFS2_DEBUG_MASKLOG=y +# CONFIG_OCFS2_DEBUG_FS is not set +CONFIG_BTRFS_FS=m +CONFIG_BTRFS_FS_POSIX_ACL=y +# CONFIG_BTRFS_FS_CHECK_INTEGRITY is not set +# CONFIG_BTRFS_FS_RUN_SANITY_TESTS is not set +# CONFIG_BTRFS_DEBUG is not set +# CONFIG_BTRFS_ASSERT is not set +# CONFIG_BTRFS_FS_REF_VERIFY is not set +CONFIG_NILFS2_FS=m +CONFIG_F2FS_FS=m +CONFIG_F2FS_STAT_FS=y +CONFIG_F2FS_FS_XATTR=y +CONFIG_F2FS_FS_POSIX_ACL=y +CONFIG_F2FS_FS_SECURITY=y +# CONFIG_F2FS_CHECK_FS is not set +# CONFIG_F2FS_IO_TRACE is not set +# CONFIG_F2FS_FAULT_INJECTION is not set +CONFIG_FS_DAX=y +CONFIG_FS_POSIX_ACL=y +CONFIG_EXPORTFS=y +CONFIG_EXPORTFS_BLOCK_OPS=y +CONFIG_FILE_LOCKING=y +CONFIG_MANDATORY_FILE_LOCKING=y +CONFIG_FS_ENCRYPTION=y +# CONFIG_FS_VERITY is not set +CONFIG_FSNOTIFY=y +CONFIG_DNOTIFY=y +CONFIG_INOTIFY_USER=y +CONFIG_FANOTIFY=y +CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y +CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +CONFIG_PRINT_QUOTA_WARNING=y +# CONFIG_QUOTA_DEBUG is not set +CONFIG_QUOTA_TREE=m +CONFIG_QFMT_V1=m +CONFIG_QFMT_V2=m +CONFIG_QUOTACTL=y +# CONFIG_AUTOFS4_FS is not set +CONFIG_AUTOFS_FS=m +CONFIG_FUSE_FS=m +CONFIG_CUSE=m +CONFIG_VIRTIO_FS=m +CONFIG_OVERLAY_FS=m +# CONFIG_OVERLAY_FS_REDIRECT_DIR is not set +CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW=y +# CONFIG_OVERLAY_FS_INDEX is not set +# CONFIG_OVERLAY_FS_XINO_AUTO is not set +# CONFIG_OVERLAY_FS_METACOPY is not set + +# +# Caches +# +CONFIG_FSCACHE=m +CONFIG_FSCACHE_STATS=y +# CONFIG_FSCACHE_HISTOGRAM is not set +# CONFIG_FSCACHE_DEBUG is not set +# CONFIG_FSCACHE_OBJECT_LIST is not set +CONFIG_CACHEFILES=m +# CONFIG_CACHEFILES_DEBUG is not set +# CONFIG_CACHEFILES_HISTOGRAM is not set +# end of Caches + +# +# CD-ROM/DVD Filesystems +# +CONFIG_ISO9660_FS=m +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_UDF_FS=m +# end of CD-ROM/DVD Filesystems + +# +# DOS/FAT/NT Filesystems +# +CONFIG_FAT_FS=m +CONFIG_MSDOS_FS=m +CONFIG_VFAT_FS=m +CONFIG_FAT_DEFAULT_CODEPAGE=437 +CONFIG_FAT_DEFAULT_IOCHARSET="ascii" +CONFIG_FAT_DEFAULT_UTF8=y +CONFIG_NTFS_FS=m +# CONFIG_NTFS_DEBUG is not set +CONFIG_NTFS_RW=y +# end of DOS/FAT/NT Filesystems + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_PROC_VMCORE=y +# CONFIG_PROC_VMCORE_DEVICE_DUMP is not set +CONFIG_PROC_SYSCTL=y +CONFIG_PROC_PAGE_MONITOR=y +CONFIG_PROC_CHILDREN=y +CONFIG_KERNFS=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_TMPFS_XATTR=y +CONFIG_HUGETLBFS=y +CONFIG_HUGETLB_PAGE=y +CONFIG_MEMFD_CREATE=y +CONFIG_ARCH_HAS_GIGANTIC_PAGE=y +CONFIG_CONFIGFS_FS=m +CONFIG_EFIVAR_FS=m +# end of Pseudo filesystems + +CONFIG_MISC_FILESYSTEMS=y +CONFIG_ORANGEFS_FS=m +CONFIG_ADFS_FS=m +# CONFIG_ADFS_FS_RW is not set +CONFIG_AFFS_FS=m +CONFIG_ECRYPT_FS=m +CONFIG_ECRYPT_FS_MESSAGING=y +CONFIG_HFS_FS=m +CONFIG_HFSPLUS_FS=m +CONFIG_BEFS_FS=m +# CONFIG_BEFS_DEBUG is not set +CONFIG_BFS_FS=m +CONFIG_EFS_FS=m +CONFIG_JFFS2_FS=m +CONFIG_JFFS2_FS_DEBUG=0 +CONFIG_JFFS2_FS_WRITEBUFFER=y +# CONFIG_JFFS2_FS_WBUF_VERIFY is not set +CONFIG_JFFS2_SUMMARY=y +CONFIG_JFFS2_FS_XATTR=y +CONFIG_JFFS2_FS_POSIX_ACL=y +CONFIG_JFFS2_FS_SECURITY=y +CONFIG_JFFS2_COMPRESSION_OPTIONS=y +CONFIG_JFFS2_ZLIB=y +CONFIG_JFFS2_LZO=y +CONFIG_JFFS2_RTIME=y +# CONFIG_JFFS2_RUBIN is not set +# CONFIG_JFFS2_CMODE_NONE is not set +CONFIG_JFFS2_CMODE_PRIORITY=y +# CONFIG_JFFS2_CMODE_SIZE is not set +# CONFIG_JFFS2_CMODE_FAVOURLZO is not set +CONFIG_UBIFS_FS=m +CONFIG_UBIFS_FS_ADVANCED_COMPR=y +CONFIG_UBIFS_FS_LZO=y +CONFIG_UBIFS_FS_ZLIB=y +CONFIG_UBIFS_FS_ZSTD=y +# CONFIG_UBIFS_ATIME_SUPPORT is not set +CONFIG_UBIFS_FS_XATTR=y +CONFIG_UBIFS_FS_SECURITY=y +# CONFIG_UBIFS_FS_AUTHENTICATION is not set +# CONFIG_CRAMFS is not set +CONFIG_SQUASHFS=m +CONFIG_SQUASHFS_FILE_CACHE=y +# CONFIG_SQUASHFS_FILE_DIRECT is not set +CONFIG_SQUASHFS_DECOMP_SINGLE=y +# CONFIG_SQUASHFS_DECOMP_MULTI is not set +# CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU is not set +CONFIG_SQUASHFS_XATTR=y +CONFIG_SQUASHFS_ZLIB=y +CONFIG_SQUASHFS_LZ4=y +CONFIG_SQUASHFS_LZO=y +CONFIG_SQUASHFS_XZ=y +CONFIG_SQUASHFS_ZSTD=y +# CONFIG_SQUASHFS_4K_DEVBLK_SIZE is not set +# CONFIG_SQUASHFS_EMBEDDED is not set +CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE=3 +CONFIG_VXFS_FS=m +CONFIG_MINIX_FS=m +CONFIG_OMFS_FS=m +CONFIG_HPFS_FS=m +CONFIG_QNX4FS_FS=m +CONFIG_QNX6FS_FS=m +# CONFIG_QNX6FS_DEBUG is not set +CONFIG_ROMFS_FS=m +# CONFIG_ROMFS_BACKED_BY_BLOCK is not set +# CONFIG_ROMFS_BACKED_BY_MTD is not set +CONFIG_ROMFS_BACKED_BY_BOTH=y +CONFIG_ROMFS_ON_BLOCK=y +CONFIG_ROMFS_ON_MTD=y +CONFIG_PSTORE=y +CONFIG_PSTORE_DEFLATE_COMPRESS=y +# CONFIG_PSTORE_LZO_COMPRESS is not set +# CONFIG_PSTORE_LZ4_COMPRESS is not set +# CONFIG_PSTORE_LZ4HC_COMPRESS is not set +# CONFIG_PSTORE_842_COMPRESS is not set +# CONFIG_PSTORE_ZSTD_COMPRESS is not set +CONFIG_PSTORE_COMPRESS=y +CONFIG_PSTORE_DEFLATE_COMPRESS_DEFAULT=y +CONFIG_PSTORE_COMPRESS_DEFAULT="deflate" +# CONFIG_PSTORE_CONSOLE is not set +# CONFIG_PSTORE_PMSG is not set +# CONFIG_PSTORE_FTRACE is not set +CONFIG_PSTORE_RAM=m +CONFIG_SYSV_FS=m +CONFIG_UFS_FS=m +# CONFIG_UFS_FS_WRITE is not set +# CONFIG_UFS_DEBUG is not set +# CONFIG_EROFS_FS is not set +CONFIG_NETWORK_FILESYSTEMS=y +CONFIG_NFS_FS=m +CONFIG_NFS_V2=m +CONFIG_NFS_V3=m +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=m +CONFIG_NFS_SWAP=y +CONFIG_NFS_V4_1=y +CONFIG_NFS_V4_2=y +CONFIG_PNFS_FILE_LAYOUT=m +CONFIG_PNFS_BLOCK=m +CONFIG_PNFS_FLEXFILE_LAYOUT=m +CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN="kernel.org" +# CONFIG_NFS_V4_1_MIGRATION is not set +CONFIG_NFS_V4_SECURITY_LABEL=y +CONFIG_NFS_FSCACHE=y +# CONFIG_NFS_USE_LEGACY_DNS is not set +CONFIG_NFS_USE_KERNEL_DNS=y +CONFIG_NFS_DEBUG=y +CONFIG_NFSD=m +CONFIG_NFSD_V2_ACL=y +CONFIG_NFSD_V3=y +CONFIG_NFSD_V3_ACL=y +CONFIG_NFSD_V4=y +CONFIG_NFSD_PNFS=y +CONFIG_NFSD_BLOCKLAYOUT=y +# CONFIG_NFSD_SCSILAYOUT is not set +# CONFIG_NFSD_FLEXFILELAYOUT is not set +CONFIG_NFSD_V4_SECURITY_LABEL=y +CONFIG_GRACE_PERIOD=m +CONFIG_LOCKD=m +CONFIG_LOCKD_V4=y +CONFIG_NFS_ACL_SUPPORT=m +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=m +CONFIG_SUNRPC_GSS=m +CONFIG_SUNRPC_BACKCHANNEL=y +CONFIG_SUNRPC_SWAP=y +CONFIG_RPCSEC_GSS_KRB5=m +# CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES is not set +CONFIG_SUNRPC_DEBUG=y +CONFIG_SUNRPC_XPRT_RDMA=m +CONFIG_CEPH_FS=m +CONFIG_CEPH_FSCACHE=y +CONFIG_CEPH_FS_POSIX_ACL=y +# CONFIG_CEPH_FS_SECURITY_LABEL is not set +CONFIG_CIFS=m +# CONFIG_CIFS_STATS2 is not set +CONFIG_CIFS_ALLOW_INSECURE_LEGACY=y +CONFIG_CIFS_WEAK_PW_HASH=y +CONFIG_CIFS_UPCALL=y +CONFIG_CIFS_XATTR=y +CONFIG_CIFS_POSIX=y +CONFIG_CIFS_DEBUG=y +# CONFIG_CIFS_DEBUG2 is not set +# CONFIG_CIFS_DEBUG_DUMP_KEYS is not set +CONFIG_CIFS_DFS_UPCALL=y +# CONFIG_CIFS_SMB_DIRECT is not set +CONFIG_CIFS_FSCACHE=y +CONFIG_CODA_FS=m +CONFIG_AFS_FS=m +# CONFIG_AFS_DEBUG is not set +CONFIG_AFS_FSCACHE=y +# CONFIG_AFS_DEBUG_CURSOR is not set +CONFIG_9P_FS=m +CONFIG_9P_FSCACHE=y +CONFIG_9P_FS_POSIX_ACL=y +CONFIG_9P_FS_SECURITY=y +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=m +CONFIG_NLS_CODEPAGE_737=m +CONFIG_NLS_CODEPAGE_775=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m +CONFIG_NLS_CODEPAGE_855=m +CONFIG_NLS_CODEPAGE_857=m +CONFIG_NLS_CODEPAGE_860=m +CONFIG_NLS_CODEPAGE_861=m +CONFIG_NLS_CODEPAGE_862=m +CONFIG_NLS_CODEPAGE_863=m +CONFIG_NLS_CODEPAGE_864=m +CONFIG_NLS_CODEPAGE_865=m +CONFIG_NLS_CODEPAGE_866=m +CONFIG_NLS_CODEPAGE_869=m +CONFIG_NLS_CODEPAGE_936=m +CONFIG_NLS_CODEPAGE_950=m +CONFIG_NLS_CODEPAGE_932=m +CONFIG_NLS_CODEPAGE_949=m +CONFIG_NLS_CODEPAGE_874=m +CONFIG_NLS_ISO8859_8=m +CONFIG_NLS_CODEPAGE_1250=m +CONFIG_NLS_CODEPAGE_1251=m +CONFIG_NLS_ASCII=m +CONFIG_NLS_ISO8859_1=m +CONFIG_NLS_ISO8859_2=m +CONFIG_NLS_ISO8859_3=m +CONFIG_NLS_ISO8859_4=m +CONFIG_NLS_ISO8859_5=m +CONFIG_NLS_ISO8859_6=m +CONFIG_NLS_ISO8859_7=m +CONFIG_NLS_ISO8859_9=m +CONFIG_NLS_ISO8859_13=m +CONFIG_NLS_ISO8859_14=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_KOI8_R=m +CONFIG_NLS_KOI8_U=m +CONFIG_NLS_MAC_ROMAN=m +CONFIG_NLS_MAC_CELTIC=m +CONFIG_NLS_MAC_CENTEURO=m +CONFIG_NLS_MAC_CROATIAN=m +CONFIG_NLS_MAC_CYRILLIC=m +CONFIG_NLS_MAC_GAELIC=m +CONFIG_NLS_MAC_GREEK=m +CONFIG_NLS_MAC_ICELAND=m +CONFIG_NLS_MAC_INUIT=m +CONFIG_NLS_MAC_ROMANIAN=m +CONFIG_NLS_MAC_TURKISH=m +CONFIG_NLS_UTF8=m +CONFIG_DLM=m +CONFIG_DLM_DEBUG=y +# CONFIG_UNICODE is not set +CONFIG_IO_WQ=y +# end of File systems + +# +# Security options +# +CONFIG_KEYS=y +CONFIG_KEYS_COMPAT=y +# CONFIG_KEYS_REQUEST_CACHE is not set +# CONFIG_PERSISTENT_KEYRINGS is not set +# CONFIG_BIG_KEYS is not set +# CONFIG_TRUSTED_KEYS is not set +# CONFIG_ENCRYPTED_KEYS is not set +CONFIG_KEY_DH_OPERATIONS=y +CONFIG_SECURITY_DMESG_RESTRICT=y +CONFIG_SECURITY=y +CONFIG_SECURITYFS=y +CONFIG_SECURITY_NETWORK=y +# CONFIG_SECURITY_INFINIBAND is not set +CONFIG_SECURITY_NETWORK_XFRM=y +CONFIG_SECURITY_PATH=y +CONFIG_LSM_MMAP_MIN_ADDR=32768 +CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y +CONFIG_HARDENED_USERCOPY=y +# CONFIG_HARDENED_USERCOPY_FALLBACK is not set +# CONFIG_HARDENED_USERCOPY_PAGESPAN is not set +CONFIG_FORTIFY_SOURCE=y +# CONFIG_STATIC_USERMODEHELPER is not set +CONFIG_SECURITY_SELINUX=y +# CONFIG_SECURITY_SELINUX_BOOTPARAM is not set +# CONFIG_SECURITY_SELINUX_DISABLE is not set +CONFIG_SECURITY_SELINUX_DEVELOP=y +CONFIG_SECURITY_SELINUX_AVC_STATS=y +CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=0 +# CONFIG_SECURITY_SMACK is not set +CONFIG_SECURITY_TOMOYO=y +CONFIG_SECURITY_TOMOYO_MAX_ACCEPT_ENTRY=2048 +CONFIG_SECURITY_TOMOYO_MAX_AUDIT_LOG=1024 +# CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER is not set +CONFIG_SECURITY_TOMOYO_POLICY_LOADER="/sbin/tomoyo-init" +CONFIG_SECURITY_TOMOYO_ACTIVATION_TRIGGER="/sbin/init" +# CONFIG_SECURITY_TOMOYO_INSECURE_BUILTIN_SETTING is not set +CONFIG_SECURITY_APPARMOR=y +CONFIG_SECURITY_APPARMOR_HASH=y +CONFIG_SECURITY_APPARMOR_HASH_DEFAULT=y +# CONFIG_SECURITY_APPARMOR_DEBUG is not set +# CONFIG_SECURITY_LOADPIN is not set +CONFIG_SECURITY_YAMA=y +# CONFIG_SECURITY_SAFESETID is not set +# CONFIG_SECURITY_LOCKDOWN_LSM is not set +CONFIG_INTEGRITY=y +CONFIG_INTEGRITY_SIGNATURE=y +CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y +# CONFIG_INTEGRITY_TRUSTED_KEYRING is not set +CONFIG_INTEGRITY_PLATFORM_KEYRING=y +CONFIG_LOAD_UEFI_KEYS=y +CONFIG_INTEGRITY_AUDIT=y +# CONFIG_IMA is not set +# CONFIG_IMA_KEYRINGS_PERMIT_SIGNED_BY_BUILTIN_OR_SECONDARY is not set +# CONFIG_EVM is not set +# CONFIG_DEFAULT_SECURITY_SELINUX is not set +# CONFIG_DEFAULT_SECURITY_TOMOYO is not set +CONFIG_DEFAULT_SECURITY_APPARMOR=y +# CONFIG_DEFAULT_SECURITY_DAC is not set +CONFIG_LSM="yama,loadpin,safesetid,integrity,apparmor,selinux,smack,tomoyo" + +# +# Kernel hardening options +# + +# +# Memory initialization +# +CONFIG_INIT_STACK_NONE=y +# CONFIG_INIT_ON_ALLOC_DEFAULT_ON is not set +# CONFIG_INIT_ON_FREE_DEFAULT_ON is not set +# end of Memory initialization +# end of Kernel hardening options +# end of Security options + +CONFIG_XOR_BLOCKS=m +CONFIG_ASYNC_CORE=m +CONFIG_ASYNC_MEMCPY=m +CONFIG_ASYNC_XOR=m +CONFIG_ASYNC_PQ=m +CONFIG_ASYNC_RAID6_RECOV=m +CONFIG_CRYPTO=y + +# +# Crypto core or helper +# +CONFIG_CRYPTO_FIPS=y +CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_ALGAPI2=y +CONFIG_CRYPTO_AEAD=m +CONFIG_CRYPTO_AEAD2=y +CONFIG_CRYPTO_BLKCIPHER=y +CONFIG_CRYPTO_BLKCIPHER2=y +CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_HASH2=y +CONFIG_CRYPTO_RNG=m +CONFIG_CRYPTO_RNG2=y +CONFIG_CRYPTO_RNG_DEFAULT=m +CONFIG_CRYPTO_AKCIPHER2=y +CONFIG_CRYPTO_AKCIPHER=y +CONFIG_CRYPTO_KPP2=y +CONFIG_CRYPTO_KPP=y +CONFIG_CRYPTO_ACOMP2=y +CONFIG_CRYPTO_MANAGER=y +CONFIG_CRYPTO_MANAGER2=y +CONFIG_CRYPTO_USER=m +# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set +# CONFIG_CRYPTO_MANAGER_EXTRA_TESTS is not set +CONFIG_CRYPTO_GF128MUL=m +CONFIG_CRYPTO_NULL=m +CONFIG_CRYPTO_NULL2=y +CONFIG_CRYPTO_PCRYPT=m +CONFIG_CRYPTO_CRYPTD=m +CONFIG_CRYPTO_AUTHENC=m +CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_SIMD=m +CONFIG_CRYPTO_ENGINE=m + +# +# Public-key cryptography +# +CONFIG_CRYPTO_RSA=y +CONFIG_CRYPTO_DH=y +CONFIG_CRYPTO_ECC=m +CONFIG_CRYPTO_ECDH=m +# CONFIG_CRYPTO_ECRDSA is not set + +# +# Authenticated Encryption with Associated Data +# +CONFIG_CRYPTO_CCM=m +CONFIG_CRYPTO_GCM=m +CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_CRYPTO_AEGIS128=m +CONFIG_CRYPTO_AEGIS128_SIMD=y +CONFIG_CRYPTO_SEQIV=m +CONFIG_CRYPTO_ECHAINIV=m + +# +# Block modes +# +CONFIG_CRYPTO_CBC=y +# CONFIG_CRYPTO_CFB is not set +CONFIG_CRYPTO_CTR=m +CONFIG_CRYPTO_CTS=y +CONFIG_CRYPTO_ECB=y +CONFIG_CRYPTO_LRW=m +# CONFIG_CRYPTO_OFB is not set +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_XTS=y +# CONFIG_CRYPTO_KEYWRAP is not set +# CONFIG_CRYPTO_ADIANTUM is not set +CONFIG_CRYPTO_ESSIV=m + +# +# Hash modes +# +CONFIG_CRYPTO_CMAC=m +CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPTO_XCBC=m +CONFIG_CRYPTO_VMAC=m + +# +# Digest +# +CONFIG_CRYPTO_CRC32C=m +CONFIG_CRYPTO_CRC32=m +# CONFIG_CRYPTO_XXHASH is not set +CONFIG_CRYPTO_CRCT10DIF=y +CONFIG_CRYPTO_GHASH=m +CONFIG_CRYPTO_POLY1305=m +CONFIG_CRYPTO_MD4=m +CONFIG_CRYPTO_MD5=y +CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_RMD128=m +CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_RMD256=m +CONFIG_CRYPTO_RMD320=m +CONFIG_CRYPTO_SHA1=y +CONFIG_CRYPTO_LIB_SHA256=y +CONFIG_CRYPTO_SHA256=y +CONFIG_CRYPTO_SHA512=y +CONFIG_CRYPTO_SHA3=m +# CONFIG_CRYPTO_SM3 is not set +# CONFIG_CRYPTO_STREEBOG is not set +CONFIG_CRYPTO_TGR192=m +CONFIG_CRYPTO_WP512=m + +# +# Ciphers +# +CONFIG_CRYPTO_LIB_AES=y +CONFIG_CRYPTO_AES=y +# CONFIG_CRYPTO_AES_TI is not set +CONFIG_CRYPTO_ANUBIS=m +CONFIG_CRYPTO_LIB_ARC4=m +CONFIG_CRYPTO_ARC4=m +CONFIG_CRYPTO_BLOWFISH=m +CONFIG_CRYPTO_BLOWFISH_COMMON=m +CONFIG_CRYPTO_CAMELLIA=m +CONFIG_CRYPTO_CAST_COMMON=m +CONFIG_CRYPTO_CAST5=m +CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_LIB_DES=m +CONFIG_CRYPTO_DES=m +CONFIG_CRYPTO_FCRYPT=m +CONFIG_CRYPTO_KHAZAD=m +CONFIG_CRYPTO_SALSA20=m +CONFIG_CRYPTO_CHACHA20=m +CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_SERPENT=m +# CONFIG_CRYPTO_SM4 is not set +CONFIG_CRYPTO_TEA=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_TWOFISH_COMMON=m + +# +# Compression +# +CONFIG_CRYPTO_DEFLATE=y +CONFIG_CRYPTO_LZO=y +# CONFIG_CRYPTO_842 is not set +CONFIG_CRYPTO_LZ4=m +CONFIG_CRYPTO_LZ4HC=m +CONFIG_CRYPTO_ZSTD=m + +# +# Random Number Generation +# +CONFIG_CRYPTO_ANSI_CPRNG=m +CONFIG_CRYPTO_DRBG_MENU=m +CONFIG_CRYPTO_DRBG_HMAC=y +# CONFIG_CRYPTO_DRBG_HASH is not set +# CONFIG_CRYPTO_DRBG_CTR is not set +CONFIG_CRYPTO_DRBG=m +CONFIG_CRYPTO_JITTERENTROPY=m +CONFIG_CRYPTO_USER_API=m +CONFIG_CRYPTO_USER_API_HASH=m +CONFIG_CRYPTO_USER_API_SKCIPHER=m +CONFIG_CRYPTO_USER_API_RNG=m +CONFIG_CRYPTO_USER_API_AEAD=m +# CONFIG_CRYPTO_STATS is not set +CONFIG_CRYPTO_HASH_INFO=y +CONFIG_CRYPTO_HW=y +CONFIG_CRYPTO_DEV_MARVELL_CESA=m +# CONFIG_CRYPTO_DEV_ATMEL_ECC is not set +# CONFIG_CRYPTO_DEV_ATMEL_SHA204A is not set +# CONFIG_CRYPTO_DEV_CCP is not set +CONFIG_CRYPTO_DEV_CPT=m +CONFIG_CAVIUM_CPT=m +CONFIG_CRYPTO_DEV_NITROX=m +CONFIG_CRYPTO_DEV_NITROX_CNN55XX=m +# CONFIG_CRYPTO_DEV_CAVIUM_ZIP is not set +CONFIG_CRYPTO_DEV_QCE=m +CONFIG_CRYPTO_DEV_QCOM_RNG=m +# CONFIG_CRYPTO_DEV_ROCKCHIP is not set +CONFIG_CRYPTO_DEV_CHELSIO=m +# CONFIG_CHELSIO_IPSEC_INLINE is not set +CONFIG_CRYPTO_DEV_VIRTIO=m +CONFIG_CRYPTO_DEV_SAFEXCEL=m +# CONFIG_CRYPTO_DEV_CCREE is not set +# CONFIG_CRYPTO_DEV_HISI_SEC is not set +# CONFIG_CRYPTO_DEV_HISI_ZIP is not set +CONFIG_ASYMMETRIC_KEY_TYPE=y +CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y +CONFIG_X509_CERTIFICATE_PARSER=y +CONFIG_PKCS8_PRIVATE_KEY_PARSER=m +CONFIG_PKCS7_MESSAGE_PARSER=y +# CONFIG_PKCS7_TEST_KEY is not set +# CONFIG_SIGNED_PE_FILE_VERIFICATION is not set + +# +# Certificates for signature checking +# +CONFIG_MODULE_SIG_KEY="" +CONFIG_SYSTEM_TRUSTED_KEYRING=y +CONFIG_SYSTEM_TRUSTED_KEYS="" +# CONFIG_SYSTEM_EXTRA_CERTIFICATE is not set +CONFIG_SECONDARY_TRUSTED_KEYRING=y +CONFIG_SYSTEM_BLACKLIST_KEYRING=y +CONFIG_SYSTEM_BLACKLIST_HASH_LIST="" +# CONFIG_SYSTEM_REVOCATION_LIST is not set +# end of Certificates for signature checking + +CONFIG_BINARY_PRINTF=y + +# +# Library routines +# +CONFIG_RAID6_PQ=m +CONFIG_RAID6_PQ_BENCHMARK=y +# CONFIG_PACKING is not set +CONFIG_BITREVERSE=y +CONFIG_HAVE_ARCH_BITREVERSE=y +CONFIG_GENERIC_STRNCPY_FROM_USER=y +CONFIG_GENERIC_STRNLEN_USER=y +CONFIG_GENERIC_NET_UTILS=y +CONFIG_CORDIC=m +CONFIG_RATIONAL=y +CONFIG_GENERIC_PCI_IOMAP=y +CONFIG_ARCH_USE_CMPXCHG_LOCKREF=y +CONFIG_ARCH_HAS_FAST_MULTIPLIER=y +CONFIG_INDIRECT_PIO=y +CONFIG_CRC_CCITT=m +CONFIG_CRC16=m +CONFIG_CRC_T10DIF=y +CONFIG_CRC_ITU_T=m +CONFIG_CRC32=y +# CONFIG_CRC32_SELFTEST is not set +CONFIG_CRC32_SLICEBY8=y +# CONFIG_CRC32_SLICEBY4 is not set +# CONFIG_CRC32_SARWATE is not set +# CONFIG_CRC32_BIT is not set +CONFIG_CRC64=m +# CONFIG_CRC4 is not set +CONFIG_CRC7=m +CONFIG_LIBCRC32C=m +CONFIG_CRC8=m +CONFIG_XXHASH=y +CONFIG_AUDIT_GENERIC=y +CONFIG_AUDIT_ARCH_COMPAT_GENERIC=y +CONFIG_AUDIT_COMPAT_GENERIC=y +# CONFIG_RANDOM32_SELFTEST is not set +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=y +CONFIG_LZO_COMPRESS=y +CONFIG_LZO_DECOMPRESS=y +CONFIG_LZ4_COMPRESS=m +CONFIG_LZ4HC_COMPRESS=m +CONFIG_LZ4_DECOMPRESS=y +CONFIG_ZSTD_COMPRESS=m +CONFIG_ZSTD_DECOMPRESS=m +CONFIG_XZ_DEC=y +# CONFIG_XZ_DEC_X86 is not set +# CONFIG_XZ_DEC_POWERPC is not set +# CONFIG_XZ_DEC_IA64 is not set +# CONFIG_XZ_DEC_ARM is not set +# CONFIG_XZ_DEC_ARMTHUMB is not set +# CONFIG_XZ_DEC_SPARC is not set +# CONFIG_XZ_DEC_TEST is not set +CONFIG_DECOMPRESS_GZIP=y +CONFIG_DECOMPRESS_BZIP2=y +CONFIG_DECOMPRESS_LZMA=y +CONFIG_DECOMPRESS_XZ=y +CONFIG_DECOMPRESS_LZO=y +CONFIG_DECOMPRESS_LZ4=y +CONFIG_GENERIC_ALLOCATOR=y +CONFIG_REED_SOLOMON=m +CONFIG_REED_SOLOMON_ENC8=y +CONFIG_REED_SOLOMON_DEC8=y +CONFIG_TEXTSEARCH=y +CONFIG_TEXTSEARCH_KMP=m +CONFIG_TEXTSEARCH_BM=m +CONFIG_TEXTSEARCH_FSM=m +CONFIG_BTREE=y +CONFIG_INTERVAL_TREE=y +CONFIG_XARRAY_MULTI=y +CONFIG_ASSOCIATIVE_ARRAY=y +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT_MAP=y +CONFIG_HAS_DMA=y +CONFIG_NEED_SG_DMA_LENGTH=y +CONFIG_NEED_DMA_MAP_STATE=y +CONFIG_ARCH_DMA_ADDR_T_64BIT=y +CONFIG_DMA_DECLARE_COHERENT=y +CONFIG_ARCH_HAS_SETUP_DMA_OPS=y +CONFIG_ARCH_HAS_TEARDOWN_DMA_OPS=y +CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE=y +CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU=y +CONFIG_ARCH_HAS_DMA_PREP_COHERENT=y +CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN=y +CONFIG_DMA_VIRT_OPS=y +CONFIG_SWIOTLB=y +CONFIG_DMA_REMAP=y +CONFIG_DMA_DIRECT_REMAP=y +# CONFIG_DMA_API_DEBUG is not set +CONFIG_SGL_ALLOC=y +CONFIG_CHECK_SIGNATURE=y +CONFIG_CPU_RMAP=y +CONFIG_DQL=y +CONFIG_GLOB=y +# CONFIG_GLOB_SELFTEST is not set +CONFIG_NLATTR=y +CONFIG_LRU_CACHE=m +CONFIG_CLZ_TAB=y +CONFIG_IRQ_POLL=y +CONFIG_MPILIB=y +CONFIG_SIGNATURE=y +CONFIG_DIMLIB=y +CONFIG_LIBFDT=y +CONFIG_OID_REGISTRY=y +CONFIG_UCS2_STRING=y +CONFIG_HAVE_GENERIC_VDSO=y +CONFIG_GENERIC_GETTIMEOFDAY=y +CONFIG_FONT_SUPPORT=y +# CONFIG_FONTS is not set +CONFIG_FONT_8x8=y +CONFIG_FONT_8x16=y +CONFIG_SG_POOL=y +CONFIG_ARCH_HAS_PMEM_API=y +CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE=y +CONFIG_STACKDEPOT=y +CONFIG_SBITMAP=y +# CONFIG_STRING_SELFTEST is not set +# end of Library routines + +# +# Kernel hacking +# + +# +# printk and dmesg options +# +CONFIG_PRINTK_TIME=y +CONFIG_PRINTK_CALLER=y +CONFIG_CONSOLE_LOGLEVEL_DEFAULT=7 +CONFIG_CONSOLE_LOGLEVEL_QUIET=4 +CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 +CONFIG_BOOT_PRINTK_DELAY=y +CONFIG_DYNAMIC_DEBUG=y +# end of printk and dmesg options + +# +# Compile-time checks and compiler options +# +CONFIG_DEBUG_INFO=y +# CONFIG_DEBUG_INFO_REDUCED is not set +# CONFIG_DEBUG_INFO_SPLIT is not set +# CONFIG_DEBUG_INFO_DWARF4 is not set +CONFIG_DEBUG_INFO_BTF=y +# CONFIG_GDB_SCRIPTS is not set +CONFIG_ENABLE_MUST_CHECK=y +CONFIG_FRAME_WARN=2048 +CONFIG_STRIP_ASM_SYMS=y +# CONFIG_READABLE_ASM is not set +CONFIG_DEBUG_FS=y +# CONFIG_HEADERS_INSTALL is not set +CONFIG_OPTIMIZE_INLINING=y +# CONFIG_DEBUG_SECTION_MISMATCH is not set +CONFIG_SECTION_MISMATCH_WARN_ONLY=y +CONFIG_ARCH_WANT_FRAME_POINTERS=y +CONFIG_FRAME_POINTER=y +# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set +# end of Compile-time checks and compiler options + +CONFIG_MAGIC_SYSRQ=y +CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0x01b6 +CONFIG_MAGIC_SYSRQ_SERIAL=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_MISC=y + +# +# Memory Debugging +# +CONFIG_PAGE_EXTENSION=y +# CONFIG_DEBUG_PAGEALLOC is not set +CONFIG_PAGE_OWNER=y +CONFIG_PAGE_POISONING=y +CONFIG_PAGE_POISONING_NO_SANITY=y +# CONFIG_PAGE_POISONING_ZERO is not set +# CONFIG_DEBUG_PAGE_REF is not set +# CONFIG_DEBUG_RODATA_TEST is not set +# CONFIG_DEBUG_OBJECTS is not set +# CONFIG_SLUB_DEBUG_ON is not set +# CONFIG_SLUB_STATS is not set +CONFIG_HAVE_DEBUG_KMEMLEAK=y +# CONFIG_DEBUG_KMEMLEAK is not set +# CONFIG_DEBUG_STACK_USAGE is not set +# CONFIG_DEBUG_VM is not set +CONFIG_ARCH_HAS_DEBUG_VIRTUAL=y +# CONFIG_DEBUG_VIRTUAL is not set +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m +# CONFIG_DEBUG_PER_CPU_MAPS is not set +CONFIG_HAVE_ARCH_KASAN=y +CONFIG_HAVE_ARCH_KASAN_SW_TAGS=y +CONFIG_CC_HAS_KASAN_GENERIC=y +# CONFIG_KASAN is not set +CONFIG_KASAN_STACK=1 +# end of Memory Debugging + +CONFIG_ARCH_HAS_KCOV=y +CONFIG_CC_HAS_SANCOV_TRACE_PC=y +# CONFIG_KCOV is not set +# CONFIG_DEBUG_SHIRQ is not set + +# +# Debug Lockups and Hangs +# +CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y +# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 +CONFIG_DETECT_HUNG_TASK=y +CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=120 +# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set +CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 +# CONFIG_WQ_WATCHDOG is not set +# end of Debug Lockups and Hangs + +# CONFIG_PANIC_ON_OOPS is not set +CONFIG_PANIC_ON_OOPS_VALUE=0 +CONFIG_PANIC_TIMEOUT=0 +CONFIG_SCHED_DEBUG=y +CONFIG_SCHED_INFO=y +CONFIG_SCHEDSTATS=y +CONFIG_SCHED_STACK_END_CHECK=y +# CONFIG_DEBUG_TIMEKEEPING is not set + +# +# Lock Debugging (spinlocks, mutexes, etc...) +# +CONFIG_LOCK_DEBUGGING_SUPPORT=y +# CONFIG_PROVE_LOCKING is not set +# CONFIG_LOCK_STAT is not set +# CONFIG_DEBUG_RT_MUTEXES is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_MUTEXES is not set +# CONFIG_DEBUG_WW_MUTEX_SLOWPATH is not set +# CONFIG_DEBUG_RWSEMS is not set +# CONFIG_DEBUG_LOCK_ALLOC is not set +# CONFIG_DEBUG_ATOMIC_SLEEP is not set +# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +# CONFIG_LOCK_TORTURE_TEST is not set +# CONFIG_WW_MUTEX_SELFTEST is not set +# end of Lock Debugging (spinlocks, mutexes, etc...) + +CONFIG_STACKTRACE=y +# CONFIG_WARN_ALL_UNSEEDED_RANDOM is not set +# CONFIG_DEBUG_KOBJECT is not set +CONFIG_HAVE_DEBUG_BUGVERBOSE=y +CONFIG_DEBUG_BUGVERBOSE=y +CONFIG_DEBUG_LIST=y +# CONFIG_DEBUG_PLIST is not set +# CONFIG_DEBUG_SG is not set +# CONFIG_DEBUG_NOTIFIERS is not set +# CONFIG_DEBUG_CREDENTIALS is not set + +# +# RCU Debugging +# +# CONFIG_RCU_PERF_TEST is not set +# CONFIG_RCU_TORTURE_TEST is not set +CONFIG_RCU_CPU_STALL_TIMEOUT=21 +# CONFIG_RCU_TRACE is not set +# CONFIG_RCU_EQS_DEBUG is not set +# end of RCU Debugging + +# CONFIG_DEBUG_WQ_FORCE_RR_CPU is not set +# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set +# CONFIG_CPU_HOTPLUG_STATE_CONTROL is not set +CONFIG_NOTIFIER_ERROR_INJECTION=m +CONFIG_PM_NOTIFIER_ERROR_INJECT=m +# CONFIG_NETDEV_NOTIFIER_ERROR_INJECT is not set +CONFIG_FUNCTION_ERROR_INJECTION=y +CONFIG_FAULT_INJECTION=y +# CONFIG_FAILSLAB is not set +# CONFIG_FAIL_PAGE_ALLOC is not set +# CONFIG_FAIL_MAKE_REQUEST is not set +# CONFIG_FAIL_IO_TIMEOUT is not set +# CONFIG_FAIL_FUTEX is not set +# CONFIG_FAULT_INJECTION_DEBUG_FS is not set +# CONFIG_LATENCYTOP is not set +CONFIG_NOP_TRACER=y +CONFIG_HAVE_FUNCTION_TRACER=y +CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y +CONFIG_HAVE_DYNAMIC_FTRACE=y +CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_HAVE_SYSCALL_TRACEPOINTS=y +CONFIG_HAVE_C_RECORDMCOUNT=y +CONFIG_TRACER_MAX_TRACE=y +CONFIG_TRACE_CLOCK=y +CONFIG_RING_BUFFER=y +CONFIG_EVENT_TRACING=y +CONFIG_CONTEXT_SWITCH_TRACER=y +CONFIG_TRACING=y +CONFIG_GENERIC_TRACER=y +CONFIG_TRACING_SUPPORT=y +CONFIG_FTRACE=y +CONFIG_FUNCTION_TRACER=y +CONFIG_FUNCTION_GRAPH_TRACER=y +# CONFIG_PREEMPTIRQ_EVENTS is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_HWLAT_TRACER is not set +CONFIG_FTRACE_SYSCALLS=y +CONFIG_TRACER_SNAPSHOT=y +# CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP is not set +CONFIG_BRANCH_PROFILE_NONE=y +# CONFIG_PROFILE_ANNOTATED_BRANCHES is not set +CONFIG_STACK_TRACER=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_KPROBE_EVENTS=y +# CONFIG_KPROBE_EVENTS_ON_NOTRACE is not set +CONFIG_UPROBE_EVENTS=y +CONFIG_BPF_EVENTS=y +CONFIG_DYNAMIC_EVENTS=y +CONFIG_PROBE_EVENTS=y +CONFIG_DYNAMIC_FTRACE=y +# CONFIG_FUNCTION_PROFILER is not set +# CONFIG_BPF_KPROBE_OVERRIDE is not set +CONFIG_FTRACE_MCOUNT_RECORD=y +# CONFIG_FTRACE_STARTUP_TEST is not set +# CONFIG_HIST_TRIGGERS is not set +# CONFIG_TRACEPOINT_BENCHMARK is not set +# CONFIG_RING_BUFFER_BENCHMARK is not set +# CONFIG_RING_BUFFER_STARTUP_TEST is not set +# CONFIG_PREEMPTIRQ_DELAY_TEST is not set +# CONFIG_TRACE_EVAL_MAP_FILE is not set +CONFIG_RUNTIME_TESTING_MENU=y +# CONFIG_LKDTM is not set +# CONFIG_TEST_LIST_SORT is not set +# CONFIG_TEST_SORT is not set +# CONFIG_KPROBES_SANITY_TEST is not set +# CONFIG_BACKTRACE_SELF_TEST is not set +# CONFIG_RBTREE_TEST is not set +# CONFIG_REED_SOLOMON_TEST is not set +# CONFIG_INTERVAL_TREE_TEST is not set +# CONFIG_PERCPU_TEST is not set +# CONFIG_ATOMIC64_SELFTEST is not set +# CONFIG_ASYNC_RAID6_TEST is not set +# CONFIG_TEST_HEXDUMP is not set +# CONFIG_TEST_STRING_HELPERS is not set +# CONFIG_TEST_STRSCPY is not set +# CONFIG_TEST_KSTRTOX is not set +# CONFIG_TEST_PRINTF is not set +# CONFIG_TEST_BITMAP is not set +# CONFIG_TEST_BITFIELD is not set +# CONFIG_TEST_UUID is not set +# CONFIG_TEST_XARRAY is not set +# CONFIG_TEST_OVERFLOW is not set +# CONFIG_TEST_RHASHTABLE is not set +# CONFIG_TEST_HASH is not set +# CONFIG_TEST_IDA is not set +# CONFIG_TEST_LKM is not set +# CONFIG_TEST_VMALLOC is not set +CONFIG_TEST_USER_COPY=m +CONFIG_TEST_BPF=m +# CONFIG_TEST_BLACKHOLE_DEV is not set +# CONFIG_FIND_BIT_BENCHMARK is not set +CONFIG_TEST_FIRMWARE=m +# CONFIG_TEST_SYSCTL is not set +# CONFIG_TEST_UDELAY is not set +CONFIG_TEST_STATIC_KEYS=m +# CONFIG_TEST_KMOD is not set +# CONFIG_TEST_MEMCAT_P is not set +# CONFIG_TEST_STACKINIT is not set +# CONFIG_TEST_MEMINIT is not set +# CONFIG_MEMTEST is not set +CONFIG_BUG_ON_DATA_CORRUPTION=y +# CONFIG_SAMPLES is not set +CONFIG_HAVE_ARCH_KGDB=y +# CONFIG_KGDB is not set +CONFIG_ARCH_HAS_UBSAN_SANITIZE_ALL=y +# CONFIG_UBSAN is not set +CONFIG_UBSAN_ALIGNMENT=y +CONFIG_ARCH_HAS_DEVMEM_IS_ALLOWED=y +CONFIG_STRICT_DEVMEM=y +# CONFIG_IO_STRICT_DEVMEM is not set +# CONFIG_ARM64_PTDUMP_DEBUGFS is not set +# CONFIG_PID_IN_CONTEXTIDR is not set +# CONFIG_ARM64_RANDOMIZE_TEXT_OFFSET is not set +# CONFIG_DEBUG_WX is not set +# CONFIG_DEBUG_ALIGN_RODATA is not set +# CONFIG_DEBUG_EFI is not set +# CONFIG_ARM64_RELOC_TEST is not set +# CONFIG_CORESIGHT is not set +# end of Kernel hacking diff --git a/config.x86_64 b/config.x86_64 index 0633c4302c60d..6a16759ff5d14 100644 --- a/config.x86_64 +++ b/config.x86_64 @@ -133,6 +133,7 @@ CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y # # Scheduler features # +CONFIG_BYTEDANCE_BURST_SCHED=y # CONFIG_UCLAMP_TASK is not set # end of Scheduler features @@ -156,7 +157,7 @@ CONFIG_CFS_BANDWIDTH=y CONFIG_CGROUP_PIDS=y # CONFIG_CGROUP_RDMA is not set CONFIG_CGROUP_FREEZER=y -# CONFIG_CGROUP_HUGETLB is not set +CONFIG_CGROUP_HUGETLB=y CONFIG_CPUSETS=y CONFIG_PROC_PID_CPUSET=y CONFIG_CGROUP_DEVICE=y @@ -851,6 +852,7 @@ CONFIG_BLK_DEV_THROTTLING=y # CONFIG_BLK_CMDLINE_PARSER is not set CONFIG_BLK_WBT=y CONFIG_BLK_CGROUP_IOLATENCY=y +CONFIG_BYTEDANCE_BLK_CGROUP_IOTRACE=y CONFIG_BLK_CGROUP_IOCOST=y CONFIG_BLK_WBT_MQ=y CONFIG_BLK_DEBUG_FS=y @@ -1032,10 +1034,10 @@ CONFIG_XFRM=y CONFIG_XFRM_OFFLOAD=y CONFIG_XFRM_ALGO=m CONFIG_XFRM_USER=m -# CONFIG_XFRM_INTERFACE is not set +CONFIG_XFRM_INTERFACE=m CONFIG_XFRM_SUB_POLICY=y CONFIG_XFRM_MIGRATE=y -# CONFIG_XFRM_STATISTICS is not set +CONFIG_XFRM_STATISTICS=y CONFIG_XFRM_IPCOMP=m CONFIG_NET_KEY=m CONFIG_NET_KEY_MIGRATE=y @@ -1211,7 +1213,7 @@ CONFIG_NFT_COMPAT=m CONFIG_NFT_HASH=m CONFIG_NFT_FIB=m CONFIG_NFT_FIB_INET=m -# CONFIG_NFT_XFRM is not set +CONFIG_NFT_XFRM=m CONFIG_NFT_SOCKET=m CONFIG_NFT_OSF=m CONFIG_NFT_TPROXY=m @@ -2329,7 +2331,7 @@ CONFIG_EEPROM_MAX6875=m CONFIG_EEPROM_93CX6=m # CONFIG_EEPROM_93XX46 is not set # CONFIG_EEPROM_IDT_89HPESX is not set -# CONFIG_EEPROM_EE1004 is not set +CONFIG_EEPROM_EE1004=m # end of EEPROM support CONFIG_CB710_CORE=m @@ -2954,7 +2956,7 @@ CONFIG_I40EVF=m # CONFIG_IGC is not set CONFIG_JME=m CONFIG_NET_VENDOR_MARVELL=y -# CONFIG_MVMDIO is not set +CONFIG_MVMDIO=m CONFIG_SKGE=m # CONFIG_SKGE_DEBUG is not set CONFIG_SKGE_GENESIS=y @@ -3995,8 +3997,8 @@ CONFIG_I2C_MUX=m # # CONFIG_I2C_MUX_GPIO is not set # CONFIG_I2C_MUX_LTC4306 is not set -# CONFIG_I2C_MUX_PCA9541 is not set -# CONFIG_I2C_MUX_PCA954x is not set +CONFIG_I2C_MUX_PCA9541=m +CONFIG_I2C_MUX_PCA954x=m # CONFIG_I2C_MUX_REG is not set # CONFIG_I2C_MUX_MLXCPLD is not set # end of Multiplexer I2C Chip support @@ -4049,7 +4051,7 @@ CONFIG_I2C_DESIGNWARE_PLATFORM=y CONFIG_I2C_DESIGNWARE_PCI=m # CONFIG_I2C_DESIGNWARE_BAYTRAIL is not set # CONFIG_I2C_EMEV2 is not set -# CONFIG_I2C_GPIO is not set +CONFIG_I2C_GPIO=m CONFIG_I2C_KEMPLD=m CONFIG_I2C_OCORES=m CONFIG_I2C_PCA_PLATFORM=m @@ -4159,7 +4161,7 @@ CONFIG_PINCTRL_INTEL=y CONFIG_PINCTRL_BROXTON=y # CONFIG_PINCTRL_CANNONLAKE is not set # CONFIG_PINCTRL_CEDARFORK is not set -# CONFIG_PINCTRL_DENVERTON is not set +CONFIG_PINCTRL_DENVERTON=m # CONFIG_PINCTRL_GEMINILAKE is not set # CONFIG_PINCTRL_ICELAKE is not set # CONFIG_PINCTRL_LEWISBURG is not set @@ -6564,7 +6566,7 @@ CONFIG_USB_EHSET_TEST_FIXTURE=m CONFIG_USB_ISIGHTFW=m CONFIG_USB_YUREX=m CONFIG_USB_EZUSB_FX2=m -# CONFIG_USB_HUB_USB251XB is not set +CONFIG_USB_HUB_USB251XB=m # CONFIG_USB_HSIC_USB3503 is not set # CONFIG_USB_HSIC_USB4604 is not set # CONFIG_USB_LINK_LAYER_TEST is not set @@ -7039,7 +7041,7 @@ CONFIG_UIO_MF624=m CONFIG_VFIO_IOMMU_TYPE1=m CONFIG_VFIO_VIRQFD=m CONFIG_VFIO=m -# CONFIG_VFIO_NOIOMMU is not set +CONFIG_VFIO_NOIOMMU=y CONFIG_VFIO_PCI=m CONFIG_VFIO_PCI_VGA=y CONFIG_VFIO_PCI_MMAP=y @@ -8232,6 +8234,7 @@ CONFIG_FSCACHE_STATS=y CONFIG_CACHEFILES=m # CONFIG_CACHEFILES_DEBUG is not set # CONFIG_CACHEFILES_HISTOGRAM is not set +CONFIG_CACHEFILES_ONDEMAND=y # end of Caches # @@ -8275,8 +8278,8 @@ CONFIG_TMPFS_POSIX_ACL=y CONFIG_TMPFS_XATTR=y CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y -CONFIG_HUGETLB_PAGE_FREE_VMEMMAP=y -# CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON is not set +CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP=y +# CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON is not set CONFIG_MEMFD_CREATE=y CONFIG_ARCH_HAS_GIGANTIC_PAGE=y CONFIG_CONFIGFS_FS=m @@ -8369,7 +8372,11 @@ CONFIG_SYSV_FS=m CONFIG_UFS_FS=m # CONFIG_UFS_FS_WRITE is not set # CONFIG_UFS_DEBUG is not set -# CONFIG_EROFS_FS is not set +CONFIG_EROFS_FS=m +# CONFIG_EROFS_FS_DEBUG is not set +# CONFIG_EROFS_FS_XATTR is not set +# CONFIG_EROFS_FS_ZIP is not set +CONFIG_EROFS_FS_ONDEMAND=y CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=m CONFIG_NFS_V2=m @@ -8515,16 +8522,25 @@ CONFIG_PAGE_TABLE_ISOLATION=y CONFIG_SECURITY_NETWORK_XFRM=y CONFIG_SECURITY_PATH=y # CONFIG_INTEL_TXT is not set +CONFIG_LSM_MMAP_MIN_ADDR=65536 CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y CONFIG_HARDENED_USERCOPY=y # CONFIG_HARDENED_USERCOPY_FALLBACK is not set # CONFIG_HARDENED_USERCOPY_PAGESPAN is not set CONFIG_FORTIFY_SOURCE=y # CONFIG_STATIC_USERMODEHELPER is not set -# CONFIG_SECURITY_SELINUX is not set +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +# CONFIG_SECURITY_SELINUX_DISABLE is not set +CONFIG_SECURITY_SELINUX_DEVELOP=y +CONFIG_SECURITY_SELINUX_AVC_STATS=y +CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=0 # CONFIG_SECURITY_SMACK is not set # CONFIG_SECURITY_TOMOYO is not set -# CONFIG_SECURITY_APPARMOR is not set +CONFIG_SECURITY_APPARMOR=y +CONFIG_SECURITY_APPARMOR_HASH=y +CONFIG_SECURITY_APPARMOR_HASH_DEFAULT=y +# CONFIG_SECURITY_APPARMOR_DEBUG is not set # CONFIG_SECURITY_LOADPIN is not set CONFIG_SECURITY_YAMA=y # CONFIG_SECURITY_SAFESETID is not set @@ -8536,6 +8552,8 @@ CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y CONFIG_INTEGRITY_AUDIT=y # CONFIG_IMA is not set # CONFIG_EVM is not set +# CONFIG_DEFAULT_SECURITY_SELINUX is not set +# CONFIG_DEFAULT_SECURITY_APPARMOR is not set CONFIG_DEFAULT_SECURITY_DAC=y CONFIG_LSM="lockdown,yama,loadpin,safesetid,integrity" @@ -8671,7 +8689,9 @@ CONFIG_CRYPTO_LIB_SHA256=y CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m -# CONFIG_CRYPTO_SM3 is not set +CONFIG_CRYPTO_LIB_SM3=m +CONFIG_CRYPTO_SM3=m +CONFIG_CRYPTO_SM3_AVX_X86_64=m # CONFIG_CRYPTO_STREEBOG is not set CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m @@ -8712,7 +8732,10 @@ CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_SERPENT_SSE2_X86_64=m CONFIG_CRYPTO_SERPENT_AVX_X86_64=m CONFIG_CRYPTO_SERPENT_AVX2_X86_64=m -# CONFIG_CRYPTO_SM4 is not set +CONFIG_CRYPTO_LIB_SM4=m +CONFIG_CRYPTO_SM4=m +CONFIG_CRYPTO_SM4_AESNI_AVX_X86_64=m +CONFIG_CRYPTO_SM4_AESNI_AVX2_X86_64=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_TWOFISH_COMMON=m @@ -8774,7 +8797,7 @@ CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y CONFIG_X509_CERTIFICATE_PARSER=y -# CONFIG_PKCS8_PRIVATE_KEY_PARSER is not set +CONFIG_PKCS8_PRIVATE_KEY_PARSER=m CONFIG_PKCS7_MESSAGE_PARSER=y # CONFIG_PKCS7_TEST_KEY is not set CONFIG_SIGNED_PE_FILE_VERIFICATION=y @@ -8911,7 +8934,7 @@ CONFIG_SBITMAP=y # printk and dmesg options # CONFIG_PRINTK_TIME=y -# CONFIG_PRINTK_CALLER is not set +CONFIG_PRINTK_CALLER=y CONFIG_CONSOLE_LOGLEVEL_DEFAULT=7 CONFIG_CONSOLE_LOGLEVEL_QUIET=4 CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 diff --git a/crypto/Kconfig b/crypto/Kconfig index b2cc0ad3792ad..b75e382e8508f 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -242,12 +242,12 @@ config CRYPTO_DH config CRYPTO_ECC tristate + select CRYPTO_RNG_DEFAULT config CRYPTO_ECDH tristate "ECDH algorithm" select CRYPTO_ECC select CRYPTO_KPP - select CRYPTO_RNG_DEFAULT help Generic implementation of the ECDH algorithm @@ -961,9 +961,13 @@ config CRYPTO_SHA3 References: http://keccak.noekeon.org/ +config CRYPTO_LIB_SM3 + tristate + config CRYPTO_SM3 tristate "SM3 digest algorithm" select CRYPTO_HASH + select CRYPTO_LIB_SM3 help SM3 secure hash function as defined by OSCCA GM/T 0004-2012 SM3). It is part of the Chinese Commercial Cryptography suite. @@ -972,6 +976,19 @@ config CRYPTO_SM3 http://www.oscca.gov.cn/UpFile/20101222141857786.pdf https://datatracker.ietf.org/doc/html/draft-shen-sm3-hash +config CRYPTO_SM3_AVX_X86_64 + tristate "SM3 digest algorithm (x86_64/AVX)" + depends on X86 && 64BIT + select CRYPTO_HASH + select CRYPTO_LIB_SM3 + help + SM3 secure hash function as defined by OSCCA GM/T 0004-2012 SM3). + It is part of the Chinese Commercial Cryptography suite. This is + SM3 optimized implementation using Advanced Vector Extensions (AVX) + when available. + + If unsure, say N. + config CRYPTO_STREEBOG tristate "Streebog Hash Function" select CRYPTO_HASH @@ -1534,9 +1551,13 @@ config CRYPTO_SERPENT_AVX2_X86_64 See also: +config CRYPTO_LIB_SM4 + tristate + config CRYPTO_SM4 tristate "SM4 cipher algorithm" select CRYPTO_ALGAPI + select CRYPTO_LIB_SM4 help SM4 cipher algorithms (OSCCA GB/T 32907-2016). @@ -1559,6 +1580,49 @@ config CRYPTO_SM4 If unsure, say N. +config CRYPTO_SM4_AESNI_AVX_X86_64 + tristate "SM4 cipher algorithm (x86_64/AES-NI/AVX)" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_SIMD + select CRYPTO_ALGAPI + select CRYPTO_LIB_SM4 + help + SM4 cipher algorithms (OSCCA GB/T 32907-2016) (x86_64/AES-NI/AVX). + + SM4 (GBT.32907-2016) is a cryptographic standard issued by the + Organization of State Commercial Administration of China (OSCCA) + as an authorized cryptographic algorithms for the use within China. + + This is SM4 optimized implementation using AES-NI/AVX/x86_64 + instruction set for block cipher. Through two affine transforms, + we can use the AES S-Box to simulate the SM4 S-Box to achieve the + effect of instruction acceleration. + + If unsure, say N. + +config CRYPTO_SM4_AESNI_AVX2_X86_64 + tristate "SM4 cipher algorithm (x86_64/AES-NI/AVX2)" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_SIMD + select CRYPTO_ALGAPI + select CRYPTO_LIB_SM4 + select CRYPTO_SM4_AESNI_AVX_X86_64 + help + SM4 cipher algorithms (OSCCA GB/T 32907-2016) (x86_64/AES-NI/AVX2). + + SM4 (GBT.32907-2016) is a cryptographic standard issued by the + Organization of State Commercial Administration of China (OSCCA) + as an authorized cryptographic algorithms for the use within China. + + This is SM4 optimized implementation using AES-NI/AVX2/x86_64 + instruction set for block cipher. Through two affine transforms, + we can use the AES S-Box to simulate the SM4 S-Box to achieve the + effect of instruction acceleration. + + If unsure, say N. + config CRYPTO_TEA tristate "TEA, XTEA and XETA cipher algorithms" select CRYPTO_ALGAPI diff --git a/crypto/algapi.c b/crypto/algapi.c index fff52bc9d97d0..5ccbb95b88b54 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -924,6 +924,14 @@ int crypto_enqueue_request(struct crypto_queue *queue, } EXPORT_SYMBOL_GPL(crypto_enqueue_request); +void crypto_enqueue_request_head(struct crypto_queue *queue, + struct crypto_async_request *request) +{ + queue->qlen++; + list_add(&request->list, &queue->list); +} +EXPORT_SYMBOL_GPL(crypto_enqueue_request_head); + struct crypto_async_request *crypto_dequeue_request(struct crypto_queue *queue) { struct list_head *request; diff --git a/crypto/authenc.c b/crypto/authenc.c index 3f0ed94025820..321da90389957 100644 --- a/crypto/authenc.c +++ b/crypto/authenc.c @@ -263,7 +263,7 @@ static int crypto_authenc_decrypt_tail(struct aead_request *req, dst = scatterwalk_ffwd(areq_ctx->dst, req->dst, req->assoclen); skcipher_request_set_tfm(skreq, ctx->enc); - skcipher_request_set_callback(skreq, aead_request_flags(req), + skcipher_request_set_callback(skreq, flags, req->base.complete, req->base.data); skcipher_request_set_crypt(skreq, src, dst, req->cryptlen - authsize, req->iv); diff --git a/crypto/cryptd.c b/crypto/cryptd.c index 927760b316a4d..43a1a855886bd 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -39,6 +39,10 @@ struct cryptd_cpu_queue { }; struct cryptd_queue { + /* + * Protected by disabling BH to allow enqueueing from softinterrupt and + * dequeuing from kworker (cryptd_queue_worker()). + */ struct cryptd_cpu_queue __percpu *cpu_queue; }; @@ -125,28 +129,28 @@ static void cryptd_fini_queue(struct cryptd_queue *queue) static int cryptd_enqueue_request(struct cryptd_queue *queue, struct crypto_async_request *request) { - int cpu, err; + int err; struct cryptd_cpu_queue *cpu_queue; refcount_t *refcnt; - cpu = get_cpu(); + local_bh_disable(); cpu_queue = this_cpu_ptr(queue->cpu_queue); err = crypto_enqueue_request(&cpu_queue->queue, request); refcnt = crypto_tfm_ctx(request->tfm); if (err == -ENOSPC) - goto out_put_cpu; + goto out; - queue_work_on(cpu, cryptd_wq, &cpu_queue->work); + queue_work_on(smp_processor_id(), cryptd_wq, &cpu_queue->work); if (!refcount_read(refcnt)) - goto out_put_cpu; + goto out; refcount_inc(refcnt); -out_put_cpu: - put_cpu(); +out: + local_bh_enable(); return err; } @@ -162,15 +166,10 @@ static void cryptd_queue_worker(struct work_struct *work) cpu_queue = container_of(work, struct cryptd_cpu_queue, work); /* * Only handle one request at a time to avoid hogging crypto workqueue. - * preempt_disable/enable is used to prevent being preempted by - * cryptd_enqueue_request(). local_bh_disable/enable is used to prevent - * cryptd_enqueue_request() being accessed from software interrupts. */ local_bh_disable(); - preempt_disable(); backlog = crypto_get_backlog(&cpu_queue->queue); req = crypto_dequeue_request(&cpu_queue->queue); - preempt_enable(); local_bh_enable(); if (!req) diff --git a/crypto/crypto_engine.c b/crypto/crypto_engine.c index 055d179772807..8fe1bb578ab03 100644 --- a/crypto/crypto_engine.c +++ b/crypto/crypto_engine.c @@ -22,32 +22,36 @@ * @err: error number */ static void crypto_finalize_request(struct crypto_engine *engine, - struct crypto_async_request *req, int err) + struct crypto_async_request *req, int err) { unsigned long flags; - bool finalize_cur_req = false; + bool finalize_req = false; int ret; struct crypto_engine_ctx *enginectx; - spin_lock_irqsave(&engine->queue_lock, flags); - if (engine->cur_req == req) - finalize_cur_req = true; - spin_unlock_irqrestore(&engine->queue_lock, flags); + /* + * If hardware cannot enqueue more requests + * and retry mechanism is not supported + * make sure we are completing the current request + */ + if (!engine->retry_support) { + spin_lock_irqsave(&engine->queue_lock, flags); + if (engine->cur_req == req) { + finalize_req = true; + engine->cur_req = NULL; + } + spin_unlock_irqrestore(&engine->queue_lock, flags); + } - if (finalize_cur_req) { + if (finalize_req || engine->retry_support) { enginectx = crypto_tfm_ctx(req->tfm); - if (engine->cur_req_prepared && + if (enginectx->op.prepare_request && enginectx->op.unprepare_request) { ret = enginectx->op.unprepare_request(engine, req); if (ret) dev_err(engine->dev, "failed to unprepare request\n"); } - spin_lock_irqsave(&engine->queue_lock, flags); - engine->cur_req = NULL; - engine->cur_req_prepared = false; - spin_unlock_irqrestore(&engine->queue_lock, flags); } - req->complete(req, err); kthread_queue_work(engine->kworker, &engine->pump_requests); @@ -74,7 +78,7 @@ static void crypto_pump_requests(struct crypto_engine *engine, spin_lock_irqsave(&engine->queue_lock, flags); /* Make sure we are not already running a request */ - if (engine->cur_req) + if (!engine->retry_support && engine->cur_req) goto out; /* If another context is idling then defer */ @@ -108,13 +112,21 @@ static void crypto_pump_requests(struct crypto_engine *engine, goto out; } +start_request: /* Get the fist request from the engine queue to handle */ backlog = crypto_get_backlog(&engine->queue); async_req = crypto_dequeue_request(&engine->queue); if (!async_req) goto out; - engine->cur_req = async_req; + /* + * If hardware doesn't support the retry mechanism, + * keep track of the request we are processing now. + * We'll need it on completion (crypto_finalize_request). + */ + if (!engine->retry_support) + engine->cur_req = async_req; + if (backlog) backlog->complete(backlog, -EINPROGRESS); @@ -130,7 +142,7 @@ static void crypto_pump_requests(struct crypto_engine *engine, ret = engine->prepare_crypt_hardware(engine); if (ret) { dev_err(engine->dev, "failed to prepare crypt hardware\n"); - goto req_err; + goto req_err_2; } } @@ -141,28 +153,90 @@ static void crypto_pump_requests(struct crypto_engine *engine, if (ret) { dev_err(engine->dev, "failed to prepare request: %d\n", ret); - goto req_err; + goto req_err_2; } - engine->cur_req_prepared = true; } if (!enginectx->op.do_one_request) { dev_err(engine->dev, "failed to do request\n"); ret = -EINVAL; - goto req_err; + goto req_err_1; } + ret = enginectx->op.do_one_request(engine, async_req); - if (ret) { - dev_err(engine->dev, "Failed to do one request from queue: %d\n", ret); - goto req_err; + + /* Request unsuccessfully executed by hardware */ + if (ret < 0) { + /* + * If hardware queue is full (-ENOSPC), requeue request + * regardless of backlog flag. + * Otherwise, unprepare and complete the request. + */ + if (!engine->retry_support || + (ret != -ENOSPC)) { + dev_err(engine->dev, + "Failed to do one request from queue: %d\n", + ret); + goto req_err_1; + } + /* + * If retry mechanism is supported, + * unprepare current request and + * enqueue it back into crypto-engine queue. + */ + if (enginectx->op.unprepare_request) { + ret = enginectx->op.unprepare_request(engine, + async_req); + if (ret) + dev_err(engine->dev, + "failed to unprepare request\n"); + } + spin_lock_irqsave(&engine->queue_lock, flags); + /* + * If hardware was unable to execute request, enqueue it + * back in front of crypto-engine queue, to keep the order + * of requests. + */ + crypto_enqueue_request_head(&engine->queue, async_req); + + kthread_queue_work(engine->kworker, &engine->pump_requests); + goto out; } - return; -req_err: - crypto_finalize_request(engine, async_req, ret); + goto retry; + +req_err_1: + if (enginectx->op.unprepare_request) { + ret = enginectx->op.unprepare_request(engine, async_req); + if (ret) + dev_err(engine->dev, "failed to unprepare request\n"); + } + +req_err_2: + async_req->complete(async_req, ret); + +retry: + /* If retry mechanism is supported, send new requests to engine */ + if (engine->retry_support) { + spin_lock_irqsave(&engine->queue_lock, flags); + goto start_request; + } return; out: spin_unlock_irqrestore(&engine->queue_lock, flags); + + /* + * Batch requests is possible only if + * hardware can enqueue multiple requests + */ + if (engine->do_batch_requests) { + ret = engine->do_batch_requests(engine); + if (ret) + dev_err(engine->dev, "failed to do batch requests: %d\n", + ret); + } + + return; } static void crypto_pump_work(struct kthread_work *work) @@ -415,15 +489,27 @@ int crypto_engine_stop(struct crypto_engine *engine) EXPORT_SYMBOL_GPL(crypto_engine_stop); /** - * crypto_engine_alloc_init - allocate crypto hardware engine structure and - * initialize it. + * crypto_engine_alloc_init_and_set - allocate crypto hardware engine structure + * and initialize it by setting the maximum number of entries in the software + * crypto-engine queue. * @dev: the device attached with one hardware engine + * @retry_support: whether hardware has support for retry mechanism + * @cbk_do_batch: pointer to a callback function to be invoked when executing a + * a batch of requests. + * This has the form: + * callback(struct crypto_engine *engine) + * where: + * @engine: the crypto engine structure. * @rt: whether this queue is set to run as a realtime task + * @qlen: maximum size of the crypto-engine queue * * This must be called from context that can sleep. * Return: the crypto engine structure on success, else NULL. */ -struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt) +struct crypto_engine *crypto_engine_alloc_init_and_set(struct device *dev, + bool retry_support, + int (*cbk_do_batch)(struct crypto_engine *engine), + bool rt, int qlen) { struct sched_param param = { .sched_priority = MAX_RT_PRIO / 2 }; struct crypto_engine *engine; @@ -440,12 +526,18 @@ struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt) engine->running = false; engine->busy = false; engine->idling = false; - engine->cur_req_prepared = false; + engine->retry_support = retry_support; engine->priv_data = dev; + /* + * Batch requests is possible only if + * hardware has support for retry mechanism. + */ + engine->do_batch_requests = retry_support ? cbk_do_batch : NULL; + snprintf(engine->name, sizeof(engine->name), "%s-engine", dev_name(dev)); - crypto_init_queue(&engine->queue, CRYPTO_ENGINE_MAX_QLEN); + crypto_init_queue(&engine->queue, qlen); spin_lock_init(&engine->queue_lock); engine->kworker = kthread_create_worker(0, "%s", engine->name); @@ -462,6 +554,22 @@ struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt) return engine; } +EXPORT_SYMBOL_GPL(crypto_engine_alloc_init_and_set); + +/** + * crypto_engine_alloc_init - allocate crypto hardware engine structure and + * initialize it. + * @dev: the device attached with one hardware engine + * @rt: whether this queue is set to run as a realtime task + * + * This must be called from context that can sleep. + * Return: the crypto engine structure on success, else NULL. + */ +struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt) +{ + return crypto_engine_alloc_init_and_set(dev, false, NULL, rt, + CRYPTO_ENGINE_MAX_QLEN); +} EXPORT_SYMBOL_GPL(crypto_engine_alloc_init); /** diff --git a/crypto/drbg.c b/crypto/drbg.c index 04379ca624cd8..9329d9dcc210f 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c @@ -1035,17 +1035,38 @@ static const struct drbg_state_ops drbg_hash_ops = { ******************************************************************/ static inline int __drbg_seed(struct drbg_state *drbg, struct list_head *seed, - int reseed) + int reseed, enum drbg_seed_state new_seed_state) { int ret = drbg->d_ops->update(drbg, seed, reseed); if (ret) return ret; - drbg->seeded = true; + drbg->seeded = new_seed_state; /* 10.1.1.2 / 10.1.1.3 step 5 */ drbg->reseed_ctr = 1; + switch (drbg->seeded) { + case DRBG_SEED_STATE_UNSEEDED: + /* Impossible, but handle it to silence compiler warnings. */ + fallthrough; + case DRBG_SEED_STATE_PARTIAL: + /* + * Require frequent reseeds until the seed source is + * fully initialized. + */ + drbg->reseed_threshold = 50; + break; + + case DRBG_SEED_STATE_FULL: + /* + * Seed source has become fully initialized, frequent + * reseeds no longer required. + */ + drbg->reseed_threshold = drbg_max_requests(drbg); + break; + } + return ret; } @@ -1065,12 +1086,10 @@ static inline int drbg_get_random_bytes(struct drbg_state *drbg, return 0; } -static void drbg_async_seed(struct work_struct *work) +static int drbg_seed_from_random(struct drbg_state *drbg) { struct drbg_string data; LIST_HEAD(seedlist); - struct drbg_state *drbg = container_of(work, struct drbg_state, - seed_work); unsigned int entropylen = drbg_sec_strength(drbg->core->flags); unsigned char entropy[32]; int ret; @@ -1081,30 +1100,15 @@ static void drbg_async_seed(struct work_struct *work) drbg_string_fill(&data, entropy, entropylen); list_add_tail(&data.list, &seedlist); - mutex_lock(&drbg->drbg_mutex); - ret = drbg_get_random_bytes(drbg, entropy, entropylen); if (ret) - goto unlock; - - /* If nonblocking pool is initialized, deactivate Jitter RNG */ - crypto_free_rng(drbg->jent); - drbg->jent = NULL; - - /* Set seeded to false so that if __drbg_seed fails the - * next generate call will trigger a reseed. - */ - drbg->seeded = false; - - __drbg_seed(drbg, &seedlist, true); - - if (drbg->seeded) - drbg->reseed_threshold = drbg_max_requests(drbg); + goto out; -unlock: - mutex_unlock(&drbg->drbg_mutex); + ret = __drbg_seed(drbg, &seedlist, true, DRBG_SEED_STATE_FULL); +out: memzero_explicit(entropy, entropylen); + return ret; } /* @@ -1126,6 +1130,7 @@ static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers, unsigned int entropylen = drbg_sec_strength(drbg->core->flags); struct drbg_string data1; LIST_HEAD(seedlist); + enum drbg_seed_state new_seed_state = DRBG_SEED_STATE_FULL; /* 9.1 / 9.2 / 9.3.1 step 3 */ if (pers && pers->len > (drbg_max_addtl(drbg))) { @@ -1153,6 +1158,9 @@ static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers, BUG_ON((entropylen * 2) > sizeof(entropy)); /* Get seed from in-kernel /dev/urandom */ + if (!rng_is_initialized()) + new_seed_state = DRBG_SEED_STATE_PARTIAL; + ret = drbg_get_random_bytes(drbg, entropy, entropylen); if (ret) goto out; @@ -1168,7 +1176,23 @@ static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers, entropylen); if (ret) { pr_devel("DRBG: jent failed with %d\n", ret); - goto out; + + /* + * Do not treat the transient failure of the + * Jitter RNG as an error that needs to be + * reported. The combined number of the + * maximum reseed threshold times the maximum + * number of Jitter RNG transient errors is + * less than the reseed threshold required by + * SP800-90A allowing us to treat the + * transient errors as such. + * + * However, we mandate that at least the first + * seeding operation must succeed with the + * Jitter RNG. + */ + if (!reseed || ret != -EAGAIN) + goto out; } drbg_string_fill(&data1, entropy, entropylen * 2); @@ -1193,7 +1217,7 @@ static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers, memset(drbg->C, 0, drbg_statelen(drbg)); } - ret = __drbg_seed(drbg, &seedlist, reseed); + ret = __drbg_seed(drbg, &seedlist, reseed, new_seed_state); out: memzero_explicit(entropy, entropylen * 2); @@ -1373,19 +1397,25 @@ static int drbg_generate(struct drbg_state *drbg, * here. The spec is a bit convoluted here, we make it simpler. */ if (drbg->reseed_threshold < drbg->reseed_ctr) - drbg->seeded = false; + drbg->seeded = DRBG_SEED_STATE_UNSEEDED; - if (drbg->pr || !drbg->seeded) { + if (drbg->pr || drbg->seeded == DRBG_SEED_STATE_UNSEEDED) { pr_devel("DRBG: reseeding before generation (prediction " "resistance: %s, state %s)\n", drbg->pr ? "true" : "false", - drbg->seeded ? "seeded" : "unseeded"); + (drbg->seeded == DRBG_SEED_STATE_FULL ? + "seeded" : "unseeded")); /* 9.3.1 steps 7.1 through 7.3 */ len = drbg_seed(drbg, addtl, true); if (len) goto err; /* 9.3.1 step 7.4 */ addtl = NULL; + } else if (rng_is_initialized() && + drbg->seeded == DRBG_SEED_STATE_PARTIAL) { + len = drbg_seed_from_random(drbg); + if (len) + goto err; } if (addtl && 0 < addtl->len) @@ -1478,51 +1508,15 @@ static int drbg_generate_long(struct drbg_state *drbg, return 0; } -static void drbg_schedule_async_seed(struct random_ready_callback *rdy) -{ - struct drbg_state *drbg = container_of(rdy, struct drbg_state, - random_ready); - - schedule_work(&drbg->seed_work); -} - static int drbg_prepare_hrng(struct drbg_state *drbg) { - int err; - /* We do not need an HRNG in test mode. */ if (list_empty(&drbg->test_data.list)) return 0; - INIT_WORK(&drbg->seed_work, drbg_async_seed); - - drbg->random_ready.owner = THIS_MODULE; - drbg->random_ready.func = drbg_schedule_async_seed; - - err = add_random_ready_callback(&drbg->random_ready); - - switch (err) { - case 0: - break; - - case -EALREADY: - err = 0; - /* fall through */ - - default: - drbg->random_ready.func = NULL; - return err; - } - drbg->jent = crypto_alloc_rng("jitterentropy_rng", 0, 0); - /* - * Require frequent reseeds until the seed source is fully - * initialized. - */ - drbg->reseed_threshold = 50; - - return err; + return 0; } /* @@ -1565,7 +1559,7 @@ static int drbg_instantiate(struct drbg_state *drbg, struct drbg_string *pers, if (!drbg->core) { drbg->core = &drbg_cores[coreref]; drbg->pr = pr; - drbg->seeded = false; + drbg->seeded = DRBG_SEED_STATE_UNSEEDED; drbg->reseed_threshold = drbg_max_requests(drbg); ret = drbg_alloc_state(drbg); @@ -1616,12 +1610,9 @@ static int drbg_instantiate(struct drbg_state *drbg, struct drbg_string *pers, */ static int drbg_uninstantiate(struct drbg_state *drbg) { - if (drbg->random_ready.func) { - del_random_ready_callback(&drbg->random_ready); - cancel_work_sync(&drbg->seed_work); + if (!IS_ERR_OR_NULL(drbg->jent)) crypto_free_rng(drbg->jent); - drbg->jent = NULL; - } + drbg->jent = NULL; if (drbg->d_ops) drbg->d_ops->crypto_fini(drbg); diff --git a/crypto/ecrdsa.c b/crypto/ecrdsa.c index 887ec21aee494..1ffcea7b03558 100644 --- a/crypto/ecrdsa.c +++ b/crypto/ecrdsa.c @@ -112,15 +112,15 @@ static int ecrdsa_verify(struct akcipher_request *req) /* Step 1: verify that 0 < r < q, 0 < s < q */ if (vli_is_zero(r, ndigits) || - vli_cmp(r, ctx->curve->n, ndigits) == 1 || + vli_cmp(r, ctx->curve->n, ndigits) >= 0 || vli_is_zero(s, ndigits) || - vli_cmp(s, ctx->curve->n, ndigits) == 1) + vli_cmp(s, ctx->curve->n, ndigits) >= 0) return -EKEYREJECTED; /* Step 2: calculate hash (h) of the message (passed as input) */ /* Step 3: calculate e = h \mod q */ vli_from_le64(e, digest, ndigits); - if (vli_cmp(e, ctx->curve->n, ndigits) == 1) + if (vli_cmp(e, ctx->curve->n, ndigits) >= 0) vli_sub(e, e, ctx->curve->n, ndigits); if (vli_is_zero(e, ndigits)) e[0] = 1; @@ -136,7 +136,7 @@ static int ecrdsa_verify(struct akcipher_request *req) /* Step 6: calculate point C = z_1P + z_2Q, and R = x_c \mod q */ ecc_point_mult_shamir(&cc, z1, &ctx->curve->g, z2, &ctx->pub_key, ctx->curve); - if (vli_cmp(cc.x, ctx->curve->n, ndigits) == 1) + if (vli_cmp(cc.x, ctx->curve->n, ndigits) >= 0) vli_sub(cc.x, cc.x, ctx->curve->n, ndigits); /* Step 7: if R == r signature is valid */ diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c index 0aa489711ec49..9cbafaf6dd851 100644 --- a/crypto/rsa-pkcs1pad.c +++ b/crypto/rsa-pkcs1pad.c @@ -475,6 +475,8 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err) pos++; if (digest_info) { + if (digest_info->size > dst_len - pos) + goto done; if (crypto_memneq(out_buf + pos, digest_info->data, digest_info->size)) goto done; @@ -494,7 +496,7 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err) sg_nents_for_len(req->src, req->src_len + req->dst_len), req_ctx->out_buf + ctx->key_size, - req->dst_len, ctx->key_size); + req->dst_len, req->src_len); /* Do the actual verification step. */ if (memcmp(req_ctx->out_buf + ctx->key_size, out_buf + pos, req->dst_len) != 0) @@ -537,7 +539,7 @@ static int pkcs1pad_verify(struct akcipher_request *req) if (WARN_ON(req->dst) || WARN_ON(!req->dst_len) || - !ctx->key_size || req->src_len < ctx->key_size) + !ctx->key_size || req->src_len != ctx->key_size) return -EINVAL; req_ctx->out_buf = kmalloc(ctx->key_size + req->dst_len, GFP_KERNEL); diff --git a/crypto/sm3_generic.c b/crypto/sm3_generic.c index 3468975215cae..a215c1c37e730 100644 --- a/crypto/sm3_generic.c +++ b/crypto/sm3_generic.c @@ -5,6 +5,7 @@ * * Copyright (C) 2017 ARM Limited or its affiliates. * Written by Gilad Ben-Yossef + * Copyright (C) 2021 Tianjia Zhang */ #include @@ -26,153 +27,41 @@ const u8 sm3_zero_message_hash[SM3_DIGEST_SIZE] = { }; EXPORT_SYMBOL_GPL(sm3_zero_message_hash); -static inline u32 p0(u32 x) -{ - return x ^ rol32(x, 9) ^ rol32(x, 17); -} - -static inline u32 p1(u32 x) -{ - return x ^ rol32(x, 15) ^ rol32(x, 23); -} - -static inline u32 ff(unsigned int n, u32 a, u32 b, u32 c) -{ - return (n < 16) ? (a ^ b ^ c) : ((a & b) | (a & c) | (b & c)); -} - -static inline u32 gg(unsigned int n, u32 e, u32 f, u32 g) -{ - return (n < 16) ? (e ^ f ^ g) : ((e & f) | ((~e) & g)); -} - -static inline u32 t(unsigned int n) -{ - return (n < 16) ? SM3_T1 : SM3_T2; -} - -static void sm3_expand(u32 *t, u32 *w, u32 *wt) -{ - int i; - unsigned int tmp; - - /* load the input */ - for (i = 0; i <= 15; i++) - w[i] = get_unaligned_be32((__u32 *)t + i); - - for (i = 16; i <= 67; i++) { - tmp = w[i - 16] ^ w[i - 9] ^ rol32(w[i - 3], 15); - w[i] = p1(tmp) ^ (rol32(w[i - 13], 7)) ^ w[i - 6]; - } - - for (i = 0; i <= 63; i++) - wt[i] = w[i] ^ w[i + 4]; -} - -static void sm3_compress(u32 *w, u32 *wt, u32 *m) -{ - u32 ss1; - u32 ss2; - u32 tt1; - u32 tt2; - u32 a, b, c, d, e, f, g, h; - int i; - - a = m[0]; - b = m[1]; - c = m[2]; - d = m[3]; - e = m[4]; - f = m[5]; - g = m[6]; - h = m[7]; - - for (i = 0; i <= 63; i++) { - - ss1 = rol32((rol32(a, 12) + e + rol32(t(i), i & 31)), 7); - - ss2 = ss1 ^ rol32(a, 12); - - tt1 = ff(i, a, b, c) + d + ss2 + *wt; - wt++; - - tt2 = gg(i, e, f, g) + h + ss1 + *w; - w++; - - d = c; - c = rol32(b, 9); - b = a; - a = tt1; - h = g; - g = rol32(f, 19); - f = e; - e = p0(tt2); - } - - m[0] = a ^ m[0]; - m[1] = b ^ m[1]; - m[2] = c ^ m[2]; - m[3] = d ^ m[3]; - m[4] = e ^ m[4]; - m[5] = f ^ m[5]; - m[6] = g ^ m[6]; - m[7] = h ^ m[7]; - - a = b = c = d = e = f = g = h = ss1 = ss2 = tt1 = tt2 = 0; -} - -static void sm3_transform(struct sm3_state *sst, u8 const *src) -{ - unsigned int w[68]; - unsigned int wt[64]; - - sm3_expand((u32 *)src, w, wt); - sm3_compress(w, wt, sst->state); - - memzero_explicit(w, sizeof(w)); - memzero_explicit(wt, sizeof(wt)); -} - -static void sm3_generic_block_fn(struct sm3_state *sst, u8 const *src, - int blocks) -{ - while (blocks--) { - sm3_transform(sst, src); - src += SM3_BLOCK_SIZE; - } -} - -int crypto_sm3_update(struct shash_desc *desc, const u8 *data, +static int crypto_sm3_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sm3_base_do_update(desc, data, len, sm3_generic_block_fn); + sm3_update(shash_desc_ctx(desc), data, len); + return 0; } -EXPORT_SYMBOL(crypto_sm3_update); -static int sm3_final(struct shash_desc *desc, u8 *out) +static int crypto_sm3_final(struct shash_desc *desc, u8 *out) { - sm3_base_do_finalize(desc, sm3_generic_block_fn); - return sm3_base_finish(desc, out); + sm3_final(shash_desc_ctx(desc), out); + return 0; } -int crypto_sm3_finup(struct shash_desc *desc, const u8 *data, +static int crypto_sm3_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *hash) { - sm3_base_do_update(desc, data, len, sm3_generic_block_fn); - return sm3_final(desc, hash); + struct sm3_state *sctx = shash_desc_ctx(desc); + + if (len) + sm3_update(sctx, data, len); + sm3_final(sctx, hash); + return 0; } -EXPORT_SYMBOL(crypto_sm3_finup); static struct shash_alg sm3_alg = { .digestsize = SM3_DIGEST_SIZE, .init = sm3_base_init, .update = crypto_sm3_update, - .final = sm3_final, + .final = crypto_sm3_final, .finup = crypto_sm3_finup, .descsize = sizeof(struct sm3_state), .base = { .cra_name = "sm3", .cra_driver_name = "sm3-generic", + .cra_priority = 100, .cra_blocksize = SM3_BLOCK_SIZE, .cra_module = THIS_MODULE, } diff --git a/crypto/sm4_generic.c b/crypto/sm4_generic.c index 71ffb343709a5..ba539d05481f5 100644 --- a/crypto/sm4_generic.c +++ b/crypto/sm4_generic.c @@ -16,197 +16,49 @@ #include #include -static const u32 fk[4] = { - 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc -}; - -static const u8 sbox[256] = { - 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7, - 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05, - 0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3, - 0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99, - 0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a, - 0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62, - 0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95, - 0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6, - 0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba, - 0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8, - 0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b, - 0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35, - 0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2, - 0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87, - 0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52, - 0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e, - 0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5, - 0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1, - 0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55, - 0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3, - 0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60, - 0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f, - 0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f, - 0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51, - 0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f, - 0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8, - 0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd, - 0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0, - 0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e, - 0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84, - 0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20, - 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48 -}; - -static const u32 ck[] = { - 0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269, - 0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9, - 0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249, - 0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9, - 0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229, - 0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299, - 0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209, - 0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279 -}; - -static u32 sm4_t_non_lin_sub(u32 x) -{ - int i; - u8 *b = (u8 *)&x; - - for (i = 0; i < 4; ++i) - b[i] = sbox[b[i]]; - - return x; -} - -static u32 sm4_key_lin_sub(u32 x) -{ - return x ^ rol32(x, 13) ^ rol32(x, 23); - -} - -static u32 sm4_enc_lin_sub(u32 x) -{ - return x ^ rol32(x, 2) ^ rol32(x, 10) ^ rol32(x, 18) ^ rol32(x, 24); -} - -static u32 sm4_key_sub(u32 x) -{ - return sm4_key_lin_sub(sm4_t_non_lin_sub(x)); -} - -static u32 sm4_enc_sub(u32 x) -{ - return sm4_enc_lin_sub(sm4_t_non_lin_sub(x)); -} - -static u32 sm4_round(const u32 *x, const u32 rk) -{ - return x[0] ^ sm4_enc_sub(x[1] ^ x[2] ^ x[3] ^ rk); -} - - -/** - * crypto_sm4_expand_key - Expands the SM4 key as described in GB/T 32907-2016 - * @ctx: The location where the computed key will be stored. - * @in_key: The supplied key. - * @key_len: The length of the supplied key. - * - * Returns 0 on success. The function fails only if an invalid key size (or - * pointer) is supplied. - */ -int crypto_sm4_expand_key(struct crypto_sm4_ctx *ctx, const u8 *in_key, - unsigned int key_len) -{ - u32 rk[4], t; - const u32 *key = (u32 *)in_key; - int i; - - if (key_len != SM4_KEY_SIZE) - return -EINVAL; - - for (i = 0; i < 4; ++i) - rk[i] = get_unaligned_be32(&key[i]) ^ fk[i]; - - for (i = 0; i < 32; ++i) { - t = rk[0] ^ sm4_key_sub(rk[1] ^ rk[2] ^ rk[3] ^ ck[i]); - ctx->rkey_enc[i] = t; - rk[0] = rk[1]; - rk[1] = rk[2]; - rk[2] = rk[3]; - rk[3] = t; - } - - for (i = 0; i < 32; ++i) - ctx->rkey_dec[i] = ctx->rkey_enc[31 - i]; - - return 0; -} -EXPORT_SYMBOL_GPL(crypto_sm4_expand_key); - /** - * crypto_sm4_set_key - Set the AES key. + * sm4_setkey - Set the SM4 key. * @tfm: The %crypto_tfm that is used in the context. * @in_key: The input key. * @key_len: The size of the key. * * Returns 0 on success, on failure the %CRYPTO_TFM_RES_BAD_KEY_LEN flag in tfm - * is set. The function uses crypto_sm4_expand_key() to expand the key. - * &crypto_sm4_ctx _must_ be the private data embedded in @tfm which is + * is set. This function uses sm4_expandkey() to expand the key. + * &sm4_ctx _must_ be the private data embedded in @tfm which is * retrieved with crypto_tfm_ctx(). */ -int crypto_sm4_set_key(struct crypto_tfm *tfm, const u8 *in_key, +static int sm4_setkey(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) { - struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm); + struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); u32 *flags = &tfm->crt_flags; int ret; - ret = crypto_sm4_expand_key(ctx, in_key, key_len); + ret = sm4_expandkey(ctx, in_key, key_len); if (!ret) return 0; *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; return -EINVAL; } -EXPORT_SYMBOL_GPL(crypto_sm4_set_key); - -static void sm4_do_crypt(const u32 *rk, u32 *out, const u32 *in) -{ - u32 x[4], i, t; - - for (i = 0; i < 4; ++i) - x[i] = get_unaligned_be32(&in[i]); - - for (i = 0; i < 32; ++i) { - t = sm4_round(x, rk[i]); - x[0] = x[1]; - x[1] = x[2]; - x[2] = x[3]; - x[3] = t; - } - - for (i = 0; i < 4; ++i) - put_unaligned_be32(x[3 - i], &out[i]); -} /* encrypt a block of text */ -void crypto_sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +static void sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { - const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm); + const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); - sm4_do_crypt(ctx->rkey_enc, (u32 *)out, (u32 *)in); + sm4_crypt_block(ctx->rkey_enc, out, in); } -EXPORT_SYMBOL_GPL(crypto_sm4_encrypt); /* decrypt a block of text */ -void crypto_sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +static void sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { - const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm); + const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); - sm4_do_crypt(ctx->rkey_dec, (u32 *)out, (u32 *)in); + sm4_crypt_block(ctx->rkey_dec, out, in); } -EXPORT_SYMBOL_GPL(crypto_sm4_decrypt); static struct crypto_alg sm4_alg = { .cra_name = "sm4", @@ -214,15 +66,15 @@ static struct crypto_alg sm4_alg = { .cra_priority = 100, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = SM4_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_sm4_ctx), + .cra_ctxsize = sizeof(struct sm4_ctx), .cra_module = THIS_MODULE, .cra_u = { .cipher = { .cia_min_keysize = SM4_KEY_SIZE, .cia_max_keysize = SM4_KEY_SIZE, - .cia_setkey = crypto_sm4_set_key, - .cia_encrypt = crypto_sm4_encrypt, - .cia_decrypt = crypto_sm4_decrypt + .cia_setkey = sm4_setkey, + .cia_encrypt = sm4_encrypt, + .cia_decrypt = sm4_decrypt } } }; diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 0cece1f883ebe..845982ea8cdd5 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -76,7 +76,7 @@ static char *check[] = { NULL }; -static u32 block_sizes[] = { 16, 64, 256, 1024, 1472, 8192, 0 }; +static u32 block_sizes[] = { 16, 64, 128, 256, 1024, 1472, 8192, 0 }; static u32 aead_sizes[] = { 16, 64, 256, 512, 1024, 2048, 4096, 8192, 0 }; #define XBUFSIZE 8 @@ -2047,6 +2047,7 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) case 191: ret += tcrypt_test("ecb(sm4)"); ret += tcrypt_test("cbc(sm4)"); + ret += tcrypt_test("cfb(sm4)"); ret += tcrypt_test("ctr(sm4)"); break; case 200: @@ -2310,6 +2311,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) speed_template_16); test_cipher_speed("cbc(sm4)", DECRYPT, sec, NULL, 0, speed_template_16); + test_cipher_speed("cfb(sm4)", ENCRYPT, sec, NULL, 0, + speed_template_16); + test_cipher_speed("cfb(sm4)", DECRYPT, sec, NULL, 0, + speed_template_16); test_cipher_speed("ctr(sm4)", ENCRYPT, sec, NULL, 0, speed_template_16); test_cipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0, @@ -2557,31 +2562,35 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) if (mode > 400 && mode < 500) break; /* fall through */ case 422: + test_ahash_speed("sm3", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + /* fall through */ + case 450: test_mb_ahash_speed("sha1", sec, generic_hash_speed_template, num_mb); if (mode > 400 && mode < 500) break; /* fall through */ - case 423: + case 451: test_mb_ahash_speed("sha256", sec, generic_hash_speed_template, num_mb); if (mode > 400 && mode < 500) break; /* fall through */ - case 424: + case 452: test_mb_ahash_speed("sha512", sec, generic_hash_speed_template, num_mb); if (mode > 400 && mode < 500) break; /* fall through */ - case 425: + case 453: test_mb_ahash_speed("sm3", sec, generic_hash_speed_template, num_mb); if (mode > 400 && mode < 500) break; /* fall through */ - case 426: + case 454: test_mb_ahash_speed("streebog256", sec, generic_hash_speed_template, num_mb); if (mode > 400 && mode < 500) break; /* fall through */ - case 427: + case 455: test_mb_ahash_speed("streebog512", sec, generic_hash_speed_template, num_mb); if (mode > 400 && mode < 500) break; @@ -2801,6 +2810,25 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) speed_template_8_32); break; + case 518: + test_acipher_speed("ecb(sm4)", ENCRYPT, sec, NULL, 0, + speed_template_16); + test_acipher_speed("ecb(sm4)", DECRYPT, sec, NULL, 0, + speed_template_16); + test_acipher_speed("cbc(sm4)", ENCRYPT, sec, NULL, 0, + speed_template_16); + test_acipher_speed("cbc(sm4)", DECRYPT, sec, NULL, 0, + speed_template_16); + test_acipher_speed("cfb(sm4)", ENCRYPT, sec, NULL, 0, + speed_template_16); + test_acipher_speed("cfb(sm4)", DECRYPT, sec, NULL, 0, + speed_template_16); + test_acipher_speed("ctr(sm4)", ENCRYPT, sec, NULL, 0, + speed_template_16); + test_acipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0, + speed_template_16); + break; + case 600: test_mb_skcipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0, speed_template_16_24_32, num_mb); diff --git a/crypto/testmgr.h b/crypto/testmgr.h index ef7d21f39d4a9..f3722c66530da 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -178,7 +178,7 @@ static const struct akcipher_testvec rsa_tv_template[] = { #ifndef CONFIG_CRYPTO_FIPS .key = "\x30\x81\x9A" /* sequence of 154 bytes */ - "\x02\x01\x01" /* version - integer of 1 byte */ + "\x02\x01\x00" /* version - integer of 1 byte */ "\x02\x41" /* modulus - integer of 65 bytes */ "\x00\xAA\x36\xAB\xCE\x88\xAC\xFD\xFF\x55\x52\x3C\x7F\xC4\x52\x3F" "\x90\xEF\xA0\x0D\xF3\x77\x4A\x25\x9F\x2E\x62\xB4\xC5\xD9\x9C\xB5" @@ -208,7 +208,7 @@ static const struct akcipher_testvec rsa_tv_template[] = { }, { .key = "\x30\x82\x01\x1D" /* sequence of 285 bytes */ - "\x02\x01\x01" /* version - integer of 1 byte */ + "\x02\x01\x00" /* version - integer of 1 byte */ "\x02\x81\x81" /* modulus - integer of 129 bytes */ "\x00\xBB\xF8\x2F\x09\x06\x82\xCE\x9C\x23\x38\xAC\x2B\x9D\xA8\x71" "\xF7\x36\x8D\x07\xEE\xD4\x10\x43\xA4\x40\xD6\xB6\xF0\x74\x54\xF5" @@ -251,9 +251,9 @@ static const struct akcipher_testvec rsa_tv_template[] = { }, { #endif .key = - "\x30\x82\x02\x1F" /* sequence of 543 bytes */ - "\x02\x01\x01" /* version - integer of 1 byte */ - "\x02\x82\x01\x00" /* modulus - integer of 256 bytes */ + "\x30\x82\x02\x20" /* sequence of 544 bytes */ + "\x02\x01\x00" /* version - integer of 1 byte */ + "\x02\x82\x01\x01\x00" /* modulus - integer of 256 bytes */ "\xDB\x10\x1A\xC2\xA3\xF1\xDC\xFF\x13\x6B\xED\x44\xDF\xF0\x02\x6D" "\x13\xC7\x88\xDA\x70\x6B\x54\xF1\xE8\x27\xDC\xC3\x0F\x99\x6A\xFA" "\xC6\x67\xFF\x1D\x1E\x3C\x1D\xC1\xB5\x5F\x6C\xC0\xB2\x07\x3A\x6D" @@ -293,7 +293,7 @@ static const struct akcipher_testvec rsa_tv_template[] = { "\x02\x01\x00" /* exponent1 - integer of 1 byte */ "\x02\x01\x00" /* exponent2 - integer of 1 byte */ "\x02\x01\x00", /* coefficient - integer of 1 byte */ - .key_len = 547, + .key_len = 548, .m = "\x54\x85\x9b\x34\x2c\x49\xea\x2a", .c = "\xb2\x97\x76\xb4\xae\x3e\x38\x3c\x7e\x64\x1f\xcc\xa2\x7f\xf6\xbe" @@ -771,7 +771,7 @@ static const struct akcipher_testvec pkcs1pad_rsa_tv_template[] = { "\xd1\x86\x48\x55\xce\x83\xee\x8e\x51\xc7\xde\x32\x12\x47\x7d\x46" "\xb8\x35\xdf\x41\x02\x01\x00\x02\x01\x00\x02\x01\x00\x02\x01\x00" "\x02\x01\x00", - .key_len = 804, + .key_len = 803, /* * m is SHA256 hash of following message: * "\x49\x41\xbe\x0a\x0c\xc9\xf6\x35\x51\xe4\x27\x56\x13\x71\x4b\xd0" diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index fd3beea934213..42c4bfd796f42 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -220,6 +220,8 @@ extern struct acpi_bit_register_info acpi_gbl_bit_register_info[ACPI_NUM_BITREG]; ACPI_GLOBAL(u8, acpi_gbl_sleep_type_a); ACPI_GLOBAL(u8, acpi_gbl_sleep_type_b); +ACPI_GLOBAL(u8, acpi_gbl_sleep_type_a_s0); +ACPI_GLOBAL(u8, acpi_gbl_sleep_type_b_s0); /***************************************************************************** * diff --git a/drivers/acpi/acpica/exfield.c b/drivers/acpi/acpica/exfield.c index d3d2dbfba680c..cd3debefe990d 100644 --- a/drivers/acpi/acpica/exfield.c +++ b/drivers/acpi/acpica/exfield.c @@ -320,12 +320,7 @@ acpi_ex_write_data_to_field(union acpi_operand_object *source_desc, obj_desc->field.base_byte_offset, source_desc->buffer.pointer, data_length); - if ((obj_desc->field.region_obj->region.address == - PCC_MASTER_SUBSPACE - && MASTER_SUBSPACE_COMMAND(obj_desc->field. - base_byte_offset)) - || GENERIC_SUBSPACE_COMMAND(obj_desc->field. - base_byte_offset)) { + if (MASTER_SUBSPACE_COMMAND(obj_desc->field.base_byte_offset)) { /* Perform the write */ diff --git a/drivers/acpi/acpica/exoparg1.c b/drivers/acpi/acpica/exoparg1.c index 06e35ea098234..6d84618ba3871 100644 --- a/drivers/acpi/acpica/exoparg1.c +++ b/drivers/acpi/acpica/exoparg1.c @@ -1007,7 +1007,8 @@ acpi_status acpi_ex_opcode_1A_0T_1R(struct acpi_walk_state *walk_state) (walk_state, return_desc, &temp_desc); if (ACPI_FAILURE(status)) { - goto cleanup; + return_ACPI_STATUS + (status); } return_desc = temp_desc; diff --git a/drivers/acpi/acpica/hwesleep.c b/drivers/acpi/acpica/hwesleep.c index dee3affaca491..de0a59878e52d 100644 --- a/drivers/acpi/acpica/hwesleep.c +++ b/drivers/acpi/acpica/hwesleep.c @@ -104,7 +104,9 @@ acpi_status acpi_hw_extended_sleep(u8 sleep_state) /* Flush caches, as per ACPI specification */ - ACPI_FLUSH_CPU_CACHE(); + if (sleep_state < ACPI_STATE_S4) { + ACPI_FLUSH_CPU_CACHE(); + } status = acpi_os_enter_sleep(sleep_state, sleep_control, 0); if (status == AE_CTRL_TERMINATE) { @@ -147,17 +149,13 @@ acpi_status acpi_hw_extended_sleep(u8 sleep_state) acpi_status acpi_hw_extended_wake_prep(u8 sleep_state) { - acpi_status status; u8 sleep_type_value; ACPI_FUNCTION_TRACE(hw_extended_wake_prep); - status = acpi_get_sleep_type_data(ACPI_STATE_S0, - &acpi_gbl_sleep_type_a, - &acpi_gbl_sleep_type_b); - if (ACPI_SUCCESS(status)) { + if (acpi_gbl_sleep_type_a_s0 != ACPI_SLEEP_TYPE_INVALID) { sleep_type_value = - ((acpi_gbl_sleep_type_a << ACPI_X_SLEEP_TYPE_POSITION) & + ((acpi_gbl_sleep_type_a_s0 << ACPI_X_SLEEP_TYPE_POSITION) & ACPI_X_SLEEP_TYPE_MASK); (void)acpi_write((u64)(sleep_type_value | ACPI_X_SLEEP_ENABLE), diff --git a/drivers/acpi/acpica/hwsleep.c b/drivers/acpi/acpica/hwsleep.c index b62db8ec446fa..321aaad97e2f7 100644 --- a/drivers/acpi/acpica/hwsleep.c +++ b/drivers/acpi/acpica/hwsleep.c @@ -110,7 +110,9 @@ acpi_status acpi_hw_legacy_sleep(u8 sleep_state) /* Flush caches, as per ACPI specification */ - ACPI_FLUSH_CPU_CACHE(); + if (sleep_state < ACPI_STATE_S4) { + ACPI_FLUSH_CPU_CACHE(); + } status = acpi_os_enter_sleep(sleep_state, pm1a_control, pm1b_control); if (status == AE_CTRL_TERMINATE) { @@ -179,7 +181,7 @@ acpi_status acpi_hw_legacy_sleep(u8 sleep_state) acpi_status acpi_hw_legacy_wake_prep(u8 sleep_state) { - acpi_status status; + acpi_status status = AE_OK; struct acpi_bit_register_info *sleep_type_reg_info; struct acpi_bit_register_info *sleep_enable_reg_info; u32 pm1a_control; @@ -192,10 +194,7 @@ acpi_status acpi_hw_legacy_wake_prep(u8 sleep_state) * This is unclear from the ACPI Spec, but it is required * by some machines. */ - status = acpi_get_sleep_type_data(ACPI_STATE_S0, - &acpi_gbl_sleep_type_a, - &acpi_gbl_sleep_type_b); - if (ACPI_SUCCESS(status)) { + if (acpi_gbl_sleep_type_a_s0 != ACPI_SLEEP_TYPE_INVALID) { sleep_type_reg_info = acpi_hw_get_bit_register_info(ACPI_BITREG_SLEEP_TYPE); sleep_enable_reg_info = @@ -216,9 +215,9 @@ acpi_status acpi_hw_legacy_wake_prep(u8 sleep_state) /* Insert the SLP_TYP bits */ - pm1a_control |= (acpi_gbl_sleep_type_a << + pm1a_control |= (acpi_gbl_sleep_type_a_s0 << sleep_type_reg_info->bit_position); - pm1b_control |= (acpi_gbl_sleep_type_b << + pm1b_control |= (acpi_gbl_sleep_type_b_s0 << sleep_type_reg_info->bit_position); /* Write the control registers and ignore any errors */ diff --git a/drivers/acpi/acpica/hwxfsleep.c b/drivers/acpi/acpica/hwxfsleep.c index abbf9702aa7f2..4e3398819718d 100644 --- a/drivers/acpi/acpica/hwxfsleep.c +++ b/drivers/acpi/acpica/hwxfsleep.c @@ -162,8 +162,6 @@ acpi_status acpi_enter_sleep_state_s4bios(void) return_ACPI_STATUS(status); } - ACPI_FLUSH_CPU_CACHE(); - status = acpi_hw_write_port(acpi_gbl_FADT.smi_command, (u32)acpi_gbl_FADT.s4_bios_request, 8); @@ -214,6 +212,13 @@ acpi_status acpi_enter_sleep_state_prep(u8 sleep_state) return_ACPI_STATUS(status); } + status = acpi_get_sleep_type_data(ACPI_STATE_S0, + &acpi_gbl_sleep_type_a_s0, + &acpi_gbl_sleep_type_b_s0); + if (ACPI_FAILURE(status)) { + acpi_gbl_sleep_type_a_s0 = ACPI_SLEEP_TYPE_INVALID; + } + /* Execute the _PTS method (Prepare To Sleep) */ arg_list.count = 1; diff --git a/drivers/acpi/acpica/nsaccess.c b/drivers/acpi/acpica/nsaccess.c index 3f045b5953b2e..a0c1a665dfc12 100644 --- a/drivers/acpi/acpica/nsaccess.c +++ b/drivers/acpi/acpica/nsaccess.c @@ -99,13 +99,12 @@ acpi_status acpi_ns_root_initialize(void) * just create and link the new node(s) here. */ new_node = - ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_namespace_node)); + acpi_ns_create_node(*ACPI_CAST_PTR(u32, init_val->name)); if (!new_node) { status = AE_NO_MEMORY; goto unlock_and_exit; } - ACPI_COPY_NAMESEG(new_node->name.ascii, init_val->name); new_node->descriptor_type = ACPI_DESC_TYPE_NAMED; new_node->type = init_val->type; diff --git a/drivers/acpi/acpica/nswalk.c b/drivers/acpi/acpica/nswalk.c index ceea6af79d121..bf4eb642f4232 100644 --- a/drivers/acpi/acpica/nswalk.c +++ b/drivers/acpi/acpica/nswalk.c @@ -169,6 +169,9 @@ acpi_ns_walk_namespace(acpi_object_type type, if (start_node == ACPI_ROOT_OBJECT) { start_node = acpi_gbl_root_node; + if (!start_node) { + return_ACPI_STATUS(AE_NO_NAMESPACE); + } } /* Null child means "get first node" */ diff --git a/drivers/acpi/acpica/utdelete.c b/drivers/acpi/acpica/utdelete.c index 72d2c0b656339..cb1750e7a6281 100644 --- a/drivers/acpi/acpica/utdelete.c +++ b/drivers/acpi/acpica/utdelete.c @@ -422,6 +422,7 @@ acpi_ut_update_ref_count(union acpi_operand_object *object, u32 action) ACPI_WARNING((AE_INFO, "Obj %p, Reference Count is already zero, cannot decrement\n", object)); + return; } ACPI_DEBUG_PRINT_RAW((ACPI_DB_ALLOCATIONS, diff --git a/drivers/acpi/apei/bert.c b/drivers/acpi/apei/bert.c index 1155fb9dcc3ad..a06f35528c9a7 100644 --- a/drivers/acpi/apei/bert.c +++ b/drivers/acpi/apei/bert.c @@ -30,14 +30,25 @@ #undef pr_fmt #define pr_fmt(fmt) "BERT: " fmt +#define ACPI_BERT_PRINT_MAX_RECORDS 5 +#define ACPI_BERT_PRINT_MAX_LEN 1024 + static int bert_disable; +/* + * Print "all" the error records in the BERT table, but avoid huge spam to + * the console if the BIOS included oversize records, or too many records. + * Skipping some records here does not lose anything because the full + * data is available to user tools in: + * /sys/firmware/acpi/tables/data/BERT + */ static void __init bert_print_all(struct acpi_bert_region *region, unsigned int region_len) { struct acpi_hest_generic_status *estatus = (struct acpi_hest_generic_status *)region; int remain = region_len; + int printed = 0, skipped = 0; u32 estatus_len; while (remain >= sizeof(struct acpi_bert_region)) { @@ -45,21 +56,26 @@ static void __init bert_print_all(struct acpi_bert_region *region, if (remain < estatus_len) { pr_err(FW_BUG "Truncated status block (length: %u).\n", estatus_len); - return; + break; } /* No more error records. */ if (!estatus->block_status) - return; + break; if (cper_estatus_check(estatus)) { pr_err(FW_BUG "Invalid error record.\n"); - return; + break; } - pr_info_once("Error records from previous boot:\n"); - - cper_estatus_print(KERN_INFO HW_ERR, estatus); + if (estatus_len < ACPI_BERT_PRINT_MAX_LEN && + printed < ACPI_BERT_PRINT_MAX_RECORDS) { + pr_info_once("Error records from previous boot:\n"); + cper_estatus_print(KERN_INFO HW_ERR, estatus); + printed++; + } else { + skipped++; + } /* * Because the boot error source is "one-time polled" type, @@ -71,13 +87,16 @@ static void __init bert_print_all(struct acpi_bert_region *region, estatus = (void *)estatus + estatus_len; remain -= estatus_len; } + + if (skipped) + pr_info(HW_ERR "Skipped %d error records\n", skipped); } static int __init setup_bert_disable(char *str) { bert_disable = 1; - return 0; + return 1; } __setup("bert_disable", setup_bert_disable); diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index 2015a0967cbbf..5ee3cb7fcd906 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -891,7 +891,7 @@ EXPORT_SYMBOL_GPL(erst_clear); static int __init setup_erst_disable(char *str) { erst_disable = 1; - return 0; + return 1; } __setup("erst_disable", setup_erst_disable); diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c index 267bdbf6a7bf0..add6416e78f23 100644 --- a/drivers/acpi/apei/hest.c +++ b/drivers/acpi/apei/hest.c @@ -219,7 +219,7 @@ static int __init hest_ghes_dev_register(unsigned int ghes_count) static int __init setup_hest_disable(char *str) { hest_disable = HEST_DISABLED; - return 0; + return 1; } __setup("hest_disable", setup_hest_disable); diff --git a/drivers/acpi/arm64/gtdt.c b/drivers/acpi/arm64/gtdt.c index 7d7497b856020..311419d1d6b05 100644 --- a/drivers/acpi/arm64/gtdt.c +++ b/drivers/acpi/arm64/gtdt.c @@ -36,7 +36,7 @@ struct acpi_gtdt_descriptor { static struct acpi_gtdt_descriptor acpi_gtdt_desc __initdata; -static inline void *next_platform_timer(void *platform_timer) +static inline __init void *next_platform_timer(void *platform_timer) { struct acpi_gtdt_header *gh = platform_timer; diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index bc95a5eebd137..553c89b0bdcbb 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1373,9 +1373,17 @@ static void __init arm_smmu_v3_pmcg_init_resources(struct resource *res, res[0].start = pmcg->page0_base_address; res[0].end = pmcg->page0_base_address + SZ_4K - 1; res[0].flags = IORESOURCE_MEM; - res[1].start = pmcg->page1_base_address; - res[1].end = pmcg->page1_base_address + SZ_4K - 1; - res[1].flags = IORESOURCE_MEM; + /* + * The initial version in DEN0049C lacked a way to describe register + * page 1, which makes it broken for most PMCG implementations; in + * that case, just let the driver fail gracefully if it expects to + * find a second memory resource. + */ + if (node->revision > 0) { + res[1].start = pmcg->page1_base_address; + res[1].end = pmcg->page1_base_address + SZ_4K - 1; + res[1].flags = IORESOURCE_MEM; + } if (pmcg->overflow_gsiv) acpi_iort_register_irq(pmcg->overflow_gsiv, "overflow", diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index 254a7d98b9d4c..974c2df13da1d 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -65,6 +65,7 @@ static int battery_bix_broken_package; static int battery_notification_delay_ms; static int battery_ac_is_broken; static int battery_check_pmic = 1; +static int battery_quirk_notcharging; static unsigned int cache_time = 1000; module_param(cache_time, uint, 0644); MODULE_PARM_DESC(cache_time, "cache time in milliseconds"); @@ -76,6 +77,10 @@ extern void *acpi_unlock_battery_dir(struct proc_dir_entry *acpi_battery_dir); static const struct acpi_device_id battery_device_ids[] = { {"PNP0C0A", 0}, + + /* Microsoft Surface Go 3 */ + {"MSHW0146", 0}, + {"", 0}, }; @@ -185,7 +190,7 @@ static int acpi_battery_is_charged(struct acpi_battery *battery) return 1; /* fallback to using design values for broken batteries */ - if (battery->design_capacity == battery->capacity_now) + if (battery->design_capacity <= battery->capacity_now) return 1; /* we don't do any sort of metric based on percentages */ @@ -233,6 +238,8 @@ static int acpi_battery_get_property(struct power_supply *psy, val->intval = POWER_SUPPLY_STATUS_CHARGING; else if (acpi_battery_is_charged(battery)) val->intval = POWER_SUPPLY_STATUS_FULL; + else if (battery_quirk_notcharging) + val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING; else val->intval = POWER_SUPPLY_STATUS_UNKNOWN; break; @@ -1337,6 +1344,12 @@ battery_do_not_check_pmic_quirk(const struct dmi_system_id *d) return 0; } +static int __init battery_quirk_not_charging(const struct dmi_system_id *d) +{ + battery_quirk_notcharging = 1; + return 0; +} + static const struct dmi_system_id bat_dmi_table[] __initconst = { { /* NEC LZ750/LS */ @@ -1381,6 +1394,27 @@ static const struct dmi_system_id bat_dmi_table[] __initconst = { DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo MIIX 320-10ICR"), }, }, + { + /* + * On Lenovo ThinkPads the BIOS specification defines + * a state when the bits for charging and discharging + * are both set to 0. That state is "Not Charging". + */ + .callback = battery_quirk_not_charging, + .ident = "Lenovo ThinkPad", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad"), + }, + }, + { + /* Microsoft Surface Go 3 */ + .callback = battery_notification_delay_quirk, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "Surface Go 3"), + }, + }, {}, }; diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 2826bd45c61a1..0521b1d4c2fd0 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -738,6 +738,11 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) cpc_obj = &out_obj->package.elements[0]; if (cpc_obj->type == ACPI_TYPE_INTEGER) { num_ent = cpc_obj->integer.value; + if (num_ent <= 1) { + pr_debug("Unexpected _CPC NumEntries value (%d) for CPU:%d\n", + num_ent, pr->id); + goto out_free; + } } else { pr_debug("Unexpected entry type(%d) for NumEntries\n", cpc_obj->type); diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 258a8df235cfb..e5b92958c299e 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -167,6 +167,7 @@ struct acpi_ec_query { struct transaction transaction; struct work_struct work; struct acpi_ec_query_handler *handler; + struct acpi_ec *ec; }; static int acpi_ec_query(struct acpi_ec *ec, u8 *data); @@ -462,6 +463,7 @@ static void acpi_ec_submit_query(struct acpi_ec *ec) ec_dbg_evt("Command(%s) submitted/blocked", acpi_ec_cmd_string(ACPI_EC_COMMAND_QUERY)); ec->nr_pending_queries++; + ec->events_in_progress++; queue_work(ec_wq, &ec->work); } } @@ -528,7 +530,7 @@ static void acpi_ec_enable_event(struct acpi_ec *ec) #ifdef CONFIG_PM_SLEEP static void __acpi_ec_flush_work(void) { - drain_workqueue(ec_wq); /* flush ec->work */ + flush_workqueue(ec_wq); /* flush ec->work */ flush_workqueue(ec_query_wq); /* flush queries */ } @@ -1119,7 +1121,7 @@ void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit) } EXPORT_SYMBOL_GPL(acpi_ec_remove_query_handler); -static struct acpi_ec_query *acpi_ec_create_query(u8 *pval) +static struct acpi_ec_query *acpi_ec_create_query(struct acpi_ec *ec, u8 *pval) { struct acpi_ec_query *q; struct transaction *t; @@ -1127,11 +1129,13 @@ static struct acpi_ec_query *acpi_ec_create_query(u8 *pval) q = kzalloc(sizeof (struct acpi_ec_query), GFP_KERNEL); if (!q) return NULL; + INIT_WORK(&q->work, acpi_ec_event_processor); t = &q->transaction; t->command = ACPI_EC_COMMAND_QUERY; t->rdata = pval; t->rlen = 1; + q->ec = ec; return q; } @@ -1148,13 +1152,21 @@ static void acpi_ec_event_processor(struct work_struct *work) { struct acpi_ec_query *q = container_of(work, struct acpi_ec_query, work); struct acpi_ec_query_handler *handler = q->handler; + struct acpi_ec *ec = q->ec; ec_dbg_evt("Query(0x%02x) started", handler->query_bit); + if (handler->func) handler->func(handler->data); else if (handler->handle) acpi_evaluate_object(handler->handle, NULL, NULL, NULL); + ec_dbg_evt("Query(0x%02x) stopped", handler->query_bit); + + spin_lock_irq(&ec->lock); + ec->queries_in_progress--; + spin_unlock_irq(&ec->lock); + acpi_ec_delete_query(q); } @@ -1164,7 +1176,7 @@ static int acpi_ec_query(struct acpi_ec *ec, u8 *data) int result; struct acpi_ec_query *q; - q = acpi_ec_create_query(&value); + q = acpi_ec_create_query(ec, &value); if (!q) return -ENOMEM; @@ -1186,19 +1198,20 @@ static int acpi_ec_query(struct acpi_ec *ec, u8 *data) } /* - * It is reported that _Qxx are evaluated in a parallel way on - * Windows: + * It is reported that _Qxx are evaluated in a parallel way on Windows: * https://bugzilla.kernel.org/show_bug.cgi?id=94411 * - * Put this log entry before schedule_work() in order to make - * it appearing before any other log entries occurred during the - * work queue execution. + * Put this log entry before queue_work() to make it appear in the log + * before any other messages emitted during workqueue handling. */ ec_dbg_evt("Query(0x%02x) scheduled", value); - if (!queue_work(ec_query_wq, &q->work)) { - ec_dbg_evt("Query(0x%02x) overlapped", value); - result = -EBUSY; - } + + spin_lock_irq(&ec->lock); + + ec->queries_in_progress++; + queue_work(ec_query_wq, &q->work); + + spin_unlock_irq(&ec->lock); err_exit: if (result) @@ -1256,6 +1269,10 @@ static void acpi_ec_event_handler(struct work_struct *work) ec_dbg_evt("Event stopped"); acpi_ec_check_event(ec); + + spin_lock_irqsave(&ec->lock, flags); + ec->events_in_progress--; + spin_unlock_irqrestore(&ec->lock, flags); } static u32 acpi_ec_gpe_handler(acpi_handle gpe_device, @@ -1972,6 +1989,7 @@ void acpi_ec_set_gpe_wake_mask(u8 action) bool acpi_ec_dispatch_gpe(void) { + bool work_in_progress; u32 ret; if (!first_ec) @@ -1992,8 +2010,19 @@ bool acpi_ec_dispatch_gpe(void) if (ret == ACPI_INTERRUPT_HANDLED) pm_pr_dbg("EC GPE dispatched\n"); - /* Flush the event and query workqueues. */ - acpi_ec_flush_work(); + /* Drain EC work. */ + do { + acpi_ec_flush_work(); + + pm_pr_dbg("ACPI EC work flushed\n"); + + spin_lock_irq(&first_ec->lock); + + work_in_progress = first_ec->events_in_progress + + first_ec->queries_in_progress > 0; + + spin_unlock_irq(&first_ec->lock); + } while (work_in_progress && !pm_wakeup_pending()); return false; } diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 159c422601bc4..62b6b36f3a37c 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -183,6 +183,8 @@ struct acpi_ec { struct work_struct work; unsigned long timestamp; unsigned long nr_pending_queries; + unsigned int events_in_progress; + unsigned int queries_in_progress; bool busy_polling; unsigned int polling_guard; }; diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c index 715a382d92df0..47e43c9498257 100644 --- a/drivers/acpi/pci_mcfg.c +++ b/drivers/acpi/pci_mcfg.c @@ -143,25 +143,25 @@ static struct mcfg_fixup mcfg_quirks[] = { XGENE_V2_ECAM_MCFG(4, 1), XGENE_V2_ECAM_MCFG(4, 2), -#define AMPERE_ECAM32(rev, seg) \ +#define ALTRA_ECAM_QUIRK(rev, seg) \ { "Ampere", "Altra ", rev, seg, MCFG_BUS_ANY, &pci_32b_read_ops } - AMPERE_ECAM32(1, 0), - AMPERE_ECAM32(1, 1), - AMPERE_ECAM32(1, 2), - AMPERE_ECAM32(1, 3), - AMPERE_ECAM32(1, 4), - AMPERE_ECAM32(1, 5), - AMPERE_ECAM32(1, 6), - AMPERE_ECAM32(1, 7), - AMPERE_ECAM32(1, 8), - AMPERE_ECAM32(1, 9), - AMPERE_ECAM32(1, 10), - AMPERE_ECAM32(1, 11), - AMPERE_ECAM32(1, 12), - AMPERE_ECAM32(1, 13), - AMPERE_ECAM32(1, 14), - AMPERE_ECAM32(1, 15), + ALTRA_ECAM_QUIRK(1, 0), + ALTRA_ECAM_QUIRK(1, 1), + ALTRA_ECAM_QUIRK(1, 2), + ALTRA_ECAM_QUIRK(1, 3), + ALTRA_ECAM_QUIRK(1, 4), + ALTRA_ECAM_QUIRK(1, 5), + ALTRA_ECAM_QUIRK(1, 6), + ALTRA_ECAM_QUIRK(1, 7), + ALTRA_ECAM_QUIRK(1, 8), + ALTRA_ECAM_QUIRK(1, 9), + ALTRA_ECAM_QUIRK(1, 10), + ALTRA_ECAM_QUIRK(1, 11), + ALTRA_ECAM_QUIRK(1, 12), + ALTRA_ECAM_QUIRK(1, 13), + ALTRA_ECAM_QUIRK(1, 14), + ALTRA_ECAM_QUIRK(1, 15), }; static char mcfg_oem_id[ACPI_OEM_ID_SIZE]; diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 0cb9df5462c39..57b7ba6d9ad10 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -483,13 +483,8 @@ static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm, if (IS_ENABLED(CONFIG_HOTPLUG_PCI_SHPC)) control |= OSC_PCI_SHPC_NATIVE_HP_CONTROL; - if (pci_aer_available()) { - if (aer_acpi_firmware_first()) - dev_info(&device->dev, - "PCIe AER handled by firmware\n"); - else - control |= OSC_PCI_EXPRESS_AER_CONTROL; - } + if (pci_aer_available()) + control |= OSC_PCI_EXPRESS_AER_CONTROL; /* * Per the Downstream Port Containment Related Enhancements ECN to diff --git a/drivers/acpi/pmic/intel_pmic.c b/drivers/acpi/pmic/intel_pmic.c index 452041398b347..36d5a5d50b2ff 100644 --- a/drivers/acpi/pmic/intel_pmic.c +++ b/drivers/acpi/pmic/intel_pmic.c @@ -211,31 +211,36 @@ static acpi_status intel_pmic_regs_handler(u32 function, void *handler_context, void *region_context) { struct intel_pmic_opregion *opregion = region_context; - int result = 0; + int result = -EINVAL; + + if (function == ACPI_WRITE) { + switch (address) { + case 0: + return AE_OK; + case 1: + opregion->ctx.addr |= (*value64 & 0xff) << 8; + return AE_OK; + case 2: + opregion->ctx.addr |= *value64 & 0xff; + return AE_OK; + case 3: + opregion->ctx.val = *value64 & 0xff; + return AE_OK; + case 4: + if (*value64) { + result = regmap_write(opregion->regmap, opregion->ctx.addr, + opregion->ctx.val); + } else { + result = regmap_read(opregion->regmap, opregion->ctx.addr, + &opregion->ctx.val); + } + opregion->ctx.addr = 0; + } + } - switch (address) { - case 0: - return AE_OK; - case 1: - opregion->ctx.addr |= (*value64 & 0xff) << 8; - return AE_OK; - case 2: - opregion->ctx.addr |= *value64 & 0xff; + if (function == ACPI_READ && address == 3) { + *value64 = opregion->ctx.val; return AE_OK; - case 3: - opregion->ctx.val = *value64 & 0xff; - return AE_OK; - case 4: - if (*value64) { - result = regmap_write(opregion->regmap, opregion->ctx.addr, - opregion->ctx.val); - } else { - result = regmap_read(opregion->regmap, opregion->ctx.addr, - &opregion->ctx.val); - if (result == 0) - *value64 = opregion->ctx.val; - } - memset(&opregion->ctx, 0x00, sizeof(opregion->ctx)); } if (result < 0) { diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index e5bb5d776b548..3d8b0aff5a3ac 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -1057,6 +1057,11 @@ static int flatten_lpi_states(struct acpi_processor *pr, return 0; } +int __weak acpi_processor_ffh_lpi_probe(unsigned int cpu) +{ + return -EOPNOTSUPP; +} + static int acpi_processor_get_lpi_info(struct acpi_processor *pr) { int ret, i; @@ -1065,6 +1070,11 @@ static int acpi_processor_get_lpi_info(struct acpi_processor *pr) struct acpi_device *d = NULL; struct acpi_lpi_states_array info[2], *tmp, *prev, *curr; + /* make sure our architecture has support */ + ret = acpi_processor_ffh_lpi_probe(pr->id); + if (ret == -EOPNOTSUPP) + return ret; + if (!osc_pc_lpi_support_confirmed) return -EOPNOTSUPP; @@ -1116,11 +1126,6 @@ static int acpi_processor_get_lpi_info(struct acpi_processor *pr) return 0; } -int __weak acpi_processor_ffh_lpi_probe(unsigned int cpu) -{ - return -ENODEV; -} - int __weak acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi) { return -ENODEV; diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index a08e3eb2a6f9f..1b0aeb8320448 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -430,6 +430,16 @@ void acpi_init_properties(struct acpi_device *adev) acpi_extract_apple_properties(adev); } +static void acpi_free_device_properties(struct list_head *list) +{ + struct acpi_device_properties *props, *tmp; + + list_for_each_entry_safe(props, tmp, list, list) { + list_del(&props->list); + kfree(props); + } +} + static void acpi_destroy_nondev_subnodes(struct list_head *list) { struct acpi_data_node *dn, *next; @@ -442,22 +452,18 @@ static void acpi_destroy_nondev_subnodes(struct list_head *list) wait_for_completion(&dn->kobj_done); list_del(&dn->sibling); ACPI_FREE((void *)dn->data.pointer); + acpi_free_device_properties(&dn->data.properties); kfree(dn); } } void acpi_free_properties(struct acpi_device *adev) { - struct acpi_device_properties *props, *tmp; - acpi_destroy_nondev_subnodes(&adev->data.subnodes); ACPI_FREE((void *)adev->data.pointer); adev->data.of_compatible = NULL; adev->data.pointer = NULL; - list_for_each_entry_safe(props, tmp, &adev->data.properties, list) { - list_del(&props->list); - kfree(props); - } + acpi_free_device_properties(&adev->data.properties); } /** @@ -692,7 +698,7 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, */ if (obj->type == ACPI_TYPE_LOCAL_REFERENCE) { if (index) - return -EINVAL; + return -ENOENT; ret = acpi_bus_get_device(obj->reference.handle, &device); if (ret) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 95d119ff76b65..5d4be80ee6cb4 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1577,6 +1577,7 @@ static bool acpi_device_enumeration_by_parent(struct acpi_device *device) { struct list_head resource_list; bool is_serial_bus_slave = false; + static const struct acpi_device_id ignore_serial_bus_ids[] = { /* * These devices have multiple I2cSerialBus resources and an i2c-client * must be instantiated for each, each with its own i2c_device_id. @@ -1585,11 +1586,18 @@ static bool acpi_device_enumeration_by_parent(struct acpi_device *device) * drivers/platform/x86/i2c-multi-instantiate.c driver, which knows * which i2c_device_id to use for each resource. */ - static const struct acpi_device_id i2c_multi_instantiate_ids[] = { {"BSG1160", }, {"BSG2150", }, {"INT33FE", }, {"INT3515", }, + /* + * HIDs of device with an UartSerialBusV2 resource for which userspace + * expects a regular tty cdev to be created (instead of the in kernel + * serdev) and which have a kernel driver which expects a platform_dev + * such as the rfkill-gpio driver. + */ + {"BCM4752", }, + {"LNV4752", }, {} }; @@ -1603,8 +1611,7 @@ static bool acpi_device_enumeration_by_parent(struct acpi_device *device) fwnode_property_present(&device->fwnode, "baud"))) return true; - /* Instantiate a pdev for the i2c-multi-instantiate drv to bind to */ - if (!acpi_match_device_ids(device, i2c_multi_instantiate_ids)) + if (!acpi_match_device_ids(device, ignore_serial_bus_ids)) return false; INIT_LIST_HEAD(&resource_list); diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index d1b74179d2179..34966128293b1 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -374,6 +374,18 @@ static const struct dmi_system_id acpisleep_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "20GGA00L00"), }, }, + /* + * ASUS B1400CEAE hangs on resume from suspend (see + * https://bugzilla.kernel.org/show_bug.cgi?id=215742). + */ + { + .callback = init_default_s3, + .ident = "ASUS B1400CEAE", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "ASUS EXPERTBOOK B1400CEAE"), + }, + }, {}, }; @@ -1027,6 +1039,7 @@ static bool acpi_s2idle_wake(void) if (pm_wakeup_pending()) return true; + pm_wakeup_clear(acpi_sci_irq); rearm_wake_irq(acpi_sci_irq); } diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c index 76c668c05fa03..cc0b98affd64d 100644 --- a/drivers/acpi/sysfs.c +++ b/drivers/acpi/sysfs.c @@ -439,18 +439,29 @@ static ssize_t acpi_data_show(struct file *filp, struct kobject *kobj, { struct acpi_data_attr *data_attr; void __iomem *base; - ssize_t rc; + ssize_t size; data_attr = container_of(bin_attr, struct acpi_data_attr, attr); + size = data_attr->attr.size; + + if (offset < 0) + return -EINVAL; + + if (offset >= size) + return 0; - base = acpi_os_map_memory(data_attr->addr, data_attr->attr.size); + if (count > size - offset) + count = size - offset; + + base = acpi_os_map_iomem(data_attr->addr, size); if (!base) return -ENOMEM; - rc = memory_read_from_buffer(buf, count, &offset, base, - data_attr->attr.size); - acpi_os_unmap_memory(base, data_attr->attr.size); - return rc; + memcpy_fromio(buf, base + offset, count); + + acpi_os_unmap_iomem(base, size); + + return count; } static int acpi_bert_data_init(void *th, struct acpi_data_attr *data_attr) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index e7978d983b263..3b972ca536896 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -372,7 +372,97 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "BA51_MV"), }, }, - + /* + * Clevo NL5xRU and NL5xNU/TUXEDO Aura 15 Gen1 and Gen2 have both a + * working native and video interface. However the default detection + * mechanism first registers the video interface before unregistering + * it again and switching to the native interface during boot. This + * results in a dangling SBIOS request for backlight change for some + * reason, causing the backlight to switch to ~2% once per boot on the + * first power cord connect or disconnect event. Setting the native + * interface explicitly circumvents this buggy behaviour, by avoiding + * the unregistering process. + */ + { + .callback = video_detect_force_native, + .ident = "Clevo NL5xRU", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "Clevo NL5xRU", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"), + DMI_MATCH(DMI_BOARD_NAME, "AURA1501"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "Clevo NL5xRU", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"), + DMI_MATCH(DMI_BOARD_NAME, "EDUBOOK1502"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "Clevo NL5xNU", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"), + }, + }, + /* + * The TongFang PF5PU1G, PF4NU1F, PF5NU1G, and PF5LUXG/TUXEDO BA15 Gen10, + * Pulse 14/15 Gen1, and Pulse 15 Gen2 have the same problem as the Clevo + * NL5xRU and NL5xNU/TUXEDO Aura 15 Gen1 and Gen2. See the description + * above. + */ + { + .callback = video_detect_force_native, + .ident = "TongFang PF5PU1G", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "PF5PU1G"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "TongFang PF4NU1F", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "PF4NU1F"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "TongFang PF4NU1F", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"), + DMI_MATCH(DMI_BOARD_NAME, "PULSE1401"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "TongFang PF5NU1G", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "PF5NU1G"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "TongFang PF5NU1G", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"), + DMI_MATCH(DMI_BOARD_NAME, "PULSE1501"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "TongFang PF5LUXG", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "PF5LUXG"), + }, + }, /* * Desktops which falsely report a backlight and which our heuristics * for this do not catch. diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index af58768a03937..702284bcd467c 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -375,9 +375,6 @@ static int amba_device_try_add(struct amba_device *dev, struct resource *parent) void __iomem *tmp; int i, ret; - WARN_ON(dev->irq[0] == (unsigned int)-1); - WARN_ON(dev->irq[1] == (unsigned int)-1); - ret = request_resource(parent, &dev->res); if (ret) goto err_out; diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 9316a7910389f..76ca37cf70053 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -424,6 +424,9 @@ enum binder_deferred_state { * (invariant after initialized) * @tsk task_struct for group_leader of process * (invariant after initialized) + * @cred struct cred associated with the `struct file` + * in binder_open() + * (invariant after initialized) * @deferred_work_node: element for binder_deferred_list * (protected by binder_deferred_lock) * @deferred_work: bitmap of deferred work to perform @@ -469,6 +472,7 @@ struct binder_proc { struct list_head waiting_threads; int pid; struct task_struct *tsk; + const struct cred *cred; struct hlist_node deferred_work_node; int deferred_work; bool is_dead; @@ -2241,6 +2245,7 @@ static void binder_deferred_fd_close(int fd) } static void binder_transaction_buffer_release(struct binder_proc *proc, + struct binder_thread *thread, struct binder_buffer *buffer, binder_size_t failed_at, bool is_failure) @@ -2258,7 +2263,7 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, binder_dec_node(buffer->target_node, 1, 0); off_start_offset = ALIGN(buffer->data_size, sizeof(void *)); - off_end_offset = is_failure ? failed_at : + off_end_offset = is_failure && failed_at ? failed_at : off_start_offset + buffer->offsets_size; for (buffer_offset = off_start_offset; buffer_offset < off_end_offset; buffer_offset += sizeof(binder_size_t)) { @@ -2344,9 +2349,8 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, binder_size_t fd_buf_size; binder_size_t num_valid; - if (proc->tsk != current->group_leader) { + if (is_failure) { /* - * Nothing to do if running in sender context * The fd fixups have not been applied so no * fds need to be closed. */ @@ -2400,8 +2404,16 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, &proc->alloc, &fd, buffer, offset, sizeof(fd)); WARN_ON(err); - if (!err) + if (!err) { binder_deferred_fd_close(fd); + /* + * Need to make sure the thread goes + * back to userspace to complete the + * deferred close + */ + if (thread) + thread->looper_need_return = true; + } } } break; default: @@ -2436,7 +2448,7 @@ static int binder_translate_binder(struct flat_binder_object *fp, ret = -EINVAL; goto done; } - if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) { + if (security_binder_transfer_binder(proc->cred, target_proc->cred)) { ret = -EPERM; goto done; } @@ -2482,7 +2494,7 @@ static int binder_translate_handle(struct flat_binder_object *fp, proc->pid, thread->pid, fp->handle); return -EINVAL; } - if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) { + if (security_binder_transfer_binder(proc->cred, target_proc->cred)) { ret = -EPERM; goto done; } @@ -2570,7 +2582,7 @@ static int binder_translate_fd(u32 fd, binder_size_t fd_offset, ret = -EBADF; goto err_fget; } - ret = security_binder_transfer_file(proc->tsk, target_proc->tsk, file); + ret = security_binder_transfer_file(proc->cred, target_proc->cred, file); if (ret < 0) { ret = -EPERM; goto err_security; @@ -2650,8 +2662,8 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda, if (!ret) ret = binder_translate_fd(fd, offset, t, thread, in_reply_to); - if (ret < 0) - return ret; + if (ret) + return ret > 0 ? -EINVAL : ret; } return 0; } @@ -2969,8 +2981,8 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_invalid_target_handle; } - if (security_binder_transaction(proc->tsk, - target_proc->tsk) < 0) { + if (security_binder_transaction(proc->cred, + target_proc->cred) < 0) { return_error = BR_FAILED_REPLY; return_error_param = -EPERM; return_error_line = __LINE__; @@ -3096,7 +3108,7 @@ static void binder_transaction(struct binder_proc *proc, u32 secid; size_t added_size; - security_task_getsecid(proc->tsk, &secid); + security_cred_getsecid(proc->cred, &secid); ret = security_secid_to_secctx(secid, &secctx, &secctx_sz); if (ret) { return_error = BR_FAILED_REPLY; @@ -3471,7 +3483,7 @@ static void binder_transaction(struct binder_proc *proc, err_copy_data_failed: binder_free_txn_fixups(t); trace_binder_transaction_failed_buffer_release(t->buffer); - binder_transaction_buffer_release(target_proc, t->buffer, + binder_transaction_buffer_release(target_proc, NULL, t->buffer, buffer_offset, true); if (target_node) binder_dec_node_tmpref(target_node); @@ -3541,6 +3553,7 @@ static void binder_transaction(struct binder_proc *proc, * binder_free_buf() - free the specified buffer * @proc: binder proc that owns buffer * @buffer: buffer to be freed + * @is_failure: failed to send transaction * * If buffer for an async transaction, enqueue the next async * transaction from the node. @@ -3548,7 +3561,9 @@ static void binder_transaction(struct binder_proc *proc, * Cleanup buffer and free it. */ static void -binder_free_buf(struct binder_proc *proc, struct binder_buffer *buffer) +binder_free_buf(struct binder_proc *proc, + struct binder_thread *thread, + struct binder_buffer *buffer, bool is_failure) { binder_inner_proc_lock(proc); if (buffer->transaction) { @@ -3576,7 +3591,7 @@ binder_free_buf(struct binder_proc *proc, struct binder_buffer *buffer) binder_node_inner_unlock(buf_node); } trace_binder_transaction_buffer_release(buffer); - binder_transaction_buffer_release(proc, buffer, 0, false); + binder_transaction_buffer_release(proc, thread, buffer, 0, is_failure); binder_alloc_free_buf(&proc->alloc, buffer); } @@ -3777,7 +3792,7 @@ static int binder_thread_write(struct binder_proc *proc, proc->pid, thread->pid, (u64)data_ptr, buffer->debug_id, buffer->transaction ? "active" : "finished"); - binder_free_buf(proc, buffer); + binder_free_buf(proc, thread, buffer, false); break; } @@ -4465,7 +4480,7 @@ static int binder_thread_read(struct binder_proc *proc, buffer->transaction = NULL; binder_cleanup_transaction(t, "fd fixups failed", BR_FAILED_REPLY); - binder_free_buf(proc, buffer); + binder_free_buf(proc, thread, buffer, true); binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, "%d:%d %stransaction %d fd fixups failed %d/%d, line %d\n", proc->pid, thread->pid, @@ -4698,6 +4713,7 @@ static void binder_free_proc(struct binder_proc *proc) } binder_alloc_deferred_release(&proc->alloc); put_task_struct(proc->tsk); + put_cred(proc->cred); binder_stats_deleted(BINDER_STAT_PROC); kfree(proc); } @@ -4774,23 +4790,20 @@ static int binder_thread_release(struct binder_proc *proc, __release(&t->lock); /* - * If this thread used poll, make sure we remove the waitqueue - * from any epoll data structures holding it with POLLFREE. - * waitqueue_active() is safe to use here because we're holding - * the inner lock. + * If this thread used poll, make sure we remove the waitqueue from any + * poll data structures holding it. */ - if ((thread->looper & BINDER_LOOPER_STATE_POLL) && - waitqueue_active(&thread->wait)) { - wake_up_poll(&thread->wait, EPOLLHUP | POLLFREE); - } + if (thread->looper & BINDER_LOOPER_STATE_POLL) + wake_up_pollfree(&thread->wait); binder_inner_proc_unlock(thread->proc); /* - * This is needed to avoid races between wake_up_poll() above and - * and ep_remove_waitqueue() called for other reasons (eg the epoll file - * descriptor being closed); ep_remove_waitqueue() holds an RCU read - * lock, so we can be sure it's done after calling synchronize_rcu(). + * This is needed to avoid races between wake_up_pollfree() above and + * someone else removing the last entry from the queue for other reasons + * (e.g. ep_remove_wait_queue() being called due to an epoll file + * descriptor being closed). Such other users hold an RCU read lock, so + * we can be sure they're done after we call synchronize_rcu(). */ if (thread->looper & BINDER_LOOPER_STATE_POLL) synchronize_rcu(); @@ -4908,7 +4921,7 @@ static int binder_ioctl_set_ctx_mgr(struct file *filp, ret = -EBUSY; goto out; } - ret = security_binder_set_context_mgr(proc->tsk); + ret = security_binder_set_context_mgr(proc->cred); if (ret < 0) goto out; if (uid_valid(context->binder_context_mgr_uid)) { @@ -5225,6 +5238,7 @@ static int binder_open(struct inode *nodp, struct file *filp) spin_lock_init(&proc->outer_lock); get_task_struct(current->group_leader); proc->tsk = current->group_leader; + proc->cred = get_cred(filp->f_cred); INIT_LIST_HEAD(&proc->todo); proc->default_priority = task_nice(current); /* binderfs stashes devices in i_private */ diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index a09a0199b1c33..56a1516268f70 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -613,7 +613,7 @@ static void binder_free_buf_locked(struct binder_alloc *alloc, BUG_ON(buffer->user_data > alloc->buffer + alloc->buffer_size); if (buffer->async_transaction) { - alloc->free_async_space += size + sizeof(struct binder_buffer); + alloc->free_async_space += buffer_size + sizeof(struct binder_buffer); binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC, "%d: binder_free_buf size %zd async free %zd\n", diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 8beb418ce167b..6f572967b5552 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -420,6 +420,7 @@ static const struct pci_device_id ahci_pci_tbl[] = { /* AMD */ { PCI_VDEVICE(AMD, 0x7800), board_ahci }, /* AMD Hudson-2 */ { PCI_VDEVICE(AMD, 0x7900), board_ahci }, /* AMD CZ */ + { PCI_VDEVICE(AMD, 0x7901), board_ahci_mobile }, /* AMD Green Sardine */ /* AMD is using RAID class only for ahci controllers */ { PCI_VENDOR_ID_AMD, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_STORAGE_RAID << 8, 0xffffff, board_ahci }, diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index 416cfbf2f1c29..8a963d2a951db 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -440,10 +440,7 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev, hpriv->phy_regulator = devm_regulator_get(dev, "phy"); if (IS_ERR(hpriv->phy_regulator)) { rc = PTR_ERR(hpriv->phy_regulator); - if (rc == -EPROBE_DEFER) - goto err_out; - rc = 0; - hpriv->phy_regulator = NULL; + goto err_out; } if (flags & AHCI_PLATFORM_GET_RESETS) { diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index f67b3fb33d579..fc37e075f3e15 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2057,7 +2057,7 @@ unsigned int ata_read_log_page(struct ata_device *dev, u8 log, retry: ata_tf_init(dev, &tf); - if (dev->dma_mode && ata_id_has_read_log_dma_ext(dev->id) && + if (ata_dma_enabled(dev) && ata_id_has_read_log_dma_ext(dev->id) && !(dev->horkage & ATA_HORKAGE_NO_DMA_LOG)) { tf.command = ATA_CMD_READ_LOG_DMA_EXT; tf.protocol = ATA_PROT_DMA; @@ -2252,6 +2252,25 @@ static void ata_dev_config_ncq_prio(struct ata_device *dev) } +static bool ata_dev_check_adapter(struct ata_device *dev, + unsigned short vendor_id) +{ + struct pci_dev *pcidev = NULL; + struct device *parent_dev = NULL; + + for (parent_dev = dev->tdev.parent; parent_dev != NULL; + parent_dev = parent_dev->parent) { + if (dev_is_pci(parent_dev)) { + pcidev = to_pci_dev(parent_dev); + if (pcidev->vendor == vendor_id) + return true; + break; + } + } + + return false; +} + static int ata_dev_config_ncq(struct ata_device *dev, char *desc, size_t desc_sz) { @@ -2268,6 +2287,13 @@ static int ata_dev_config_ncq(struct ata_device *dev, snprintf(desc, desc_sz, "NCQ (not used)"); return 0; } + + if (dev->horkage & ATA_HORKAGE_NO_NCQ_ON_ATI && + ata_dev_check_adapter(dev, PCI_VENDOR_ID_ATI)) { + snprintf(desc, desc_sz, "NCQ (not used)"); + return 0; + } + if (ap->flags & ATA_FLAG_NCQ) { hdepth = min(ap->scsi_host->can_queue, ATA_MAX_QUEUE); dev->flags |= ATA_DFLAG_NCQ; @@ -4411,6 +4437,8 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "VRFDFC22048UCHC-TE*", NULL, ATA_HORKAGE_NODMA }, /* Odd clown on sil3726/4726 PMPs */ { "Config Disk", NULL, ATA_HORKAGE_DISABLE }, + /* Similar story with ASMedia 1092 */ + { "ASMT109x- Config", NULL, ATA_HORKAGE_DISABLE }, /* Weird ATAPI devices */ { "TORiSAN DVD-ROM DRD-N216", NULL, ATA_HORKAGE_MAX_SEC_128 }, @@ -4552,15 +4580,25 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Crucial_CT*MX100*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, + { "Samsung SSD 840 EVO*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | + ATA_HORKAGE_NO_DMA_LOG | + ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Samsung SSD 840*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Samsung SSD 850*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, + { "Samsung SSD 860*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | + ATA_HORKAGE_ZERO_AFTER_TRIM | + ATA_HORKAGE_NO_NCQ_ON_ATI, }, + { "Samsung SSD 870*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | + ATA_HORKAGE_ZERO_AFTER_TRIM | + ATA_HORKAGE_NO_NCQ_ON_ATI, }, { "FCCT*M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, /* devices that don't properly handle TRIM commands */ { "SuperSSpeed S238*", NULL, ATA_HORKAGE_NOTRIM, }, + { "M88V29*", NULL, ATA_HORKAGE_NOTRIM, }, /* * As defined, the DRAT (Deterministic Read After Trim) and RZAT @@ -6197,7 +6235,7 @@ struct ata_host *ata_host_alloc_pinfo(struct device *dev, const struct ata_port_info * const * ppi, int n_ports) { - const struct ata_port_info *pi; + const struct ata_port_info *pi = &ata_dummy_port_info; struct ata_host *host; int i, j; @@ -6205,7 +6243,7 @@ struct ata_host *ata_host_alloc_pinfo(struct device *dev, if (!host) return NULL; - for (i = 0, j = 0, pi = NULL; i < host->n_ports; i++) { + for (i = 0, j = 0; i < host->n_ports; i++) { struct ata_port *ap = host->ports[i]; if (ppi[j]) @@ -6394,7 +6432,7 @@ int ata_host_start(struct ata_host *host) have_stop = 1; } - if (host->ops->host_stop) + if (host->ops && host->ops->host_stop) have_stop = 1; if (have_stop) { @@ -6912,6 +6950,8 @@ static int __init ata_parse_force_one(char **cur, { "ncq", .horkage_off = ATA_HORKAGE_NONCQ }, { "noncqtrim", .horkage_on = ATA_HORKAGE_NO_NCQ_TRIM }, { "ncqtrim", .horkage_off = ATA_HORKAGE_NO_NCQ_TRIM }, + { "noncqati", .horkage_on = ATA_HORKAGE_NO_NCQ_ON_ATI }, + { "ncqati", .horkage_off = ATA_HORKAGE_NO_NCQ_ON_ATI }, { "dump_id", .horkage_on = ATA_HORKAGE_DUMP_ID }, { "pio0", .xfer_mask = 1 << (ATA_SHIFT_PIO + 0) }, { "pio1", .xfer_mask = 1 << (ATA_SHIFT_PIO + 1) }, diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 3bfd9da584734..c6a21a784ec65 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -97,6 +97,12 @@ static const unsigned long ata_eh_identify_timeouts[] = { ULONG_MAX, }; +static const unsigned long ata_eh_revalidate_timeouts[] = { + 15000, /* Some drives are slow to read log pages when waking-up */ + 15000, /* combined time till here is enough even for media access */ + ULONG_MAX, +}; + static const unsigned long ata_eh_flush_timeouts[] = { 15000, /* be generous with flush */ 15000, /* ditto */ @@ -133,6 +139,8 @@ static const struct ata_eh_cmd_timeout_ent ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = { { .commands = CMDS(ATA_CMD_ID_ATA, ATA_CMD_ID_ATAPI), .timeouts = ata_eh_identify_timeouts, }, + { .commands = CMDS(ATA_CMD_READ_LOG_EXT, ATA_CMD_READ_LOG_DMA_EXT), + .timeouts = ata_eh_revalidate_timeouts, }, { .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT), .timeouts = ata_eh_other_timeouts, }, { .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT), diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 464efedc778b0..2b247014ba452 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3164,8 +3164,19 @@ static unsigned int ata_scsi_pass_thru(struct ata_queued_cmd *qc) goto invalid_fld; } - if (ata_is_ncq(tf->protocol) && (cdb[2 + cdb_offset] & 0x3) == 0) - tf->protocol = ATA_PROT_NCQ_NODATA; + if ((cdb[2 + cdb_offset] & 0x3) == 0) { + /* + * When T_LENGTH is zero (No data is transferred), dir should + * be DMA_NONE. + */ + if (scmd->sc_data_direction != DMA_NONE) { + fp = 2 + cdb_offset; + goto invalid_fld; + } + + if (ata_is_ncq(tf->protocol)) + tf->protocol = ATA_PROT_NCQ_NODATA; + } /* enable LBA */ tf->flags |= ATA_TFLAG_LBA; diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c index 12a505bb9c5b1..c4f36312b8a42 100644 --- a/drivers/ata/libata-transport.c +++ b/drivers/ata/libata-transport.c @@ -196,7 +196,7 @@ static struct { { XFER_PIO_0, "XFER_PIO_0" }, { XFER_PIO_SLOW, "XFER_PIO_SLOW" } }; -ata_bitfield_name_match(xfer,ata_xfer_names) +ata_bitfield_name_search(xfer, ata_xfer_names) /* * ATA Port attributes diff --git a/drivers/ata/pata_hpt37x.c b/drivers/ata/pata_hpt37x.c index fad6c6a873130..fef46de2f6b23 100644 --- a/drivers/ata/pata_hpt37x.c +++ b/drivers/ata/pata_hpt37x.c @@ -917,6 +917,20 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id) irqmask &= ~0x10; pci_write_config_byte(dev, 0x5a, irqmask); + /* + * HPT371 chips physically have only one channel, the secondary one, + * but the primary channel registers do exist! Go figure... + * So, we manually disable the non-existing channel here + * (if the BIOS hasn't done this already). + */ + if (dev->device == PCI_DEVICE_ID_TTI_HPT371) { + u8 mcr1; + + pci_read_config_byte(dev, 0x50, &mcr1); + mcr1 &= ~0x04; + pci_write_config_byte(dev, 0x50, mcr1); + } + /* * default to pci clock. make sure MA15/16 are set to output * to prevent drives having problems with 40-pin cables. Needed @@ -948,14 +962,14 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id) if ((freq >> 12) != 0xABCDE) { int i; - u8 sr; + u16 sr; u32 total = 0; pr_warn("BIOS has not set timing clocks\n"); /* This is the process the HPT371 BIOS is reported to use */ for (i = 0; i < 128; i++) { - pci_read_config_byte(dev, 0x78, &sr); + pci_read_config_word(dev, 0x78, &sr); total += sr & 0x1FF; udelay(15); } diff --git a/drivers/ata/pata_legacy.c b/drivers/ata/pata_legacy.c index 4fd12b20df239..d91ba47f2fc44 100644 --- a/drivers/ata/pata_legacy.c +++ b/drivers/ata/pata_legacy.c @@ -315,7 +315,8 @@ static unsigned int pdc_data_xfer_vlb(struct ata_queued_cmd *qc, iowrite32_rep(ap->ioaddr.data_addr, buf, buflen >> 2); if (unlikely(slop)) { - __le32 pad; + __le32 pad = 0; + if (rw == READ) { pad = cpu_to_le32(ioread32(ap->ioaddr.data_addr)); memcpy(buf + buflen - slop, &pad, slop); @@ -705,7 +706,8 @@ static unsigned int vlb32_data_xfer(struct ata_queued_cmd *qc, ioread32_rep(ap->ioaddr.data_addr, buf, buflen >> 2); if (unlikely(slop)) { - __le32 pad; + __le32 pad = 0; + if (rw == WRITE) { memcpy(&pad, buf + buflen - slop, slop); iowrite32(le32_to_cpu(pad), ap->ioaddr.data_addr); diff --git a/drivers/ata/pata_marvell.c b/drivers/ata/pata_marvell.c index b066809ba9a11..c56f4043b0cc0 100644 --- a/drivers/ata/pata_marvell.c +++ b/drivers/ata/pata_marvell.c @@ -83,6 +83,8 @@ static int marvell_cable_detect(struct ata_port *ap) switch(ap->port_no) { case 0: + if (!ap->ioaddr.bmdma_addr) + return ATA_CBL_PATA_UNK; if (ioread8(ap->ioaddr.bmdma_addr + 1) & 1) return ATA_CBL_PATA40; return ATA_CBL_PATA80; diff --git a/drivers/ata/pata_octeon_cf.c b/drivers/ata/pata_octeon_cf.c index ac3b1fda820ff..c240d8cbfd417 100644 --- a/drivers/ata/pata_octeon_cf.c +++ b/drivers/ata/pata_octeon_cf.c @@ -888,12 +888,14 @@ static int octeon_cf_probe(struct platform_device *pdev) int i; res_dma = platform_get_resource(dma_dev, IORESOURCE_MEM, 0); if (!res_dma) { + put_device(&dma_dev->dev); of_node_put(dma_node); return -EINVAL; } cf_port->dma_base = (u64)devm_ioremap_nocache(&pdev->dev, res_dma->start, resource_size(res_dma)); if (!cf_port->dma_base) { + put_device(&dma_dev->dev); of_node_put(dma_node); return -EINVAL; } @@ -903,6 +905,7 @@ static int octeon_cf_probe(struct platform_device *pdev) irq = i; irq_handler = octeon_cf_interrupt; } + put_device(&dma_dev->dev); } of_node_put(dma_node); } diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c index 9dcef6ac643b9..464260f668708 100644 --- a/drivers/ata/sata_dwc_460ex.c +++ b/drivers/ata/sata_dwc_460ex.c @@ -145,7 +145,11 @@ struct sata_dwc_device { #endif }; -#define SATA_DWC_QCMD_MAX 32 +/* + * Allow one extra special slot for commands and DMA management + * to account for libata internal commands. + */ +#define SATA_DWC_QCMD_MAX (ATA_MAX_QUEUE + 1) struct sata_dwc_device_port { struct sata_dwc_device *hsdev; @@ -1249,24 +1253,20 @@ static int sata_dwc_probe(struct platform_device *ofdev) irq = irq_of_parse_and_map(np, 0); if (irq == NO_IRQ) { dev_err(&ofdev->dev, "no SATA DMA irq\n"); - err = -ENODEV; - goto error_out; + return -ENODEV; } #ifdef CONFIG_SATA_DWC_OLD_DMA if (!of_find_property(np, "dmas", NULL)) { err = sata_dwc_dma_init_old(ofdev, hsdev); if (err) - goto error_out; + return err; } #endif hsdev->phy = devm_phy_optional_get(hsdev->dev, "sata-phy"); - if (IS_ERR(hsdev->phy)) { - err = PTR_ERR(hsdev->phy); - hsdev->phy = NULL; - goto error_out; - } + if (IS_ERR(hsdev->phy)) + return PTR_ERR(hsdev->phy); err = phy_init(hsdev->phy); if (err) diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index d55ee244d6931..0ba231e80b191 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -1394,6 +1394,14 @@ static int sata_fsl_init_controller(struct ata_host *host) return 0; } +static void sata_fsl_host_stop(struct ata_host *host) +{ + struct sata_fsl_host_priv *host_priv = host->private_data; + + iounmap(host_priv->hcr_base); + kfree(host_priv); +} + /* * scsi mid-layer and libata interface structures */ @@ -1426,6 +1434,8 @@ static struct ata_port_operations sata_fsl_ops = { .port_start = sata_fsl_port_start, .port_stop = sata_fsl_port_stop, + .host_stop = sata_fsl_host_stop, + .pmp_attach = sata_fsl_pmp_attach, .pmp_detach = sata_fsl_pmp_detach, }; @@ -1480,9 +1490,9 @@ static int sata_fsl_probe(struct platform_device *ofdev) host_priv->ssr_base = ssr_base; host_priv->csr_base = csr_base; - irq = irq_of_parse_and_map(ofdev->dev.of_node, 0); - if (!irq) { - dev_err(&ofdev->dev, "invalid irq from platform\n"); + irq = platform_get_irq(ofdev, 0); + if (irq < 0) { + retval = irq; goto error_exit_with_cleanup; } host_priv->irq = irq; @@ -1557,10 +1567,6 @@ static int sata_fsl_remove(struct platform_device *ofdev) ata_host_detach(host); - irq_dispose_mapping(host_priv->irq); - iounmap(host_priv->hcr_base); - kfree(host_priv); - return 0; } diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index d1ed9679c7177..3fca3e13ed6ae 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -3892,8 +3892,8 @@ static int mv_chip_id(struct ata_host *host, unsigned int board_idx) break; default: - dev_err(host->dev, "BUG: invalid board index %u\n", board_idx); - return 1; + dev_alert(host->dev, "BUG: invalid board index %u\n", board_idx); + return -EINVAL; } hpriv->hp_flags = hp_flags; diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index de52428b8833d..4816db0553ef8 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -1116,6 +1116,8 @@ DPRINTK("iovcnt = %d\n",skb_shinfo(skb)->nr_frags); } paddr = dma_map_single(&eni_dev->pci_dev->dev,skb->data,skb->len, DMA_TO_DEVICE); + if (dma_mapping_error(&eni_dev->pci_dev->dev, paddr)) + return enq_next; ENI_PRV_PADDR(skb) = paddr; /* prepare DMA queue entries */ j = 0; diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c index 5acb459856752..8995c39330fac 100644 --- a/drivers/atm/firestream.c +++ b/drivers/atm/firestream.c @@ -1677,6 +1677,8 @@ static int fs_init(struct fs_dev *dev) dev->hw_base = pci_resource_start(pci_dev, 0); dev->base = ioremap(dev->hw_base, 0x1000); + if (!dev->base) + return 1; reset_chip (dev); diff --git a/drivers/auxdisplay/ht16k33.c b/drivers/auxdisplay/ht16k33.c index 33b887b389061..59109b410c67f 100644 --- a/drivers/auxdisplay/ht16k33.c +++ b/drivers/auxdisplay/ht16k33.c @@ -219,6 +219,15 @@ static const struct backlight_ops ht16k33_bl_ops = { .check_fb = ht16k33_bl_check_fb, }; +/* + * Blank events will be passed to the actual device handling the backlight when + * we return zero here. + */ +static int ht16k33_blank(int blank, struct fb_info *info) +{ + return 0; +} + static int ht16k33_mmap(struct fb_info *info, struct vm_area_struct *vma) { struct ht16k33_priv *priv = info->par; @@ -231,6 +240,7 @@ static struct fb_ops ht16k33_fb_ops = { .owner = THIS_MODULE, .fb_read = fb_sys_read, .fb_write = fb_sys_write, + .fb_blank = ht16k33_blank, .fb_fillrect = sys_fillrect, .fb_copyarea = sys_copyarea, .fb_imageblit = sys_imageblit, @@ -418,6 +428,33 @@ static int ht16k33_probe(struct i2c_client *client, if (err) return err; + /* Backlight */ + memset(&bl_props, 0, sizeof(struct backlight_properties)); + bl_props.type = BACKLIGHT_RAW; + bl_props.max_brightness = MAX_BRIGHTNESS; + + bl = devm_backlight_device_register(&client->dev, DRIVER_NAME"-bl", + &client->dev, priv, + &ht16k33_bl_ops, &bl_props); + if (IS_ERR(bl)) { + dev_err(&client->dev, "failed to register backlight\n"); + return PTR_ERR(bl); + } + + err = of_property_read_u32(node, "default-brightness-level", + &dft_brightness); + if (err) { + dft_brightness = MAX_BRIGHTNESS; + } else if (dft_brightness > MAX_BRIGHTNESS) { + dev_warn(&client->dev, + "invalid default brightness level: %u, using %u\n", + dft_brightness, MAX_BRIGHTNESS); + dft_brightness = MAX_BRIGHTNESS; + } + + bl->props.brightness = dft_brightness; + ht16k33_bl_update_status(bl); + /* Framebuffer (2 bytes per column) */ BUILD_BUG_ON(PAGE_SIZE < HT16K33_FB_SIZE); fbdev->buffer = (unsigned char *) get_zeroed_page(GFP_KERNEL); @@ -450,6 +487,7 @@ static int ht16k33_probe(struct i2c_client *client, fbdev->info->screen_size = HT16K33_FB_SIZE; fbdev->info->fix = ht16k33_fb_fix; fbdev->info->var = ht16k33_fb_var; + fbdev->info->bl_dev = bl; fbdev->info->pseudo_palette = NULL; fbdev->info->flags = FBINFO_FLAG_DEFAULT; fbdev->info->par = priv; @@ -462,34 +500,6 @@ static int ht16k33_probe(struct i2c_client *client, if (err) goto err_fbdev_unregister; - /* Backlight */ - memset(&bl_props, 0, sizeof(struct backlight_properties)); - bl_props.type = BACKLIGHT_RAW; - bl_props.max_brightness = MAX_BRIGHTNESS; - - bl = devm_backlight_device_register(&client->dev, DRIVER_NAME"-bl", - &client->dev, priv, - &ht16k33_bl_ops, &bl_props); - if (IS_ERR(bl)) { - dev_err(&client->dev, "failed to register backlight\n"); - err = PTR_ERR(bl); - goto err_fbdev_unregister; - } - - err = of_property_read_u32(node, "default-brightness-level", - &dft_brightness); - if (err) { - dft_brightness = MAX_BRIGHTNESS; - } else if (dft_brightness > MAX_BRIGHTNESS) { - dev_warn(&client->dev, - "invalid default brightness level: %u, using %u\n", - dft_brightness, MAX_BRIGHTNESS); - dft_brightness = MAX_BRIGHTNESS; - } - - bl->props.brightness = dft_brightness; - ht16k33_bl_update_status(bl); - ht16k33_fb_queue(priv); return 0; diff --git a/drivers/auxdisplay/img-ascii-lcd.c b/drivers/auxdisplay/img-ascii-lcd.c index efb928e25aef3..9556d6827f005 100644 --- a/drivers/auxdisplay/img-ascii-lcd.c +++ b/drivers/auxdisplay/img-ascii-lcd.c @@ -280,6 +280,16 @@ static int img_ascii_lcd_display(struct img_ascii_lcd_ctx *ctx, if (msg[count - 1] == '\n') count--; + if (!count) { + /* clear the LCD */ + devm_kfree(&ctx->pdev->dev, ctx->message); + ctx->message = NULL; + ctx->message_len = 0; + memset(ctx->curr, ' ', ctx->cfg->num_chars); + ctx->cfg->update(ctx); + return 0; + } + new_msg = devm_kmalloc(&ctx->pdev->dev, count + 1, GFP_KERNEL); if (!new_msg) return -ENOMEM; diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 83e26fd188cc9..503404f3280e3 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -48,7 +48,7 @@ static ssize_t cpu_capacity_show(struct device *dev, { struct cpu *cpu = container_of(dev, struct cpu, dev); - return sprintf(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id)); + return sysfs_emit(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id)); } static void update_topology_flags_workfn(struct work_struct *work); @@ -457,7 +457,7 @@ void update_siblings_masks(unsigned int cpuid) for_each_online_cpu(cpu) { cpu_topo = &cpu_topology[cpu]; - if (cpuid_topo->llc_id == cpu_topo->llc_id) { + if (cpu_topo->llc_id != -1 && cpuid_topo->llc_id == cpu_topo->llc_id) { cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling); cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling); } diff --git a/drivers/base/bus.c b/drivers/base/bus.c index a1d1e82563244..7d7d28f498edd 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -620,7 +620,7 @@ int bus_add_driver(struct device_driver *drv) if (drv->bus->p->drivers_autoprobe) { error = driver_attach(drv); if (error) - goto out_unregister; + goto out_del_list; } module_add_driver(drv->owner, drv); @@ -647,6 +647,8 @@ int bus_add_driver(struct device_driver *drv) return 0; +out_del_list: + klist_del(&priv->knode_bus); out_unregister: kobject_put(&priv->kobj); /* drv->p is freed in driver_release() */ diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index 8d553c92cd322..6a8c2b5881be3 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -377,7 +377,7 @@ static ssize_t size_show(struct device *dev, { struct cacheinfo *this_leaf = dev_get_drvdata(dev); - return sprintf(buf, "%uK\n", this_leaf->size >> 10); + return sysfs_emit(buf, "%uK\n", this_leaf->size >> 10); } static ssize_t shared_cpumap_show_func(struct device *dev, bool list, char *buf) @@ -407,11 +407,11 @@ static ssize_t type_show(struct device *dev, switch (this_leaf->type) { case CACHE_TYPE_DATA: - return sprintf(buf, "Data\n"); + return sysfs_emit(buf, "Data\n"); case CACHE_TYPE_INST: - return sprintf(buf, "Instruction\n"); + return sysfs_emit(buf, "Instruction\n"); case CACHE_TYPE_UNIFIED: - return sprintf(buf, "Unified\n"); + return sysfs_emit(buf, "Unified\n"); default: return -EINVAL; } @@ -425,11 +425,11 @@ static ssize_t allocation_policy_show(struct device *dev, int n = 0; if ((ci_attr & CACHE_READ_ALLOCATE) && (ci_attr & CACHE_WRITE_ALLOCATE)) - n = sprintf(buf, "ReadWriteAllocate\n"); + n = sysfs_emit(buf, "ReadWriteAllocate\n"); else if (ci_attr & CACHE_READ_ALLOCATE) - n = sprintf(buf, "ReadAllocate\n"); + n = sysfs_emit(buf, "ReadAllocate\n"); else if (ci_attr & CACHE_WRITE_ALLOCATE) - n = sprintf(buf, "WriteAllocate\n"); + n = sysfs_emit(buf, "WriteAllocate\n"); return n; } @@ -441,9 +441,9 @@ static ssize_t write_policy_show(struct device *dev, int n = 0; if (ci_attr & CACHE_WRITE_THROUGH) - n = sprintf(buf, "WriteThrough\n"); + n = sysfs_emit(buf, "WriteThrough\n"); else if (ci_attr & CACHE_WRITE_BACK) - n = sprintf(buf, "WriteBack\n"); + n = sysfs_emit(buf, "WriteBack\n"); return n; } diff --git a/drivers/base/core.c b/drivers/base/core.c index 8b651bfc1d88e..1b016fdd1a750 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -1027,7 +1027,7 @@ ssize_t device_show_ulong(struct device *dev, char *buf) { struct dev_ext_attribute *ea = to_ext_attr(attr); - return snprintf(buf, PAGE_SIZE, "%lx\n", *(unsigned long *)(ea->var)); + return sysfs_emit(buf, "%lx\n", *(unsigned long *)(ea->var)); } EXPORT_SYMBOL_GPL(device_show_ulong); @@ -1057,7 +1057,7 @@ ssize_t device_show_int(struct device *dev, { struct dev_ext_attribute *ea = to_ext_attr(attr); - return snprintf(buf, PAGE_SIZE, "%d\n", *(int *)(ea->var)); + return sysfs_emit(buf, "%d\n", *(int *)(ea->var)); } EXPORT_SYMBOL_GPL(device_show_int); @@ -1078,7 +1078,7 @@ ssize_t device_show_bool(struct device *dev, struct device_attribute *attr, { struct dev_ext_attribute *ea = to_ext_attr(attr); - return snprintf(buf, PAGE_SIZE, "%d\n", *(bool *)(ea->var)); + return sysfs_emit(buf, "%d\n", *(bool *)(ea->var)); } EXPORT_SYMBOL_GPL(device_show_bool); @@ -1310,7 +1310,7 @@ static ssize_t online_show(struct device *dev, struct device_attribute *attr, device_lock(dev); val = !dev->offline; device_unlock(dev); - return sprintf(buf, "%u\n", val); + return sysfs_emit(buf, "%u\n", val); } static ssize_t online_store(struct device *dev, struct device_attribute *attr, diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index f00da44ae6fe0..9b5edf1dfe9e9 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -156,7 +156,7 @@ static ssize_t show_crash_notes(struct device *dev, struct device_attribute *att * operation should be safe. No locking required. */ addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpunum)); - rc = sprintf(buf, "%Lx\n", addr); + rc = sysfs_emit(buf, "%Lx\n", addr); return rc; } static DEVICE_ATTR(crash_notes, 0400, show_crash_notes, NULL); @@ -167,7 +167,7 @@ static ssize_t show_crash_notes_size(struct device *dev, { ssize_t rc; - rc = sprintf(buf, "%zu\n", sizeof(note_buf_t)); + rc = sysfs_emit(buf, "%zu\n", sizeof(note_buf_t)); return rc; } static DEVICE_ATTR(crash_notes_size, 0400, show_crash_notes_size, NULL); @@ -231,8 +231,7 @@ static struct cpu_attr cpu_attrs[] = { static ssize_t print_cpus_kernel_max(struct device *dev, struct device_attribute *attr, char *buf) { - int n = snprintf(buf, PAGE_SIZE-2, "%d\n", NR_CPUS - 1); - return n; + return sprintf(buf, "%d\n", NR_CPUS - 1); } static DEVICE_ATTR(kernel_max, 0444, print_cpus_kernel_max, NULL); @@ -272,7 +271,7 @@ static DEVICE_ATTR(offline, 0444, print_cpus_offline, NULL); static ssize_t print_cpus_isolated(struct device *dev, struct device_attribute *attr, char *buf) { - int n = 0, len = PAGE_SIZE-2; + int n; cpumask_var_t isolated; if (!alloc_cpumask_var(&isolated, GFP_KERNEL)) @@ -280,7 +279,7 @@ static ssize_t print_cpus_isolated(struct device *dev, cpumask_andnot(isolated, cpu_possible_mask, housekeeping_cpumask(HK_FLAG_DOMAIN)); - n = scnprintf(buf, len, "%*pbl\n", cpumask_pr_args(isolated)); + n = sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(isolated)); free_cpumask_var(isolated); @@ -292,11 +291,7 @@ static DEVICE_ATTR(isolated, 0444, print_cpus_isolated, NULL); static ssize_t print_cpus_nohz_full(struct device *dev, struct device_attribute *attr, char *buf) { - int n = 0, len = PAGE_SIZE-2; - - n = scnprintf(buf, len, "%*pbl\n", cpumask_pr_args(tick_nohz_full_mask)); - - return n; + return sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(tick_nohz_full_mask)); } static DEVICE_ATTR(nohz_full, 0444, print_cpus_nohz_full, NULL); #endif @@ -328,8 +323,8 @@ static ssize_t print_cpu_modalias(struct device *dev, ssize_t n; u32 i; - n = sprintf(buf, "cpu:type:" CPU_FEATURE_TYPEFMT ":feature:", - CPU_FEATURE_TYPEVAL); + n = sysfs_emit(buf, "cpu:type:" CPU_FEATURE_TYPEFMT ":feature:", + CPU_FEATURE_TYPEVAL); for (i = 0; i < MAX_CPU_FEATURES; i++) if (cpu_have_feature(i)) { @@ -521,56 +516,62 @@ static void __init cpu_dev_register_generic(void) ssize_t __weak cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "Not affected\n"); + return sysfs_emit(buf, "Not affected\n"); } ssize_t __weak cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "Not affected\n"); + return sysfs_emit(buf, "Not affected\n"); } ssize_t __weak cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "Not affected\n"); + return sysfs_emit(buf, "Not affected\n"); } ssize_t __weak cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "Not affected\n"); + return sysfs_emit(buf, "Not affected\n"); } ssize_t __weak cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "Not affected\n"); + return sysfs_emit(buf, "Not affected\n"); } ssize_t __weak cpu_show_mds(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "Not affected\n"); + return sysfs_emit(buf, "Not affected\n"); } ssize_t __weak cpu_show_tsx_async_abort(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "Not affected\n"); + return sysfs_emit(buf, "Not affected\n"); } ssize_t __weak cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "Not affected\n"); + return sysfs_emit(buf, "Not affected\n"); } ssize_t __weak cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "Not affected\n"); + return sysfs_emit(buf, "Not affected\n"); +} + +ssize_t __weak cpu_show_mmio_stale_data(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "Not affected\n"); } static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); @@ -582,6 +583,7 @@ static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL); static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL); static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL); static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL); +static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -593,6 +595,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_tsx_async_abort.attr, &dev_attr_itlb_multihit.attr, &dev_attr_srbds.attr, + &dev_attr_mmio_stale_data.attr, NULL }; diff --git a/drivers/base/dd.c b/drivers/base/dd.c index cf7e5b4afc1be..6f85280fef8d3 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -747,7 +747,7 @@ static int __init save_async_options(char *buf) "Too long list of driver names for 'driver_async_probe'!\n"); strlcpy(async_probe_drv_names, buf, ASYNC_DRV_NAMES_MAX_LEN); - return 0; + return 1; } __setup("driver_async_probe=", save_async_options); @@ -873,6 +873,7 @@ static void __device_attach_async_helper(void *_dev, async_cookie_t cookie) static int __device_attach(struct device *dev, bool allow_async) { int ret = 0; + bool async = false; device_lock(dev); if (dev->p->dead) { @@ -911,7 +912,7 @@ static int __device_attach(struct device *dev, bool allow_async) */ dev_dbg(dev, "scheduling asynchronous probe\n"); get_device(dev); - async_schedule_dev(__device_attach_async_helper, dev); + async = true; } else { pm_request_idle(dev); } @@ -921,6 +922,8 @@ static int __device_attach(struct device *dev, bool allow_async) } out_unlock: device_unlock(dev); + if (async) + async_schedule_dev(__device_attach_async_helper, dev); return ret; } diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 30d0523014e0d..5e9b00711357b 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "base.h" @@ -62,8 +63,15 @@ static struct dentry *public_dev_mount(struct file_system_type *fs_type, int fla const char *dev_name, void *data) { struct super_block *s = mnt->mnt_sb; + int err; + atomic_inc(&s->s_active); down_write(&s->s_umount); + err = reconfigure_single(s, flags, data); + if (err < 0) { + deactivate_locked_super(s); + return ERR_PTR(err); + } return dget(s->s_root); } diff --git a/drivers/base/firmware_loader/fallback.c b/drivers/base/firmware_loader/fallback.c index 5f3e5d8372590..19d18afb30864 100644 --- a/drivers/base/firmware_loader/fallback.c +++ b/drivers/base/firmware_loader/fallback.c @@ -215,7 +215,7 @@ static ssize_t firmware_loading_show(struct device *dev, loading = fw_sysfs_loading(fw_sysfs->fw_priv); mutex_unlock(&fw_lock); - return sprintf(buf, "%d\n", loading); + return sysfs_emit(buf, "%d\n", loading); } /** diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index 249349f64bfe9..12ab50d295484 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -98,12 +98,15 @@ static struct firmware_cache fw_cache; extern struct builtin_fw __start_builtin_fw[]; extern struct builtin_fw __end_builtin_fw[]; -static void fw_copy_to_prealloc_buf(struct firmware *fw, +static bool fw_copy_to_prealloc_buf(struct firmware *fw, void *buf, size_t size) { - if (!buf || size < fw->size) - return; + if (!buf) + return true; + if (size < fw->size) + return false; memcpy(buf, fw->data, fw->size); + return true; } static bool fw_get_builtin_firmware(struct firmware *fw, const char *name, @@ -115,9 +118,7 @@ static bool fw_get_builtin_firmware(struct firmware *fw, const char *name, if (strcmp(name, b_fw->name) == 0) { fw->size = b_fw->size; fw->data = b_fw->data; - fw_copy_to_prealloc_buf(fw, buf, size); - - return true; + return fw_copy_to_prealloc_buf(fw, buf, size); } } @@ -760,6 +761,8 @@ _request_firmware(const struct firmware **firmware_p, const char *name, enum fw_opt opt_flags) { struct firmware *fw = NULL; + struct cred *kern_cred = NULL; + const struct cred *old_cred; int ret; if (!firmware_p) @@ -775,6 +778,18 @@ _request_firmware(const struct firmware **firmware_p, const char *name, if (ret <= 0) /* error or already assigned */ goto out; + /* + * We are about to try to access the firmware file. Because we may have been + * called by a driver when serving an unrelated request from userland, we use + * the kernel credentials to read the file. + */ + kern_cred = prepare_kernel_cred(NULL); + if (!kern_cred) { + ret = -ENOMEM; + goto out; + } + old_cred = override_creds(kern_cred); + ret = fw_get_filesystem_firmware(device, fw->priv, "", NULL); #ifdef CONFIG_FW_LOADER_COMPRESS if (ret == -ENOENT) @@ -791,6 +806,9 @@ _request_firmware(const struct firmware **firmware_p, const char *name, } else ret = assign_fw(fw, device, opt_flags); + revert_creds(old_cred); + put_cred(kern_cred); + out: if (ret < 0) { fw_abort_batch_reqs(fw); diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 5a8c430fb8ffa..28b33b36a2e05 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -110,7 +110,7 @@ static ssize_t phys_index_show(struct device *dev, unsigned long phys_index; phys_index = mem->start_section_nr / sections_per_block; - return sprintf(buf, "%08lx\n", phys_index); + return sysfs_emit(buf, "%08lx\n", phys_index); } /* @@ -120,7 +120,7 @@ static ssize_t phys_index_show(struct device *dev, static ssize_t removable_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", (int)IS_ENABLED(CONFIG_MEMORY_HOTREMOVE)); + return sysfs_emit(buf, "%d\n", (int)IS_ENABLED(CONFIG_MEMORY_HOTREMOVE)); } /* @@ -138,17 +138,17 @@ static ssize_t state_show(struct device *dev, struct device_attribute *attr, */ switch (mem->state) { case MEM_ONLINE: - len = sprintf(buf, "online\n"); + len = sysfs_emit(buf, "online\n"); break; case MEM_OFFLINE: - len = sprintf(buf, "offline\n"); + len = sysfs_emit(buf, "offline\n"); break; case MEM_GOING_OFFLINE: - len = sprintf(buf, "going-offline\n"); + len = sysfs_emit(buf, "going-offline\n"); break; default: - len = sprintf(buf, "ERROR-UNKNOWN-%ld\n", - mem->state); + len = sysfs_emit(buf, "ERROR-UNKNOWN-%ld\n", + mem->state); WARN_ON(1); break; } @@ -358,7 +358,7 @@ static ssize_t phys_device_show(struct device *dev, struct device_attribute *attr, char *buf) { struct memory_block *mem = to_memory_block(dev); - return sprintf(buf, "%d\n", mem->phys_device); + return sysfs_emit(buf, "%d\n", mem->phys_device); } #ifdef CONFIG_MEMORY_HOTREMOVE @@ -396,7 +396,7 @@ static ssize_t valid_zones_show(struct device *dev, */ if (!test_pages_in_a_zone(start_pfn, start_pfn + nr_pages, &valid_start_pfn, &valid_end_pfn)) - return sprintf(buf, "none\n"); + return sysfs_emit(buf, "none\n"); start_pfn = valid_start_pfn; strcat(buf, page_zone(pfn_to_page(start_pfn))->name); goto out; @@ -429,7 +429,7 @@ static DEVICE_ATTR_RO(removable); static ssize_t block_size_bytes_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%lx\n", memory_block_size_bytes()); + return sysfs_emit(buf, "%lx\n", memory_block_size_bytes()); } static DEVICE_ATTR_RO(block_size_bytes); @@ -442,9 +442,9 @@ static ssize_t auto_online_blocks_show(struct device *dev, struct device_attribute *attr, char *buf) { if (memhp_auto_online) - return sprintf(buf, "online\n"); + return sysfs_emit(buf, "online\n"); else - return sprintf(buf, "offline\n"); + return sysfs_emit(buf, "offline\n"); } static ssize_t auto_online_blocks_store(struct device *dev, @@ -543,6 +543,8 @@ static ssize_t hard_offline_page_store(struct device *dev, return -EINVAL; pfn >>= PAGE_SHIFT; ret = memory_failure(pfn, 0); + if (ret == -EOPNOTSUPP) + ret = 0; return ret ? ret : count; } diff --git a/drivers/base/node.c b/drivers/base/node.c index 7198f87e88c63..f730c62ce2f2a 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -368,7 +368,7 @@ static ssize_t node_read_meminfo(struct device *dev, si_meminfo_node(&i, nid); sreclaimable = node_page_state_pages(pgdat, NR_SLAB_RECLAIMABLE_B); sunreclaimable = node_page_state_pages(pgdat, NR_SLAB_UNRECLAIMABLE_B); - n = sprintf(buf, + n = sysfs_emit(buf, "Node %d MemTotal: %8lu kB\n" "Node %d MemFree: %8lu kB\n" "Node %d MemUsed: %8lu kB\n" @@ -469,19 +469,19 @@ static DEVICE_ATTR(meminfo, S_IRUGO, node_read_meminfo, NULL); static ssize_t node_read_numastat(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, - "numa_hit %lu\n" - "numa_miss %lu\n" - "numa_foreign %lu\n" - "interleave_hit %lu\n" - "local_node %lu\n" - "other_node %lu\n", - sum_zone_numa_state(dev->id, NUMA_HIT), - sum_zone_numa_state(dev->id, NUMA_MISS), - sum_zone_numa_state(dev->id, NUMA_FOREIGN), - sum_zone_numa_state(dev->id, NUMA_INTERLEAVE_HIT), - sum_zone_numa_state(dev->id, NUMA_LOCAL), - sum_zone_numa_state(dev->id, NUMA_OTHER)); + return sysfs_emit(buf, + "numa_hit %lu\n" + "numa_miss %lu\n" + "numa_foreign %lu\n" + "interleave_hit %lu\n" + "local_node %lu\n" + "other_node %lu\n", + sum_zone_numa_state(dev->id, NUMA_HIT), + sum_zone_numa_state(dev->id, NUMA_MISS), + sum_zone_numa_state(dev->id, NUMA_FOREIGN), + sum_zone_numa_state(dev->id, NUMA_INTERLEAVE_HIT), + sum_zone_numa_state(dev->id, NUMA_LOCAL), + sum_zone_numa_state(dev->id, NUMA_OTHER)); } static DEVICE_ATTR(numastat, S_IRUGO, node_read_numastat, NULL); @@ -638,6 +638,7 @@ static int register_node(struct node *node, int num) */ void unregister_node(struct node *node) { + compaction_unregister_node(node); hugetlb_unregister_node(node); /* no-op, if memoryless node */ node_remove_accesses(node); node_remove_caches(node); diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 0b67d41bab8ff..75623b914b8c2 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -1009,7 +1009,7 @@ static ssize_t driver_override_show(struct device *dev, ssize_t len; device_lock(dev); - len = sprintf(buf, "%s\n", pdev->driver_override); + len = sysfs_emit(buf, "%s\n", pdev->driver_override); device_unlock(dev); return len; } diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 23af545120534..ae382c4018fd8 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -2121,7 +2121,9 @@ static bool pm_ops_is_empty(const struct dev_pm_ops *ops) void device_pm_check_callbacks(struct device *dev) { - spin_lock_irq(&dev->power.lock); + unsigned long flags; + + spin_lock_irqsave(&dev->power.lock, flags); dev->power.no_pm_callbacks = (!dev->bus || (pm_ops_is_empty(dev->bus->pm) && !dev->bus->suspend && !dev->bus->resume)) && @@ -2130,7 +2132,7 @@ void device_pm_check_callbacks(struct device *dev) (!dev->pm_domain || pm_ops_is_empty(&dev->pm_domain->ops)) && (!dev->driver || (pm_ops_is_empty(dev->driver->pm) && !dev->driver->suspend && !dev->driver->resume)); - spin_unlock_irq(&dev->power.lock); + spin_unlock_irqrestore(&dev->power.lock, flags); } bool dev_pm_smart_suspend_and_suspended(struct device *dev) diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index d7d82db2e4bc4..2786962e08107 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -100,7 +100,7 @@ static const char ctrl_on[] = "on"; static ssize_t control_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%s\n", + return sysfs_emit(buf, "%s\n", dev->power.runtime_auto ? ctrl_auto : ctrl_on); } @@ -126,7 +126,7 @@ static ssize_t runtime_active_time_show(struct device *dev, int ret; u64 tmp = pm_runtime_active_time(dev); do_div(tmp, NSEC_PER_MSEC); - ret = sprintf(buf, "%llu\n", tmp); + ret = sysfs_emit(buf, "%llu\n", tmp); return ret; } @@ -138,7 +138,7 @@ static ssize_t runtime_suspended_time_show(struct device *dev, int ret; u64 tmp = pm_runtime_suspended_time(dev); do_div(tmp, NSEC_PER_MSEC); - ret = sprintf(buf, "%llu\n", tmp); + ret = sysfs_emit(buf, "%llu\n", tmp); return ret; } @@ -171,7 +171,7 @@ static ssize_t runtime_status_show(struct device *dev, return -EIO; } } - return sprintf(buf, p); + return sysfs_emit(buf, p); } static DEVICE_ATTR_RO(runtime_status); @@ -181,7 +181,7 @@ static ssize_t autosuspend_delay_ms_show(struct device *dev, { if (!dev->power.use_autosuspend) return -EIO; - return sprintf(buf, "%d\n", dev->power.autosuspend_delay); + return sysfs_emit(buf, "%d\n", dev->power.autosuspend_delay); } static ssize_t autosuspend_delay_ms_store(struct device *dev, @@ -210,11 +210,11 @@ static ssize_t pm_qos_resume_latency_us_show(struct device *dev, s32 value = dev_pm_qos_requested_resume_latency(dev); if (value == 0) - return sprintf(buf, "n/a\n"); + return sysfs_emit(buf, "n/a\n"); if (value == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) value = 0; - return sprintf(buf, "%d\n", value); + return sysfs_emit(buf, "%d\n", value); } static ssize_t pm_qos_resume_latency_us_store(struct device *dev, @@ -254,11 +254,11 @@ static ssize_t pm_qos_latency_tolerance_us_show(struct device *dev, s32 value = dev_pm_qos_get_user_latency_tolerance(dev); if (value < 0) - return sprintf(buf, "auto\n"); + return sysfs_emit(buf, "auto\n"); if (value == PM_QOS_LATENCY_ANY) - return sprintf(buf, "any\n"); + return sysfs_emit(buf, "any\n"); - return sprintf(buf, "%d\n", value); + return sysfs_emit(buf, "%d\n", value); } static ssize_t pm_qos_latency_tolerance_us_store(struct device *dev, @@ -290,8 +290,8 @@ static ssize_t pm_qos_no_power_off_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", !!(dev_pm_qos_requested_flags(dev) - & PM_QOS_FLAG_NO_POWER_OFF)); + return sysfs_emit(buf, "%d\n", !!(dev_pm_qos_requested_flags(dev) + & PM_QOS_FLAG_NO_POWER_OFF)); } static ssize_t pm_qos_no_power_off_store(struct device *dev, @@ -319,9 +319,9 @@ static const char _disabled[] = "disabled"; static ssize_t wakeup_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%s\n", device_can_wakeup(dev) - ? (device_may_wakeup(dev) ? _enabled : _disabled) - : ""); + return sysfs_emit(buf, "%s\n", device_can_wakeup(dev) + ? (device_may_wakeup(dev) ? _enabled : _disabled) + : ""); } static ssize_t wakeup_store(struct device *dev, struct device_attribute *attr, @@ -507,7 +507,7 @@ static DEVICE_ATTR_RO(wakeup_prevent_sleep_time_ms); static ssize_t runtime_usage_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", atomic_read(&dev->power.usage_count)); + return sysfs_emit(buf, "%d\n", atomic_read(&dev->power.usage_count)); } static DEVICE_ATTR_RO(runtime_usage); @@ -515,8 +515,8 @@ static ssize_t runtime_active_kids_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", dev->power.ignore_children ? - 0 : atomic_read(&dev->power.child_count)); + return sysfs_emit(buf, "%d\n", dev->power.ignore_children ? + 0 : atomic_read(&dev->power.child_count)); } static DEVICE_ATTR_RO(runtime_active_kids); @@ -524,12 +524,12 @@ static ssize_t runtime_enabled_show(struct device *dev, struct device_attribute *attr, char *buf) { if (dev->power.disable_depth && (dev->power.runtime_auto == false)) - return sprintf(buf, "disabled & forbidden\n"); + return sysfs_emit(buf, "disabled & forbidden\n"); if (dev->power.disable_depth) - return sprintf(buf, "disabled\n"); + return sysfs_emit(buf, "disabled\n"); if (dev->power.runtime_auto == false) - return sprintf(buf, "forbidden\n"); - return sprintf(buf, "enabled\n"); + return sysfs_emit(buf, "forbidden\n"); + return sysfs_emit(buf, "enabled\n"); } static DEVICE_ATTR_RO(runtime_enabled); @@ -537,9 +537,9 @@ static DEVICE_ATTR_RO(runtime_enabled); static ssize_t async_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%s\n", - device_async_suspend_enabled(dev) ? - _enabled : _disabled); + return sysfs_emit(buf, "%s\n", + device_async_suspend_enabled(dev) ? + _enabled : _disabled); } static ssize_t async_store(struct device *dev, struct device_attribute *attr, diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c index 977d27bd1a220..9a9dec657c166 100644 --- a/drivers/base/power/trace.c +++ b/drivers/base/power/trace.c @@ -13,6 +13,7 @@ #include #include #include +#include #include @@ -165,6 +166,9 @@ void generate_pm_trace(const void *tracedata, unsigned int user) const char *file = *(const char **)(tracedata + 2); unsigned int user_hash_value, file_hash_value; + if (!x86_platform.legacy.rtc) + return; + user_hash_value = user % USERHASH; file_hash_value = hash_string(lineno, file, FILEHASH); set_magic_time(user_hash_value, file_hash_value, dev_hash_value); @@ -267,6 +271,9 @@ static struct notifier_block pm_trace_nb = { static int early_resume_init(void) { + if (!x86_platform.legacy.rtc) + return 0; + hash_value_early_read = read_magic_time(); register_pm_notifier(&pm_trace_nb); return 0; @@ -277,6 +284,9 @@ static int late_resume_init(void) unsigned int val = hash_value_early_read; unsigned int user, file, dev; + if (!x86_platform.legacy.rtc) + return 0; + user = val % USERHASH; val = val / USERHASH; file = val % FILEHASH; diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 92f0960e90145..49082c5570deb 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -31,7 +31,8 @@ suspend_state_t pm_suspend_target_state; bool events_check_enabled __read_mostly; /* First wakeup IRQ seen by the kernel in the last cycle. */ -unsigned int pm_wakeup_irq __read_mostly; +static unsigned int wakeup_irq[2] __read_mostly; +static DEFINE_RAW_SPINLOCK(wakeup_irq_lock); /* If greater than 0 and the system is suspending, terminate the suspend. */ static atomic_t pm_abort_suspend __read_mostly; @@ -884,19 +885,45 @@ void pm_system_cancel_wakeup(void) atomic_dec_if_positive(&pm_abort_suspend); } -void pm_wakeup_clear(bool reset) +void pm_wakeup_clear(unsigned int irq_number) { - pm_wakeup_irq = 0; - if (reset) + raw_spin_lock_irq(&wakeup_irq_lock); + + if (irq_number && wakeup_irq[0] == irq_number) + wakeup_irq[0] = wakeup_irq[1]; + else + wakeup_irq[0] = 0; + + wakeup_irq[1] = 0; + + raw_spin_unlock_irq(&wakeup_irq_lock); + + if (!irq_number) atomic_set(&pm_abort_suspend, 0); } void pm_system_irq_wakeup(unsigned int irq_number) { - if (pm_wakeup_irq == 0) { - pm_wakeup_irq = irq_number; + unsigned long flags; + + raw_spin_lock_irqsave(&wakeup_irq_lock, flags); + + if (wakeup_irq[0] == 0) + wakeup_irq[0] = irq_number; + else if (wakeup_irq[1] == 0) + wakeup_irq[1] = irq_number; + else + irq_number = 0; + + raw_spin_unlock_irqrestore(&wakeup_irq_lock, flags); + + if (irq_number) pm_system_wakeup(); - } +} + +unsigned int pm_wakeup_irq(void) +{ + return wakeup_irq[0]; } /** diff --git a/drivers/base/power/wakeup_stats.c b/drivers/base/power/wakeup_stats.c index c7734914d9144..5568e25d7c9c2 100644 --- a/drivers/base/power/wakeup_stats.c +++ b/drivers/base/power/wakeup_stats.c @@ -42,7 +42,7 @@ static ssize_t active_time_ms_show(struct device *dev, ktime_t active_time = ws->active ? ktime_sub(ktime_get(), ws->last_time) : 0; - return sprintf(buf, "%lld\n", ktime_to_ms(active_time)); + return sysfs_emit(buf, "%lld\n", ktime_to_ms(active_time)); } static DEVICE_ATTR_RO(active_time_ms); @@ -57,7 +57,7 @@ static ssize_t total_time_ms_show(struct device *dev, active_time = ktime_sub(ktime_get(), ws->last_time); total_time = ktime_add(total_time, active_time); } - return sprintf(buf, "%lld\n", ktime_to_ms(total_time)); + return sysfs_emit(buf, "%lld\n", ktime_to_ms(total_time)); } static DEVICE_ATTR_RO(total_time_ms); @@ -73,7 +73,7 @@ static ssize_t max_time_ms_show(struct device *dev, if (active_time > max_time) max_time = active_time; } - return sprintf(buf, "%lld\n", ktime_to_ms(max_time)); + return sysfs_emit(buf, "%lld\n", ktime_to_ms(max_time)); } static DEVICE_ATTR_RO(max_time_ms); @@ -82,7 +82,7 @@ static ssize_t last_change_ms_show(struct device *dev, { struct wakeup_source *ws = dev_get_drvdata(dev); - return sprintf(buf, "%lld\n", ktime_to_ms(ws->last_time)); + return sysfs_emit(buf, "%lld\n", ktime_to_ms(ws->last_time)); } static DEVICE_ATTR_RO(last_change_ms); @@ -91,7 +91,7 @@ static ssize_t name_show(struct device *dev, struct device_attribute *attr, { struct wakeup_source *ws = dev_get_drvdata(dev); - return sprintf(buf, "%s\n", ws->name); + return sysfs_emit(buf, "%s\n", ws->name); } static DEVICE_ATTR_RO(name); @@ -106,7 +106,7 @@ static ssize_t prevent_suspend_time_ms_show(struct device *dev, prevent_sleep_time = ktime_add(prevent_sleep_time, ktime_sub(ktime_get(), ws->start_prevent_time)); } - return sprintf(buf, "%lld\n", ktime_to_ms(prevent_sleep_time)); + return sysfs_emit(buf, "%lld\n", ktime_to_ms(prevent_sleep_time)); } static DEVICE_ATTR_RO(prevent_suspend_time_ms); diff --git a/drivers/base/regmap/regcache-rbtree.c b/drivers/base/regmap/regcache-rbtree.c index cfa29dc89bbff..fabf87058d80b 100644 --- a/drivers/base/regmap/regcache-rbtree.c +++ b/drivers/base/regmap/regcache-rbtree.c @@ -281,14 +281,14 @@ static int regcache_rbtree_insert_to_block(struct regmap *map, if (!blk) return -ENOMEM; + rbnode->block = blk; + if (BITS_TO_LONGS(blklen) > BITS_TO_LONGS(rbnode->blklen)) { present = krealloc(rbnode->cache_present, BITS_TO_LONGS(blklen) * sizeof(*present), GFP_KERNEL); - if (!present) { - kfree(blk); + if (!present) return -ENOMEM; - } memset(present + BITS_TO_LONGS(rbnode->blklen), 0, (BITS_TO_LONGS(blklen) - BITS_TO_LONGS(rbnode->blklen)) @@ -305,7 +305,6 @@ static int regcache_rbtree_insert_to_block(struct regmap *map, } /* update the rbnode block, its size and the base register */ - rbnode->block = blk; rbnode->blklen = blklen; rbnode->base_reg = base_reg; rbnode->cache_present = present; diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index 3d64c9331a82a..3c1e554df4eb2 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -214,6 +214,7 @@ static void regmap_irq_enable(struct irq_data *data) struct regmap_irq_chip_data *d = irq_data_get_irq_chip_data(data); struct regmap *map = d->map; const struct regmap_irq *irq_data = irq_to_regmap_irq(d, data->hwirq); + unsigned int reg = irq_data->reg_offset / map->reg_stride; unsigned int mask, type; type = irq_data->type.type_falling_val | irq_data->type.type_rising_val; @@ -230,14 +231,14 @@ static void regmap_irq_enable(struct irq_data *data) * at the corresponding offset in regmap_irq_set_type(). */ if (d->chip->type_in_mask && type) - mask = d->type_buf[irq_data->reg_offset / map->reg_stride]; + mask = d->type_buf[reg] & irq_data->mask; else mask = irq_data->mask; if (d->chip->clear_on_unmask) d->clear_status = true; - d->mask_buf[irq_data->reg_offset / map->reg_stride] &= ~mask; + d->mask_buf[reg] &= ~mask; } static void regmap_irq_disable(struct irq_data *data) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index e0893f1b14522..43c0452a8ba91 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1505,7 +1505,7 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg, if (ret) { dev_err(map->dev, "Error in caching of register: %x ret: %d\n", - reg + i, ret); + reg + regmap_get_offset(map, i), ret); return ret; } } diff --git a/drivers/base/soc.c b/drivers/base/soc.c index 7c0c5ca5953d2..735f122c2ba31 100644 --- a/drivers/base/soc.c +++ b/drivers/base/soc.c @@ -76,15 +76,15 @@ static ssize_t soc_info_get(struct device *dev, struct soc_device *soc_dev = container_of(dev, struct soc_device, dev); if (attr == &dev_attr_machine) - return sprintf(buf, "%s\n", soc_dev->attr->machine); + return sysfs_emit(buf, "%s\n", soc_dev->attr->machine); if (attr == &dev_attr_family) - return sprintf(buf, "%s\n", soc_dev->attr->family); + return sysfs_emit(buf, "%s\n", soc_dev->attr->family); if (attr == &dev_attr_revision) - return sprintf(buf, "%s\n", soc_dev->attr->revision); + return sysfs_emit(buf, "%s\n", soc_dev->attr->revision); if (attr == &dev_attr_serial_number) - return sprintf(buf, "%s\n", soc_dev->attr->serial_number); + return sysfs_emit(buf, "%s\n", soc_dev->attr->serial_number); if (attr == &dev_attr_soc_id) - return sprintf(buf, "%s\n", soc_dev->attr->soc_id); + return sysfs_emit(buf, "%s\n", soc_dev->attr->soc_id); return -EINVAL; diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c index 6535614a7dc13..1df2b5801c3bc 100644 --- a/drivers/bcma/main.c +++ b/drivers/bcma/main.c @@ -236,6 +236,7 @@ EXPORT_SYMBOL(bcma_core_irq); void bcma_prepare_core(struct bcma_bus *bus, struct bcma_device *core) { + device_initialize(&core->dev); core->dev.release = bcma_release_core_dev; core->dev.bus = &bcma_bus_type; dev_set_name(&core->dev, "bcma%d:%d", bus->num, core->core_index); @@ -277,11 +278,10 @@ static void bcma_register_core(struct bcma_bus *bus, struct bcma_device *core) { int err; - err = device_register(&core->dev); + err = device_add(&core->dev); if (err) { bcma_err(bus, "Could not register dev for core 0x%03X\n", core->id.id); - put_device(&core->dev); return; } core->dev_registered = true; @@ -372,7 +372,7 @@ void bcma_unregister_cores(struct bcma_bus *bus) /* Now noone uses internally-handled cores, we can free them */ list_for_each_entry_safe(core, tmp, &bus->cores, list) { list_del(&core->list); - kfree(core); + put_device(&core->dev); } } diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 0fc27ac14f29c..8a08cbb958d12 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -39,6 +39,22 @@ config BLK_DEV_FD To compile this driver as a module, choose M here: the module will be called floppy. +config BLK_DEV_FD_RAWCMD + bool "Support for raw floppy disk commands (DEPRECATED)" + depends on BLK_DEV_FD + help + If you want to use actual physical floppies and expect to do + special low-level hardware accesses to them (access and use + non-standard formats, for example), then enable this. + + Note that the code enabled by this option is rarely used and + might be unstable or insecure, and distros should not enable it. + + Note: FDRAWCMD is deprecated and will be removed from the kernel + in the near future. + + If unsure, say N. + config AMIGA_FLOPPY tristate "Amiga floppy support" depends on AMIGA @@ -230,7 +246,7 @@ config BLK_DEV_LOOP_MIN_COUNT dynamically allocated with the /dev/loop-control interface. config BLK_DEV_CRYPTOLOOP - tristate "Cryptoloop Support" + tristate "Cryptoloop Support (DEPRECATED)" select CRYPTO select CRYPTO_CBC depends on BLK_DEV_LOOP @@ -242,7 +258,7 @@ config BLK_DEV_CRYPTOLOOP WARNING: This device is not safe for journaled file systems like ext3 or Reiserfs. Please use the Device Mapper crypto module instead, which can be configured to be on-disk compatible with the - cryptoloop device. + cryptoloop device. cryptoloop support will be removed in Linux 5.16. source "drivers/block/drbd/Kconfig" diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index bd7d3bb8b890b..ad4cf10749100 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -653,9 +653,6 @@ static inline void copy_buffer(void *from, void *to) *p2++ = *p1++; } - - - /* General Interrupt Handling */ static void (*FloppyIRQHandler)( int status ) = NULL; @@ -1225,6 +1222,7 @@ static void fd_rwsec_done1(int status) } else { /* all sectors finished */ + finish_fdc(); fd_end_request_cur(BLK_STS_OK); } return; @@ -1472,15 +1470,6 @@ static void setup_req_params( int drive ) ReqTrack, ReqSector, (unsigned long)ReqData )); } -static void ataflop_commit_rqs(struct blk_mq_hw_ctx *hctx) -{ - spin_lock_irq(&ataflop_lock); - atari_disable_irq(IRQ_MFP_FDC); - finish_fdc(); - atari_enable_irq(IRQ_MFP_FDC); - spin_unlock_irq(&ataflop_lock); -} - static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -1488,6 +1477,8 @@ static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx, int drive = floppy - unit; int type = floppy->type; + DPRINT(("Queue request: drive %d type %d last %d\n", drive, type, bd->last)); + spin_lock_irq(&ataflop_lock); if (fd_request) { spin_unlock_irq(&ataflop_lock); @@ -1547,8 +1538,6 @@ static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx, setup_req_params( drive ); do_fd_action( drive ); - if (bd->last) - finish_fdc(); atari_enable_irq( IRQ_MFP_FDC ); out: @@ -1958,7 +1947,6 @@ static const struct block_device_operations floppy_fops = { static const struct blk_mq_ops ataflop_mq_ops = { .queue_rq = ataflop_queue_rq, - .commit_rqs = ataflop_commit_rqs, }; static struct kobject *floppy_find(dev_t dev, int *part, void *data) diff --git a/drivers/block/cryptoloop.c b/drivers/block/cryptoloop.c index 3cabc335ae744..f0a91faa43a89 100644 --- a/drivers/block/cryptoloop.c +++ b/drivers/block/cryptoloop.c @@ -189,6 +189,8 @@ init_cryptoloop(void) if (rc) printk(KERN_ERR "cryptoloop: loop_register_transfer failed\n"); + else + pr_warn("the cryptoloop driver has been deprecated and will be removed in in Linux 5.16\n"); return rc; } diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index ddbf56014c51a..6da7f5749a7c4 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1673,22 +1673,22 @@ struct sib_info { }; void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib); -extern void notify_resource_state(struct sk_buff *, +extern int notify_resource_state(struct sk_buff *, unsigned int, struct drbd_resource *, struct resource_info *, enum drbd_notification_type); -extern void notify_device_state(struct sk_buff *, +extern int notify_device_state(struct sk_buff *, unsigned int, struct drbd_device *, struct device_info *, enum drbd_notification_type); -extern void notify_connection_state(struct sk_buff *, +extern int notify_connection_state(struct sk_buff *, unsigned int, struct drbd_connection *, struct connection_info *, enum drbd_notification_type); -extern void notify_peer_device_state(struct sk_buff *, +extern int notify_peer_device_state(struct sk_buff *, unsigned int, struct drbd_peer_device *, struct peer_device_info *, diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index a18155cdce416..5ece2fd70d9cf 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -183,7 +183,7 @@ void tl_release(struct drbd_connection *connection, unsigned int barrier_nr, unsigned int set_size) { struct drbd_request *r; - struct drbd_request *req = NULL; + struct drbd_request *req = NULL, *tmp = NULL; int expect_epoch = 0; int expect_size = 0; @@ -237,8 +237,11 @@ void tl_release(struct drbd_connection *connection, unsigned int barrier_nr, * to catch requests being barrier-acked "unexpectedly". * It usually should find the same req again, or some READ preceding it. */ list_for_each_entry(req, &connection->transfer_log, tl_requests) - if (req->epoch == expect_epoch) + if (req->epoch == expect_epoch) { + tmp = req; break; + } + req = list_prepare_entry(tmp, &connection->transfer_log, tl_requests); list_for_each_entry_safe_from(req, r, &connection->transfer_log, tl_requests) { if (req->epoch != expect_epoch) break; @@ -3706,9 +3709,8 @@ const char *cmdname(enum drbd_packet cmd) * when we want to support more than * one PRO_VERSION */ static const char *cmdnames[] = { + [P_DATA] = "Data", - [P_WSAME] = "WriteSame", - [P_TRIM] = "Trim", [P_DATA_REPLY] = "DataReply", [P_RS_DATA_REPLY] = "RSDataReply", [P_BARRIER] = "Barrier", @@ -3719,7 +3721,6 @@ const char *cmdname(enum drbd_packet cmd) [P_DATA_REQUEST] = "DataRequest", [P_RS_DATA_REQUEST] = "RSDataRequest", [P_SYNC_PARAM] = "SyncParam", - [P_SYNC_PARAM89] = "SyncParam89", [P_PROTOCOL] = "ReportProtocol", [P_UUIDS] = "ReportUUIDs", [P_SIZES] = "ReportSizes", @@ -3727,6 +3728,7 @@ const char *cmdname(enum drbd_packet cmd) [P_SYNC_UUID] = "ReportSyncUUID", [P_AUTH_CHALLENGE] = "AuthChallenge", [P_AUTH_RESPONSE] = "AuthResponse", + [P_STATE_CHG_REQ] = "StateChgRequest", [P_PING] = "Ping", [P_PING_ACK] = "PingAck", [P_RECV_ACK] = "RecvAck", @@ -3737,24 +3739,26 @@ const char *cmdname(enum drbd_packet cmd) [P_NEG_DREPLY] = "NegDReply", [P_NEG_RS_DREPLY] = "NegRSDReply", [P_BARRIER_ACK] = "BarrierAck", - [P_STATE_CHG_REQ] = "StateChgRequest", [P_STATE_CHG_REPLY] = "StateChgReply", [P_OV_REQUEST] = "OVRequest", [P_OV_REPLY] = "OVReply", [P_OV_RESULT] = "OVResult", [P_CSUM_RS_REQUEST] = "CsumRSRequest", [P_RS_IS_IN_SYNC] = "CsumRSIsInSync", + [P_SYNC_PARAM89] = "SyncParam89", [P_COMPRESSED_BITMAP] = "CBitmap", [P_DELAY_PROBE] = "DelayProbe", [P_OUT_OF_SYNC] = "OutOfSync", - [P_RETRY_WRITE] = "RetryWrite", [P_RS_CANCEL] = "RSCancel", [P_CONN_ST_CHG_REQ] = "conn_st_chg_req", [P_CONN_ST_CHG_REPLY] = "conn_st_chg_reply", [P_RETRY_WRITE] = "retry_write", [P_PROTOCOL_UPDATE] = "protocol_update", + [P_TRIM] = "Trim", [P_RS_THIN_REQ] = "rs_thin_req", [P_RS_DEALLOCATED] = "rs_deallocated", + [P_WSAME] = "WriteSame", + [P_ZEROES] = "Zeroes", /* enum drbd_packet, but not commands - obsoleted flags: * P_MAY_IGNORE diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 5d52a2d321559..52dd517ff5419 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -791,9 +791,11 @@ int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info) mutex_lock(&adm_ctx.resource->adm_mutex); if (info->genlhdr->cmd == DRBD_ADM_PRIMARY) - retcode = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate); + retcode = (enum drbd_ret_code)drbd_set_role(adm_ctx.device, + R_PRIMARY, parms.assume_uptodate); else - retcode = drbd_set_role(adm_ctx.device, R_SECONDARY, 0); + retcode = (enum drbd_ret_code)drbd_set_role(adm_ctx.device, + R_SECONDARY, 0); mutex_unlock(&adm_ctx.resource->adm_mutex); genl_lock(); @@ -1971,7 +1973,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) drbd_flush_workqueue(&connection->sender_work); rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE); - retcode = rv; /* FIXME: Type mismatch. */ + retcode = (enum drbd_ret_code)rv; drbd_resume_io(device); if (rv < SS_SUCCESS) goto fail; @@ -2696,7 +2698,8 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) } rcu_read_unlock(); - retcode = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE); + retcode = (enum drbd_ret_code)conn_request_state(connection, + NS(conn, C_UNCONNECTED), CS_VERBOSE); conn_reconfig_done(connection); mutex_unlock(&adm_ctx.resource->adm_mutex); @@ -2809,7 +2812,7 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) mutex_lock(&adm_ctx.resource->adm_mutex); rv = conn_try_disconnect(connection, parms.force_disconnect); if (rv < SS_SUCCESS) - retcode = rv; /* FIXME: Type mismatch. */ + retcode = (enum drbd_ret_code)rv; else retcode = NO_ERROR; mutex_unlock(&adm_ctx.resource->adm_mutex); @@ -4630,7 +4633,7 @@ static int nla_put_notification_header(struct sk_buff *msg, return drbd_notification_header_to_skb(msg, &nh, true); } -void notify_resource_state(struct sk_buff *skb, +int notify_resource_state(struct sk_buff *skb, unsigned int seq, struct drbd_resource *resource, struct resource_info *resource_info, @@ -4672,16 +4675,17 @@ void notify_resource_state(struct sk_buff *skb, if (err && err != -ESRCH) goto failed; } - return; + return 0; nla_put_failure: nlmsg_free(skb); failed: drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n", err, seq); + return err; } -void notify_device_state(struct sk_buff *skb, +int notify_device_state(struct sk_buff *skb, unsigned int seq, struct drbd_device *device, struct device_info *device_info, @@ -4721,16 +4725,17 @@ void notify_device_state(struct sk_buff *skb, if (err && err != -ESRCH) goto failed; } - return; + return 0; nla_put_failure: nlmsg_free(skb); failed: drbd_err(device, "Error %d while broadcasting event. Event seq:%u\n", err, seq); + return err; } -void notify_connection_state(struct sk_buff *skb, +int notify_connection_state(struct sk_buff *skb, unsigned int seq, struct drbd_connection *connection, struct connection_info *connection_info, @@ -4770,16 +4775,17 @@ void notify_connection_state(struct sk_buff *skb, if (err && err != -ESRCH) goto failed; } - return; + return 0; nla_put_failure: nlmsg_free(skb); failed: drbd_err(connection, "Error %d while broadcasting event. Event seq:%u\n", err, seq); + return err; } -void notify_peer_device_state(struct sk_buff *skb, +int notify_peer_device_state(struct sk_buff *skb, unsigned int seq, struct drbd_peer_device *peer_device, struct peer_device_info *peer_device_info, @@ -4820,13 +4826,14 @@ void notify_peer_device_state(struct sk_buff *skb, if (err && err != -ESRCH) goto failed; } - return; + return 0; nla_put_failure: nlmsg_free(skb); failed: drbd_err(peer_device, "Error %d while broadcasting event. Event seq:%u\n", err, seq); + return err; } void notify_helper(enum drbd_notification_type type, @@ -4877,7 +4884,7 @@ void notify_helper(enum drbd_notification_type type, err, seq); } -static void notify_initial_state_done(struct sk_buff *skb, unsigned int seq) +static int notify_initial_state_done(struct sk_buff *skb, unsigned int seq) { struct drbd_genlmsghdr *dh; int err; @@ -4891,11 +4898,12 @@ static void notify_initial_state_done(struct sk_buff *skb, unsigned int seq) if (nla_put_notification_header(skb, NOTIFY_EXISTS)) goto nla_put_failure; genlmsg_end(skb, dh); - return; + return 0; nla_put_failure: nlmsg_free(skb); pr_err("Error %d sending event. Event seq:%u\n", err, seq); + return err; } static void free_state_changes(struct list_head *list) @@ -4922,6 +4930,7 @@ static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb) unsigned int seq = cb->args[2]; unsigned int n; enum drbd_notification_type flags = 0; + int err = 0; /* There is no need for taking notification_mutex here: it doesn't matter if the initial state events mix with later state chage @@ -4930,32 +4939,32 @@ static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb) cb->args[5]--; if (cb->args[5] == 1) { - notify_initial_state_done(skb, seq); + err = notify_initial_state_done(skb, seq); goto out; } n = cb->args[4]++; if (cb->args[4] < cb->args[3]) flags |= NOTIFY_CONTINUES; if (n < 1) { - notify_resource_state_change(skb, seq, state_change->resource, + err = notify_resource_state_change(skb, seq, state_change->resource, NOTIFY_EXISTS | flags); goto next; } n--; if (n < state_change->n_connections) { - notify_connection_state_change(skb, seq, &state_change->connections[n], + err = notify_connection_state_change(skb, seq, &state_change->connections[n], NOTIFY_EXISTS | flags); goto next; } n -= state_change->n_connections; if (n < state_change->n_devices) { - notify_device_state_change(skb, seq, &state_change->devices[n], + err = notify_device_state_change(skb, seq, &state_change->devices[n], NOTIFY_EXISTS | flags); goto next; } n -= state_change->n_devices; if (n < state_change->n_devices * state_change->n_connections) { - notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n], + err = notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n], NOTIFY_EXISTS | flags); goto next; } @@ -4970,7 +4979,10 @@ static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb) cb->args[4] = 0; } out: - return skb->len; + if (err) + return err; + else + return skb->len; } int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index f86cea4c0f8df..5e92632881d81 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -195,7 +195,8 @@ void start_new_tl_epoch(struct drbd_connection *connection) void complete_master_bio(struct drbd_device *device, struct bio_and_error *m) { - m->bio->bi_status = errno_to_blk_status(m->error); + if (unlikely(m->error)) + m->bio->bi_status = errno_to_blk_status(m->error); bio_endio(m->bio); dec_ap_bio(device); } diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index eeaa3b49b2649..4dad4dd0ceb66 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1537,7 +1537,7 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device, return rv; } -void notify_resource_state_change(struct sk_buff *skb, +int notify_resource_state_change(struct sk_buff *skb, unsigned int seq, struct drbd_resource_state_change *resource_state_change, enum drbd_notification_type type) @@ -1550,10 +1550,10 @@ void notify_resource_state_change(struct sk_buff *skb, .res_susp_fen = resource_state_change->susp_fen[NEW], }; - notify_resource_state(skb, seq, resource, &resource_info, type); + return notify_resource_state(skb, seq, resource, &resource_info, type); } -void notify_connection_state_change(struct sk_buff *skb, +int notify_connection_state_change(struct sk_buff *skb, unsigned int seq, struct drbd_connection_state_change *connection_state_change, enum drbd_notification_type type) @@ -1564,10 +1564,10 @@ void notify_connection_state_change(struct sk_buff *skb, .conn_role = connection_state_change->peer_role[NEW], }; - notify_connection_state(skb, seq, connection, &connection_info, type); + return notify_connection_state(skb, seq, connection, &connection_info, type); } -void notify_device_state_change(struct sk_buff *skb, +int notify_device_state_change(struct sk_buff *skb, unsigned int seq, struct drbd_device_state_change *device_state_change, enum drbd_notification_type type) @@ -1577,10 +1577,10 @@ void notify_device_state_change(struct sk_buff *skb, .dev_disk_state = device_state_change->disk_state[NEW], }; - notify_device_state(skb, seq, device, &device_info, type); + return notify_device_state(skb, seq, device, &device_info, type); } -void notify_peer_device_state_change(struct sk_buff *skb, +int notify_peer_device_state_change(struct sk_buff *skb, unsigned int seq, struct drbd_peer_device_state_change *p, enum drbd_notification_type type) @@ -1594,7 +1594,7 @@ void notify_peer_device_state_change(struct sk_buff *skb, .peer_resync_susp_dependency = p->resync_susp_dependency[NEW], }; - notify_peer_device_state(skb, seq, peer_device, &peer_device_info, type); + return notify_peer_device_state(skb, seq, peer_device, &peer_device_info, type); } static void broadcast_state_change(struct drbd_state_change *state_change) @@ -1602,7 +1602,7 @@ static void broadcast_state_change(struct drbd_state_change *state_change) struct drbd_resource_state_change *resource_state_change = &state_change->resource[0]; bool resource_state_has_changed; unsigned int n_device, n_connection, n_peer_device, n_peer_devices; - void (*last_func)(struct sk_buff *, unsigned int, void *, + int (*last_func)(struct sk_buff *, unsigned int, void *, enum drbd_notification_type) = NULL; void *uninitialized_var(last_arg); diff --git a/drivers/block/drbd/drbd_state_change.h b/drivers/block/drbd/drbd_state_change.h index ba80f612d6abb..d5b0479bc9a66 100644 --- a/drivers/block/drbd/drbd_state_change.h +++ b/drivers/block/drbd/drbd_state_change.h @@ -44,19 +44,19 @@ extern struct drbd_state_change *remember_old_state(struct drbd_resource *, gfp_ extern void copy_old_to_new_state_change(struct drbd_state_change *); extern void forget_state_change(struct drbd_state_change *); -extern void notify_resource_state_change(struct sk_buff *, +extern int notify_resource_state_change(struct sk_buff *, unsigned int, struct drbd_resource_state_change *, enum drbd_notification_type type); -extern void notify_connection_state_change(struct sk_buff *, +extern int notify_connection_state_change(struct sk_buff *, unsigned int, struct drbd_connection_state_change *, enum drbd_notification_type type); -extern void notify_device_state_change(struct sk_buff *, +extern int notify_device_state_change(struct sk_buff *, unsigned int, struct drbd_device_state_change *, enum drbd_notification_type type); -extern void notify_peer_device_state_change(struct sk_buff *, +extern int notify_peer_device_state_change(struct sk_buff *, unsigned int, struct drbd_peer_device_state_change *, enum drbd_notification_type type); diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 40ea1a425c431..e133ff5fa5961 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -521,8 +521,8 @@ static unsigned long fdc_busy; static DECLARE_WAIT_QUEUE_HEAD(fdc_wait); static DECLARE_WAIT_QUEUE_HEAD(command_done); -/* Errors during formatting are counted here. */ -static int format_errors; +/* errors encountered on the current (or last) request */ +static int floppy_errors; /* Format request descriptor. */ static struct format_descr format_req; @@ -542,7 +542,6 @@ static struct format_descr format_req; static char *floppy_track_buffer; static int max_buffer_sectors; -static int *errors; typedef void (*done_f)(int); static const struct cont_t { void (*interrupt)(void); @@ -1003,7 +1002,7 @@ static DECLARE_DELAYED_WORK(fd_timer, fd_timer_workfn); static void cancel_activity(void) { do_floppy = NULL; - cancel_delayed_work_sync(&fd_timer); + cancel_delayed_work(&fd_timer); cancel_work_sync(&floppy_work); } @@ -1435,7 +1434,7 @@ static int interpret_errors(void) if (DP->flags & FTD_MSG) DPRINT("Over/Underrun - retrying\n"); bad = 0; - } else if (*errors >= DP->max_errors.reporting) { + } else if (floppy_errors >= DP->max_errors.reporting) { print_errors(); } if (ST2 & ST2_WC || ST2 & ST2_BC) @@ -2055,7 +2054,7 @@ static void bad_flp_intr(void) if (!next_valid_format()) return; } - err_count = ++(*errors); + err_count = ++floppy_errors; INFBOUND(DRWE->badness, err_count); if (err_count > DP->max_errors.abort) cont->done(0); @@ -2200,9 +2199,8 @@ static int do_format(int drive, struct format_descr *tmp_format_req) return -EINVAL; } format_req = *tmp_format_req; - format_errors = 0; cont = &format_cont; - errors = &format_errors; + floppy_errors = 0; ret = wait_til_done(redo_format, true); if (ret == -EINTR) return -EINTR; @@ -2677,7 +2675,7 @@ static int make_raw_rw_request(void) */ if (!direct || (indirect * 2 > direct * 3 && - *errors < DP->max_errors.read_track && + floppy_errors < DP->max_errors.read_track && ((!probing || (DP->read_track & (1 << DRS->probed_format)))))) { max_size = blk_rq_sectors(current_req); @@ -2801,10 +2799,11 @@ static int set_next_request(void) current_req = list_first_entry_or_null(&floppy_reqs, struct request, queuelist); if (current_req) { - current_req->error_count = 0; + floppy_errors = 0; list_del_init(¤t_req->queuelist); + return 1; } - return current_req != NULL; + return 0; } static void redo_fd_request(void) @@ -2860,7 +2859,6 @@ static void redo_fd_request(void) _floppy = floppy_type + DP->autodetect[DRS->probed_format]; } else probing = 0; - errors = &(current_req->error_count); tmp = make_raw_rw_request(); if (tmp < 2) { request_done(tmp); @@ -3012,6 +3010,8 @@ static const char *drive_name(int type, int drive) return "(null)"; } +#ifdef CONFIG_BLK_DEV_FD_RAWCMD + /* raw commands */ static void raw_cmd_done(int flag) { @@ -3112,6 +3112,8 @@ static void raw_cmd_free(struct floppy_raw_cmd **ptr) } } +#define MAX_LEN (1UL << MAX_ORDER << PAGE_SHIFT) + static int raw_cmd_copyin(int cmd, void __user *param, struct floppy_raw_cmd **rcmd) { @@ -3149,7 +3151,7 @@ static int raw_cmd_copyin(int cmd, void __user *param, ptr->resultcode = 0; if (ptr->flags & (FD_RAW_READ | FD_RAW_WRITE)) { - if (ptr->length <= 0) + if (ptr->length <= 0 || ptr->length >= MAX_LEN) return -EINVAL; ptr->kernel_data = (char *)fd_dma_mem_alloc(ptr->length); fallback_on_nodma_alloc(&ptr->kernel_data, ptr->length); @@ -3221,6 +3223,35 @@ static int raw_cmd_ioctl(int cmd, void __user *param) return ret; } +static int floppy_raw_cmd_ioctl(int type, int drive, int cmd, + void __user *param) +{ + int ret; + + pr_warn_once("Note: FDRAWCMD is deprecated and will be removed from the kernel in the near future.\n"); + + if (type) + return -EINVAL; + if (lock_fdc(drive)) + return -EINTR; + set_floppy(drive); + ret = raw_cmd_ioctl(cmd, param); + if (ret == -EINTR) + return -EINTR; + process_fd_request(); + return ret; +} + +#else /* CONFIG_BLK_DEV_FD_RAWCMD */ + +static int floppy_raw_cmd_ioctl(int type, int drive, int cmd, + void __user *param) +{ + return -EOPNOTSUPP; +} + +#endif + static int invalidate_drive(struct block_device *bdev) { /* invalidate the buffer track to force a reread */ @@ -3408,7 +3439,6 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int { int drive = (long)bdev->bd_disk->private_data; int type = ITYPE(UDRS->fd_device); - int i; int ret; int size; union inparam { @@ -3559,16 +3589,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int outparam = UDRWE; break; case FDRAWCMD: - if (type) - return -EINVAL; - if (lock_fdc(drive)) - return -EINTR; - set_floppy(drive); - i = raw_cmd_ioctl(cmd, (void __user *)param); - if (i == -EINTR) - return -EINTR; - process_fd_request(); - return i; + return floppy_raw_cmd_ioctl(type, drive, cmd, (void __user *)param); case FDTWADDLE: if (lock_fdc(drive)) return -EINTR; @@ -4063,22 +4084,21 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) if (UFDCS->rawcmd == 1) UFDCS->rawcmd = 2; - if (mode & (FMODE_READ|FMODE_WRITE)) { - UDRS->last_checked = 0; - clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags); - check_disk_change(bdev); - if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags)) - goto out; - if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags)) + if (!(mode & FMODE_NDELAY)) { + if (mode & (FMODE_READ|FMODE_WRITE)) { + UDRS->last_checked = 0; + clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags); + check_disk_change(bdev); + if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags)) + goto out; + if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags)) + goto out; + } + res = -EROFS; + if ((mode & FMODE_WRITE) && + !test_bit(FD_DISK_WRITABLE_BIT, &UDRS->flags)) goto out; } - - res = -EROFS; - - if ((mode & FMODE_WRITE) && - !test_bit(FD_DISK_WRITABLE_BIT, &UDRS->flags)) - goto out; - mutex_unlock(&open_lock); mutex_unlock(&floppy_mutex); return 0; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index ffbe792410d1c..6b3e27b8cd245 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -794,33 +794,33 @@ static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf) static ssize_t loop_attr_offset_show(struct loop_device *lo, char *buf) { - return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_offset); + return sysfs_emit(buf, "%llu\n", (unsigned long long)lo->lo_offset); } static ssize_t loop_attr_sizelimit_show(struct loop_device *lo, char *buf) { - return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit); + return sysfs_emit(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit); } static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf) { int autoclear = (lo->lo_flags & LO_FLAGS_AUTOCLEAR); - return sprintf(buf, "%s\n", autoclear ? "1" : "0"); + return sysfs_emit(buf, "%s\n", autoclear ? "1" : "0"); } static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf) { int partscan = (lo->lo_flags & LO_FLAGS_PARTSCAN); - return sprintf(buf, "%s\n", partscan ? "1" : "0"); + return sysfs_emit(buf, "%s\n", partscan ? "1" : "0"); } static ssize_t loop_attr_dio_show(struct loop_device *lo, char *buf) { int dio = (lo->lo_flags & LO_FLAGS_DIRECT_IO); - return sprintf(buf, "%s\n", dio ? "1" : "0"); + return sysfs_emit(buf, "%s\n", dio ? "1" : "0"); } LOOP_ATTR_RO(backing_file); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index bdc750c5d6dda..06d13c8c9dcc2 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1318,7 +1318,7 @@ static int nbd_start_device(struct nbd_device *nbd) worker = kthread_create(recv_work, args, "knbd%d.%d-recv", nbd->index, i); - if (!worker) { + if (IS_ERR(worker)) { kfree(args); goto err; } @@ -1381,7 +1381,7 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b static void nbd_clear_sock_ioctl(struct nbd_device *nbd, struct block_device *bdev) { - sock_shutdown(nbd); + nbd_clear_sock(nbd); __invalidate_device(bdev, true); nbd_bdev_reset(bdev); if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF, @@ -1494,16 +1494,21 @@ static struct nbd_config *nbd_alloc_config(void) { struct nbd_config *config; + if (!try_module_get(THIS_MODULE)) + return ERR_PTR(-ENODEV); + config = kzalloc(sizeof(struct nbd_config), GFP_NOFS); - if (!config) - return NULL; + if (!config) { + module_put(THIS_MODULE); + return ERR_PTR(-ENOMEM); + } + atomic_set(&config->recv_threads, 0); init_waitqueue_head(&config->recv_wq); init_waitqueue_head(&config->conn_wait); spin_lock_init(&config->recv_lock); config->blksize = NBD_DEF_BLKSIZE; atomic_set(&config->live_connections, 0); - try_module_get(THIS_MODULE); return config; } @@ -1530,12 +1535,13 @@ static int nbd_open(struct block_device *bdev, fmode_t mode) mutex_unlock(&nbd->config_lock); goto out; } - config = nbd->config = nbd_alloc_config(); - if (!config) { - ret = -ENOMEM; + config = nbd_alloc_config(); + if (IS_ERR(config)) { + ret = PTR_ERR(config); mutex_unlock(&nbd->config_lock); goto out; } + nbd->config = config; refcount_set(&nbd->config_refs, 1); refcount_inc(&nbd->refs); mutex_unlock(&nbd->config_lock); @@ -1957,13 +1963,14 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) nbd_put(nbd); return -EINVAL; } - config = nbd->config = nbd_alloc_config(); - if (!nbd->config) { + config = nbd_alloc_config(); + if (IS_ERR(config)) { mutex_unlock(&nbd->config_lock); nbd_put(nbd); printk(KERN_ERR "nbd: couldn't allocate config\n"); - return -ENOMEM; + return PTR_ERR(config); } + nbd->config = config; refcount_set(&nbd->config_refs, 1); set_bit(NBD_RT_BOUND, &config->runtime_flags); @@ -2491,6 +2498,12 @@ static void __exit nbd_cleanup(void) struct nbd_device *nbd; LIST_HEAD(del_list); + /* + * Unregister netlink interface prior to waiting + * for the completion of netlink commands. + */ + genl_unregister_family(&nbd_genl_family); + nbd_dbg_close(); mutex_lock(&nbd_index_mutex); @@ -2500,13 +2513,15 @@ static void __exit nbd_cleanup(void) while (!list_empty(&del_list)) { nbd = list_first_entry(&del_list, struct nbd_device, list); list_del_init(&nbd->list); + if (refcount_read(&nbd->config_refs)) + printk(KERN_ERR "nbd: possibly leaking nbd_config (ref %d)\n", + refcount_read(&nbd->config_refs)); if (refcount_read(&nbd->refs) != 1) printk(KERN_ERR "nbd: possibly leaking a device\n"); nbd_put(nbd); } idr_destroy(&nbd_index_idr); - genl_unregister_family(&nbd_genl_family); unregister_blkdev(NBD_MAJOR, "nbd"); } diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index a1824bb080446..220e26c409c8a 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -627,7 +627,7 @@ static int rsxx_eeh_fifo_flush_poll(struct rsxx_cardinfo *card) } static pci_ers_result_t rsxx_error_detected(struct pci_dev *dev, - enum pci_channel_state error) + pci_channel_state_t error) { int st; diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index d43a3e5d3999b..d2d7720643c8b 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -21,6 +21,11 @@ #define VQ_NAME_LEN 16 #define MAX_DISCARD_SEGMENTS 256u +static bool force_remove; +module_param(force_remove, bool, 0644); +MODULE_PARM_DESC(force_remove, + "Enable removing device regardless of inflight I/Os"); + static int major; static DEFINE_IDA(vd_index_ida); @@ -805,6 +810,16 @@ static enum blk_eh_timer_return virtblk_timeout(struct request *req, return BLK_EH_DONE; } + if (force_remove && blk_queue_dying(req->q)) { + dev_warn(disk_to_dev(vblk->disk), + "Abort Request: %p: %llu,%uB\n", req, + (unsigned long long)blk_rq_pos(req) << 9, + blk_rq_bytes(req)); + vbr->status = VIRTIO_BLK_S_IOERR; + blk_mq_complete_request(req); + return BLK_EH_DONE; + } + dev_info(disk_to_dev(vblk->disk), "Possible stuck request %p: %llu,%uB. Runtime %u seconds\n", req, (unsigned long long)blk_rq_pos(req) << 9, @@ -969,9 +984,17 @@ static int virtblk_probe(struct virtio_device *vdev) err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE, struct virtio_blk_config, blk_size, &blk_size); - if (!err) + if (!err) { + err = blk_validate_block_size(blk_size); + if (err) { + dev_err(&vdev->dev, + "virtio_blk: invalid block size: 0x%x\n", + blk_size); + goto out_free_tags; + } + blk_queue_logical_block_size(q, blk_size); - else + } else blk_size = queue_logical_block_size(q); /* Use topology information if available */ @@ -1001,11 +1024,12 @@ static int virtblk_probe(struct virtio_device *vdev) blk_queue_io_opt(q, blk_size * opt_io_size); if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) { - q->limits.discard_granularity = blk_size; - virtio_cread(vdev, struct virtio_blk_config, discard_sector_alignment, &v); - q->limits.discard_alignment = v ? v << SECTOR_SHIFT : 0; + if (v) + q->limits.discard_granularity = v << SECTOR_SHIFT; + else + q->limits.discard_granularity = blk_size; virtio_cread(vdev, struct virtio_blk_config, max_discard_sectors, &v); @@ -1056,6 +1080,7 @@ static int virtblk_probe(struct virtio_device *vdev) static void virtblk_remove(struct virtio_device *vdev) { struct virtio_blk *vblk = vdev->priv; + int i; /* Make sure no work handler is accessing the device. */ flush_work(&vblk->config_work); @@ -1067,6 +1092,13 @@ static void virtblk_remove(struct virtio_device *vdev) mutex_lock(&vblk->vdev_mutex); + for (i = 0; i < vblk->num_vqs; i++) { + if (!force_remove) + continue; + + while (virtqueue_detach_unused_buf(vblk->vqs[i].vq)); + } + /* Stop all the virtqueues. */ vdev->config->reset(vdev); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index def41e1bd7364..d0538c03f0332 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -80,6 +80,7 @@ enum blkif_state { BLKIF_STATE_DISCONNECTED, BLKIF_STATE_CONNECTED, BLKIF_STATE_SUSPENDED, + BLKIF_STATE_ERROR, }; struct grant { @@ -89,6 +90,7 @@ struct grant { }; enum blk_req_status { + REQ_PROCESSING, REQ_WAITING, REQ_DONE, REQ_ERROR, @@ -149,6 +151,10 @@ static unsigned int xen_blkif_max_ring_order; module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444); MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring"); +static bool __read_mostly xen_blkif_trusted = true; +module_param_named(trusted, xen_blkif_trusted, bool, 0644); +MODULE_PARM_DESC(trusted, "Is the backend trusted"); + #define BLK_RING_SIZE(info) \ __CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * (info)->nr_ring_pages) @@ -209,6 +215,7 @@ struct blkfront_info unsigned int feature_discard:1; unsigned int feature_secdiscard:1; unsigned int feature_persistent:1; + unsigned int bounce:1; unsigned int discard_granularity; unsigned int discard_alignment; /* Number of 4KB segments handled */ @@ -298,8 +305,8 @@ static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num) if (!gnt_list_entry) goto out_of_memory; - if (info->feature_persistent) { - granted_page = alloc_page(GFP_NOIO); + if (info->bounce) { + granted_page = alloc_page(GFP_NOIO | __GFP_ZERO); if (!granted_page) { kfree(gnt_list_entry); goto out_of_memory; @@ -318,7 +325,7 @@ static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num) list_for_each_entry_safe(gnt_list_entry, n, &rinfo->grants, node) { list_del(&gnt_list_entry->node); - if (info->feature_persistent) + if (info->bounce) __free_page(gnt_list_entry->page); kfree(gnt_list_entry); i--; @@ -364,7 +371,7 @@ static struct grant *get_grant(grant_ref_t *gref_head, /* Assign a gref to this page */ gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head); BUG_ON(gnt_list_entry->gref == -ENOSPC); - if (info->feature_persistent) + if (info->bounce) grant_foreign_access(gnt_list_entry, info); else { /* Grant access to the GFN passed by the caller */ @@ -388,7 +395,7 @@ static struct grant *get_indirect_grant(grant_ref_t *gref_head, /* Assign a gref to this page */ gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head); BUG_ON(gnt_list_entry->gref == -ENOSPC); - if (!info->feature_persistent) { + if (!info->bounce) { struct page *indirect_page; /* Fetch a pre-allocated page to use for indirect grefs */ @@ -533,10 +540,10 @@ static unsigned long blkif_ring_get_request(struct blkfront_ring_info *rinfo, id = get_id_from_freelist(rinfo); rinfo->shadow[id].request = req; - rinfo->shadow[id].status = REQ_WAITING; + rinfo->shadow[id].status = REQ_PROCESSING; rinfo->shadow[id].associated_id = NO_ASSOCIATED_ID; - (*ring_req)->u.rw.id = id; + rinfo->shadow[id].req.u.rw.id = id; return id; } @@ -544,11 +551,12 @@ static unsigned long blkif_ring_get_request(struct blkfront_ring_info *rinfo, static int blkif_queue_discard_req(struct request *req, struct blkfront_ring_info *rinfo) { struct blkfront_info *info = rinfo->dev_info; - struct blkif_request *ring_req; + struct blkif_request *ring_req, *final_ring_req; unsigned long id; /* Fill out a communications ring structure. */ - id = blkif_ring_get_request(rinfo, req, &ring_req); + id = blkif_ring_get_request(rinfo, req, &final_ring_req); + ring_req = &rinfo->shadow[id].req; ring_req->operation = BLKIF_OP_DISCARD; ring_req->u.discard.nr_sectors = blk_rq_sectors(req); @@ -559,8 +567,9 @@ static int blkif_queue_discard_req(struct request *req, struct blkfront_ring_inf else ring_req->u.discard.flag = 0; - /* Keep a private copy so we can reissue requests when recovering. */ - rinfo->shadow[id].req = *ring_req; + /* Copy the request to the ring page. */ + *final_ring_req = *ring_req; + rinfo->shadow[id].status = REQ_WAITING; return 0; } @@ -693,6 +702,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri { struct blkfront_info *info = rinfo->dev_info; struct blkif_request *ring_req, *extra_ring_req = NULL; + struct blkif_request *final_ring_req, *final_extra_ring_req = NULL; unsigned long id, extra_id = NO_ASSOCIATED_ID; bool require_extra_req = false; int i; @@ -700,7 +710,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri .grant_idx = 0, .segments = NULL, .rinfo = rinfo, - .need_copy = rq_data_dir(req) && info->feature_persistent, + .need_copy = rq_data_dir(req) && info->bounce, }; /* @@ -737,7 +747,8 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri } /* Fill out a communications ring structure. */ - id = blkif_ring_get_request(rinfo, req, &ring_req); + id = blkif_ring_get_request(rinfo, req, &final_ring_req); + ring_req = &rinfo->shadow[id].req; num_sg = blk_rq_map_sg(req->q, req, rinfo->shadow[id].sg); num_grant = 0; @@ -788,7 +799,9 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri ring_req->u.rw.nr_segments = num_grant; if (unlikely(require_extra_req)) { extra_id = blkif_ring_get_request(rinfo, req, - &extra_ring_req); + &final_extra_ring_req); + extra_ring_req = &rinfo->shadow[extra_id].req; + /* * Only the first request contains the scatter-gather * list. @@ -830,10 +843,13 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri if (setup.segments) kunmap_atomic(setup.segments); - /* Keep a private copy so we can reissue requests when recovering. */ - rinfo->shadow[id].req = *ring_req; - if (unlikely(require_extra_req)) - rinfo->shadow[extra_id].req = *extra_ring_req; + /* Copy request(s) to the ring page. */ + *final_ring_req = *ring_req; + rinfo->shadow[id].status = REQ_WAITING; + if (unlikely(require_extra_req)) { + *final_extra_ring_req = *extra_ring_req; + rinfo->shadow[extra_id].status = REQ_WAITING; + } if (new_persistent_gnts) gnttab_free_grant_references(setup.gref_head); @@ -1015,11 +1031,12 @@ static void xlvbd_flush(struct blkfront_info *info) { blk_queue_write_cache(info->rq, info->feature_flush ? true : false, info->feature_fua ? true : false); - pr_info("blkfront: %s: %s %s %s %s %s\n", + pr_info("blkfront: %s: %s %s %s %s %s %s %s\n", info->gd->disk_name, flush_info(info), "persistent grants:", info->feature_persistent ? "enabled;" : "disabled;", "indirect descriptors:", - info->max_indirect_segments ? "enabled;" : "disabled;"); + info->max_indirect_segments ? "enabled;" : "disabled;", + "bounce buffer:", info->bounce ? "enabled" : "disabled;"); } static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset) @@ -1254,7 +1271,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo) if (!list_empty(&rinfo->indirect_pages)) { struct page *indirect_page, *n; - BUG_ON(info->feature_persistent); + BUG_ON(info->bounce); list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) { list_del(&indirect_page->lru); __free_page(indirect_page); @@ -1271,7 +1288,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo) 0, 0UL); rinfo->persistent_gnts_c--; } - if (info->feature_persistent) + if (info->bounce) __free_page(persistent_gnt->page); kfree(persistent_gnt); } @@ -1292,7 +1309,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo) for (j = 0; j < segs; j++) { persistent_gnt = rinfo->shadow[i].grants_used[j]; gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); - if (info->feature_persistent) + if (info->bounce) __free_page(persistent_gnt->page); kfree(persistent_gnt); } @@ -1333,7 +1350,8 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo) rinfo->ring_ref[i] = GRANT_INVALID_REF; } } - free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * XEN_PAGE_SIZE)); + free_pages_exact(rinfo->ring.sring, + info->nr_ring_pages * XEN_PAGE_SIZE); rinfo->ring.sring = NULL; if (rinfo->irq) @@ -1407,8 +1425,8 @@ static enum blk_req_status blkif_rsp_to_req_status(int rsp) static int blkif_get_final_status(enum blk_req_status s1, enum blk_req_status s2) { - BUG_ON(s1 == REQ_WAITING); - BUG_ON(s2 == REQ_WAITING); + BUG_ON(s1 < REQ_DONE); + BUG_ON(s2 < REQ_DONE); if (s1 == REQ_ERROR || s2 == REQ_ERROR) return BLKIF_RSP_ERROR; @@ -1417,9 +1435,15 @@ static int blkif_get_final_status(enum blk_req_status s1, return BLKIF_RSP_OKAY; } -static bool blkif_completion(unsigned long *id, - struct blkfront_ring_info *rinfo, - struct blkif_response *bret) +/* + * Return values: + * 1 response processed. + * 0 missing further responses. + * -1 error while processing. + */ +static int blkif_completion(unsigned long *id, + struct blkfront_ring_info *rinfo, + struct blkif_response *bret) { int i = 0; struct scatterlist *sg; @@ -1441,8 +1465,8 @@ static bool blkif_completion(unsigned long *id, s->status = blkif_rsp_to_req_status(bret->status); /* Wait the second response if not yet here. */ - if (s2->status == REQ_WAITING) - return false; + if (s2->status < REQ_DONE) + return 0; bret->status = blkif_get_final_status(s->status, s2->status); @@ -1475,7 +1499,7 @@ static bool blkif_completion(unsigned long *id, data.s = s; num_sg = s->num_sg; - if (bret->operation == BLKIF_OP_READ && info->feature_persistent) { + if (bret->operation == BLKIF_OP_READ && info->bounce) { for_each_sg(s->sg, sg, num_sg, i) { BUG_ON(sg->offset + sg->length > PAGE_SIZE); @@ -1493,47 +1517,48 @@ static bool blkif_completion(unsigned long *id, } /* Add the persistent grant into the list of free grants */ for (i = 0; i < num_grant; i++) { - if (gnttab_query_foreign_access(s->grants_used[i]->gref)) { + if (!gnttab_try_end_foreign_access(s->grants_used[i]->gref)) { /* * If the grant is still mapped by the backend (the * backend has chosen to make this grant persistent) * we add it at the head of the list, so it will be * reused first. */ - if (!info->feature_persistent) - pr_alert_ratelimited("backed has not unmapped grant: %u\n", - s->grants_used[i]->gref); + if (!info->feature_persistent) { + pr_alert("backed has not unmapped grant: %u\n", + s->grants_used[i]->gref); + return -1; + } list_add(&s->grants_used[i]->node, &rinfo->grants); rinfo->persistent_gnts_c++; } else { /* - * If the grant is not mapped by the backend we end the - * foreign access and add it to the tail of the list, - * so it will not be picked again unless we run out of - * persistent grants. + * If the grant is not mapped by the backend we add it + * to the tail of the list, so it will not be picked + * again unless we run out of persistent grants. */ - gnttab_end_foreign_access(s->grants_used[i]->gref, 0, 0UL); s->grants_used[i]->gref = GRANT_INVALID_REF; list_add_tail(&s->grants_used[i]->node, &rinfo->grants); } } if (s->req.operation == BLKIF_OP_INDIRECT) { for (i = 0; i < INDIRECT_GREFS(num_grant); i++) { - if (gnttab_query_foreign_access(s->indirect_grants[i]->gref)) { - if (!info->feature_persistent) - pr_alert_ratelimited("backed has not unmapped grant: %u\n", - s->indirect_grants[i]->gref); + if (!gnttab_try_end_foreign_access(s->indirect_grants[i]->gref)) { + if (!info->feature_persistent) { + pr_alert("backed has not unmapped grant: %u\n", + s->indirect_grants[i]->gref); + return -1; + } list_add(&s->indirect_grants[i]->node, &rinfo->grants); rinfo->persistent_gnts_c++; } else { struct page *indirect_page; - gnttab_end_foreign_access(s->indirect_grants[i]->gref, 0, 0UL); /* * Add the used indirect page back to the list of * available pages for indirect grefs. */ - if (!info->feature_persistent) { + if (!info->bounce) { indirect_page = s->indirect_grants[i]->page; list_add(&indirect_page->lru, &rinfo->indirect_pages); } @@ -1543,71 +1568,103 @@ static bool blkif_completion(unsigned long *id, } } - return true; + return 1; } static irqreturn_t blkif_interrupt(int irq, void *dev_id) { struct request *req; - struct blkif_response *bret; + struct blkif_response bret; RING_IDX i, rp; unsigned long flags; struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id; struct blkfront_info *info = rinfo->dev_info; + unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS; - if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) + if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) { + xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS); return IRQ_HANDLED; + } spin_lock_irqsave(&rinfo->ring_lock, flags); again: - rp = rinfo->ring.sring->rsp_prod; - rmb(); /* Ensure we see queued responses up to 'rp'. */ + rp = READ_ONCE(rinfo->ring.sring->rsp_prod); + virt_rmb(); /* Ensure we see queued responses up to 'rp'. */ + if (RING_RESPONSE_PROD_OVERFLOW(&rinfo->ring, rp)) { + pr_alert("%s: illegal number of responses %u\n", + info->gd->disk_name, rp - rinfo->ring.rsp_cons); + goto err; + } for (i = rinfo->ring.rsp_cons; i != rp; i++) { unsigned long id; + unsigned int op; + + eoiflag = 0; + + RING_COPY_RESPONSE(&rinfo->ring, i, &bret); + id = bret.id; - bret = RING_GET_RESPONSE(&rinfo->ring, i); - id = bret->id; /* * The backend has messed up and given us an id that we would * never have given to it (we stamp it up to BLK_RING_SIZE - * look in get_id_from_freelist. */ if (id >= BLK_RING_SIZE(info)) { - WARN(1, "%s: response to %s has incorrect id (%ld)\n", - info->gd->disk_name, op_name(bret->operation), id); - /* We can't safely get the 'struct request' as - * the id is busted. */ - continue; + pr_alert("%s: response has incorrect id (%ld)\n", + info->gd->disk_name, id); + goto err; + } + if (rinfo->shadow[id].status != REQ_WAITING) { + pr_alert("%s: response references no pending request\n", + info->gd->disk_name); + goto err; } + + rinfo->shadow[id].status = REQ_PROCESSING; req = rinfo->shadow[id].request; - if (bret->operation != BLKIF_OP_DISCARD) { + op = rinfo->shadow[id].req.operation; + if (op == BLKIF_OP_INDIRECT) + op = rinfo->shadow[id].req.u.indirect.indirect_op; + if (bret.operation != op) { + pr_alert("%s: response has wrong operation (%u instead of %u)\n", + info->gd->disk_name, bret.operation, op); + goto err; + } + + if (bret.operation != BLKIF_OP_DISCARD) { + int ret; + /* * We may need to wait for an extra response if the * I/O request is split in 2 */ - if (!blkif_completion(&id, rinfo, bret)) + ret = blkif_completion(&id, rinfo, &bret); + if (!ret) continue; + if (unlikely(ret < 0)) + goto err; } if (add_id_to_freelist(rinfo, id)) { WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n", - info->gd->disk_name, op_name(bret->operation), id); + info->gd->disk_name, op_name(bret.operation), id); continue; } - if (bret->status == BLKIF_RSP_OKAY) + if (bret.status == BLKIF_RSP_OKAY) blkif_req(req)->error = BLK_STS_OK; else blkif_req(req)->error = BLK_STS_IOERR; - switch (bret->operation) { + switch (bret.operation) { case BLKIF_OP_DISCARD: - if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { + if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) { struct request_queue *rq = info->rq; - printk(KERN_WARNING "blkfront: %s: %s op failed\n", - info->gd->disk_name, op_name(bret->operation)); + + pr_warn_ratelimited("blkfront: %s: %s op failed\n", + info->gd->disk_name, op_name(bret.operation)); blkif_req(req)->error = BLK_STS_NOTSUPP; info->feature_discard = 0; info->feature_secdiscard = 0; @@ -1617,15 +1674,15 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) break; case BLKIF_OP_FLUSH_DISKCACHE: case BLKIF_OP_WRITE_BARRIER: - if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { - printk(KERN_WARNING "blkfront: %s: %s op failed\n", - info->gd->disk_name, op_name(bret->operation)); + if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) { + pr_warn_ratelimited("blkfront: %s: %s op failed\n", + info->gd->disk_name, op_name(bret.operation)); blkif_req(req)->error = BLK_STS_NOTSUPP; } - if (unlikely(bret->status == BLKIF_RSP_ERROR && + if (unlikely(bret.status == BLKIF_RSP_ERROR && rinfo->shadow[id].req.u.rw.nr_segments == 0)) { - printk(KERN_WARNING "blkfront: %s: empty %s op failed\n", - info->gd->disk_name, op_name(bret->operation)); + pr_warn_ratelimited("blkfront: %s: empty %s op failed\n", + info->gd->disk_name, op_name(bret.operation)); blkif_req(req)->error = BLK_STS_NOTSUPP; } if (unlikely(blkif_req(req)->error)) { @@ -1638,9 +1695,10 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) /* fall through */ case BLKIF_OP_READ: case BLKIF_OP_WRITE: - if (unlikely(bret->status != BLKIF_RSP_OKAY)) - dev_dbg(&info->xbdev->dev, "Bad return from blkdev data " - "request: %x\n", bret->status); + if (unlikely(bret.status != BLKIF_RSP_OKAY)) + dev_dbg_ratelimited(&info->xbdev->dev, + "Bad return from blkdev data request: %#x\n", + bret.status); break; default: @@ -1664,6 +1722,18 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) spin_unlock_irqrestore(&rinfo->ring_lock, flags); + xen_irq_lateeoi(irq, eoiflag); + + return IRQ_HANDLED; + + err: + info->connected = BLKIF_STATE_ERROR; + + spin_unlock_irqrestore(&rinfo->ring_lock, flags); + + /* No EOI in order to avoid further interrupts. */ + + pr_alert("%s disabled for further use\n", info->gd->disk_name); return IRQ_HANDLED; } @@ -1680,8 +1750,7 @@ static int setup_blkring(struct xenbus_device *dev, for (i = 0; i < info->nr_ring_pages; i++) rinfo->ring_ref[i] = GRANT_INVALID_REF; - sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH, - get_order(ring_size)); + sring = alloc_pages_exact(ring_size, GFP_NOIO | __GFP_ZERO); if (!sring) { xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); return -ENOMEM; @@ -1691,7 +1760,7 @@ static int setup_blkring(struct xenbus_device *dev, err = xenbus_grant_ring(dev, rinfo->ring.sring, info->nr_ring_pages, gref); if (err < 0) { - free_pages((unsigned long)sring, get_order(ring_size)); + free_pages_exact(sring, ring_size); rinfo->ring.sring = NULL; goto fail; } @@ -1702,8 +1771,8 @@ static int setup_blkring(struct xenbus_device *dev, if (err) goto fail; - err = bind_evtchn_to_irqhandler(rinfo->evtchn, blkif_interrupt, 0, - "blkif", rinfo); + err = bind_evtchn_to_irqhandler_lateeoi(rinfo->evtchn, blkif_interrupt, + 0, "blkif", rinfo); if (err <= 0) { xenbus_dev_fatal(dev, err, "bind_evtchn_to_irqhandler failed"); @@ -1784,6 +1853,10 @@ static int talk_to_blkback(struct xenbus_device *dev, if (!info) return -ENODEV; + /* Check if backend is trusted. */ + info->bounce = !xen_blkif_trusted || + !xenbus_read_unsigned(dev->nodename, "trusted", 1); + max_page_order = xenbus_read_unsigned(info->xbdev->otherend, "max-ring-page-order", 0); ring_page_order = min(xen_blkif_max_ring_order, max_page_order); @@ -2210,17 +2283,18 @@ static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo) if (err) goto out_of_memory; - if (!info->feature_persistent && info->max_indirect_segments) { + if (!info->bounce && info->max_indirect_segments) { /* - * We are using indirect descriptors but not persistent - * grants, we need to allocate a set of pages that can be + * We are using indirect descriptors but don't have a bounce + * buffer, we need to allocate a set of pages that can be * used for mapping indirect grefs */ int num = INDIRECT_GREFS(grants) * BLK_RING_SIZE(info); BUG_ON(!list_empty(&rinfo->indirect_pages)); for (i = 0; i < num; i++) { - struct page *indirect_page = alloc_page(GFP_KERNEL); + struct page *indirect_page = alloc_page(GFP_KERNEL | + __GFP_ZERO); if (!indirect_page) goto out_of_memory; list_add(&indirect_page->lru, &rinfo->indirect_pages); @@ -2312,6 +2386,8 @@ static void blkfront_gather_backend_features(struct blkfront_info *info) info->feature_persistent = !!xenbus_read_unsigned(info->xbdev->otherend, "feature-persistent", 0); + if (info->feature_persistent) + info->bounce = true; indirect_segments = xenbus_read_unsigned(info->xbdev->otherend, "feature-max-indirect-segments", 0); @@ -2669,11 +2745,10 @@ static void purge_persistent_grants(struct blkfront_info *info) list_for_each_entry_safe(gnt_list_entry, tmp, &rinfo->grants, node) { if (gnt_list_entry->gref == GRANT_INVALID_REF || - gnttab_query_foreign_access(gnt_list_entry->gref)) + !gnttab_try_end_foreign_access(gnt_list_entry->gref)) continue; list_del(&gnt_list_entry->node); - gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL); rinfo->persistent_gnts_c--; gnt_list_entry->gref = GRANT_INVALID_REF; list_add_tail(&gnt_list_entry->node, &rinfo->grants); @@ -2688,6 +2763,13 @@ static void blkfront_delay_work(struct work_struct *work) struct blkfront_info *info; bool need_schedule_work = false; + /* + * Note that when using bounce buffers but not persistent grants + * there's no need to run blkfront_delay_work because grants are + * revoked in blkif_completion or else an error is reported and the + * connection is closed. + */ + mutex_lock(&blkfront_mutex); list_for_each_entry(info, &info_list, info_list) { diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 719c6b7741afa..1cc9b67e9bcaa 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -901,7 +901,7 @@ static ssize_t read_block_state(struct file *file, char __user *buf, zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.', zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.'); - if (count < copied) { + if (count <= copied) { zram_slot_unlock(zram, index); break; } diff --git a/drivers/bluetooth/bfusb.c b/drivers/bluetooth/bfusb.c index 0e5954cac98e0..00d72f6d8e8e2 100644 --- a/drivers/bluetooth/bfusb.c +++ b/drivers/bluetooth/bfusb.c @@ -629,6 +629,9 @@ static int bfusb_probe(struct usb_interface *intf, const struct usb_device_id *i data->bulk_out_ep = bulk_out_ep->desc.bEndpointAddress; data->bulk_pkt_size = le16_to_cpu(bulk_out_ep->desc.wMaxPacketSize); + if (!data->bulk_pkt_size) + goto done; + rwlock_init(&data->lock); data->reassembly = NULL; diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c index 304178be1ef40..0e7f44461946e 100644 --- a/drivers/bluetooth/btmtksdio.c +++ b/drivers/bluetooth/btmtksdio.c @@ -980,6 +980,8 @@ static int btmtksdio_probe(struct sdio_func *func, hdev->manufacturer = 70; set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks); + sdio_set_drvdata(func, bdev); + err = hci_register_dev(hdev); if (err < 0) { dev_err(&func->dev, "Can't register HCI device\n"); @@ -987,8 +989,6 @@ static int btmtksdio_probe(struct sdio_func *func, return err; } - sdio_set_drvdata(func, bdev); - /* pm_runtime_enable would be done after the firmware is being * downloaded because the core layer probably already enables * runtime PM for this func such as the case host->caps & @@ -1041,6 +1041,8 @@ static int btmtksdio_runtime_suspend(struct device *dev) if (!bdev) return 0; + sdio_set_host_pm_flags(func, MMC_PM_KEEP_POWER); + sdio_claim_host(bdev->func); sdio_writel(bdev->func, C_FW_OWN_REQ_SET, MTK_REG_CHLPCR, &err); diff --git a/drivers/bluetooth/btmtkuart.c b/drivers/bluetooth/btmtkuart.c index 8a81fbca5c9d8..2beb2321825e3 100644 --- a/drivers/bluetooth/btmtkuart.c +++ b/drivers/bluetooth/btmtkuart.c @@ -158,8 +158,10 @@ static int mtk_hci_wmt_sync(struct hci_dev *hdev, int err; hlen = sizeof(*hdr) + wmt_params->dlen; - if (hlen > 255) - return -EINVAL; + if (hlen > 255) { + err = -EINVAL; + goto err_free_skb; + } hdr = (struct mtk_wmt_hdr *)&wc; hdr->dir = 1; @@ -173,7 +175,7 @@ static int mtk_hci_wmt_sync(struct hci_dev *hdev, err = __hci_cmd_send(hdev, 0xfc6f, hlen, &wc); if (err < 0) { clear_bit(BTMTKUART_TX_WAIT_VND_EVT, &bdev->tx_state); - return err; + goto err_free_skb; } /* The vendor specific WMT commands are all answered by a vendor @@ -190,13 +192,14 @@ static int mtk_hci_wmt_sync(struct hci_dev *hdev, if (err == -EINTR) { bt_dev_err(hdev, "Execution of wmt command interrupted"); clear_bit(BTMTKUART_TX_WAIT_VND_EVT, &bdev->tx_state); - return err; + goto err_free_skb; } if (err) { bt_dev_err(hdev, "Execution of wmt command timed out"); clear_bit(BTMTKUART_TX_WAIT_VND_EVT, &bdev->tx_state); - return -ETIMEDOUT; + err = -ETIMEDOUT; + goto err_free_skb; } /* Parse and handle the return WMT event */ diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 6d643651d69f7..c8f2b991e9cf7 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -2568,6 +2568,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb) skb = bt_skb_alloc(HCI_WMT_MAX_EVENT_SIZE, GFP_ATOMIC); if (!skb) { hdev->stat.err_rx++; + kfree(urb->setup_packet); return; } @@ -2588,6 +2589,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb) data->evt_skb = skb_clone(skb, GFP_ATOMIC); if (!data->evt_skb) { kfree_skb(skb); + kfree(urb->setup_packet); return; } } @@ -2596,6 +2598,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb) if (err < 0) { kfree_skb(data->evt_skb); data->evt_skb = NULL; + kfree(urb->setup_packet); return; } @@ -2606,6 +2609,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb) wake_up_bit(&data->flags, BTUSB_TX_WAIT_VND_EVT); } + kfree(urb->setup_packet); return; } else if (urb->status == -ENOENT) { /* Avoid suspend failed when usb_kill_urb */ @@ -2626,6 +2630,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb) usb_anchor_urb(urb, &data->ctrl_anchor); err = usb_submit_urb(urb, GFP_ATOMIC); if (err < 0) { + kfree(urb->setup_packet); /* -EPERM: urb is being killed; * -ENODEV: device got disconnected */ diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index 94ed734c1d7eb..c6bb380806f9b 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -1127,7 +1127,12 @@ static int bcm_probe(struct platform_device *pdev) return -ENOMEM; dev->dev = &pdev->dev; - dev->irq = platform_get_irq(pdev, 0); + + ret = platform_get_irq(pdev, 0); + if (ret < 0) + return ret; + + dev->irq = ret; if (has_acpi_companion(&pdev->dev)) { ret = bcm_acpi_probe(dev); diff --git a/drivers/bluetooth/hci_serdev.c b/drivers/bluetooth/hci_serdev.c index 1b4ad231e6ed3..fd081bdd2dd8a 100644 --- a/drivers/bluetooth/hci_serdev.c +++ b/drivers/bluetooth/hci_serdev.c @@ -279,6 +279,8 @@ int hci_uart_register_device(struct hci_uart *hu, if (err) return err; + percpu_init_rwsem(&hu->proto_lock); + err = p->open(hu); if (err) goto err_open; @@ -301,7 +303,6 @@ int hci_uart_register_device(struct hci_uart *hu, INIT_WORK(&hu->init_ready, hci_uart_init_work); INIT_WORK(&hu->write_work, hci_uart_write_work); - percpu_init_rwsem(&hu->proto_lock); /* Only when vendor specific setup callback is provided, consider * the manufacturer information valid. This avoids filling in the diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c index 1bb00a959c67f..9b1a5e62417cb 100644 --- a/drivers/bus/sunxi-rsb.c +++ b/drivers/bus/sunxi-rsb.c @@ -224,6 +224,8 @@ static struct sunxi_rsb_device *sunxi_rsb_device_create(struct sunxi_rsb *rsb, dev_dbg(&rdev->dev, "device %s registered\n", dev_name(&rdev->dev)); + return rdev; + err_device_add: put_device(&rdev->dev); diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index 90053c4a8290d..44aeceaccfa48 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -1388,6 +1388,9 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = { /* Quirks that need to be set based on detected module */ SYSC_QUIRK("aess", 0, 0, 0x10, -ENODEV, 0x40000000, 0xffffffff, SYSC_MODULE_QUIRK_AESS), + /* Errata i893 handling for dra7 dcan1 and 2 */ + SYSC_QUIRK("dcan", 0x4ae3c000, 0x20, -ENODEV, -ENODEV, 0xa3170504, 0xffffffff, + SYSC_QUIRK_CLKDM_NOAUTO), SYSC_QUIRK("dcan", 0x48480000, 0x20, -ENODEV, -ENODEV, 0xa3170504, 0xffffffff, SYSC_QUIRK_CLKDM_NOAUTO), SYSC_QUIRK("dss", 0x4832a000, 0, 0x10, 0x14, 0x00000020, 0xffffffff, @@ -2721,7 +2724,9 @@ static int sysc_remove(struct platform_device *pdev) struct sysc *ddata = platform_get_drvdata(pdev); int error; - cancel_delayed_work_sync(&ddata->idle_work); + /* Device can still be enabled, see deferred idle quirk in probe */ + if (cancel_delayed_work_sync(&ddata->idle_work)) + ti_sysc_idle(&ddata->idle_work.work); error = pm_runtime_get_sync(ddata->dev); if (error < 0) { diff --git a/drivers/bytedance/smith/src/smith_hook.c b/drivers/bytedance/smith/src/smith_hook.c index 75ad1c64b7f2a..e925e39ad1514 100644 --- a/drivers/bytedance/smith/src/smith_hook.c +++ b/drivers/bytedance/smith/src/smith_hook.c @@ -1788,9 +1788,6 @@ int udp_recvmsg_entry_handler(struct kretprobe_instance *ri, struct msghdr *msg; struct udp_recvmsg_data *data; - /* counting udpv4_recvmsg callings */ - smith_count_dnsv4_kretprobe(); - data = (struct udp_recvmsg_data *) ri->data; #if LINUX_VERSION_CODE > KERNEL_VERSION(4, 0, 9) @@ -1871,6 +1868,10 @@ int udp_recvmsg_entry_handler(struct kretprobe_instance *ri, return -EINVAL; do_kretprobe: + + /* counting dns requests */ + smith_count_dnsv4_kretprobe(); + return 0; } @@ -1887,9 +1888,6 @@ int udpv6_recvmsg_entry_handler(struct kretprobe_instance *ri, void *tmp_sk; int flags; - /* counting udpv6_recvmsg callings */ - smith_count_dnsv6_kretprobe(); - data = (struct udp_recvmsg_data *)ri->data; #if LINUX_VERSION_CODE > KERNEL_VERSION(4, 0, 9) @@ -1985,6 +1983,10 @@ int udpv6_recvmsg_entry_handler(struct kretprobe_instance *ri, return -EINVAL; do_kretprobe: + + /* counting dns requests */ + smith_count_dnsv6_kretprobe(); + return 0; } #endif @@ -1994,8 +1996,8 @@ void unregister_udp_recvmsg_kprobe(void); int register_udpv6_recvmsg_kprobe(void); void unregister_udpv6_recvmsg_kprobe(void); -#define SMITH_DNS_THRESHOLD (5000) /* DNS threshold */ -#define SMITH_UDP_THRESHOLD (28000 * num_online_cpus()) /* UDP threshold */ +#define SMITH_DNS_THRESHOLD (800) /* DNS threshold: ops/s */ +#define SMITH_UDP_THRESHOLD (80000) /* UDP threshold: ops/s */ #define SMITH_DNS_KRP_INTERVAL (10) /* 10 seconds */ #define SMITH_DNS_NET_INTERVAL (20) /* 20 seconds, WARNING: atomic_t may overflow */ @@ -2004,7 +2006,6 @@ struct smith_dns_switch { atomic_t krp ____cacheline_aligned_in_smp; /* udp_recvmsg kretprobe handling count */ atomic_t ops ____cacheline_aligned_in_smp; /* udp in traffic count */ uint64_t start ____cacheline_aligned_in_smp; /* start time stamp of counting */ - uint64_t anchor ____cacheline_aligned_in_smp; /* anchor time stamp */ int *regs; /* kretprobe registration result */ int (*enable)(void); void (*disable)(void); @@ -2017,43 +2018,45 @@ static void smith_count_dnsv4_kretprobe(void) atomic_inc(&g_dns_v4_switch.krp); } +/* turn off kretprobe if dns requests exceed threshold */ static void smith_dns_kretprobe(struct smith_dns_switch *ds) { - uint64_t now = smith_get_seconds(); + uint64_t now = smith_get_seconds(), delta; - if (ds->start + SMITH_DNS_KRP_INTERVAL < now) { - if (atomic_read(&ds->krp) > SMITH_DNS_THRESHOLD * (now - ds->start)) { + delta = now - ds->start; + if (delta > SMITH_DNS_KRP_INTERVAL) { + if (atomic_read(&ds->krp) > delta * SMITH_DNS_THRESHOLD || + atomic_read(&ds->ops) > delta * SMITH_UDP_THRESHOLD ){ /* trying to avoid concurrent issue */ - if (atomic_cmpxchg(&ds->armed, 1, 0) == 1) { - atomic_set(&ds->ops, 0); - ds->anchor = smith_get_seconds(); + if (atomic_cmpxchg(&ds->armed, 1, 0) == 1) ds->disable(); - } } - atomic_set(&ds->krp, 0); /* inaccurate for SMP, but acceptable */ + atomic_set(&ds->krp, 0); + atomic_set(&ds->ops, 0); ds->start = smith_get_seconds(); } } +/* try to trun on kretprobe for udp_recvmsg */ static void smith_dns_try_switch(struct smith_dns_switch *ds) { - uint64_t now = smith_get_seconds(); + uint64_t now = smith_get_seconds(), delta; if (0 == DNS_HOOK || atomic_read(&ds->armed) || *(ds->regs)) return; - if (ds->anchor + SMITH_DNS_NET_INTERVAL < now) { - if (atomic_read(&ds->ops) < SMITH_UDP_THRESHOLD * (now - ds->anchor)) { + delta = now - ds->start; + if (delta > SMITH_DNS_NET_INTERVAL) { + if (atomic_read(&ds->ops) < delta * SMITH_UDP_THRESHOLD) { /* trying to avoid concurrent issue */ if (atomic_cmpxchg(&ds->armed, 0, 1) == 0) { - atomic_set(&ds->krp, 0); - ds->start = smith_get_seconds(); if (ds->enable()) atomic_set(&ds->armed, 0); } } - atomic_set(&ds->ops, 0); /* inaccurate for SMP, but acceptable */ - ds->anchor = smith_get_seconds(); + atomic_set(&ds->krp, 0); + atomic_set(&ds->ops, 0); + ds->start = smith_get_seconds(); } } @@ -2081,7 +2084,11 @@ static unsigned int smith_nf_udp_v4_handler( #endif ) { - atomic_inc(&g_dns_v4_switch.ops); + struct iphdr *iph = ip_hdr(skb); + + /* only counting udp packets */ + if (iph->protocol == IPPROTO_UDP) + atomic_inc(&g_dns_v4_switch.ops); return NF_ACCEPT; } @@ -2119,7 +2126,11 @@ static unsigned int smith_nf_udp_v6_handler( #endif ) { - atomic_inc(&g_dns_v6_switch.ops); + struct iphdr *iph = ip_hdr(skb); + + /* only counting udp packets */ + if (iph->protocol == IPPROTO_UDP) + atomic_inc(&g_dns_v6_switch.ops); return NF_ACCEPT; } #endif @@ -2184,10 +2195,10 @@ static int smith_dns_work_handler(void *argu) { unsigned long timeout = msecs_to_jiffies(1000); - /* reset anchor timestamp */ - g_dns_v4_switch.anchor = smith_get_seconds(); + /* reset start timestamp */ + g_dns_v4_switch.start = smith_get_seconds(); #if IS_ENABLED(CONFIG_IPV6) - g_dns_v6_switch.anchor = smith_get_seconds(); + g_dns_v6_switch.start = smith_get_seconds(); #endif do { @@ -3392,22 +3403,20 @@ struct kprobe ptrace_kprobe = { .pre_handler = ptrace_pre_handler, }; -struct kretprobe udp_recvmsg_kretprobe_value = { +struct kretprobe udp_recvmsg_kretprobe = { .kp.symbol_name = "udp_recvmsg", .data_size = sizeof(struct udp_recvmsg_data), .handler = udp_recvmsg_handler, .entry_handler = udp_recvmsg_entry_handler, }; -struct kretprobe udp_recvmsg_kretprobe; #if IS_ENABLED(CONFIG_IPV6) -struct kretprobe udpv6_recvmsg_kretprobe_value = { +struct kretprobe udpv6_recvmsg_kretprobe = { .kp.symbol_name = "udpv6_recvmsg", .data_size = sizeof(struct udp_recvmsg_data), .handler = udp_recvmsg_handler, .entry_handler = udpv6_recvmsg_entry_handler, }; -struct kretprobe udpv6_recvmsg_kretprobe; struct kretprobe ip6_datagram_connect_kretprobe = { .kp.symbol_name = "ip6_datagram_connect", @@ -3571,10 +3580,31 @@ static struct notifier_block smith_usb_notifier = { .notifier_call = smith_usb_ncb, }; +/* + * set minimal of maxactive as 32 to avoid possible missings + * of kretprobe events, especially for NON-PREEMPTED systems + * with small number of CPU cores (ex: 2-core KVM guest) + */ +static int smith_register_kretprobe(struct kretprobe *kr) +{ + int ninsts = max_t(int, 32, 2 * num_present_cpus()); + if (kr->maxactive < ninsts) + kr->maxactive = ninsts; + return register_kretprobe(kr); +} + +static void smith_unregister_kretprobe(struct kretprobe *kr) +{ + unregister_kretprobe(kr); + + /* set addr to NULL to enable re-registeration */ + kr->kp.addr = NULL; +} + int register_bind_kprobe(void) { int ret; - ret = register_kretprobe(&bind_kretprobe); + ret = smith_register_kretprobe(&bind_kretprobe); if (ret == 0) bind_kprobe_state = 0x1; @@ -3584,7 +3614,7 @@ int register_bind_kprobe(void) void unregister_bind_kprobe(void) { - unregister_kretprobe(&bind_kretprobe); + smith_unregister_kretprobe(&bind_kretprobe); } int register_call_usermodehelper_exec_kprobe(void) @@ -3670,7 +3700,7 @@ void unregister_link_kprobe(void) int register_execve_kprobe(void) { int ret; - ret = register_kretprobe(&execve_kretprobe); + ret = smith_register_kretprobe(&execve_kretprobe); if (ret == 0) execve_kretprobe_state = 0x1; @@ -3681,7 +3711,7 @@ int register_execve_kprobe(void) int register_execveat_kprobe(void) { int ret; - ret = register_kretprobe(&execveat_kretprobe); + ret = smith_register_kretprobe(&execveat_kretprobe); if (ret == 0) execveat_kretprobe_state = 0x1; @@ -3693,7 +3723,7 @@ int register_execveat_kprobe(void) int register_compat_execve_kprobe(void) { int ret; - ret = register_kretprobe(&compat_execve_kretprobe); + ret = smith_register_kretprobe(&compat_execve_kretprobe); if (ret == 0) compat_execve_kretprobe_state = 0x1; @@ -3702,14 +3732,14 @@ int register_compat_execve_kprobe(void) void unregister_compat_execve_kprobe(void) { - unregister_kretprobe(&compat_execve_kretprobe); + smith_unregister_kretprobe(&compat_execve_kretprobe); } #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0) int register_compat_execveat_kprobe(void) { int ret; - ret = register_kretprobe(&compat_execveat_kretprobe); + ret = smith_register_kretprobe(&compat_execveat_kretprobe); if (ret == 0) compat_execveat_kretprobe_state = 0x1; @@ -3718,20 +3748,20 @@ int register_compat_execveat_kprobe(void) void unregister_compat_execveat_kprobe(void) { - unregister_kretprobe(&compat_execveat_kretprobe); + smith_unregister_kretprobe(&compat_execveat_kretprobe); } #endif #endif void unregister_execve_kprobe(void) { - unregister_kretprobe(&execve_kretprobe); + smith_unregister_kretprobe(&execve_kretprobe); } #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0) void unregister_execveat_kprobe(void) { - unregister_kretprobe(&execveat_kretprobe); + smith_unregister_kretprobe(&execveat_kretprobe); } #endif @@ -3754,8 +3784,7 @@ void unregister_ptrace_kprobe(void) int register_udp_recvmsg_kprobe(void) { int ret; - udp_recvmsg_kretprobe = udp_recvmsg_kretprobe_value; - ret = register_kretprobe(&udp_recvmsg_kretprobe); + ret = smith_register_kretprobe(&udp_recvmsg_kretprobe); if (ret == 0) udp_recvmsg_kprobe_state = 0x1; @@ -3765,7 +3794,7 @@ int register_udp_recvmsg_kprobe(void) void unregister_udp_recvmsg_kprobe(void) { - unregister_kretprobe(&udp_recvmsg_kretprobe); + smith_unregister_kretprobe(&udp_recvmsg_kretprobe); udp_recvmsg_kprobe_state = 0; } @@ -3773,8 +3802,7 @@ void unregister_udp_recvmsg_kprobe(void) int register_udpv6_recvmsg_kprobe(void) { int ret; - udpv6_recvmsg_kretprobe = udpv6_recvmsg_kretprobe_value; - ret = register_kretprobe(&udpv6_recvmsg_kretprobe); + ret = smith_register_kretprobe(&udpv6_recvmsg_kretprobe); if (ret == 0) udpv6_recvmsg_kprobe_state = 0x1; @@ -3784,14 +3812,14 @@ int register_udpv6_recvmsg_kprobe(void) void unregister_udpv6_recvmsg_kprobe(void) { - unregister_kretprobe(&udpv6_recvmsg_kretprobe); + smith_unregister_kretprobe(&udpv6_recvmsg_kretprobe); udpv6_recvmsg_kprobe_state = 0; } int register_ip6_datagram_connect_kprobe(void) { int ret; - ret = register_kretprobe(&ip6_datagram_connect_kretprobe); + ret = smith_register_kretprobe(&ip6_datagram_connect_kretprobe); if (ret == 0) ip6_datagram_connect_kprobe_state = 0x1; @@ -3801,13 +3829,13 @@ int register_ip6_datagram_connect_kprobe(void) void unregister_ip6_datagram_connect_kprobe(void) { - unregister_kretprobe(&ip6_datagram_connect_kretprobe); + smith_unregister_kretprobe(&ip6_datagram_connect_kretprobe); } int register_tcp_v6_connect_kprobe(void) { int ret; - ret = register_kretprobe(&tcp_v6_connect_kretprobe); + ret = smith_register_kretprobe(&tcp_v6_connect_kretprobe); if (ret == 0) tcp_v6_connect_kprobe_state = 0x1; @@ -3817,14 +3845,14 @@ int register_tcp_v6_connect_kprobe(void) void unregister_tcp_v6_connect_kprobe(void) { - unregister_kretprobe(&tcp_v6_connect_kretprobe); + smith_unregister_kretprobe(&tcp_v6_connect_kretprobe); } #endif int register_ip4_datagram_connect_kprobe(void) { int ret; - ret = register_kretprobe(&ip4_datagram_connect_kretprobe); + ret = smith_register_kretprobe(&ip4_datagram_connect_kretprobe); if (ret == 0) ip4_datagram_connect_kprobe_state = 0x1; @@ -3834,13 +3862,13 @@ int register_ip4_datagram_connect_kprobe(void) void unregister_ip4_datagram_connect_kprobe(void) { - unregister_kretprobe(&ip4_datagram_connect_kretprobe); + smith_unregister_kretprobe(&ip4_datagram_connect_kretprobe); } int register_tcp_v4_connect_kprobe(void) { int ret; - ret = register_kretprobe(&tcp_v4_connect_kretprobe); + ret = smith_register_kretprobe(&tcp_v4_connect_kretprobe); if (ret == 0) tcp_v4_connect_kprobe_state = 0x1; @@ -3850,13 +3878,13 @@ int register_tcp_v4_connect_kprobe(void) void unregister_tcp_v4_connect_kprobe(void) { - unregister_kretprobe(&tcp_v4_connect_kretprobe); + smith_unregister_kretprobe(&tcp_v4_connect_kretprobe); } int register_connect_syscall_kprobe(void) { int ret; - ret = register_kretprobe(&connect_syscall_kretprobe); + ret = smith_register_kretprobe(&connect_syscall_kretprobe); if (ret == 0) connect_syscall_kprobe_state = 0x1; @@ -3866,13 +3894,13 @@ int register_connect_syscall_kprobe(void) void unregister_connect_syscall_kprobe(void) { - unregister_kretprobe(&connect_syscall_kretprobe); + smith_unregister_kretprobe(&connect_syscall_kretprobe); } int register_accept_kprobe(void) { int ret; - ret = register_kretprobe(&accept_kretprobe); + ret = smith_register_kretprobe(&accept_kretprobe); if (ret == 0) accept_kretprobe_state = 0x1; @@ -3882,13 +3910,13 @@ int register_accept_kprobe(void) void unregister_accept_kprobe(void) { - unregister_kretprobe(&accept_kretprobe); + smith_unregister_kretprobe(&accept_kretprobe); } int register_accept4_kprobe(void) { int ret; - ret = register_kretprobe(&accept4_kretprobe); + ret = smith_register_kretprobe(&accept4_kretprobe); if (ret == 0) accept4_kretprobe_state = 0x1; @@ -3898,7 +3926,7 @@ int register_accept4_kprobe(void) void unregister_accept4_kprobe(void) { - unregister_kretprobe(&accept4_kretprobe); + smith_unregister_kretprobe(&accept4_kretprobe); } int register_create_file_kprobe(void) @@ -4000,7 +4028,7 @@ void unregister_prctl_kprobe(void) int register_update_cred_kprobe(void) { int ret; - ret = register_kretprobe(&update_cred_kretprobe); + ret = smith_register_kretprobe(&update_cred_kretprobe); if (ret == 0) update_cred_kprobe_state = 0x1; @@ -4009,7 +4037,7 @@ int register_update_cred_kprobe(void) void unregister_update_cred_kprobe(void) { - unregister_kretprobe(&update_cred_kretprobe); + smith_unregister_kretprobe(&update_cred_kretprobe); } int register_mprotect_kprobe(void) @@ -4105,7 +4133,7 @@ void unregister_nanosleep_kprobe(void) int register_security_path_rmdir_kprobe(void) { int ret; - ret = register_kretprobe(&security_path_rmdir_kprobe); + ret = smith_register_kretprobe(&security_path_rmdir_kprobe); if (ret == 0) security_path_rmdir_kprobe_state = 0x1; @@ -4114,13 +4142,13 @@ int register_security_path_rmdir_kprobe(void) void unregister_security_path_rmdir_kprobe(void) { - unregister_kretprobe(&security_path_rmdir_kprobe); + smith_unregister_kretprobe(&security_path_rmdir_kprobe); } int register_security_path_unlink_kprobe(void) { int ret; - ret = register_kretprobe(&security_path_unlink_kprobe); + ret = smith_register_kretprobe(&security_path_unlink_kprobe); if (ret == 0) security_path_unlink_kprobe_state = 0x1; @@ -4129,7 +4157,7 @@ int register_security_path_unlink_kprobe(void) void unregister_security_path_unlink_kprobe(void) { - unregister_kretprobe(&security_path_unlink_kprobe); + smith_unregister_kretprobe(&security_path_unlink_kprobe); } int register_kill_kprobe(void) diff --git a/drivers/bytedance/toa.c b/drivers/bytedance/toa.c index 8d0b9bb98fd89..af0119ae34b1e 100644 --- a/drivers/bytedance/toa.c +++ b/drivers/bytedance/toa.c @@ -686,7 +686,10 @@ static int ip_option_to_four_tuple(int outside, struct ip_option* src, struct fo int inside = -1; if (outside == 0){ - inside = src->header.ipv4.operation; + if (src->header.ipv4.operation == 0 && src->header.ipv4.length == IP_OPTION_IPV4_LEN) + inside = 0; + else if (src->header.ipv4.operation == 1 && src->header.ipv4.length == IP_OPTION_IPV6_LEN ) + inside = 1; } else if (outside == 1){ if (src->header.ipv6.optlen == IPV6_HEADER_OPTION_IPV4_LEN) @@ -695,8 +698,9 @@ static int ip_option_to_four_tuple(int outside, struct ip_option* src, struct fo inside = 1; } + memset(dst, 0, sizeof(*dst)); + if (inside == 0){ - memset(dst, 0, sizeof(struct four_tuple)); memcpy(dst, src, IP_OPTION_IPV4_LEN); dst->type = 0; diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index df0fc997dc3e3..85e97550a9a60 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -535,28 +535,41 @@ config ADI and SSM (Silicon Secured Memory). Intended consumers of this driver include crash and makedumpfile. -endmenu - config RANDOM_TRUST_CPU - bool "Trust the CPU manufacturer to initialize Linux's CRNG" - depends on X86 || S390 || PPC - default n + bool "Initialize RNG using CPU RNG instructions" + default y + depends on ARCH_RANDOM help - Assume that CPU manufacturer (e.g., Intel or AMD for RDSEED or - RDRAND, IBM for the S390 and Power PC architectures) is trustworthy - for the purposes of initializing Linux's CRNG. Since this is not - something that can be independently audited, this amounts to trusting - that CPU manufacturer (perhaps with the insistence or mandate - of a Nation State's intelligence or law enforcement agencies) - has not installed a hidden back door to compromise the CPU's - random number generation facilities. This can also be configured - at boot with "random.trust_cpu=on/off". + Initialize the RNG using random numbers supplied by the CPU's + RNG instructions (e.g. RDRAND), if supported and available. These + random numbers are never used directly, but are rather hashed into + the main input pool, and this happens regardless of whether or not + this option is enabled. Instead, this option controls whether the + they are credited and hence can initialize the RNG. Additionally, + other sources of randomness are always used, regardless of this + setting. Enabling this implies trusting that the CPU can supply high + quality and non-backdoored random numbers. + + Say Y here unless you have reason to mistrust your CPU or believe + its RNG facilities may be faulty. This may also be configured at + boot time with "random.trust_cpu=on/off". config RANDOM_TRUST_BOOTLOADER - bool "Trust the bootloader to initialize Linux's CRNG" + bool "Initialize RNG using bootloader-supplied seed" + default y help - Some bootloaders can provide entropy to increase the kernel's initial - device randomness. Say Y here to assume the entropy provided by the - booloader is trustworthy so it will be added to the kernel's entropy - pool. Otherwise, say N here so it will be regarded as device input that - only mixes the entropy pool. \ No newline at end of file + Initialize the RNG using a seed supplied by the bootloader or boot + environment (e.g. EFI or a bootloader-generated device tree). This + seed is not used directly, but is rather hashed into the main input + pool, and this happens regardless of whether or not this option is + enabled. Instead, this option controls whether the seed is credited + and hence can initialize the RNG. Additionally, other sources of + randomness are always used, regardless of this setting. Enabling + this implies trusting that the bootloader can supply high quality and + non-backdoored seeds. + + Say Y here unless you have reason to mistrust your bootloader or + believe its RNG facilities may be faulty. This may also be configured + at boot time with "random.trust_bootloader=on/off". + +endmenu diff --git a/drivers/char/agp/parisc-agp.c b/drivers/char/agp/parisc-agp.c index ed3c4c42fc23b..d68d05d5d3838 100644 --- a/drivers/char/agp/parisc-agp.c +++ b/drivers/char/agp/parisc-agp.c @@ -281,7 +281,7 @@ agp_ioc_init(void __iomem *ioc_regs) return 0; } -static int +static int __init lba_find_capability(int cap) { struct _parisc_agp_info *info = &parisc_agp_info; @@ -366,7 +366,7 @@ parisc_agp_setup(void __iomem *ioc_hpa, void __iomem *lba_hpa) return error; } -static int +static int __init find_quicksilver(struct device *dev, void *data) { struct parisc_device **lba = data; @@ -378,7 +378,7 @@ find_quicksilver(struct device *dev, void *data) return 0; } -static int +static int __init parisc_agp_init(void) { extern struct sba_device *sba_list; diff --git a/drivers/char/hw_random/atmel-rng.c b/drivers/char/hw_random/atmel-rng.c index e55705745d5e4..f4c94f8acfe0d 100644 --- a/drivers/char/hw_random/atmel-rng.c +++ b/drivers/char/hw_random/atmel-rng.c @@ -96,6 +96,7 @@ static int atmel_trng_probe(struct platform_device *pdev) err_register: clk_disable_unprepare(trng->clk); + atmel_trng_disable(trng); return ret; } diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 8d53b8ef545cb..559ca503b7b6e 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/hw_random/mtk-rng.c b/drivers/char/hw_random/mtk-rng.c index e649be5a5f132..6670516fa194d 100644 --- a/drivers/char/hw_random/mtk-rng.c +++ b/drivers/char/hw_random/mtk-rng.c @@ -173,8 +173,13 @@ static int mtk_rng_runtime_resume(struct device *dev) return mtk_rng_init(&priv->rng); } -static UNIVERSAL_DEV_PM_OPS(mtk_rng_pm_ops, mtk_rng_runtime_suspend, - mtk_rng_runtime_resume, NULL); +static const struct dev_pm_ops mtk_rng_pm_ops = { + SET_RUNTIME_PM_OPS(mtk_rng_runtime_suspend, + mtk_rng_runtime_resume, NULL) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) +}; + #define MTK_RNG_PM_OPS (&mtk_rng_pm_ops) #else /* CONFIG_PM */ #define MTK_RNG_PM_OPS NULL diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 331c11a157d68..1181981f51918 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -11,8 +11,8 @@ * Copyright 2002 MontaVista Software Inc. */ -#define pr_fmt(fmt) "%s" fmt, "IPMI message handler: " -#define dev_fmt pr_fmt +#define pr_fmt(fmt) "IPMI message handler: " fmt +#define dev_fmt(fmt) pr_fmt(fmt) #include #include @@ -4799,7 +4799,9 @@ static atomic_t recv_msg_inuse_count = ATOMIC_INIT(0); static void free_smi_msg(struct ipmi_smi_msg *msg) { atomic_dec(&smi_msg_inuse_count); - kfree(msg); + /* Try to keep as much stuff out of the panic path as possible. */ + if (!oops_in_progress) + kfree(msg); } struct ipmi_smi_msg *ipmi_alloc_smi_msg(void) @@ -4818,7 +4820,9 @@ EXPORT_SYMBOL(ipmi_alloc_smi_msg); static void free_recv_msg(struct ipmi_recv_msg *msg) { atomic_dec(&recv_msg_inuse_count); - kfree(msg); + /* Try to keep as much stuff out of the panic path as possible. */ + if (!oops_in_progress) + kfree(msg); } static struct ipmi_recv_msg *ipmi_alloc_recv_msg(void) @@ -4836,7 +4840,7 @@ static struct ipmi_recv_msg *ipmi_alloc_recv_msg(void) void ipmi_free_recv_msg(struct ipmi_recv_msg *msg) { - if (msg->user) + if (msg->user && !oops_in_progress) kref_put(&msg->user->refcount, free_user); msg->done(msg); } @@ -5152,7 +5156,9 @@ static int ipmi_init_msghandler(void) if (initialized) goto out; - init_srcu_struct(&ipmi_interfaces_srcu); + rv = init_srcu_struct(&ipmi_interfaces_srcu); + if (rv) + goto out; remove_work_wq = create_singlethread_workqueue("ipmi-msghandler-remove-wq"); if (!remove_work_wq) { @@ -5166,6 +5172,13 @@ static int ipmi_init_msghandler(void) atomic_notifier_chain_register(&panic_notifier_list, &panic_block); + remove_work_wq = create_singlethread_workqueue("ipmi-msghandler-remove-wq"); + if (!remove_work_wq) { + pr_err("unable to create ipmi-msghandler-remove-wq workqueue"); + rv = -ENOMEM; + goto out; + } + initialized = true; out_wq: diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index dc655276c0171..027b3cf67802f 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -845,6 +845,14 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, break; case SSIF_GETTING_EVENTS: + if (!msg) { + /* Should never happen, but just in case. */ + dev_warn(&ssif_info->client->dev, + "No message set while getting events\n"); + ipmi_ssif_unlock_cond(ssif_info, flags); + break; + } + if ((result < 0) || (len < 3) || (msg->rsp[2] != 0)) { /* Error getting event, probably done. */ msg->done(msg); @@ -869,6 +877,14 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, break; case SSIF_GETTING_MESSAGES: + if (!msg) { + /* Should never happen, but just in case. */ + dev_warn(&ssif_info->client->dev, + "No message set while getting messages\n"); + ipmi_ssif_unlock_cond(ssif_info, flags); + break; + } + if ((result < 0) || (len < 3) || (msg->rsp[2] != 0)) { /* Error getting event, probably done. */ msg->done(msg); @@ -892,6 +908,13 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, deliver_recv_msg(ssif_info, msg); } break; + + default: + /* Should never happen, but just in case. */ + dev_warn(&ssif_info->client->dev, + "Invalid state in message done handling: %d\n", + ssif_info->ssif_state); + ipmi_ssif_unlock_cond(ssif_info, flags); } flags = ipmi_ssif_lock_cond(ssif_info, &oflags); @@ -1498,6 +1521,7 @@ static int start_multipart_test(struct i2c_client *client, ret = i2c_smbus_write_block_data(client, SSIF_IPMI_MULTI_PART_REQUEST_START, 32, msg); + msleep(SSIF_MSG_MSEC); if (ret) { retry_cnt--; if (retry_cnt > 0) @@ -1512,6 +1536,7 @@ static int start_multipart_test(struct i2c_client *client, ret = i2c_smbus_write_block_data(client, SSIF_IPMI_MULTI_PART_REQUEST_MIDDLE, 32, msg + 32); + msleep(SSIF_MSG_MSEC); if (ret) { dev_err(&client->dev, "Could not write multi-part middle, though the BMC said it could handle it. Just limit sends to one part.\n"); return ret; @@ -1704,6 +1729,10 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id) } } + client->adapter->timeout = 10 * HZ; + ssif_info->client = client; + i2c_set_clientdata(client, ssif_info); + rv = ssif_check_and_remove(client, ssif_info); /* If rv is 0 and addr source is not SI_ACPI, continue probing */ if (!rv && ssif_info->addr_source == SI_ACPI) { @@ -1724,10 +1753,6 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id) ipmi_addr_src_to_str(ssif_info->addr_source), client->addr, client->adapter->name, slave_addr); - ssif_info->client = client; - ssif_info->client->adapter->timeout = 10*HZ; /* Set to 10 seconds in this case */ - i2c_set_clientdata(client, ssif_info); - /* Now check for system interface capabilities */ msg[0] = IPMI_NETFN_APP_REQUEST << 2; msg[1] = IPMI_GET_SYSTEM_INTERFACE_CAPABILITIES_CMD; @@ -1927,6 +1952,7 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id) dev_err(&ssif_info->client->dev, "Unable to start IPMI SSIF: %d\n", rv); + i2c_set_clientdata(client, NULL); kfree(ssif_info); } kfree(resp); diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index ae06e5402e9d5..72ad7fff64a7a 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -337,13 +337,17 @@ static atomic_t msg_tofree = ATOMIC_INIT(0); static DECLARE_COMPLETION(msg_wait); static void msg_free_smi(struct ipmi_smi_msg *msg) { - if (atomic_dec_and_test(&msg_tofree)) - complete(&msg_wait); + if (atomic_dec_and_test(&msg_tofree)) { + if (!oops_in_progress) + complete(&msg_wait); + } } static void msg_free_recv(struct ipmi_recv_msg *msg) { - if (atomic_dec_and_test(&msg_tofree)) - complete(&msg_wait); + if (atomic_dec_and_test(&msg_tofree)) { + if (!oops_in_progress) + complete(&msg_wait); + } } static struct ipmi_smi_msg smi_msg = { .done = msg_free_smi @@ -429,8 +433,10 @@ static int _ipmi_set_timeout(int do_heartbeat) rv = __ipmi_set_timeout(&smi_msg, &recv_msg, &send_heartbeat_now); - if (rv) + if (rv) { + atomic_set(&msg_tofree, 0); return rv; + } wait_for_completion(&msg_wait); @@ -575,6 +581,7 @@ static int __ipmi_heartbeat(void) &recv_msg, 1); if (rv) { + atomic_set(&msg_tofree, 0); pr_warn("heartbeat send failure: %d\n", rv); return rv; } diff --git a/drivers/char/mwave/3780i.h b/drivers/char/mwave/3780i.h index 9ccb6b270b071..95164246afd1a 100644 --- a/drivers/char/mwave/3780i.h +++ b/drivers/char/mwave/3780i.h @@ -68,7 +68,7 @@ typedef struct { unsigned char ClockControl:1; /* RW: Clock control: 0=normal, 1=stop 3780i clocks */ unsigned char SoftReset:1; /* RW: Soft reset 0=normal, 1=soft reset active */ unsigned char ConfigMode:1; /* RW: Configuration mode, 0=normal, 1=config mode */ - unsigned char Reserved:5; /* 0: Reserved */ + unsigned short Reserved:13; /* 0: Reserved */ } DSP_ISA_SLAVE_CONTROL; diff --git a/drivers/char/random.c b/drivers/char/random.c index ffd61aadb7614..1ef94d1125210 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1,311 +1,29 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) /* - * random.c -- A strong random number generator - * - * Copyright (C) 2017 Jason A. Donenfeld . All - * Rights Reserved. - * + * Copyright (C) 2017-2022 Jason A. Donenfeld . All Rights Reserved. * Copyright Matt Mackall , 2003, 2004, 2005 - * - * Copyright Theodore Ts'o, 1994, 1995, 1996, 1997, 1998, 1999. All - * rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, and the entire permission notice in its entirety, - * including the disclaimer of warranties. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote - * products derived from this software without specific prior - * written permission. - * - * ALTERNATIVELY, this product may be distributed under the terms of - * the GNU General Public License, in which case the provisions of the GPL are - * required INSTEAD OF the above restrictions. (This clause is - * necessary due to a potential bad interaction between the GPL and - * the restrictions contained in a BSD-style copyright.) - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF - * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT - * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE - * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH - * DAMAGE. + * Copyright Theodore Ts'o, 1994, 1995, 1996, 1997, 1998, 1999. All rights reserved. + * + * This driver produces cryptographically secure pseudorandom data. It is divided + * into roughly six sections, each with a section header: + * + * - Initialization and readiness waiting. + * - Fast key erasure RNG, the "crng". + * - Entropy accumulation and extraction routines. + * - Entropy collection routines. + * - Userspace reader/writer interfaces. + * - Sysctl interface. + * + * The high level overview is that there is one input pool, into which + * various pieces of data are hashed. Prior to initialization, some of that + * data is then "credited" as having a certain number of bits of entropy. + * When enough bits of entropy are available, the hash is finalized and + * handed as a key to a stream cipher that expands it indefinitely for + * various consumers. This key is periodically refreshed as the various + * entropy collectors, described below, add data to the input pool. */ -/* - * (now, with legal B.S. out of the way.....) - * - * This routine gathers environmental noise from device drivers, etc., - * and returns good random numbers, suitable for cryptographic use. - * Besides the obvious cryptographic uses, these numbers are also good - * for seeding TCP sequence numbers, and other places where it is - * desirable to have numbers which are not only random, but hard to - * predict by an attacker. - * - * Theory of operation - * =================== - * - * Computers are very predictable devices. Hence it is extremely hard - * to produce truly random numbers on a computer --- as opposed to - * pseudo-random numbers, which can easily generated by using a - * algorithm. Unfortunately, it is very easy for attackers to guess - * the sequence of pseudo-random number generators, and for some - * applications this is not acceptable. So instead, we must try to - * gather "environmental noise" from the computer's environment, which - * must be hard for outside attackers to observe, and use that to - * generate random numbers. In a Unix environment, this is best done - * from inside the kernel. - * - * Sources of randomness from the environment include inter-keyboard - * timings, inter-interrupt timings from some interrupts, and other - * events which are both (a) non-deterministic and (b) hard for an - * outside observer to measure. Randomness from these sources are - * added to an "entropy pool", which is mixed using a CRC-like function. - * This is not cryptographically strong, but it is adequate assuming - * the randomness is not chosen maliciously, and it is fast enough that - * the overhead of doing it on every interrupt is very reasonable. - * As random bytes are mixed into the entropy pool, the routines keep - * an *estimate* of how many bits of randomness have been stored into - * the random number generator's internal state. - * - * When random bytes are desired, they are obtained by taking the SHA - * hash of the contents of the "entropy pool". The SHA hash avoids - * exposing the internal state of the entropy pool. It is believed to - * be computationally infeasible to derive any useful information - * about the input of SHA from its output. Even if it is possible to - * analyze SHA in some clever way, as long as the amount of data - * returned from the generator is less than the inherent entropy in - * the pool, the output data is totally unpredictable. For this - * reason, the routine decreases its internal estimate of how many - * bits of "true randomness" are contained in the entropy pool as it - * outputs random numbers. - * - * If this estimate goes to zero, the routine can still generate - * random numbers; however, an attacker may (at least in theory) be - * able to infer the future output of the generator from prior - * outputs. This requires successful cryptanalysis of SHA, which is - * not believed to be feasible, but there is a remote possibility. - * Nonetheless, these numbers should be useful for the vast majority - * of purposes. - * - * Exported interfaces ---- output - * =============================== - * - * There are four exported interfaces; two for use within the kernel, - * and two or use from userspace. - * - * Exported interfaces ---- userspace output - * ----------------------------------------- - * - * The userspace interfaces are two character devices /dev/random and - * /dev/urandom. /dev/random is suitable for use when very high - * quality randomness is desired (for example, for key generation or - * one-time pads), as it will only return a maximum of the number of - * bits of randomness (as estimated by the random number generator) - * contained in the entropy pool. - * - * The /dev/urandom device does not have this limit, and will return - * as many bytes as are requested. As more and more random bytes are - * requested without giving time for the entropy pool to recharge, - * this will result in random numbers that are merely cryptographically - * strong. For many applications, however, this is acceptable. - * - * Exported interfaces ---- kernel output - * -------------------------------------- - * - * The primary kernel interface is - * - * void get_random_bytes(void *buf, int nbytes); - * - * This interface will return the requested number of random bytes, - * and place it in the requested buffer. This is equivalent to a - * read from /dev/urandom. - * - * For less critical applications, there are the functions: - * - * u32 get_random_u32() - * u64 get_random_u64() - * unsigned int get_random_int() - * unsigned long get_random_long() - * - * These are produced by a cryptographic RNG seeded from get_random_bytes, - * and so do not deplete the entropy pool as much. These are recommended - * for most in-kernel operations *if the result is going to be stored in - * the kernel*. - * - * Specifically, the get_random_int() family do not attempt to do - * "anti-backtracking". If you capture the state of the kernel (e.g. - * by snapshotting the VM), you can figure out previous get_random_int() - * return values. But if the value is stored in the kernel anyway, - * this is not a problem. - * - * It *is* safe to expose get_random_int() output to attackers (e.g. as - * network cookies); given outputs 1..n, it's not feasible to predict - * outputs 0 or n+1. The only concern is an attacker who breaks into - * the kernel later; the get_random_int() engine is not reseeded as - * often as the get_random_bytes() one. - * - * get_random_bytes() is needed for keys that need to stay secret after - * they are erased from the kernel. For example, any key that will - * be wrapped and stored encrypted. And session encryption keys: we'd - * like to know that after the session is closed and the keys erased, - * the plaintext is unrecoverable to someone who recorded the ciphertext. - * - * But for network ports/cookies, stack canaries, PRNG seeds, address - * space layout randomization, session *authentication* keys, or other - * applications where the sensitive data is stored in the kernel in - * plaintext for as long as it's sensitive, the get_random_int() family - * is just fine. - * - * Consider ASLR. We want to keep the address space secret from an - * outside attacker while the process is running, but once the address - * space is torn down, it's of no use to an attacker any more. And it's - * stored in kernel data structures as long as it's alive, so worrying - * about an attacker's ability to extrapolate it from the get_random_int() - * CRNG is silly. - * - * Even some cryptographic keys are safe to generate with get_random_int(). - * In particular, keys for SipHash are generally fine. Here, knowledge - * of the key authorizes you to do something to a kernel object (inject - * packets to a network connection, or flood a hash table), and the - * key is stored with the object being protected. Once it goes away, - * we no longer care if anyone knows the key. - * - * prandom_u32() - * ------------- - * - * For even weaker applications, see the pseudorandom generator - * prandom_u32(), prandom_max(), and prandom_bytes(). If the random - * numbers aren't security-critical at all, these are *far* cheaper. - * Useful for self-tests, random error simulation, randomized backoffs, - * and any other application where you trust that nobody is trying to - * maliciously mess with you by guessing the "random" numbers. - * - * Exported interfaces ---- input - * ============================== - * - * The current exported interfaces for gathering environmental noise - * from the devices are: - * - * void add_device_randomness(const void *buf, unsigned int size); - * void add_input_randomness(unsigned int type, unsigned int code, - * unsigned int value); - * void add_interrupt_randomness(int irq, int irq_flags); - * void add_disk_randomness(struct gendisk *disk); - * - * add_device_randomness() is for adding data to the random pool that - * is likely to differ between two devices (or possibly even per boot). - * This would be things like MAC addresses or serial numbers, or the - * read-out of the RTC. This does *not* add any actual entropy to the - * pool, but it initializes the pool to different values for devices - * that might otherwise be identical and have very little entropy - * available to them (particularly common in the embedded world). - * - * add_input_randomness() uses the input layer interrupt timing, as well as - * the event type information from the hardware. - * - * add_interrupt_randomness() uses the interrupt timing as random - * inputs to the entropy pool. Using the cycle counters and the irq source - * as inputs, it feeds the randomness roughly once a second. - * - * add_disk_randomness() uses what amounts to the seek time of block - * layer request events, on a per-disk_devt basis, as input to the - * entropy pool. Note that high-speed solid state drives with very low - * seek times do not make for good sources of entropy, as their seek - * times are usually fairly consistent. - * - * All of these routines try to estimate how many bits of randomness a - * particular randomness source. They do this by keeping track of the - * first and second order deltas of the event timings. - * - * Ensuring unpredictability at system startup - * ============================================ - * - * When any operating system starts up, it will go through a sequence - * of actions that are fairly predictable by an adversary, especially - * if the start-up does not involve interaction with a human operator. - * This reduces the actual number of bits of unpredictability in the - * entropy pool below the value in entropy_count. In order to - * counteract this effect, it helps to carry information in the - * entropy pool across shut-downs and start-ups. To do this, put the - * following lines an appropriate script which is run during the boot - * sequence: - * - * echo "Initializing random number generator..." - * random_seed=/var/run/random-seed - * # Carry a random seed from start-up to start-up - * # Load and then save the whole entropy pool - * if [ -f $random_seed ]; then - * cat $random_seed >/dev/urandom - * else - * touch $random_seed - * fi - * chmod 600 $random_seed - * dd if=/dev/urandom of=$random_seed count=1 bs=512 - * - * and the following lines in an appropriate script which is run as - * the system is shutdown: - * - * # Carry a random seed from shut-down to start-up - * # Save the whole entropy pool - * echo "Saving random seed..." - * random_seed=/var/run/random-seed - * touch $random_seed - * chmod 600 $random_seed - * dd if=/dev/urandom of=$random_seed count=1 bs=512 - * - * For example, on most modern systems using the System V init - * scripts, such code fragments would be found in - * /etc/rc.d/init.d/random. On older Linux systems, the correct script - * location might be in /etc/rcb.d/rc.local or /etc/rc.d/rc.0. - * - * Effectively, these commands cause the contents of the entropy pool - * to be saved at shut-down time and reloaded into the entropy pool at - * start-up. (The 'dd' in the addition to the bootup script is to - * make sure that /etc/random-seed is different for every start-up, - * even if the system crashes without executing rc.0.) Even with - * complete knowledge of the start-up activities, predicting the state - * of the entropy pool requires knowledge of the previous history of - * the system. - * - * Configuring the /dev/random driver under Linux - * ============================================== - * - * The /dev/random driver under Linux uses minor numbers 8 and 9 of - * the /dev/mem major number (#1). So if your system does not have - * /dev/random and /dev/urandom created already, they can be created - * by using the commands: - * - * mknod /dev/random c 1 8 - * mknod /dev/urandom c 1 9 - * - * Acknowledgements: - * ================= - * - * Ideas for constructing this random number generator were derived - * from Pretty Good Privacy's random number generator, and from private - * discussions with Phil Karn. Colin Plumb provided a faster random - * number generator, which speed up the mixing function of the entropy - * pool, taken from PGPfone. Dale Worley has also contributed many - * useful ideas and suggestions to improve this driver. - * - * Any flaws in the design are solely my responsibility, and should - * not be attributed to the Phil, Colin, or any of authors of PGP. - * - * Further background information on this topic may be obtained from - * RFC 1750, "Randomness Recommendations for Security", by Donald - * Eastlake, Steve Crocker, and Jeff Schiller. - */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include @@ -325,8 +43,6 @@ #include #include #include -#include -#include #include #include #include @@ -334,838 +50,795 @@ #include #include #include +#include +#include +#include #include - +#include #include -#include #include #include #include -#define CREATE_TRACE_POINTS -#include - -/* #define ADD_INTERRUPT_BENCH */ +/********************************************************************* + * + * Initialization and readiness waiting. + * + * Much of the RNG infrastructure is devoted to various dependencies + * being able to wait until the RNG has collected enough entropy and + * is ready for safe consumption. + * + *********************************************************************/ /* - * Configuration information + * crng_init is protected by base_crng->lock, and only increases + * its value (from empty->early->ready). */ -#define INPUT_POOL_SHIFT 12 -#define INPUT_POOL_WORDS (1 << (INPUT_POOL_SHIFT-5)) -#define OUTPUT_POOL_SHIFT 10 -#define OUTPUT_POOL_WORDS (1 << (OUTPUT_POOL_SHIFT-5)) -#define SEC_XFER_SIZE 512 -#define EXTRACT_SIZE 10 - +static enum { + CRNG_EMPTY = 0, /* Little to no entropy collected */ + CRNG_EARLY = 1, /* At least POOL_EARLY_BITS collected */ + CRNG_READY = 2 /* Fully initialized with POOL_READY_BITS collected */ +} crng_init __read_mostly = CRNG_EMPTY; +#define crng_ready() (likely(crng_init >= CRNG_READY)) +/* Various types of waiters for crng_init->CRNG_READY transition. */ +static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); +static struct fasync_struct *fasync; +static DEFINE_SPINLOCK(random_ready_chain_lock); +static RAW_NOTIFIER_HEAD(random_ready_chain); -#define LONGS(x) (((x) + sizeof(unsigned long) - 1)/sizeof(unsigned long)) +/* Control how we warn userspace. */ +static struct ratelimit_state urandom_warning = + RATELIMIT_STATE_INIT_FLAGS("urandom_warning", HZ, 3, RATELIMIT_MSG_ON_RELEASE); +static int ratelimit_disable __read_mostly = + IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM); +module_param_named(ratelimit_disable, ratelimit_disable, int, 0644); +MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression"); /* - * To allow fractional bits to be tracked, the entropy_count field is - * denominated in units of 1/8th bits. + * Returns whether or not the input pool has been seeded and thus guaranteed + * to supply cryptographically secure random numbers. This applies to: the + * /dev/urandom device, the get_random_bytes function, and the get_random_{u32, + * ,u64,int,long} family of functions. * - * 2*(ENTROPY_SHIFT + poolbitshift) must <= 31, or the multiply in - * credit_entropy_bits() needs to be 64 bits wide. + * Returns: true if the input pool has been seeded. + * false if the input pool has not been seeded. */ -#define ENTROPY_SHIFT 3 -#define ENTROPY_BITS(r) ((r)->entropy_count >> ENTROPY_SHIFT) +bool rng_is_initialized(void) +{ + return crng_ready(); +} +EXPORT_SYMBOL(rng_is_initialized); -/* - * The minimum number of bits of entropy before we wake up a read on - * /dev/random. Should be enough to do a significant reseed. - */ -static int random_read_wakeup_bits = 64; +/* Used by wait_for_random_bytes(), and considered an entropy collector, below. */ +static void try_to_generate_entropy(void); /* - * If the entropy count falls under this number of bits, then we - * should wake up processes which are selecting or polling on write - * access to /dev/random. + * Wait for the input pool to be seeded and thus guaranteed to supply + * cryptographically secure random numbers. This applies to: the /dev/urandom + * device, the get_random_bytes function, and the get_random_{u32,u64,int,long} + * family of functions. Using any of these functions without first calling + * this function forfeits the guarantee of security. + * + * Returns: 0 if the input pool has been seeded. + * -ERESTARTSYS if the function was interrupted by a signal. */ -static int random_write_wakeup_bits = 28 * OUTPUT_POOL_WORDS; +int wait_for_random_bytes(void) +{ + while (!crng_ready()) { + int ret; + + try_to_generate_entropy(); + ret = wait_event_interruptible_timeout(crng_init_wait, crng_ready(), HZ); + if (ret) + return ret > 0 ? 0 : ret; + } + return 0; +} +EXPORT_SYMBOL(wait_for_random_bytes); /* - * Originally, we used a primitive polynomial of degree .poolwords - * over GF(2). The taps for various sizes are defined below. They - * were chosen to be evenly spaced except for the last tap, which is 1 - * to get the twisting happening as fast as possible. - * - * For the purposes of better mixing, we use the CRC-32 polynomial as - * well to make a (modified) twisted Generalized Feedback Shift - * Register. (See M. Matsumoto & Y. Kurita, 1992. Twisted GFSR - * generators. ACM Transactions on Modeling and Computer Simulation - * 2(3):179-194. Also see M. Matsumoto & Y. Kurita, 1994. Twisted - * GFSR generators II. ACM Transactions on Modeling and Computer - * Simulation 4:254-266) - * - * Thanks to Colin Plumb for suggesting this. - * - * The mixing operation is much less sensitive than the output hash, - * where we use SHA-1. All that we want of mixing operation is that - * it be a good non-cryptographic hash; i.e. it not produce collisions - * when fed "random" data of the sort we expect to see. As long as - * the pool state differs for different inputs, we have preserved the - * input entropy and done a good job. The fact that an intelligent - * attacker can construct inputs that will produce controlled - * alterations to the pool's state is not important because we don't - * consider such inputs to contribute any randomness. The only - * property we need with respect to them is that the attacker can't - * increase his/her knowledge of the pool's state. Since all - * additions are reversible (knowing the final state and the input, - * you can reconstruct the initial state), if an attacker has any - * uncertainty about the initial state, he/she can only shuffle that - * uncertainty about, but never cause any collisions (which would - * decrease the uncertainty). + * Add a callback function that will be invoked when the input + * pool is initialised. * - * Our mixing functions were analyzed by Lacharme, Roeck, Strubel, and - * Videau in their paper, "The Linux Pseudorandom Number Generator - * Revisited" (see: http://eprint.iacr.org/2012/251.pdf). In their - * paper, they point out that we are not using a true Twisted GFSR, - * since Matsumoto & Kurita used a trinomial feedback polynomial (that - * is, with only three taps, instead of the six that we are using). - * As a result, the resulting polynomial is neither primitive nor - * irreducible, and hence does not have a maximal period over - * GF(2**32). They suggest a slight change to the generator - * polynomial which improves the resulting TGFSR polynomial to be - * irreducible, which we have made here. + * returns: 0 if callback is successfully added + * -EALREADY if pool is already initialised (callback not called) */ -static const struct poolinfo { - int poolbitshift, poolwords, poolbytes, poolfracbits; -#define S(x) ilog2(x)+5, (x), (x)*4, (x) << (ENTROPY_SHIFT+5) - int tap1, tap2, tap3, tap4, tap5; -} poolinfo_table[] = { - /* was: x^128 + x^103 + x^76 + x^51 +x^25 + x + 1 */ - /* x^128 + x^104 + x^76 + x^51 +x^25 + x + 1 */ - { S(128), 104, 76, 51, 25, 1 }, - /* was: x^32 + x^26 + x^20 + x^14 + x^7 + x + 1 */ - /* x^32 + x^26 + x^19 + x^14 + x^7 + x + 1 */ - { S(32), 26, 19, 14, 7, 1 }, -#if 0 - /* x^2048 + x^1638 + x^1231 + x^819 + x^411 + x + 1 -- 115 */ - { S(2048), 1638, 1231, 819, 411, 1 }, - - /* x^1024 + x^817 + x^615 + x^412 + x^204 + x + 1 -- 290 */ - { S(1024), 817, 615, 412, 204, 1 }, - - /* x^1024 + x^819 + x^616 + x^410 + x^207 + x^2 + 1 -- 115 */ - { S(1024), 819, 616, 410, 207, 2 }, - - /* x^512 + x^411 + x^308 + x^208 + x^104 + x + 1 -- 225 */ - { S(512), 411, 308, 208, 104, 1 }, - - /* x^512 + x^409 + x^307 + x^206 + x^102 + x^2 + 1 -- 95 */ - { S(512), 409, 307, 206, 102, 2 }, - /* x^512 + x^409 + x^309 + x^205 + x^103 + x^2 + 1 -- 95 */ - { S(512), 409, 309, 205, 103, 2 }, - - /* x^256 + x^205 + x^155 + x^101 + x^52 + x + 1 -- 125 */ - { S(256), 205, 155, 101, 52, 1 }, - - /* x^128 + x^103 + x^78 + x^51 + x^27 + x^2 + 1 -- 70 */ - { S(128), 103, 78, 51, 27, 2 }, - - /* x^64 + x^52 + x^39 + x^26 + x^14 + x + 1 -- 15 */ - { S(64), 52, 39, 26, 14, 1 }, -#endif -}; +int __cold register_random_ready_notifier(struct notifier_block *nb) +{ + unsigned long flags; + int ret = -EALREADY; + + if (crng_ready()) + return ret; + + spin_lock_irqsave(&random_ready_chain_lock, flags); + if (!crng_ready()) + ret = raw_notifier_chain_register(&random_ready_chain, nb); + spin_unlock_irqrestore(&random_ready_chain_lock, flags); + return ret; +} /* - * Static global variables + * Delete a previously registered readiness callback function. */ -static DECLARE_WAIT_QUEUE_HEAD(random_read_wait); -static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); -static struct fasync_struct *fasync; - -static DEFINE_SPINLOCK(random_ready_list_lock); -static LIST_HEAD(random_ready_list); +int __cold unregister_random_ready_notifier(struct notifier_block *nb) +{ + unsigned long flags; + int ret; -struct crng_state { - __u32 state[16]; - unsigned long init_time; - spinlock_t lock; -}; + spin_lock_irqsave(&random_ready_chain_lock, flags); + ret = raw_notifier_chain_unregister(&random_ready_chain, nb); + spin_unlock_irqrestore(&random_ready_chain_lock, flags); + return ret; +} -static struct crng_state primary_crng = { - .lock = __SPIN_LOCK_UNLOCKED(primary_crng.lock), -}; +static void __cold process_random_ready_list(void) +{ + unsigned long flags; -/* - * crng_init = 0 --> Uninitialized - * 1 --> Initialized - * 2 --> Initialized from input_pool - * - * crng_init is protected by primary_crng->lock, and only increases - * its value (from 0->1->2). - */ -static int crng_init = 0; -#define crng_ready() (likely(crng_init > 1)) -static int crng_init_cnt = 0; -static unsigned long crng_global_init_time = 0; -#define CRNG_INIT_CNT_THRESH (2*CHACHA_KEY_SIZE) -static void _extract_crng(struct crng_state *crng, __u8 out[CHACHA_BLOCK_SIZE]); -static void _crng_backtrack_protect(struct crng_state *crng, - __u8 tmp[CHACHA_BLOCK_SIZE], int used); -static void process_random_ready_list(void); -static void _get_random_bytes(void *buf, int nbytes); - -static struct ratelimit_state unseeded_warning = - RATELIMIT_STATE_INIT("warn_unseeded_randomness", HZ, 3); -static struct ratelimit_state urandom_warning = - RATELIMIT_STATE_INIT("warn_urandom_randomness", HZ, 3); + spin_lock_irqsave(&random_ready_chain_lock, flags); + raw_notifier_call_chain(&random_ready_chain, 0, NULL); + spin_unlock_irqrestore(&random_ready_chain_lock, flags); +} -static int ratelimit_disable __read_mostly; +#define warn_unseeded_randomness() \ + if (IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM) && !crng_ready()) \ + printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n", \ + __func__, (void *)_RET_IP_, crng_init) -module_param_named(ratelimit_disable, ratelimit_disable, int, 0644); -MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression"); -/********************************************************************** +/********************************************************************* * - * OS independent entropy store. Here are the functions which handle - * storing entropy in an entropy pool. + * Fast key erasure RNG, the "crng". * - **********************************************************************/ + * These functions expand entropy from the entropy extractor into + * long streams for external consumption using the "fast key erasure" + * RNG described at . + * + * There are a few exported interfaces for use by other drivers: + * + * void get_random_bytes(void *buf, size_t len) + * u32 get_random_u32() + * u64 get_random_u64() + * unsigned int get_random_int() + * unsigned long get_random_long() + * + * These interfaces will return the requested number of random bytes + * into the given buffer or as a return value. This is equivalent to + * a read from /dev/urandom. The u32, u64, int, and long family of + * functions may be higher performance for one-off random integers, + * because they do a bit of buffering and do not invoke reseeding + * until the buffer is emptied. + * + *********************************************************************/ -struct entropy_store; -struct entropy_store { - /* read-only data: */ - const struct poolinfo *poolinfo; - __u32 *pool; - const char *name; - struct entropy_store *pull; - struct work_struct push_work; - - /* read-write data: */ - unsigned long last_pulled; - spinlock_t lock; - unsigned short add_ptr; - unsigned short input_rotate; - int entropy_count; - unsigned int initialized:1; - unsigned int last_data_init:1; - __u8 last_data[EXTRACT_SIZE]; +enum { + CRNG_RESEED_START_INTERVAL = HZ, + CRNG_RESEED_INTERVAL = 60 * HZ }; -static ssize_t extract_entropy(struct entropy_store *r, void *buf, - size_t nbytes, int min, int rsvd); -static ssize_t _extract_entropy(struct entropy_store *r, void *buf, - size_t nbytes, int fips); - -static void crng_reseed(struct crng_state *crng, struct entropy_store *r); -static void push_to_pool(struct work_struct *work); -static __u32 input_pool_data[INPUT_POOL_WORDS] __latent_entropy; -static __u32 blocking_pool_data[OUTPUT_POOL_WORDS] __latent_entropy; +static struct { + u8 key[CHACHA_KEY_SIZE] __aligned(__alignof__(long)); + unsigned long birth; + unsigned long generation; + spinlock_t lock; +} base_crng = { + .lock = __SPIN_LOCK_UNLOCKED(base_crng.lock) +}; -static struct entropy_store input_pool = { - .poolinfo = &poolinfo_table[0], - .name = "input", - .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), - .pool = input_pool_data +struct crng { + u8 key[CHACHA_KEY_SIZE]; + unsigned long generation; }; -static struct entropy_store blocking_pool = { - .poolinfo = &poolinfo_table[1], - .name = "blocking", - .pull = &input_pool, - .lock = __SPIN_LOCK_UNLOCKED(blocking_pool.lock), - .pool = blocking_pool_data, - .push_work = __WORK_INITIALIZER(blocking_pool.push_work, - push_to_pool), +static DEFINE_PER_CPU(struct crng, crngs) = { + .generation = ULONG_MAX }; -static __u32 const twist_table[8] = { - 0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158, - 0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 }; +/* Used by crng_reseed() and crng_make_state() to extract a new seed from the input pool. */ +static void extract_entropy(void *buf, size_t len); -/* - * This function adds bytes into the entropy "pool". It does not - * update the entropy estimate. The caller should call - * credit_entropy_bits if this is appropriate. - * - * The pool is stirred with a primitive polynomial of the appropriate - * degree, and then twisted. We twist by three bits at a time because - * it's cheap to do so and helps slightly in the expected case where - * the entropy is concentrated in the low-order bits. - */ -static void _mix_pool_bytes(struct entropy_store *r, const void *in, - int nbytes) +/* This extracts a new crng key from the input pool. */ +static void crng_reseed(void) { - unsigned long i, tap1, tap2, tap3, tap4, tap5; - int input_rotate; - int wordmask = r->poolinfo->poolwords - 1; - const char *bytes = in; - __u32 w; - - tap1 = r->poolinfo->tap1; - tap2 = r->poolinfo->tap2; - tap3 = r->poolinfo->tap3; - tap4 = r->poolinfo->tap4; - tap5 = r->poolinfo->tap5; - - input_rotate = r->input_rotate; - i = r->add_ptr; - - /* mix one byte at a time to simplify size handling and churn faster */ - while (nbytes--) { - w = rol32(*bytes++, input_rotate); - i = (i - 1) & wordmask; - - /* XOR in the various taps */ - w ^= r->pool[i]; - w ^= r->pool[(i + tap1) & wordmask]; - w ^= r->pool[(i + tap2) & wordmask]; - w ^= r->pool[(i + tap3) & wordmask]; - w ^= r->pool[(i + tap4) & wordmask]; - w ^= r->pool[(i + tap5) & wordmask]; - - /* Mix the result back in with a twist */ - r->pool[i] = (w >> 3) ^ twist_table[w & 7]; - - /* - * Normally, we add 7 bits of rotation to the pool. - * At the beginning of the pool, add an extra 7 bits - * rotation, so that successive passes spread the - * input bits across the pool evenly. - */ - input_rotate = (input_rotate + (i ? 7 : 14)) & 31; - } + unsigned long flags; + unsigned long next_gen; + u8 key[CHACHA_KEY_SIZE]; - r->input_rotate = input_rotate; - r->add_ptr = i; -} + extract_entropy(key, sizeof(key)); -static void __mix_pool_bytes(struct entropy_store *r, const void *in, - int nbytes) -{ - trace_mix_pool_bytes_nolock(r->name, nbytes, _RET_IP_); - _mix_pool_bytes(r, in, nbytes); + /* + * We copy the new key into the base_crng, overwriting the old one, + * and update the generation counter. We avoid hitting ULONG_MAX, + * because the per-cpu crngs are initialized to ULONG_MAX, so this + * forces new CPUs that come online to always initialize. + */ + spin_lock_irqsave(&base_crng.lock, flags); + memcpy(base_crng.key, key, sizeof(base_crng.key)); + next_gen = base_crng.generation + 1; + if (next_gen == ULONG_MAX) + ++next_gen; + WRITE_ONCE(base_crng.generation, next_gen); + WRITE_ONCE(base_crng.birth, jiffies); + if (!crng_ready()) + crng_init = CRNG_READY; + spin_unlock_irqrestore(&base_crng.lock, flags); + memzero_explicit(key, sizeof(key)); } -static void mix_pool_bytes(struct entropy_store *r, const void *in, - int nbytes) +/* + * This generates a ChaCha block using the provided key, and then + * immediately overwites that key with half the block. It returns + * the resultant ChaCha state to the user, along with the second + * half of the block containing 32 bytes of random data that may + * be used; random_data_len may not be greater than 32. + * + * The returned ChaCha state contains within it a copy of the old + * key value, at index 4, so the state should always be zeroed out + * immediately after using in order to maintain forward secrecy. + * If the state cannot be erased in a timely manner, then it is + * safer to set the random_data parameter to &chacha_state[4] so + * that this function overwrites it before returning. + */ +static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE], + u32 chacha_state[CHACHA_BLOCK_SIZE / sizeof(u32)], + u8 *random_data, size_t random_data_len) { - unsigned long flags; + u8 first_block[CHACHA_BLOCK_SIZE]; + + BUG_ON(random_data_len > 32); - trace_mix_pool_bytes(r->name, nbytes, _RET_IP_); - spin_lock_irqsave(&r->lock, flags); - _mix_pool_bytes(r, in, nbytes); - spin_unlock_irqrestore(&r->lock, flags); + chacha_init_consts(chacha_state); + memcpy(&chacha_state[4], key, CHACHA_KEY_SIZE); + memset(&chacha_state[12], 0, sizeof(u32) * 4); + chacha20_block(chacha_state, first_block); + + memcpy(key, first_block, CHACHA_KEY_SIZE); + memcpy(random_data, first_block + CHACHA_KEY_SIZE, random_data_len); + memzero_explicit(first_block, sizeof(first_block)); } -struct fast_pool { - __u32 pool[4]; - unsigned long last; - unsigned short reg_idx; - unsigned char count; -}; +/* + * Return whether the crng seed is considered to be sufficiently old + * that a reseeding is needed. This happens if the last reseeding + * was CRNG_RESEED_INTERVAL ago, or during early boot, at an interval + * proportional to the uptime. + */ +static bool crng_has_old_seed(void) +{ + static bool early_boot = true; + unsigned long interval = CRNG_RESEED_INTERVAL; + + if (unlikely(READ_ONCE(early_boot))) { + time64_t uptime = ktime_get_seconds(); + if (uptime >= CRNG_RESEED_INTERVAL / HZ * 2) + WRITE_ONCE(early_boot, false); + else + interval = max_t(unsigned int, CRNG_RESEED_START_INTERVAL, + (unsigned int)uptime / 2 * HZ); + } + return time_is_before_jiffies(READ_ONCE(base_crng.birth) + interval); +} /* - * This is a fast mixing routine used by the interrupt randomness - * collector. It's hardcoded for an 128 bit pool and assumes that any - * locks that might be needed are taken by the caller. + * This function returns a ChaCha state that you may use for generating + * random data. It also returns up to 32 bytes on its own of random data + * that may be used; random_data_len may not be greater than 32. */ -static void fast_mix(struct fast_pool *f) +static void crng_make_state(u32 chacha_state[CHACHA_BLOCK_SIZE / sizeof(u32)], + u8 *random_data, size_t random_data_len) { - __u32 a = f->pool[0], b = f->pool[1]; - __u32 c = f->pool[2], d = f->pool[3]; + unsigned long flags; + struct crng *crng; - a += b; c += d; - b = rol32(b, 6); d = rol32(d, 27); - d ^= a; b ^= c; + BUG_ON(random_data_len > 32); + + /* + * For the fast path, we check whether we're ready, unlocked first, and + * then re-check once locked later. In the case where we're really not + * ready, we do fast key erasure with the base_crng directly, extracting + * when crng_init is CRNG_EMPTY. + */ + if (!crng_ready()) { + bool ready; + + spin_lock_irqsave(&base_crng.lock, flags); + ready = crng_ready(); + if (!ready) { + if (crng_init == CRNG_EMPTY) + extract_entropy(base_crng.key, sizeof(base_crng.key)); + crng_fast_key_erasure(base_crng.key, chacha_state, + random_data, random_data_len); + } + spin_unlock_irqrestore(&base_crng.lock, flags); + if (!ready) + return; + } - a += b; c += d; - b = rol32(b, 16); d = rol32(d, 14); - d ^= a; b ^= c; + /* + * If the base_crng is old enough, we reseed, which in turn bumps the + * generation counter that we check below. + */ + if (unlikely(crng_has_old_seed())) + crng_reseed(); - a += b; c += d; - b = rol32(b, 6); d = rol32(d, 27); - d ^= a; b ^= c; + local_irq_save(flags); + crng = raw_cpu_ptr(&crngs); - a += b; c += d; - b = rol32(b, 16); d = rol32(d, 14); - d ^= a; b ^= c; + /* + * If our per-cpu crng is older than the base_crng, then it means + * somebody reseeded the base_crng. In that case, we do fast key + * erasure on the base_crng, and use its output as the new key + * for our per-cpu crng. This brings us up to date with base_crng. + */ + if (unlikely(crng->generation != READ_ONCE(base_crng.generation))) { + spin_lock(&base_crng.lock); + crng_fast_key_erasure(base_crng.key, chacha_state, + crng->key, sizeof(crng->key)); + crng->generation = base_crng.generation; + spin_unlock(&base_crng.lock); + } - f->pool[0] = a; f->pool[1] = b; - f->pool[2] = c; f->pool[3] = d; - f->count++; + /* + * Finally, when we've made it this far, our per-cpu crng has an up + * to date key, and we can do fast key erasure with it to produce + * some random data and a ChaCha state for the caller. All other + * branches of this function are "unlikely", so most of the time we + * should wind up here immediately. + */ + crng_fast_key_erasure(crng->key, chacha_state, random_data, random_data_len); + local_irq_restore(flags); } -static void process_random_ready_list(void) +static void _get_random_bytes(void *buf, size_t len) { - unsigned long flags; - struct random_ready_callback *rdy, *tmp; + u32 chacha_state[CHACHA_BLOCK_SIZE / sizeof(u32)]; + u8 tmp[CHACHA_BLOCK_SIZE]; + size_t first_block_len; + + if (!len) + return; + + first_block_len = min_t(size_t, 32, len); + crng_make_state(chacha_state, buf, first_block_len); + len -= first_block_len; + buf += first_block_len; - spin_lock_irqsave(&random_ready_list_lock, flags); - list_for_each_entry_safe(rdy, tmp, &random_ready_list, list) { - struct module *owner = rdy->owner; + while (len) { + if (len < CHACHA_BLOCK_SIZE) { + chacha20_block(chacha_state, tmp); + memcpy(buf, tmp, len); + memzero_explicit(tmp, sizeof(tmp)); + break; + } - list_del_init(&rdy->list); - rdy->func(rdy); - module_put(owner); + chacha20_block(chacha_state, buf); + if (unlikely(chacha_state[12] == 0)) + ++chacha_state[13]; + len -= CHACHA_BLOCK_SIZE; + buf += CHACHA_BLOCK_SIZE; } - spin_unlock_irqrestore(&random_ready_list_lock, flags); + + memzero_explicit(chacha_state, sizeof(chacha_state)); } /* - * Credit (or debit) the entropy store with n bits of entropy. - * Use credit_entropy_bits_safe() if the value comes from userspace - * or otherwise should be checked for extreme values. + * This function is the exported kernel interface. It returns some + * number of good random numbers, suitable for key generation, seeding + * TCP sequence numbers, etc. It does not rely on the hardware random + * number generator. For random bytes direct from the hardware RNG + * (when available), use get_random_bytes_arch(). In order to ensure + * that the randomness provided by this function is okay, the function + * wait_for_random_bytes() should be called and return 0 at least once + * at any point prior. */ -static void credit_entropy_bits(struct entropy_store *r, int nbits) +void get_random_bytes(void *buf, size_t len) { - int entropy_count, orig, has_initialized = 0; - const int pool_size = r->poolinfo->poolfracbits; - int nfrac = nbits << ENTROPY_SHIFT; + warn_unseeded_randomness(); + _get_random_bytes(buf, len); +} +EXPORT_SYMBOL(get_random_bytes); - if (!nbits) - return; +static ssize_t get_random_bytes_user(struct iov_iter *iter) +{ + u32 chacha_state[CHACHA_BLOCK_SIZE / sizeof(u32)]; + u8 block[CHACHA_BLOCK_SIZE]; + size_t ret = 0, copied; -retry: - entropy_count = orig = READ_ONCE(r->entropy_count); - if (nfrac < 0) { - /* Debit */ - entropy_count += nfrac; - } else { - /* - * Credit: we have to account for the possibility of - * overwriting already present entropy. Even in the - * ideal case of pure Shannon entropy, new contributions - * approach the full value asymptotically: - * - * entropy <- entropy + (pool_size - entropy) * - * (1 - exp(-add_entropy/pool_size)) - * - * For add_entropy <= pool_size/2 then - * (1 - exp(-add_entropy/pool_size)) >= - * (add_entropy/pool_size)*0.7869... - * so we can approximate the exponential with - * 3/4*add_entropy/pool_size and still be on the - * safe side by adding at most pool_size/2 at a time. - * - * The use of pool_size-2 in the while statement is to - * prevent rounding artifacts from making the loop - * arbitrarily long; this limits the loop to log2(pool_size)*2 - * turns no matter how large nbits is. - */ - int pnfrac = nfrac; - const int s = r->poolinfo->poolbitshift + ENTROPY_SHIFT + 2; - /* The +2 corresponds to the /4 in the denominator */ - - do { - unsigned int anfrac = min(pnfrac, pool_size/2); - unsigned int add = - ((pool_size - entropy_count)*anfrac*3) >> s; - - entropy_count += add; - pnfrac -= anfrac; - } while (unlikely(entropy_count < pool_size-2 && pnfrac)); - } + if (unlikely(!iov_iter_count(iter))) + return 0; - if (unlikely(entropy_count < 0)) { - pr_warn("random: negative entropy/overflow: pool %s count %d\n", - r->name, entropy_count); - WARN_ON(1); - entropy_count = 0; - } else if (entropy_count > pool_size) - entropy_count = pool_size; - if ((r == &blocking_pool) && !r->initialized && - (entropy_count >> ENTROPY_SHIFT) > 128) - has_initialized = 1; - if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig) - goto retry; - - if (has_initialized) { - r->initialized = 1; - wake_up_interruptible(&random_read_wait); - kill_fasync(&fasync, SIGIO, POLL_IN); + /* + * Immediately overwrite the ChaCha key at index 4 with random + * bytes, in case userspace causes copy_to_iter() below to sleep + * forever, so that we still retain forward secrecy in that case. + */ + crng_make_state(chacha_state, (u8 *)&chacha_state[4], CHACHA_KEY_SIZE); + /* + * However, if we're doing a read of len <= 32, we don't need to + * use chacha_state after, so we can simply return those bytes to + * the user directly. + */ + if (iov_iter_count(iter) <= CHACHA_KEY_SIZE) { + ret = copy_to_iter(&chacha_state[4], CHACHA_KEY_SIZE, iter); + goto out_zero_chacha; } - trace_credit_entropy_bits(r->name, nbits, - entropy_count >> ENTROPY_SHIFT, _RET_IP_); + for (;;) { + chacha20_block(chacha_state, block); + if (unlikely(chacha_state[12] == 0)) + ++chacha_state[13]; - if (r == &input_pool) { - int entropy_bits = entropy_count >> ENTROPY_SHIFT; - struct entropy_store *other = &blocking_pool; + copied = copy_to_iter(block, sizeof(block), iter); + ret += copied; + if (!iov_iter_count(iter) || copied != sizeof(block)) + break; - if (crng_init < 2) { - if (entropy_bits < 128) - return; - crng_reseed(&primary_crng, r); - entropy_bits = r->entropy_count >> ENTROPY_SHIFT; + BUILD_BUG_ON(PAGE_SIZE % sizeof(block) != 0); + if (ret % PAGE_SIZE == 0) { + if (signal_pending(current)) + break; + cond_resched(); } + } - /* initialize the blocking pool if necessary */ - if (entropy_bits >= random_read_wakeup_bits && - !other->initialized) { - schedule_work(&other->push_work); - return; - } + memzero_explicit(block, sizeof(block)); +out_zero_chacha: + memzero_explicit(chacha_state, sizeof(chacha_state)); + return ret ? ret : -EFAULT; +} - /* should we wake readers? */ - if (entropy_bits >= random_read_wakeup_bits && - wq_has_sleeper(&random_read_wait)) { - wake_up_interruptible(&random_read_wait); - kill_fasync(&fasync, SIGIO, POLL_IN); - } - /* If the input pool is getting full, and the blocking - * pool has room, send some entropy to the blocking - * pool. - */ - if (!work_pending(&other->push_work) && - (ENTROPY_BITS(r) > 6 * r->poolinfo->poolbytes) && - (ENTROPY_BITS(other) <= 6 * other->poolinfo->poolbytes)) - schedule_work(&other->push_work); - } +/* + * Batched entropy returns random integers. The quality of the random + * number is good as /dev/urandom. In order to ensure that the randomness + * provided by this function is okay, the function wait_for_random_bytes() + * should be called and return 0 at least once at any point prior. + */ + +#define DEFINE_BATCHED_ENTROPY(type) \ +struct batch_ ##type { \ + /* \ + * We make this 1.5x a ChaCha block, so that we get the \ + * remaining 32 bytes from fast key erasure, plus one full \ + * block from the detached ChaCha state. We can increase \ + * the size of this later if needed so long as we keep the \ + * formula of (integer_blocks + 0.5) * CHACHA_BLOCK_SIZE. \ + */ \ + type entropy[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(type))]; \ + unsigned long generation; \ + unsigned int position; \ +}; \ + \ +static DEFINE_PER_CPU(struct batch_ ##type, batched_entropy_ ##type) = { \ + .position = UINT_MAX \ +}; \ + \ +type get_random_ ##type(void) \ +{ \ + type ret; \ + unsigned long flags; \ + struct batch_ ##type *batch; \ + unsigned long next_gen; \ + \ + warn_unseeded_randomness(); \ + \ + if (!crng_ready()) { \ + _get_random_bytes(&ret, sizeof(ret)); \ + return ret; \ + } \ + \ + local_irq_save(flags); \ + batch = raw_cpu_ptr(&batched_entropy_##type); \ + \ + next_gen = READ_ONCE(base_crng.generation); \ + if (batch->position >= ARRAY_SIZE(batch->entropy) || \ + next_gen != batch->generation) { \ + _get_random_bytes(batch->entropy, sizeof(batch->entropy)); \ + batch->position = 0; \ + batch->generation = next_gen; \ + } \ + \ + ret = batch->entropy[batch->position]; \ + batch->entropy[batch->position] = 0; \ + ++batch->position; \ + local_irq_restore(flags); \ + return ret; \ +} \ +EXPORT_SYMBOL(get_random_ ##type); + +DEFINE_BATCHED_ENTROPY(u64) +DEFINE_BATCHED_ENTROPY(u32) + +#ifdef CONFIG_SMP +/* + * This function is called when the CPU is coming up, with entry + * CPUHP_RANDOM_PREPARE, which comes before CPUHP_WORKQUEUE_PREP. + */ +int __cold random_prepare_cpu(unsigned int cpu) +{ + /* + * When the cpu comes back online, immediately invalidate both + * the per-cpu crng and all batches, so that we serve fresh + * randomness. + */ + per_cpu_ptr(&crngs, cpu)->generation = ULONG_MAX; + per_cpu_ptr(&batched_entropy_u32, cpu)->position = UINT_MAX; + per_cpu_ptr(&batched_entropy_u64, cpu)->position = UINT_MAX; + return 0; } +#endif -static int credit_entropy_bits_safe(struct entropy_store *r, int nbits) +/* + * This function will use the architecture-specific hardware random + * number generator if it is available. It is not recommended for + * use. Use get_random_bytes() instead. It returns the number of + * bytes filled in. + */ +size_t __must_check get_random_bytes_arch(void *buf, size_t len) { - const int nbits_max = r->poolinfo->poolwords * 32; + size_t left = len; + u8 *p = buf; - if (nbits < 0) - return -EINVAL; + while (left) { + unsigned long v; + size_t block_len = min_t(size_t, left, sizeof(unsigned long)); + + if (!arch_get_random_long(&v)) + break; - /* Cap the value to avoid overflows */ - nbits = min(nbits, nbits_max); + memcpy(p, &v, block_len); + p += block_len; + left -= block_len; + } - credit_entropy_bits(r, nbits); - return 0; + return len - left; } +EXPORT_SYMBOL(get_random_bytes_arch); -/********************************************************************* + +/********************************************************************** * - * CRNG using CHACHA20 + * Entropy accumulation and extraction routines. * - *********************************************************************/ - -#define CRNG_RESEED_INTERVAL (300*HZ) - -static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); - -#ifdef CONFIG_NUMA -/* - * Hack to deal with crazy userspace progams when they are all trying - * to access /dev/urandom in parallel. The programs are almost - * certainly doing something terribly wrong, but we'll work around - * their brain damage. - */ -static struct crng_state **crng_node_pool __read_mostly; -#endif - -static void invalidate_batched_entropy(void); -static void numa_crng_init(void); - -static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU); -static int __init parse_trust_cpu(char *arg) -{ - return kstrtobool(arg, &trust_cpu); -} -early_param("random.trust_cpu", parse_trust_cpu); - -static void crng_initialize(struct crng_state *crng) -{ - int i; - int arch_init = 1; - unsigned long rv; - - memcpy(&crng->state[0], "expand 32-byte k", 16); - if (crng == &primary_crng) - _extract_entropy(&input_pool, &crng->state[4], - sizeof(__u32) * 12, 0); - else - _get_random_bytes(&crng->state[4], sizeof(__u32) * 12); - for (i = 4; i < 16; i++) { - if (!arch_get_random_seed_long(&rv) && - !arch_get_random_long(&rv)) { - rv = random_get_entropy(); - arch_init = 0; - } - crng->state[i] ^= rv; - } - if (trust_cpu && arch_init && crng == &primary_crng) { - invalidate_batched_entropy(); - numa_crng_init(); - crng_init = 2; - pr_notice("random: crng done (trusting CPU's manufacturer)\n"); - } - crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1; -} + * Callers may add entropy via: + * + * static void mix_pool_bytes(const void *buf, size_t len) + * + * After which, if added entropy should be credited: + * + * static void credit_init_bits(size_t bits) + * + * Finally, extract entropy via: + * + * static void extract_entropy(void *buf, size_t len) + * + **********************************************************************/ -#ifdef CONFIG_NUMA -static void do_numa_crng_init(struct work_struct *work) -{ - int i; - struct crng_state *crng; - struct crng_state **pool; - - pool = kcalloc(nr_node_ids, sizeof(*pool), GFP_KERNEL|__GFP_NOFAIL); - for_each_online_node(i) { - crng = kmalloc_node(sizeof(struct crng_state), - GFP_KERNEL | __GFP_NOFAIL, i); - spin_lock_init(&crng->lock); - crng_initialize(crng); - pool[i] = crng; - } - mb(); - if (cmpxchg(&crng_node_pool, NULL, pool)) { - for_each_node(i) - kfree(pool[i]); - kfree(pool); - } -} +enum { + POOL_BITS = BLAKE2S_HASH_SIZE * 8, + POOL_READY_BITS = POOL_BITS, /* When crng_init->CRNG_READY */ + POOL_EARLY_BITS = POOL_READY_BITS / 2 /* When crng_init->CRNG_EARLY */ +}; -static DECLARE_WORK(numa_crng_init_work, do_numa_crng_init); +static struct { + struct blake2s_state hash; + spinlock_t lock; + unsigned int init_bits; +} input_pool = { + .hash.h = { BLAKE2S_IV0 ^ (0x01010000 | BLAKE2S_HASH_SIZE), + BLAKE2S_IV1, BLAKE2S_IV2, BLAKE2S_IV3, BLAKE2S_IV4, + BLAKE2S_IV5, BLAKE2S_IV6, BLAKE2S_IV7 }, + .hash.outlen = BLAKE2S_HASH_SIZE, + .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), +}; -static void numa_crng_init(void) +static void _mix_pool_bytes(const void *buf, size_t len) { - schedule_work(&numa_crng_init_work); + blake2s_update(&input_pool.hash, buf, len); } -#else -static void numa_crng_init(void) {} -#endif /* - * crng_fast_load() can be called by code in the interrupt service - * path. So we can't afford to dilly-dally. + * This function adds bytes into the input pool. It does not + * update the initialization bit counter; the caller should call + * credit_init_bits if this is appropriate. */ -static int crng_fast_load(const char *cp, size_t len) +static void mix_pool_bytes(const void *buf, size_t len) { unsigned long flags; - char *p; - if (!spin_trylock_irqsave(&primary_crng.lock, flags)) - return 0; - if (crng_init != 0) { - spin_unlock_irqrestore(&primary_crng.lock, flags); - return 0; - } - p = (unsigned char *) &primary_crng.state[4]; - while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) { - p[crng_init_cnt % CHACHA_KEY_SIZE] ^= *cp; - cp++; crng_init_cnt++; len--; - } - spin_unlock_irqrestore(&primary_crng.lock, flags); - if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { - invalidate_batched_entropy(); - crng_init = 1; - wake_up_interruptible(&crng_init_wait); - pr_notice("random: fast init done\n"); - } - return 1; + spin_lock_irqsave(&input_pool.lock, flags); + _mix_pool_bytes(buf, len); + spin_unlock_irqrestore(&input_pool.lock, flags); } /* - * crng_slow_load() is called by add_device_randomness, which has two - * attributes. (1) We can't trust the buffer passed to it is - * guaranteed to be unpredictable (so it might not have any entropy at - * all), and (2) it doesn't have the performance constraints of - * crng_fast_load(). - * - * So we do something more comprehensive which is guaranteed to touch - * all of the primary_crng's state, and which uses a LFSR with a - * period of 255 as part of the mixing algorithm. Finally, we do - * *not* advance crng_init_cnt since buffer we may get may be something - * like a fixed DMI table (for example), which might very well be - * unique to the machine, but is otherwise unvarying. + * This is an HKDF-like construction for using the hashed collected entropy + * as a PRF key, that's then expanded block-by-block. */ -static int crng_slow_load(const char *cp, size_t len) +static void extract_entropy(void *buf, size_t len) { - unsigned long flags; - static unsigned char lfsr = 1; - unsigned char tmp; - unsigned i, max = CHACHA_KEY_SIZE; - const char * src_buf = cp; - char * dest_buf = (char *) &primary_crng.state[4]; - - if (!spin_trylock_irqsave(&primary_crng.lock, flags)) - return 0; - if (crng_init != 0) { - spin_unlock_irqrestore(&primary_crng.lock, flags); - return 0; + unsigned long flags; + u8 seed[BLAKE2S_HASH_SIZE], next_key[BLAKE2S_HASH_SIZE]; + struct { + unsigned long rdseed[32 / sizeof(long)]; + size_t counter; + } block; + size_t i; + + for (i = 0; i < ARRAY_SIZE(block.rdseed); ++i) { + if (!arch_get_random_seed_long(&block.rdseed[i]) && + !arch_get_random_long(&block.rdseed[i])) + block.rdseed[i] = random_get_entropy(); } - if (len > max) - max = len; - - for (i = 0; i < max ; i++) { - tmp = lfsr; - lfsr >>= 1; - if (tmp & 1) - lfsr ^= 0xE1; - tmp = dest_buf[i % CHACHA_KEY_SIZE]; - dest_buf[i % CHACHA_KEY_SIZE] ^= src_buf[i % len] ^ lfsr; - lfsr += (tmp << 3) | (tmp >> 5); + + spin_lock_irqsave(&input_pool.lock, flags); + + /* seed = HASHPRF(last_key, entropy_input) */ + blake2s_final(&input_pool.hash, seed); + + /* next_key = HASHPRF(seed, RDSEED || 0) */ + block.counter = 0; + blake2s(next_key, (u8 *)&block, seed, sizeof(next_key), sizeof(block), sizeof(seed)); + blake2s_init_key(&input_pool.hash, BLAKE2S_HASH_SIZE, next_key, sizeof(next_key)); + + spin_unlock_irqrestore(&input_pool.lock, flags); + memzero_explicit(next_key, sizeof(next_key)); + + while (len) { + i = min_t(size_t, len, BLAKE2S_HASH_SIZE); + /* output = HASHPRF(seed, RDSEED || ++counter) */ + ++block.counter; + blake2s(buf, (u8 *)&block, seed, i, sizeof(block), sizeof(seed)); + len -= i; + buf += i; } - spin_unlock_irqrestore(&primary_crng.lock, flags); - return 1; + + memzero_explicit(seed, sizeof(seed)); + memzero_explicit(&block, sizeof(block)); } -static void crng_reseed(struct crng_state *crng, struct entropy_store *r) +#define credit_init_bits(bits) if (!crng_ready()) _credit_init_bits(bits) + +static void __cold _credit_init_bits(size_t bits) { - unsigned long flags; - int i, num; - union { - __u8 block[CHACHA_BLOCK_SIZE]; - __u32 key[8]; - } buf; - - if (r) { - num = extract_entropy(r, &buf, 32, 16, 0); - if (num == 0) - return; - } else { - _extract_crng(&primary_crng, buf.block); - _crng_backtrack_protect(&primary_crng, buf.block, - CHACHA_KEY_SIZE); - } - spin_lock_irqsave(&crng->lock, flags); - for (i = 0; i < 8; i++) { - unsigned long rv; - if (!arch_get_random_seed_long(&rv) && - !arch_get_random_long(&rv)) - rv = random_get_entropy(); - crng->state[i+4] ^= buf.key[i] ^ rv; - } - memzero_explicit(&buf, sizeof(buf)); - crng->init_time = jiffies; - spin_unlock_irqrestore(&crng->lock, flags); - if (crng == &primary_crng && crng_init < 2) { - invalidate_batched_entropy(); - numa_crng_init(); - crng_init = 2; + unsigned int new, orig, add; + unsigned long flags; + + if (!bits) + return; + + add = min_t(size_t, bits, POOL_BITS); + + do { + orig = READ_ONCE(input_pool.init_bits); + new = min_t(unsigned int, POOL_BITS, orig + add); + } while (cmpxchg(&input_pool.init_bits, orig, new) != orig); + + if (orig < POOL_READY_BITS && new >= POOL_READY_BITS) { + crng_reseed(); /* Sets crng_init to CRNG_READY under base_crng.lock. */ process_random_ready_list(); wake_up_interruptible(&crng_init_wait); - pr_notice("random: crng init done\n"); - if (unseeded_warning.missed) { - pr_notice("random: %d get_random_xx warning(s) missed " - "due to ratelimiting\n", - unseeded_warning.missed); - unseeded_warning.missed = 0; - } - if (urandom_warning.missed) { - pr_notice("random: %d urandom warning(s) missed " - "due to ratelimiting\n", + kill_fasync(&fasync, SIGIO, POLL_IN); + pr_notice("crng init done\n"); + if (urandom_warning.missed) + pr_notice("%d urandom warning(s) missed due to ratelimiting\n", urandom_warning.missed); - urandom_warning.missed = 0; + } else if (orig < POOL_EARLY_BITS && new >= POOL_EARLY_BITS) { + spin_lock_irqsave(&base_crng.lock, flags); + /* Check if crng_init is CRNG_EMPTY, to avoid race with crng_reseed(). */ + if (crng_init == CRNG_EMPTY) { + extract_entropy(base_crng.key, sizeof(base_crng.key)); + crng_init = CRNG_EARLY; } + spin_unlock_irqrestore(&base_crng.lock, flags); } } -static void _extract_crng(struct crng_state *crng, - __u8 out[CHACHA_BLOCK_SIZE]) + +/********************************************************************** + * + * Entropy collection routines. + * + * The following exported functions are used for pushing entropy into + * the above entropy accumulation routines: + * + * void add_device_randomness(const void *buf, size_t len); + * void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy); + * void add_bootloader_randomness(const void *buf, size_t len); + * void add_interrupt_randomness(int irq); + * void add_input_randomness(unsigned int type, unsigned int code, unsigned int value); + * void add_disk_randomness(struct gendisk *disk); + * + * add_device_randomness() adds data to the input pool that + * is likely to differ between two devices (or possibly even per boot). + * This would be things like MAC addresses or serial numbers, or the + * read-out of the RTC. This does *not* credit any actual entropy to + * the pool, but it initializes the pool to different values for devices + * that might otherwise be identical and have very little entropy + * available to them (particularly common in the embedded world). + * + * add_hwgenerator_randomness() is for true hardware RNGs, and will credit + * entropy as specified by the caller. If the entropy pool is full it will + * block until more entropy is needed. + * + * add_bootloader_randomness() is called by bootloader drivers, such as EFI + * and device tree, and credits its input depending on whether or not the + * configuration option CONFIG_RANDOM_TRUST_BOOTLOADER is set. + * + * add_interrupt_randomness() uses the interrupt timing as random + * inputs to the entropy pool. Using the cycle counters and the irq source + * as inputs, it feeds the input pool roughly once a second or after 64 + * interrupts, crediting 1 bit of entropy for whichever comes first. + * + * add_input_randomness() uses the input layer interrupt timing, as well + * as the event type information from the hardware. + * + * add_disk_randomness() uses what amounts to the seek time of block + * layer request events, on a per-disk_devt basis, as input to the + * entropy pool. Note that high-speed solid state drives with very low + * seek times do not make for good sources of entropy, as their seek + * times are usually fairly consistent. + * + * The last two routines try to estimate how many bits of entropy + * to credit. They do this by keeping track of the first and second + * order deltas of the event timings. + * + **********************************************************************/ + +static bool trust_cpu __initdata = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU); +static bool trust_bootloader __initdata = IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER); +static int __init parse_trust_cpu(char *arg) { - unsigned long v, flags; - - if (crng_ready() && - (time_after(crng_global_init_time, crng->init_time) || - time_after(jiffies, crng->init_time + CRNG_RESEED_INTERVAL))) - crng_reseed(crng, crng == &primary_crng ? &input_pool : NULL); - spin_lock_irqsave(&crng->lock, flags); - if (arch_get_random_long(&v)) - crng->state[14] ^= v; - chacha20_block(&crng->state[0], out); - if (crng->state[12] == 0) - crng->state[13]++; - spin_unlock_irqrestore(&crng->lock, flags); + return kstrtobool(arg, &trust_cpu); } - -static void extract_crng(__u8 out[CHACHA_BLOCK_SIZE]) +static int __init parse_trust_bootloader(char *arg) { - struct crng_state *crng = NULL; - -#ifdef CONFIG_NUMA - if (crng_node_pool) - crng = crng_node_pool[numa_node_id()]; - if (crng == NULL) -#endif - crng = &primary_crng; - _extract_crng(crng, out); + return kstrtobool(arg, &trust_bootloader); } +early_param("random.trust_cpu", parse_trust_cpu); +early_param("random.trust_bootloader", parse_trust_bootloader); /* - * Use the leftover bytes from the CRNG block output (if there is - * enough) to mutate the CRNG key to provide backtracking protection. + * The first collection of entropy occurs at system boot while interrupts + * are still turned off. Here we push in latent entropy, RDSEED, a timestamp, + * utsname(), and the command line. Depending on the above configuration knob, + * RDSEED may be considered sufficient for initialization. Note that much + * earlier setup may already have pushed entropy into the input pool by the + * time we get here. */ -static void _crng_backtrack_protect(struct crng_state *crng, - __u8 tmp[CHACHA_BLOCK_SIZE], int used) -{ - unsigned long flags; - __u32 *s, *d; - int i; - - used = round_up(used, sizeof(__u32)); - if (used + CHACHA_KEY_SIZE > CHACHA_BLOCK_SIZE) { - extract_crng(tmp); - used = 0; - } - spin_lock_irqsave(&crng->lock, flags); - s = (__u32 *) &tmp[used]; - d = &crng->state[4]; - for (i=0; i < 8; i++) - *d++ ^= *s++; - spin_unlock_irqrestore(&crng->lock, flags); -} - -static void crng_backtrack_protect(__u8 tmp[CHACHA_BLOCK_SIZE], int used) +int __init random_init(const char *command_line) { - struct crng_state *crng = NULL; + ktime_t now = ktime_get_real(); + unsigned int i, arch_bits; + unsigned long entropy; -#ifdef CONFIG_NUMA - if (crng_node_pool) - crng = crng_node_pool[numa_node_id()]; - if (crng == NULL) +#if defined(LATENT_ENTROPY_PLUGIN) + static const u8 compiletime_seed[BLAKE2S_BLOCK_SIZE] __initconst __latent_entropy; + _mix_pool_bytes(compiletime_seed, sizeof(compiletime_seed)); #endif - crng = &primary_crng; - _crng_backtrack_protect(crng, tmp, used); -} -static ssize_t extract_crng_user(void __user *buf, size_t nbytes) -{ - ssize_t ret = 0, i = CHACHA_BLOCK_SIZE; - __u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4); - int large_request = (nbytes > 256); - - while (nbytes) { - if (large_request && need_resched()) { - if (signal_pending(current)) { - if (ret == 0) - ret = -ERESTARTSYS; - break; - } - schedule(); - } - - extract_crng(tmp); - i = min_t(int, nbytes, CHACHA_BLOCK_SIZE); - if (copy_to_user(buf, tmp, i)) { - ret = -EFAULT; - break; + for (i = 0, arch_bits = BLAKE2S_BLOCK_SIZE * 8; + i < BLAKE2S_BLOCK_SIZE; i += sizeof(entropy)) { + if (!arch_get_random_seed_long_early(&entropy) && + !arch_get_random_long_early(&entropy)) { + entropy = random_get_entropy(); + arch_bits -= sizeof(entropy) * 8; } - - nbytes -= i; - buf += i; - ret += i; + _mix_pool_bytes(&entropy, sizeof(entropy)); } - crng_backtrack_protect(tmp, i); + _mix_pool_bytes(&now, sizeof(now)); + _mix_pool_bytes(utsname(), sizeof(*(utsname()))); + _mix_pool_bytes(command_line, strlen(command_line)); + add_latent_entropy(); - /* Wipe data just written to memory */ - memzero_explicit(tmp, sizeof(tmp)); + if (crng_ready()) + crng_reseed(); + else if (trust_cpu) + _credit_init_bits(arch_bits); - return ret; + return 0; } - -/********************************************************************* - * - * Entropy input management - * - *********************************************************************/ - -/* There is one of these per entropy source */ -struct timer_rand_state { - cycles_t last_time; - long last_delta, last_delta2; -}; - -#define INIT_TIMER_RAND_STATE { INITIAL_JIFFIES, }; - /* * Add device- or boot-specific data to the input pool to help * initialize it. @@ -1174,800 +847,270 @@ struct timer_rand_state { * the entropy pool having similar initial state across largely * identical devices. */ -void add_device_randomness(const void *buf, unsigned int size) +void add_device_randomness(const void *buf, size_t len) { - unsigned long time = random_get_entropy() ^ jiffies; + unsigned long entropy = random_get_entropy(); unsigned long flags; - if (!crng_ready() && size) - crng_slow_load(buf, size); - - trace_add_device_randomness(size, _RET_IP_); spin_lock_irqsave(&input_pool.lock, flags); - _mix_pool_bytes(&input_pool, buf, size); - _mix_pool_bytes(&input_pool, &time, sizeof(time)); + _mix_pool_bytes(&entropy, sizeof(entropy)); + _mix_pool_bytes(buf, len); spin_unlock_irqrestore(&input_pool.lock, flags); } EXPORT_SYMBOL(add_device_randomness); -static struct timer_rand_state input_timer_state = INIT_TIMER_RAND_STATE; - /* - * This function adds entropy to the entropy "pool" by using timing - * delays. It uses the timer_rand_state structure to make an estimate - * of how many bits of entropy this call has added to the pool. - * - * The number "num" is also added to the pool - it should somehow describe - * the type of event which just happened. This is currently 0-255 for - * keyboard scan codes, and 256 upwards for interrupts. - * + * Interface for in-kernel drivers of true hardware RNGs. + * Those devices may produce endless random bits and will be throttled + * when our pool is full. */ -static void add_timer_randomness(struct timer_rand_state *state, unsigned num) +void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy) { - struct entropy_store *r; - struct { - long jiffies; - unsigned cycles; - unsigned num; - } sample; - long delta, delta2, delta3; - - sample.jiffies = jiffies; - sample.cycles = random_get_entropy(); - sample.num = num; - r = &input_pool; - mix_pool_bytes(r, &sample, sizeof(sample)); + mix_pool_bytes(buf, len); + credit_init_bits(entropy); /* - * Calculate number of bits of randomness we probably added. - * We take into account the first, second and third-order deltas - * in order to make our estimate. + * Throttle writing to once every CRNG_RESEED_INTERVAL, unless + * we're not yet initialized. */ - delta = sample.jiffies - READ_ONCE(state->last_time); - WRITE_ONCE(state->last_time, sample.jiffies); - - delta2 = delta - READ_ONCE(state->last_delta); - WRITE_ONCE(state->last_delta, delta); - - delta3 = delta2 - READ_ONCE(state->last_delta2); - WRITE_ONCE(state->last_delta2, delta2); - - if (delta < 0) - delta = -delta; - if (delta2 < 0) - delta2 = -delta2; - if (delta3 < 0) - delta3 = -delta3; - if (delta > delta2) - delta = delta2; - if (delta > delta3) - delta = delta3; - - /* - * delta is now minimum absolute delta. - * Round down by 1 bit on general principles, - * and limit entropy entimate to 12 bits. - */ - credit_entropy_bits(r, min_t(int, fls(delta>>1), 11)); -} - -void add_input_randomness(unsigned int type, unsigned int code, - unsigned int value) -{ - static unsigned char last_value; - - /* ignore autorepeat and the like */ - if (value == last_value) - return; - - last_value = value; - add_timer_randomness(&input_timer_state, - (type << 4) ^ code ^ (code >> 4) ^ value); - trace_add_input_randomness(ENTROPY_BITS(&input_pool)); -} -EXPORT_SYMBOL_GPL(add_input_randomness); - -static DEFINE_PER_CPU(struct fast_pool, irq_randomness); - -#ifdef ADD_INTERRUPT_BENCH -static unsigned long avg_cycles, avg_deviation; - -#define AVG_SHIFT 8 /* Exponential average factor k=1/256 */ -#define FIXED_1_2 (1 << (AVG_SHIFT-1)) - -static void add_interrupt_bench(cycles_t start) -{ - long delta = random_get_entropy() - start; - - /* Use a weighted moving average */ - delta = delta - ((avg_cycles + FIXED_1_2) >> AVG_SHIFT); - avg_cycles += delta; - /* And average deviation */ - delta = abs(delta) - ((avg_deviation + FIXED_1_2) >> AVG_SHIFT); - avg_deviation += delta; -} -#else -#define add_interrupt_bench(x) -#endif - -static __u32 get_reg(struct fast_pool *f, struct pt_regs *regs) -{ - __u32 *ptr = (__u32 *) regs; - unsigned int idx; - - if (regs == NULL) - return 0; - idx = READ_ONCE(f->reg_idx); - if (idx >= sizeof(struct pt_regs) / sizeof(__u32)) - idx = 0; - ptr += idx++; - WRITE_ONCE(f->reg_idx, idx); - return *ptr; -} - -void add_interrupt_randomness(int irq, int irq_flags) -{ - struct entropy_store *r; - struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); - struct pt_regs *regs = get_irq_regs(); - unsigned long now = jiffies; - cycles_t cycles = random_get_entropy(); - __u32 c_high, j_high; - __u64 ip; - unsigned long seed; - int credit = 0; - - if (cycles == 0) - cycles = get_reg(fast_pool, regs); - c_high = (sizeof(cycles) > 4) ? cycles >> 32 : 0; - j_high = (sizeof(now) > 4) ? now >> 32 : 0; - fast_pool->pool[0] ^= cycles ^ j_high ^ irq; - fast_pool->pool[1] ^= now ^ c_high; - ip = regs ? instruction_pointer(regs) : _RET_IP_; - fast_pool->pool[2] ^= ip; - fast_pool->pool[3] ^= (sizeof(ip) > 4) ? ip >> 32 : - get_reg(fast_pool, regs); - - fast_mix(fast_pool); - add_interrupt_bench(cycles); - - if (unlikely(crng_init == 0)) { - if ((fast_pool->count >= 64) && - crng_fast_load((char *) fast_pool->pool, - sizeof(fast_pool->pool))) { - fast_pool->count = 0; - fast_pool->last = now; - } - return; - } - - if ((fast_pool->count < 64) && - !time_after(now, fast_pool->last + HZ)) - return; - - r = &input_pool; - if (!spin_trylock(&r->lock)) - return; - - fast_pool->last = now; - __mix_pool_bytes(r, &fast_pool->pool, sizeof(fast_pool->pool)); - - /* - * If we have architectural seed generator, produce a seed and - * add it to the pool. For the sake of paranoia don't let the - * architectural seed generator dominate the input from the - * interrupt noise. - */ - if (arch_get_random_seed_long(&seed)) { - __mix_pool_bytes(r, &seed, sizeof(seed)); - credit = 1; - } - spin_unlock(&r->lock); - - fast_pool->count = 0; - - /* award one bit for the contents of the fast pool */ - credit_entropy_bits(r, credit + 1); -} -EXPORT_SYMBOL_GPL(add_interrupt_randomness); - -#ifdef CONFIG_BLOCK -void add_disk_randomness(struct gendisk *disk) -{ - if (!disk || !disk->random) - return; - /* first major is 1, so we get >= 0x200 here */ - add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); - trace_add_disk_randomness(disk_devt(disk), ENTROPY_BITS(&input_pool)); + if (!kthread_should_stop() && crng_ready()) + schedule_timeout_interruptible(CRNG_RESEED_INTERVAL); } -EXPORT_SYMBOL_GPL(add_disk_randomness); -#endif - -/********************************************************************* - * - * Entropy extraction routines - * - *********************************************************************/ +EXPORT_SYMBOL_GPL(add_hwgenerator_randomness); /* - * This utility inline function is responsible for transferring entropy - * from the primary pool to the secondary extraction pool. We make - * sure we pull enough for a 'catastrophic reseed'. + * Handle random seed passed by bootloader, and credit it if + * CONFIG_RANDOM_TRUST_BOOTLOADER is set. */ -static void _xfer_secondary_pool(struct entropy_store *r, size_t nbytes); -static void xfer_secondary_pool(struct entropy_store *r, size_t nbytes) +void __init add_bootloader_randomness(const void *buf, size_t len) { - if (!r->pull || - r->entropy_count >= (nbytes << (ENTROPY_SHIFT + 3)) || - r->entropy_count > r->poolinfo->poolfracbits) - return; - - _xfer_secondary_pool(r, nbytes); + mix_pool_bytes(buf, len); + if (trust_bootloader) + credit_init_bits(len * 8); } -static void _xfer_secondary_pool(struct entropy_store *r, size_t nbytes) -{ - __u32 tmp[OUTPUT_POOL_WORDS]; - - int bytes = nbytes; - - /* pull at least as much as a wakeup */ - bytes = max_t(int, bytes, random_read_wakeup_bits / 8); - /* but never more than the buffer size */ - bytes = min_t(int, bytes, sizeof(tmp)); +struct fast_pool { + struct work_struct mix; + unsigned long pool[4]; + unsigned long last; + unsigned int count; +}; - trace_xfer_secondary_pool(r->name, bytes * 8, nbytes * 8, - ENTROPY_BITS(r), ENTROPY_BITS(r->pull)); - bytes = extract_entropy(r->pull, tmp, bytes, - random_read_wakeup_bits / 8, 0); - mix_pool_bytes(r, tmp, bytes); - credit_entropy_bits(r, bytes*8); -} +static DEFINE_PER_CPU(struct fast_pool, irq_randomness) = { +#ifdef CONFIG_64BIT +#define FASTMIX_PERM SIPHASH_PERMUTATION + .pool = { SIPHASH_CONST_0, SIPHASH_CONST_1, SIPHASH_CONST_2, SIPHASH_CONST_3 } +#else +#define FASTMIX_PERM HSIPHASH_PERMUTATION + .pool = { HSIPHASH_CONST_0, HSIPHASH_CONST_1, HSIPHASH_CONST_2, HSIPHASH_CONST_3 } +#endif +}; /* - * Used as a workqueue function so that when the input pool is getting - * full, we can "spill over" some entropy to the output pools. That - * way the output pools can store some of the excess entropy instead - * of letting it go to waste. + * This is [Half]SipHash-1-x, starting from an empty key. Because + * the key is fixed, it assumes that its inputs are non-malicious, + * and therefore this has no security on its own. s represents the + * four-word SipHash state, while v represents a two-word input. */ -static void push_to_pool(struct work_struct *work) +static void fast_mix(unsigned long s[4], unsigned long v1, unsigned long v2) { - struct entropy_store *r = container_of(work, struct entropy_store, - push_work); - BUG_ON(!r); - _xfer_secondary_pool(r, random_read_wakeup_bits/8); - trace_push_to_pool(r->name, r->entropy_count >> ENTROPY_SHIFT, - r->pull->entropy_count >> ENTROPY_SHIFT); + s[3] ^= v1; + FASTMIX_PERM(s[0], s[1], s[2], s[3]); + s[0] ^= v1; + s[3] ^= v2; + FASTMIX_PERM(s[0], s[1], s[2], s[3]); + s[0] ^= v2; } +#ifdef CONFIG_SMP /* - * This function decides how many bytes to actually take from the - * given pool, and also debits the entropy count accordingly. + * This function is called when the CPU has just come online, with + * entry CPUHP_AP_RANDOM_ONLINE, just after CPUHP_AP_WORKQUEUE_ONLINE. */ -static size_t account(struct entropy_store *r, size_t nbytes, int min, - int reserved) +int __cold random_online_cpu(unsigned int cpu) { - int entropy_count, orig, have_bytes; - size_t ibytes, nfrac; - - BUG_ON(r->entropy_count > r->poolinfo->poolfracbits); - - /* Can we pull enough? */ -retry: - entropy_count = orig = READ_ONCE(r->entropy_count); - ibytes = nbytes; - /* never pull more than available */ - have_bytes = entropy_count >> (ENTROPY_SHIFT + 3); - - if ((have_bytes -= reserved) < 0) - have_bytes = 0; - ibytes = min_t(size_t, ibytes, have_bytes); - if (ibytes < min) - ibytes = 0; - - if (unlikely(entropy_count < 0)) { - pr_warn("random: negative entropy count: pool %s count %d\n", - r->name, entropy_count); - WARN_ON(1); - entropy_count = 0; - } - nfrac = ibytes << (ENTROPY_SHIFT + 3); - if ((size_t) entropy_count > nfrac) - entropy_count -= nfrac; - else - entropy_count = 0; - - if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig) - goto retry; - - trace_debit_entropy(r->name, 8 * ibytes); - if (ibytes && - (r->entropy_count >> ENTROPY_SHIFT) < random_write_wakeup_bits) { - wake_up_interruptible(&random_write_wait); - kill_fasync(&fasync, SIGIO, POLL_OUT); - } - - return ibytes; -} - -/* - * This function does the actual extraction for extract_entropy and - * extract_entropy_user. - * - * Note: we assume that .poolwords is a multiple of 16 words. - */ -static void extract_buf(struct entropy_store *r, __u8 *out) -{ - int i; - union { - __u32 w[5]; - unsigned long l[LONGS(20)]; - } hash; - __u32 workspace[SHA_WORKSPACE_WORDS]; - unsigned long flags; - /* - * If we have an architectural hardware random number - * generator, use it for SHA's initial vector + * During CPU shutdown and before CPU onlining, add_interrupt_ + * randomness() may schedule mix_interrupt_randomness(), and + * set the MIX_INFLIGHT flag. However, because the worker can + * be scheduled on a different CPU during this period, that + * flag will never be cleared. For that reason, we zero out + * the flag here, which runs just after workqueues are onlined + * for the CPU again. This also has the effect of setting the + * irq randomness count to zero so that new accumulated irqs + * are fresh. */ - sha_init(hash.w); - for (i = 0; i < LONGS(20); i++) { - unsigned long v; - if (!arch_get_random_long(&v)) - break; - hash.l[i] = v; - } - - /* Generate a hash across the pool, 16 words (512 bits) at a time */ - spin_lock_irqsave(&r->lock, flags); - for (i = 0; i < r->poolinfo->poolwords; i += 16) - sha_transform(hash.w, (__u8 *)(r->pool + i), workspace); + per_cpu_ptr(&irq_randomness, cpu)->count = 0; + return 0; +} +#endif +static void mix_interrupt_randomness(struct work_struct *work) +{ + struct fast_pool *fast_pool = container_of(work, struct fast_pool, mix); /* - * We mix the hash back into the pool to prevent backtracking - * attacks (where the attacker knows the state of the pool - * plus the current outputs, and attempts to find previous - * ouputs), unless the hash function can be inverted. By - * mixing at least a SHA1 worth of hash data back, we make - * brute-forcing the feedback as hard as brute-forcing the - * hash. + * The size of the copied stack pool is explicitly 2 longs so that we + * only ever ingest half of the siphash output each time, retaining + * the other half as the next "key" that carries over. The entropy is + * supposed to be sufficiently dispersed between bits so on average + * we don't wind up "losing" some. */ - __mix_pool_bytes(r, hash.w, sizeof(hash.w)); - spin_unlock_irqrestore(&r->lock, flags); + unsigned long pool[2]; + unsigned int count; - memzero_explicit(workspace, sizeof(workspace)); + /* Check to see if we're running on the wrong CPU due to hotplug. */ + local_irq_disable(); + if (fast_pool != this_cpu_ptr(&irq_randomness)) { + local_irq_enable(); + return; + } /* - * In case the hash function has some recognizable output - * pattern, we fold it in half. Thus, we always feed back - * twice as much data as we output. + * Copy the pool to the stack so that the mixer always has a + * consistent view, before we reenable irqs again. */ - hash.w[0] ^= hash.w[3]; - hash.w[1] ^= hash.w[4]; - hash.w[2] ^= rol32(hash.w[2], 16); - - memcpy(out, &hash, EXTRACT_SIZE); - memzero_explicit(&hash, sizeof(hash)); -} - -static ssize_t _extract_entropy(struct entropy_store *r, void *buf, - size_t nbytes, int fips) -{ - ssize_t ret = 0, i; - __u8 tmp[EXTRACT_SIZE]; - unsigned long flags; - - while (nbytes) { - extract_buf(r, tmp); - - if (fips) { - spin_lock_irqsave(&r->lock, flags); - if (!memcmp(tmp, r->last_data, EXTRACT_SIZE)) - panic("Hardware RNG duplicated output!\n"); - memcpy(r->last_data, tmp, EXTRACT_SIZE); - spin_unlock_irqrestore(&r->lock, flags); - } - i = min_t(int, nbytes, EXTRACT_SIZE); - memcpy(buf, tmp, i); - nbytes -= i; - buf += i; - ret += i; - } - - /* Wipe data just returned from memory */ - memzero_explicit(tmp, sizeof(tmp)); - - return ret; -} - -/* - * This function extracts randomness from the "entropy pool", and - * returns it in a buffer. - * - * The min parameter specifies the minimum amount we can pull before - * failing to avoid races that defeat catastrophic reseeding while the - * reserved parameter indicates how much entropy we must leave in the - * pool after each pull to avoid starving other readers. - */ -static ssize_t extract_entropy(struct entropy_store *r, void *buf, - size_t nbytes, int min, int reserved) -{ - __u8 tmp[EXTRACT_SIZE]; - unsigned long flags; - - /* if last_data isn't primed, we need EXTRACT_SIZE extra bytes */ - if (fips_enabled) { - spin_lock_irqsave(&r->lock, flags); - if (!r->last_data_init) { - r->last_data_init = 1; - spin_unlock_irqrestore(&r->lock, flags); - trace_extract_entropy(r->name, EXTRACT_SIZE, - ENTROPY_BITS(r), _RET_IP_); - xfer_secondary_pool(r, EXTRACT_SIZE); - extract_buf(r, tmp); - spin_lock_irqsave(&r->lock, flags); - memcpy(r->last_data, tmp, EXTRACT_SIZE); - } - spin_unlock_irqrestore(&r->lock, flags); - } + memcpy(pool, fast_pool->pool, sizeof(pool)); + count = fast_pool->count; + fast_pool->count = 0; + fast_pool->last = jiffies; + local_irq_enable(); - trace_extract_entropy(r->name, nbytes, ENTROPY_BITS(r), _RET_IP_); - xfer_secondary_pool(r, nbytes); - nbytes = account(r, nbytes, min, reserved); + mix_pool_bytes(pool, sizeof(pool)); + credit_init_bits(max(1u, (count & U16_MAX) / 64)); - return _extract_entropy(r, buf, nbytes, fips_enabled); + memzero_explicit(pool, sizeof(pool)); } -/* - * This function extracts randomness from the "entropy pool", and - * returns it in a userspace buffer. - */ -static ssize_t extract_entropy_user(struct entropy_store *r, void __user *buf, - size_t nbytes) +void add_interrupt_randomness(int irq) { - ssize_t ret = 0, i; - __u8 tmp[EXTRACT_SIZE]; - int large_request = (nbytes > 256); - - trace_extract_entropy_user(r->name, nbytes, ENTROPY_BITS(r), _RET_IP_); - if (!r->initialized && r->pull) { - xfer_secondary_pool(r, ENTROPY_BITS(r->pull)/8); - if (!r->initialized) - return 0; - } - xfer_secondary_pool(r, nbytes); - nbytes = account(r, nbytes, 0, 0); - - while (nbytes) { - if (large_request && need_resched()) { - if (signal_pending(current)) { - if (ret == 0) - ret = -ERESTARTSYS; - break; - } - schedule(); - } - - extract_buf(r, tmp); - i = min_t(int, nbytes, EXTRACT_SIZE); - if (copy_to_user(buf, tmp, i)) { - ret = -EFAULT; - break; - } - - nbytes -= i; - buf += i; - ret += i; - } - - /* Wipe data just returned from memory */ - memzero_explicit(tmp, sizeof(tmp)); - - return ret; -} + enum { MIX_INFLIGHT = 1U << 31 }; + unsigned long entropy = random_get_entropy(); + struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); + struct pt_regs *regs = get_irq_regs(); + unsigned int new_count; -#define warn_unseeded_randomness(previous) \ - _warn_unseeded_randomness(__func__, (void *) _RET_IP_, (previous)) + fast_mix(fast_pool->pool, entropy, + (regs ? instruction_pointer(regs) : _RET_IP_) ^ swab(irq)); + new_count = ++fast_pool->count; -static void _warn_unseeded_randomness(const char *func_name, void *caller, - void **previous) -{ -#ifdef CONFIG_WARN_ALL_UNSEEDED_RANDOM - const bool print_once = false; -#else - static bool print_once __read_mostly; -#endif - - if (print_once || - crng_ready() || - (previous && (caller == READ_ONCE(*previous)))) + if (new_count & MIX_INFLIGHT) return; - WRITE_ONCE(*previous, caller); -#ifndef CONFIG_WARN_ALL_UNSEEDED_RANDOM - print_once = true; -#endif - if (__ratelimit(&unseeded_warning)) - printk_deferred(KERN_NOTICE "random: %s called from %pS " - "with crng_init=%d\n", func_name, caller, - crng_init); -} - -/* - * This function is the exported kernel interface. It returns some - * number of good random numbers, suitable for key generation, seeding - * TCP sequence numbers, etc. It does not rely on the hardware random - * number generator. For random bytes direct from the hardware RNG - * (when available), use get_random_bytes_arch(). In order to ensure - * that the randomness provided by this function is okay, the function - * wait_for_random_bytes() should be called and return 0 at least once - * at any point prior. - */ -static void _get_random_bytes(void *buf, int nbytes) -{ - __u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4); - - trace_get_random_bytes(nbytes, _RET_IP_); - - while (nbytes >= CHACHA_BLOCK_SIZE) { - extract_crng(buf); - buf += CHACHA_BLOCK_SIZE; - nbytes -= CHACHA_BLOCK_SIZE; - } - - if (nbytes > 0) { - extract_crng(tmp); - memcpy(buf, tmp, nbytes); - crng_backtrack_protect(tmp, nbytes); - } else - crng_backtrack_protect(tmp, CHACHA_BLOCK_SIZE); - memzero_explicit(tmp, sizeof(tmp)); -} - -void get_random_bytes(void *buf, int nbytes) -{ - static void *previous; - warn_unseeded_randomness(&previous); - _get_random_bytes(buf, nbytes); -} -EXPORT_SYMBOL(get_random_bytes); - - -/* - * Each time the timer fires, we expect that we got an unpredictable - * jump in the cycle counter. Even if the timer is running on another - * CPU, the timer activity will be touching the stack of the CPU that is - * generating entropy.. - * - * Note that we don't re-arm the timer in the timer itself - we are - * happy to be scheduled away, since that just makes the load more - * complex, but we do not want the timer to keep ticking unless the - * entropy loop is running. - * - * So the re-arming always happens in the entropy loop itself. - */ -static void entropy_timer(struct timer_list *t) -{ - credit_entropy_bits(&input_pool, 1); -} - -/* - * If we have an actual cycle counter, see if we can - * generate enough entropy with timing noise - */ -static void try_to_generate_entropy(void) -{ - struct { - unsigned long now; - struct timer_list timer; - } stack; - - stack.now = random_get_entropy(); - - /* Slow counter - or none. Don't even bother */ - if (stack.now == random_get_entropy()) + if (new_count < 1024 && !time_is_before_jiffies(fast_pool->last + HZ)) return; - timer_setup_on_stack(&stack.timer, entropy_timer, 0); - while (!crng_ready()) { - if (!timer_pending(&stack.timer)) - mod_timer(&stack.timer, jiffies+1); - mix_pool_bytes(&input_pool, &stack.now, sizeof(stack.now)); - schedule(); - stack.now = random_get_entropy(); - } - - del_timer_sync(&stack.timer); - destroy_timer_on_stack(&stack.timer); - mix_pool_bytes(&input_pool, &stack.now, sizeof(stack.now)); + if (unlikely(!fast_pool->mix.func)) + INIT_WORK(&fast_pool->mix, mix_interrupt_randomness); + fast_pool->count |= MIX_INFLIGHT; + queue_work_on(raw_smp_processor_id(), system_highpri_wq, &fast_pool->mix); } +EXPORT_SYMBOL_GPL(add_interrupt_randomness); -/* - * Wait for the urandom pool to be seeded and thus guaranteed to supply - * cryptographically secure random numbers. This applies to: the /dev/urandom - * device, the get_random_bytes function, and the get_random_{u32,u64,int,long} - * family of functions. Using any of these functions without first calling - * this function forfeits the guarantee of security. - * - * Returns: 0 if the urandom pool has been seeded. - * -ERESTARTSYS if the function was interrupted by a signal. - */ -int wait_for_random_bytes(void) -{ - if (likely(crng_ready())) - return 0; - - do { - int ret; - ret = wait_event_interruptible_timeout(crng_init_wait, crng_ready(), HZ); - if (ret) - return ret > 0 ? 0 : ret; - - try_to_generate_entropy(); - } while (!crng_ready()); - - return 0; -} -EXPORT_SYMBOL(wait_for_random_bytes); - -/* - * Returns whether or not the urandom pool has been seeded and thus guaranteed - * to supply cryptographically secure random numbers. This applies to: the - * /dev/urandom device, the get_random_bytes function, and the get_random_{u32, - * ,u64,int,long} family of functions. - * - * Returns: true if the urandom pool has been seeded. - * false if the urandom pool has not been seeded. - */ -bool rng_is_initialized(void) -{ - return crng_ready(); -} -EXPORT_SYMBOL(rng_is_initialized); +/* There is one of these per entropy source */ +struct timer_rand_state { + unsigned long last_time; + long last_delta, last_delta2; +}; /* - * Add a callback function that will be invoked when the nonblocking - * pool is initialised. - * - * returns: 0 if callback is successfully added - * -EALREADY if pool is already initialised (callback not called) - * -ENOENT if module for callback is not alive + * This function adds entropy to the entropy "pool" by using timing + * delays. It uses the timer_rand_state structure to make an estimate + * of how many bits of entropy this call has added to the pool. The + * value "num" is also added to the pool; it should somehow describe + * the type of event that just happened. */ -int add_random_ready_callback(struct random_ready_callback *rdy) +static void add_timer_randomness(struct timer_rand_state *state, unsigned int num) { - struct module *owner; - unsigned long flags; - int err = -EALREADY; - - if (crng_ready()) - return err; + unsigned long entropy = random_get_entropy(), now = jiffies, flags; + long delta, delta2, delta3; + unsigned int bits; - owner = rdy->owner; - if (!try_module_get(owner)) - return -ENOENT; + /* + * If we're in a hard IRQ, add_interrupt_randomness() will be called + * sometime after, so mix into the fast pool. + */ + if (in_irq()) { + fast_mix(this_cpu_ptr(&irq_randomness)->pool, entropy, num); + } else { + spin_lock_irqsave(&input_pool.lock, flags); + _mix_pool_bytes(&entropy, sizeof(entropy)); + _mix_pool_bytes(&num, sizeof(num)); + spin_unlock_irqrestore(&input_pool.lock, flags); + } - spin_lock_irqsave(&random_ready_list_lock, flags); if (crng_ready()) - goto out; - - owner = NULL; - - list_add(&rdy->list, &random_ready_list); - err = 0; - -out: - spin_unlock_irqrestore(&random_ready_list_lock, flags); - - module_put(owner); - - return err; -} -EXPORT_SYMBOL(add_random_ready_callback); - -/* - * Delete a previously registered readiness callback function. - */ -void del_random_ready_callback(struct random_ready_callback *rdy) -{ - unsigned long flags; - struct module *owner = NULL; - - spin_lock_irqsave(&random_ready_list_lock, flags); - if (!list_empty(&rdy->list)) { - list_del_init(&rdy->list); - owner = rdy->owner; - } - spin_unlock_irqrestore(&random_ready_list_lock, flags); + return; - module_put(owner); -} -EXPORT_SYMBOL(del_random_ready_callback); + /* + * Calculate number of bits of randomness we probably added. + * We take into account the first, second and third-order deltas + * in order to make our estimate. + */ + delta = now - READ_ONCE(state->last_time); + WRITE_ONCE(state->last_time, now); -/* - * This function will use the architecture-specific hardware random - * number generator if it is available. The arch-specific hw RNG will - * almost certainly be faster than what we can do in software, but it - * is impossible to verify that it is implemented securely (as - * opposed, to, say, the AES encryption of a sequence number using a - * key known by the NSA). So it's useful if we need the speed, but - * only if we're willing to trust the hardware manufacturer not to - * have put in a back door. - * - * Return number of bytes filled in. - */ -int __must_check get_random_bytes_arch(void *buf, int nbytes) -{ - int left = nbytes; - char *p = buf; + delta2 = delta - READ_ONCE(state->last_delta); + WRITE_ONCE(state->last_delta, delta); - trace_get_random_bytes_arch(left, _RET_IP_); - while (left) { - unsigned long v; - int chunk = min_t(int, left, sizeof(unsigned long)); + delta3 = delta2 - READ_ONCE(state->last_delta2); + WRITE_ONCE(state->last_delta2, delta2); - if (!arch_get_random_long(&v)) - break; + if (delta < 0) + delta = -delta; + if (delta2 < 0) + delta2 = -delta2; + if (delta3 < 0) + delta3 = -delta3; + if (delta > delta2) + delta = delta2; + if (delta > delta3) + delta = delta3; - memcpy(p, &v, chunk); - p += chunk; - left -= chunk; - } + /* + * delta is now minimum absolute delta. Round down by 1 bit + * on general principles, and limit entropy estimate to 11 bits. + */ + bits = min(fls(delta >> 1), 11); - return nbytes - left; + /* + * As mentioned above, if we're in a hard IRQ, add_interrupt_randomness() + * will run after this, which uses a different crediting scheme of 1 bit + * per every 64 interrupts. In order to let that function do accounting + * close to the one in this function, we credit a full 64/64 bit per bit, + * and then subtract one to account for the extra one added. + */ + if (in_irq()) + this_cpu_ptr(&irq_randomness)->count += max(1u, bits * 64) - 1; + else + _credit_init_bits(bits); } -EXPORT_SYMBOL(get_random_bytes_arch); -/* - * init_std_data - initialize pool with system data - * - * @r: pool to initialize - * - * This function clears the pool's entropy count and mixes some system - * data into the pool to prepare it for use. The pool is not cleared - * as that can only decrease the entropy in the pool. - */ -static void __init init_std_data(struct entropy_store *r) +void add_input_randomness(unsigned int type, unsigned int code, unsigned int value) { - int i; - ktime_t now = ktime_get_real(); - unsigned long rv; - - r->last_pulled = jiffies; - mix_pool_bytes(r, &now, sizeof(now)); - for (i = r->poolinfo->poolbytes; i > 0; i -= sizeof(rv)) { - if (!arch_get_random_seed_long(&rv) && - !arch_get_random_long(&rv)) - rv = random_get_entropy(); - mix_pool_bytes(r, &rv, sizeof(rv)); - } - mix_pool_bytes(r, utsname(), sizeof(*(utsname()))); + static unsigned char last_value; + static struct timer_rand_state input_timer_state = { INITIAL_JIFFIES }; + + /* Ignore autorepeat and the like. */ + if (value == last_value) + return; + + last_value = value; + add_timer_randomness(&input_timer_state, + (type << 4) ^ code ^ (code >> 4) ^ value); } +EXPORT_SYMBOL_GPL(add_input_randomness); -/* - * Note that setup_arch() may call add_device_randomness() - * long before we get here. This allows seeding of the pools - * with some platform dependent data very early in the boot - * process. But it limits our options here. We must use - * statically allocated structures that already have all - * initializations complete at compile time. We should also - * take care not to overwrite the precious per platform data - * we were given. - */ -int __init rand_initialize(void) +#ifdef CONFIG_BLOCK +void add_disk_randomness(struct gendisk *disk) { - init_std_data(&input_pool); - init_std_data(&blocking_pool); - crng_initialize(&primary_crng); - crng_global_init_time = jiffies; - if (ratelimit_disable) { - urandom_warning.interval = 0; - unseeded_warning.interval = 0; - } - return 0; + if (!disk || !disk->random) + return; + /* First major is 1, so we get >= 0x200 here. */ + add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); } +EXPORT_SYMBOL_GPL(add_disk_randomness); -#ifdef CONFIG_BLOCK -void rand_initialize_disk(struct gendisk *disk) +void __cold rand_initialize_disk(struct gendisk *disk) { struct timer_rand_state *state; @@ -1983,134 +1126,189 @@ void rand_initialize_disk(struct gendisk *disk) } #endif -static ssize_t -_random_read(int nonblock, char __user *buf, size_t nbytes) +/* + * Each time the timer fires, we expect that we got an unpredictable + * jump in the cycle counter. Even if the timer is running on another + * CPU, the timer activity will be touching the stack of the CPU that is + * generating entropy.. + * + * Note that we don't re-arm the timer in the timer itself - we are + * happy to be scheduled away, since that just makes the load more + * complex, but we do not want the timer to keep ticking unless the + * entropy loop is running. + * + * So the re-arming always happens in the entropy loop itself. + */ +static void __cold entropy_timer(struct timer_list *t) +{ + credit_init_bits(1); +} + +/* + * If we have an actual cycle counter, see if we can + * generate enough entropy with timing noise + */ +static void __cold try_to_generate_entropy(void) { - ssize_t n; + struct { + unsigned long entropy; + struct timer_list timer; + } stack; - if (nbytes == 0) - return 0; + stack.entropy = random_get_entropy(); - nbytes = min_t(size_t, nbytes, SEC_XFER_SIZE); - while (1) { - n = extract_entropy_user(&blocking_pool, buf, nbytes); - if (n < 0) - return n; - trace_random_read(n*8, (nbytes-n)*8, - ENTROPY_BITS(&blocking_pool), - ENTROPY_BITS(&input_pool)); - if (n > 0) - return n; - - /* Pool is (near) empty. Maybe wait and retry. */ - if (nonblock) - return -EAGAIN; + /* Slow counter - or none. Don't even bother */ + if (stack.entropy == random_get_entropy()) + return; - wait_event_interruptible(random_read_wait, - blocking_pool.initialized && - (ENTROPY_BITS(&input_pool) >= random_read_wakeup_bits)); - if (signal_pending(current)) - return -ERESTARTSYS; + timer_setup_on_stack(&stack.timer, entropy_timer, 0); + while (!crng_ready() && !signal_pending(current)) { + if (!timer_pending(&stack.timer)) + mod_timer(&stack.timer, jiffies + 1); + mix_pool_bytes(&stack.entropy, sizeof(stack.entropy)); + schedule(); + stack.entropy = random_get_entropy(); } -} -static ssize_t -random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) -{ - return _random_read(file->f_flags & O_NONBLOCK, buf, nbytes); + del_timer_sync(&stack.timer); + destroy_timer_on_stack(&stack.timer); + mix_pool_bytes(&stack.entropy, sizeof(stack.entropy)); } -static ssize_t -urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) + +/********************************************************************** + * + * Userspace reader/writer interfaces. + * + * getrandom(2) is the primary modern interface into the RNG and should + * be used in preference to anything else. + * + * Reading from /dev/random has the same functionality as calling + * getrandom(2) with flags=0. In earlier versions, however, it had + * vastly different semantics and should therefore be avoided, to + * prevent backwards compatibility issues. + * + * Reading from /dev/urandom has the same functionality as calling + * getrandom(2) with flags=GRND_INSECURE. Because it does not block + * waiting for the RNG to be ready, it should not be used. + * + * Writing to either /dev/random or /dev/urandom adds entropy to + * the input pool but does not credit it. + * + * Polling on /dev/random indicates when the RNG is initialized, on + * the read side, and when it wants new entropy, on the write side. + * + * Both /dev/random and /dev/urandom have the same set of ioctls for + * adding entropy, getting the entropy count, zeroing the count, and + * reseeding the crng. + * + **********************************************************************/ + +SYSCALL_DEFINE3(getrandom, char __user *, ubuf, size_t, len, unsigned int, flags) { - unsigned long flags; - static int maxwarn = 10; + struct iov_iter iter; + struct iovec iov; int ret; - if (!crng_ready() && maxwarn > 0) { - maxwarn--; - if (__ratelimit(&urandom_warning)) - printk(KERN_NOTICE "random: %s: uninitialized " - "urandom read (%zd bytes read)\n", - current->comm, nbytes); - spin_lock_irqsave(&primary_crng.lock, flags); - crng_init_cnt = 0; - spin_unlock_irqrestore(&primary_crng.lock, flags); + if (flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE)) + return -EINVAL; + + /* + * Requesting insecure and blocking randomness at the same time makes + * no sense. + */ + if ((flags & (GRND_INSECURE | GRND_RANDOM)) == (GRND_INSECURE | GRND_RANDOM)) + return -EINVAL; + + if (!crng_ready() && !(flags & GRND_INSECURE)) { + if (flags & GRND_NONBLOCK) + return -EAGAIN; + ret = wait_for_random_bytes(); + if (unlikely(ret)) + return ret; } - nbytes = min_t(size_t, nbytes, INT_MAX >> (ENTROPY_SHIFT + 3)); - ret = extract_crng_user(buf, nbytes); - trace_urandom_read(8 * nbytes, 0, ENTROPY_BITS(&input_pool)); - return ret; + + ret = import_single_range(READ, ubuf, len, &iov, &iter); + if (unlikely(ret)) + return ret; + return get_random_bytes_user(&iter); } -static __poll_t -random_poll(struct file *file, poll_table * wait) +static __poll_t random_poll(struct file *file, poll_table *wait) { - __poll_t mask; - - poll_wait(file, &random_read_wait, wait); - poll_wait(file, &random_write_wait, wait); - mask = 0; - if (ENTROPY_BITS(&input_pool) >= random_read_wakeup_bits) - mask |= EPOLLIN | EPOLLRDNORM; - if (ENTROPY_BITS(&input_pool) < random_write_wakeup_bits) - mask |= EPOLLOUT | EPOLLWRNORM; - return mask; + poll_wait(file, &crng_init_wait, wait); + return crng_ready() ? EPOLLIN | EPOLLRDNORM : EPOLLOUT | EPOLLWRNORM; } -static int -write_pool(struct entropy_store *r, const char __user *buffer, size_t count) +static ssize_t write_pool_user(struct iov_iter *iter) { - size_t bytes; - __u32 t, buf[16]; - const char __user *p = buffer; + u8 block[BLAKE2S_BLOCK_SIZE]; + ssize_t ret = 0; + size_t copied; - while (count > 0) { - int b, i = 0; + if (unlikely(!iov_iter_count(iter))) + return 0; - bytes = min(count, sizeof(buf)); - if (copy_from_user(&buf, p, bytes)) - return -EFAULT; + for (;;) { + copied = copy_from_iter(block, sizeof(block), iter); + ret += copied; + mix_pool_bytes(block, copied); + if (!iov_iter_count(iter) || copied != sizeof(block)) + break; - for (b = bytes ; b > 0 ; b -= sizeof(__u32), i++) { - if (!arch_get_random_int(&t)) + BUILD_BUG_ON(PAGE_SIZE % sizeof(block) != 0); + if (ret % PAGE_SIZE == 0) { + if (signal_pending(current)) break; - buf[i] ^= t; + cond_resched(); } + } + + memzero_explicit(block, sizeof(block)); + return ret ? ret : -EFAULT; +} - count -= bytes; - p += bytes; +static ssize_t random_write_iter(struct kiocb *kiocb, struct iov_iter *iter) +{ + return write_pool_user(iter); +} + +static ssize_t urandom_read_iter(struct kiocb *kiocb, struct iov_iter *iter) +{ + static int maxwarn = 10; - mix_pool_bytes(r, buf, bytes); - cond_resched(); + if (!crng_ready()) { + if (!ratelimit_disable && maxwarn <= 0) + ++urandom_warning.missed; + else if (ratelimit_disable || __ratelimit(&urandom_warning)) { + --maxwarn; + pr_notice("%s: uninitialized urandom read (%zu bytes read)\n", + current->comm, iov_iter_count(iter)); + } } - return 0; + return get_random_bytes_user(iter); } -static ssize_t random_write(struct file *file, const char __user *buffer, - size_t count, loff_t *ppos) +static ssize_t random_read_iter(struct kiocb *kiocb, struct iov_iter *iter) { - size_t ret; + int ret; - ret = write_pool(&input_pool, buffer, count); - if (ret) + ret = wait_for_random_bytes(); + if (ret != 0) return ret; - - return (ssize_t)count; + return get_random_bytes_user(iter); } static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) { - int size, ent_count; int __user *p = (int __user *)arg; - int retval; + int ent_count; switch (cmd) { case RNDGETENTCNT: - /* inherently racy, no point locking */ - ent_count = ENTROPY_BITS(&input_pool); - if (put_user(ent_count, p)) + /* Inherently racy, no point locking. */ + if (put_user(input_pool.init_bits, p)) return -EFAULT; return 0; case RNDADDTOENTCNT: @@ -2118,39 +1316,48 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) return -EPERM; if (get_user(ent_count, p)) return -EFAULT; - return credit_entropy_bits_safe(&input_pool, ent_count); - case RNDADDENTROPY: + if (ent_count < 0) + return -EINVAL; + credit_init_bits(ent_count); + return 0; + case RNDADDENTROPY: { + struct iov_iter iter; + struct iovec iov; + ssize_t ret; + int len; + if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (get_user(ent_count, p++)) return -EFAULT; if (ent_count < 0) return -EINVAL; - if (get_user(size, p++)) + if (get_user(len, p++)) + return -EFAULT; + ret = import_single_range(WRITE, p, len, &iov, &iter); + if (unlikely(ret)) + return ret; + ret = write_pool_user(&iter); + if (unlikely(ret < 0)) + return ret; + /* Since we're crediting, enforce that it was all written into the pool. */ + if (unlikely(ret != len)) return -EFAULT; - retval = write_pool(&input_pool, (const char __user *)p, - size); - if (retval < 0) - return retval; - return credit_entropy_bits_safe(&input_pool, ent_count); + credit_init_bits(ent_count); + return 0; + } case RNDZAPENTCNT: case RNDCLEARPOOL: - /* - * Clear the entropy pool counters. We no longer clear - * the entropy pool, as that's silly. - */ + /* No longer has any effect. */ if (!capable(CAP_SYS_ADMIN)) return -EPERM; - input_pool.entropy_count = 0; - blocking_pool.entropy_count = 0; return 0; case RNDRESEEDCRNG: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (crng_init < 2) + if (!crng_ready()) return -ENODATA; - crng_reseed(&primary_crng, &input_pool); - crng_global_init_time = jiffies - 1; + crng_reseed(); return 0; default: return -EINVAL; @@ -2163,49 +1370,56 @@ static int random_fasync(int fd, struct file *filp, int on) } const struct file_operations random_fops = { - .read = random_read, - .write = random_write, - .poll = random_poll, + .read_iter = random_read_iter, + .write_iter = random_write_iter, + .poll = random_poll, .unlocked_ioctl = random_ioctl, + .compat_ioctl = compat_ptr_ioctl, .fasync = random_fasync, .llseek = noop_llseek, + .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, }; const struct file_operations urandom_fops = { - .read = urandom_read, - .write = random_write, + .read_iter = urandom_read_iter, + .write_iter = random_write_iter, .unlocked_ioctl = random_ioctl, + .compat_ioctl = compat_ptr_ioctl, .fasync = random_fasync, .llseek = noop_llseek, + .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, }; -SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, - unsigned int, flags) -{ - int ret; - - if (flags & ~(GRND_NONBLOCK|GRND_RANDOM)) - return -EINVAL; - - if (count > INT_MAX) - count = INT_MAX; - - if (flags & GRND_RANDOM) - return _random_read(flags & GRND_NONBLOCK, buf, count); - - if (!crng_ready()) { - if (flags & GRND_NONBLOCK) - return -EAGAIN; - ret = wait_for_random_bytes(); - if (unlikely(ret)) - return ret; - } - return urandom_read(NULL, buf, count, NULL); -} /******************************************************************** * - * Sysctl interface + * Sysctl interface. + * + * These are partly unused legacy knobs with dummy values to not break + * userspace and partly still useful things. They are usually accessible + * in /proc/sys/kernel/random/ and are as follows: + * + * - boot_id - a UUID representing the current boot. + * + * - uuid - a random UUID, different each time the file is read. + * + * - poolsize - the number of bits of entropy that the input pool can + * hold, tied to the POOL_BITS constant. + * + * - entropy_avail - the number of bits of entropy currently in the + * input pool. Always <= poolsize. + * + * - write_wakeup_threshold - the amount of entropy in the input pool + * below which write polls to /dev/random will unblock, requesting + * more entropy, tied to the POOL_READY_BITS constant. It is writable + * to avoid breaking old userspaces, but writing to it does not + * change any behavior of the RNG. + * + * - urandom_min_reseed_secs - fixed to the value CRNG_RESEED_INTERVAL. + * It is writable to avoid breaking old userspaces, but writing + * to it does not change any behavior of the RNG. * ********************************************************************/ @@ -2213,26 +1427,28 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, #include -static int min_read_thresh = 8, min_write_thresh; -static int max_read_thresh = OUTPUT_POOL_WORDS * 32; -static int max_write_thresh = INPUT_POOL_WORDS * 32; -static int random_min_urandom_seed = 60; -static char sysctl_bootid[16]; +static int sysctl_random_min_urandom_seed = CRNG_RESEED_INTERVAL / HZ; +static int sysctl_random_write_wakeup_bits = POOL_READY_BITS; +static int sysctl_poolsize = POOL_BITS; +static u8 sysctl_bootid[UUID_SIZE]; /* * This function is used to return both the bootid UUID, and random - * UUID. The difference is in whether table->data is NULL; if it is, + * UUID. The difference is in whether table->data is NULL; if it is, * then a new UUID is generated and returned to the user. - * - * If the user accesses this via the proc interface, the UUID will be - * returned as an ASCII string in the standard UUID format; if via the - * sysctl system call, as 16 bytes of binary data. */ -static int proc_do_uuid(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - struct ctl_table fake_table; - unsigned char buf[64], tmp_uuid[16], *uuid; +static int proc_do_uuid(struct ctl_table *table, int write, void __user *buf, + size_t *lenp, loff_t *ppos) +{ + u8 tmp_uuid[UUID_SIZE], *uuid; + char uuid_string[UUID_STRING_LEN + 1]; + struct ctl_table fake_table = { + .data = uuid_string, + .maxlen = UUID_STRING_LEN + }; + + if (write) + return -EPERM; uuid = table->data; if (!uuid) { @@ -2247,32 +1463,17 @@ static int proc_do_uuid(struct ctl_table *table, int write, spin_unlock(&bootid_spinlock); } - sprintf(buf, "%pU", uuid); - - fake_table.data = buf; - fake_table.maxlen = sizeof(buf); - - return proc_dostring(&fake_table, write, buffer, lenp, ppos); + snprintf(uuid_string, sizeof(uuid_string), "%pU", uuid); + return proc_dostring(&fake_table, 0, buf, lenp, ppos); } -/* - * Return entropy available scaled to integral bits - */ -static int proc_do_entropy(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) +/* The same as proc_dointvec, but writes don't change anything. */ +static int proc_do_rointvec(struct ctl_table *table, int write, void __user *buf, + size_t *lenp, loff_t *ppos) { - struct ctl_table fake_table; - int entropy_count; - - entropy_count = *(int *)table->data >> ENTROPY_SHIFT; - - fake_table.data = &entropy_count; - fake_table.maxlen = sizeof(entropy_count); - - return proc_dointvec(&fake_table, write, buffer, lenp, ppos); + return write ? 0 : proc_dointvec(table, 0, buf, lenp, ppos); } -static int sysctl_poolsize = INPUT_POOL_WORDS * 32; extern struct ctl_table random_table[]; struct ctl_table random_table[] = { { @@ -2284,227 +1485,36 @@ struct ctl_table random_table[] = { }, { .procname = "entropy_avail", + .data = &input_pool.init_bits, .maxlen = sizeof(int), .mode = 0444, - .proc_handler = proc_do_entropy, - .data = &input_pool.entropy_count, - }, - { - .procname = "read_wakeup_threshold", - .data = &random_read_wakeup_bits, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_read_thresh, - .extra2 = &max_read_thresh, + .proc_handler = proc_dointvec, }, { .procname = "write_wakeup_threshold", - .data = &random_write_wakeup_bits, + .data = &sysctl_random_write_wakeup_bits, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_write_thresh, - .extra2 = &max_write_thresh, + .proc_handler = proc_do_rointvec, }, { .procname = "urandom_min_reseed_secs", - .data = &random_min_urandom_seed, + .data = &sysctl_random_min_urandom_seed, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_do_rointvec, }, { .procname = "boot_id", .data = &sysctl_bootid, - .maxlen = 16, .mode = 0444, .proc_handler = proc_do_uuid, }, { .procname = "uuid", - .maxlen = 16, .mode = 0444, .proc_handler = proc_do_uuid, }, -#ifdef ADD_INTERRUPT_BENCH - { - .procname = "add_interrupt_avg_cycles", - .data = &avg_cycles, - .maxlen = sizeof(avg_cycles), - .mode = 0444, - .proc_handler = proc_doulongvec_minmax, - }, - { - .procname = "add_interrupt_avg_deviation", - .data = &avg_deviation, - .maxlen = sizeof(avg_deviation), - .mode = 0444, - .proc_handler = proc_doulongvec_minmax, - }, -#endif { } }; -#endif /* CONFIG_SYSCTL */ - -struct batched_entropy { - union { - u64 entropy_u64[CHACHA_BLOCK_SIZE / sizeof(u64)]; - u32 entropy_u32[CHACHA_BLOCK_SIZE / sizeof(u32)]; - }; - unsigned int position; - spinlock_t batch_lock; -}; - -/* - * Get a random word for internal kernel use only. The quality of the random - * number is good as /dev/urandom, but there is no backtrack protection, with - * the goal of being quite fast and not depleting entropy. In order to ensure - * that the randomness provided by this function is okay, the function - * wait_for_random_bytes() should be called and return 0 at least once at any - * point prior. - */ -static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = { - .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u64.lock), -}; - -u64 get_random_u64(void) -{ - u64 ret; - unsigned long flags; - struct batched_entropy *batch; - static void *previous; - - warn_unseeded_randomness(&previous); - - batch = raw_cpu_ptr(&batched_entropy_u64); - spin_lock_irqsave(&batch->batch_lock, flags); - if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) { - extract_crng((u8 *)batch->entropy_u64); - batch->position = 0; - } - ret = batch->entropy_u64[batch->position++]; - spin_unlock_irqrestore(&batch->batch_lock, flags); - return ret; -} -EXPORT_SYMBOL(get_random_u64); - -static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = { - .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u32.lock), -}; -u32 get_random_u32(void) -{ - u32 ret; - unsigned long flags; - struct batched_entropy *batch; - static void *previous; - - warn_unseeded_randomness(&previous); - - batch = raw_cpu_ptr(&batched_entropy_u32); - spin_lock_irqsave(&batch->batch_lock, flags); - if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) { - extract_crng((u8 *)batch->entropy_u32); - batch->position = 0; - } - ret = batch->entropy_u32[batch->position++]; - spin_unlock_irqrestore(&batch->batch_lock, flags); - return ret; -} -EXPORT_SYMBOL(get_random_u32); - -/* It's important to invalidate all potential batched entropy that might - * be stored before the crng is initialized, which we can do lazily by - * simply resetting the counter to zero so that it's re-extracted on the - * next usage. */ -static void invalidate_batched_entropy(void) -{ - int cpu; - unsigned long flags; - - for_each_possible_cpu (cpu) { - struct batched_entropy *batched_entropy; - - batched_entropy = per_cpu_ptr(&batched_entropy_u32, cpu); - spin_lock_irqsave(&batched_entropy->batch_lock, flags); - batched_entropy->position = 0; - spin_unlock(&batched_entropy->batch_lock); - - batched_entropy = per_cpu_ptr(&batched_entropy_u64, cpu); - spin_lock(&batched_entropy->batch_lock); - batched_entropy->position = 0; - spin_unlock_irqrestore(&batched_entropy->batch_lock, flags); - } -} - -/** - * randomize_page - Generate a random, page aligned address - * @start: The smallest acceptable address the caller will take. - * @range: The size of the area, starting at @start, within which the - * random address must fall. - * - * If @start + @range would overflow, @range is capped. - * - * NOTE: Historical use of randomize_range, which this replaces, presumed that - * @start was already page aligned. We now align it regardless. - * - * Return: A page aligned address within [start, start + range). On error, - * @start is returned. - */ -unsigned long -randomize_page(unsigned long start, unsigned long range) -{ - if (!PAGE_ALIGNED(start)) { - range -= PAGE_ALIGN(start) - start; - start = PAGE_ALIGN(start); - } - - if (start > ULONG_MAX - range) - range = ULONG_MAX - start; - - range >>= PAGE_SHIFT; - - if (range == 0) - return start; - - return start + (get_random_long() % range << PAGE_SHIFT); -} - -/* Interface for in-kernel drivers of true hardware RNGs. - * Those devices may produce endless random bits and will be throttled - * when our pool is full. - */ -void add_hwgenerator_randomness(const char *buffer, size_t count, - size_t entropy) -{ - struct entropy_store *poolp = &input_pool; - - if (unlikely(crng_init == 0)) { - crng_fast_load(buffer, count); - return; - } - - /* Suspend writing if we're above the trickle threshold. - * We'll be woken up again once below random_write_wakeup_thresh, - * or when the calling thread is about to terminate. - */ - wait_event_interruptible(random_write_wait, kthread_should_stop() || - ENTROPY_BITS(&input_pool) <= random_write_wakeup_bits); - mix_pool_bytes(poolp, buffer, count); - credit_entropy_bits(poolp, entropy); -} -EXPORT_SYMBOL_GPL(add_hwgenerator_randomness); - -/* Handle random seed passed by bootloader. - * If the seed is trustworthy, it would be regarded as hardware RNGs. Otherwise - * it would be regarded as device data. - * The decision is controlled by CONFIG_RANDOM_TRUST_BOOTLOADER. - */ -void add_bootloader_randomness(const void *buf, unsigned int size) -{ - if (IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER)) - add_hwgenerator_randomness(buf, size, size * 8); - else - add_device_randomness(buf, size); -} -EXPORT_SYMBOL_GPL(add_bootloader_randomness); +#endif /* CONFIG_SYSCTL */ diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index 1838039b03333..17fbd7f7a2954 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -274,14 +274,6 @@ static void tpm_dev_release(struct device *dev) kfree(chip); } -static void tpm_devs_release(struct device *dev) -{ - struct tpm_chip *chip = container_of(dev, struct tpm_chip, devs); - - /* release the master device reference */ - put_device(&chip->dev); -} - /** * tpm_class_shutdown() - prepare the TPM device for loss of power. * @dev: device to which the chip is associated. @@ -344,7 +336,6 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev, chip->dev_num = rc; device_initialize(&chip->dev); - device_initialize(&chip->devs); chip->dev.class = tpm_class; chip->dev.class->shutdown_pre = tpm_class_shutdown; @@ -352,29 +343,12 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev, chip->dev.parent = pdev; chip->dev.groups = chip->groups; - chip->devs.parent = pdev; - chip->devs.class = tpmrm_class; - chip->devs.release = tpm_devs_release; - /* get extra reference on main device to hold on - * behalf of devs. This holds the chip structure - * while cdevs is in use. The corresponding put - * is in the tpm_devs_release (TPM2 only) - */ - if (chip->flags & TPM_CHIP_FLAG_TPM2) - get_device(&chip->dev); - if (chip->dev_num == 0) chip->dev.devt = MKDEV(MISC_MAJOR, TPM_MINOR); else chip->dev.devt = MKDEV(MAJOR(tpm_devt), chip->dev_num); - chip->devs.devt = - MKDEV(MAJOR(tpm_devt), chip->dev_num + TPM_NUM_DEVICES); - rc = dev_set_name(&chip->dev, "tpm%d", chip->dev_num); - if (rc) - goto out; - rc = dev_set_name(&chip->devs, "tpmrm%d", chip->dev_num); if (rc) goto out; @@ -382,9 +356,7 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev, chip->flags |= TPM_CHIP_FLAG_VIRTUAL; cdev_init(&chip->cdev, &tpm_fops); - cdev_init(&chip->cdevs, &tpmrm_fops); chip->cdev.owner = THIS_MODULE; - chip->cdevs.owner = THIS_MODULE; rc = tpm2_init_space(&chip->work_space, TPM2_SPACE_BUFFER_SIZE); if (rc) { @@ -396,7 +368,6 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev, return chip; out: - put_device(&chip->devs); put_device(&chip->dev); return ERR_PTR(rc); } @@ -445,14 +416,9 @@ static int tpm_add_char_device(struct tpm_chip *chip) } if (chip->flags & TPM_CHIP_FLAG_TPM2) { - rc = cdev_device_add(&chip->cdevs, &chip->devs); - if (rc) { - dev_err(&chip->devs, - "unable to cdev_device_add() %s, major %d, minor %d, err=%d\n", - dev_name(&chip->devs), MAJOR(chip->devs.devt), - MINOR(chip->devs.devt), rc); - return rc; - } + rc = tpm_devs_add(chip); + if (rc) + goto err_del_cdev; } /* Make the chip available. */ @@ -460,6 +426,10 @@ static int tpm_add_char_device(struct tpm_chip *chip) idr_replace(&dev_nums_idr, chip, chip->dev_num); mutex_unlock(&idr_lock); + return 0; + +err_del_cdev: + cdev_device_del(&chip->cdev, &chip->dev); return rc; } @@ -641,7 +611,7 @@ void tpm_chip_unregister(struct tpm_chip *chip) hwrng_unregister(&chip->hwrng); tpm_bios_log_teardown(chip); if (chip->flags & TPM_CHIP_FLAG_TPM2) - cdev_device_del(&chip->cdevs, &chip->devs); + tpm_devs_remove(chip); tpm_del_char_device(chip); } EXPORT_SYMBOL_GPL(tpm_chip_unregister); diff --git a/drivers/char/tpm/tpm-dev-common.c b/drivers/char/tpm/tpm-dev-common.c index 1784530b8387b..b99e1941c52c9 100644 --- a/drivers/char/tpm/tpm-dev-common.c +++ b/drivers/char/tpm/tpm-dev-common.c @@ -70,7 +70,13 @@ static void tpm_dev_async_work(struct work_struct *work) ret = tpm_dev_transmit(priv->chip, priv->space, priv->data_buffer, sizeof(priv->data_buffer)); tpm_put_ops(priv->chip); - if (ret > 0) { + + /* + * If ret is > 0 then tpm_dev_transmit returned the size of the + * response. If ret is < 0 then tpm_dev_transmit failed and + * returned an error code. + */ + if (ret != 0) { priv->response_length = ret; mod_timer(&priv->user_read_timer, jiffies + (120 * HZ)); } diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index 37f010421a369..58312062d435c 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -466,6 +466,8 @@ int tpm2_prepare_space(struct tpm_chip *chip, struct tpm_space *space, u8 *cmd, size_t cmdsiz); int tpm2_commit_space(struct tpm_chip *chip, struct tpm_space *space, void *buf, size_t *bufsiz); +int tpm_devs_add(struct tpm_chip *chip); +void tpm_devs_remove(struct tpm_chip *chip); void tpm_bios_log_setup(struct tpm_chip *chip); void tpm_bios_log_teardown(struct tpm_chip *chip); diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c index 1385c2c0acbe1..89635bb117d28 100644 --- a/drivers/char/tpm/tpm2-cmd.c +++ b/drivers/char/tpm/tpm2-cmd.c @@ -706,7 +706,16 @@ ssize_t tpm2_get_tpm_pt(struct tpm_chip *chip, u32 property_id, u32 *value, if (!rc) { out = (struct tpm2_get_cap_out *) &buf.data[TPM_HEADER_SIZE]; - *value = be32_to_cpu(out->value); + /* + * To prevent failing boot up of some systems, Infineon TPM2.0 + * returns SUCCESS on TPM2_Startup in field upgrade mode. Also + * the TPM2_Getcapability command returns a zero length list + * in field upgrade mode. + */ + if (be32_to_cpu(out->property_cnt) > 0) + *value = be32_to_cpu(out->value); + else + rc = -ENODATA; } tpm_buf_destroy(&buf); return rc; diff --git a/drivers/char/tpm/tpm2-space.c b/drivers/char/tpm/tpm2-space.c index 784b8b3cb903f..ffb35f0154c16 100644 --- a/drivers/char/tpm/tpm2-space.c +++ b/drivers/char/tpm/tpm2-space.c @@ -58,12 +58,12 @@ int tpm2_init_space(struct tpm_space *space, unsigned int buf_size) void tpm2_del_space(struct tpm_chip *chip, struct tpm_space *space) { - mutex_lock(&chip->tpm_mutex); - if (!tpm_chip_start(chip)) { + + if (tpm_try_get_ops(chip) == 0) { tpm2_flush_sessions(chip, space); - tpm_chip_stop(chip); + tpm_put_ops(chip); } - mutex_unlock(&chip->tpm_mutex); + kfree(space->context_buf); kfree(space->session_buf); } @@ -455,6 +455,9 @@ static int tpm2_map_response_body(struct tpm_chip *chip, u32 cc, u8 *rsp, if (be32_to_cpu(data->capability) != TPM2_CAP_HANDLES) return 0; + if (be32_to_cpu(data->count) > (UINT_MAX - TPM_HEADER_SIZE - 9) / 4) + return -EFAULT; + if (len != TPM_HEADER_SIZE + 9 + 4 * be32_to_cpu(data->count)) return -EFAULT; @@ -571,3 +574,68 @@ int tpm2_commit_space(struct tpm_chip *chip, struct tpm_space *space, dev_err(&chip->dev, "%s: error %d\n", __func__, rc); return rc; } + +/* + * Put the reference to the main device. + */ +static void tpm_devs_release(struct device *dev) +{ + struct tpm_chip *chip = container_of(dev, struct tpm_chip, devs); + + /* release the master device reference */ + put_device(&chip->dev); +} + +/* + * Remove the device file for exposed TPM spaces and release the device + * reference. This may also release the reference to the master device. + */ +void tpm_devs_remove(struct tpm_chip *chip) +{ + cdev_device_del(&chip->cdevs, &chip->devs); + put_device(&chip->devs); +} + +/* + * Add a device file to expose TPM spaces. Also take a reference to the + * main device. + */ +int tpm_devs_add(struct tpm_chip *chip) +{ + int rc; + + device_initialize(&chip->devs); + chip->devs.parent = chip->dev.parent; + chip->devs.class = tpmrm_class; + + /* + * Get extra reference on main device to hold on behalf of devs. + * This holds the chip structure while cdevs is in use. The + * corresponding put is in the tpm_devs_release. + */ + get_device(&chip->dev); + chip->devs.release = tpm_devs_release; + chip->devs.devt = MKDEV(MAJOR(tpm_devt), chip->dev_num + TPM_NUM_DEVICES); + cdev_init(&chip->cdevs, &tpmrm_fops); + chip->cdevs.owner = THIS_MODULE; + + rc = dev_set_name(&chip->devs, "tpmrm%d", chip->dev_num); + if (rc) + goto err_put_devs; + + rc = cdev_device_add(&chip->cdevs, &chip->devs); + if (rc) { + dev_err(&chip->devs, + "unable to cdev_device_add() %s, major %d, minor %d, err=%d\n", + dev_name(&chip->devs), MAJOR(chip->devs.devt), + MINOR(chip->devs.devt), rc); + goto err_put_devs; + } + + return 0; + +err_put_devs: + put_device(&chip->devs); + + return rc; +} diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c index 64428dbed9928..4236607f69e43 100644 --- a/drivers/char/tpm/tpm_ibmvtpm.c +++ b/drivers/char/tpm/tpm_ibmvtpm.c @@ -685,6 +685,7 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev, if (!wait_event_timeout(ibmvtpm->crq_queue.wq, ibmvtpm->rtce_buf != NULL, HZ)) { + rc = -ENODEV; dev_err(dev, "CRQ response timed out\n"); goto init_irq_cleanup; } diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c index 2fe26ec03552b..70f7859942287 100644 --- a/drivers/char/tpm/tpm_tis_core.c +++ b/drivers/char/tpm/tpm_tis_core.c @@ -877,7 +877,15 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq, intmask |= TPM_INTF_CMD_READY_INT | TPM_INTF_LOCALITY_CHANGE_INT | TPM_INTF_DATA_AVAIL_INT | TPM_INTF_STS_VALID_INT; intmask &= ~TPM_GLOBAL_INT_ENABLE; + + rc = request_locality(chip, 0); + if (rc < 0) { + rc = -ENODEV; + goto out_err; + } + tpm_tis_write32(priv, TPM_INT_ENABLE(priv->locality), intmask); + release_locality(chip, 0); rc = tpm_chip_start(chip); if (rc) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index b453029487a12..c736adef9d3c8 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1961,6 +1961,13 @@ static void virtcons_remove(struct virtio_device *vdev) list_del(&portdev->list); spin_unlock_irq(&pdrvdata_lock); + /* Device is going away, exit any polling for buffers */ + virtio_break_device(vdev); + if (use_multiport(portdev)) + flush_work(&portdev->control_work); + else + flush_work(&portdev->config_work); + /* Disable interrupts for vqs */ vdev->config->reset(vdev); /* Finish up work that's lined up */ @@ -2234,7 +2241,7 @@ static struct virtio_driver virtio_rproc_serial = { .remove = virtcons_remove, }; -static int __init init(void) +static int __init virtio_console_init(void) { int err; @@ -2271,7 +2278,7 @@ static int __init init(void) return err; } -static void __exit fini(void) +static void __exit virtio_console_fini(void) { reclaim_dma_bufs(); @@ -2281,8 +2288,8 @@ static void __exit fini(void) class_destroy(pdrvdata.class); debugfs_remove_recursive(pdrvdata.debugfs_dir); } -module_init(init); -module_exit(fini); +module_init(virtio_console_init); +module_exit(virtio_console_fini); MODULE_DESCRIPTION("Virtio console driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/clk/actions/owl-s700.c b/drivers/clk/actions/owl-s700.c index a2f34d13fb543..6ea7da1d6d755 100644 --- a/drivers/clk/actions/owl-s700.c +++ b/drivers/clk/actions/owl-s700.c @@ -162,6 +162,7 @@ static struct clk_div_table hdmia_div_table[] = { static struct clk_div_table rmii_div_table[] = { {0, 4}, {1, 10}, + {0, 0} }; /* divider clocks */ diff --git a/drivers/clk/actions/owl-s900.c b/drivers/clk/actions/owl-s900.c index 790890978424a..5144ada2c7e1a 100644 --- a/drivers/clk/actions/owl-s900.c +++ b/drivers/clk/actions/owl-s900.c @@ -140,7 +140,7 @@ static struct clk_div_table rmii_ref_div_table[] = { static struct clk_div_table usb3_mac_div_table[] = { { 1, 2 }, { 2, 3 }, { 3, 4 }, - { 0, 8 }, + { 0, 0 } }; static struct clk_div_table i2s_div_table[] = { diff --git a/drivers/clk/at91/clk-generated.c b/drivers/clk/at91/clk-generated.c index 44a46dcc0518b..0e7ec5075689a 100644 --- a/drivers/clk/at91/clk-generated.c +++ b/drivers/clk/at91/clk-generated.c @@ -18,8 +18,6 @@ #define GENERATED_MAX_DIV 255 -#define GCK_INDEX_DT_AUDIO_PLL 5 - struct clk_generated { struct clk_hw hw; struct regmap *regmap; @@ -29,7 +27,7 @@ struct clk_generated { u32 gckdiv; const struct clk_pcr_layout *layout; u8 parent_id; - bool audio_pll_allowed; + int chg_pid; }; #define to_clk_generated(hw) \ @@ -107,9 +105,13 @@ static void clk_generated_best_diff(struct clk_rate_request *req, tmp_rate = parent_rate; else tmp_rate = parent_rate / div; + + if (tmp_rate < req->min_rate || tmp_rate > req->max_rate) + return; + tmp_diff = abs(req->rate - tmp_rate); - if (*best_diff < 0 || *best_diff > tmp_diff) { + if (*best_diff < 0 || *best_diff >= tmp_diff) { *best_rate = tmp_rate; *best_diff = tmp_diff; req->best_parent_rate = parent_rate; @@ -129,7 +131,16 @@ static int clk_generated_determine_rate(struct clk_hw *hw, int i; u32 div; - for (i = 0; i < clk_hw_get_num_parents(hw) - 1; i++) { + /* do not look for a rate that is outside of our range */ + if (gck->range.max && req->rate > gck->range.max) + req->rate = gck->range.max; + if (gck->range.min && req->rate < gck->range.min) + req->rate = gck->range.min; + + for (i = 0; i < clk_hw_get_num_parents(hw); i++) { + if (gck->chg_pid == i) + continue; + parent = clk_hw_get_parent_by_index(hw, i); if (!parent) continue; @@ -161,10 +172,10 @@ static int clk_generated_determine_rate(struct clk_hw *hw, * that the only clks able to modify gck rate are those of audio IPs. */ - if (!gck->audio_pll_allowed) + if (gck->chg_pid < 0) goto end; - parent = clk_hw_get_parent_by_index(hw, GCK_INDEX_DT_AUDIO_PLL); + parent = clk_hw_get_parent_by_index(hw, gck->chg_pid); if (!parent) goto end; @@ -271,8 +282,8 @@ struct clk_hw * __init at91_clk_register_generated(struct regmap *regmap, spinlock_t *lock, const struct clk_pcr_layout *layout, const char *name, const char **parent_names, - u8 num_parents, u8 id, bool pll_audio, - const struct clk_range *range) + u8 num_parents, u8 id, + const struct clk_range *range, int chg_pid) { struct clk_generated *gck; struct clk_init_data init; @@ -287,15 +298,16 @@ at91_clk_register_generated(struct regmap *regmap, spinlock_t *lock, init.ops = &generated_ops; init.parent_names = parent_names; init.num_parents = num_parents; - init.flags = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE | - CLK_SET_RATE_PARENT; + init.flags = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE; + if (chg_pid >= 0) + init.flags |= CLK_SET_RATE_PARENT; gck->id = id; gck->hw.init = &init; gck->regmap = regmap; gck->lock = lock; gck->range = *range; - gck->audio_pll_allowed = pll_audio; + gck->chg_pid = chg_pid; gck->layout = layout; clk_generated_startup(gck); diff --git a/drivers/clk/at91/dt-compat.c b/drivers/clk/at91/dt-compat.c index aa1754eac59ff..8a652c44c25ab 100644 --- a/drivers/clk/at91/dt-compat.c +++ b/drivers/clk/at91/dt-compat.c @@ -22,6 +22,8 @@ #define SYSTEM_MAX_ID 31 +#define GCK_INDEX_DT_AUDIO_PLL 5 + #ifdef CONFIG_HAVE_AT91_AUDIO_PLL static void __init of_sama5d2_clk_audio_pll_frac_setup(struct device_node *np) { @@ -135,7 +137,7 @@ static void __init of_sama5d2_clk_generated_setup(struct device_node *np) return; for_each_child_of_node(np, gcknp) { - bool pll_audio = false; + int chg_pid = INT_MIN; if (of_property_read_u32(gcknp, "reg", &id)) continue; @@ -152,12 +154,12 @@ static void __init of_sama5d2_clk_generated_setup(struct device_node *np) if (of_device_is_compatible(np, "atmel,sama5d2-clk-generated") && (id == GCK_ID_I2S0 || id == GCK_ID_I2S1 || id == GCK_ID_CLASSD)) - pll_audio = true; + chg_pid = GCK_INDEX_DT_AUDIO_PLL; hw = at91_clk_register_generated(regmap, &pmc_pcr_lock, &dt_pcr_layout, name, parent_names, num_parents, - id, pll_audio, &range); + id, &range, chg_pid); if (IS_ERR(hw)) continue; diff --git a/drivers/clk/at91/pmc.c b/drivers/clk/at91/pmc.c index b71515acdec1f..976ca41e9157e 100644 --- a/drivers/clk/at91/pmc.c +++ b/drivers/clk/at91/pmc.c @@ -275,6 +275,11 @@ static int __init pmc_register_ops(void) np = of_find_matching_node(NULL, sama5d2_pmc_dt_ids); + if (!of_device_is_available(np)) { + of_node_put(np); + return -ENODEV; + } + pmcreg = device_node_to_regmap(np); if (IS_ERR(pmcreg)) return PTR_ERR(pmcreg); diff --git a/drivers/clk/at91/pmc.h b/drivers/clk/at91/pmc.h index 9b8db9cdcda53..8a88ad2360742 100644 --- a/drivers/clk/at91/pmc.h +++ b/drivers/clk/at91/pmc.h @@ -118,8 +118,8 @@ struct clk_hw * __init at91_clk_register_generated(struct regmap *regmap, spinlock_t *lock, const struct clk_pcr_layout *layout, const char *name, const char **parent_names, - u8 num_parents, u8 id, bool pll_audio, - const struct clk_range *range); + u8 num_parents, u8 id, + const struct clk_range *range, int chg_pid); struct clk_hw * __init at91_clk_register_h32mx(struct regmap *regmap, const char *name, diff --git a/drivers/clk/at91/sam9x60.c b/drivers/clk/at91/sam9x60.c index e3f4c8f20223a..39923899478f9 100644 --- a/drivers/clk/at91/sam9x60.c +++ b/drivers/clk/at91/sam9x60.c @@ -124,7 +124,6 @@ static const struct { char *n; u8 id; struct clk_range r; - bool pll; } sam9x60_gck[] = { { .n = "flex0_gclk", .id = 5, }, { .n = "flex1_gclk", .id = 6, }, @@ -144,11 +143,9 @@ static const struct { { .n = "sdmmc1_gclk", .id = 26, .r = { .min = 0, .max = 105000000 }, }, { .n = "flex11_gclk", .id = 32, }, { .n = "flex12_gclk", .id = 33, }, - { .n = "i2s_gclk", .id = 34, .r = { .min = 0, .max = 105000000 }, - .pll = true, }, + { .n = "i2s_gclk", .id = 34, .r = { .min = 0, .max = 105000000 }, }, { .n = "pit64b_gclk", .id = 37, }, - { .n = "classd_gclk", .id = 42, .r = { .min = 0, .max = 100000000 }, - .pll = true, }, + { .n = "classd_gclk", .id = 42, .r = { .min = 0, .max = 100000000 }, }, { .n = "tcb1_gclk", .id = 45, }, { .n = "dbgu_gclk", .id = 47, }, }; @@ -285,8 +282,7 @@ static void __init sam9x60_pmc_setup(struct device_node *np) sam9x60_gck[i].n, parent_names, 6, sam9x60_gck[i].id, - sam9x60_gck[i].pll, - &sam9x60_gck[i].r); + &sam9x60_gck[i].r, INT_MIN); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/at91/sama5d2.c b/drivers/clk/at91/sama5d2.c index ff7e3f727082e..d3c4bceb032d1 100644 --- a/drivers/clk/at91/sama5d2.c +++ b/drivers/clk/at91/sama5d2.c @@ -115,21 +115,20 @@ static const struct { char *n; u8 id; struct clk_range r; - bool pll; + int chg_pid; } sama5d2_gck[] = { - { .n = "sdmmc0_gclk", .id = 31, }, - { .n = "sdmmc1_gclk", .id = 32, }, - { .n = "tcb0_gclk", .id = 35, .r = { .min = 0, .max = 83000000 }, }, - { .n = "tcb1_gclk", .id = 36, .r = { .min = 0, .max = 83000000 }, }, - { .n = "pwm_gclk", .id = 38, .r = { .min = 0, .max = 83000000 }, }, - { .n = "isc_gclk", .id = 46, }, - { .n = "pdmic_gclk", .id = 48, }, - { .n = "i2s0_gclk", .id = 54, .pll = true }, - { .n = "i2s1_gclk", .id = 55, .pll = true }, - { .n = "can0_gclk", .id = 56, .r = { .min = 0, .max = 80000000 }, }, - { .n = "can1_gclk", .id = 57, .r = { .min = 0, .max = 80000000 }, }, - { .n = "classd_gclk", .id = 59, .r = { .min = 0, .max = 100000000 }, - .pll = true }, + { .n = "sdmmc0_gclk", .id = 31, .chg_pid = INT_MIN, }, + { .n = "sdmmc1_gclk", .id = 32, .chg_pid = INT_MIN, }, + { .n = "tcb0_gclk", .id = 35, .chg_pid = INT_MIN, .r = { .min = 0, .max = 83000000 }, }, + { .n = "tcb1_gclk", .id = 36, .chg_pid = INT_MIN, .r = { .min = 0, .max = 83000000 }, }, + { .n = "pwm_gclk", .id = 38, .chg_pid = INT_MIN, .r = { .min = 0, .max = 83000000 }, }, + { .n = "isc_gclk", .id = 46, .chg_pid = INT_MIN, }, + { .n = "pdmic_gclk", .id = 48, .chg_pid = INT_MIN, }, + { .n = "i2s0_gclk", .id = 54, .chg_pid = 5, }, + { .n = "i2s1_gclk", .id = 55, .chg_pid = 5, }, + { .n = "can0_gclk", .id = 56, .chg_pid = INT_MIN, .r = { .min = 0, .max = 80000000 }, }, + { .n = "can1_gclk", .id = 57, .chg_pid = INT_MIN, .r = { .min = 0, .max = 80000000 }, }, + { .n = "classd_gclk", .id = 59, .chg_pid = 5, .r = { .min = 0, .max = 100000000 }, }, }; static const struct clk_programmable_layout sama5d2_programmable_layout = { @@ -317,8 +316,8 @@ static void __init sama5d2_pmc_setup(struct device_node *np) sama5d2_gck[i].n, parent_names, 6, sama5d2_gck[i].id, - sama5d2_gck[i].pll, - &sama5d2_gck[i].r); + &sama5d2_gck[i].r, + sama5d2_gck[i].chg_pid); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c index c5486537b9284..e637bd6b295bd 100644 --- a/drivers/clk/bcm/clk-bcm2835.c +++ b/drivers/clk/bcm/clk-bcm2835.c @@ -932,8 +932,7 @@ static int bcm2835_clock_is_on(struct clk_hw *hw) static u32 bcm2835_clock_choose_div(struct clk_hw *hw, unsigned long rate, - unsigned long parent_rate, - bool round_up) + unsigned long parent_rate) { struct bcm2835_clock *clock = bcm2835_clock_from_hw(hw); const struct bcm2835_clock_data *data = clock->data; @@ -945,10 +944,6 @@ static u32 bcm2835_clock_choose_div(struct clk_hw *hw, rem = do_div(temp, rate); div = temp; - - /* Round up and mask off the unused bits */ - if (round_up && ((div & unused_frac_mask) != 0 || rem != 0)) - div += unused_frac_mask + 1; div &= ~unused_frac_mask; /* different clamping limits apply for a mash clock */ @@ -1079,7 +1074,7 @@ static int bcm2835_clock_set_rate(struct clk_hw *hw, struct bcm2835_clock *clock = bcm2835_clock_from_hw(hw); struct bcm2835_cprman *cprman = clock->cprman; const struct bcm2835_clock_data *data = clock->data; - u32 div = bcm2835_clock_choose_div(hw, rate, parent_rate, false); + u32 div = bcm2835_clock_choose_div(hw, rate, parent_rate); u32 ctl; spin_lock(&cprman->regs_lock); @@ -1130,7 +1125,7 @@ static unsigned long bcm2835_clock_choose_div_and_prate(struct clk_hw *hw, if (!(BIT(parent_idx) & data->set_rate_parent)) { *prate = clk_hw_get_rate(parent); - *div = bcm2835_clock_choose_div(hw, rate, *prate, true); + *div = bcm2835_clock_choose_div(hw, rate, *prate); *avgrate = bcm2835_clock_rate_from_divisor(clock, *prate, *div); @@ -1216,7 +1211,7 @@ static int bcm2835_clock_determine_rate(struct clk_hw *hw, rate = bcm2835_clock_choose_div_and_prate(hw, i, req->rate, &div, &prate, &avgrate); - if (rate > best_rate && rate <= req->rate) { + if (abs(req->rate - rate) < abs(req->rate - best_rate)) { best_parent = parent; best_prate = prate; best_rate = rate; diff --git a/drivers/clk/clk-ast2600.c b/drivers/clk/clk-ast2600.c index af957179b135e..48122f574cb65 100644 --- a/drivers/clk/clk-ast2600.c +++ b/drivers/clk/clk-ast2600.c @@ -48,6 +48,8 @@ static DEFINE_SPINLOCK(aspeed_g6_clk_lock); static struct clk_hw_onecell_data *aspeed_g6_clk_data; static void __iomem *scu_g6_base; +/* AST2600 revision: A0, A1, A2, etc */ +static u8 soc_rev; /* * Clocks marked with CLK_IS_CRITICAL: @@ -190,9 +192,8 @@ static struct clk_hw *ast2600_calc_pll(const char *name, u32 val) static struct clk_hw *ast2600_calc_apll(const char *name, u32 val) { unsigned int mult, div; - u32 chip_id = readl(scu_g6_base + ASPEED_G6_SILICON_REV); - if (((chip_id & CHIP_REVISION_ID) >> 16) >= 2) { + if (soc_rev >= 2) { if (val & BIT(24)) { /* Pass through mode */ mult = div = 1; @@ -664,7 +665,7 @@ static const u32 ast2600_a1_axi_ahb200_tbl[] = { static void __init aspeed_g6_cc(struct regmap *map) { struct clk_hw *hw; - u32 val, div, divbits, chip_id, axi_div, ahb_div; + u32 val, div, divbits, axi_div, ahb_div; clk_hw_register_fixed_rate(NULL, "clkin", NULL, 0, 25000000); @@ -695,8 +696,7 @@ static void __init aspeed_g6_cc(struct regmap *map) axi_div = 2; divbits = (val >> 11) & 0x3; - regmap_read(map, ASPEED_G6_SILICON_REV, &chip_id); - if (chip_id & BIT(16)) { + if (soc_rev >= 1) { if (!divbits) { ahb_div = ast2600_a1_axi_ahb200_tbl[(val >> 8) & 0x3]; if (val & BIT(16)) @@ -741,6 +741,8 @@ static void __init aspeed_g6_cc_init(struct device_node *np) if (!scu_g6_base) return; + soc_rev = (readl(scu_g6_base + ASPEED_G6_SILICON_REV) & CHIP_REVISION_ID) >> 16; + aspeed_g6_clk_data = kzalloc(struct_size(aspeed_g6_clk_data, hws, ASPEED_G6_NUM_CLKS), GFP_KERNEL); if (!aspeed_g6_clk_data) diff --git a/drivers/clk/clk-clps711x.c b/drivers/clk/clk-clps711x.c index a2c6486ef1708..f8417ee2961aa 100644 --- a/drivers/clk/clk-clps711x.c +++ b/drivers/clk/clk-clps711x.c @@ -28,11 +28,13 @@ static const struct clk_div_table spi_div_table[] = { { .val = 1, .div = 8, }, { .val = 2, .div = 2, }, { .val = 3, .div = 1, }, + { /* sentinel */ } }; static const struct clk_div_table timer_div_table[] = { { .val = 0, .div = 256, }, { .val = 1, .div = 1, }, + { /* sentinel */ } }; struct clps711x_clk { diff --git a/drivers/clk/clk-si5341.c b/drivers/clk/clk-si5341.c index 8f9f3d4a54fd2..07ef9995b3cb1 100644 --- a/drivers/clk/clk-si5341.c +++ b/drivers/clk/clk-si5341.c @@ -638,6 +638,15 @@ static unsigned long si5341_output_clk_recalc_rate(struct clk_hw *hw, u32 r_divider; u8 r[3]; + err = regmap_read(output->data->regmap, + SI5341_OUT_CONFIG(output), &val); + if (err < 0) + return err; + + /* If SI5341_OUT_CFG_RDIV_FORCE2 is set, r_divider is 2 */ + if (val & SI5341_OUT_CFG_RDIV_FORCE2) + return parent_rate / 2; + err = regmap_bulk_read(output->data->regmap, SI5341_OUT_R_REG(output), r, 3); if (err < 0) @@ -654,13 +663,6 @@ static unsigned long si5341_output_clk_recalc_rate(struct clk_hw *hw, r_divider += 1; r_divider <<= 1; - err = regmap_read(output->data->regmap, - SI5341_OUT_CONFIG(output), &val); - if (err < 0) - return err; - - if (val & SI5341_OUT_CFG_RDIV_FORCE2) - r_divider = 2; return parent_rate / r_divider; } @@ -1303,7 +1305,7 @@ static int si5341_probe(struct i2c_client *client, clk_prepare(data->clk[i].hw.clk); } - err = of_clk_add_hw_provider(client->dev.of_node, of_clk_si5341_get, + err = devm_of_clk_add_hw_provider(&client->dev, of_clk_si5341_get, data); if (err) { dev_err(&client->dev, "unable to add clk provider\n"); diff --git a/drivers/clk/clk-stm32f4.c b/drivers/clk/clk-stm32f4.c index 5c75e3d906c20..682a18b392f08 100644 --- a/drivers/clk/clk-stm32f4.c +++ b/drivers/clk/clk-stm32f4.c @@ -129,7 +129,6 @@ static const struct stm32f4_gate_data stm32f429_gates[] __initconst = { { STM32F4_RCC_APB2ENR, 20, "spi5", "apb2_div" }, { STM32F4_RCC_APB2ENR, 21, "spi6", "apb2_div" }, { STM32F4_RCC_APB2ENR, 22, "sai1", "apb2_div" }, - { STM32F4_RCC_APB2ENR, 26, "ltdc", "apb2_div" }, }; static const struct stm32f4_gate_data stm32f469_gates[] __initconst = { @@ -211,7 +210,6 @@ static const struct stm32f4_gate_data stm32f469_gates[] __initconst = { { STM32F4_RCC_APB2ENR, 20, "spi5", "apb2_div" }, { STM32F4_RCC_APB2ENR, 21, "spi6", "apb2_div" }, { STM32F4_RCC_APB2ENR, 22, "sai1", "apb2_div" }, - { STM32F4_RCC_APB2ENR, 26, "ltdc", "apb2_div" }, }; static const struct stm32f4_gate_data stm32f746_gates[] __initconst = { @@ -286,7 +284,6 @@ static const struct stm32f4_gate_data stm32f746_gates[] __initconst = { { STM32F4_RCC_APB2ENR, 21, "spi6", "apb2_div" }, { STM32F4_RCC_APB2ENR, 22, "sai1", "apb2_div" }, { STM32F4_RCC_APB2ENR, 23, "sai2", "apb2_div" }, - { STM32F4_RCC_APB2ENR, 26, "ltdc", "apb2_div" }, }; static const struct stm32f4_gate_data stm32f769_gates[] __initconst = { @@ -364,7 +361,6 @@ static const struct stm32f4_gate_data stm32f769_gates[] __initconst = { { STM32F4_RCC_APB2ENR, 21, "spi6", "apb2_div" }, { STM32F4_RCC_APB2ENR, 22, "sai1", "apb2_div" }, { STM32F4_RCC_APB2ENR, 23, "sai2", "apb2_div" }, - { STM32F4_RCC_APB2ENR, 26, "ltdc", "apb2_div" }, { STM32F4_RCC_APB2ENR, 30, "mdio", "apb2_div" }, }; diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 6ff87cd867121..13332f89e034b 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -637,6 +637,24 @@ static void clk_core_get_boundaries(struct clk_core *core, *max_rate = min(*max_rate, clk_user->max_rate); } +static bool clk_core_check_boundaries(struct clk_core *core, + unsigned long min_rate, + unsigned long max_rate) +{ + struct clk *user; + + lockdep_assert_held(&prepare_lock); + + if (min_rate > core->max_rate || max_rate < core->min_rate) + return false; + + hlist_for_each_entry(user, &core->clks, clks_node) + if (min_rate > user->max_rate || max_rate < user->min_rate) + return false; + + return true; +} + void clk_hw_set_rate_range(struct clk_hw *hw, unsigned long min_rate, unsigned long max_rate) { @@ -2306,6 +2324,11 @@ int clk_set_rate_range(struct clk *clk, unsigned long min, unsigned long max) clk->min_rate = min; clk->max_rate = max; + if (!clk_core_check_boundaries(clk->core, min, max)) { + ret = -EINVAL; + goto out; + } + rate = clk_core_get_rate_nolock(clk->core); if (rate < min || rate > max) { /* @@ -2334,6 +2357,7 @@ int clk_set_rate_range(struct clk *clk, unsigned long min, unsigned long max) } } +out: if (clk->exclusive_count) clk_core_rate_protect(clk->core); @@ -3278,6 +3302,19 @@ static void clk_core_reparent_orphans_nolock(void) __clk_set_parent_after(orphan, parent, NULL); __clk_recalc_accuracies(orphan); __clk_recalc_rates(orphan, 0); + + /* + * __clk_init_parent() will set the initial req_rate to + * 0 if the clock doesn't have clk_ops::recalc_rate and + * is an orphan when it's registered. + * + * 'req_rate' is used by clk_set_rate_range() and + * clk_put() to trigger a clk_set_rate() call whenever + * the boundaries are modified. Let's make sure + * 'req_rate' is set to something non-zero so that + * clk_set_rate_range() doesn't drop the frequency. + */ + orphan->req_rate = orphan->rate; } } } @@ -3299,6 +3336,14 @@ static int __clk_core_init(struct clk_core *core) clk_prepare_lock(); + /* + * Set hw->core after grabbing the prepare_lock to synchronize with + * callers of clk_core_fill_parent_index() where we treat hw->core + * being NULL as the clk not being registered yet. This is crucial so + * that clks aren't parented until their parent is fully registered. + */ + core->hw->core = core; + ret = clk_pm_runtime_get(core); if (ret) goto unlock; @@ -3452,8 +3497,10 @@ static int __clk_core_init(struct clk_core *core) out: clk_pm_runtime_put(core); unlock: - if (ret) + if (ret) { hlist_del_init(&core->child_node); + core->hw->core = NULL; + } clk_prepare_unlock(); @@ -3699,7 +3746,6 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw) core->num_parents = init->num_parents; core->min_rate = 0; core->max_rate = ULONG_MAX; - hw->core = core; ret = clk_core_populate_parent_map(core, init); if (ret) @@ -3717,7 +3763,7 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw) goto fail_create_clk; } - clk_core_link_consumer(hw->core, hw->clk); + clk_core_link_consumer(core, hw->clk); ret = __clk_core_init(core); if (!ret) diff --git a/drivers/clk/imx/clk-imx6ul.c b/drivers/clk/imx/clk-imx6ul.c index bc931988fe7b2..f3ac5a524f4ed 100644 --- a/drivers/clk/imx/clk-imx6ul.c +++ b/drivers/clk/imx/clk-imx6ul.c @@ -161,7 +161,6 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node) hws[IMX6UL_PLL5_BYPASS] = imx_clk_hw_mux_flags("pll5_bypass", base + 0xa0, 16, 1, pll5_bypass_sels, ARRAY_SIZE(pll5_bypass_sels), CLK_SET_RATE_PARENT); hws[IMX6UL_PLL6_BYPASS] = imx_clk_hw_mux_flags("pll6_bypass", base + 0xe0, 16, 1, pll6_bypass_sels, ARRAY_SIZE(pll6_bypass_sels), CLK_SET_RATE_PARENT); hws[IMX6UL_PLL7_BYPASS] = imx_clk_hw_mux_flags("pll7_bypass", base + 0x20, 16, 1, pll7_bypass_sels, ARRAY_SIZE(pll7_bypass_sels), CLK_SET_RATE_PARENT); - hws[IMX6UL_CLK_CSI_SEL] = imx_clk_hw_mux_flags("csi_sel", base + 0x3c, 9, 2, csi_sels, ARRAY_SIZE(csi_sels), CLK_SET_RATE_PARENT); /* Do not bypass PLLs initially */ clk_set_parent(hws[IMX6UL_PLL1_BYPASS]->clk, hws[IMX6UL_CLK_PLL1]->clk); @@ -270,6 +269,7 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node) hws[IMX6UL_CLK_ECSPI_SEL] = imx_clk_hw_mux("ecspi_sel", base + 0x38, 18, 1, ecspi_sels, ARRAY_SIZE(ecspi_sels)); hws[IMX6UL_CLK_LCDIF_PRE_SEL] = imx_clk_hw_mux_flags("lcdif_pre_sel", base + 0x38, 15, 3, lcdif_pre_sels, ARRAY_SIZE(lcdif_pre_sels), CLK_SET_RATE_PARENT); hws[IMX6UL_CLK_LCDIF_SEL] = imx_clk_hw_mux("lcdif_sel", base + 0x38, 9, 3, lcdif_sels, ARRAY_SIZE(lcdif_sels)); + hws[IMX6UL_CLK_CSI_SEL] = imx_clk_hw_mux("csi_sel", base + 0x3c, 9, 2, csi_sels, ARRAY_SIZE(csi_sels)); hws[IMX6UL_CLK_LDB_DI0_DIV_SEL] = imx_clk_hw_mux("ldb_di0", base + 0x20, 10, 1, ldb_di0_div_sels, ARRAY_SIZE(ldb_di0_div_sels)); hws[IMX6UL_CLK_LDB_DI1_DIV_SEL] = imx_clk_hw_mux("ldb_di1", base + 0x20, 11, 1, ldb_di1_div_sels, ARRAY_SIZE(ldb_di1_div_sels)); diff --git a/drivers/clk/imx/clk-imx7d.c b/drivers/clk/imx/clk-imx7d.c index fbea774ef6877..58577548fe212 100644 --- a/drivers/clk/imx/clk-imx7d.c +++ b/drivers/clk/imx/clk-imx7d.c @@ -859,7 +859,6 @@ static void __init imx7d_clocks_init(struct device_node *ccm_node) hws[IMX7D_WDOG4_ROOT_CLK] = imx_clk_hw_gate4("wdog4_root_clk", "wdog_post_div", base + 0x49f0, 0); hws[IMX7D_KPP_ROOT_CLK] = imx_clk_hw_gate4("kpp_root_clk", "ipg_root_clk", base + 0x4aa0, 0); hws[IMX7D_CSI_MCLK_ROOT_CLK] = imx_clk_hw_gate4("csi_mclk_root_clk", "csi_mclk_post_div", base + 0x4490, 0); - hws[IMX7D_AUDIO_MCLK_ROOT_CLK] = imx_clk_hw_gate4("audio_mclk_root_clk", "audio_mclk_post_div", base + 0x4790, 0); hws[IMX7D_WRCLK_ROOT_CLK] = imx_clk_hw_gate4("wrclk_root_clk", "wrclk_post_div", base + 0x47a0, 0); hws[IMX7D_USB_CTRL_CLK] = imx_clk_hw_gate4("usb_ctrl_clk", "ahb_root_clk", base + 0x4680, 0); hws[IMX7D_USB_PHY1_CLK] = imx_clk_hw_gate4("usb_phy1_clk", "pll_usb1_main_clk", base + 0x46a0, 0); diff --git a/drivers/clk/imx/clk-imx8mn.c b/drivers/clk/imx/clk-imx8mn.c index 58b5acee38306..882b42efd2582 100644 --- a/drivers/clk/imx/clk-imx8mn.c +++ b/drivers/clk/imx/clk-imx8mn.c @@ -358,9 +358,9 @@ static const char * const imx8mn_pdm_sels[] = {"osc_24m", "sys_pll2_100m", "audi static const char * const imx8mn_dram_core_sels[] = {"dram_pll_out", "dram_alt_root", }; -static const char * const imx8mn_clko1_sels[] = {"osc_24m", "sys_pll1_800m", "osc_27m", - "sys_pll1_200m", "audio_pll2_out", "vpu_pll", - "sys_pll1_80m", }; +static const char * const imx8mn_clko1_sels[] = {"osc_24m", "sys_pll1_800m", "dummy", + "sys_pll1_200m", "audio_pll2_out", "sys_pll2_500m", + "dummy", "sys_pll1_80m", }; static const char * const imx8mn_clko2_sels[] = {"osc_24m", "sys_pll2_200m", "sys_pll1_400m", "sys_pll2_166m", "sys_pll3_out", "audio_pll1_out", "video_pll1_out", "osc_32k", }; diff --git a/drivers/clk/ingenic/cgu.c b/drivers/clk/ingenic/cgu.c index 7490d4f4d9366..dff759c0f6193 100644 --- a/drivers/clk/ingenic/cgu.c +++ b/drivers/clk/ingenic/cgu.c @@ -426,15 +426,15 @@ ingenic_clk_calc_div(const struct ingenic_cgu_clk_info *clk_info, } /* Impose hardware constraints */ - div = min_t(unsigned, div, 1 << clk_info->div.bits); - div = max_t(unsigned, div, 1); + div = clamp_t(unsigned int, div, clk_info->div.div, + clk_info->div.div << clk_info->div.bits); /* * If the divider value itself must be divided before being written to * the divider register, we must ensure we don't have any bits set that * would be lost as a result of doing so. */ - div /= clk_info->div.div; + div = DIV_ROUND_UP(div, clk_info->div.div); div *= clk_info->div.div; return div; diff --git a/drivers/clk/ingenic/jz4725b-cgu.c b/drivers/clk/ingenic/jz4725b-cgu.c index a3b4635f62784..97afabb7fe8e5 100644 --- a/drivers/clk/ingenic/jz4725b-cgu.c +++ b/drivers/clk/ingenic/jz4725b-cgu.c @@ -135,11 +135,10 @@ static const struct ingenic_cgu_clk_info jz4725b_cgu_clocks[] = { }, [JZ4725B_CLK_I2S] = { - "i2s", CGU_CLK_MUX | CGU_CLK_DIV | CGU_CLK_GATE, + "i2s", CGU_CLK_MUX | CGU_CLK_DIV, .parents = { JZ4725B_CLK_EXT, JZ4725B_CLK_PLL_HALF, -1, -1 }, .mux = { CGU_REG_CPCCR, 31, 1 }, .div = { CGU_REG_I2SCDR, 0, 1, 9, -1, -1, -1 }, - .gate = { CGU_REG_CLKGR, 6 }, }, [JZ4725B_CLK_SPI] = { diff --git a/drivers/clk/loongson1/clk-loongson1c.c b/drivers/clk/loongson1/clk-loongson1c.c index 703f87622cf5f..1ebf740380efb 100644 --- a/drivers/clk/loongson1/clk-loongson1c.c +++ b/drivers/clk/loongson1/clk-loongson1c.c @@ -37,6 +37,7 @@ static const struct clk_div_table ahb_div_table[] = { [1] = { .val = 1, .div = 4 }, [2] = { .val = 2, .div = 3 }, [3] = { .val = 3, .div = 3 }, + [4] = { /* sentinel */ } }; void __init ls1x_clk_init(void) diff --git a/drivers/clk/meson/gxbb.c b/drivers/clk/meson/gxbb.c index 1f9c056e684ce..e8e36ec70b27f 100644 --- a/drivers/clk/meson/gxbb.c +++ b/drivers/clk/meson/gxbb.c @@ -712,6 +712,35 @@ static struct clk_regmap gxbb_mpll_prediv = { }; static struct clk_regmap gxbb_mpll0_div = { + .data = &(struct meson_clk_mpll_data){ + .sdm = { + .reg_off = HHI_MPLL_CNTL7, + .shift = 0, + .width = 14, + }, + .sdm_en = { + .reg_off = HHI_MPLL_CNTL, + .shift = 25, + .width = 1, + }, + .n2 = { + .reg_off = HHI_MPLL_CNTL7, + .shift = 16, + .width = 9, + }, + .lock = &meson_clk_lock, + }, + .hw.init = &(struct clk_init_data){ + .name = "mpll0_div", + .ops = &meson_clk_mpll_ops, + .parent_hws = (const struct clk_hw *[]) { + &gxbb_mpll_prediv.hw + }, + .num_parents = 1, + }, +}; + +static struct clk_regmap gxl_mpll0_div = { .data = &(struct meson_clk_mpll_data){ .sdm = { .reg_off = HHI_MPLL_CNTL7, @@ -748,7 +777,16 @@ static struct clk_regmap gxbb_mpll0 = { .hw.init = &(struct clk_init_data){ .name = "mpll0", .ops = &clk_regmap_gate_ops, - .parent_hws = (const struct clk_hw *[]) { &gxbb_mpll0_div.hw }, + .parent_data = &(const struct clk_parent_data) { + /* + * Note: + * GXL and GXBB have different SDM_EN registers. We + * fallback to the global naming string mechanism so + * mpll0_div picks up the appropriate one. + */ + .name = "mpll0_div", + .index = -1, + }, .num_parents = 1, .flags = CLK_SET_RATE_PARENT, }, @@ -3036,7 +3074,7 @@ static struct clk_hw_onecell_data gxl_hw_onecell_data = { [CLKID_VAPB_1] = &gxbb_vapb_1.hw, [CLKID_VAPB_SEL] = &gxbb_vapb_sel.hw, [CLKID_VAPB] = &gxbb_vapb.hw, - [CLKID_MPLL0_DIV] = &gxbb_mpll0_div.hw, + [CLKID_MPLL0_DIV] = &gxl_mpll0_div.hw, [CLKID_MPLL1_DIV] = &gxbb_mpll1_div.hw, [CLKID_MPLL2_DIV] = &gxbb_mpll2_div.hw, [CLKID_MPLL_PREDIV] = &gxbb_mpll_prediv.hw, @@ -3430,7 +3468,7 @@ static struct clk_regmap *const gxl_clk_regmaps[] = { &gxbb_mpll0, &gxbb_mpll1, &gxbb_mpll2, - &gxbb_mpll0_div, + &gxl_mpll0_div, &gxbb_mpll1_div, &gxbb_mpll2_div, &gxbb_cts_amclk_div, diff --git a/drivers/clk/mvebu/ap-cpu-clk.c b/drivers/clk/mvebu/ap-cpu-clk.c index af5e5acad3706..bde4a7d6a1d33 100644 --- a/drivers/clk/mvebu/ap-cpu-clk.c +++ b/drivers/clk/mvebu/ap-cpu-clk.c @@ -256,12 +256,15 @@ static int ap_cpu_clock_probe(struct platform_device *pdev) int cpu, err; err = of_property_read_u32(dn, "reg", &cpu); - if (WARN_ON(err)) + if (WARN_ON(err)) { + of_node_put(dn); return err; + } /* If cpu2 or cpu3 is enabled */ if (cpu & APN806_CLUSTER_NUM_MASK) { nclusters = 2; + of_node_put(dn); break; } } @@ -288,8 +291,10 @@ static int ap_cpu_clock_probe(struct platform_device *pdev) int cpu, err; err = of_property_read_u32(dn, "reg", &cpu); - if (WARN_ON(err)) + if (WARN_ON(err)) { + of_node_put(dn); return err; + } cluster_index = cpu & APN806_CLUSTER_NUM_MASK; cluster_index >>= APN806_CLUSTER_NUM_OFFSET; @@ -301,6 +306,7 @@ static int ap_cpu_clock_probe(struct platform_device *pdev) parent = of_clk_get(np, cluster_index); if (IS_ERR(parent)) { dev_err(dev, "Could not get the clock parent\n"); + of_node_put(dn); return -EINVAL; } parent_name = __clk_get_name(parent); @@ -319,8 +325,10 @@ static int ap_cpu_clock_probe(struct platform_device *pdev) init.parent_names = &parent_name; ret = devm_clk_hw_register(dev, &ap_cpu_clk[cluster_index].hw); - if (ret) + if (ret) { + of_node_put(dn); return ret; + } ap_cpu_data->hws[cluster_index] = &ap_cpu_clk[cluster_index].hw; } diff --git a/drivers/clk/mvebu/kirkwood.c b/drivers/clk/mvebu/kirkwood.c index 47680237d0beb..8bc893df47364 100644 --- a/drivers/clk/mvebu/kirkwood.c +++ b/drivers/clk/mvebu/kirkwood.c @@ -265,6 +265,7 @@ static const char *powersave_parents[] = { static const struct clk_muxing_soc_desc kirkwood_mux_desc[] __initconst = { { "powersave", powersave_parents, ARRAY_SIZE(powersave_parents), 11, 1, 0 }, + { } }; static struct clk *clk_muxing_get_src( diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c index a88101480e337..89c1adeb84d44 100644 --- a/drivers/clk/qcom/clk-rcg2.c +++ b/drivers/clk/qcom/clk-rcg2.c @@ -263,7 +263,7 @@ static int clk_rcg2_determine_floor_rate(struct clk_hw *hw, static int __clk_rcg2_configure(struct clk_rcg2 *rcg, const struct freq_tbl *f) { - u32 cfg, mask; + u32 cfg, mask, d_val, not2d_val, n_minus_m; struct clk_hw *hw = &rcg->clkr.hw; int ret, index = qcom_find_src_index(hw, rcg->parent_map, f->src); @@ -282,8 +282,17 @@ static int __clk_rcg2_configure(struct clk_rcg2 *rcg, const struct freq_tbl *f) if (ret) return ret; + /* Calculate 2d value */ + d_val = f->n; + + n_minus_m = f->n - f->m; + n_minus_m *= 2; + + d_val = clamp_t(u32, d_val, f->m, n_minus_m); + not2d_val = ~d_val & mask; + ret = regmap_update_bits(rcg->clkr.regmap, - RCG_D_OFFSET(rcg), mask, ~f->n); + RCG_D_OFFSET(rcg), mask, not2d_val); if (ret) return ret; } @@ -638,6 +647,7 @@ static const struct frac_entry frac_table_pixel[] = { { 2, 9 }, { 4, 9 }, { 1, 1 }, + { 2, 3 }, { } }; diff --git a/drivers/clk/qcom/clk-regmap-mux.c b/drivers/clk/qcom/clk-regmap-mux.c index b2d00b4519634..45d9cca28064f 100644 --- a/drivers/clk/qcom/clk-regmap-mux.c +++ b/drivers/clk/qcom/clk-regmap-mux.c @@ -28,7 +28,7 @@ static u8 mux_get_parent(struct clk_hw *hw) val &= mask; if (mux->parent_map) - return qcom_find_src_index(hw, mux->parent_map, val); + return qcom_find_cfg_index(hw, mux->parent_map, val); return val; } diff --git a/drivers/clk/qcom/common.c b/drivers/clk/qcom/common.c index bdeacebbf0e47..f52ccb62c00b7 100644 --- a/drivers/clk/qcom/common.c +++ b/drivers/clk/qcom/common.c @@ -69,6 +69,18 @@ int qcom_find_src_index(struct clk_hw *hw, const struct parent_map *map, u8 src) } EXPORT_SYMBOL_GPL(qcom_find_src_index); +int qcom_find_cfg_index(struct clk_hw *hw, const struct parent_map *map, u8 cfg) +{ + int i, num_parents = clk_hw_get_num_parents(hw); + + for (i = 0; i < num_parents; i++) + if (cfg == map[i].cfg) + return i; + + return -ENOENT; +} +EXPORT_SYMBOL_GPL(qcom_find_cfg_index); + struct regmap * qcom_cc_map(struct platform_device *pdev, const struct qcom_cc_desc *desc) { diff --git a/drivers/clk/qcom/common.h b/drivers/clk/qcom/common.h index bb39a7e106d8a..9c8f7b798d9fc 100644 --- a/drivers/clk/qcom/common.h +++ b/drivers/clk/qcom/common.h @@ -49,6 +49,8 @@ extern void qcom_pll_set_fsm_mode(struct regmap *m, u32 reg, u8 bias_count, u8 lock_count); extern int qcom_find_src_index(struct clk_hw *hw, const struct parent_map *map, u8 src); +extern int qcom_find_cfg_index(struct clk_hw *hw, const struct parent_map *map, + u8 cfg); extern int qcom_cc_register_board_clk(struct device *dev, const char *path, const char *name, unsigned long rate); diff --git a/drivers/clk/qcom/gcc-ipq8074.c b/drivers/clk/qcom/gcc-ipq8074.c index e01f5f591d1e2..de48ba7eba3a1 100644 --- a/drivers/clk/qcom/gcc-ipq8074.c +++ b/drivers/clk/qcom/gcc-ipq8074.c @@ -1074,7 +1074,7 @@ static struct clk_rcg2 sdcc1_apps_clk_src = { .name = "sdcc1_apps_clk_src", .parent_names = gcc_xo_gpll0_gpll2_gpll0_out_main_div2, .num_parents = 4, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_floor_ops, }, }; diff --git a/drivers/clk/qcom/gcc-msm8994.c b/drivers/clk/qcom/gcc-msm8994.c index b7fc8c7ba1957..14f0d5d840802 100644 --- a/drivers/clk/qcom/gcc-msm8994.c +++ b/drivers/clk/qcom/gcc-msm8994.c @@ -107,6 +107,7 @@ static struct clk_alpha_pll gpll4_early = { static struct clk_alpha_pll_postdiv gpll4 = { .offset = 0x1dc0, + .width = 4, .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], .clkr.hw.init = &(struct clk_init_data) { diff --git a/drivers/clk/qcom/gcc-msm8996.c b/drivers/clk/qcom/gcc-msm8996.c index d004cdaa0e39a..c1e1148f0261d 100644 --- a/drivers/clk/qcom/gcc-msm8996.c +++ b/drivers/clk/qcom/gcc-msm8996.c @@ -2937,20 +2937,6 @@ static struct clk_branch gcc_smmu_aggre0_ahb_clk = { }, }; -static struct clk_branch gcc_aggre1_pnoc_ahb_clk = { - .halt_reg = 0x82014, - .clkr = { - .enable_reg = 0x82014, - .enable_mask = BIT(0), - .hw.init = &(struct clk_init_data){ - .name = "gcc_aggre1_pnoc_ahb_clk", - .parent_names = (const char *[]){ "periph_noc_clk_src" }, - .num_parents = 1, - .ops = &clk_branch2_ops, - }, - }, -}; - static struct clk_branch gcc_aggre2_ufs_axi_clk = { .halt_reg = 0x83014, .clkr = { @@ -3453,7 +3439,6 @@ static struct clk_regmap *gcc_msm8996_clocks[] = { [GCC_AGGRE0_CNOC_AHB_CLK] = &gcc_aggre0_cnoc_ahb_clk.clkr, [GCC_SMMU_AGGRE0_AXI_CLK] = &gcc_smmu_aggre0_axi_clk.clkr, [GCC_SMMU_AGGRE0_AHB_CLK] = &gcc_smmu_aggre0_ahb_clk.clkr, - [GCC_AGGRE1_PNOC_AHB_CLK] = &gcc_aggre1_pnoc_ahb_clk.clkr, [GCC_AGGRE2_UFS_AXI_CLK] = &gcc_aggre2_ufs_axi_clk.clkr, [GCC_AGGRE2_USB3_AXI_CLK] = &gcc_aggre2_usb3_axi_clk.clkr, [GCC_QSPI_AHB_CLK] = &gcc_qspi_ahb_clk.clkr, diff --git a/drivers/clk/qcom/gdsc.c b/drivers/clk/qcom/gdsc.c index a250f59708d85..888965bb93edf 100644 --- a/drivers/clk/qcom/gdsc.c +++ b/drivers/clk/qcom/gdsc.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (c) 2015, 2017-2018, The Linux Foundation. All rights reserved. + * Copyright (c) 2015, 2017-2018, 2022, The Linux Foundation. All rights reserved. */ #include @@ -31,9 +31,14 @@ #define CFG_GDSCR_OFFSET 0x4 /* Wait 2^n CXO cycles between all states. Here, n=2 (4 cycles). */ -#define EN_REST_WAIT_VAL (0x2 << 20) -#define EN_FEW_WAIT_VAL (0x8 << 16) -#define CLK_DIS_WAIT_VAL (0x2 << 12) +#define EN_REST_WAIT_VAL 0x2 +#define EN_FEW_WAIT_VAL 0x8 +#define CLK_DIS_WAIT_VAL 0x2 + +/* Transition delay shifts */ +#define EN_REST_WAIT_SHIFT 20 +#define EN_FEW_WAIT_SHIFT 16 +#define CLK_DIS_WAIT_SHIFT 12 #define RETAIN_MEM BIT(14) #define RETAIN_PERIPH BIT(13) @@ -308,7 +313,18 @@ static int gdsc_init(struct gdsc *sc) */ mask = HW_CONTROL_MASK | SW_OVERRIDE_MASK | EN_REST_WAIT_MASK | EN_FEW_WAIT_MASK | CLK_DIS_WAIT_MASK; - val = EN_REST_WAIT_VAL | EN_FEW_WAIT_VAL | CLK_DIS_WAIT_VAL; + + if (!sc->en_rest_wait_val) + sc->en_rest_wait_val = EN_REST_WAIT_VAL; + if (!sc->en_few_wait_val) + sc->en_few_wait_val = EN_FEW_WAIT_VAL; + if (!sc->clk_dis_wait_val) + sc->clk_dis_wait_val = CLK_DIS_WAIT_VAL; + + val = sc->en_rest_wait_val << EN_REST_WAIT_SHIFT | + sc->en_few_wait_val << EN_FEW_WAIT_SHIFT | + sc->clk_dis_wait_val << CLK_DIS_WAIT_SHIFT; + ret = regmap_update_bits(sc->regmap, sc->gdscr, mask, val); if (ret) return ret; diff --git a/drivers/clk/qcom/gdsc.h b/drivers/clk/qcom/gdsc.h index 64cdc8cf0d4d2..907396ccb83fb 100644 --- a/drivers/clk/qcom/gdsc.h +++ b/drivers/clk/qcom/gdsc.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2015, 2017-2018, The Linux Foundation. All rights reserved. + * Copyright (c) 2015, 2017-2018, 2022, The Linux Foundation. All rights reserved. */ #ifndef __QCOM_GDSC_H__ @@ -21,6 +21,9 @@ struct reset_controller_dev; * @cxcs: offsets of branch registers to toggle mem/periph bits in * @cxc_count: number of @cxcs * @pwrsts: Possible powerdomain power states + * @en_rest_wait_val: transition delay value for receiving enr ack signal + * @en_few_wait_val: transition delay value for receiving enf ack signal + * @clk_dis_wait_val: transition delay value for halting clock * @resets: ids of resets associated with this gdsc * @reset_count: number of @resets * @rcdev: reset controller @@ -34,6 +37,9 @@ struct gdsc { unsigned int clamp_io_ctrl; unsigned int *cxcs; unsigned int cxc_count; + unsigned int en_rest_wait_val; + unsigned int en_few_wait_val; + unsigned int clk_dis_wait_val; const u8 pwrsts; /* Powerdomain allowable state bitfields */ #define PWRSTS_OFF BIT(0) diff --git a/drivers/clk/sunxi/clk-sun9i-mmc.c b/drivers/clk/sunxi/clk-sun9i-mmc.c index 542b31d6e96dd..636bcf2439ef2 100644 --- a/drivers/clk/sunxi/clk-sun9i-mmc.c +++ b/drivers/clk/sunxi/clk-sun9i-mmc.c @@ -109,6 +109,8 @@ static int sun9i_a80_mmc_config_clk_probe(struct platform_device *pdev) spin_lock_init(&data->lock); r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!r) + return -EINVAL; /* one clock/reset pair per word */ count = DIV_ROUND_UP((resource_size(r)), SUN9I_MMC_WIDTH); data->membase = devm_ioremap_resource(&pdev->dev, r); diff --git a/drivers/clk/tegra/clk-emc.c b/drivers/clk/tegra/clk-emc.c index ea39caf3d7622..0c1b83bedb73d 100644 --- a/drivers/clk/tegra/clk-emc.c +++ b/drivers/clk/tegra/clk-emc.c @@ -191,6 +191,7 @@ static struct tegra_emc *emc_ensure_emc_driver(struct tegra_clk_emc *tegra) tegra->emc = platform_get_drvdata(pdev); if (!tegra->emc) { + put_device(&pdev->dev); pr_err("%s: cannot find EMC driver\n", __func__); return NULL; } diff --git a/drivers/clk/uniphier/clk-uniphier-fixed-rate.c b/drivers/clk/uniphier/clk-uniphier-fixed-rate.c index 5319cd3804801..3bc55ab75314b 100644 --- a/drivers/clk/uniphier/clk-uniphier-fixed-rate.c +++ b/drivers/clk/uniphier/clk-uniphier-fixed-rate.c @@ -24,6 +24,7 @@ struct clk_hw *uniphier_clk_register_fixed_rate(struct device *dev, init.name = name; init.ops = &clk_fixed_rate_ops; + init.flags = 0; init.parent_names = NULL; init.num_parents = 0; diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 3bb5625504e2f..9bfe4c5af87e3 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -24,6 +24,7 @@ config I8253_LOCK config OMAP_DM_TIMER bool + select TIMER_OF config CLKBLD_I8253 def_bool y if CLKSRC_I8253 || CLKEVT_I8253 || I8253_LOCK diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c index eb596ff9e7bb3..279ddff81ab49 100644 --- a/drivers/clocksource/acpi_pm.c +++ b/drivers/clocksource/acpi_pm.c @@ -229,8 +229,10 @@ static int __init parse_pmtmr(char *arg) int ret; ret = kstrtouint(arg, 16, &base); - if (ret) - return ret; + if (ret) { + pr_warn("PMTMR: invalid 'pmtmr=' value: '%s'\n", arg); + return 1; + } pr_info("PMTMR IOPort override: 0x%04x -> 0x%04x\n", pmtmr_ioport, base); diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index 36933e2b3b0df..80b4b8fee54d5 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -325,4 +325,3 @@ void __init hv_init_clocksource(void) hv_sched_clock_offset = hyperv_cs->read(hyperv_cs); hv_setup_sched_clock(read_hv_sched_clock_msr); } -EXPORT_SYMBOL_GPL(hv_init_clocksource); diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c index ef773db080e90..a0570213170d8 100644 --- a/drivers/clocksource/sh_cmt.c +++ b/drivers/clocksource/sh_cmt.c @@ -568,7 +568,8 @@ static int sh_cmt_start(struct sh_cmt_channel *ch, unsigned long flag) ch->flags |= flag; /* setup timeout if no clockevent */ - if ((flag == FLAG_CLOCKSOURCE) && (!(ch->flags & FLAG_CLOCKEVENT))) + if (ch->cmt->num_channels == 1 && + flag == FLAG_CLOCKSOURCE && (!(ch->flags & FLAG_CLOCKEVENT))) __sh_cmt_set_next(ch, ch->max_match_value); out: raw_spin_unlock_irqrestore(&ch->lock, flags); @@ -604,20 +605,25 @@ static struct sh_cmt_channel *cs_to_sh_cmt(struct clocksource *cs) static u64 sh_cmt_clocksource_read(struct clocksource *cs) { struct sh_cmt_channel *ch = cs_to_sh_cmt(cs); - unsigned long flags; u32 has_wrapped; - u64 value; - u32 raw; - raw_spin_lock_irqsave(&ch->lock, flags); - value = ch->total_cycles; - raw = sh_cmt_get_counter(ch, &has_wrapped); + if (ch->cmt->num_channels == 1) { + unsigned long flags; + u64 value; + u32 raw; - if (unlikely(has_wrapped)) - raw += ch->match_value + 1; - raw_spin_unlock_irqrestore(&ch->lock, flags); + raw_spin_lock_irqsave(&ch->lock, flags); + value = ch->total_cycles; + raw = sh_cmt_get_counter(ch, &has_wrapped); + + if (unlikely(has_wrapped)) + raw += ch->match_value + 1; + raw_spin_unlock_irqrestore(&ch->lock, flags); + + return value + raw; + } - return value + raw; + return sh_cmt_get_counter(ch, &has_wrapped); } static int sh_cmt_clocksource_enable(struct clocksource *cs) @@ -680,7 +686,7 @@ static int sh_cmt_register_clocksource(struct sh_cmt_channel *ch, cs->disable = sh_cmt_clocksource_disable; cs->suspend = sh_cmt_clocksource_suspend; cs->resume = sh_cmt_clocksource_resume; - cs->mask = CLOCKSOURCE_MASK(sizeof(u64) * 8); + cs->mask = CLOCKSOURCE_MASK(ch->cmt->info->width); cs->flags = CLOCK_SOURCE_IS_CONTINUOUS; dev_info(&ch->cmt->pdev->dev, "ch%u: used as clock source\n", diff --git a/drivers/clocksource/timer-ixp4xx.c b/drivers/clocksource/timer-ixp4xx.c index 9396745e1c179..ad904bbbac6fd 100644 --- a/drivers/clocksource/timer-ixp4xx.c +++ b/drivers/clocksource/timer-ixp4xx.c @@ -258,7 +258,6 @@ void __init ixp4xx_timer_setup(resource_size_t timerbase, } ixp4xx_timer_register(base, timer_irq, timer_freq); } -EXPORT_SYMBOL_GPL(ixp4xx_timer_setup); #ifdef CONFIG_OF static __init int ixp4xx_of_timer_init(struct device_node *np) diff --git a/drivers/clocksource/timer-of.c b/drivers/clocksource/timer-of.c index a3c73e972fce1..bf2a6f64ba0c5 100644 --- a/drivers/clocksource/timer-of.c +++ b/drivers/clocksource/timer-of.c @@ -157,9 +157,9 @@ static __init int timer_of_base_init(struct device_node *np, of_base->base = of_base->name ? of_io_request_and_map(np, of_base->index, of_base->name) : of_iomap(np, of_base->index); - if (IS_ERR(of_base->base)) { - pr_err("Failed to iomap (%s)\n", of_base->name); - return PTR_ERR(of_base->base); + if (IS_ERR_OR_NULL(of_base->base)) { + pr_err("Failed to iomap (%s:%s)\n", np->name, of_base->name); + return of_base->base ? PTR_ERR(of_base->base) : -ENOMEM; } return 0; diff --git a/drivers/clocksource/timer-oxnas-rps.c b/drivers/clocksource/timer-oxnas-rps.c index 56c0cc32d0ac6..d514b44e67dd1 100644 --- a/drivers/clocksource/timer-oxnas-rps.c +++ b/drivers/clocksource/timer-oxnas-rps.c @@ -236,7 +236,7 @@ static int __init oxnas_rps_timer_init(struct device_node *np) } rps->irq = irq_of_parse_and_map(np, 0); - if (rps->irq < 0) { + if (!rps->irq) { ret = -EINVAL; goto err_iomap; } diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c index 4b04ffbe5e7e9..e3be5c2f57b8e 100644 --- a/drivers/clocksource/timer-riscv.c +++ b/drivers/clocksource/timer-riscv.c @@ -26,7 +26,7 @@ static int riscv_clock_next_event(unsigned long delta, static DEFINE_PER_CPU(struct clock_event_device, riscv_clock_event) = { .name = "riscv_timer_clockevent", - .features = CLOCK_EVT_FEAT_ONESHOT, + .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP, .rating = 100, .set_next_event = riscv_clock_next_event, }; diff --git a/drivers/clocksource/timer-sp804.c b/drivers/clocksource/timer-sp804.c index 9c841980eed13..c9aa0498fb840 100644 --- a/drivers/clocksource/timer-sp804.c +++ b/drivers/clocksource/timer-sp804.c @@ -215,6 +215,11 @@ static int __init sp804_of_init(struct device_node *np) struct clk *clk1, *clk2; const char *name = of_get_property(np, "compatible", NULL); + if (initialized) { + pr_debug("%pOF: skipping further SP804 timer device\n", np); + return 0; + } + base = of_iomap(np, 0); if (!base) return -ENXIO; @@ -223,11 +228,6 @@ static int __init sp804_of_init(struct device_node *np) writel(0, base + TIMER_CTRL); writel(0, base + TIMER_2_BASE + TIMER_CTRL); - if (initialized || !of_device_is_available(np)) { - ret = -EINVAL; - goto err; - } - clk1 = of_clk_get(np, 0); if (IS_ERR(clk1)) clk1 = NULL; diff --git a/drivers/counter/104-quad-8.c b/drivers/counter/104-quad-8.c index 5c23a9a56921b..f261a57af1c01 100644 --- a/drivers/counter/104-quad-8.c +++ b/drivers/counter/104-quad-8.c @@ -1230,12 +1230,13 @@ static ssize_t quad8_count_ceiling_write(struct counter_device *counter, case 1: case 3: quad8_preset_register_set(priv, count->id, ceiling); - break; + mutex_unlock(&priv->lock); + return len; } mutex_unlock(&priv->lock); - return len; + return -EINVAL; } static ssize_t quad8_count_preset_enable_read(struct counter_device *counter, diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index c4e928375c40d..af9f348048629 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -995,10 +995,9 @@ static struct kobj_type ktype_cpufreq = { .release = cpufreq_sysfs_release, }; -static void add_cpu_dev_symlink(struct cpufreq_policy *policy, unsigned int cpu) +static void add_cpu_dev_symlink(struct cpufreq_policy *policy, unsigned int cpu, + struct device *dev) { - struct device *dev = get_cpu_device(cpu); - if (unlikely(!dev)) return; @@ -1384,7 +1383,7 @@ static int cpufreq_online(unsigned int cpu) if (new_policy) { for_each_cpu(j, policy->related_cpus) { per_cpu(cpufreq_cpu_data, j) = policy; - add_cpu_dev_symlink(policy, j); + add_cpu_dev_symlink(policy, j, get_cpu_device(j)); } policy->min_freq_req = kzalloc(2 * sizeof(*policy->min_freq_req), @@ -1394,7 +1393,7 @@ static int cpufreq_online(unsigned int cpu) ret = freq_qos_add_request(&policy->constraints, policy->min_freq_req, FREQ_QOS_MIN, - policy->min); + FREQ_QOS_MIN_DEFAULT_VALUE); if (ret < 0) { /* * So we don't call freq_qos_remove_request() for an @@ -1414,7 +1413,7 @@ static int cpufreq_online(unsigned int cpu) ret = freq_qos_add_request(&policy->constraints, policy->max_freq_req, FREQ_QOS_MAX, - policy->max); + FREQ_QOS_MAX_DEFAULT_VALUE); if (ret < 0) { policy->max_freq_req = NULL; goto out_destroy_policy; @@ -1547,7 +1546,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) /* Create sysfs link on CPU registration */ policy = per_cpu(cpufreq_cpu_data, cpu); if (policy) - add_cpu_dev_symlink(policy, cpu); + add_cpu_dev_symlink(policy, cpu, dev); return 0; } diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c index 73621bc119768..3704476bb83a0 100644 --- a/drivers/cpufreq/pmac32-cpufreq.c +++ b/drivers/cpufreq/pmac32-cpufreq.c @@ -471,6 +471,10 @@ static int pmac_cpufreq_init_MacRISC3(struct device_node *cpunode) if (slew_done_gpio_np) slew_done_gpio = read_gpio(slew_done_gpio_np); + of_node_put(volt_gpio_np); + of_node_put(freq_gpio_np); + of_node_put(slew_done_gpio_np); + /* If we use the frequency GPIOs, calculate the min/max speeds based * on the bus frequencies */ diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index bc6ccf2c7aae0..c636c9ba01008 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -36,6 +36,7 @@ #define MAX_PSTATE_SHIFT 32 #define LPSTATE_SHIFT 48 #define GPSTATE_SHIFT 56 +#define MAX_NR_CHIPS 32 #define MAX_RAMP_DOWN_TIME 5120 /* @@ -1050,12 +1051,20 @@ static int init_chip_info(void) unsigned int *chip; unsigned int cpu, i; unsigned int prev_chip_id = UINT_MAX; + cpumask_t *chip_cpu_mask; int ret = 0; chip = kcalloc(num_possible_cpus(), sizeof(*chip), GFP_KERNEL); if (!chip) return -ENOMEM; + /* Allocate a chip cpu mask large enough to fit mask for all chips */ + chip_cpu_mask = kcalloc(MAX_NR_CHIPS, sizeof(cpumask_t), GFP_KERNEL); + if (!chip_cpu_mask) { + ret = -ENOMEM; + goto free_and_return; + } + for_each_possible_cpu(cpu) { unsigned int id = cpu_to_chip_id(cpu); @@ -1063,22 +1072,25 @@ static int init_chip_info(void) prev_chip_id = id; chip[nr_chips++] = id; } + cpumask_set_cpu(cpu, &chip_cpu_mask[nr_chips-1]); } chips = kcalloc(nr_chips, sizeof(struct chip), GFP_KERNEL); if (!chips) { ret = -ENOMEM; - goto free_and_return; + goto out_free_chip_cpu_mask; } for (i = 0; i < nr_chips; i++) { chips[i].id = chip[i]; - cpumask_copy(&chips[i].mask, cpumask_of_node(chip[i])); + cpumask_copy(&chips[i].mask, &chip_cpu_mask[i]); INIT_WORK(&chips[i].throttle, powernv_cpufreq_work_fn); for_each_cpu(cpu, &chips[i].mask) per_cpu(chip_info, cpu) = &chips[i]; } +out_free_chip_cpu_mask: + kfree(chip_cpu_mask); free_and_return: kfree(chip); return ret; diff --git a/drivers/cpufreq/sun50i-cpufreq-nvmem.c b/drivers/cpufreq/sun50i-cpufreq-nvmem.c index 2deed8d8773fa..75e1bf3a08f7c 100644 --- a/drivers/cpufreq/sun50i-cpufreq-nvmem.c +++ b/drivers/cpufreq/sun50i-cpufreq-nvmem.c @@ -98,8 +98,10 @@ static int sun50i_cpufreq_nvmem_probe(struct platform_device *pdev) return -ENOMEM; ret = sun50i_cpufreq_get_efuse(&speed); - if (ret) + if (ret) { + kfree(opp_tables); return ret; + } snprintf(name, MAX_NAME_LEN, "speed%d", speed); diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 306a6213df1c5..86e89eba8b449 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -491,6 +491,7 @@ static int cpuidle_add_state_sysfs(struct cpuidle_device *device) &kdev->kobj, "state%d", i); if (ret) { kobject_put(&kobj->kobj); + kfree(kobj); goto error_state; } cpuidle_add_s2idle_attr_group(kobj); @@ -622,6 +623,7 @@ static int cpuidle_add_driver_sysfs(struct cpuidle_device *dev) &kdev->kobj, "driver"); if (ret) { kobject_put(&kdrv->kobj); + kfree(kdrv); return ret; } @@ -708,7 +710,6 @@ int cpuidle_add_sysfs(struct cpuidle_device *dev) if (!kdev) return -ENOMEM; kdev->dev = dev; - dev->kobj_dev = kdev; init_completion(&kdev->kobj_unregister); @@ -716,9 +717,11 @@ int cpuidle_add_sysfs(struct cpuidle_device *dev) "cpuidle"); if (error) { kobject_put(&kdev->kobj); + kfree(kdev); return error; } + dev->kobj_dev = kdev; kobject_uevent(&kdev->kobj, KOBJ_ADD); return 0; diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c index 6863d70976746..28692d068176f 100644 --- a/drivers/crypto/caam/caamalg_qi2.c +++ b/drivers/crypto/caam/caamalg_qi2.c @@ -5421,7 +5421,7 @@ int dpaa2_caam_enqueue(struct device *dev, struct caam_request *req) dpaa2_fd_set_len(&fd, dpaa2_fl_get_len(&req->fd_flt[1])); dpaa2_fd_set_flc(&fd, req->flc_dma); - ppriv = this_cpu_ptr(priv->ppriv); + ppriv = raw_cpu_ptr(priv->ppriv); for (i = 0; i < (priv->dpseci_attr.num_tx_queues << 1); i++) { err = dpaa2_io_service_enqueue_fq(ppriv->dpio, ppriv->req_fqid, &fd); diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c index 83f96d4f86e03..30e3f41ed8721 100644 --- a/drivers/crypto/caam/caampkc.c +++ b/drivers/crypto/caam/caampkc.c @@ -1087,16 +1087,27 @@ static struct caam_akcipher_alg caam_rsa = { int caam_pkc_init(struct device *ctrldev) { struct caam_drv_private *priv = dev_get_drvdata(ctrldev); - u32 pk_inst; + u32 pk_inst, pkha; int err; init_done = false; /* Determine public key hardware accelerator presence. */ - if (priv->era < 10) + if (priv->era < 10) { pk_inst = (rd_reg32(&priv->ctrl->perfmon.cha_num_ls) & CHA_ID_LS_PK_MASK) >> CHA_ID_LS_PK_SHIFT; - else - pk_inst = rd_reg32(&priv->ctrl->vreg.pkha) & CHA_VER_NUM_MASK; + } else { + pkha = rd_reg32(&priv->ctrl->vreg.pkha); + pk_inst = pkha & CHA_VER_NUM_MASK; + + /* + * Newer CAAMs support partially disabled functionality. If this is the + * case, the number is non-zero, but this bit is set to indicate that + * no encryption or decryption is supported. Only signing and verifying + * is supported. + */ + if (pkha & CHA_VER_MISC_PKHA_NO_CRYPT) + pk_inst = 0; + } /* Do not register algorithms if PKHA is not present. */ if (!pk_inst) diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index 05127b70527d7..43975f01465d2 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h @@ -317,6 +317,9 @@ struct version_regs { /* CHA Miscellaneous Information - AESA_MISC specific */ #define CHA_VER_MISC_AES_GCM BIT(1 + CHA_VER_MISC_SHIFT) +/* CHA Miscellaneous Information - PKHA_MISC specific */ +#define CHA_VER_MISC_PKHA_NO_CRYPT BIT(7 + CHA_VER_MISC_SHIFT) + /* * caam_perfmon - Performance Monitor/Secure Memory Status/ * CAAM Global Status/Component Version IDs diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c index 0770a83bf1a57..b3eea329f840f 100644 --- a/drivers/crypto/ccp/ccp-dmaengine.c +++ b/drivers/crypto/ccp/ccp-dmaengine.c @@ -633,6 +633,20 @@ static int ccp_terminate_all(struct dma_chan *dma_chan) return 0; } +static void ccp_dma_release(struct ccp_device *ccp) +{ + struct ccp_dma_chan *chan; + struct dma_chan *dma_chan; + unsigned int i; + + for (i = 0; i < ccp->cmd_q_count; i++) { + chan = ccp->ccp_dma_chan + i; + dma_chan = &chan->dma_chan; + tasklet_kill(&chan->cleanup_tasklet); + list_del_rcu(&dma_chan->device_node); + } +} + int ccp_dmaengine_register(struct ccp_device *ccp) { struct ccp_dma_chan *chan; @@ -737,6 +751,7 @@ int ccp_dmaengine_register(struct ccp_device *ccp) return 0; err_reg: + ccp_dma_release(ccp); kmem_cache_destroy(ccp->dma_desc_cache); err_cache: @@ -753,6 +768,7 @@ void ccp_dmaengine_unregister(struct ccp_device *ccp) return; dma_async_device_unregister(dma_dev); + ccp_dma_release(ccp); kmem_cache_destroy(ccp->dma_desc_cache); kmem_cache_destroy(ccp->dma_cmd_cache); diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c index 7234b95241e91..e826c4b6b3afd 100644 --- a/drivers/crypto/ccp/ccp-ops.c +++ b/drivers/crypto/ccp/ccp-ops.c @@ -778,7 +778,7 @@ ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE); if (ret) - goto e_ctx; + goto e_aad; if (in_place) { dst = src; @@ -863,7 +863,7 @@ ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) op.u.aes.size = 0; ret = cmd_q->ccp->vdata->perform->aes(&op); if (ret) - goto e_dst; + goto e_final_wa; if (aes->action == CCP_AES_ACTION_ENCRYPT) { /* Put the ciphered tag after the ciphertext. */ @@ -873,17 +873,19 @@ ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) ret = ccp_init_dm_workarea(&tag, cmd_q, authsize, DMA_BIDIRECTIONAL); if (ret) - goto e_tag; + goto e_final_wa; ret = ccp_set_dm_area(&tag, 0, p_tag, 0, authsize); - if (ret) - goto e_tag; + if (ret) { + ccp_dm_free(&tag); + goto e_final_wa; + } ret = crypto_memneq(tag.address, final_wa.address, authsize) ? -EBADMSG : 0; ccp_dm_free(&tag); } -e_tag: +e_final_wa: ccp_dm_free(&final_wa); e_dst: diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c index 954f14bddf1d7..dce30ae2b7040 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.c +++ b/drivers/crypto/ccree/cc_buffer_mgr.c @@ -295,6 +295,13 @@ static int cc_map_sg(struct device *dev, struct scatterlist *sg, { int ret = 0; + if (!nbytes) { + *mapped_nents = 0; + *lbytes = 0; + *nents = 0; + return 0; + } + *nents = cc_get_sgl_nents(dev, sg, nbytes, lbytes); if (*nents > max_sg_nents) { *nents = 0; diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c index 82b76df43ae57..3b79bcd03e7bc 100644 --- a/drivers/crypto/chelsio/chtls/chtls_cm.c +++ b/drivers/crypto/chelsio/chtls/chtls_cm.c @@ -1103,8 +1103,8 @@ static struct sock *chtls_recv_sock(struct sock *lsk, csk->sndbuf = newsk->sk_sndbuf; csk->smac_idx = ((struct port_info *)netdev_priv(ndev))->smt_idx; RCV_WSCALE(tp) = select_rcv_wscale(tcp_full_space(newsk), - sock_net(newsk)-> - ipv4.sysctl_tcp_window_scaling, + READ_ONCE(sock_net(newsk)-> + ipv4.sysctl_tcp_window_scaling), tp->window_clamp); neigh_release(n); inet_inherit_port(&tcp_hashinfo, lsk, newsk); @@ -1235,7 +1235,7 @@ static void chtls_pass_accept_request(struct sock *sk, chtls_set_req_addr(oreq, iph->daddr, iph->saddr); ip_dsfield = ipv4_get_dsfield(iph); if (req->tcpopt.wsf <= 14 && - sock_net(sk)->ipv4.sysctl_tcp_window_scaling) { + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) { inet_rsk(oreq)->wscale_ok = 1; inet_rsk(oreq)->snd_wscale = req->tcpopt.wsf; } diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c index 84ceddfee76b4..708dc63b2f099 100644 --- a/drivers/crypto/marvell/cipher.c +++ b/drivers/crypto/marvell/cipher.c @@ -610,7 +610,6 @@ struct skcipher_alg mv_cesa_ecb_des3_ede_alg = { .decrypt = mv_cesa_ecb_des3_ede_decrypt, .min_keysize = DES3_EDE_KEY_SIZE, .max_keysize = DES3_EDE_KEY_SIZE, - .ivsize = DES3_EDE_BLOCK_SIZE, .base = { .cra_name = "ecb(des3_ede)", .cra_driver_name = "mv-ecb-des3-ede", diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index f8a48a84df2ab..9443f31acd27b 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -168,15 +168,19 @@ static struct dcp *global_sdcp; static int mxs_dcp_start_dma(struct dcp_async_ctx *actx) { + int dma_err; struct dcp *sdcp = global_sdcp; const int chan = actx->chan; uint32_t stat; unsigned long ret; struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan]; - dma_addr_t desc_phys = dma_map_single(sdcp->dev, desc, sizeof(*desc), DMA_TO_DEVICE); + dma_err = dma_mapping_error(sdcp->dev, desc_phys); + if (dma_err) + return dma_err; + reinit_completion(&sdcp->completion[chan]); /* Clear status register. */ @@ -214,18 +218,29 @@ static int mxs_dcp_start_dma(struct dcp_async_ctx *actx) static int mxs_dcp_run_aes(struct dcp_async_ctx *actx, struct ablkcipher_request *req, int init) { + dma_addr_t key_phys, src_phys, dst_phys; struct dcp *sdcp = global_sdcp; struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan]; struct dcp_aes_req_ctx *rctx = ablkcipher_request_ctx(req); int ret; - dma_addr_t key_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_key, - 2 * AES_KEYSIZE_128, - DMA_TO_DEVICE); - dma_addr_t src_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_in_buf, - DCP_BUF_SZ, DMA_TO_DEVICE); - dma_addr_t dst_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_out_buf, - DCP_BUF_SZ, DMA_FROM_DEVICE); + key_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_key, + 2 * AES_KEYSIZE_128, DMA_TO_DEVICE); + ret = dma_mapping_error(sdcp->dev, key_phys); + if (ret) + return ret; + + src_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_in_buf, + DCP_BUF_SZ, DMA_TO_DEVICE); + ret = dma_mapping_error(sdcp->dev, src_phys); + if (ret) + goto err_src; + + dst_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_out_buf, + DCP_BUF_SZ, DMA_FROM_DEVICE); + ret = dma_mapping_error(sdcp->dev, dst_phys); + if (ret) + goto err_dst; if (actx->fill % AES_BLOCK_SIZE) { dev_err(sdcp->dev, "Invalid block size!\n"); @@ -263,10 +278,12 @@ static int mxs_dcp_run_aes(struct dcp_async_ctx *actx, ret = mxs_dcp_start_dma(actx); aes_done_run: + dma_unmap_single(sdcp->dev, dst_phys, DCP_BUF_SZ, DMA_FROM_DEVICE); +err_dst: + dma_unmap_single(sdcp->dev, src_phys, DCP_BUF_SZ, DMA_TO_DEVICE); +err_src: dma_unmap_single(sdcp->dev, key_phys, 2 * AES_KEYSIZE_128, DMA_TO_DEVICE); - dma_unmap_single(sdcp->dev, src_phys, DCP_BUF_SZ, DMA_TO_DEVICE); - dma_unmap_single(sdcp->dev, dst_phys, DCP_BUF_SZ, DMA_FROM_DEVICE); return ret; } @@ -281,21 +298,20 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq) struct scatterlist *dst = req->dst; struct scatterlist *src = req->src; - const int nents = sg_nents(req->src); + int dst_nents = sg_nents(dst); const int out_off = DCP_BUF_SZ; uint8_t *in_buf = sdcp->coh->aes_in_buf; uint8_t *out_buf = sdcp->coh->aes_out_buf; - uint8_t *out_tmp, *src_buf, *dst_buf = NULL; uint32_t dst_off = 0; + uint8_t *src_buf = NULL; uint32_t last_out_len = 0; uint8_t *key = sdcp->coh->aes_key; int ret = 0; - int split = 0; - unsigned int i, len, clen, rem = 0, tlen = 0; + unsigned int i, len, clen, tlen = 0; int init = 0; bool limit_hit = false; @@ -313,7 +329,7 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq) memset(key + AES_KEYSIZE_128, 0, AES_KEYSIZE_128); } - for_each_sg(req->src, src, nents, i) { + for_each_sg(req->src, src, sg_nents(req->src), i) { src_buf = sg_virt(src); len = sg_dma_len(src); tlen += len; @@ -338,34 +354,17 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq) * submit the buffer. */ if (actx->fill == out_off || sg_is_last(src) || - limit_hit) { + limit_hit) { ret = mxs_dcp_run_aes(actx, req, init); if (ret) return ret; init = 0; - out_tmp = out_buf; + sg_pcopy_from_buffer(dst, dst_nents, out_buf, + actx->fill, dst_off); + dst_off += actx->fill; last_out_len = actx->fill; - while (dst && actx->fill) { - if (!split) { - dst_buf = sg_virt(dst); - dst_off = 0; - } - rem = min(sg_dma_len(dst) - dst_off, - actx->fill); - - memcpy(dst_buf + dst_off, out_tmp, rem); - out_tmp += rem; - dst_off += rem; - actx->fill -= rem; - - if (dst_off == sg_dma_len(dst)) { - dst = sg_next(dst); - split = 0; - } else { - split = 1; - } - } + actx->fill = 0; } } while (len); @@ -565,6 +564,10 @@ static int mxs_dcp_run_sha(struct ahash_request *req) dma_addr_t buf_phys = dma_map_single(sdcp->dev, sdcp->coh->sha_in_buf, DCP_BUF_SZ, DMA_TO_DEVICE); + ret = dma_mapping_error(sdcp->dev, buf_phys); + if (ret) + return ret; + /* Fill in the DMA descriptor. */ desc->control0 = MXS_DCP_CONTROL0_DECR_SEMAPHORE | MXS_DCP_CONTROL0_INTERRUPT | @@ -597,6 +600,10 @@ static int mxs_dcp_run_sha(struct ahash_request *req) if (rctx->fini) { digest_phys = dma_map_single(sdcp->dev, sdcp->coh->sha_out_buf, DCP_SHA_PAY_SZ, DMA_FROM_DEVICE); + ret = dma_mapping_error(sdcp->dev, digest_phys); + if (ret) + goto done_run; + desc->control0 |= MXS_DCP_CONTROL0_HASH_TERM; desc->payload = digest_phys; } diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index 72edb10181b86..41ffb088831de 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -1318,7 +1318,7 @@ static int omap_aes_suspend(struct device *dev) static int omap_aes_resume(struct device *dev) { - pm_runtime_resume_and_get(dev); + pm_runtime_get_sync(dev); return 0; } #endif diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index f80db1eb29945..f8a146554b1f3 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -1734,7 +1734,7 @@ static void omap_sham_done_task(unsigned long data) if (test_and_clear_bit(FLAGS_OUTPUT_READY, &dd->flags)) goto finish; } else if (test_bit(FLAGS_DMA_READY, &dd->flags)) { - if (test_and_clear_bit(FLAGS_DMA_ACTIVE, &dd->flags)) { + if (test_bit(FLAGS_DMA_ACTIVE, &dd->flags)) { omap_sham_update_dma_stop(dd); if (dd->err) { err = dd->err; diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c index d2d0ae445fd89..7c7d49a8a4034 100644 --- a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c +++ b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c @@ -123,10 +123,10 @@ void adf_init_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data) hw_data->enable_error_correction = adf_vf_void_noop; hw_data->init_admin_comms = adf_vf_int_noop; hw_data->exit_admin_comms = adf_vf_void_noop; - hw_data->send_admin_init = adf_vf2pf_init; + hw_data->send_admin_init = adf_vf2pf_notify_init; hw_data->init_arb = adf_vf_int_noop; hw_data->exit_arb = adf_vf_void_noop; - hw_data->disable_iov = adf_vf2pf_shutdown; + hw_data->disable_iov = adf_vf2pf_notify_shutdown; hw_data->get_accel_mask = get_accel_mask; hw_data->get_ae_mask = get_ae_mask; hw_data->get_num_accels = get_num_accels; diff --git a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c index 38e4bc04f407b..90e8a7564756b 100644 --- a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c +++ b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c @@ -123,10 +123,10 @@ void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data) hw_data->enable_error_correction = adf_vf_void_noop; hw_data->init_admin_comms = adf_vf_int_noop; hw_data->exit_admin_comms = adf_vf_void_noop; - hw_data->send_admin_init = adf_vf2pf_init; + hw_data->send_admin_init = adf_vf2pf_notify_init; hw_data->init_arb = adf_vf_int_noop; hw_data->exit_arb = adf_vf_void_noop; - hw_data->disable_iov = adf_vf2pf_shutdown; + hw_data->disable_iov = adf_vf2pf_notify_shutdown; hw_data->get_accel_mask = get_accel_mask; hw_data->get_ae_mask = get_ae_mask; hw_data->get_num_accels = get_num_accels; diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index d78f8d5c89c3f..289dd7e48d4a4 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -239,8 +239,8 @@ void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev); void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev); -int adf_vf2pf_init(struct adf_accel_dev *accel_dev); -void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev); +int adf_vf2pf_notify_init(struct adf_accel_dev *accel_dev); +void adf_vf2pf_notify_shutdown(struct adf_accel_dev *accel_dev); int adf_init_pf_wq(void); void adf_exit_pf_wq(void); int adf_init_vf_wq(void); @@ -263,12 +263,12 @@ static inline void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev) { } -static inline int adf_vf2pf_init(struct adf_accel_dev *accel_dev) +static inline int adf_vf2pf_notify_init(struct adf_accel_dev *accel_dev) { return 0; } -static inline void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev) +static inline void adf_vf2pf_notify_shutdown(struct adf_accel_dev *accel_dev) { } diff --git a/drivers/crypto/qat/qat_common/adf_init.c b/drivers/crypto/qat/qat_common/adf_init.c index 26556c7130497..7a7d43c475342 100644 --- a/drivers/crypto/qat/qat_common/adf_init.c +++ b/drivers/crypto/qat/qat_common/adf_init.c @@ -105,6 +105,7 @@ int adf_dev_init(struct adf_accel_dev *accel_dev) struct service_hndl *service; struct list_head *list_itr; struct adf_hw_device_data *hw_data = accel_dev->hw_device; + int ret; if (!hw_data) { dev_err(&GET_DEV(accel_dev), @@ -171,9 +172,9 @@ int adf_dev_init(struct adf_accel_dev *accel_dev) } hw_data->enable_error_correction(accel_dev); - hw_data->enable_vf2pf_comms(accel_dev); + ret = hw_data->enable_vf2pf_comms(accel_dev); - return 0; + return ret; } EXPORT_SYMBOL_GPL(adf_dev_init); diff --git a/drivers/crypto/qat/qat_common/adf_isr.c b/drivers/crypto/qat/qat_common/adf_isr.c index 4898ef41fd9fd..7d319c5c071c4 100644 --- a/drivers/crypto/qat/qat_common/adf_isr.c +++ b/drivers/crypto/qat/qat_common/adf_isr.c @@ -59,6 +59,8 @@ #include "adf_transport_access_macros.h" #include "adf_transport_internal.h" +#define ADF_MAX_NUM_VFS 32 + static int adf_enable_msix(struct adf_accel_dev *accel_dev) { struct adf_accel_pci *pci_dev_info = &accel_dev->accel_pci_dev; @@ -111,7 +113,7 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr) struct adf_bar *pmisc = &GET_BARS(accel_dev)[hw_data->get_misc_bar_id(hw_data)]; void __iomem *pmisc_bar_addr = pmisc->virt_addr; - u32 vf_mask; + unsigned long vf_mask; /* Get the interrupt sources triggered by VFs */ vf_mask = ((ADF_CSR_RD(pmisc_bar_addr, ADF_ERRSOU5) & @@ -132,8 +134,7 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr) * unless the VF is malicious and is attempting to * flood the host OS with VF2PF interrupts. */ - for_each_set_bit(i, (const unsigned long *)&vf_mask, - (sizeof(vf_mask) * BITS_PER_BYTE)) { + for_each_set_bit(i, &vf_mask, ADF_MAX_NUM_VFS) { vf_info = accel_dev->pf.vf_info + i; if (!__ratelimit(&vf_info->vf2pf_ratelimit)) { diff --git a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c index b3875fdf6cd72..180016e157771 100644 --- a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c +++ b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c @@ -195,6 +195,13 @@ static int __adf_iov_putmsg(struct adf_accel_dev *accel_dev, u32 msg, u8 vf_nr) val = ADF_CSR_RD(pmisc_bar_addr, pf2vf_offset); } while ((val & int_bit) && (count++ < ADF_IOV_MSG_ACK_MAX_RETRY)); + if (val != msg) { + dev_dbg(&GET_DEV(accel_dev), + "Collision - PFVF CSR overwritten by remote function\n"); + ret = -EIO; + goto out; + } + if (val & int_bit) { dev_dbg(&GET_DEV(accel_dev), "ACK not received from remote\n"); val &= ~int_bit; @@ -231,7 +238,6 @@ int adf_iov_putmsg(struct adf_accel_dev *accel_dev, u32 msg, u8 vf_nr) return ret; } -EXPORT_SYMBOL_GPL(adf_iov_putmsg); void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info) { @@ -244,6 +250,11 @@ void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info) /* Read message from the VF */ msg = ADF_CSR_RD(pmisc_addr, hw_data->get_pf2vf_offset(vf_nr)); + if (!(msg & ADF_VF2PF_INT)) { + dev_info(&GET_DEV(accel_dev), + "Spurious VF2PF interrupt, msg %X. Ignored\n", msg); + goto out; + } /* To ACK, clear the VF2PFINT bit */ msg &= ~ADF_VF2PF_INT; @@ -327,6 +338,7 @@ void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info) if (resp && adf_iov_putmsg(accel_dev, resp, vf_nr)) dev_err(&GET_DEV(accel_dev), "Failed to send response to VF\n"); +out: /* re-enable interrupt on PF from this VF */ adf_enable_vf2pf_interrupts(accel_dev, (1 << vf_nr)); return; @@ -361,6 +373,8 @@ static int adf_vf2pf_request_version(struct adf_accel_dev *accel_dev) msg |= ADF_PFVF_COMPATIBILITY_VERSION << ADF_VF2PF_COMPAT_VER_REQ_SHIFT; BUILD_BUG_ON(ADF_PFVF_COMPATIBILITY_VERSION > 255); + reinit_completion(&accel_dev->vf.iov_msg_completion); + /* Send request from VF to PF */ ret = adf_iov_putmsg(accel_dev, msg, 0); if (ret) { diff --git a/drivers/crypto/qat/qat_common/adf_vf2pf_msg.c b/drivers/crypto/qat/qat_common/adf_vf2pf_msg.c index cd5f37dffe8a6..1830194567e84 100644 --- a/drivers/crypto/qat/qat_common/adf_vf2pf_msg.c +++ b/drivers/crypto/qat/qat_common/adf_vf2pf_msg.c @@ -49,14 +49,14 @@ #include "adf_pf2vf_msg.h" /** - * adf_vf2pf_init() - send init msg to PF + * adf_vf2pf_notify_init() - send init msg to PF * @accel_dev: Pointer to acceleration VF device. * * Function sends an init messge from the VF to a PF * * Return: 0 on success, error code otherwise. */ -int adf_vf2pf_init(struct adf_accel_dev *accel_dev) +int adf_vf2pf_notify_init(struct adf_accel_dev *accel_dev) { u32 msg = (ADF_VF2PF_MSGORIGIN_SYSTEM | (ADF_VF2PF_MSGTYPE_INIT << ADF_VF2PF_MSGTYPE_SHIFT)); @@ -69,17 +69,17 @@ int adf_vf2pf_init(struct adf_accel_dev *accel_dev) set_bit(ADF_STATUS_PF_RUNNING, &accel_dev->status); return 0; } -EXPORT_SYMBOL_GPL(adf_vf2pf_init); +EXPORT_SYMBOL_GPL(adf_vf2pf_notify_init); /** - * adf_vf2pf_shutdown() - send shutdown msg to PF + * adf_vf2pf_notify_shutdown() - send shutdown msg to PF * @accel_dev: Pointer to acceleration VF device. * * Function sends a shutdown messge from the VF to a PF * * Return: void */ -void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev) +void adf_vf2pf_notify_shutdown(struct adf_accel_dev *accel_dev) { u32 msg = (ADF_VF2PF_MSGORIGIN_SYSTEM | (ADF_VF2PF_MSGTYPE_SHUTDOWN << ADF_VF2PF_MSGTYPE_SHIFT)); @@ -89,4 +89,4 @@ void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev) dev_err(&GET_DEV(accel_dev), "Failed to send Shutdown event to PF\n"); } -EXPORT_SYMBOL_GPL(adf_vf2pf_shutdown); +EXPORT_SYMBOL_GPL(adf_vf2pf_notify_shutdown); diff --git a/drivers/crypto/qat/qat_common/adf_vf_isr.c b/drivers/crypto/qat/qat_common/adf_vf_isr.c index df9a1f35b8320..86274e3c6781d 100644 --- a/drivers/crypto/qat/qat_common/adf_vf_isr.c +++ b/drivers/crypto/qat/qat_common/adf_vf_isr.c @@ -123,6 +123,11 @@ static void adf_pf2vf_bh_handler(void *data) /* Read the message from PF */ msg = ADF_CSR_RD(pmisc_bar_addr, hw_data->get_pf2vf_offset(0)); + if (!(msg & ADF_PF2VF_INT)) { + dev_info(&GET_DEV(accel_dev), + "Spurious PF2VF interrupt, msg %X. Ignored\n", msg); + goto out; + } if (!(msg & ADF_PF2VF_MSGORIGIN_SYSTEM)) /* Ignore legacy non-system (non-kernel) PF2VF messages */ @@ -171,6 +176,7 @@ static void adf_pf2vf_bh_handler(void *data) msg &= ~ADF_PF2VF_INT; ADF_CSR_WR(pmisc_bar_addr, hw_data->get_pf2vf_offset(0), msg); +out: /* Re-enable PF2VF interrupts */ adf_enable_pf2vf_interrupts(accel_dev); return; @@ -203,6 +209,7 @@ static irqreturn_t adf_isr(int irq, void *privdata) struct adf_bar *pmisc = &GET_BARS(accel_dev)[hw_data->get_misc_bar_id(hw_data)]; void __iomem *pmisc_bar_addr = pmisc->virt_addr; + bool handled = false; u32 v_int; /* Read VF INT source CSR to determine the source of VF interrupt */ @@ -215,7 +222,7 @@ static irqreturn_t adf_isr(int irq, void *privdata) /* Schedule tasklet to handle interrupt BH */ tasklet_hi_schedule(&accel_dev->vf.pf2vf_bh_tasklet); - return IRQ_HANDLED; + handled = true; } /* Check bundle interrupt */ @@ -227,10 +234,10 @@ static irqreturn_t adf_isr(int irq, void *privdata) WRITE_CSR_INT_FLAG_AND_COL(bank->csr_addr, bank->bank_number, 0); tasklet_hi_schedule(&bank->resp_handler); - return IRQ_HANDLED; + handled = true; } - return IRQ_NONE; + return handled ? IRQ_HANDLED : IRQ_NONE; } static int adf_request_msi_irq(struct adf_accel_dev *accel_dev) diff --git a/drivers/crypto/qat/qat_common/qat_crypto.c b/drivers/crypto/qat/qat_common/qat_crypto.c index 3852d31ce0a4b..37a9f969c59cd 100644 --- a/drivers/crypto/qat/qat_common/qat_crypto.c +++ b/drivers/crypto/qat/qat_common/qat_crypto.c @@ -170,6 +170,14 @@ int qat_crypto_dev_config(struct adf_accel_dev *accel_dev) goto err; if (adf_cfg_section_add(accel_dev, "Accelerator0")) goto err; + + /* Temporarily set the number of crypto instances to zero to avoid + * registering the crypto algorithms. + * This will be removed when the algorithms will support the + * CRYPTO_TFM_REQ_MAY_BACKLOG flag + */ + instances = 0; + for (i = 0; i < instances; i++) { val = i; snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_BANK_NUM, i); diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c index a3b4dd8099a7b..3a8361c83f0b1 100644 --- a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c @@ -123,10 +123,10 @@ void adf_init_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data) hw_data->enable_error_correction = adf_vf_void_noop; hw_data->init_admin_comms = adf_vf_int_noop; hw_data->exit_admin_comms = adf_vf_void_noop; - hw_data->send_admin_init = adf_vf2pf_init; + hw_data->send_admin_init = adf_vf2pf_notify_init; hw_data->init_arb = adf_vf_int_noop; hw_data->exit_arb = adf_vf_void_noop; - hw_data->disable_iov = adf_vf2pf_shutdown; + hw_data->disable_iov = adf_vf2pf_notify_shutdown; hw_data->get_accel_mask = get_accel_mask; hw_data->get_ae_mask = get_ae_mask; hw_data->get_num_accels = get_num_accels; diff --git a/drivers/crypto/qce/sha.c b/drivers/crypto/qce/sha.c index 0853e74583ade..29b0bad2507b1 100644 --- a/drivers/crypto/qce/sha.c +++ b/drivers/crypto/qce/sha.c @@ -512,8 +512,8 @@ static int qce_ahash_register_one(const struct qce_ahash_def *def, ret = crypto_register_ahash(alg); if (ret) { - kfree(tmpl); dev_err(qce->dev, "%s registration failed\n", base->cra_name); + kfree(tmpl); return ret; } diff --git a/drivers/crypto/qcom-rng.c b/drivers/crypto/qcom-rng.c index 4730f84b646de..6cc4fd005fe07 100644 --- a/drivers/crypto/qcom-rng.c +++ b/drivers/crypto/qcom-rng.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -42,16 +43,19 @@ static int qcom_rng_read(struct qcom_rng *rng, u8 *data, unsigned int max) { unsigned int currsize = 0; u32 val; + int ret; /* read random data from hardware */ do { - val = readl_relaxed(rng->base + PRNG_STATUS); - if (!(val & PRNG_STATUS_DATA_AVAIL)) - break; + ret = readl_poll_timeout(rng->base + PRNG_STATUS, val, + val & PRNG_STATUS_DATA_AVAIL, + 200, 10000); + if (ret) + return ret; val = readl_relaxed(rng->base + PRNG_DATA_OUT); if (!val) - break; + return -EINVAL; if ((max - currsize) >= WORD_SZ) { memcpy(data, &val, WORD_SZ); @@ -64,7 +68,7 @@ static int qcom_rng_read(struct qcom_rng *rng, u8 *data, unsigned int max) } } while (currsize < max); - return currsize; + return 0; } static int qcom_rng_generate(struct crypto_rng *tfm, @@ -86,7 +90,7 @@ static int qcom_rng_generate(struct crypto_rng *tfm, mutex_unlock(&rng->lock); clk_disable_unprepare(rng->clk); - return 0; + return ret; } static int qcom_rng_seed(struct crypto_rng *tfm, const u8 *seed, diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c index 010f1bb20dada..86a13b738c2de 100644 --- a/drivers/crypto/s5p-sss.c +++ b/drivers/crypto/s5p-sss.c @@ -2208,6 +2208,8 @@ static int s5p_aes_probe(struct platform_device *pdev) variant = find_s5p_sss_version(pdev); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -EINVAL; /* * Note: HASH and PRNG uses the same registers in secss, avoid diff --git a/drivers/crypto/stm32/stm32-crc32.c b/drivers/crypto/stm32/stm32-crc32.c index e68b856d03b6e..2ecc970f5cae5 100644 --- a/drivers/crypto/stm32/stm32-crc32.c +++ b/drivers/crypto/stm32/stm32-crc32.c @@ -230,7 +230,7 @@ static struct shash_alg algs[] = { .digestsize = CHKSUM_DIGEST_SIZE, .base = { .cra_name = "crc32", - .cra_driver_name = DRIVER_NAME, + .cra_driver_name = "stm32-crc32-crc32", .cra_priority = 200, .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, @@ -252,7 +252,7 @@ static struct shash_alg algs[] = { .digestsize = CHKSUM_DIGEST_SIZE, .base = { .cra_name = "crc32c", - .cra_driver_name = DRIVER_NAME, + .cra_driver_name = "stm32-crc32-crc32c", .cra_priority = 200, .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, @@ -332,8 +332,10 @@ static int stm32_crc_remove(struct platform_device *pdev) struct stm32_crc *crc = platform_get_drvdata(pdev); int ret = pm_runtime_get_sync(crc->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_noidle(crc->dev); return ret; + } spin_lock(&crc_list.lock); list_del(&crc->list); diff --git a/drivers/crypto/stm32/stm32-cryp.c b/drivers/crypto/stm32/stm32-cryp.c index 9b3511236ba25..69c2468f1053d 100644 --- a/drivers/crypto/stm32/stm32-cryp.c +++ b/drivers/crypto/stm32/stm32-cryp.c @@ -639,7 +639,7 @@ static void stm32_cryp_finish_req(struct stm32_cryp *cryp, int err) /* Phase 4 : output tag */ err = stm32_cryp_read_auth_tag(cryp); - if (!err && (!(is_gcm(cryp) || is_ccm(cryp)))) + if (!err && (!(is_gcm(cryp) || is_ccm(cryp) || is_ecb(cryp)))) stm32_cryp_get_iv(cryp); if (cryp->sgs_copied) { @@ -669,8 +669,6 @@ static void stm32_cryp_finish_req(struct stm32_cryp *cryp, int err) else crypto_finalize_ablkcipher_request(cryp->engine, cryp->req, err); - - memset(cryp->ctx->key, 0, cryp->ctx->keylen); } static int stm32_cryp_cpu_start(struct stm32_cryp *cryp) @@ -2036,8 +2034,6 @@ static int stm32_cryp_probe(struct platform_device *pdev) list_del(&cryp->list); spin_unlock(&cryp_list.lock); - pm_runtime_disable(dev); - pm_runtime_put_noidle(dev); pm_runtime_disable(dev); pm_runtime_put_noidle(dev); diff --git a/drivers/crypto/virtio/Kconfig b/drivers/crypto/virtio/Kconfig index 01b625e4e5adc..93681efaf03c0 100644 --- a/drivers/crypto/virtio/Kconfig +++ b/drivers/crypto/virtio/Kconfig @@ -3,8 +3,11 @@ config CRYPTO_DEV_VIRTIO tristate "VirtIO crypto driver" depends on VIRTIO select CRYPTO_AEAD + select CRYPTO_AKCIPHER2 select CRYPTO_BLKCIPHER select CRYPTO_ENGINE + select CRYPTO_RSA + select MPILIB default m help This driver provides support for virtio crypto device. If you diff --git a/drivers/crypto/virtio/Makefile b/drivers/crypto/virtio/Makefile index cbfccccfa135d..bfa6cbae342e0 100644 --- a/drivers/crypto/virtio/Makefile +++ b/drivers/crypto/virtio/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_CRYPTO_DEV_VIRTIO) += virtio_crypto.o virtio_crypto-objs := \ - virtio_crypto_algs.o \ + virtio_crypto_skcipher_algs.o \ + virtio_crypto_akcipher_algs.o \ virtio_crypto_mgr.o \ virtio_crypto_core.o diff --git a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c new file mode 100644 index 0000000000000..0b6528c7e30b0 --- /dev/null +++ b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c @@ -0,0 +1,594 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + /* Asymmetric algorithms supported by virtio crypto device + * + * Authors: zhenwei pi + * lei he + * + * Copyright 2022 Bytedance CO., LTD. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "virtio_crypto_common.h" + +struct virtio_crypto_rsa_ctx { + MPI n; +}; + +struct virtio_crypto_akcipher_ctx { + struct crypto_engine_ctx enginectx; + struct virtio_crypto *vcrypto; + struct crypto_akcipher *tfm; + bool session_valid; + __u64 session_id; + union { + struct virtio_crypto_rsa_ctx rsa_ctx; + }; +}; + +struct virtio_crypto_akcipher_request { + struct virtio_crypto_request base; + struct virtio_crypto_akcipher_ctx *akcipher_ctx; + struct akcipher_request *akcipher_req; + void *src_buf; + void *dst_buf; + uint32_t opcode; +}; + +struct virtio_crypto_akcipher_algo { + uint32_t algonum; + uint32_t service; + unsigned int active_devs; + struct akcipher_alg algo; +}; + +static DEFINE_MUTEX(algs_lock); + +static void virtio_crypto_akcipher_finalize_req( + struct virtio_crypto_akcipher_request *vc_akcipher_req, + struct akcipher_request *req, int err) +{ + kfree(vc_akcipher_req->src_buf); + kfree(vc_akcipher_req->dst_buf); + vc_akcipher_req->src_buf = NULL; + vc_akcipher_req->dst_buf = NULL; + virtcrypto_clear_request(&vc_akcipher_req->base); + + crypto_finalize_akcipher_request(vc_akcipher_req->base.dataq->engine, req, err); +} + +static void virtio_crypto_dataq_akcipher_callback(struct virtio_crypto_request *vc_req, int len) +{ + struct virtio_crypto_akcipher_request *vc_akcipher_req = + container_of(vc_req, struct virtio_crypto_akcipher_request, base); + struct akcipher_request *akcipher_req; + int error; + + switch (vc_req->status) { + case VIRTIO_CRYPTO_OK: + error = 0; + break; + case VIRTIO_CRYPTO_INVSESS: + case VIRTIO_CRYPTO_ERR: + error = -EINVAL; + break; + case VIRTIO_CRYPTO_BADMSG: + error = -EBADMSG; + break; + + case VIRTIO_CRYPTO_KEY_REJECTED: + error = -EKEYREJECTED; + break; + + default: + error = -EIO; + break; + } + + akcipher_req = vc_akcipher_req->akcipher_req; + if (vc_akcipher_req->opcode != VIRTIO_CRYPTO_AKCIPHER_VERIFY) { + /* actuall length maybe less than dst buffer */ + akcipher_req->dst_len = len - sizeof(vc_req->status); + sg_copy_from_buffer(akcipher_req->dst, sg_nents(akcipher_req->dst), + vc_akcipher_req->dst_buf, akcipher_req->dst_len); + } + virtio_crypto_akcipher_finalize_req(vc_akcipher_req, akcipher_req, error); +} + +static int virtio_crypto_alg_akcipher_init_session(struct virtio_crypto_akcipher_ctx *ctx, + struct virtio_crypto_ctrl_header *header, void *para, + const uint8_t *key, unsigned int keylen) +{ + struct scatterlist outhdr_sg, key_sg, inhdr_sg, *sgs[3]; + struct virtio_crypto *vcrypto = ctx->vcrypto; + uint8_t *pkey; + int err; + unsigned int num_out = 0, num_in = 0; + struct virtio_crypto_op_ctrl_req *ctrl; + struct virtio_crypto_session_input *input; + struct virtio_crypto_ctrl_request *vc_ctrl_req; + + pkey = kmemdup(key, keylen, GFP_ATOMIC); + if (!pkey) + return -ENOMEM; + + vc_ctrl_req = kzalloc(sizeof(*vc_ctrl_req), GFP_KERNEL); + if (!vc_ctrl_req) { + err = -ENOMEM; + goto out; + } + + ctrl = &vc_ctrl_req->ctrl; + memcpy(&ctrl->header, header, sizeof(ctrl->header)); + memcpy(&ctrl->u, para, sizeof(ctrl->u)); + input = &vc_ctrl_req->input; + input->status = cpu_to_le32(VIRTIO_CRYPTO_ERR); + + sg_init_one(&outhdr_sg, ctrl, sizeof(*ctrl)); + sgs[num_out++] = &outhdr_sg; + + sg_init_one(&key_sg, pkey, keylen); + sgs[num_out++] = &key_sg; + + sg_init_one(&inhdr_sg, input, sizeof(*input)); + sgs[num_out + num_in++] = &inhdr_sg; + + err = virtio_crypto_ctrl_vq_request(vcrypto, sgs, num_out, num_in, vc_ctrl_req); + if (err < 0) + goto out; + + if (le32_to_cpu(input->status) != VIRTIO_CRYPTO_OK) { + pr_err("virtio_crypto: Create session failed status: %u\n", + le32_to_cpu(input->status)); + err = -EINVAL; + goto out; + } + + ctx->session_id = le64_to_cpu(input->session_id); + ctx->session_valid = true; + err = 0; + +out: + kfree(vc_ctrl_req); + kzfree(pkey); + + return err; +} + +static int virtio_crypto_alg_akcipher_close_session(struct virtio_crypto_akcipher_ctx *ctx) +{ + struct scatterlist outhdr_sg, inhdr_sg, *sgs[2]; + struct virtio_crypto_destroy_session_req *destroy_session; + struct virtio_crypto *vcrypto = ctx->vcrypto; + unsigned int num_out = 0, num_in = 0; + int err; + struct virtio_crypto_op_ctrl_req *ctrl; + struct virtio_crypto_inhdr *ctrl_status; + struct virtio_crypto_ctrl_request *vc_ctrl_req; + + if (!ctx->session_valid) + return 0; + + vc_ctrl_req = kzalloc(sizeof(*vc_ctrl_req), GFP_KERNEL); + if (!vc_ctrl_req) + return -ENOMEM; + + ctrl_status = &vc_ctrl_req->ctrl_status; + ctrl_status->status = VIRTIO_CRYPTO_ERR; + ctrl = &vc_ctrl_req->ctrl; + ctrl->header.opcode = cpu_to_le32(VIRTIO_CRYPTO_AKCIPHER_DESTROY_SESSION); + ctrl->header.queue_id = 0; + + destroy_session = &ctrl->u.destroy_session; + destroy_session->session_id = cpu_to_le64(ctx->session_id); + + sg_init_one(&outhdr_sg, ctrl, sizeof(*ctrl)); + sgs[num_out++] = &outhdr_sg; + + sg_init_one(&inhdr_sg, &ctrl_status->status, sizeof(ctrl_status->status)); + sgs[num_out + num_in++] = &inhdr_sg; + + err = virtio_crypto_ctrl_vq_request(vcrypto, sgs, num_out, num_in, vc_ctrl_req); + if (err < 0) + goto out; + + if (ctrl_status->status != VIRTIO_CRYPTO_OK) { + pr_err("virtio_crypto: Close session failed status: %u, session_id: 0x%llx\n", + ctrl_status->status, destroy_session->session_id); + err = -EINVAL; + goto out; + } + + err = 0; + ctx->session_valid = false; + +out: + kfree(vc_ctrl_req); + + return err; +} + +static int __virtio_crypto_akcipher_do_req(struct virtio_crypto_akcipher_request *vc_akcipher_req, + struct akcipher_request *req, struct data_queue *data_vq) +{ + struct virtio_crypto_akcipher_ctx *ctx = vc_akcipher_req->akcipher_ctx; + struct virtio_crypto_request *vc_req = &vc_akcipher_req->base; + struct virtio_crypto *vcrypto = ctx->vcrypto; + struct virtio_crypto_op_data_req *req_data = vc_req->req_data; + struct scatterlist *sgs[4], outhdr_sg, inhdr_sg, srcdata_sg, dstdata_sg; + void *src_buf = NULL, *dst_buf = NULL; + unsigned int num_out = 0, num_in = 0; + int node = dev_to_node(&vcrypto->vdev->dev); + unsigned long flags; + int ret = -ENOMEM; + bool verify = vc_akcipher_req->opcode == VIRTIO_CRYPTO_AKCIPHER_VERIFY; + unsigned int src_len = verify ? req->src_len + req->dst_len : req->src_len; + + /* out header */ + sg_init_one(&outhdr_sg, req_data, sizeof(*req_data)); + sgs[num_out++] = &outhdr_sg; + + /* src data */ + src_buf = kcalloc_node(src_len, 1, GFP_KERNEL, node); + if (!src_buf) + goto err; + + if (verify) { + /* for verify operation, both src and dst data work as OUT direction */ + sg_copy_to_buffer(req->src, sg_nents(req->src), src_buf, src_len); + sg_init_one(&srcdata_sg, src_buf, src_len); + sgs[num_out++] = &srcdata_sg; + } else { + sg_copy_to_buffer(req->src, sg_nents(req->src), src_buf, src_len); + sg_init_one(&srcdata_sg, src_buf, src_len); + sgs[num_out++] = &srcdata_sg; + + /* dst data */ + dst_buf = kcalloc_node(req->dst_len, 1, GFP_KERNEL, node); + if (!dst_buf) + goto err; + + sg_init_one(&dstdata_sg, dst_buf, req->dst_len); + sgs[num_out + num_in++] = &dstdata_sg; + } + + vc_akcipher_req->src_buf = src_buf; + vc_akcipher_req->dst_buf = dst_buf; + + /* in header */ + sg_init_one(&inhdr_sg, &vc_req->status, sizeof(vc_req->status)); + sgs[num_out + num_in++] = &inhdr_sg; + + spin_lock_irqsave(&data_vq->lock, flags); + ret = virtqueue_add_sgs(data_vq->vq, sgs, num_out, num_in, vc_req, GFP_ATOMIC); + virtqueue_kick(data_vq->vq); + spin_unlock_irqrestore(&data_vq->lock, flags); + if (ret) + goto err; + + return 0; + +err: + kfree(src_buf); + kfree(dst_buf); + + return -ENOMEM; +} + +static int virtio_crypto_rsa_do_req(struct crypto_engine *engine, void *vreq) +{ + struct akcipher_request *req = container_of(vreq, struct akcipher_request, base); + struct virtio_crypto_akcipher_request *vc_akcipher_req = akcipher_request_ctx(req); + struct virtio_crypto_request *vc_req = &vc_akcipher_req->base; + struct virtio_crypto_akcipher_ctx *ctx = vc_akcipher_req->akcipher_ctx; + struct virtio_crypto *vcrypto = ctx->vcrypto; + struct data_queue *data_vq = vc_req->dataq; + struct virtio_crypto_op_header *header; + struct virtio_crypto_akcipher_data_req *akcipher_req; + int ret; + + vc_req->sgs = NULL; + vc_req->req_data = kzalloc_node(sizeof(*vc_req->req_data), + GFP_KERNEL, dev_to_node(&vcrypto->vdev->dev)); + if (!vc_req->req_data) + return -ENOMEM; + + /* build request header */ + header = &vc_req->req_data->header; + header->opcode = cpu_to_le32(vc_akcipher_req->opcode); + header->algo = cpu_to_le32(VIRTIO_CRYPTO_AKCIPHER_RSA); + header->session_id = cpu_to_le64(ctx->session_id); + + /* build request akcipher data */ + akcipher_req = &vc_req->req_data->u.akcipher_req; + akcipher_req->para.src_data_len = cpu_to_le32(req->src_len); + akcipher_req->para.dst_data_len = cpu_to_le32(req->dst_len); + + ret = __virtio_crypto_akcipher_do_req(vc_akcipher_req, req, data_vq); + if (ret < 0) { + kzfree(vc_req->req_data); + vc_req->req_data = NULL; + return ret; + } + + return 0; +} + +static int virtio_crypto_rsa_req(struct akcipher_request *req, uint32_t opcode) +{ + struct crypto_akcipher *atfm = crypto_akcipher_reqtfm(req); + struct virtio_crypto_akcipher_ctx *ctx = akcipher_tfm_ctx(atfm); + struct virtio_crypto_akcipher_request *vc_akcipher_req = akcipher_request_ctx(req); + struct virtio_crypto_request *vc_req = &vc_akcipher_req->base; + struct virtio_crypto *vcrypto = ctx->vcrypto; + /* Use the first data virtqueue as default */ + struct data_queue *data_vq = &vcrypto->data_vq[0]; + + vc_req->dataq = data_vq; + vc_req->alg_cb = virtio_crypto_dataq_akcipher_callback; + vc_akcipher_req->akcipher_ctx = ctx; + vc_akcipher_req->akcipher_req = req; + vc_akcipher_req->opcode = opcode; + + return crypto_transfer_akcipher_request_to_engine(data_vq->engine, req); +} + +static int virtio_crypto_rsa_encrypt(struct akcipher_request *req) +{ + return virtio_crypto_rsa_req(req, VIRTIO_CRYPTO_AKCIPHER_ENCRYPT); +} + +static int virtio_crypto_rsa_decrypt(struct akcipher_request *req) +{ + return virtio_crypto_rsa_req(req, VIRTIO_CRYPTO_AKCIPHER_DECRYPT); +} + +static int virtio_crypto_rsa_sign(struct akcipher_request *req) +{ + return virtio_crypto_rsa_req(req, VIRTIO_CRYPTO_AKCIPHER_SIGN); +} + +static int virtio_crypto_rsa_verify(struct akcipher_request *req) +{ + return virtio_crypto_rsa_req(req, VIRTIO_CRYPTO_AKCIPHER_VERIFY); +} + +static int virtio_crypto_rsa_set_key(struct crypto_akcipher *tfm, + const void *key, + unsigned int keylen, + bool private, + int padding_algo, + int hash_algo) +{ + struct virtio_crypto_akcipher_ctx *ctx = akcipher_tfm_ctx(tfm); + struct virtio_crypto_rsa_ctx *rsa_ctx = &ctx->rsa_ctx; + struct virtio_crypto *vcrypto; + struct virtio_crypto_ctrl_header header; + struct virtio_crypto_akcipher_session_para para; + struct rsa_key rsa_key = {0}; + int node = virtio_crypto_get_current_node(); + uint32_t keytype; + int ret; + + /* mpi_free will test n, just free it. */ + mpi_free(rsa_ctx->n); + rsa_ctx->n = NULL; + + if (private) { + keytype = VIRTIO_CRYPTO_AKCIPHER_KEY_TYPE_PRIVATE; + ret = rsa_parse_priv_key(&rsa_key, key, keylen); + } else { + keytype = VIRTIO_CRYPTO_AKCIPHER_KEY_TYPE_PUBLIC; + ret = rsa_parse_pub_key(&rsa_key, key, keylen); + } + + if (ret) + return ret; + + rsa_ctx->n = mpi_read_raw_data(rsa_key.n, rsa_key.n_sz); + if (!rsa_ctx->n) + return -ENOMEM; + + if (!ctx->vcrypto) { + vcrypto = virtcrypto_get_dev_node(node, VIRTIO_CRYPTO_SERVICE_AKCIPHER, + VIRTIO_CRYPTO_AKCIPHER_RSA); + if (!vcrypto) { + pr_err("virtio_crypto: Could not find a virtio device in the system or unsupported algo\n"); + return -ENODEV; + } + + ctx->vcrypto = vcrypto; + } else { + virtio_crypto_alg_akcipher_close_session(ctx); + } + + /* set ctrl header */ + header.opcode = cpu_to_le32(VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION); + header.algo = cpu_to_le32(VIRTIO_CRYPTO_AKCIPHER_RSA); + header.queue_id = 0; + + /* set RSA para */ + para.algo = cpu_to_le32(VIRTIO_CRYPTO_AKCIPHER_RSA); + para.keytype = cpu_to_le32(keytype); + para.keylen = cpu_to_le32(keylen); + para.u.rsa.padding_algo = cpu_to_le32(padding_algo); + para.u.rsa.hash_algo = cpu_to_le32(hash_algo); + + return virtio_crypto_alg_akcipher_init_session(ctx, &header, ¶, key, keylen); +} + +static int virtio_crypto_rsa_raw_set_priv_key(struct crypto_akcipher *tfm, + const void *key, + unsigned int keylen) +{ + return virtio_crypto_rsa_set_key(tfm, key, keylen, 1, + VIRTIO_CRYPTO_RSA_RAW_PADDING, + VIRTIO_CRYPTO_RSA_NO_HASH); +} + + +static int virtio_crypto_p1pad_rsa_sha1_set_priv_key(struct crypto_akcipher *tfm, + const void *key, + unsigned int keylen) +{ + return virtio_crypto_rsa_set_key(tfm, key, keylen, 1, + VIRTIO_CRYPTO_RSA_PKCS1_PADDING, + VIRTIO_CRYPTO_RSA_SHA1); +} + +static int virtio_crypto_rsa_raw_set_pub_key(struct crypto_akcipher *tfm, + const void *key, + unsigned int keylen) +{ + return virtio_crypto_rsa_set_key(tfm, key, keylen, 0, + VIRTIO_CRYPTO_RSA_RAW_PADDING, + VIRTIO_CRYPTO_RSA_NO_HASH); +} + +static int virtio_crypto_p1pad_rsa_sha1_set_pub_key(struct crypto_akcipher *tfm, + const void *key, + unsigned int keylen) +{ + return virtio_crypto_rsa_set_key(tfm, key, keylen, 0, + VIRTIO_CRYPTO_RSA_PKCS1_PADDING, + VIRTIO_CRYPTO_RSA_SHA1); +} + +static unsigned int virtio_crypto_rsa_max_size(struct crypto_akcipher *tfm) +{ + struct virtio_crypto_akcipher_ctx *ctx = akcipher_tfm_ctx(tfm); + struct virtio_crypto_rsa_ctx *rsa_ctx = &ctx->rsa_ctx; + + return mpi_get_size(rsa_ctx->n); +} + +static int virtio_crypto_rsa_init_tfm(struct crypto_akcipher *tfm) +{ + struct virtio_crypto_akcipher_ctx *ctx = akcipher_tfm_ctx(tfm); + + ctx->tfm = tfm; + ctx->enginectx.op.do_one_request = virtio_crypto_rsa_do_req; + ctx->enginectx.op.prepare_request = NULL; + ctx->enginectx.op.unprepare_request = NULL; + + return 0; +} + +static void virtio_crypto_rsa_exit_tfm(struct crypto_akcipher *tfm) +{ + struct virtio_crypto_akcipher_ctx *ctx = akcipher_tfm_ctx(tfm); + struct virtio_crypto_rsa_ctx *rsa_ctx = &ctx->rsa_ctx; + + virtio_crypto_alg_akcipher_close_session(ctx); + virtcrypto_dev_put(ctx->vcrypto); + mpi_free(rsa_ctx->n); + rsa_ctx->n = NULL; +} + +static struct virtio_crypto_akcipher_algo virtio_crypto_akcipher_algs[] = { + { + .algonum = VIRTIO_CRYPTO_AKCIPHER_RSA, + .service = VIRTIO_CRYPTO_SERVICE_AKCIPHER, + .algo = { + .encrypt = virtio_crypto_rsa_encrypt, + .decrypt = virtio_crypto_rsa_decrypt, + .set_pub_key = virtio_crypto_rsa_raw_set_pub_key, + .set_priv_key = virtio_crypto_rsa_raw_set_priv_key, + .max_size = virtio_crypto_rsa_max_size, + .init = virtio_crypto_rsa_init_tfm, + .exit = virtio_crypto_rsa_exit_tfm, + .reqsize = sizeof(struct virtio_crypto_akcipher_request), + .base = { + .cra_name = "rsa", + .cra_driver_name = "virtio-crypto-rsa", + .cra_priority = 150, + .cra_module = THIS_MODULE, + .cra_ctxsize = sizeof(struct virtio_crypto_akcipher_ctx), + }, + }, + }, + { + .algonum = VIRTIO_CRYPTO_AKCIPHER_RSA, + .service = VIRTIO_CRYPTO_SERVICE_AKCIPHER, + .algo = { + .encrypt = virtio_crypto_rsa_encrypt, + .decrypt = virtio_crypto_rsa_decrypt, + .sign = virtio_crypto_rsa_sign, + .verify = virtio_crypto_rsa_verify, + .set_pub_key = virtio_crypto_p1pad_rsa_sha1_set_pub_key, + .set_priv_key = virtio_crypto_p1pad_rsa_sha1_set_priv_key, + .max_size = virtio_crypto_rsa_max_size, + .init = virtio_crypto_rsa_init_tfm, + .exit = virtio_crypto_rsa_exit_tfm, + .reqsize = sizeof(struct virtio_crypto_akcipher_request), + .base = { + .cra_name = "pkcs1pad(rsa,sha1)", + .cra_driver_name = "virtio-pkcs1-rsa-with-sha1", + .cra_priority = 150, + .cra_module = THIS_MODULE, + .cra_ctxsize = sizeof(struct virtio_crypto_akcipher_ctx), + }, + }, + }, +}; + +int virtio_crypto_akcipher_algs_register(struct virtio_crypto *vcrypto) +{ + int ret = 0; + int i = 0; + + mutex_lock(&algs_lock); + + for (i = 0; i < ARRAY_SIZE(virtio_crypto_akcipher_algs); i++) { + uint32_t service = virtio_crypto_akcipher_algs[i].service; + uint32_t algonum = virtio_crypto_akcipher_algs[i].algonum; + + if (!virtcrypto_algo_is_supported(vcrypto, service, algonum)) + continue; + + if (virtio_crypto_akcipher_algs[i].active_devs == 0) { + ret = crypto_register_akcipher(&virtio_crypto_akcipher_algs[i].algo); + if (ret) + goto unlock; + } + + virtio_crypto_akcipher_algs[i].active_devs++; + dev_info(&vcrypto->vdev->dev, "Registered akcipher algo %s\n", + virtio_crypto_akcipher_algs[i].algo.base.cra_name); + } + +unlock: + mutex_unlock(&algs_lock); + return ret; +} + +void virtio_crypto_akcipher_algs_unregister(struct virtio_crypto *vcrypto) +{ + int i = 0; + + mutex_lock(&algs_lock); + + for (i = 0; i < ARRAY_SIZE(virtio_crypto_akcipher_algs); i++) { + uint32_t service = virtio_crypto_akcipher_algs[i].service; + uint32_t algonum = virtio_crypto_akcipher_algs[i].algonum; + + if (virtio_crypto_akcipher_algs[i].active_devs == 0 || + !virtcrypto_algo_is_supported(vcrypto, service, algonum)) + continue; + + if (virtio_crypto_akcipher_algs[i].active_devs == 1) + crypto_unregister_akcipher(&virtio_crypto_akcipher_algs[i].algo); + + virtio_crypto_akcipher_algs[i].active_devs--; + } + + mutex_unlock(&algs_lock); +} diff --git a/drivers/crypto/virtio/virtio_crypto_common.h b/drivers/crypto/virtio/virtio_crypto_common.h index 1c6e00da5a290..d15e14cd8645d 100644 --- a/drivers/crypto/virtio/virtio_crypto_common.h +++ b/drivers/crypto/virtio/virtio_crypto_common.h @@ -13,6 +13,7 @@ #include #include #include +#include /* Internal representation of a data virtqueue */ @@ -56,6 +57,7 @@ struct virtio_crypto { u32 mac_algo_l; u32 mac_algo_h; u32 aead_algo; + u32 akcipher_algo; /* Maximum length of cipher key */ u32 max_cipher_key_len; @@ -64,11 +66,6 @@ struct virtio_crypto { /* Maximum size of per request */ u64 max_size; - /* Control VQ buffers: protected by the ctrl_lock */ - struct virtio_crypto_op_ctrl_req ctrl; - struct virtio_crypto_session_input input; - struct virtio_crypto_inhdr ctrl_status; - unsigned long status; atomic_t ref_count; struct list_head list; @@ -84,6 +81,18 @@ struct virtio_crypto_sym_session_info { __u64 session_id; }; +/* + * Note: there are padding fields in request, clear them to zero before + * sending to host to avoid to divulge any information. + * Ex, virtio_crypto_ctrl_request::ctrl::u::destroy_session::padding[48] + */ +struct virtio_crypto_ctrl_request { + struct virtio_crypto_op_ctrl_req ctrl; + struct virtio_crypto_session_input input; + struct virtio_crypto_inhdr ctrl_status; + struct completion compl; +}; + struct virtio_crypto_request; typedef void (*virtio_crypto_data_callback) (struct virtio_crypto_request *vc_req, int len); @@ -129,7 +138,12 @@ static inline int virtio_crypto_get_current_node(void) return node; } -int virtio_crypto_algs_register(struct virtio_crypto *vcrypto); -void virtio_crypto_algs_unregister(struct virtio_crypto *vcrypto); +int virtio_crypto_skcipher_algs_register(struct virtio_crypto *vcrypto); +void virtio_crypto_skcipher_algs_unregister(struct virtio_crypto *vcrypto); +int virtio_crypto_akcipher_algs_register(struct virtio_crypto *vcrypto); +void virtio_crypto_akcipher_algs_unregister(struct virtio_crypto *vcrypto); +int virtio_crypto_ctrl_vq_request(struct virtio_crypto *vcrypto, struct scatterlist *sgs[], + unsigned int out_sgs, unsigned int in_sgs, + struct virtio_crypto_ctrl_request *vc_ctrl_req); #endif /* _VIRTIO_CRYPTO_COMMON_H */ diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c index c8a962c626635..9f7be94fda22e 100644 --- a/drivers/crypto/virtio/virtio_crypto_core.c +++ b/drivers/crypto/virtio/virtio_crypto_core.c @@ -22,6 +22,56 @@ virtcrypto_clear_request(struct virtio_crypto_request *vc_req) } } +static void virtio_crypto_ctrlq_callback(struct virtio_crypto_ctrl_request *vc_ctrl_req) +{ + complete(&vc_ctrl_req->compl); +} + +static void virtcrypto_ctrlq_callback(struct virtqueue *vq) +{ + struct virtio_crypto *vcrypto = vq->vdev->priv; + struct virtio_crypto_ctrl_request *vc_ctrl_req; + unsigned long flags; + unsigned int len; + + spin_lock_irqsave(&vcrypto->ctrl_lock, flags); + do { + virtqueue_disable_cb(vq); + while ((vc_ctrl_req = virtqueue_get_buf(vq, &len)) != NULL) { + spin_unlock_irqrestore(&vcrypto->ctrl_lock, flags); + virtio_crypto_ctrlq_callback(vc_ctrl_req); + spin_lock_irqsave(&vcrypto->ctrl_lock, flags); + } + if (unlikely(virtqueue_is_broken(vq))) + break; + } while (!virtqueue_enable_cb(vq)); + spin_unlock_irqrestore(&vcrypto->ctrl_lock, flags); +} + +int virtio_crypto_ctrl_vq_request(struct virtio_crypto *vcrypto, struct scatterlist *sgs[], + unsigned int out_sgs, unsigned int in_sgs, + struct virtio_crypto_ctrl_request *vc_ctrl_req) +{ + int err; + unsigned long flags; + + init_completion(&vc_ctrl_req->compl); + + spin_lock_irqsave(&vcrypto->ctrl_lock, flags); + err = virtqueue_add_sgs(vcrypto->ctrl_vq, sgs, out_sgs, in_sgs, vc_ctrl_req, GFP_ATOMIC); + if (err < 0) { + spin_unlock_irqrestore(&vcrypto->ctrl_lock, flags); + return err; + } + + virtqueue_kick(vcrypto->ctrl_vq); + spin_unlock_irqrestore(&vcrypto->ctrl_lock, flags); + + wait_for_completion(&vc_ctrl_req->compl); + + return 0; +} + static void virtcrypto_dataq_callback(struct virtqueue *vq) { struct virtio_crypto *vcrypto = vq->vdev->priv; @@ -73,7 +123,7 @@ static int virtcrypto_find_vqs(struct virtio_crypto *vi) goto err_names; /* Parameters for control virtqueue */ - callbacks[total_vqs - 1] = NULL; + callbacks[total_vqs - 1] = virtcrypto_ctrlq_callback; names[total_vqs - 1] = "controlq"; /* Allocate/initialize parameters for data virtqueues */ @@ -94,7 +144,8 @@ static int virtcrypto_find_vqs(struct virtio_crypto *vi) spin_lock_init(&vi->data_vq[i].lock); vi->data_vq[i].vq = vqs[i]; /* Initialize crypto engine */ - vi->data_vq[i].engine = crypto_engine_alloc_init(dev, 1); + vi->data_vq[i].engine = crypto_engine_alloc_init_and_set(dev, true, NULL, true, + virtqueue_get_vring_size(vqs[i])); if (!vi->data_vq[i].engine) { ret = -ENOMEM; goto err_engine; @@ -187,9 +238,9 @@ static int virtcrypto_init_vqs(struct virtio_crypto *vi) if (ret) goto err_free; - get_online_cpus(); + cpus_read_lock(); virtcrypto_set_affinity(vi); - put_online_cpus(); + cpus_read_unlock(); return 0; @@ -297,6 +348,7 @@ static int virtcrypto_probe(struct virtio_device *vdev) u32 mac_algo_l = 0; u32 mac_algo_h = 0; u32 aead_algo = 0; + u32 akcipher_algo = 0; u32 crypto_services = 0; if (!virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) @@ -348,6 +400,9 @@ static int virtcrypto_probe(struct virtio_device *vdev) mac_algo_h, &mac_algo_h); virtio_cread(vdev, struct virtio_crypto_config, aead_algo, &aead_algo); + if (crypto_services & (1 << VIRTIO_CRYPTO_SERVICE_AKCIPHER)) + virtio_cread(vdev, struct virtio_crypto_config, + akcipher_algo, &akcipher_algo); /* Add virtio crypto device to global table */ err = virtcrypto_devmgr_add_dev(vcrypto); @@ -374,7 +429,7 @@ static int virtcrypto_probe(struct virtio_device *vdev) vcrypto->mac_algo_h = mac_algo_h; vcrypto->hash_algo = hash_algo; vcrypto->aead_algo = aead_algo; - + vcrypto->akcipher_algo = akcipher_algo; dev_info(&vdev->dev, "max_queues: %u, max_cipher_key_len: %u, max_auth_key_len: %u, max_size 0x%llx\n", diff --git a/drivers/crypto/virtio/virtio_crypto_mgr.c b/drivers/crypto/virtio/virtio_crypto_mgr.c index 6860f8180c7c1..70e778aac0f2c 100644 --- a/drivers/crypto/virtio/virtio_crypto_mgr.c +++ b/drivers/crypto/virtio/virtio_crypto_mgr.c @@ -237,8 +237,14 @@ struct virtio_crypto *virtcrypto_get_dev_node(int node, uint32_t service, */ int virtcrypto_dev_start(struct virtio_crypto *vcrypto) { - if (virtio_crypto_algs_register(vcrypto)) { - pr_err("virtio_crypto: Failed to register crypto algs\n"); + if (virtio_crypto_skcipher_algs_register(vcrypto)) { + pr_err("virtio_crypto: Failed to register crypto skcipher algs\n"); + return -EFAULT; + } + + if (virtio_crypto_akcipher_algs_register(vcrypto)) { + pr_err("virtio_crypto: Failed to register crypto akcipher algs\n"); + virtio_crypto_skcipher_algs_unregister(vcrypto); return -EFAULT; } @@ -257,7 +263,8 @@ int virtcrypto_dev_start(struct virtio_crypto *vcrypto) */ void virtcrypto_dev_stop(struct virtio_crypto *vcrypto) { - virtio_crypto_algs_unregister(vcrypto); + virtio_crypto_skcipher_algs_unregister(vcrypto); + virtio_crypto_akcipher_algs_unregister(vcrypto); } /* @@ -312,6 +319,10 @@ bool virtcrypto_algo_is_supported(struct virtio_crypto *vcrypto, case VIRTIO_CRYPTO_SERVICE_AEAD: algo_mask = vcrypto->aead_algo; break; + + case VIRTIO_CRYPTO_SERVICE_AKCIPHER: + algo_mask = vcrypto->akcipher_algo; + break; } if (!(algo_mask & (1u << algo))) diff --git a/drivers/crypto/virtio/virtio_crypto_algs.c b/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c similarity index 84% rename from drivers/crypto/virtio/virtio_crypto_algs.c rename to drivers/crypto/virtio/virtio_crypto_skcipher_algs.c index ac420b201dd81..66ce16ab4046f 100644 --- a/drivers/crypto/virtio/virtio_crypto_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c @@ -117,11 +117,14 @@ static int virtio_crypto_alg_ablkcipher_init_session( int encrypt) { struct scatterlist outhdr, key_sg, inhdr, *sgs[3]; - unsigned int tmp; struct virtio_crypto *vcrypto = ctx->vcrypto; int op = encrypt ? VIRTIO_CRYPTO_OP_ENCRYPT : VIRTIO_CRYPTO_OP_DECRYPT; int err; unsigned int num_out = 0, num_in = 0; + struct virtio_crypto_op_ctrl_req *ctrl; + struct virtio_crypto_session_input *input; + struct virtio_crypto_sym_create_session_req *sym_create_session; + struct virtio_crypto_ctrl_request *vc_ctrl_req; /* * Avoid to do DMA from the stack, switch to using @@ -132,26 +135,29 @@ static int virtio_crypto_alg_ablkcipher_init_session( if (!cipher_key) return -ENOMEM; - spin_lock(&vcrypto->ctrl_lock); + vc_ctrl_req = kzalloc(sizeof(*vc_ctrl_req), GFP_KERNEL); + if (!vc_ctrl_req) { + err = -ENOMEM; + goto out; + } + /* Pad ctrl header */ - vcrypto->ctrl.header.opcode = - cpu_to_le32(VIRTIO_CRYPTO_CIPHER_CREATE_SESSION); - vcrypto->ctrl.header.algo = cpu_to_le32(alg); + ctrl = &vc_ctrl_req->ctrl; + ctrl->header.opcode = cpu_to_le32(VIRTIO_CRYPTO_CIPHER_CREATE_SESSION); + ctrl->header.algo = cpu_to_le32(alg); /* Set the default dataqueue id to 0 */ - vcrypto->ctrl.header.queue_id = 0; + ctrl->header.queue_id = 0; - vcrypto->input.status = cpu_to_le32(VIRTIO_CRYPTO_ERR); + input = &vc_ctrl_req->input; + input->status = cpu_to_le32(VIRTIO_CRYPTO_ERR); /* Pad cipher's parameters */ - vcrypto->ctrl.u.sym_create_session.op_type = - cpu_to_le32(VIRTIO_CRYPTO_SYM_OP_CIPHER); - vcrypto->ctrl.u.sym_create_session.u.cipher.para.algo = - vcrypto->ctrl.header.algo; - vcrypto->ctrl.u.sym_create_session.u.cipher.para.keylen = - cpu_to_le32(keylen); - vcrypto->ctrl.u.sym_create_session.u.cipher.para.op = - cpu_to_le32(op); - - sg_init_one(&outhdr, &vcrypto->ctrl, sizeof(vcrypto->ctrl)); + sym_create_session = &ctrl->u.sym_create_session; + sym_create_session->op_type = cpu_to_le32(VIRTIO_CRYPTO_SYM_OP_CIPHER); + sym_create_session->u.cipher.para.algo = ctrl->header.algo; + sym_create_session->u.cipher.para.keylen = cpu_to_le32(keylen); + sym_create_session->u.cipher.para.op = cpu_to_le32(op); + + sg_init_one(&outhdr, ctrl, sizeof(*ctrl)); sgs[num_out++] = &outhdr; /* Set key */ @@ -159,45 +165,30 @@ static int virtio_crypto_alg_ablkcipher_init_session( sgs[num_out++] = &key_sg; /* Return status and session id back */ - sg_init_one(&inhdr, &vcrypto->input, sizeof(vcrypto->input)); + sg_init_one(&inhdr, input, sizeof(*input)); sgs[num_out + num_in++] = &inhdr; - err = virtqueue_add_sgs(vcrypto->ctrl_vq, sgs, num_out, - num_in, vcrypto, GFP_ATOMIC); - if (err < 0) { - spin_unlock(&vcrypto->ctrl_lock); - kzfree(cipher_key); - return err; - } - virtqueue_kick(vcrypto->ctrl_vq); + err = virtio_crypto_ctrl_vq_request(vcrypto, sgs, num_out, num_in, vc_ctrl_req); + if (err < 0) + goto out; - /* - * Trapping into the hypervisor, so the request should be - * handled immediately. - */ - while (!virtqueue_get_buf(vcrypto->ctrl_vq, &tmp) && - !virtqueue_is_broken(vcrypto->ctrl_vq)) - cpu_relax(); - - if (le32_to_cpu(vcrypto->input.status) != VIRTIO_CRYPTO_OK) { - spin_unlock(&vcrypto->ctrl_lock); + if (le32_to_cpu(input->status) != VIRTIO_CRYPTO_OK) { pr_err("virtio_crypto: Create session failed status: %u\n", - le32_to_cpu(vcrypto->input.status)); - kzfree(cipher_key); - return -EINVAL; + le32_to_cpu(input->status)); + err = -EINVAL; + goto out; } if (encrypt) - ctx->enc_sess_info.session_id = - le64_to_cpu(vcrypto->input.session_id); + ctx->enc_sess_info.session_id = le64_to_cpu(input->session_id); else - ctx->dec_sess_info.session_id = - le64_to_cpu(vcrypto->input.session_id); - - spin_unlock(&vcrypto->ctrl_lock); + ctx->dec_sess_info.session_id = le64_to_cpu(input->session_id); + err = 0; +out: + kfree(vc_ctrl_req); kzfree(cipher_key); - return 0; + return err; } static int virtio_crypto_alg_ablkcipher_close_session( @@ -205,60 +196,55 @@ static int virtio_crypto_alg_ablkcipher_close_session( int encrypt) { struct scatterlist outhdr, status_sg, *sgs[2]; - unsigned int tmp; struct virtio_crypto_destroy_session_req *destroy_session; struct virtio_crypto *vcrypto = ctx->vcrypto; int err; unsigned int num_out = 0, num_in = 0; + struct virtio_crypto_op_ctrl_req *ctrl; + struct virtio_crypto_inhdr *ctrl_status; + struct virtio_crypto_ctrl_request *vc_ctrl_req; - spin_lock(&vcrypto->ctrl_lock); - vcrypto->ctrl_status.status = VIRTIO_CRYPTO_ERR; + vc_ctrl_req = kzalloc(sizeof(*vc_ctrl_req), GFP_KERNEL); + if (!vc_ctrl_req) + return -ENOMEM; + + ctrl_status = &vc_ctrl_req->ctrl_status; + ctrl_status->status = VIRTIO_CRYPTO_ERR; /* Pad ctrl header */ - vcrypto->ctrl.header.opcode = - cpu_to_le32(VIRTIO_CRYPTO_CIPHER_DESTROY_SESSION); + ctrl = &vc_ctrl_req->ctrl; + ctrl->header.opcode = cpu_to_le32(VIRTIO_CRYPTO_CIPHER_DESTROY_SESSION); /* Set the default virtqueue id to 0 */ - vcrypto->ctrl.header.queue_id = 0; + ctrl->header.queue_id = 0; - destroy_session = &vcrypto->ctrl.u.destroy_session; + destroy_session = &ctrl->u.destroy_session; if (encrypt) - destroy_session->session_id = - cpu_to_le64(ctx->enc_sess_info.session_id); + destroy_session->session_id = cpu_to_le64(ctx->enc_sess_info.session_id); else - destroy_session->session_id = - cpu_to_le64(ctx->dec_sess_info.session_id); + destroy_session->session_id = cpu_to_le64(ctx->dec_sess_info.session_id); - sg_init_one(&outhdr, &vcrypto->ctrl, sizeof(vcrypto->ctrl)); + sg_init_one(&outhdr, ctrl, sizeof(*ctrl)); sgs[num_out++] = &outhdr; /* Return status and session id back */ - sg_init_one(&status_sg, &vcrypto->ctrl_status.status, - sizeof(vcrypto->ctrl_status.status)); + sg_init_one(&status_sg, &ctrl_status->status, sizeof(ctrl_status->status)); sgs[num_out + num_in++] = &status_sg; - err = virtqueue_add_sgs(vcrypto->ctrl_vq, sgs, num_out, - num_in, vcrypto, GFP_ATOMIC); - if (err < 0) { - spin_unlock(&vcrypto->ctrl_lock); - return err; - } - virtqueue_kick(vcrypto->ctrl_vq); - - while (!virtqueue_get_buf(vcrypto->ctrl_vq, &tmp) && - !virtqueue_is_broken(vcrypto->ctrl_vq)) - cpu_relax(); + err = virtio_crypto_ctrl_vq_request(vcrypto, sgs, num_out, num_in, vc_ctrl_req); + if (err < 0) + goto out; - if (vcrypto->ctrl_status.status != VIRTIO_CRYPTO_OK) { - spin_unlock(&vcrypto->ctrl_lock); + if (ctrl_status->status != VIRTIO_CRYPTO_OK) { pr_err("virtio_crypto: Close session failed status: %u, session_id: 0x%llx\n", - vcrypto->ctrl_status.status, - destroy_session->session_id); + ctrl_status->status, destroy_session->session_id); return -EINVAL; } - spin_unlock(&vcrypto->ctrl_lock); - return 0; + err = 0; +out: + kfree(vc_ctrl_req); + return err; } static int virtio_crypto_alg_ablkcipher_init_sessions( @@ -621,7 +607,7 @@ static struct virtio_crypto_algo virtio_crypto_algs[] = { { }, } }; -int virtio_crypto_algs_register(struct virtio_crypto *vcrypto) +int virtio_crypto_skcipher_algs_register(struct virtio_crypto *vcrypto) { int ret = 0; int i = 0; @@ -652,7 +638,7 @@ int virtio_crypto_algs_register(struct virtio_crypto *vcrypto) return ret; } -void virtio_crypto_algs_unregister(struct virtio_crypto *vcrypto) +void virtio_crypto_skcipher_algs_unregister(struct virtio_crypto *vcrypto) { int i = 0; diff --git a/drivers/crypto/vmx/Kconfig b/drivers/crypto/vmx/Kconfig index c85fab7ef0bdd..b2c28b87f14b3 100644 --- a/drivers/crypto/vmx/Kconfig +++ b/drivers/crypto/vmx/Kconfig @@ -2,7 +2,11 @@ config CRYPTO_DEV_VMX_ENCRYPT tristate "Encryption acceleration support on P8 CPU" depends on CRYPTO_DEV_VMX + select CRYPTO_AES + select CRYPTO_CBC + select CRYPTO_CTR select CRYPTO_GHASH + select CRYPTO_XTS default m help Support for VMX cryptographic acceleration instructions on Power8 CPU. diff --git a/drivers/dax/device.c b/drivers/dax/device.c index 1af823b2fe6bd..4c0af2eb7e196 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -377,6 +377,7 @@ static int dax_open(struct inode *inode, struct file *filp) inode->i_mapping->a_ops = &dev_dax_aops; filp->f_mapping = inode->i_mapping; filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping); + filp->f_sb_err = file_sample_sb_err(filp); filp->private_data = dev_dax; inode->i_flags = S_DAX; diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 8074e5de815b9..5005b92b197d7 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -691,6 +691,7 @@ static int dax_fs_init(void) static void dax_fs_exit(void) { kern_unmount(dax_mnt); + rcu_barrier(); kmem_cache_destroy(dax_cache); } diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c index c4873bb791f88..94e1727d29998 100644 --- a/drivers/devfreq/event/exynos-ppmu.c +++ b/drivers/devfreq/event/exynos-ppmu.c @@ -514,15 +514,19 @@ static int of_get_devfreq_events(struct device_node *np, count = of_get_child_count(events_np); desc = devm_kcalloc(dev, count, sizeof(*desc), GFP_KERNEL); - if (!desc) + if (!desc) { + of_node_put(events_np); return -ENOMEM; + } info->num_events = count; of_id = of_match_device(exynos_ppmu_id_match, dev); if (of_id) info->ppmu_type = (enum exynos_ppmu_type)of_id->data; - else + else { + of_node_put(events_np); return -EINVAL; + } j = 0; for_each_child_of_node(events_np, node) { diff --git a/drivers/devfreq/rk3399_dmc.c b/drivers/devfreq/rk3399_dmc.c index 027769e39f9b8..a491dcfa1dd07 100644 --- a/drivers/devfreq/rk3399_dmc.c +++ b/drivers/devfreq/rk3399_dmc.c @@ -485,6 +485,8 @@ static int rk3399_dmcfreq_remove(struct platform_device *pdev) { struct rk3399_dmcfreq *dmcfreq = dev_get_drvdata(&pdev->dev); + devfreq_event_disable_edev(dmcfreq->edev); + /* * Before remove the opp table we need to unregister the opp notifier. */ diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 758de0e9b2ddc..16bbc9bc9e6d1 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -79,6 +79,7 @@ static void dma_buf_release(struct dentry *dentry) if (dmabuf->resv == (struct dma_resv *)&dmabuf[1]) dma_resv_fini(dmabuf->resv); + WARN_ON(!list_empty(&dmabuf->attachments)); module_put(dmabuf->owner); kfree(dmabuf->name); kfree(dmabuf); diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index d3fbd950be944..3e07f961e2f3d 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -104,7 +104,11 @@ static bool dma_fence_array_signaled(struct dma_fence *fence) { struct dma_fence_array *array = to_dma_fence_array(fence); - return atomic_read(&array->num_pending) <= 0; + if (atomic_read(&array->num_pending) > 0) + return false; + + dma_fence_array_clear_pending_error(array); + return true; } static void dma_fence_array_release(struct dma_fence *fence) diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c index 9635897458a09..c6e9b7bd7618c 100644 --- a/drivers/dma-buf/udmabuf.c +++ b/drivers/dma-buf/udmabuf.c @@ -24,8 +24,11 @@ static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct udmabuf *ubuf = vma->vm_private_data; + pgoff_t pgoff = vmf->pgoff; - vmf->page = ubuf->pages[vmf->pgoff]; + if (pgoff >= ubuf->pagecount) + return VM_FAULT_SIGBUS; + vmf->page = ubuf->pages[pgoff]; get_page(vmf->page); return 0; } @@ -145,6 +148,10 @@ static long udmabuf_create(const struct udmabuf_create_list *head, if (ubuf->pagecount > pglimit) goto err; } + + if (!ubuf->pagecount) + goto err; + ubuf->pages = kmalloc_array(ubuf->pagecount, sizeof(*ubuf->pages), GFP_KERNEL); if (!ubuf->pages) { diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index a32d0d7152475..1322461f1f3c5 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -276,7 +276,7 @@ config INTEL_IDMA64 config INTEL_IOATDMA tristate "Intel I/OAT DMA support" - depends on PCI && X86_64 + depends on PCI && X86_64 && !UML select DMA_ENGINE select DMA_ENGINE_RAID select DCA diff --git a/drivers/dma/acpi-dma.c b/drivers/dma/acpi-dma.c index dcbcb712de6e8..731f453ecb51f 100644 --- a/drivers/dma/acpi-dma.c +++ b/drivers/dma/acpi-dma.c @@ -70,10 +70,14 @@ static int acpi_dma_parse_resource_group(const struct acpi_csrt_group *grp, si = (const struct acpi_csrt_shared_info *)&grp[1]; - /* Match device by MMIO and IRQ */ + /* Match device by MMIO */ if (si->mmio_base_low != lower_32_bits(mem) || - si->mmio_base_high != upper_32_bits(mem) || - si->gsi_interrupt != irq) + si->mmio_base_high != upper_32_bits(mem)) + return 0; + + /* Match device by Linux vIRQ */ + ret = acpi_register_gsi(NULL, si->gsi_interrupt, si->interrupt_mode, si->interrupt_polarity); + if (ret != irq) return 0; dev_dbg(&adev->dev, "matches with %.4s%04X (rev %u)\n", diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index b58ac720d9a12..a406b3c0d170a 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -89,6 +89,7 @@ #define AT_XDMAC_CNDC_NDE (0x1 << 0) /* Channel x Next Descriptor Enable */ #define AT_XDMAC_CNDC_NDSUP (0x1 << 1) /* Channel x Next Descriptor Source Update */ #define AT_XDMAC_CNDC_NDDUP (0x1 << 2) /* Channel x Next Descriptor Destination Update */ +#define AT_XDMAC_CNDC_NDVIEW_MASK GENMASK(28, 27) #define AT_XDMAC_CNDC_NDVIEW_NDV0 (0x0 << 3) /* Channel x Next Descriptor View 0 */ #define AT_XDMAC_CNDC_NDVIEW_NDV1 (0x1 << 3) /* Channel x Next Descriptor View 1 */ #define AT_XDMAC_CNDC_NDVIEW_NDV2 (0x2 << 3) /* Channel x Next Descriptor View 2 */ @@ -145,7 +146,7 @@ #define AT_XDMAC_CC_WRIP (0x1 << 23) /* Write in Progress (read only) */ #define AT_XDMAC_CC_WRIP_DONE (0x0 << 23) #define AT_XDMAC_CC_WRIP_IN_PROGRESS (0x1 << 23) -#define AT_XDMAC_CC_PERID(i) (0x7f & (i) << 24) /* Channel Peripheral Identifier */ +#define AT_XDMAC_CC_PERID(i) ((0x7f & (i)) << 24) /* Channel Peripheral Identifier */ #define AT_XDMAC_CDS_MSP 0x2C /* Channel Data Stride Memory Set Pattern */ #define AT_XDMAC_CSUS 0x30 /* Channel Source Microblock Stride */ #define AT_XDMAC_CDUS 0x34 /* Channel Destination Microblock Stride */ @@ -220,15 +221,15 @@ struct at_xdmac { /* Linked List Descriptor */ struct at_xdmac_lld { - dma_addr_t mbr_nda; /* Next Descriptor Member */ - u32 mbr_ubc; /* Microblock Control Member */ - dma_addr_t mbr_sa; /* Source Address Member */ - dma_addr_t mbr_da; /* Destination Address Member */ - u32 mbr_cfg; /* Configuration Register */ - u32 mbr_bc; /* Block Control Register */ - u32 mbr_ds; /* Data Stride Register */ - u32 mbr_sus; /* Source Microblock Stride Register */ - u32 mbr_dus; /* Destination Microblock Stride Register */ + u32 mbr_nda; /* Next Descriptor Member */ + u32 mbr_ubc; /* Microblock Control Member */ + u32 mbr_sa; /* Source Address Member */ + u32 mbr_da; /* Destination Address Member */ + u32 mbr_cfg; /* Configuration Register */ + u32 mbr_bc; /* Block Control Register */ + u32 mbr_ds; /* Data Stride Register */ + u32 mbr_sus; /* Source Microblock Stride Register */ + u32 mbr_dus; /* Destination Microblock Stride Register */ }; /* 64-bit alignment needed to update CNDA and CUBC registers in an atomic way. */ @@ -338,9 +339,6 @@ static void at_xdmac_start_xfer(struct at_xdmac_chan *atchan, dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, first); - if (at_xdmac_chan_is_enabled(atchan)) - return; - /* Set transfer as active to not try to start it again. */ first->active_xfer = true; @@ -356,7 +354,8 @@ static void at_xdmac_start_xfer(struct at_xdmac_chan *atchan, */ if (at_xdmac_chan_is_cyclic(atchan)) reg = AT_XDMAC_CNDC_NDVIEW_NDV1; - else if (first->lld.mbr_ubc & AT_XDMAC_MBR_UBC_NDV3) + else if ((first->lld.mbr_ubc & + AT_XDMAC_CNDC_NDVIEW_MASK) == AT_XDMAC_MBR_UBC_NDV3) reg = AT_XDMAC_CNDC_NDVIEW_NDV3; else reg = AT_XDMAC_CNDC_NDVIEW_NDV2; @@ -427,13 +426,12 @@ static dma_cookie_t at_xdmac_tx_submit(struct dma_async_tx_descriptor *tx) spin_lock_irqsave(&atchan->lock, irqflags); cookie = dma_cookie_assign(tx); + list_add_tail(&desc->xfer_node, &atchan->xfers_list); + spin_unlock_irqrestore(&atchan->lock, irqflags); + dev_vdbg(chan2dev(tx->chan), "%s: atchan 0x%p, add desc 0x%p to xfers_list\n", __func__, atchan, desc); - list_add_tail(&desc->xfer_node, &atchan->xfers_list); - if (list_is_singular(&atchan->xfers_list)) - at_xdmac_start_xfer(atchan, desc); - spin_unlock_irqrestore(&atchan->lock, irqflags); return cookie; } @@ -1392,7 +1390,7 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, { struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device); - struct at_xdmac_desc *desc, *_desc; + struct at_xdmac_desc *desc, *_desc, *iter; struct list_head *descs_list; enum dma_status ret; int residue, retry; @@ -1507,11 +1505,13 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, * microblock. */ descs_list = &desc->descs_list; - list_for_each_entry_safe(desc, _desc, descs_list, desc_node) { - dwidth = at_xdmac_get_dwidth(desc->lld.mbr_cfg); - residue -= (desc->lld.mbr_ubc & 0xffffff) << dwidth; - if ((desc->lld.mbr_nda & 0xfffffffc) == cur_nda) + list_for_each_entry_safe(iter, _desc, descs_list, desc_node) { + dwidth = at_xdmac_get_dwidth(iter->lld.mbr_cfg); + residue -= (iter->lld.mbr_ubc & 0xffffff) << dwidth; + if ((iter->lld.mbr_nda & 0xfffffffc) == cur_nda) { + desc = iter; break; + } } residue += cur_ubc << dwidth; @@ -1568,14 +1568,17 @@ static void at_xdmac_handle_cyclic(struct at_xdmac_chan *atchan) struct at_xdmac_desc *desc; struct dma_async_tx_descriptor *txd; - if (!list_empty(&atchan->xfers_list)) { - desc = list_first_entry(&atchan->xfers_list, - struct at_xdmac_desc, xfer_node); - txd = &desc->tx_dma_desc; - - if (txd->flags & DMA_PREP_INTERRUPT) - dmaengine_desc_get_callback_invoke(txd, NULL); + spin_lock_irq(&atchan->lock); + if (list_empty(&atchan->xfers_list)) { + spin_unlock_irq(&atchan->lock); + return; } + desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc, + xfer_node); + spin_unlock_irq(&atchan->lock); + txd = &desc->tx_dma_desc; + if (txd->flags & DMA_PREP_INTERRUPT) + dmaengine_desc_get_callback_invoke(txd, NULL); } static void at_xdmac_handle_error(struct at_xdmac_chan *atchan) @@ -1725,11 +1728,13 @@ static irqreturn_t at_xdmac_interrupt(int irq, void *dev_id) static void at_xdmac_issue_pending(struct dma_chan *chan) { struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + unsigned long flags; dev_dbg(chan2dev(&atchan->chan), "%s\n", __func__); - if (!at_xdmac_chan_is_cyclic(atchan)) - at_xdmac_advance_work(atchan); + spin_lock_irqsave(&atchan->lock, flags); + at_xdmac_advance_work(atchan); + spin_unlock_irqrestore(&atchan->lock, flags); return; } @@ -1843,6 +1848,11 @@ static int at_xdmac_alloc_chan_resources(struct dma_chan *chan) for (i = 0; i < init_nr_desc_per_channel; i++) { desc = at_xdmac_alloc_desc(chan, GFP_ATOMIC); if (!desc) { + if (i == 0) { + dev_warn(chan2dev(chan), + "can't allocate any descriptors\n"); + return -EIO; + } dev_warn(chan2dev(chan), "only %d descriptors have been allocated\n", i); break; diff --git a/drivers/dma/dmaengine.h b/drivers/dma/dmaengine.h index 501c0b063f852..302f13efd35d9 100644 --- a/drivers/dma/dmaengine.h +++ b/drivers/dma/dmaengine.h @@ -168,7 +168,7 @@ dmaengine_desc_get_callback_invoke(struct dma_async_tx_descriptor *tx, static inline bool dmaengine_desc_callback_valid(struct dmaengine_desc_callback *cb) { - return (cb->callback) ? true : false; + return cb->callback || cb->callback_result; } #endif diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 67736c801f3ca..8ec7a7041e840 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -377,7 +377,6 @@ struct sdma_channel { unsigned long watermark_level; u32 shp_addr, per_addr; enum dma_status status; - bool context_loaded; struct imx_dma_data data; struct work_struct terminate_worker; }; @@ -988,9 +987,6 @@ static int sdma_load_context(struct sdma_channel *sdmac) int ret; unsigned long flags; - if (sdmac->context_loaded) - return 0; - if (sdmac->direction == DMA_DEV_TO_MEM) load_address = sdmac->pc_from_device; else if (sdmac->direction == DMA_DEV_TO_DEV) @@ -1033,8 +1029,6 @@ static int sdma_load_context(struct sdma_channel *sdmac) spin_unlock_irqrestore(&sdma->channel_0_lock, flags); - sdmac->context_loaded = true; - return ret; } @@ -1074,7 +1068,6 @@ static void sdma_channel_terminate_work(struct work_struct *work) sdmac->desc = NULL; spin_unlock_irqrestore(&sdmac->vc.lock, flags); vchan_dma_desc_free_list(&sdmac->vc, &head); - sdmac->context_loaded = false; } static int sdma_disable_channel_async(struct dma_chan *chan) @@ -1141,7 +1134,6 @@ static void sdma_set_watermarklevel_for_p2p(struct sdma_channel *sdmac) static int sdma_config_channel(struct dma_chan *chan) { struct sdma_channel *sdmac = to_sdma_chan(chan); - int ret; sdma_disable_channel(chan); @@ -1181,9 +1173,7 @@ static int sdma_config_channel(struct dma_chan *chan) sdmac->watermark_level = 0; /* FIXME: M3_BASE_ADDRESS */ } - ret = sdma_load_context(sdmac); - - return ret; + return 0; } static int sdma_set_channel_priority(struct sdma_channel *sdmac, @@ -1335,7 +1325,6 @@ static void sdma_free_chan_resources(struct dma_chan *chan) sdmac->event_id0 = 0; sdmac->event_id1 = 0; - sdmac->context_loaded = false; sdma_set_channel_priority(sdmac, 0); @@ -1795,7 +1784,7 @@ static int sdma_event_remap(struct sdma_engine *sdma) u32 reg, val, shift, num_map, i; int ret = 0; - if (IS_ERR(np) || IS_ERR(gpr_np)) + if (IS_ERR(np) || !gpr_np) goto out; event_remap = of_find_property(np, propname, NULL); @@ -1843,7 +1832,7 @@ static int sdma_event_remap(struct sdma_engine *sdma) } out: - if (!IS_ERR(gpr_np)) + if (gpr_np) of_node_put(gpr_np); return ret; @@ -2219,7 +2208,7 @@ MODULE_DESCRIPTION("i.MX SDMA driver"); #if IS_ENABLED(CONFIG_SOC_IMX6Q) MODULE_FIRMWARE("imx/sdma/sdma-imx6q.bin"); #endif -#if IS_ENABLED(CONFIG_SOC_IMX7D) +#if IS_ENABLED(CONFIG_SOC_IMX7D) || IS_ENABLED(CONFIG_SOC_IMX8M) MODULE_FIRMWARE("imx/sdma/sdma-imx7d.bin"); #endif MODULE_LICENSE("GPL"); diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c index a6a6dc432db82..0387625382fb5 100644 --- a/drivers/dma/ioat/init.c +++ b/drivers/dma/ioat/init.c @@ -1267,7 +1267,7 @@ static void ioat_resume(struct ioatdma_device *ioat_dma) #define DRV_NAME "ioatdma" static pci_ers_result_t ioat_pcie_error_detected(struct pci_dev *pdev, - enum pci_channel_state error) + pci_channel_state_t error) { dev_dbg(&pdev->dev, "%s: PCIe AER error %d\n", DRV_NAME, error); diff --git a/drivers/dma/mediatek/mtk-uart-apdma.c b/drivers/dma/mediatek/mtk-uart-apdma.c index 9c0ea13ca7883..7718d09e3d29f 100644 --- a/drivers/dma/mediatek/mtk-uart-apdma.c +++ b/drivers/dma/mediatek/mtk-uart-apdma.c @@ -274,7 +274,7 @@ static int mtk_uart_apdma_alloc_chan_resources(struct dma_chan *chan) unsigned int status; int ret; - ret = pm_runtime_get_sync(mtkd->ddev.dev); + ret = pm_runtime_resume_and_get(mtkd->ddev.dev); if (ret < 0) { pm_runtime_put_noidle(chan->device->dev); return ret; @@ -288,18 +288,21 @@ static int mtk_uart_apdma_alloc_chan_resources(struct dma_chan *chan) ret = readx_poll_timeout(readl, c->base + VFF_EN, status, !status, 10, 100); if (ret) - return ret; + goto err_pm; ret = request_irq(c->irq, mtk_uart_apdma_irq_handler, IRQF_TRIGGER_NONE, KBUILD_MODNAME, chan); if (ret < 0) { dev_err(chan->device->dev, "Can't request dma IRQ\n"); - return -EINVAL; + ret = -EINVAL; + goto err_pm; } if (mtkd->support_33bits) mtk_uart_apdma_write(c, VFF_4G_SUPPORT, VFF_4G_SUPPORT_CLR_B); +err_pm: + pm_runtime_put_noidle(mtkd->ddev.dev); return ret; } diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c index 7fe494fc50d4e..ec186cf8b8af1 100644 --- a/drivers/dma/mmp_pdma.c +++ b/drivers/dma/mmp_pdma.c @@ -728,12 +728,6 @@ static int mmp_pdma_config_write(struct dma_chan *dchan, chan->dir = direction; chan->dev_addr = addr; - /* FIXME: drivers should be ported over to use the filter - * function. Once that's done, the following two lines can - * be removed. - */ - if (cfg->slave_id) - chan->drcmr = cfg->slave_id; return 0; } diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 9a94d5b9e0590..77a6495bb6b19 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2585,7 +2585,7 @@ static struct dma_pl330_desc *pl330_get_desc(struct dma_pl330_chan *pch) /* If the DMAC pool is empty, alloc new */ if (!desc) { - DEFINE_SPINLOCK(lock); + static DEFINE_SPINLOCK(lock); LIST_HEAD(pool); if (!add_desc(&pool, &lock, GFP_ATOMIC, 1)) diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c index 349fb312c8725..b4ef4f19f7dec 100644 --- a/drivers/dma/pxa_dma.c +++ b/drivers/dma/pxa_dma.c @@ -911,13 +911,6 @@ static void pxad_get_config(struct pxad_chan *chan, *dcmd |= PXA_DCMD_BURST16; else if (maxburst == 32) *dcmd |= PXA_DCMD_BURST32; - - /* FIXME: drivers should be ported over to use the filter - * function. Once that's done, the following two lines can - * be removed. - */ - if (chan->cfg.slave_id) - chan->drcmr = chan->cfg.slave_id; } static struct dma_async_tx_descriptor * diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index 89eb9ea258149..eba942441e382 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -1825,7 +1825,9 @@ static int rcar_dmac_probe(struct platform_device *pdev) platform_set_drvdata(pdev, dmac); dmac->dev->dma_parms = &dmac->parms; dma_set_max_seg_size(dmac->dev, RCAR_DMATCR_MASK); - dma_set_mask_and_coherent(dmac->dev, DMA_BIT_MASK(40)); + ret = dma_set_mask_and_coherent(dmac->dev, DMA_BIT_MASK(40)); + if (ret) + return ret; ret = rcar_dmac_parse_of(&pdev->dev, dmac); if (ret < 0) diff --git a/drivers/dma/sprd-dma.c b/drivers/dma/sprd-dma.c index 8546ad0347208..b966115bfad1d 100644 --- a/drivers/dma/sprd-dma.c +++ b/drivers/dma/sprd-dma.c @@ -1230,6 +1230,7 @@ static const struct of_device_id sprd_dma_match[] = { { .compatible = "sprd,sc9860-dma", }, {}, }; +MODULE_DEVICE_TABLE(of, sprd_dma_match); static int __maybe_unused sprd_dma_runtime_suspend(struct device *dev) { diff --git a/drivers/dma/st_fdma.c b/drivers/dma/st_fdma.c index 67087dbe2f9fa..f7393c19a1ba3 100644 --- a/drivers/dma/st_fdma.c +++ b/drivers/dma/st_fdma.c @@ -873,4 +873,4 @@ MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("STMicroelectronics FDMA engine driver"); MODULE_AUTHOR("Ludovic.barre "); MODULE_AUTHOR("Peter Griffin "); -MODULE_ALIAS("platform: " DRIVER_NAME); +MODULE_ALIAS("platform:" DRIVER_NAME); diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c index ee1cbf3be75d5..c902c24806404 100644 --- a/drivers/dma/stm32-mdma.c +++ b/drivers/dma/stm32-mdma.c @@ -40,7 +40,6 @@ STM32_MDMA_SHIFT(mask)) #define STM32_MDMA_GISR0 0x0000 /* MDMA Int Status Reg 1 */ -#define STM32_MDMA_GISR1 0x0004 /* MDMA Int Status Reg 2 */ /* MDMA Channel x interrupt/status register */ #define STM32_MDMA_CISR(x) (0x40 + 0x40 * (x)) /* x = 0..62 */ @@ -184,7 +183,7 @@ #define STM32_MDMA_CTBR(x) (0x68 + 0x40 * (x)) #define STM32_MDMA_CTBR_DBUS BIT(17) #define STM32_MDMA_CTBR_SBUS BIT(16) -#define STM32_MDMA_CTBR_TSEL_MASK GENMASK(7, 0) +#define STM32_MDMA_CTBR_TSEL_MASK GENMASK(5, 0) #define STM32_MDMA_CTBR_TSEL(n) STM32_MDMA_SET(n, \ STM32_MDMA_CTBR_TSEL_MASK) @@ -196,7 +195,7 @@ #define STM32_MDMA_MAX_BUF_LEN 128 #define STM32_MDMA_MAX_BLOCK_LEN 65536 -#define STM32_MDMA_MAX_CHANNELS 63 +#define STM32_MDMA_MAX_CHANNELS 32 #define STM32_MDMA_MAX_REQUESTS 256 #define STM32_MDMA_MAX_BURST 128 #define STM32_MDMA_VERY_HIGH_PRIORITY 0x11 @@ -1351,21 +1350,11 @@ static irqreturn_t stm32_mdma_irq_handler(int irq, void *devid) /* Find out which channel generates the interrupt */ status = readl_relaxed(dmadev->base + STM32_MDMA_GISR0); - if (status) { - id = __ffs(status); - } else { - status = readl_relaxed(dmadev->base + STM32_MDMA_GISR1); - if (!status) { - dev_dbg(mdma2dev(dmadev), "spurious it\n"); - return IRQ_NONE; - } - id = __ffs(status); - /* - * As GISR0 provides status for channel id from 0 to 31, - * so GISR1 provides status for channel id from 32 to 62 - */ - id += 32; + if (!status) { + dev_dbg(mdma2dev(dmadev), "spurious it\n"); + return IRQ_NONE; } + id = __ffs(status); chan = &dmadev->chan[id]; if (!chan) { diff --git a/drivers/dma/ti/dma-crossbar.c b/drivers/dma/ti/dma-crossbar.c index f255056696eec..26cf62af08fc8 100644 --- a/drivers/dma/ti/dma-crossbar.c +++ b/drivers/dma/ti/dma-crossbar.c @@ -247,6 +247,7 @@ static void *ti_dra7_xbar_route_allocate(struct of_phandle_args *dma_spec, if (dma_spec->args[0] >= xbar->xbar_requests) { dev_err(&pdev->dev, "Invalid XBAR request number: %d\n", dma_spec->args[0]); + put_device(&pdev->dev); return ERR_PTR(-EINVAL); } @@ -254,12 +255,14 @@ static void *ti_dra7_xbar_route_allocate(struct of_phandle_args *dma_spec, dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", 0); if (!dma_spec->np) { dev_err(&pdev->dev, "Can't get DMA master\n"); + put_device(&pdev->dev); return ERR_PTR(-EINVAL); } map = kzalloc(sizeof(*map), GFP_KERNEL); if (!map) { of_node_put(dma_spec->np); + put_device(&pdev->dev); return ERR_PTR(-ENOMEM); } @@ -270,6 +273,8 @@ static void *ti_dra7_xbar_route_allocate(struct of_phandle_args *dma_spec, mutex_unlock(&xbar->mutex); dev_err(&pdev->dev, "Run out of free DMA requests\n"); kfree(map); + of_node_put(dma_spec->np); + put_device(&pdev->dev); return ERR_PTR(-ENOMEM); } set_bit(map->xbar_out, xbar->dma_inuse); diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index ce18bca45ff27..7729b8d22553e 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -2703,7 +2703,7 @@ static int xilinx_dma_probe(struct platform_device *pdev) xdev->ext_addr = false; /* Set the dma mask bits */ - dma_set_mask(xdev->dev, DMA_BIT_MASK(addr_width)); + dma_set_mask_and_coherent(xdev->dev, DMA_BIT_MASK(addr_width)); /* Initialize the DMA engine */ xdev->common.dev = &pdev->dev; diff --git a/drivers/dma/xilinx/zynqmp_dma.c b/drivers/dma/xilinx/zynqmp_dma.c index 84009c5e0f330..b61d0c79dffb6 100644 --- a/drivers/dma/xilinx/zynqmp_dma.c +++ b/drivers/dma/xilinx/zynqmp_dma.c @@ -232,7 +232,7 @@ struct zynqmp_dma_chan { bool is_dmacoherent; struct tasklet_struct tasklet; bool idle; - u32 desc_size; + size_t desc_size; bool err; u32 bus_width; u32 src_burst_len; @@ -489,7 +489,8 @@ static int zynqmp_dma_alloc_chan_resources(struct dma_chan *dchan) } chan->desc_pool_v = dma_alloc_coherent(chan->dev, - (2 * chan->desc_size * ZYNQMP_DMA_NUM_DESCS), + (2 * ZYNQMP_DMA_DESC_SIZE(chan) * + ZYNQMP_DMA_NUM_DESCS), &chan->desc_pool_p, GFP_KERNEL); if (!chan->desc_pool_v) return -ENOMEM; diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 0be3d1b17f034..a09a5041e74c8 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -349,7 +349,7 @@ static int altr_sdram_probe(struct platform_device *pdev) if (irq < 0) { edac_printk(KERN_ERR, EDAC_MC, "No irq %d in DT\n", irq); - return -ENODEV; + return irq; } /* Arria10 has a 2nd IRQ */ diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 18b046c902428..9193fa0fa89e5 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -4,9 +4,6 @@ static struct edac_pci_ctl_info *pci_ctl; -static int report_gart_errors; -module_param(report_gart_errors, int, 0644); - /* * Set by command line parameter. If BIOS has enabled the ECC, this override is * cleared to prevent re-enabling the hardware by this driver. @@ -713,6 +710,205 @@ static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr) return csrow; } +static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) +{ + u64 dram_base_addr, dram_limit_addr, dram_hole_base; + /* We start from the normalized address */ + u64 ret_addr = norm_addr; + + u32 tmp; + + u8 die_id_shift, die_id_mask, socket_id_shift, socket_id_mask; + u8 intlv_num_dies, intlv_num_chan, intlv_num_sockets; + u8 intlv_addr_sel, intlv_addr_bit; + u8 num_intlv_bits, hashed_bit; + u8 lgcy_mmio_hole_en, base = 0; + u8 cs_mask, cs_id = 0; + bool hash_enabled = false; + + /* Read D18F0x1B4 (DramOffset), check if base 1 is used. */ + if (amd_df_indirect_read(nid, 0, 0x1B4, umc, &tmp)) + goto out_err; + + /* Remove HiAddrOffset from normalized address, if enabled: */ + if (tmp & BIT(0)) { + u64 hi_addr_offset = (tmp & GENMASK_ULL(31, 20)) << 8; + + if (norm_addr >= hi_addr_offset) { + ret_addr -= hi_addr_offset; + base = 1; + } + } + + /* Read D18F0x110 (DramBaseAddress). */ + if (amd_df_indirect_read(nid, 0, 0x110 + (8 * base), umc, &tmp)) + goto out_err; + + /* Check if address range is valid. */ + if (!(tmp & BIT(0))) { + pr_err("%s: Invalid DramBaseAddress range: 0x%x.\n", + __func__, tmp); + goto out_err; + } + + lgcy_mmio_hole_en = tmp & BIT(1); + intlv_num_chan = (tmp >> 4) & 0xF; + intlv_addr_sel = (tmp >> 8) & 0x7; + dram_base_addr = (tmp & GENMASK_ULL(31, 12)) << 16; + + /* {0, 1, 2, 3} map to address bits {8, 9, 10, 11} respectively */ + if (intlv_addr_sel > 3) { + pr_err("%s: Invalid interleave address select %d.\n", + __func__, intlv_addr_sel); + goto out_err; + } + + /* Read D18F0x114 (DramLimitAddress). */ + if (amd_df_indirect_read(nid, 0, 0x114 + (8 * base), umc, &tmp)) + goto out_err; + + intlv_num_sockets = (tmp >> 8) & 0x1; + intlv_num_dies = (tmp >> 10) & 0x3; + dram_limit_addr = ((tmp & GENMASK_ULL(31, 12)) << 16) | GENMASK_ULL(27, 0); + + intlv_addr_bit = intlv_addr_sel + 8; + + /* Re-use intlv_num_chan by setting it equal to log2(#channels) */ + switch (intlv_num_chan) { + case 0: intlv_num_chan = 0; break; + case 1: intlv_num_chan = 1; break; + case 3: intlv_num_chan = 2; break; + case 5: intlv_num_chan = 3; break; + case 7: intlv_num_chan = 4; break; + + case 8: intlv_num_chan = 1; + hash_enabled = true; + break; + default: + pr_err("%s: Invalid number of interleaved channels %d.\n", + __func__, intlv_num_chan); + goto out_err; + } + + num_intlv_bits = intlv_num_chan; + + if (intlv_num_dies > 2) { + pr_err("%s: Invalid number of interleaved nodes/dies %d.\n", + __func__, intlv_num_dies); + goto out_err; + } + + num_intlv_bits += intlv_num_dies; + + /* Add a bit if sockets are interleaved. */ + num_intlv_bits += intlv_num_sockets; + + /* Assert num_intlv_bits <= 4 */ + if (num_intlv_bits > 4) { + pr_err("%s: Invalid interleave bits %d.\n", + __func__, num_intlv_bits); + goto out_err; + } + + if (num_intlv_bits > 0) { + u64 temp_addr_x, temp_addr_i, temp_addr_y; + u8 die_id_bit, sock_id_bit, cs_fabric_id; + + /* + * Read FabricBlockInstanceInformation3_CS[BlockFabricID]. + * This is the fabric id for this coherent slave. Use + * umc/channel# as instance id of the coherent slave + * for FICAA. + */ + if (amd_df_indirect_read(nid, 0, 0x50, umc, &tmp)) + goto out_err; + + cs_fabric_id = (tmp >> 8) & 0xFF; + die_id_bit = 0; + + /* If interleaved over more than 1 channel: */ + if (intlv_num_chan) { + die_id_bit = intlv_num_chan; + cs_mask = (1 << die_id_bit) - 1; + cs_id = cs_fabric_id & cs_mask; + } + + sock_id_bit = die_id_bit; + + /* Read D18F1x208 (SystemFabricIdMask). */ + if (intlv_num_dies || intlv_num_sockets) + if (amd_df_indirect_read(nid, 1, 0x208, umc, &tmp)) + goto out_err; + + /* If interleaved over more than 1 die. */ + if (intlv_num_dies) { + sock_id_bit = die_id_bit + intlv_num_dies; + die_id_shift = (tmp >> 24) & 0xF; + die_id_mask = (tmp >> 8) & 0xFF; + + cs_id |= ((cs_fabric_id & die_id_mask) >> die_id_shift) << die_id_bit; + } + + /* If interleaved over more than 1 socket. */ + if (intlv_num_sockets) { + socket_id_shift = (tmp >> 28) & 0xF; + socket_id_mask = (tmp >> 16) & 0xFF; + + cs_id |= ((cs_fabric_id & socket_id_mask) >> socket_id_shift) << sock_id_bit; + } + + /* + * The pre-interleaved address consists of XXXXXXIIIYYYYY + * where III is the ID for this CS, and XXXXXXYYYYY are the + * address bits from the post-interleaved address. + * "num_intlv_bits" has been calculated to tell us how many "I" + * bits there are. "intlv_addr_bit" tells us how many "Y" bits + * there are (where "I" starts). + */ + temp_addr_y = ret_addr & GENMASK_ULL(intlv_addr_bit-1, 0); + temp_addr_i = (cs_id << intlv_addr_bit); + temp_addr_x = (ret_addr & GENMASK_ULL(63, intlv_addr_bit)) << num_intlv_bits; + ret_addr = temp_addr_x | temp_addr_i | temp_addr_y; + } + + /* Add dram base address */ + ret_addr += dram_base_addr; + + /* If legacy MMIO hole enabled */ + if (lgcy_mmio_hole_en) { + if (amd_df_indirect_read(nid, 0, 0x104, umc, &tmp)) + goto out_err; + + dram_hole_base = tmp & GENMASK(31, 24); + if (ret_addr >= dram_hole_base) + ret_addr += (BIT_ULL(32) - dram_hole_base); + } + + if (hash_enabled) { + /* Save some parentheses and grab ls-bit at the end. */ + hashed_bit = (ret_addr >> 12) ^ + (ret_addr >> 18) ^ + (ret_addr >> 21) ^ + (ret_addr >> 30) ^ + cs_id; + + hashed_bit &= BIT(0); + + if (hashed_bit != ((ret_addr >> intlv_addr_bit) & BIT(0))) + ret_addr ^= BIT(intlv_addr_bit); + } + + /* Is calculated system address is above DRAM limit address? */ + if (ret_addr > dram_limit_addr) + goto out_err; + + *sys_addr = ret_addr; + return 0; + +out_err: + return -EINVAL; +} + static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16); /* @@ -790,12 +986,14 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan) #define CS_ODD_PRIMARY BIT(1) #define CS_EVEN_SECONDARY BIT(2) #define CS_ODD_SECONDARY BIT(3) +#define CS_3R_INTERLEAVE BIT(4) #define CS_EVEN (CS_EVEN_PRIMARY | CS_EVEN_SECONDARY) #define CS_ODD (CS_ODD_PRIMARY | CS_ODD_SECONDARY) static int f17_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt) { + u8 base, count = 0; int cs_mode = 0; if (csrow_enabled(2 * dimm, ctrl, pvt)) @@ -808,6 +1006,20 @@ static int f17_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt) if (csrow_sec_enabled(2 * dimm + 1, ctrl, pvt)) cs_mode |= CS_ODD_SECONDARY; + /* + * 3 Rank inteleaving support. + * There should be only three bases enabled and their two masks should + * be equal. + */ + for_each_chip_select(base, ctrl, pvt) + count += csrow_enabled(base, ctrl, pvt); + + if (count == 3 && + pvt->csels[ctrl].csmasks[0] == pvt->csels[ctrl].csmasks[1]) { + edac_dbg(1, "3R interleaving in use.\n"); + cs_mode |= CS_3R_INTERLEAVE; + } + return cs_mode; } @@ -1616,10 +1828,14 @@ static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, * * The MSB is the number of bits in the full mask because BIT[0] is * always 0. + * + * In the special 3 Rank interleaving case, a single bit is flipped + * without swapping with the most significant bit. This can be handled + * by keeping the MSB where it is and ignoring the single zero bit. */ msb = fls(addr_mask_orig) - 1; weight = hweight_long(addr_mask_orig); - num_zero_bits = msb - weight; + num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE); /* Take the number of zero bits off from the top of the mask. */ addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1); @@ -2354,6 +2570,16 @@ static struct amd64_family_type family_types[] = { .dbam_to_cs = f17_addr_mask_to_cs_size, } }, + [F19_M10H_CPUS] = { + .ctl_name = "F19h_M10h", + .f0_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F0, + .f6_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F6, + .max_mcs = 12, + .ops = { + .early_channel_count = f17_early_channel_count, + .dbam_to_cs = f17_addr_mask_to_cs_size, + } + }, }; /* @@ -3397,6 +3623,21 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) break; case 0x19: + if (pvt->model >= 0x10 && pvt->model <= 0x1f) { + fam_type = &family_types[F19_M10H_CPUS]; + pvt->ops = &family_types[F19_M10H_CPUS].ops; + break; + } else if (pvt->model >= 0x20 && pvt->model <= 0x2f) { + fam_type = &family_types[F17_M70H_CPUS]; + pvt->ops = &family_types[F17_M70H_CPUS].ops; + fam_type->ctl_name = "F19h_M20h"; + break; + } else if (pvt->model >= 0xa0 && pvt->model <= 0xaf) { + fam_type = &family_types[F19_M10H_CPUS]; + pvt->ops = &family_types[F19_M10H_CPUS].ops; + fam_type->ctl_name = "F19h_MA0h"; + break; + } fam_type = &family_types[F19_CPUS]; pvt->ops = &family_types[F19_CPUS].ops; family_types[F19_CPUS].ctl_name = "F19h"; @@ -3695,9 +3936,6 @@ static int __init amd64_edac_init(void) } /* register stuff with EDAC MCE */ - if (report_gart_errors) - amd_report_gart_errors(true); - if (boot_cpu_data.x86 >= 0x17) amd_register_ecc_decoder(decode_umc_error); else @@ -3734,8 +3972,6 @@ static void __exit amd64_edac_exit(void) edac_pci_release_generic_ctl(pci_ctl); /* unregister from EDAC MCE */ - amd_report_gart_errors(false); - if (boot_cpu_data.x86 >= 0x17) amd_unregister_ecc_decoder(decode_umc_error); else diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 52b5d03eeba00..bc0d915e74aa9 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -96,7 +96,7 @@ /* Hardware limit on ChipSelect rows per MC and processors per system */ #define NUM_CHIPSELECTS 8 #define DRAM_RANGES 8 -#define NUM_CONTROLLERS 8 +#define NUM_CONTROLLERS 12 #define ON true #define OFF false @@ -126,6 +126,8 @@ #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F6 0x1446 #define PCI_DEVICE_ID_AMD_19H_DF_F0 0x1650 #define PCI_DEVICE_ID_AMD_19H_DF_F6 0x1656 +#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F0 0x14ad +#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F6 0x14b3 /* * Function 1 - Address Map @@ -298,6 +300,7 @@ enum amd_families { F17_M60H_CPUS, F17_M70H_CPUS, F19_CPUS, + F19_M10H_CPUS, NUM_FAMILIES, }; diff --git a/drivers/edac/armada_xp_edac.c b/drivers/edac/armada_xp_edac.c index 7f227bdcbc845..8c63447154730 100644 --- a/drivers/edac/armada_xp_edac.c +++ b/drivers/edac/armada_xp_edac.c @@ -178,7 +178,7 @@ static void axp_mc_check(struct mem_ctl_info *mci) "details unavailable (multiple errors)"); if (cnt_dbe) edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, - cnt_sbe, /* error count */ + cnt_dbe, /* error count */ 0, 0, 0, /* pfn, offset, syndrome */ -1, -1, -1, /* top, mid, low layer */ mci->ctl_name, diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 4b0a33a674fd7..ac60b5c18d823 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -215,6 +215,8 @@ const char * const edac_mem_types[] = { [MEM_RDDR4] = "Registered-DDR4", [MEM_LRDDR4] = "Load-Reduced-DDR4-RAM", [MEM_DDR5] = "Unbuffered-DDR5", + [MEM_RDDR5] = "Registered-DDR5", + [MEM_LRDDR5] = "Load-Reduced-DDR5-RAM", [MEM_NVDIMM] = "Non-volatile-RAM", }; EXPORT_SYMBOL_GPL(edac_mem_types); @@ -264,7 +266,7 @@ void *edac_align_ptr(void **p, unsigned int size, int n_elems) else return (char *)ptr; - r = (unsigned long)p % align; + r = (unsigned long)ptr % align; if (r == 0) return (char *)ptr; diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 470de34ed6603..1f7eddc154cf2 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -54,6 +54,7 @@ static struct list_head *i10nm_edac_list; static struct res_config *res_cfg; static int retry_rd_err_log; +static int skip_dsm_bias; static u32 offsets_scrub_icx[] = {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8}; static u32 offsets_scrub_spr[] = {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8}; @@ -505,6 +506,27 @@ static void __exit i10nm_exit(void) module_init(i10nm_init); module_exit(i10nm_exit); +static int set_skip_dsm_bias(const char *buf, const struct kernel_param *kp) +{ + unsigned long val; + int ret; + int prev = skip_dsm_bias; + + ret = kstrtoul(buf, 0, &val); + + if (ret || val > 1) + return -EINVAL; + + ret = param_set_int(buf, kp); + if (ret || prev == skip_dsm_bias) + return ret; + skx_set_skip_dsm(skip_dsm_bias); + + return ret; +} + +module_param_call(skip_dsm_bias, set_skip_dsm_bias, param_get_int, &skip_dsm_bias, 0644); +MODULE_PARM_DESC(skip_dsm_bias, "skip_dsm_bias: 0=off(default), 1=enable(skip _DSM calling bias)"); module_param(retry_rd_err_log, int, 0444); MODULE_PARM_DESC(retry_rd_err_log, "retry_rd_err_log: 0=off(default), 1=bios(Linux doesn't reset any control bits, but just reports values.), 2=linux(Linux tries to take control and resets mode bits, clear valid/UC bits after reading.)"); diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 5cc55a271890d..e762b851a742e 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -10,15 +10,8 @@ static struct amd_decoder_ops *fam_ops; static u8 xec_mask = 0xf; -static bool report_gart_errors; static void (*decode_dram_ecc)(int node_id, struct mce *m); -void amd_report_gart_errors(bool v) -{ - report_gart_errors = v; -} -EXPORT_SYMBOL_GPL(amd_report_gart_errors); - void amd_register_ecc_decoder(void (*f)(int, struct mce *)) { decode_dram_ecc = f; @@ -322,6 +315,21 @@ static const char * const smca_umc_mce_desc[] = { "AES SRAM ECC error", }; +static const char * const smca_umc2_mce_desc[] = { + "DRAM ECC error", + "Data poison error", + "SDP parity error", + "Reserved", + "Address/Command parity error", + "Write data parity error", + "DCQ SRAM ECC error", + "Reserved", + "Read data parity error", + "Rdb SRAM ECC error", + "RdRsp SRAM ECC error", + "LM32 MP errors", +}; + static const char * const smca_pb_mce_desc[] = { "An ECC error in the Parameter Block RAM array", }; @@ -382,6 +390,63 @@ static const char * const smca_mp5_mce_desc[] = { "Instruction Tag Cache Bank B ECC or parity error", }; +static const char * const smca_mpdma_mce_desc[] = { + "Main SRAM [31:0] bank ECC or parity error", + "Main SRAM [63:32] bank ECC or parity error", + "Main SRAM [95:64] bank ECC or parity error", + "Main SRAM [127:96] bank ECC or parity error", + "Data Cache Bank A ECC or parity error", + "Data Cache Bank B ECC or parity error", + "Data Tag Cache Bank A ECC or parity error", + "Data Tag Cache Bank B ECC or parity error", + "Instruction Cache Bank A ECC or parity error", + "Instruction Cache Bank B ECC or parity error", + "Instruction Tag Cache Bank A ECC or parity error", + "Instruction Tag Cache Bank B ECC or parity error", + "Data Cache Bank A ECC or parity error", + "Data Cache Bank B ECC or parity error", + "Data Tag Cache Bank A ECC or parity error", + "Data Tag Cache Bank B ECC or parity error", + "Instruction Cache Bank A ECC or parity error", + "Instruction Cache Bank B ECC or parity error", + "Instruction Tag Cache Bank A ECC or parity error", + "Instruction Tag Cache Bank B ECC or parity error", + "Data Cache Bank A ECC or parity error", + "Data Cache Bank B ECC or parity error", + "Data Tag Cache Bank A ECC or parity error", + "Data Tag Cache Bank B ECC or parity error", + "Instruction Cache Bank A ECC or parity error", + "Instruction Cache Bank B ECC or parity error", + "Instruction Tag Cache Bank A ECC or parity error", + "Instruction Tag Cache Bank B ECC or parity error", + "System Hub Read Buffer ECC or parity error", + "MPDMA TVF DVSEC Memory ECC or parity error", + "MPDMA TVF MMIO Mailbox0 ECC or parity error", + "MPDMA TVF MMIO Mailbox1 ECC or parity error", + "MPDMA TVF Doorbell Memory ECC or parity error", + "MPDMA TVF SDP Slave Memory 0 ECC or parity error", + "MPDMA TVF SDP Slave Memory 1 ECC or parity error", + "MPDMA TVF SDP Slave Memory 2 ECC or parity error", + "MPDMA TVF SDP Master Memory 0 ECC or parity error", + "MPDMA TVF SDP Master Memory 1 ECC or parity error", + "MPDMA TVF SDP Master Memory 2 ECC or parity error", + "MPDMA TVF SDP Master Memory 3 ECC or parity error", + "MPDMA TVF SDP Master Memory 4 ECC or parity error", + "MPDMA TVF SDP Master Memory 5 ECC or parity error", + "MPDMA TVF SDP Master Memory 6 ECC or parity error", + "MPDMA PTE Command FIFO ECC or parity error", + "MPDMA PTE Hub Data FIFO ECC or parity error", + "MPDMA PTE Internal Data FIFO ECC or parity error", + "MPDMA PTE Command Memory DMA ECC or parity error", + "MPDMA PTE Command Memory Internal ECC or parity error", + "MPDMA PTE DMA Completion FIFO ECC or parity error", + "MPDMA PTE Tablewalk Completion FIFO ECC or parity error", + "MPDMA PTE Descriptor Completion FIFO ECC or parity error", + "MPDMA PTE ReadOnly Completion FIFO ECC or parity error", + "MPDMA PTE DirectWrite Completion FIFO ECC or parity error", + "SDP Watchdog Timer expired", +}; + static const char * const smca_nbio_mce_desc[] = { "ECC or Parity error", "PCIE error", @@ -398,6 +463,111 @@ static const char * const smca_pcie_mce_desc[] = { "CCIX Non-okay write response with data error", }; +static const char * const smca_pcie2_mce_desc[] = { + "SDP Parity Error logging", +}; + +static const char * const smca_xgmipcs_mce_desc[] = { + "Data Loss Error", + "Training Error", + "Flow Control Acknowledge Error", + "Rx Fifo Underflow Error", + "Rx Fifo Overflow Error", + "CRC Error", + "BER Exceeded Error", + "Tx Vcid Data Error", + "Replay Buffer Parity Error", + "Data Parity Error", + "Replay Fifo Overflow Error", + "Replay FIfo Underflow Error", + "Elastic Fifo Overflow Error", + "Deskew Error", + "Flow Control CRC Error", + "Data Startup Limit Error", + "FC Init Timeout Error", + "Recovery Timeout Error", + "Ready Serial Timeout Error", + "Ready Serial Attempt Error", + "Recovery Attempt Error", + "Recovery Relock Attempt Error", + "Replay Attempt Error", + "Sync Header Error", + "Tx Replay Timeout Error", + "Rx Replay Timeout Error", + "LinkSub Tx Timeout Error", + "LinkSub Rx Timeout Error", + "Rx CMD Packet Error", +}; + +static const char * const smca_xgmiphy_mce_desc[] = { + "RAM ECC Error", + "ARC instruction buffer parity error", + "ARC data buffer parity error", + "PHY APB error", +}; + +static const char * const smca_nbif_mce_desc[] = { + "Timeout error from GMI", + "SRAM ECC error", + "NTB Error Event", + "SDP Parity error", +}; + +static const char * const smca_sata_mce_desc[] = { + "Parity error for port 0", + "Parity error for port 1", + "Parity error for port 2", + "Parity error for port 3", + "Parity error for port 4", + "Parity error for port 5", + "Parity error for port 6", + "Parity error for port 7", +}; + +static const char * const smca_usb_mce_desc[] = { + "Parity error or ECC error for S0 RAM0", + "Parity error or ECC error for S0 RAM1", + "Parity error or ECC error for S0 RAM2", + "Parity error for PHY RAM0", + "Parity error for PHY RAM1", + "AXI Slave Response error", +}; + +static const char * const smca_gmipcs_mce_desc[] = { + "Data Loss Error", + "Training Error", + "Replay Parity Error", + "Rx Fifo Underflow Error", + "Rx Fifo Overflow Error", + "CRC Error", + "BER Exceeded Error", + "Tx Fifo Underflow Error", + "Replay Buffer Parity Error", + "Tx Overflow Error", + "Replay Fifo Overflow Error", + "Replay Fifo Underflow Error", + "Elastic Fifo Overflow Error", + "Deskew Error", + "Offline Error", + "Data Startup Limit Error", + "FC Init Timeout Error", + "Recovery Timeout Error", + "Ready Serial Timeout Error", + "Ready Serial Attempt Error", + "Recovery Attempt Error", + "Recovery Relock Attempt Error", + "Deskew Abort Error", + "Rx Buffer Error", + "Rx LFDS Fifo Overflow Error", + "Rx LFDS Fifo Underflow Error", + "LinkSub Tx Timeout Error", + "LinkSub Rx Timeout Error", + "Rx CMD Packet Error", + "LFDS Training Timeout Error", + "LFDS FC Init Timeout Error", + "Data Loss Error", +}; + struct smca_mce_desc { const char * const *descs; unsigned int num_descs; @@ -416,14 +586,28 @@ static struct smca_mce_desc smca_mce_descs[] = { [SMCA_CS_V2] = { smca_cs2_mce_desc, ARRAY_SIZE(smca_cs2_mce_desc) }, [SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) }, [SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) }, + [SMCA_UMC_V2] = { smca_umc2_mce_desc, ARRAY_SIZE(smca_umc2_mce_desc) }, [SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) }, [SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) }, [SMCA_PSP_V2] = { smca_psp2_mce_desc, ARRAY_SIZE(smca_psp2_mce_desc) }, [SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) }, [SMCA_SMU_V2] = { smca_smu2_mce_desc, ARRAY_SIZE(smca_smu2_mce_desc) }, [SMCA_MP5] = { smca_mp5_mce_desc, ARRAY_SIZE(smca_mp5_mce_desc) }, + [SMCA_MPDMA] = { smca_mpdma_mce_desc, ARRAY_SIZE(smca_mpdma_mce_desc) }, [SMCA_NBIO] = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc) }, [SMCA_PCIE] = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc) }, + [SMCA_PCIE_V2] = { smca_pcie2_mce_desc, ARRAY_SIZE(smca_pcie2_mce_desc) }, + [SMCA_XGMI_PCS] = { smca_xgmipcs_mce_desc, ARRAY_SIZE(smca_xgmipcs_mce_desc) }, + /* NBIF and SHUB have the same error descriptions, for now. */ + [SMCA_NBIF] = { smca_nbif_mce_desc, ARRAY_SIZE(smca_nbif_mce_desc) }, + [SMCA_SHUB] = { smca_nbif_mce_desc, ARRAY_SIZE(smca_nbif_mce_desc) }, + [SMCA_SATA] = { smca_sata_mce_desc, ARRAY_SIZE(smca_sata_mce_desc) }, + [SMCA_USB] = { smca_usb_mce_desc, ARRAY_SIZE(smca_usb_mce_desc) }, + [SMCA_GMI_PCS] = { smca_gmipcs_mce_desc, ARRAY_SIZE(smca_gmipcs_mce_desc) }, + /* All the PHY bank types have the same error descriptions, for now. */ + [SMCA_XGMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) }, + [SMCA_WAFL_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) }, + [SMCA_GMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) }, }; static bool f12h_mc0_mce(u16 ec, u8 xec) @@ -973,20 +1157,13 @@ static void decode_mc6_mce(struct mce *m) /* Decode errors according to Scalable MCA specification */ static void decode_smca_error(struct mce *m) { - struct smca_hwid *hwid; - enum smca_bank_types bank_type; + enum smca_bank_types bank_type = smca_get_bank_type(m->extcpu, m->bank); const char *ip_name; u8 xec = XEC(m->status, xec_mask); - if (m->bank >= ARRAY_SIZE(smca_banks)) - return; - - hwid = smca_banks[m->bank].hwid; - if (!hwid) + if (bank_type >= N_SMCA_BANK_TYPES) return; - bank_type = hwid->bank_type; - if (bank_type == SMCA_RESERVED) { pr_emerg(HW_ERR "Bank %d is reserved.\n", m->bank); return; @@ -997,10 +1174,8 @@ static void decode_smca_error(struct mce *m) pr_emerg(HW_ERR "%s Ext. Error Code: %d", ip_name, xec); /* Only print the decode of valid error codes */ - if (xec < smca_mce_descs[bank_type].num_descs && - (hwid->xec_bitmap & BIT_ULL(xec))) { + if (xec < smca_mce_descs[bank_type].num_descs) pr_cont(", %s.\n", smca_mce_descs[bank_type].descs[xec]); - } if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc) decode_dram_ecc(topology_die_id(m->extcpu), m); @@ -1030,20 +1205,6 @@ static inline void amd_decode_err_code(u16 ec) pr_cont("\n"); } -/* - * Filter out unwanted MCE signatures here. - */ -static bool ignore_mce(struct mce *m) -{ - /* - * NB GART TLB error reporting is disabled by default. - */ - if (m->bank == 4 && XEC(m->status, 0x1f) == 0x5 && !report_gart_errors) - return true; - - return false; -} - static const char *decode_error_status(struct mce *m) { if (m->status & MCI_STATUS_UC) { @@ -1067,9 +1228,6 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) unsigned int fam = x86_family(m->cpuid); int ecc; - if (ignore_mce(m)) - return NOTIFY_STOP; - if (m->kflags & MCE_HANDLED_CEC) return NOTIFY_DONE; diff --git a/drivers/edac/mce_amd.h b/drivers/edac/mce_amd.h index 4e9c5e596c6c2..4811b18d9606b 100644 --- a/drivers/edac/mce_amd.h +++ b/drivers/edac/mce_amd.h @@ -7,7 +7,6 @@ #include #define EC(x) ((x) & 0xffff) -#define XEC(x, mask) (((x) >> 16) & mask) #define LOW_SYNDROME(x) (((x) >> 15) & 0xff) #define HIGH_SYNDROME(x) (((x) >> 24) & 0xff) @@ -77,7 +76,6 @@ struct amd_decoder_ops { bool (*mc2_mce)(u16, u8); }; -void amd_report_gart_errors(bool); void amd_register_ecc_decoder(void (*f)(int, struct mce *)); void amd_unregister_ecc_decoder(void (*f)(int, struct mce *)); diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 756bf4fcc8d12..58a772ef437be 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -1055,7 +1055,7 @@ static u64 haswell_get_tohm(struct sbridge_pvt *pvt) pci_read_config_dword(pvt->info.pci_vtd, HASWELL_TOHM_1, ®); rc = ((reg << 6) | rc) << 26; - return rc | 0x1ffffff; + return rc | 0x3ffffff; } static u64 knl_get_tolm(struct sbridge_pvt *pvt) diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 7100f947687e2..632bba959eaf4 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -40,6 +40,7 @@ static skx_decode_f skx_decode; static skx_show_retry_log_f skx_show_retry_rd_err_log; static u64 skx_tolm, skx_tohm; static LIST_HEAD(dev_edac_list); +static int skx_skip_dsm_bias; int __init skx_adxl_get(void) { @@ -157,6 +158,24 @@ void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log) skx_show_retry_rd_err_log = show_retry_log; } +void skx_set_skip_dsm(int skip_dsm_bias) +{ + skx_skip_dsm_bias = skip_dsm_bias; +} + +/* + * Return true when: + * 1) Skip _DSM bias is set. + * 2) Icelake CPU triggered error event. + * 3) Error event triggered on memory controller. + */ +static bool is_skip_dsm(const struct mce *mce) +{ + return (skx_skip_dsm_bias && + ((mce->cpuid & MCE_CPUID_FM_MASK) == ICX_MCE_CPUID_FM) && + ((mce->bank << 1) & ICX_IMCx_CHy)); +} + int skx_get_src_id(struct skx_dev *d, int off, u8 *id) { u32 reg; @@ -553,7 +572,7 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, break; } } - if (adxl_component_count) { + if (adxl_component_count && !is_skip_dsm(m)) { len = snprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s", overflow ? " OVERFLOW" : "", (uncorrected_error && recoverable) ? " recoverable" : "", @@ -584,9 +603,11 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, void *data) { struct mce *mce = (struct mce *)data; + struct skx_dev *d; struct decoded_addr res; struct mem_ctl_info *mci; char *type; + int imc = 0, chan = 0, skipped = 0; if (edac_get_report_status() == EDAC_REPORTING_DISABLED) return NOTIFY_DONE; @@ -602,12 +623,42 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, res.addr = mce->addr; if (adxl_component_count) { - if (!skx_adxl_decode(&res)) + if (is_skip_dsm(mce)) { + /* Icelake MCE IMCx_CHy Banks: 13,14;17,18;21,22;25,26 */ + imc = (mce->bank - 12) / 4; + chan = (mce->bank + 1) % 2; + skipped = 1; + } else if (!skx_adxl_decode(&res)) return NOTIFY_DONE; } else if (!skx_decode || !skx_decode(&res)) { return NOTIFY_DONE; } + if (skipped) { + int found = 0; + + list_for_each_entry(d, &dev_edac_list, list) { + if (d->imc[0].src_id == mce->socketid) { + res.dev = d; + found = 1; + break; + } + } + if (!found) + return NOTIFY_DONE; + + res.imc = imc; + res.channel = chan; + res.row = GET_BITFIELD(mce->misc, 19, 39); + res.column = GET_BITFIELD(mce->misc, 9, 18) << 2; + res.bank_address = GET_BITFIELD(mce->misc, 42, 43); + res.bank_group = GET_BITFIELD(mce->misc, 40, 41) | + (GET_BITFIELD(mce->misc, 44, 44) << 2); + res.rank = GET_BITFIELD(mce->misc, 56, 58); + res.dimm = res.rank >> 2; + res.rank = res.rank % 4; + } + mci = res.dev->imc[res.imc].mci; if (!mci) diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index 22e174c740be6..0208305398ed9 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -33,6 +33,10 @@ #define I10NM_NUM_IMC 4 #define I10NM_NUM_CHANNELS 2 #define I10NM_NUM_DIMMS 2 +/* Icelake MCE IMCx_CHy Banks: 13,14,17,18,21,22,25,26 */ +#define ICX_IMCx_CHy 0x6666000 +#define MCE_CPUID_FM_MASK 0xFFFF0 +#define ICX_MCE_CPUID_FM 0x606a0 /* Icelake Family:6 Model:6a */ #define MAX(a, b) ((a) > (b) ? (a) : (b)) #define NUM_IMC MAX(SKX_NUM_IMC, I10NM_NUM_IMC) @@ -138,6 +142,7 @@ typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int le int __init skx_adxl_get(void); void __exit skx_adxl_put(void); void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log); +void skx_set_skip_dsm(int skip_dsm); int skx_get_src_id(struct skx_dev *d, int off, u8 *id); int skx_get_node_id(struct skx_dev *d, u8 *id); diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c index 880ffd8337187..4d2387f8e511b 100644 --- a/drivers/edac/synopsys_edac.c +++ b/drivers/edac/synopsys_edac.c @@ -163,6 +163,11 @@ #define ECC_STAT_CECNT_SHIFT 8 #define ECC_STAT_BITNUM_MASK 0x7F +/* ECC error count register definitions */ +#define ECC_ERRCNT_UECNT_MASK 0xFFFF0000 +#define ECC_ERRCNT_UECNT_SHIFT 16 +#define ECC_ERRCNT_CECNT_MASK 0xFFFF + /* DDR QOS Interrupt register definitions */ #define DDR_QOS_IRQ_STAT_OFST 0x20200 #define DDR_QOSUE_MASK 0x4 @@ -418,15 +423,16 @@ static int zynqmp_get_error_info(struct synps_edac_priv *priv) base = priv->baseaddr; p = &priv->stat; + regval = readl(base + ECC_ERRCNT_OFST); + p->ce_cnt = regval & ECC_ERRCNT_CECNT_MASK; + p->ue_cnt = (regval & ECC_ERRCNT_UECNT_MASK) >> ECC_ERRCNT_UECNT_SHIFT; + if (!p->ce_cnt) + goto ue_err; + regval = readl(base + ECC_STAT_OFST); if (!regval) return 1; - p->ce_cnt = (regval & ECC_STAT_CECNT_MASK) >> ECC_STAT_CECNT_SHIFT; - p->ue_cnt = (regval & ECC_STAT_UECNT_MASK) >> ECC_STAT_UECNT_SHIFT; - if (!p->ce_cnt) - goto ue_err; - p->ceinfo.bitpos = (regval & ECC_STAT_BITNUM_MASK); regval = readl(base + ECC_CEADDR0_OFST); @@ -782,7 +788,7 @@ static void init_csrows(struct mem_ctl_info *mci) for (j = 0; j < csi->nr_channels; j++) { dimm = csi->channels[j]->dimm; - dimm->edac_mode = EDAC_FLAG_SECDED; + dimm->edac_mode = EDAC_SECDED; dimm->mtype = p_data->get_mtype(priv->baseaddr); dimm->nr_pages = (size >> PAGE_SHIFT) / csi->nr_channels; dimm->grain = SYNPS_EDAC_ERR_GRAIN; @@ -1351,8 +1357,7 @@ static int mc_probe(struct platform_device *pdev) } } - if (of_device_is_compatible(pdev->dev.of_node, - "xlnx,zynqmp-ddrc-2.40a")) + if (priv->p_data->quirks & DDR_ECC_INTR_SUPPORT) setup_address_map(priv); #endif diff --git a/drivers/edac/xgene_edac.c b/drivers/edac/xgene_edac.c index e4a1032ba0b58..cd9926e518967 100644 --- a/drivers/edac/xgene_edac.c +++ b/drivers/edac/xgene_edac.c @@ -1922,7 +1922,7 @@ static int xgene_edac_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, i); if (irq < 0) { dev_err(&pdev->dev, "No IRQ resource\n"); - rc = -EINVAL; + rc = irq; goto out_err; } rc = devm_request_irq(&pdev->dev, irq, diff --git a/drivers/extcon/extcon.c b/drivers/extcon/extcon.c index 5c9e156cd0862..6b905c3d30f4f 100644 --- a/drivers/extcon/extcon.c +++ b/drivers/extcon/extcon.c @@ -1230,19 +1230,14 @@ int extcon_dev_register(struct extcon_dev *edev) edev->dev.type = &edev->extcon_dev_type; } - ret = device_register(&edev->dev); - if (ret) { - put_device(&edev->dev); - goto err_dev; - } - spin_lock_init(&edev->lock); - edev->nh = devm_kcalloc(&edev->dev, edev->max_supported, - sizeof(*edev->nh), GFP_KERNEL); - if (!edev->nh) { - ret = -ENOMEM; - device_unregister(&edev->dev); - goto err_dev; + if (edev->max_supported) { + edev->nh = kcalloc(edev->max_supported, sizeof(*edev->nh), + GFP_KERNEL); + if (!edev->nh) { + ret = -ENOMEM; + goto err_alloc_nh; + } } for (index = 0; index < edev->max_supported; index++) @@ -1253,6 +1248,12 @@ int extcon_dev_register(struct extcon_dev *edev) dev_set_drvdata(&edev->dev, edev); edev->state = 0; + ret = device_register(&edev->dev); + if (ret) { + put_device(&edev->dev); + goto err_dev; + } + mutex_lock(&extcon_dev_list_lock); list_add(&edev->entry, &extcon_dev_list); mutex_unlock(&extcon_dev_list_lock); @@ -1260,6 +1261,9 @@ int extcon_dev_register(struct extcon_dev *edev) return 0; err_dev: + if (edev->max_supported) + kfree(edev->nh); +err_alloc_nh: if (edev->max_supported) kfree(edev->extcon_dev_type.groups); err_alloc_groups: @@ -1320,6 +1324,7 @@ void extcon_dev_unregister(struct extcon_dev *edev) if (edev->max_supported) { kfree(edev->extcon_dev_type.groups); kfree(edev->cables); + kfree(edev->nh); } put_device(&edev->dev); diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index 54be88167c60b..f3b3953cac834 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -668,6 +668,7 @@ EXPORT_SYMBOL_GPL(fw_card_release); void fw_core_remove_card(struct fw_card *card) { struct fw_card_driver dummy_driver = dummy_driver_template; + unsigned long flags; card->driver->update_phy_reg(card, 4, PHY_LINK_ACTIVE | PHY_CONTENDER, 0); @@ -682,7 +683,9 @@ void fw_core_remove_card(struct fw_card *card) dummy_driver.stop_iso = card->driver->stop_iso; card->driver = &dummy_driver; + spin_lock_irqsave(&card->lock, flags); fw_destroy_nodes(card); + spin_unlock_irqrestore(&card->lock, flags); /* Wait for all users, especially device workqueue jobs, to finish. */ fw_card_put(card); diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index 1da7ba18d3993..3a43e5d6ed3b2 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -1482,6 +1482,7 @@ static void outbound_phy_packet_callback(struct fw_packet *packet, { struct outbound_phy_packet_event *e = container_of(packet, struct outbound_phy_packet_event, p); + struct client *e_client; switch (status) { /* expected: */ @@ -1498,9 +1499,10 @@ static void outbound_phy_packet_callback(struct fw_packet *packet, } e->phy_packet.data[0] = packet->timestamp; + e_client = e->client; queue_event(e->client, &e->event, &e->phy_packet, sizeof(e->phy_packet) + e->phy_packet.length, NULL, 0); - client_put(e->client); + client_put(e_client); } static int ioctl_send_phy_packet(struct client *client, union ioctl_arg *arg) diff --git a/drivers/firewire/core-topology.c b/drivers/firewire/core-topology.c index 94a13fca82673..5999dce11fc88 100644 --- a/drivers/firewire/core-topology.c +++ b/drivers/firewire/core-topology.c @@ -374,16 +374,13 @@ static void report_found_node(struct fw_card *card, card->bm_retries = 0; } +/* Must be called with card->lock held */ void fw_destroy_nodes(struct fw_card *card) { - unsigned long flags; - - spin_lock_irqsave(&card->lock, flags); card->color++; if (card->local_node != NULL) for_each_fw_node(card, card->local_node, report_lost_node); card->local_node = NULL; - spin_unlock_irqrestore(&card->lock, flags); } static void move_tree(struct fw_node *node0, struct fw_node *node1, int port) @@ -509,6 +506,8 @@ void fw_core_handle_bus_reset(struct fw_card *card, int node_id, int generation, struct fw_node *local_node; unsigned long flags; + spin_lock_irqsave(&card->lock, flags); + /* * If the selfID buffer is not the immediate successor of the * previously processed one, we cannot reliably compare the @@ -520,8 +519,6 @@ void fw_core_handle_bus_reset(struct fw_card *card, int node_id, int generation, card->bm_retries = 0; } - spin_lock_irqsave(&card->lock, flags); - card->broadcast_channel_allocated = card->broadcast_channel_auto_allocated; card->node_id = node_id; /* diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index 404a035f104d0..78574789a1872 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -73,24 +73,25 @@ static int try_cancel_split_timeout(struct fw_transaction *t) static int close_transaction(struct fw_transaction *transaction, struct fw_card *card, int rcode) { - struct fw_transaction *t; + struct fw_transaction *t = NULL, *iter; unsigned long flags; spin_lock_irqsave(&card->lock, flags); - list_for_each_entry(t, &card->transaction_list, link) { - if (t == transaction) { - if (!try_cancel_split_timeout(t)) { + list_for_each_entry(iter, &card->transaction_list, link) { + if (iter == transaction) { + if (!try_cancel_split_timeout(iter)) { spin_unlock_irqrestore(&card->lock, flags); goto timed_out; } - list_del_init(&t->link); - card->tlabel_mask &= ~(1ULL << t->tlabel); + list_del_init(&iter->link); + card->tlabel_mask &= ~(1ULL << iter->tlabel); + t = iter; break; } } spin_unlock_irqrestore(&card->lock, flags); - if (&t->link != &card->transaction_list) { + if (t) { t->callback(card, rcode, NULL, 0, t->callback_data); return 0; } @@ -935,7 +936,7 @@ EXPORT_SYMBOL(fw_core_handle_request); void fw_core_handle_response(struct fw_card *card, struct fw_packet *p) { - struct fw_transaction *t; + struct fw_transaction *t = NULL, *iter; unsigned long flags; u32 *data; size_t data_length; @@ -947,20 +948,21 @@ void fw_core_handle_response(struct fw_card *card, struct fw_packet *p) rcode = HEADER_GET_RCODE(p->header[1]); spin_lock_irqsave(&card->lock, flags); - list_for_each_entry(t, &card->transaction_list, link) { - if (t->node_id == source && t->tlabel == tlabel) { - if (!try_cancel_split_timeout(t)) { + list_for_each_entry(iter, &card->transaction_list, link) { + if (iter->node_id == source && iter->tlabel == tlabel) { + if (!try_cancel_split_timeout(iter)) { spin_unlock_irqrestore(&card->lock, flags); goto timed_out; } - list_del_init(&t->link); - card->tlabel_mask &= ~(1ULL << t->tlabel); + list_del_init(&iter->link); + card->tlabel_mask &= ~(1ULL << iter->tlabel); + t = iter; break; } } spin_unlock_irqrestore(&card->lock, flags); - if (&t->link == &card->transaction_list) { + if (!t) { timed_out: fw_notice(card, "unsolicited response (source %x, tlabel %x)\n", source, tlabel); diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c index 4d5054211550b..2ceed9287435f 100644 --- a/drivers/firewire/sbp2.c +++ b/drivers/firewire/sbp2.c @@ -408,7 +408,7 @@ static void sbp2_status_write(struct fw_card *card, struct fw_request *request, void *payload, size_t length, void *callback_data) { struct sbp2_logical_unit *lu = callback_data; - struct sbp2_orb *orb; + struct sbp2_orb *orb = NULL, *iter; struct sbp2_status status; unsigned long flags; @@ -433,17 +433,18 @@ static void sbp2_status_write(struct fw_card *card, struct fw_request *request, /* Lookup the orb corresponding to this status write. */ spin_lock_irqsave(&lu->tgt->lock, flags); - list_for_each_entry(orb, &lu->orb_list, link) { + list_for_each_entry(iter, &lu->orb_list, link) { if (STATUS_GET_ORB_HIGH(status) == 0 && - STATUS_GET_ORB_LOW(status) == orb->request_bus) { - orb->rcode = RCODE_COMPLETE; - list_del(&orb->link); + STATUS_GET_ORB_LOW(status) == iter->request_bus) { + iter->rcode = RCODE_COMPLETE; + list_del(&iter->link); + orb = iter; break; } } spin_unlock_irqrestore(&lu->tgt->lock, flags); - if (&orb->link != &lu->orb_list) { + if (orb) { orb->callback(orb, &status); kref_put(&orb->kref, free_orb); /* orb callback reference */ } else { diff --git a/drivers/firmware/arm_scmi/base.c b/drivers/firmware/arm_scmi/base.c index f986ee8919f03..2be32e86445f4 100644 --- a/drivers/firmware/arm_scmi/base.c +++ b/drivers/firmware/arm_scmi/base.c @@ -164,7 +164,7 @@ static int scmi_base_implementation_list_get(const struct scmi_handle *handle, break; loop_num_ret = le32_to_cpu(*num_ret); - if (tot_num_ret + loop_num_ret > MAX_PROTOCOLS_IMP) { + if (loop_num_ret > MAX_PROTOCOLS_IMP - tot_num_ret) { dev_err(dev, "No. of Protocol > MAX_PROTOCOLS_IMP"); break; } diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 4e43bdfa041f5..f0b055bc027c4 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -983,7 +983,7 @@ static struct platform_driver scmi_driver = { module_platform_driver(scmi_driver); -MODULE_ALIAS("platform: arm-scmi"); +MODULE_ALIAS("platform:arm-scmi"); MODULE_AUTHOR("Sudeep Holla "); MODULE_DESCRIPTION("ARM SCMI protocol driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/firmware/arm_scmi/scmi_pm_domain.c b/drivers/firmware/arm_scmi/scmi_pm_domain.c index 041f8152272bf..177874adccf0d 100644 --- a/drivers/firmware/arm_scmi/scmi_pm_domain.c +++ b/drivers/firmware/arm_scmi/scmi_pm_domain.c @@ -106,9 +106,7 @@ static int scmi_pm_domain_probe(struct scmi_device *sdev) scmi_pd_data->domains = domains; scmi_pd_data->num_domains = num_domains; - of_genpd_add_provider_onecell(np, scmi_pd_data); - - return 0; + return of_genpd_add_provider_onecell(np, scmi_pd_data); } static const struct scmi_device_id scmi_id_table[] = { diff --git a/drivers/firmware/dmi-sysfs.c b/drivers/firmware/dmi-sysfs.c index b6180023eba7c..2858e05636e98 100644 --- a/drivers/firmware/dmi-sysfs.c +++ b/drivers/firmware/dmi-sysfs.c @@ -603,7 +603,7 @@ static void __init dmi_sysfs_register_handle(const struct dmi_header *dh, "%d-%d", dh->type, entry->instance); if (*ret) { - kfree(entry); + kobject_put(&entry->kobj); return; } diff --git a/drivers/firmware/efi/apple-properties.c b/drivers/firmware/efi/apple-properties.c index 0e206c9e0d7ae..7ad2d85d7270b 100644 --- a/drivers/firmware/efi/apple-properties.c +++ b/drivers/firmware/efi/apple-properties.c @@ -23,7 +23,7 @@ static bool dump_properties __initdata; static int __init dump_properties_enable(char *arg) { dump_properties = true; - return 0; + return 1; } __setup("dump_apple_properties", dump_properties_enable); diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index e48298687b76d..6157038114a2a 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -25,8 +25,6 @@ #include #include -static char rcd_decode_str[CPER_REC_LEN]; - /* * CPER record ID need to be unique even after reboot, because record * ID is used as index for ERST storage, while CPER records from @@ -299,6 +297,7 @@ const char *cper_mem_err_unpack(struct trace_seq *p, struct cper_mem_err_compact *cmem) { const char *ret = trace_seq_buffer_ptr(p); + char rcd_decode_str[CPER_REC_LEN]; if (cper_mem_err_location(cmem, rcd_decode_str)) trace_seq_printf(p, "%s", rcd_decode_str); @@ -313,6 +312,7 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem, int len) { struct cper_mem_err_compact cmem; + char rcd_decode_str[CPER_REC_LEN]; /* Don't trust UEFI 2.1/2.2 structure with bad validation bits */ if (len == sizeof(struct cper_sec_mem_err_old) && diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index bff119e09ff9a..21986c2f3d70f 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -231,7 +231,7 @@ static int __init efivar_ssdt_setup(char *str) memcpy(efivar_ssdt, str, strlen(str)); else pr_warn("efivar_ssdt: name too long: %s\n", str); - return 0; + return 1; } __setup("efivar_ssdt=", efivar_ssdt_setup); diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c index 65fffaa222108..136b9c7f9ac95 100644 --- a/drivers/firmware/efi/runtime-wrappers.c +++ b/drivers/firmware/efi/runtime-wrappers.c @@ -414,7 +414,7 @@ static void virt_efi_reset_system(int reset_type, unsigned long data_size, efi_char16_t *data) { - if (down_interruptible(&efi_runtime_lock)) { + if (down_trylock(&efi_runtime_lock)) { pr_warn("failed to invoke the reset_system() runtime service:\n" "could not get exclusive access to the firmware\n"); return; diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index 436d1776bc7be..a32d15b2928f7 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -750,6 +750,7 @@ int efivar_entry_set_safe(efi_char16_t *name, efi_guid_t vendor, u32 attributes, { const struct efivar_operations *ops; efi_status_t status; + unsigned long varsize; if (!__efivars) return -EINVAL; @@ -772,15 +773,17 @@ int efivar_entry_set_safe(efi_char16_t *name, efi_guid_t vendor, u32 attributes, return efivar_entry_set_nonblocking(name, vendor, attributes, size, data); + varsize = size + ucs2_strsize(name, 1024); if (!block) { if (down_trylock(&efivars_lock)) return -EBUSY; + status = check_var_size_nonblocking(attributes, varsize); } else { if (down_interruptible(&efivars_lock)) return -EINTR; + status = check_var_size(attributes, varsize); } - status = check_var_size(attributes, size + ucs2_strsize(name, 1024)); if (status != EFI_SUCCESS) { up(&efivars_lock); return -ENOSPC; diff --git a/drivers/firmware/google/Kconfig b/drivers/firmware/google/Kconfig index a3a6ca659ffa3..20dfe39a0537a 100644 --- a/drivers/firmware/google/Kconfig +++ b/drivers/firmware/google/Kconfig @@ -3,9 +3,9 @@ menuconfig GOOGLE_FIRMWARE bool "Google Firmware Drivers" default n help - These firmware drivers are used by Google's servers. They are - only useful if you are working directly on one of their - proprietary servers. If in doubt, say "N". + These firmware drivers are used by Google servers, + Chromebooks and other devices using coreboot firmware. + If in doubt, say "N". if GOOGLE_FIRMWARE @@ -21,7 +21,7 @@ config GOOGLE_SMI config GOOGLE_COREBOOT_TABLE tristate "Coreboot Table Access" - depends on ACPI || OF + depends on HAS_IOMEM && (ACPI || OF) help This option enables the coreboot_table module, which provides other firmware modules access to the coreboot table. The coreboot table diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index eb797081d1596..bbea136fa40be 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -303,10 +303,15 @@ int psci_cpu_suspend_enter(u32 state) { int ret; - if (!psci_power_state_loses_context(state)) + if (!psci_power_state_loses_context(state)) { + struct arm_cpuidle_irq_context context; + + arm_cpuidle_save_irq_context(&context); ret = psci_ops.cpu_suspend(state, 0); - else + arm_cpuidle_restore_irq_context(&context); + } else { ret = cpu_suspend(state, psci_suspend_finisher); + } return ret; } diff --git a/drivers/firmware/psci/psci_checker.c b/drivers/firmware/psci/psci_checker.c index 03eb798ad3ed9..250b7232f9816 100644 --- a/drivers/firmware/psci/psci_checker.c +++ b/drivers/firmware/psci/psci_checker.c @@ -155,7 +155,7 @@ static int alloc_init_cpu_groups(cpumask_var_t **pcpu_groups) if (!alloc_cpumask_var(&tmp, GFP_KERNEL)) return -ENOMEM; - cpu_groups = kcalloc(nb_available_cpus, sizeof(cpu_groups), + cpu_groups = kcalloc(nb_available_cpus, sizeof(*cpu_groups), GFP_KERNEL); if (!cpu_groups) { free_cpumask_var(tmp); diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c index 59db70fb45614..314b9bb78e437 100644 --- a/drivers/firmware/qemu_fw_cfg.c +++ b/drivers/firmware/qemu_fw_cfg.c @@ -385,9 +385,7 @@ static void fw_cfg_sysfs_cache_cleanup(void) struct fw_cfg_sysfs_entry *entry, *next; list_for_each_entry_safe(entry, next, &fw_cfg_entry_cache, list) { - /* will end up invoking fw_cfg_sysfs_cache_delist() - * via each object's release() method (i.e. destructor) - */ + fw_cfg_sysfs_cache_delist(entry); kobject_put(&entry->kobj); } } @@ -445,7 +443,6 @@ static void fw_cfg_sysfs_release_entry(struct kobject *kobj) { struct fw_cfg_sysfs_entry *entry = to_entry(kobj); - fw_cfg_sysfs_cache_delist(entry); kfree(entry); } @@ -598,20 +595,18 @@ static int fw_cfg_register_file(const struct fw_cfg_file *f) /* set file entry information */ entry->size = be32_to_cpu(f->size); entry->select = be16_to_cpu(f->select); - memcpy(entry->name, f->name, FW_CFG_MAX_FILE_PATH); + strscpy(entry->name, f->name, FW_CFG_MAX_FILE_PATH); /* register entry under "/sys/firmware/qemu_fw_cfg/by_key/" */ err = kobject_init_and_add(&entry->kobj, &fw_cfg_sysfs_entry_ktype, fw_cfg_sel_ko, "%d", entry->select); - if (err) { - kobject_put(&entry->kobj); - return err; - } + if (err) + goto err_put_entry; /* add raw binary content access */ err = sysfs_create_bin_file(&entry->kobj, &fw_cfg_sysfs_attr_raw); if (err) - goto err_add_raw; + goto err_del_entry; /* try adding "/sys/firmware/qemu_fw_cfg/by_name/" symlink */ fw_cfg_build_symlink(fw_cfg_fname_kset, &entry->kobj, entry->name); @@ -620,9 +615,10 @@ static int fw_cfg_register_file(const struct fw_cfg_file *f) fw_cfg_sysfs_cache_enlist(entry); return 0; -err_add_raw: +err_del_entry: kobject_del(&entry->kobj); - kfree(entry); +err_put_entry: + kobject_put(&entry->kobj); return err; } diff --git a/drivers/firmware/scpi_pm_domain.c b/drivers/firmware/scpi_pm_domain.c index 51201600d789b..800673910b511 100644 --- a/drivers/firmware/scpi_pm_domain.c +++ b/drivers/firmware/scpi_pm_domain.c @@ -16,7 +16,6 @@ struct scpi_pm_domain { struct generic_pm_domain genpd; struct scpi_ops *ops; u32 domain; - char name[30]; }; /* @@ -110,8 +109,13 @@ static int scpi_pm_domain_probe(struct platform_device *pdev) scpi_pd->domain = i; scpi_pd->ops = scpi_ops; - sprintf(scpi_pd->name, "%pOFn.%d", np, i); - scpi_pd->genpd.name = scpi_pd->name; + scpi_pd->genpd.name = devm_kasprintf(dev, GFP_KERNEL, + "%pOFn.%d", np, i); + if (!scpi_pd->genpd.name) { + dev_err(dev, "Failed to allocate genpd name:%pOFn.%d\n", + np, i); + continue; + } scpi_pd->genpd.power_off = scpi_pd_power_off; scpi_pd->genpd.power_on = scpi_pd_power_on; diff --git a/drivers/firmware/stratix10-svc.c b/drivers/firmware/stratix10-svc.c index b2b4ba240fb11..08c422380a00d 100644 --- a/drivers/firmware/stratix10-svc.c +++ b/drivers/firmware/stratix10-svc.c @@ -934,17 +934,17 @@ EXPORT_SYMBOL_GPL(stratix10_svc_allocate_memory); void stratix10_svc_free_memory(struct stratix10_svc_chan *chan, void *kaddr) { struct stratix10_svc_data_mem *pmem; - size_t size = 0; list_for_each_entry(pmem, &svc_data_mem, node) if (pmem->vaddr == kaddr) { - size = pmem->size; - break; + gen_pool_free(chan->ctrl->genpool, + (unsigned long)kaddr, pmem->size); + pmem->vaddr = NULL; + list_del(&pmem->node); + return; } - gen_pool_free(chan->ctrl->genpool, (unsigned long)kaddr, size); - pmem->vaddr = NULL; - list_del(&pmem->node); + list_del(&svc_data_mem); } EXPORT_SYMBOL_GPL(stratix10_svc_free_memory); diff --git a/drivers/fpga/machxo2-spi.c b/drivers/fpga/machxo2-spi.c index 4d8a876415874..37e54e375528e 100644 --- a/drivers/fpga/machxo2-spi.c +++ b/drivers/fpga/machxo2-spi.c @@ -223,8 +223,10 @@ static int machxo2_write_init(struct fpga_manager *mgr, goto fail; get_status(spi, &status); - if (test_bit(FAIL, &status)) + if (test_bit(FAIL, &status)) { + ret = -EINVAL; goto fail; + } dump_status_reg(&status); spi_message_init(&msg); @@ -310,6 +312,7 @@ static int machxo2_write_complete(struct fpga_manager *mgr, dump_status_reg(&status); if (!test_bit(DONE, &status)) { machxo2_cleanup(mgr); + ret = -EINVAL; goto fail; } @@ -331,6 +334,7 @@ static int machxo2_write_complete(struct fpga_manager *mgr, break; if (++refreshloop == MACHXO2_MAX_REFRESH_LOOP) { machxo2_cleanup(mgr); + ret = -EINVAL; goto fail; } } while (1); diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c index 2820c59b5f071..22e0d6fcab1c4 100644 --- a/drivers/gpio/gpio-aspeed.c +++ b/drivers/gpio/gpio-aspeed.c @@ -53,7 +53,7 @@ struct aspeed_gpio_config { struct aspeed_gpio { struct gpio_chip chip; struct irq_chip irqc; - spinlock_t lock; + raw_spinlock_t lock; void __iomem *base; int irq; const struct aspeed_gpio_config *config; @@ -413,14 +413,14 @@ static void aspeed_gpio_set(struct gpio_chip *gc, unsigned int offset, unsigned long flags; bool copro; - spin_lock_irqsave(&gpio->lock, flags); + raw_spin_lock_irqsave(&gpio->lock, flags); copro = aspeed_gpio_copro_request(gpio, offset); __aspeed_gpio_set(gc, offset, val); if (copro) aspeed_gpio_copro_release(gpio, offset); - spin_unlock_irqrestore(&gpio->lock, flags); + raw_spin_unlock_irqrestore(&gpio->lock, flags); } static int aspeed_gpio_dir_in(struct gpio_chip *gc, unsigned int offset) @@ -435,7 +435,7 @@ static int aspeed_gpio_dir_in(struct gpio_chip *gc, unsigned int offset) if (!have_input(gpio, offset)) return -ENOTSUPP; - spin_lock_irqsave(&gpio->lock, flags); + raw_spin_lock_irqsave(&gpio->lock, flags); reg = ioread32(addr); reg &= ~GPIO_BIT(offset); @@ -445,7 +445,7 @@ static int aspeed_gpio_dir_in(struct gpio_chip *gc, unsigned int offset) if (copro) aspeed_gpio_copro_release(gpio, offset); - spin_unlock_irqrestore(&gpio->lock, flags); + raw_spin_unlock_irqrestore(&gpio->lock, flags); return 0; } @@ -463,7 +463,7 @@ static int aspeed_gpio_dir_out(struct gpio_chip *gc, if (!have_output(gpio, offset)) return -ENOTSUPP; - spin_lock_irqsave(&gpio->lock, flags); + raw_spin_lock_irqsave(&gpio->lock, flags); reg = ioread32(addr); reg |= GPIO_BIT(offset); @@ -474,7 +474,7 @@ static int aspeed_gpio_dir_out(struct gpio_chip *gc, if (copro) aspeed_gpio_copro_release(gpio, offset); - spin_unlock_irqrestore(&gpio->lock, flags); + raw_spin_unlock_irqrestore(&gpio->lock, flags); return 0; } @@ -492,11 +492,11 @@ static int aspeed_gpio_get_direction(struct gpio_chip *gc, unsigned int offset) if (!have_output(gpio, offset)) return 1; - spin_lock_irqsave(&gpio->lock, flags); + raw_spin_lock_irqsave(&gpio->lock, flags); val = ioread32(bank_reg(gpio, bank, reg_dir)) & GPIO_BIT(offset); - spin_unlock_irqrestore(&gpio->lock, flags); + raw_spin_unlock_irqrestore(&gpio->lock, flags); return !val; @@ -540,14 +540,14 @@ static void aspeed_gpio_irq_ack(struct irq_data *d) status_addr = bank_reg(gpio, bank, reg_irq_status); - spin_lock_irqsave(&gpio->lock, flags); + raw_spin_lock_irqsave(&gpio->lock, flags); copro = aspeed_gpio_copro_request(gpio, offset); iowrite32(bit, status_addr); if (copro) aspeed_gpio_copro_release(gpio, offset); - spin_unlock_irqrestore(&gpio->lock, flags); + raw_spin_unlock_irqrestore(&gpio->lock, flags); } static void aspeed_gpio_irq_set_mask(struct irq_data *d, bool set) @@ -566,7 +566,7 @@ static void aspeed_gpio_irq_set_mask(struct irq_data *d, bool set) addr = bank_reg(gpio, bank, reg_irq_enable); - spin_lock_irqsave(&gpio->lock, flags); + raw_spin_lock_irqsave(&gpio->lock, flags); copro = aspeed_gpio_copro_request(gpio, offset); reg = ioread32(addr); @@ -578,7 +578,7 @@ static void aspeed_gpio_irq_set_mask(struct irq_data *d, bool set) if (copro) aspeed_gpio_copro_release(gpio, offset); - spin_unlock_irqrestore(&gpio->lock, flags); + raw_spin_unlock_irqrestore(&gpio->lock, flags); } static void aspeed_gpio_irq_mask(struct irq_data *d) @@ -630,7 +630,7 @@ static int aspeed_gpio_set_type(struct irq_data *d, unsigned int type) return -EINVAL; } - spin_lock_irqsave(&gpio->lock, flags); + raw_spin_lock_irqsave(&gpio->lock, flags); copro = aspeed_gpio_copro_request(gpio, offset); addr = bank_reg(gpio, bank, reg_irq_type0); @@ -650,7 +650,7 @@ static int aspeed_gpio_set_type(struct irq_data *d, unsigned int type) if (copro) aspeed_gpio_copro_release(gpio, offset); - spin_unlock_irqrestore(&gpio->lock, flags); + raw_spin_unlock_irqrestore(&gpio->lock, flags); irq_set_handler_locked(d, handler); @@ -720,7 +720,7 @@ static int aspeed_gpio_reset_tolerance(struct gpio_chip *chip, treg = bank_reg(gpio, to_bank(offset), reg_tolerance); - spin_lock_irqsave(&gpio->lock, flags); + raw_spin_lock_irqsave(&gpio->lock, flags); copro = aspeed_gpio_copro_request(gpio, offset); val = readl(treg); @@ -734,7 +734,7 @@ static int aspeed_gpio_reset_tolerance(struct gpio_chip *chip, if (copro) aspeed_gpio_copro_release(gpio, offset); - spin_unlock_irqrestore(&gpio->lock, flags); + raw_spin_unlock_irqrestore(&gpio->lock, flags); return 0; } @@ -860,7 +860,7 @@ static int enable_debounce(struct gpio_chip *chip, unsigned int offset, return rc; } - spin_lock_irqsave(&gpio->lock, flags); + raw_spin_lock_irqsave(&gpio->lock, flags); if (timer_allocation_registered(gpio, offset)) { rc = unregister_allocated_timer(gpio, offset); @@ -920,7 +920,7 @@ static int enable_debounce(struct gpio_chip *chip, unsigned int offset, configure_timer(gpio, offset, i); out: - spin_unlock_irqrestore(&gpio->lock, flags); + raw_spin_unlock_irqrestore(&gpio->lock, flags); return rc; } @@ -931,13 +931,13 @@ static int disable_debounce(struct gpio_chip *chip, unsigned int offset) unsigned long flags; int rc; - spin_lock_irqsave(&gpio->lock, flags); + raw_spin_lock_irqsave(&gpio->lock, flags); rc = unregister_allocated_timer(gpio, offset); if (!rc) configure_timer(gpio, offset, 0); - spin_unlock_irqrestore(&gpio->lock, flags); + raw_spin_unlock_irqrestore(&gpio->lock, flags); return rc; } @@ -1019,7 +1019,7 @@ int aspeed_gpio_copro_grab_gpio(struct gpio_desc *desc, return -EINVAL; bindex = offset >> 3; - spin_lock_irqsave(&gpio->lock, flags); + raw_spin_lock_irqsave(&gpio->lock, flags); /* Sanity check, this shouldn't happen */ if (gpio->cf_copro_bankmap[bindex] == 0xff) { @@ -1040,7 +1040,7 @@ int aspeed_gpio_copro_grab_gpio(struct gpio_desc *desc, if (bit) *bit = GPIO_OFFSET(offset); bail: - spin_unlock_irqrestore(&gpio->lock, flags); + raw_spin_unlock_irqrestore(&gpio->lock, flags); return rc; } EXPORT_SYMBOL_GPL(aspeed_gpio_copro_grab_gpio); @@ -1064,7 +1064,7 @@ int aspeed_gpio_copro_release_gpio(struct gpio_desc *desc) return -EINVAL; bindex = offset >> 3; - spin_lock_irqsave(&gpio->lock, flags); + raw_spin_lock_irqsave(&gpio->lock, flags); /* Sanity check, this shouldn't happen */ if (gpio->cf_copro_bankmap[bindex] == 0) { @@ -1078,7 +1078,7 @@ int aspeed_gpio_copro_release_gpio(struct gpio_desc *desc) aspeed_gpio_change_cmd_source(gpio, bank, bindex, GPIO_CMDSRC_ARM); bail: - spin_unlock_irqrestore(&gpio->lock, flags); + raw_spin_unlock_irqrestore(&gpio->lock, flags); return rc; } EXPORT_SYMBOL_GPL(aspeed_gpio_copro_release_gpio); @@ -1151,7 +1151,7 @@ static int __init aspeed_gpio_probe(struct platform_device *pdev) if (IS_ERR(gpio->base)) return PTR_ERR(gpio->base); - spin_lock_init(&gpio->lock); + raw_spin_lock_init(&gpio->lock); gpio_id = of_match_node(aspeed_gpio_of_table, pdev->dev.of_node); if (!gpio_id) diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index 89a053b1d2799..b5ae28fce9a89 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -697,6 +697,9 @@ static int mvebu_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, unsigned long flags; unsigned int on, off; + if (state->polarity != PWM_POLARITY_NORMAL) + return -EINVAL; + val = (unsigned long long) mvpwm->clk_rate * state->duty_cycle; do_div(val, NSEC_PER_SEC); if (val > UINT_MAX) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index d9193ffa17a1e..317f54f19477e 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -379,6 +379,9 @@ static const struct regmap_config pca953x_i2c_regmap = { .reg_bits = 8, .val_bits = 8, + .use_single_read = true, + .use_single_write = true, + .readable_reg = pca953x_readable_register, .writeable_reg = pca953x_writeable_register, .volatile_reg = pca953x_volatile_register, @@ -583,21 +586,21 @@ static int pca953x_gpio_set_pull_up_down(struct pca953x_chip *chip, mutex_lock(&chip->i2c_lock); - /* Disable pull-up/pull-down */ - ret = regmap_write_bits(chip->regmap, pull_en_reg, bit, 0); - if (ret) - goto exit; - /* Configure pull-up/pull-down */ if (config == PIN_CONFIG_BIAS_PULL_UP) ret = regmap_write_bits(chip->regmap, pull_sel_reg, bit, bit); else if (config == PIN_CONFIG_BIAS_PULL_DOWN) ret = regmap_write_bits(chip->regmap, pull_sel_reg, bit, 0); + else + ret = 0; if (ret) goto exit; - /* Enable pull-up/pull-down */ - ret = regmap_write_bits(chip->regmap, pull_en_reg, bit, bit); + /* Disable/Enable pull-up/pull-down */ + if (config == PIN_CONFIG_BIAS_DISABLE) + ret = regmap_write_bits(chip->regmap, pull_en_reg, bit, 0); + else + ret = regmap_write_bits(chip->regmap, pull_en_reg, bit, bit); exit: mutex_unlock(&chip->i2c_lock); @@ -611,7 +614,9 @@ static int pca953x_gpio_set_config(struct gpio_chip *gc, unsigned int offset, switch (pinconf_to_config_param(config)) { case PIN_CONFIG_BIAS_PULL_UP: + case PIN_CONFIG_BIAS_PULL_PIN_DEFAULT: case PIN_CONFIG_BIAS_PULL_DOWN: + case PIN_CONFIG_BIAS_DISABLE: return pca953x_gpio_set_pull_up_down(chip, offset, config); default: return -ENOTSUPP; diff --git a/drivers/gpio/gpio-tegra186.c b/drivers/gpio/gpio-tegra186.c index a9058fda187e3..a41e1ddf9e6fc 100644 --- a/drivers/gpio/gpio-tegra186.c +++ b/drivers/gpio/gpio-tegra186.c @@ -234,9 +234,12 @@ static int tegra186_gpio_of_xlate(struct gpio_chip *chip, return offset + pin; } +#define to_tegra_gpio(x) container_of((x), struct tegra_gpio, gpio) + static void tegra186_irq_ack(struct irq_data *data) { - struct tegra_gpio *gpio = irq_data_get_irq_chip_data(data); + struct gpio_chip *gc = irq_data_get_irq_chip_data(data); + struct tegra_gpio *gpio = to_tegra_gpio(gc); void __iomem *base; base = tegra186_gpio_get_base(gpio, data->hwirq); @@ -248,7 +251,8 @@ static void tegra186_irq_ack(struct irq_data *data) static void tegra186_irq_mask(struct irq_data *data) { - struct tegra_gpio *gpio = irq_data_get_irq_chip_data(data); + struct gpio_chip *gc = irq_data_get_irq_chip_data(data); + struct tegra_gpio *gpio = to_tegra_gpio(gc); void __iomem *base; u32 value; @@ -263,7 +267,8 @@ static void tegra186_irq_mask(struct irq_data *data) static void tegra186_irq_unmask(struct irq_data *data) { - struct tegra_gpio *gpio = irq_data_get_irq_chip_data(data); + struct gpio_chip *gc = irq_data_get_irq_chip_data(data); + struct tegra_gpio *gpio = to_tegra_gpio(gc); void __iomem *base; u32 value; @@ -278,7 +283,8 @@ static void tegra186_irq_unmask(struct irq_data *data) static int tegra186_irq_set_type(struct irq_data *data, unsigned int type) { - struct tegra_gpio *gpio = irq_data_get_irq_chip_data(data); + struct gpio_chip *gc = irq_data_get_irq_chip_data(data); + struct tegra_gpio *gpio = to_tegra_gpio(gc); void __iomem *base; u32 value; diff --git a/drivers/gpio/gpio-ts4900.c b/drivers/gpio/gpio-ts4900.c index 1da8d05863295..410452306bf7b 100644 --- a/drivers/gpio/gpio-ts4900.c +++ b/drivers/gpio/gpio-ts4900.c @@ -1,7 +1,7 @@ /* * Digital I/O driver for Technologic Systems I2C FPGA Core * - * Copyright (C) 2015 Technologic Systems + * Copyright (C) 2015, 2018 Technologic Systems * Copyright (C) 2016 Savoir-Faire Linux * * This program is free software; you can redistribute it and/or @@ -52,19 +52,33 @@ static int ts4900_gpio_direction_input(struct gpio_chip *chip, { struct ts4900_gpio_priv *priv = gpiochip_get_data(chip); - /* - * This will clear the output enable bit, the other bits are - * dontcare when this is cleared + /* Only clear the OE bit here, requires a RMW. Prevents potential issue + * with OE and data getting to the physical pin at different times. */ - return regmap_write(priv->regmap, offset, 0); + return regmap_update_bits(priv->regmap, offset, TS4900_GPIO_OE, 0); } static int ts4900_gpio_direction_output(struct gpio_chip *chip, unsigned int offset, int value) { struct ts4900_gpio_priv *priv = gpiochip_get_data(chip); + unsigned int reg; int ret; + /* If changing from an input to an output, we need to first set the + * proper data bit to what is requested and then set OE bit. This + * prevents a glitch that can occur on the IO line + */ + regmap_read(priv->regmap, offset, ®); + if (!(reg & TS4900_GPIO_OE)) { + if (value) + reg = TS4900_GPIO_OUT; + else + reg &= ~TS4900_GPIO_OUT; + + regmap_write(priv->regmap, offset, reg); + } + if (value) ret = regmap_write(priv->regmap, offset, TS4900_GPIO_OE | TS4900_GPIO_OUT); diff --git a/drivers/gpio/gpio-uniphier.c b/drivers/gpio/gpio-uniphier.c index 93cdcc41e9fbc..0f1cf50b4dcea 100644 --- a/drivers/gpio/gpio-uniphier.c +++ b/drivers/gpio/gpio-uniphier.c @@ -188,7 +188,7 @@ static void uniphier_gpio_irq_mask(struct irq_data *data) uniphier_gpio_reg_update(priv, UNIPHIER_GPIO_IRQ_EN, mask, 0); - return irq_chip_mask_parent(data); + irq_chip_mask_parent(data); } static void uniphier_gpio_irq_unmask(struct irq_data *data) @@ -198,7 +198,7 @@ static void uniphier_gpio_irq_unmask(struct irq_data *data) uniphier_gpio_reg_update(priv, UNIPHIER_GPIO_IRQ_EN, mask, mask); - return irq_chip_unmask_parent(data); + irq_chip_unmask_parent(data); } static int uniphier_gpio_irq_set_type(struct irq_data *data, unsigned int type) diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c index 58776f2d69ff8..1ae612c796eef 100644 --- a/drivers/gpio/gpio-vf610.c +++ b/drivers/gpio/gpio-vf610.c @@ -125,9 +125,13 @@ static int vf610_gpio_direction_output(struct gpio_chip *chip, unsigned gpio, { struct vf610_gpio_port *port = gpiochip_get_data(chip); unsigned long mask = BIT(gpio); + u32 val; - if (port->sdata && port->sdata->have_paddr) - vf610_gpio_writel(mask, port->gpio_base + GPIO_PDDR); + if (port->sdata && port->sdata->have_paddr) { + val = vf610_gpio_readl(port->gpio_base + GPIO_PDDR); + val |= mask; + vf610_gpio_writel(val, port->gpio_base + GPIO_PDDR); + } vf610_gpio_set(chip, gpio, value); diff --git a/drivers/gpio/gpio-vr41xx.c b/drivers/gpio/gpio-vr41xx.c index 98cd715ccc33c..8d09b619c1669 100644 --- a/drivers/gpio/gpio-vr41xx.c +++ b/drivers/gpio/gpio-vr41xx.c @@ -217,8 +217,6 @@ static int giu_get_irq(unsigned int irq) printk(KERN_ERR "spurious GIU interrupt: %04x(%04x),%04x(%04x)\n", maskl, pendl, maskh, pendh); - atomic_inc(&irq_err_count); - return -EINVAL; } diff --git a/drivers/gpio/gpio-winbond.c b/drivers/gpio/gpio-winbond.c index 7f8f5b02e31d5..4b61d975cc0ec 100644 --- a/drivers/gpio/gpio-winbond.c +++ b/drivers/gpio/gpio-winbond.c @@ -385,12 +385,13 @@ static int winbond_gpio_get(struct gpio_chip *gc, unsigned int offset) unsigned long *base = gpiochip_get_data(gc); const struct winbond_gpio_info *info; bool val; + int ret; winbond_gpio_get_info(&offset, &info); - val = winbond_sio_enter(*base); - if (val) - return val; + ret = winbond_sio_enter(*base); + if (ret) + return ret; winbond_sio_select_logical(*base, info->dev); diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index e3ddc99c105d4..d71c7b9b96650 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -275,8 +275,8 @@ static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares, pin = agpio->pin_table[0]; if (pin <= 255) { - char ev_name[5]; - sprintf(ev_name, "_%c%02hhX", + char ev_name[8]; + sprintf(ev_name, "_%c%02X", agpio->triggering == ACPI_EDGE_SENSITIVE ? 'E' : 'L', pin); if (ACPI_SUCCESS(acpi_get_handle(handle, ev_name, &evt_handle))) @@ -953,10 +953,17 @@ int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) irq_flags = acpi_dev_get_irq_type(info.triggering, info.polarity); - /* Set type if specified and different than the current one */ - if (irq_flags != IRQ_TYPE_NONE && - irq_flags != irq_get_trigger_type(irq)) - irq_set_irq_type(irq, irq_flags); + /* + * If the IRQ is not already in use then set type + * if specified and different than the current one. + */ + if (can_request_irq(irq, irq_flags)) { + if (irq_flags != IRQ_TYPE_NONE && + irq_flags != irq_get_trigger_type(irq)) + irq_set_irq_type(irq, irq_flags); + } else { + dev_dbg(&adev->dev, "IRQ %d already in use\n", irq); + } return irq; } diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 3ece59185d372..b1dcd2dd52e6b 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -783,7 +783,7 @@ static void of_gpiochip_init_valid_mask(struct gpio_chip *chip) i, &start); of_property_read_u32_index(np, "gpio-reserved-ranges", i + 1, &count); - if (start >= chip->ngpio || start + count >= chip->ngpio) + if (start >= chip->ngpio || start + count > chip->ngpio) continue; bitmap_clear(chip->valid_mask, start, count); diff --git a/drivers/gpu/drm/amd/amdgpu/ObjectID.h b/drivers/gpu/drm/amd/amdgpu/ObjectID.h index 5b393622f5920..a0f0a17e224fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/ObjectID.h +++ b/drivers/gpu/drm/amd/amdgpu/ObjectID.h @@ -119,6 +119,7 @@ #define CONNECTOR_OBJECT_ID_eDP 0x14 #define CONNECTOR_OBJECT_ID_MXM 0x15 #define CONNECTOR_OBJECT_ID_LVDS_eDP 0x16 +#define CONNECTOR_OBJECT_ID_USBC 0x17 /* deleted */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index d1e278e999eeb..1b2fa83798304 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -762,7 +762,7 @@ enum amd_hw_ip_block_type { MAX_HWIP }; -#define HWIP_MAX_INSTANCE 8 +#define HWIP_MAX_INSTANCE 10 struct amd_powerplay { void *pp_handle; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index 82155ac3288a0..64ee44c2fdd1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -163,17 +163,28 @@ static int acp_poweron(struct generic_pm_domain *genpd) return 0; } -static struct device *get_mfd_cell_dev(const char *device_name, int r) +static int acp_genpd_add_device(struct device *dev, void *data) { - char auto_dev_name[25]; - struct device *dev; + struct generic_pm_domain *gpd = data; + int ret; - snprintf(auto_dev_name, sizeof(auto_dev_name), - "%s.%d.auto", device_name, r); - dev = bus_find_device_by_name(&platform_bus_type, NULL, auto_dev_name); - dev_info(dev, "device %s added to pm domain\n", auto_dev_name); + ret = pm_genpd_add_device(gpd, dev); + if (ret) + dev_err(dev, "Failed to add dev to genpd %d\n", ret); - return dev; + return ret; +} + +static int acp_genpd_remove_device(struct device *dev, void *data) +{ + int ret; + + ret = pm_genpd_remove_device(dev); + if (ret) + dev_err(dev, "Failed to remove dev from genpd %d\n", ret); + + /* Continue to remove */ + return 0; } /** @@ -184,11 +195,10 @@ static struct device *get_mfd_cell_dev(const char *device_name, int r) */ static int acp_hw_init(void *handle) { - int r, i; + int r; uint64_t acp_base; u32 val = 0; u32 count = 0; - struct device *dev; struct i2s_platform_data *i2s_pdata = NULL; struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -344,15 +354,10 @@ static int acp_hw_init(void *handle) if (r) goto failure; - for (i = 0; i < ACP_DEVS ; i++) { - dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); - r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev); - if (r) { - dev_err(dev, "Failed to add dev to genpd\n"); - goto failure; - } - } - + r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd, + acp_genpd_add_device); + if (r) + goto failure; /* Assert Soft reset of ACP */ val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET); @@ -413,10 +418,8 @@ static int acp_hw_init(void *handle) */ static int acp_hw_fini(void *handle) { - int i, ret; u32 val = 0; u32 count = 0; - struct device *dev; struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* return early if no ACP */ @@ -461,13 +464,8 @@ static int acp_hw_fini(void *handle) udelay(100); } - for (i = 0; i < ACP_DEVS ; i++) { - dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); - ret = pm_genpd_remove_device(dev); - /* If removal fails, dont giveup and try rest */ - if (ret) - dev_err(dev, "remove dev from genpd failed\n"); - } + device_for_each_child(adev->acp.parent, NULL, + acp_genpd_remove_device); mfd_remove_devices(adev->acp.parent); kfree(adev->acp.acp_res); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index c2f27cd7589b6..3ac19a832d6e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -164,7 +164,7 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_tile_config = amdgpu_amdkfd_get_tile_config, }; -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions() +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void) { return (struct kfd2kgd_calls *)&kfd2kgd; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index f003556404ea7..44848afd54780 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -951,11 +951,15 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, struct dma_fence **ef) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct drm_file *drm_priv = filp->private_data; - struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv; - struct amdgpu_vm *avm = &drv_priv->vm; + struct amdgpu_fpriv *drv_priv; + struct amdgpu_vm *avm; int ret; + ret = amdgpu_file_to_fpriv(filp, &drv_priv); + if (ret) + return ret; + avm = &drv_priv->vm; + /* Already a compute VM? */ if (avm->process_info) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 85b0515c0fdcf..e0d2f79571ef5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -61,7 +61,7 @@ static void amdgpu_bo_list_free(struct kref *ref) int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, struct drm_amdgpu_bo_list_entry *info, - unsigned num_entries, struct amdgpu_bo_list **result) + size_t num_entries, struct amdgpu_bo_list **result) { unsigned last_entry = 0, first_userptr = num_entries; struct amdgpu_bo_list_entry *array; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index a130e766cbdbe..529d52a204cf4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -60,7 +60,7 @@ int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in, int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, struct drm_amdgpu_bo_list_entry *info, - unsigned num_entries, + size_t num_entries, struct amdgpu_bo_list **list); static inline struct amdgpu_bo_list_entry * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index cda0a76a733d3..0e1cacf731698 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -389,6 +389,9 @@ amdgpu_connector_lcd_native_mode(struct drm_encoder *encoder) native_mode->vdisplay != 0 && native_mode->clock != 0) { mode = drm_mode_duplicate(dev, native_mode); + if (!mode) + return NULL; + mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER; drm_mode_set_name(mode); @@ -403,6 +406,9 @@ amdgpu_connector_lcd_native_mode(struct drm_encoder *encoder) * simpler. */ mode = drm_cvt_mode(dev, native_mode->hdisplay, native_mode->vdisplay, 60, true, false, false); + if (!mode) + return NULL; + mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER; DRM_DEBUG_KMS("Adding cvt approximation of native panel mode %s\n", mode->name); } @@ -829,6 +835,7 @@ static int amdgpu_connector_vga_get_modes(struct drm_connector *connector) amdgpu_connector_get_edid(connector); ret = amdgpu_connector_ddc_get_modes(connector); + amdgpu_get_native_mode(connector); return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 5a5470b984d4e..0789aa587fa05 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -114,7 +114,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs int ret; if (cs->in.num_chunks == 0) - return 0; + return -EINVAL; chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL); if (!chunk_array) @@ -1560,6 +1560,7 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data, return 0; default: + dma_fence_put(fence); return -EINVAL; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index e8e1720104160..ffd7547135225 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -633,7 +633,7 @@ MODULE_PARM_DESC(sched_policy, * Maximum number of processes that HWS can schedule concurrently. The maximum is the * number of VMIDs assigned to the HWS, which is also the default. */ -int hws_max_conc_proc = 8; +int hws_max_conc_proc = -1; module_param(hws_max_conc_proc, int, 0444); MODULE_PARM_DESC(hws_max_conc_proc, "Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index f9bef3154b998..2659202f2026b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -263,7 +263,7 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, * adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe; - while (queue_bit-- >= 0) { + while (--queue_bit >= 0) { if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap)) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c index 70dbe343f51df..89cecdba81ace 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c @@ -339,7 +339,7 @@ static void amdgpu_i2c_put_byte(struct amdgpu_i2c_chan *i2c_bus, void amdgpu_i2c_router_select_ddc_port(const struct amdgpu_connector *amdgpu_connector) { - u8 val; + u8 val = 0; if (!amdgpu_connector->router.ddc_valid) return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 28361a9c5addc..4cc0dd494a42b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -200,7 +200,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) c++; } - BUG_ON(c >= AMDGPU_BO_MAX_PLACEMENTS); + BUG_ON(c > AMDGPU_BO_MAX_PLACEMENTS); placement->num_placement = c; placement->placement = places; @@ -1305,7 +1305,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) return; - dma_resv_lock(bo->base.resv, NULL); + if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv))) + return; r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence); if (!WARN_ON(r)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 8a32b5c93778b..bd7ae3e130b6f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -138,7 +138,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) return ret; } - __decode_table_header_from_buff(hdr, &buff[2]); + __decode_table_header_from_buff(hdr, buff); if (hdr->header == EEPROM_TABLE_HDR_VAL) { control->num_recs = (hdr->tbl_size - EEPROM_TABLE_HEADER_SIZE) / diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 5667f3655b239..f942cf8d8efcc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1976,7 +1976,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, unsigned i; int r; - if (direct_submit && !ring->sched.ready) { + if (!direct_submit && !ring->sched.ready) { DRM_ERROR("Trying to move memory with ring turned off.\n"); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 3a6115ad01965..f3250db7f9c27 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -568,8 +568,7 @@ int amdgpu_ucode_create_bo(struct amdgpu_device *adev) void amdgpu_ucode_free_bo(struct amdgpu_device *adev) { - if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) - amdgpu_bo_free_kernel(&adev->firmware.fw_buf, + amdgpu_bo_free_kernel(&adev->firmware.fw_buf, &adev->firmware.fw_buf_mc, &adev->firmware.fw_buf_ptr); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 06cdc22b5501d..5906a8951a6c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2906,8 +2906,8 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev) AMD_PG_SUPPORT_CP | AMD_PG_SUPPORT_GDS | AMD_PG_SUPPORT_RLC_SMU_HS)) { - WREG32(mmRLC_JUMP_TABLE_RESTORE, - adev->gfx.rlc.cp_table_gpu_addr >> 8); + WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE, + adev->gfx.rlc.cp_table_gpu_addr >> 8); gfx_v9_0_init_gfx_power_gating(adev); } } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index f642e066e67a2..85ee0e849647e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -903,6 +903,8 @@ static int gmc_v10_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + gmc_v10_0_gart_disable(adev); + if (amdgpu_sriov_vf(adev)) { /* full access mode, so don't touch any GMC register */ DRM_DEBUG("For SRIOV client, shouldn't do anything.\n"); @@ -910,7 +912,6 @@ static int gmc_v10_0_hw_fini(void *handle) } amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); - gmc_v10_0_gart_disable(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 9fb1765e92d15..e9f5de35f7953 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -863,12 +863,12 @@ static int gmc_v6_0_sw_init(void *handle) adev->gmc.mc_mask = 0xffffffffffULL; - r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44)); + r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(40)); if (r) { dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n"); return r; } - adev->need_swiotlb = drm_need_swiotlb(44); + adev->need_swiotlb = drm_need_swiotlb(40); r = gmc_v6_0_init_microcode(adev); if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index ea764dd9245db..2975331a7b867 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -524,10 +524,10 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev) static int gmc_v8_0_mc_init(struct amdgpu_device *adev) { int r; + u32 tmp; adev->gmc.vram_width = amdgpu_atombios_get_vram_width(adev); if (!adev->gmc.vram_width) { - u32 tmp; int chansize, numchan; /* Get VRAM informations */ @@ -571,8 +571,15 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) adev->gmc.vram_width = numchan * chansize; } /* size in MB on si */ - adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; - adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; + tmp = RREG32(mmCONFIG_MEMSIZE); + /* some boards may have garbage in the upper 16 bits */ + if (tmp & 0xffff0000) { + DRM_INFO("Probable bad vram size: 0x%08x\n", tmp); + if (tmp & 0xffff) + tmp &= 0xffff; + } + adev->gmc.mc_vram_size = tmp * 1024ULL * 1024ULL; + adev->gmc.real_vram_size = adev->gmc.mc_vram_size; if (!(adev->flags & AMD_IS_APU)) { r = amdgpu_device_resize_fb_bar(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 688111ef814de..63205de4a5656 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1526,6 +1526,8 @@ static int gmc_v9_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + gmc_v9_0_gart_disable(adev); + if (amdgpu_sriov_vf(adev)) { /* full access mode, so don't touch any GMC register */ DRM_DEBUG("For SRIOV client, shouldn't do anything.\n"); @@ -1534,7 +1536,6 @@ static int gmc_v9_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); - gmc_v9_0_gart_disable(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c index 4b3faaccecb94..c8a5a5698edd9 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c @@ -1609,19 +1609,7 @@ static int kv_update_samu_dpm(struct amdgpu_device *adev, bool gate) static u8 kv_get_acp_boot_level(struct amdgpu_device *adev) { - u8 i; - struct amdgpu_clock_voltage_dependency_table *table = - &adev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table; - - for (i = 0; i < table->count; i++) { - if (table->entries[i].clk >= 0) /* XXX */ - break; - } - - if (i >= table->count) - i = table->count - 1; - - return i; + return 0; } static void kv_update_acp_boot_level(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index 4cb4c891120b2..9931d5c17cfb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -7250,17 +7250,15 @@ static int si_parse_power_table(struct amdgpu_device *adev) if (!adev->pm.dpm.ps) return -ENOMEM; power_state_offset = (u8 *)state_array->states; - for (i = 0; i < state_array->ucNumEntries; i++) { + for (adev->pm.dpm.num_ps = 0, i = 0; i < state_array->ucNumEntries; i++) { u8 *idx; power_state = (union pplib_power_state *)power_state_offset; non_clock_array_index = power_state->v2.nonClockInfoIndex; non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *) &non_clock_info_array->nonClockInfo[non_clock_array_index]; ps = kzalloc(sizeof(struct si_ps), GFP_KERNEL); - if (ps == NULL) { - kfree(adev->pm.dpm.ps); + if (ps == NULL) return -ENOMEM; - } adev->pm.dpm.ps[i].ps_priv = ps; si_parse_pplib_non_clock_info(adev, &adev->pm.dpm.ps[i], non_clock_info, @@ -7282,8 +7280,8 @@ static int si_parse_power_table(struct amdgpu_device *adev) k++; } power_state_offset += 2 + power_state->v2.ucNumDPMLevels; + adev->pm.dpm.num_ps++; } - adev->pm.dpm.num_ps = state_array->ucNumEntries; /* fill in the vce power states */ for (i = 0; i < adev->pm.dpm.num_of_vce_states; i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 6ff1d308623a7..b368496ed6858 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1143,8 +1143,11 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_SDMA_MGCG | AMD_CG_SUPPORT_SDMA_LS; + /* + * MMHUB PG needs to be disabled for Picasso for + * stability reasons. + */ adev->pg_flags = AMD_PG_SUPPORT_SDMA | - AMD_PG_SUPPORT_MMHUB | AMD_PG_SUPPORT_VCN; } else { adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 66387caf966e2..3685e89415d5c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -758,7 +758,7 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size) /* Fetch the CRAT table from ACPI */ status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table); if (status == AE_NOT_FOUND) { - pr_warn("CRAT table not found\n"); + pr_info("CRAT table not found\n"); return -ENODATA; } else if (ACPI_FAILURE(status)) { const char *err = acpi_format_exception(status); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index ad9483b9eea32..60ee1a8321129 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -609,15 +609,10 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, - kfd->vm_info.first_vmid_kfd + 1; /* Verify module parameters regarding mapped process number*/ - if ((hws_max_conc_proc < 0) - || (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) { - dev_err(kfd_device, - "hws_max_conc_proc %d must be between 0 and %d, use %d instead\n", - hws_max_conc_proc, kfd->vm_info.vmid_num_kfd, - kfd->vm_info.vmid_num_kfd); + if (hws_max_conc_proc >= 0) + kfd->max_proc_per_quantum = min((u32)hws_max_conc_proc, kfd->vm_info.vmid_num_kfd); + else kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd; - } else - kfd->max_proc_per_quantum = hws_max_conc_proc; /* Allocate global GWS that is shared by all KFD processes */ if (hws_gws_support && amdgpu_amdkfd_alloc_gws(kfd->kgd, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index fc1e251240a46..6e186d36fc55b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -532,6 +532,8 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events) event_waiters = kmalloc_array(num_events, sizeof(struct kfd_event_waiter), GFP_KERNEL); + if (!event_waiters) + return NULL; for (i = 0; (event_waiters) && (i < num_events) ; i++) { init_wait(&event_waiters[i].wait); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index 986ff52d5750c..f4b7f7e6c40e4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -82,7 +82,7 @@ static void kfd_exit(void) kfd_chardev_exit(); } -int kgd2kfd_init() +int kgd2kfd_init(void) { return kfd_init(); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index 88813dad731fa..c021519af8106 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -98,36 +98,78 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, uint32_t *se_mask) { struct kfd_cu_info cu_info; - uint32_t cu_per_se[KFD_MAX_NUM_SE] = {0}; - int i, se, sh, cu = 0; - + uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0}; + int i, se, sh, cu; amdgpu_amdkfd_get_cu_info(mm->dev->kgd, &cu_info); if (cu_mask_count > cu_info.cu_active_number) cu_mask_count = cu_info.cu_active_number; + /* Exceeding these bounds corrupts the stack and indicates a coding error. + * Returning with no CU's enabled will hang the queue, which should be + * attention grabbing. + */ + if (cu_info.num_shader_engines > KFD_MAX_NUM_SE) { + pr_err("Exceeded KFD_MAX_NUM_SE, chip reports %d\n", cu_info.num_shader_engines); + return; + } + if (cu_info.num_shader_arrays_per_engine > KFD_MAX_NUM_SH_PER_SE) { + pr_err("Exceeded KFD_MAX_NUM_SH, chip reports %d\n", + cu_info.num_shader_arrays_per_engine * cu_info.num_shader_engines); + return; + } + /* Count active CUs per SH. + * + * Some CUs in an SH may be disabled. HW expects disabled CUs to be + * represented in the high bits of each SH's enable mask (the upper and lower + * 16 bits of se_mask) and will take care of the actual distribution of + * disabled CUs within each SH automatically. + * Each half of se_mask must be filled only on bits 0-cu_per_sh[se][sh]-1. + * + * See note on Arcturus cu_bitmap layout in gfx_v9_0_get_cu_info. + */ for (se = 0; se < cu_info.num_shader_engines; se++) for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) - cu_per_se[se] += hweight32(cu_info.cu_bitmap[se % 4][sh + (se / 4)]); - - /* Symmetrically map cu_mask to all SEs: - * cu_mask[0] bit0 -> se_mask[0] bit0; - * cu_mask[0] bit1 -> se_mask[1] bit0; - * ... (if # SE is 4) - * cu_mask[0] bit4 -> se_mask[0] bit1; + cu_per_sh[se][sh] = hweight32(cu_info.cu_bitmap[se % 4][sh + (se / 4)]); + + /* Symmetrically map cu_mask to all SEs & SHs: + * se_mask programs up to 2 SH in the upper and lower 16 bits. + * + * Examples + * Assuming 1 SH/SE, 4 SEs: + * cu_mask[0] bit0 -> se_mask[0] bit0 + * cu_mask[0] bit1 -> se_mask[1] bit0 + * ... + * cu_mask[0] bit4 -> se_mask[0] bit1 + * ... + * + * Assuming 2 SH/SE, 4 SEs + * cu_mask[0] bit0 -> se_mask[0] bit0 (SE0,SH0,CU0) + * cu_mask[0] bit1 -> se_mask[1] bit0 (SE1,SH0,CU0) + * ... + * cu_mask[0] bit4 -> se_mask[0] bit16 (SE0,SH1,CU0) + * cu_mask[0] bit5 -> se_mask[1] bit16 (SE1,SH1,CU0) + * ... + * cu_mask[0] bit8 -> se_mask[0] bit1 (SE0,SH0,CU1) * ... + * + * First ensure all CUs are disabled, then enable user specified CUs. */ - se = 0; - for (i = 0; i < cu_mask_count; i++) { - if (cu_mask[i / 32] & (1 << (i % 32))) - se_mask[se] |= 1 << cu; - - do { - se++; - if (se == cu_info.num_shader_engines) { - se = 0; - cu++; + for (i = 0; i < cu_info.num_shader_engines; i++) + se_mask[i] = 0; + + i = 0; + for (cu = 0; cu < 16; cu++) { + for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) { + for (se = 0; se < cu_info.num_shader_engines; se++) { + if (cu_per_sh[se][sh] > cu) { + if (cu_mask[i / 32] & (1 << (i % 32))) + se_mask[se] |= 1 << (cu + sh * 16); + i++; + if (i == cu_mask_count) + return; + } } - } while (cu >= cu_per_se[se] && cu < 32); + } } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h index fbdb16418847c..4edc012e31387 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h @@ -27,6 +27,7 @@ #include "kfd_priv.h" #define KFD_MAX_NUM_SE 8 +#define KFD_MAX_NUM_SH_PER_SE 2 /** * struct mqd_manager diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 0dc60fe22aefc..de33864af70b8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -664,6 +664,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) init_data.asic_id.pci_revision_id = adev->rev_id; init_data.asic_id.hw_internal_rev = adev->external_rev_id; + init_data.asic_id.chip_id = adev->pdev->device; init_data.asic_id.vram_width = adev->gmc.vram_width; /* TODO: initialize init_data.asic_id.vram_type here!!!! */ @@ -1209,7 +1210,8 @@ static int dm_resume(void *handle) * this is the case when traversing through already created * MST connectors, should be skipped */ - if (aconnector->mst_port) + if (aconnector->dc_link && + aconnector->dc_link->type == dc_connection_mst_branch) continue; mutex_lock(&aconnector->hpd_lock); @@ -5012,6 +5014,9 @@ static void amdgpu_dm_connector_add_common_modes(struct drm_encoder *encoder, mode = amdgpu_dm_create_common_mode(encoder, common_modes[i].name, common_modes[i].w, common_modes[i].h); + if (!mode) + continue; + drm_mode_probed_add(connector, mode); amdgpu_dm_connector->num_modes++; } @@ -7264,10 +7269,13 @@ dm_determine_update_type_for_commit(struct amdgpu_display_manager *dm, static int add_affected_mst_dsc_crtcs(struct drm_atomic_state *state, struct drm_crtc *crtc) { struct drm_connector *connector; - struct drm_connector_state *conn_state; + struct drm_connector_state *conn_state, *old_conn_state; struct amdgpu_dm_connector *aconnector = NULL; int i; - for_each_new_connector_in_state(state, connector, conn_state, i) { + for_each_oldnew_connector_in_state(state, connector, old_conn_state, conn_state, i) { + if (!conn_state->crtc) + conn_state = old_conn_state; + if (conn_state->crtc != crtc) continue; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c index f0b001b3af578..883ee517673bd 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c @@ -221,6 +221,14 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name) ret = -EINVAL; goto cleanup; } + + if ((aconn->base.connector_type != DRM_MODE_CONNECTOR_DisplayPort) && + (aconn->base.connector_type != DRM_MODE_CONNECTOR_eDP)) { + DRM_DEBUG_DRIVER("No DP connector available for CRC source\n"); + ret = -EINVAL; + goto cleanup; + } + } if (amdgpu_dm_crtc_configure_crc_source(crtc, crtc_state, source)) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index f3dfb2887ae0b..2cdcefab2d7d4 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -95,29 +95,29 @@ static ssize_t dp_link_settings_read(struct file *f, char __user *buf, rd_buf_ptr = rd_buf; - str_len = strlen("Current: %d %d %d "); - snprintf(rd_buf_ptr, str_len, "Current: %d %d %d ", + str_len = strlen("Current: %d 0x%x %d "); + snprintf(rd_buf_ptr, str_len, "Current: %d 0x%x %d ", link->cur_link_settings.lane_count, link->cur_link_settings.link_rate, link->cur_link_settings.link_spread); rd_buf_ptr += str_len; - str_len = strlen("Verified: %d %d %d "); - snprintf(rd_buf_ptr, str_len, "Verified: %d %d %d ", + str_len = strlen("Verified: %d 0x%x %d "); + snprintf(rd_buf_ptr, str_len, "Verified: %d 0x%x %d ", link->verified_link_cap.lane_count, link->verified_link_cap.link_rate, link->verified_link_cap.link_spread); rd_buf_ptr += str_len; - str_len = strlen("Reported: %d %d %d "); - snprintf(rd_buf_ptr, str_len, "Reported: %d %d %d ", + str_len = strlen("Reported: %d 0x%x %d "); + snprintf(rd_buf_ptr, str_len, "Reported: %d 0x%x %d ", link->reported_link_cap.lane_count, link->reported_link_cap.link_rate, link->reported_link_cap.link_spread); rd_buf_ptr += str_len; - str_len = strlen("Preferred: %d %d %d "); - snprintf(rd_buf_ptr, str_len, "Preferred: %d %d %d\n", + str_len = strlen("Preferred: %d 0x%x %d "); + snprintf(rd_buf_ptr, str_len, "Preferred: %d 0x%x %d\n", link->preferred_link_setting.lane_count, link->preferred_link_setting.link_rate, link->preferred_link_setting.link_spread); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index f25ac17f47fa9..de246e183d6ba 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1546,6 +1546,10 @@ bool dc_is_stream_unchanged( if (old_stream->ignore_msa_timing_param != stream->ignore_msa_timing_param) return false; + /*compare audio info*/ + if (memcmp(&old_stream->audio_info, &stream->audio_info, sizeof(stream->audio_info)) != 0) + return false; + return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 60123db7ba02f..fa3acf60e7bd2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -2202,14 +2202,18 @@ static void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) &blnd_cfg.black_color); } - if (per_pixel_alpha) - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; - else - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA; - blnd_cfg.overlap_only = false; blnd_cfg.global_gain = 0xff; + if (per_pixel_alpha && pipe_ctx->plane_state->global_alpha) { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN; + blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value; + } else if (per_pixel_alpha) { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; + } else { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA; + } + if (pipe_ctx->plane_state->global_alpha) blnd_cfg.global_alpha = pipe_ctx->plane_state->global_alpha_value; else @@ -3264,13 +3268,12 @@ static enum dc_status dcn10_set_clock(struct dc *dc, struct dc_clock_config clock_cfg = {0}; struct dc_clocks *current_clocks = &context->bw_ctx.bw.dcn.clk; - if (dc->clk_mgr && dc->clk_mgr->funcs->get_clock) - dc->clk_mgr->funcs->get_clock(dc->clk_mgr, - context, clock_type, &clock_cfg); - - if (!dc->clk_mgr->funcs->get_clock) + if (!dc->clk_mgr || !dc->clk_mgr->funcs->get_clock) return DC_FAIL_UNSUPPORTED_1; + dc->clk_mgr->funcs->get_clock(dc->clk_mgr, + context, clock_type, &clock_cfg); + if (clk_khz > clock_cfg.max_clock_khz) return DC_FAIL_CLK_EXCEED_MAX; @@ -3288,7 +3291,7 @@ static enum dc_status dcn10_set_clock(struct dc *dc, else return DC_ERROR_UNEXPECTED; - if (dc->clk_mgr && dc->clk_mgr->funcs->update_clocks) + if (dc->clk_mgr->funcs->update_clocks) dc->clk_mgr->funcs->update_clocks(dc->clk_mgr, context, true); return DC_OK; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 03a2e1d7f0673..f7965a5d24442 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -1740,14 +1740,18 @@ static void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) pipe_ctx, &blnd_cfg.black_color); } - if (per_pixel_alpha) - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; - else - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA; - blnd_cfg.overlap_only = false; blnd_cfg.global_gain = 0xff; + if (per_pixel_alpha && pipe_ctx->plane_state->global_alpha) { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN; + blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value; + } else if (per_pixel_alpha) { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; + } else { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA; + } + if (pipe_ctx->plane_state->global_alpha) blnd_cfg.global_alpha = pipe_ctx->plane_state->global_alpha_value; else diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 2b1175bb2daee..d2ea4c003d442 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -2232,7 +2232,7 @@ void dcn20_set_mcif_arb_params( wb_arb_params->cli_watermark[k] = get_wm_writeback_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; wb_arb_params->pstate_watermark[k] = get_wm_writeback_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; } - wb_arb_params->time_per_pixel = 16.0 / context->res_ctx.pipe_ctx[i].stream->phy_pix_clk; /* 4 bit fraction, ms */ + wb_arb_params->time_per_pixel = 16.0 * 1000 / (context->res_ctx.pipe_ctx[i].stream->phy_pix_clk / 1000); /* 4 bit fraction, ms */ wb_arb_params->slice_lines = 32; wb_arb_params->arbitration_slice = 2; wb_arb_params->max_scaled_time = dcn20_calc_max_scaled_time(wb_arb_params->time_per_pixel, diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 11a4c4029a902..acc2c307c871b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -1140,6 +1140,7 @@ static struct clock_source *dcn21_clock_source_create( return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c index d03165e71dc65..0be817f8cae6b 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c @@ -53,8 +53,8 @@ */ struct gpio_service *dal_gpio_service_create( - enum dce_version dce_version_major, - enum dce_version dce_version_minor, + enum dce_version dce_version, + enum dce_environment dce_environment, struct dc_context *ctx) { struct gpio_service *service; @@ -67,14 +67,14 @@ struct gpio_service *dal_gpio_service_create( return NULL; } - if (!dal_hw_translate_init(&service->translate, dce_version_major, - dce_version_minor)) { + if (!dal_hw_translate_init(&service->translate, dce_version, + dce_environment)) { BREAK_TO_DEBUGGER(); goto failure_1; } - if (!dal_hw_factory_init(&service->factory, dce_version_major, - dce_version_minor)) { + if (!dal_hw_factory_init(&service->factory, dce_version, + dce_environment)) { BREAK_TO_DEBUGGER(); goto failure_1; } diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c index 623455cd75203..d6f988403941b 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c @@ -227,14 +227,6 @@ static const struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = { .funcs = &pflip_irq_info_funcs\ } -#define vupdate_int_entry(reg_num)\ - [DC_IRQ_SOURCE_VUPDATE1 + reg_num] = {\ - IRQ_REG_ENTRY(OTG, reg_num,\ - OTG_GLOBAL_SYNC_STATUS, VUPDATE_INT_EN,\ - OTG_GLOBAL_SYNC_STATUS, VUPDATE_EVENT_CLEAR),\ - .funcs = &vblank_irq_info_funcs\ - } - /* vupdate_no_lock_int_entry maps to DC_IRQ_SOURCE_VUPDATEx, to match semantic * of DCE's DC_IRQ_SOURCE_VUPDATEx. */ @@ -348,12 +340,6 @@ irq_source_info_dcn21[DAL_IRQ_SOURCES_NUMBER] = { dc_underflow_int_entry(6), [DC_IRQ_SOURCE_DMCU_SCP] = dummy_irq_entry(), [DC_IRQ_SOURCE_VBIOS_SW] = dummy_irq_entry(), - vupdate_int_entry(0), - vupdate_int_entry(1), - vupdate_int_entry(2), - vupdate_int_entry(3), - vupdate_int_entry(4), - vupdate_int_entry(5), vupdate_no_lock_int_entry(0), vupdate_no_lock_int_entry(1), vupdate_no_lock_int_entry(2), diff --git a/drivers/gpu/drm/amd/display/include/gpio_service_interface.h b/drivers/gpu/drm/amd/display/include/gpio_service_interface.h index 9c55d247227ea..7e3240e73c1fc 100644 --- a/drivers/gpu/drm/amd/display/include/gpio_service_interface.h +++ b/drivers/gpu/drm/amd/display/include/gpio_service_interface.h @@ -42,8 +42,8 @@ void dal_gpio_destroy( struct gpio **ptr); struct gpio_service *dal_gpio_service_create( - enum dce_version dce_version_major, - enum dce_version dce_version_minor, + enum dce_version dce_version, + enum dce_environment dce_environment, struct dc_context *ctx); struct gpio *dal_gpio_service_create_irq( diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c index d885d642ed7fc..537736713598b 100644 --- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c +++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c @@ -85,7 +85,8 @@ //PB7 = MD0 #define MASK_VTEM_MD0__VRR_EN 0x01 #define MASK_VTEM_MD0__M_CONST 0x02 -#define MASK_VTEM_MD0__RESERVED2 0x0C +#define MASK_VTEM_MD0__QMS_EN 0x04 +#define MASK_VTEM_MD0__RESERVED2 0x08 #define MASK_VTEM_MD0__FVA_FACTOR_M1 0xF0 //MD1 @@ -94,7 +95,7 @@ //MD2 #define MASK_VTEM_MD2__BASE_REFRESH_RATE_98 0x03 #define MASK_VTEM_MD2__RB 0x04 -#define MASK_VTEM_MD2__RESERVED3 0xF8 +#define MASK_VTEM_MD2__NEXT_TFR 0xF8 //MD3 #define MASK_VTEM_MD3__BASE_REFRESH_RATE_07 0xFF diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_plane.c b/drivers/gpu/drm/arm/display/komeda/komeda_plane.c index 98e915e325ddf..bc3f42e915e91 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_plane.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_plane.c @@ -264,6 +264,10 @@ static int komeda_plane_add(struct komeda_kms_dev *kms, formats = komeda_get_layer_fourcc_list(&mdev->fmt_tbl, layer->layer_type, &n_formats); + if (!formats) { + kfree(kplane); + return -ENOMEM; + } err = drm_universal_plane_init(&kms->base, plane, get_possible_crtcs(kms, c->pipeline), @@ -274,8 +278,10 @@ static int komeda_plane_add(struct komeda_kms_dev *kms, komeda_put_fourcc_list(formats); - if (err) - goto cleanup; + if (err) { + kfree(kplane); + return err; + } drm_plane_helper_add(plane, &komeda_plane_helper_funcs); diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c index 587d94798f5c2..af729094260c4 100644 --- a/drivers/gpu/drm/arm/malidp_crtc.c +++ b/drivers/gpu/drm/arm/malidp_crtc.c @@ -483,7 +483,10 @@ static void malidp_crtc_reset(struct drm_crtc *crtc) if (crtc->state) malidp_crtc_destroy_state(crtc, crtc->state); - __drm_atomic_helper_crtc_reset(crtc, &state->base); + if (state) + __drm_atomic_helper_crtc_reset(crtc, &state->base); + else + __drm_atomic_helper_crtc_reset(crtc, NULL); } static int malidp_crtc_enable_vblank(struct drm_crtc *crtc) diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index 9e13e466e72c0..e7bf32f234d71 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -1225,6 +1225,7 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id) return 0; err_unregister_cec: + cec_unregister_adapter(adv7511->cec_adap); i2c_unregister_device(adv7511->i2c_cec); if (adv7511->cec_clk) clk_disable_unprepare(adv7511->cec_clk); diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c index 1f26890a8da6e..c6a51d1c7ec9e 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c @@ -1630,8 +1630,19 @@ static ssize_t analogix_dpaux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) { struct analogix_dp_device *dp = to_dp(aux); + int ret; + + pm_runtime_get_sync(dp->dev); + + ret = analogix_dp_detect_hpd(dp); + if (ret) + goto out; - return analogix_dp_transfer(dp, msg); + ret = analogix_dp_transfer(dp, msg); +out: + pm_runtime_put(dp->dev); + + return ret; } struct analogix_dp_device * @@ -1696,8 +1707,10 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); dp->reg_base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(dp->reg_base)) - return ERR_CAST(dp->reg_base); + if (IS_ERR(dp->reg_base)) { + ret = PTR_ERR(dp->reg_base); + goto err_disable_clk; + } dp->force_hpd = of_property_read_bool(dev->of_node, "force-hpd"); @@ -1709,7 +1722,8 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) if (IS_ERR(dp->hpd_gpiod)) { dev_err(dev, "error getting HDP GPIO: %ld\n", PTR_ERR(dp->hpd_gpiod)); - return ERR_CAST(dp->hpd_gpiod); + ret = PTR_ERR(dp->hpd_gpiod); + goto err_disable_clk; } if (dp->hpd_gpiod) { @@ -1729,7 +1743,8 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) if (dp->irq == -ENXIO) { dev_err(&pdev->dev, "failed to get irq\n"); - return ERR_PTR(-ENODEV); + ret = -ENODEV; + goto err_disable_clk; } ret = devm_request_threaded_irq(&pdev->dev, dp->irq, @@ -1738,11 +1753,15 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) irq_flags, "analogix-dp", dp); if (ret) { dev_err(&pdev->dev, "failed to request irq\n"); - return ERR_PTR(ret); + goto err_disable_clk; } disable_irq(dp->irq); return dp; + +err_disable_clk: + clk_disable_unprepare(dp->clock); + return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(analogix_dp_probe); diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c index 914c569ab8c15..cab3f5c4e2fc8 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c @@ -1086,11 +1086,21 @@ int analogix_dp_send_psr_spd(struct analogix_dp_device *dp, if (!blocking) return 0; + /* + * db[1]!=0: entering PSR, wait for fully active remote frame buffer. + * db[1]==0: exiting PSR, wait for either + * (a) ACTIVE_RESYNC - the sink "must display the + * incoming active frames from the Source device with no visible + * glitches and/or artifacts", even though timings may still be + * re-synchronizing; or + * (b) INACTIVE - the transition is fully complete. + */ ret = readx_poll_timeout(analogix_dp_get_psr_status, dp, psr_status, psr_status >= 0 && ((vsc->db[1] && psr_status == DP_PSR_SINK_ACTIVE_RFB) || - (!vsc->db[1] && psr_status == DP_PSR_SINK_INACTIVE)), 1500, - DP_TIMEOUT_PSR_LOOP_MS * 1000); + (!vsc->db[1] && (psr_status == DP_PSR_SINK_ACTIVE_RESYNC || + psr_status == DP_PSR_SINK_INACTIVE))), + 1500, DP_TIMEOUT_PSR_LOOP_MS * 1000); if (ret) { dev_warn(dp->dev, "Failed to apply PSR %d\n", ret); return ret; diff --git a/drivers/gpu/drm/bridge/cdns-dsi.c b/drivers/gpu/drm/bridge/cdns-dsi.c index 0cb9dd6986ec3..9c3f9110895e8 100644 --- a/drivers/gpu/drm/bridge/cdns-dsi.c +++ b/drivers/gpu/drm/bridge/cdns-dsi.c @@ -1284,6 +1284,7 @@ static const struct of_device_id cdns_dsi_of_match[] = { { .compatible = "cdns,dsi" }, { }, }; +MODULE_DEVICE_TABLE(of, cdns_dsi_of_match); static struct platform_driver cdns_dsi_platform_driver = { .probe = cdns_dsi_drm_probe, diff --git a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c index b050fd1f3d201..5302dd90a7a5f 100644 --- a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c +++ b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c @@ -291,19 +291,10 @@ static void ge_b850v3_lvds_remove(void) mutex_unlock(&ge_b850v3_lvds_dev_mutex); } -static int stdp4028_ge_b850v3_fw_probe(struct i2c_client *stdp4028_i2c, - const struct i2c_device_id *id) +static int ge_b850v3_register(void) { + struct i2c_client *stdp4028_i2c = ge_b850v3_lvds_ptr->stdp4028_i2c; struct device *dev = &stdp4028_i2c->dev; - int ret; - - ret = ge_b850v3_lvds_init(dev); - - if (ret) - return ret; - - ge_b850v3_lvds_ptr->stdp4028_i2c = stdp4028_i2c; - i2c_set_clientdata(stdp4028_i2c, ge_b850v3_lvds_ptr); /* drm bridge initialization */ ge_b850v3_lvds_ptr->bridge.funcs = &ge_b850v3_lvds_funcs; @@ -325,6 +316,27 @@ static int stdp4028_ge_b850v3_fw_probe(struct i2c_client *stdp4028_i2c, "ge-b850v3-lvds-dp", ge_b850v3_lvds_ptr); } +static int stdp4028_ge_b850v3_fw_probe(struct i2c_client *stdp4028_i2c, + const struct i2c_device_id *id) +{ + struct device *dev = &stdp4028_i2c->dev; + int ret; + + ret = ge_b850v3_lvds_init(dev); + + if (ret) + return ret; + + ge_b850v3_lvds_ptr->stdp4028_i2c = stdp4028_i2c; + i2c_set_clientdata(stdp4028_i2c, ge_b850v3_lvds_ptr); + + /* Only register after both bridges are probed */ + if (!ge_b850v3_lvds_ptr->stdp2690_i2c) + return 0; + + return ge_b850v3_register(); +} + static int stdp4028_ge_b850v3_fw_remove(struct i2c_client *stdp4028_i2c) { ge_b850v3_lvds_remove(); @@ -368,7 +380,11 @@ static int stdp2690_ge_b850v3_fw_probe(struct i2c_client *stdp2690_i2c, ge_b850v3_lvds_ptr->stdp2690_i2c = stdp2690_i2c; i2c_set_clientdata(stdp2690_i2c, ge_b850v3_lvds_ptr); - return 0; + /* Only register after both bridges are probed */ + if (!ge_b850v3_lvds_ptr->stdp4028_i2c) + return 0; + + return ge_b850v3_register(); } static int stdp2690_ge_b850v3_fw_remove(struct i2c_client *stdp2690_i2c) diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c index 04431dbac4a4f..fb0b64c965b74 100644 --- a/drivers/gpu/drm/bridge/sil-sii8620.c +++ b/drivers/gpu/drm/bridge/sil-sii8620.c @@ -2118,7 +2118,7 @@ static void sii8620_init_rcp_input_dev(struct sii8620 *ctx) if (ret) { dev_err(ctx->dev, "Failed to register RC device\n"); ctx->error = ret; - rc_free_device(ctx->rc_dev); + rc_free_device(rc_dev); return; } ctx->rc_dev = rc_dev; diff --git a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c index 77384c49fb8dd..2fc27931d3b56 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c @@ -1057,6 +1057,7 @@ __dw_mipi_dsi_probe(struct platform_device *pdev, ret = mipi_dsi_host_register(&dsi->dsi_host); if (ret) { dev_err(dev, "Failed to register MIPI host: %d\n", ret); + pm_runtime_disable(dev); dw_mipi_dsi_debugfs_remove(dsi); return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index f1de4bb6558ca..dbb4a374cb646 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -115,6 +115,7 @@ static const struct regmap_config ti_sn_bridge_regmap_config = { .val_bits = 8, .volatile_table = &ti_sn_bridge_volatile_table, .cache_type = REGCACHE_NONE, + .max_register = 0xFF, }; static void ti_sn_bridge_write_u16(struct ti_sn_bridge *pdata, diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 2337b3827e6a4..11a81e8ba9639 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -1984,6 +1984,9 @@ EXPORT_SYMBOL(drm_connector_attach_max_bpc_property); void drm_connector_set_vrr_capable_property( struct drm_connector *connector, bool capable) { + if (!connector->vrr_capable_property) + return; + drm_object_property_set_value(&connector->base, connector->vrr_capable_property, capable); diff --git a/drivers/gpu/drm/drm_crtc_helper_internal.h b/drivers/gpu/drm/drm_crtc_helper_internal.h index b5ac1581e6231..d595697d3d7e0 100644 --- a/drivers/gpu/drm/drm_crtc_helper_internal.h +++ b/drivers/gpu/drm/drm_crtc_helper_internal.h @@ -32,16 +32,6 @@ #include #include -/* drm_fb_helper.c */ -#ifdef CONFIG_DRM_FBDEV_EMULATION -int drm_fb_helper_modinit(void); -#else -static inline int drm_fb_helper_modinit(void) -{ - return 0; -} -#endif - /* drm_dp_aux_dev.c */ #ifdef CONFIG_DRM_DP_AUX_CHARDEV int drm_dp_aux_dev_init(void); diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c index 00debd02c3220..0ba92428ef560 100644 --- a/drivers/gpu/drm/drm_debugfs.c +++ b/drivers/gpu/drm/drm_debugfs.c @@ -91,6 +91,7 @@ static int drm_clients_info(struct seq_file *m, void *data) mutex_lock(&dev->filelist_mutex); list_for_each_entry_reverse(priv, &dev->filelist, lhead) { struct task_struct *task; + bool is_current_master = drm_is_current_master(priv); rcu_read_lock(); /* locks pid_task()->comm */ task = pid_task(priv->pid, PIDTYPE_PID); @@ -99,7 +100,7 @@ static int drm_clients_info(struct seq_file *m, void *data) task ? task->comm : "", pid_vnr(priv->pid), priv->minor->index, - drm_is_current_master(priv) ? 'y' : 'n', + is_current_master ? 'y' : 'n', priv->authenticated ? 'y' : 'n', from_kuid_munged(seq_user_ns(m), uid), priv->magic); diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 2de1eebe591f9..1ff13af133243 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -3657,6 +3657,7 @@ static void fetch_monitor_name(struct drm_dp_mst_topology_mgr *mgr, mst_edid = drm_dp_mst_get_edid(port->connector, mgr, port); drm_edid_get_monitor_name(mst_edid, name, namelen); + kfree(mst_edid); } /** diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 9b69e55ad7010..2dc6dd6230d76 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -1702,9 +1702,6 @@ struct edid *drm_do_get_edid(struct drm_connector *connector, connector_bad_edid(connector, edid, edid[0x7e] + 1); - edid[EDID_LENGTH-1] += edid[0x7e] - valid_extensions; - edid[0x7e] = valid_extensions; - new = kmalloc_array(valid_extensions + 1, EDID_LENGTH, GFP_KERNEL); if (!new) @@ -1721,6 +1718,9 @@ struct edid *drm_do_get_edid(struct drm_connector *connector, base += EDID_LENGTH; } + new[EDID_LENGTH - 1] += new[0x7e] - valid_extensions; + new[0x7e] = valid_extensions; + kfree(edid); edid = new; } @@ -4380,7 +4380,8 @@ bool drm_detect_monitor_audio(struct edid *edid) if (!edid_ext) goto end; - has_audio = ((edid_ext[3] & EDID_BASIC_AUDIO) != 0); + has_audio = (edid_ext[0] == CEA_EXT && + (edid_ext[3] & EDID_BASIC_AUDIO) != 0); if (has_audio) { DRM_DEBUG_KMS("Monitor has basic audio support\n"); @@ -4533,16 +4534,8 @@ static void drm_parse_hdmi_deep_color_info(struct drm_connector *connector, connector->name, dc_bpc); info->bpc = dc_bpc; - /* - * Deep color support mandates RGB444 support for all video - * modes and forbids YCRCB422 support for all video modes per - * HDMI 1.3 spec. - */ - info->color_formats = DRM_COLOR_FORMAT_RGB444; - /* YCRCB444 is optional according to spec. */ if (hdmi[6] & DRM_EDID_HDMI_DC_Y444) { - info->color_formats |= DRM_COLOR_FORMAT_YCRCB444; DRM_DEBUG("%s: HDMI sink does YCRCB444 in deep color.\n", connector->name); } @@ -4659,6 +4652,7 @@ u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edi if (!(edid->input & DRM_EDID_INPUT_DIGITAL)) return quirks; + info->color_formats |= DRM_COLOR_FORMAT_RGB444; drm_parse_cea_ext(connector, edid); /* @@ -4707,7 +4701,6 @@ u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edi DRM_DEBUG("%s: Assigning EDID-1.4 digital sink color depth as %d bpc.\n", connector->name, info->bpc); - info->color_formats |= DRM_COLOR_FORMAT_RGB444; if (edid->features & DRM_EDID_FEATURE_RGB_YCRCB444) info->color_formats |= DRM_COLOR_FORMAT_YCRCB444; if (edid->features & DRM_EDID_FEATURE_RGB_YCRCB422) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 02ffde5fd7226..4ae68bf048920 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -2415,24 +2415,3 @@ int drm_fbdev_generic_setup(struct drm_device *dev, unsigned int preferred_bpp) return 0; } EXPORT_SYMBOL(drm_fbdev_generic_setup); - -/* The Kconfig DRM_KMS_HELPER selects FRAMEBUFFER_CONSOLE (if !EXPERT) - * but the module doesn't depend on any fb console symbols. At least - * attempt to load fbcon to avoid leaving the system without a usable console. - */ -int __init drm_fb_helper_modinit(void) -{ -#if defined(CONFIG_FRAMEBUFFER_CONSOLE_MODULE) && !defined(CONFIG_EXPERT) - const char name[] = "fbcon"; - struct module *fbcon; - - mutex_lock(&module_mutex); - fbcon = find_module(name); - mutex_unlock(&module_mutex); - - if (!fbcon) - request_module_nowait(name); -#endif - return 0; -} -EXPORT_SYMBOL(drm_fb_helper_modinit); diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c index 2cf053fb8d54b..1c691bdb89141 100644 --- a/drivers/gpu/drm/drm_ioc32.c +++ b/drivers/gpu/drm/drm_ioc32.c @@ -863,8 +863,6 @@ static int compat_drm_wait_vblank(struct file *file, unsigned int cmd, req.request.sequence = req32.request.sequence; req.request.signal = req32.request.signal; err = drm_ioctl_kernel(file, drm_wait_vblank_ioctl, &req, DRM_UNLOCKED); - if (err) - return err; req32.reply.type = req.reply.type; req32.reply.sequence = req.reply.sequence; @@ -873,7 +871,7 @@ static int compat_drm_wait_vblank(struct file *file, unsigned int cmd, if (copy_to_user(argp, &req32, sizeof(req32))) return -EFAULT; - return 0; + return err; } #if defined(CONFIG_X86) diff --git a/drivers/gpu/drm/drm_kms_helper_common.c b/drivers/gpu/drm/drm_kms_helper_common.c index 221a8528c9937..f933da1656eb5 100644 --- a/drivers/gpu/drm/drm_kms_helper_common.c +++ b/drivers/gpu/drm/drm_kms_helper_common.c @@ -64,19 +64,18 @@ MODULE_PARM_DESC(edid_firmware, static int __init drm_kms_helper_init(void) { - int ret; - - /* Call init functions from specific kms helpers here */ - ret = drm_fb_helper_modinit(); - if (ret < 0) - goto out; - - ret = drm_dp_aux_dev_init(); - if (ret < 0) - goto out; - -out: - return ret; + /* + * The Kconfig DRM_KMS_HELPER selects FRAMEBUFFER_CONSOLE (if !EXPERT) + * but the module doesn't depend on any fb console symbols. At least + * attempt to load fbcon to avoid leaving the system without a usable + * console. + */ + if (IS_ENABLED(CONFIG_DRM_FBDEV_EMULATION) && + IS_MODULE(CONFIG_FRAMEBUFFER_CONSOLE) && + !IS_ENABLED(CONFIG_EXPERT)) + request_module_nowait("fbcon"); + + return drm_dp_aux_dev_init(); } static void __exit drm_kms_helper_exit(void) diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index f6bdec7fa9253..f5ab891731d0b 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -109,6 +109,18 @@ static const struct drm_dmi_panel_orientation_data lcd1200x1920_rightside_up = { .orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP, }; +static const struct drm_dmi_panel_orientation_data lcd1280x1920_rightside_up = { + .width = 1280, + .height = 1920, + .orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP, +}; + +static const struct drm_dmi_panel_orientation_data lcd1600x2560_leftside_up = { + .width = 1600, + .height = 2560, + .orientation = DRM_MODE_PANEL_ORIENTATION_LEFT_UP, +}; + static const struct dmi_system_id orientation_data[] = { { /* Acer One 10 (S1003) */ .matches = { @@ -134,6 +146,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T103HAF"), }, .driver_data = (void *)&lcd800x1280_rightside_up, + }, { /* AYA NEO 2021 */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "AYADEVICE"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "AYA NEO 2021"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, }, { /* GPD MicroPC (generic strings, also match on bios date) */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Default string"), @@ -148,6 +166,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "MicroPC"), }, .driver_data = (void *)&lcd720x1280_rightside_up, + }, { /* GPD Win Max */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "GPD"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "G1619-01"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, }, { /* * GPD Pocket, note that the the DMI data is less generic then * it seems, devices with a board-vendor of "AMI Corporation" @@ -185,6 +209,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_BOARD_NAME, "Default string"), }, .driver_data = (void *)&gpd_win2, + }, { /* GPD Win 3 */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "GPD"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "G1618-03") + }, + .driver_data = (void *)&lcd720x1280_rightside_up, }, { /* I.T.Works TW891 */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "To be filled by O.E.M."), @@ -193,6 +223,13 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_BOARD_NAME, "TW891"), }, .driver_data = (void *)&itworks_tw891, + }, { /* KD Kurio Smart C15200 2-in-1 */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "KD Interactive"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Kurio Smart"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "KDM960BCP"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, }, { /* * Lenovo Ideapad Miix 310 laptop, only some production batches * have a portrait screen, the resolution checks makes the quirk @@ -211,12 +248,23 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo MIIX 320-10ICR"), }, .driver_data = (void *)&lcd800x1280_rightside_up, - }, { /* Lenovo Ideapad D330 */ + }, { /* Lenovo Ideapad D330-10IGM (HD) */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"), - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "81H3"), DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad D330-10IGM"), }, + .driver_data = (void *)&lcd800x1280_rightside_up, + }, { /* Lenovo Ideapad D330-10IGM (FHD) */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad D330-10IGM"), + }, + .driver_data = (void *)&lcd1200x1920_rightside_up, + }, { /* Lenovo Yoga Book X90F / X91F / X91L */ + .matches = { + /* Non exact match to match all versions */ + DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X9"), + }, .driver_data = (void *)&lcd1200x1920_rightside_up, }, { /* OneGX1 Pro */ .matches = { @@ -225,6 +273,25 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Default string"), }, .driver_data = (void *)&onegx1_pro, + }, { /* OneXPlayer */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ONE-NETBOOK TECHNOLOGY CO., LTD."), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ONE XPLAYER"), + }, + .driver_data = (void *)&lcd1600x2560_leftside_up, + }, { /* Samsung GalaxyBook 10.6 */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Galaxy Book 10.6"), + }, + .driver_data = (void *)&lcd1280x1920_rightside_up, + }, { /* Valve Steam Deck */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Valve"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Jupiter"), + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "1"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, }, { /* VIOS LTH17 */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "VIOS"), diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index d6ad60ab0d389..6bdebcca56905 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -186,6 +186,13 @@ int drm_universal_plane_init(struct drm_device *dev, struct drm_plane *plane, if (WARN_ON(config->num_total_plane >= 32)) return -EINVAL; + /* + * First driver to need more than 64 formats needs to fix this. Each + * format is encoded as a bit and the current code only supports a u64. + */ + if (WARN_ON(format_count > 64)) + return -EINVAL; + WARN_ON(drm_drv_uses_atomic_modeset(dev) && (!funcs->atomic_destroy_state || !funcs->atomic_duplicate_state)); @@ -207,13 +214,6 @@ int drm_universal_plane_init(struct drm_device *dev, struct drm_plane *plane, return -ENOMEM; } - /* - * First driver to need more than 64 formats needs to fix this. Each - * format is encoded as a bit and the current code only supports a u64. - */ - if (WARN_ON(format_count > 64)) - return -EINVAL; - if (format_modifiers) { const uint64_t *temp_modifiers = format_modifiers; while (*temp_modifiers++ != DRM_FORMAT_MOD_INVALID) diff --git a/drivers/gpu/drm/drm_plane_helper.c b/drivers/gpu/drm/drm_plane_helper.c index 3aae7ea522f23..c3f2292dc93d5 100644 --- a/drivers/gpu/drm/drm_plane_helper.c +++ b/drivers/gpu/drm/drm_plane_helper.c @@ -123,7 +123,6 @@ static int drm_plane_helper_check_update(struct drm_plane *plane, .crtc_w = drm_rect_width(dst), .crtc_h = drm_rect_height(dst), .rotation = rotation, - .visible = *visible, }; struct drm_crtc_state crtc_state = { .crtc = crtc, diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 8fdb271354061..e558381c4c96e 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -329,8 +329,17 @@ int drm_syncobj_find_fence(struct drm_file *file_private, if (*fence) { ret = dma_fence_chain_find_seqno(fence, point); - if (!ret) + if (!ret) { + /* If the requested seqno is already signaled + * drm_syncobj_find_fence may return a NULL + * fence. To make sure the recipient gets + * signalled, use a new fence instead. + */ + if (!*fence) + *fence = dma_fence_get_stub(); + goto out; + } dma_fence_put(*fence); } else { ret = -EINVAL; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c index 0c9c40720ca9a..35225ff8792dd 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c @@ -397,8 +397,7 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state, if (switch_mmu_context) { struct etnaviv_iommu_context *old_context = gpu->mmu_context; - etnaviv_iommu_context_get(mmu_context); - gpu->mmu_context = mmu_context; + gpu->mmu_context = etnaviv_iommu_context_get(mmu_context); etnaviv_iommu_context_put(old_context); } diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index cb1faaac380a3..519948637186e 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -304,8 +304,7 @@ struct etnaviv_vram_mapping *etnaviv_gem_mapping_get( list_del(&mapping->obj_node); } - etnaviv_iommu_context_get(mmu_context); - mapping->context = mmu_context; + mapping->context = etnaviv_iommu_context_get(mmu_context); mapping->use = 1; ret = etnaviv_iommu_map_gem(mmu_context, etnaviv_obj, diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c index 1ba83a90cdef6..9baf5af919e1e 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c @@ -471,6 +471,12 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, return -EINVAL; } + if (args->stream_size > SZ_128K || args->nr_relocs > SZ_128K || + args->nr_bos > SZ_128K || args->nr_pmrs > 128) { + DRM_ERROR("submit arguments out of size limits\n"); + return -EINVAL; + } + /* * Copy the command submission and bo array to kernel space in * one go, and do this outside of any locks. @@ -534,8 +540,7 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, goto err_submit_objects; submit->ctx = file->driver_priv; - etnaviv_iommu_context_get(submit->ctx->mmu); - submit->mmu_context = submit->ctx->mmu; + submit->mmu_context = etnaviv_iommu_context_get(submit->ctx->mmu); submit->exec_state = args->exec_state; submit->flags = args->flags; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 85de8551ce866..db35736d47af2 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -545,6 +545,12 @@ static int etnaviv_hw_reset(struct etnaviv_gpu *gpu) /* We rely on the GPU running, so program the clock */ etnaviv_gpu_update_clock(gpu); + gpu->fe_running = false; + gpu->exec_state = -1; + if (gpu->mmu_context) + etnaviv_iommu_context_put(gpu->mmu_context); + gpu->mmu_context = NULL; + return 0; } @@ -607,19 +613,23 @@ void etnaviv_gpu_start_fe(struct etnaviv_gpu *gpu, u32 address, u16 prefetch) VIVS_MMUv2_SEC_COMMAND_CONTROL_ENABLE | VIVS_MMUv2_SEC_COMMAND_CONTROL_PREFETCH(prefetch)); } + + gpu->fe_running = true; } -static void etnaviv_gpu_start_fe_idleloop(struct etnaviv_gpu *gpu) +static void etnaviv_gpu_start_fe_idleloop(struct etnaviv_gpu *gpu, + struct etnaviv_iommu_context *context) { - u32 address = etnaviv_cmdbuf_get_va(&gpu->buffer, - &gpu->mmu_context->cmdbuf_mapping); u16 prefetch; + u32 address; /* setup the MMU */ - etnaviv_iommu_restore(gpu, gpu->mmu_context); + etnaviv_iommu_restore(gpu, context); /* Start command processor */ prefetch = etnaviv_buffer_init(gpu); + address = etnaviv_cmdbuf_get_va(&gpu->buffer, + &gpu->mmu_context->cmdbuf_mapping); etnaviv_gpu_start_fe(gpu, address, prefetch); } @@ -790,7 +800,6 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu) /* Now program the hardware */ mutex_lock(&gpu->lock); etnaviv_gpu_hw_init(gpu); - gpu->exec_state = -1; mutex_unlock(&gpu->lock); pm_runtime_mark_last_busy(gpu->dev); @@ -994,8 +1003,6 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu) spin_unlock(&gpu->event_spinlock); etnaviv_gpu_hw_init(gpu); - gpu->exec_state = -1; - gpu->mmu_context = NULL; mutex_unlock(&gpu->lock); pm_runtime_mark_last_busy(gpu->dev); @@ -1306,14 +1313,12 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit) goto out_unlock; } - if (!gpu->mmu_context) { - etnaviv_iommu_context_get(submit->mmu_context); - gpu->mmu_context = submit->mmu_context; - etnaviv_gpu_start_fe_idleloop(gpu); - } else { - etnaviv_iommu_context_get(gpu->mmu_context); - submit->prev_mmu_context = gpu->mmu_context; - } + if (!gpu->fe_running) + etnaviv_gpu_start_fe_idleloop(gpu, submit->mmu_context); + + if (submit->prev_mmu_context) + etnaviv_iommu_context_put(submit->prev_mmu_context); + submit->prev_mmu_context = etnaviv_iommu_context_get(gpu->mmu_context); if (submit->nr_pmrs) { gpu->event[event[1]].sync_point = &sync_point_perfmon_sample_pre; @@ -1530,7 +1535,7 @@ int etnaviv_gpu_wait_idle(struct etnaviv_gpu *gpu, unsigned int timeout_ms) static int etnaviv_gpu_hw_suspend(struct etnaviv_gpu *gpu) { - if (gpu->initialized && gpu->mmu_context) { + if (gpu->initialized && gpu->fe_running) { /* Replace the last WAIT with END */ mutex_lock(&gpu->lock); etnaviv_buffer_end(gpu); @@ -1543,8 +1548,7 @@ static int etnaviv_gpu_hw_suspend(struct etnaviv_gpu *gpu) */ etnaviv_gpu_wait_idle(gpu, 100); - etnaviv_iommu_context_put(gpu->mmu_context); - gpu->mmu_context = NULL; + gpu->fe_running = false; } gpu->exec_state = -1; @@ -1692,6 +1696,9 @@ static void etnaviv_gpu_unbind(struct device *dev, struct device *master, etnaviv_gpu_hw_suspend(gpu); #endif + if (gpu->mmu_context) + etnaviv_iommu_context_put(gpu->mmu_context); + if (gpu->initialized) { etnaviv_cmdbuf_free(&gpu->buffer); etnaviv_iommu_global_fini(gpu); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h index 8f9bd4edc96a5..02478c75f8968 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h @@ -101,6 +101,7 @@ struct etnaviv_gpu { struct workqueue_struct *wq; struct drm_gpu_scheduler sched; bool initialized; + bool fe_running; /* 'ring'-buffer: */ struct etnaviv_cmdbuf buffer; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c index 1a7c89a67bea3..afe5dd6a9925b 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c @@ -92,6 +92,10 @@ static void etnaviv_iommuv1_restore(struct etnaviv_gpu *gpu, struct etnaviv_iommuv1_context *v1_context = to_v1_context(context); u32 pgtable; + if (gpu->mmu_context) + etnaviv_iommu_context_put(gpu->mmu_context); + gpu->mmu_context = etnaviv_iommu_context_get(context); + /* set base addresses */ gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_RA, context->global->memory_base); gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_FE, context->global->memory_base); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c index f8bf488e9d717..d664ae29ae209 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c @@ -172,6 +172,10 @@ static void etnaviv_iommuv2_restore_nonsec(struct etnaviv_gpu *gpu, if (gpu_read(gpu, VIVS_MMUv2_CONTROL) & VIVS_MMUv2_CONTROL_ENABLE) return; + if (gpu->mmu_context) + etnaviv_iommu_context_put(gpu->mmu_context); + gpu->mmu_context = etnaviv_iommu_context_get(context); + prefetch = etnaviv_buffer_config_mmuv2(gpu, (u32)v2_context->mtlb_dma, (u32)context->global->bad_page_dma); @@ -192,6 +196,10 @@ static void etnaviv_iommuv2_restore_sec(struct etnaviv_gpu *gpu, if (gpu_read(gpu, VIVS_MMUv2_SEC_CONTROL) & VIVS_MMUv2_SEC_CONTROL_ENABLE) return; + if (gpu->mmu_context) + etnaviv_iommu_context_put(gpu->mmu_context); + gpu->mmu_context = etnaviv_iommu_context_get(context); + gpu_write(gpu, VIVS_MMUv2_PTA_ADDRESS_LOW, lower_32_bits(context->global->v2.pta_dma)); gpu_write(gpu, VIVS_MMUv2_PTA_ADDRESS_HIGH, diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c index 3607d348c2980..790cbb20aaeba 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c @@ -204,6 +204,7 @@ static int etnaviv_iommu_find_iova(struct etnaviv_iommu_context *context, */ list_for_each_entry_safe(m, n, &list, scan_node) { etnaviv_iommu_remove_mapping(context, m); + etnaviv_iommu_context_put(m->context); m->context = NULL; list_del_init(&m->mmu_node); list_del_init(&m->scan_node); @@ -288,6 +289,12 @@ void etnaviv_iommu_unmap_gem(struct etnaviv_iommu_context *context, mutex_lock(&context->lock); + /* Bail if the mapping has been reaped by another thread */ + if (!mapping->context) { + mutex_unlock(&context->lock); + return; + } + /* If the vram node is on the mm, unmap and remove the node */ if (mapping->vram_node.mm == &context->mm) etnaviv_iommu_remove_mapping(context, mapping); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h index d1d6902fd13be..e4a0b7d09c2ea 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h @@ -105,9 +105,11 @@ void etnaviv_iommu_dump(struct etnaviv_iommu_context *ctx, void *buf); struct etnaviv_iommu_context * etnaviv_iommu_context_init(struct etnaviv_iommu_global *global, struct etnaviv_cmdbuf_suballoc *suballoc); -static inline void etnaviv_iommu_context_get(struct etnaviv_iommu_context *ctx) +static inline struct etnaviv_iommu_context * +etnaviv_iommu_context_get(struct etnaviv_iommu_context *ctx) { kref_get(&ctx->refcount); + return ctx; } void etnaviv_iommu_context_put(struct etnaviv_iommu_context *ctx); void etnaviv_iommu_restore(struct etnaviv_gpu *gpu, diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index 4e3e95dce6d87..cd46c882269cc 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -89,12 +89,15 @@ static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job) u32 dma_addr; int change; + /* block scheduler */ + drm_sched_stop(&gpu->sched, sched_job); + /* * If the GPU managed to complete this jobs fence, the timout is * spurious. Bail out. */ if (dma_fence_is_signaled(submit->out_fence)) - return; + goto out_no_timeout; /* * If the GPU is still making forward progress on the front-end (which @@ -105,12 +108,9 @@ static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job) change = dma_addr - gpu->hangcheck_dma_addr; if (change < 0 || change > 16) { gpu->hangcheck_dma_addr = dma_addr; - return; + goto out_no_timeout; } - /* block scheduler */ - drm_sched_stop(&gpu->sched, sched_job); - if(sched_job) drm_sched_increase_karma(sched_job); @@ -120,6 +120,7 @@ static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job) drm_sched_resubmit_jobs(&gpu->sched); +out_no_timeout: /* restart scheduler after GPU is usable again */ drm_sched_start(&gpu->sched, true); } diff --git a/drivers/gpu/drm/exynos/exynos_drm_dma.c b/drivers/gpu/drm/exynos/exynos_drm_dma.c index 58b89ec11b0eb..a3c9d8b9e1a18 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dma.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dma.c @@ -140,6 +140,8 @@ int exynos_drm_register_dma(struct drm_device *drm, struct device *dev, EXYNOS_DEV_ADDR_START, EXYNOS_DEV_ADDR_SIZE); else if (IS_ENABLED(CONFIG_IOMMU_DMA)) mapping = iommu_get_domain_for_dev(priv->dma_dev); + else + mapping = ERR_PTR(-ENODEV); if (IS_ERR(mapping)) return PTR_ERR(mapping); diff --git a/drivers/gpu/drm/gma500/psb_intel_display.c b/drivers/gpu/drm/gma500/psb_intel_display.c index 4256410535f06..65e67e12a0a1a 100644 --- a/drivers/gpu/drm/gma500/psb_intel_display.c +++ b/drivers/gpu/drm/gma500/psb_intel_display.c @@ -532,14 +532,15 @@ void psb_intel_crtc_init(struct drm_device *dev, int pipe, struct drm_crtc *psb_intel_get_crtc_from_pipe(struct drm_device *dev, int pipe) { - struct drm_crtc *crtc = NULL; + struct drm_crtc *crtc; list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { struct gma_crtc *gma_crtc = to_gma_crtc(crtc); + if (gma_crtc->pipe == pipe) - break; + return crtc; } - return crtc; + return NULL; } int gma_connector_clones(struct drm_device *dev, int type_mask) diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 41c8e39a73ba8..e4f03fcb125e4 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -21,7 +21,6 @@ config DRM_I915_DEBUG depends on DRM_I915 select DEBUG_FS select PREEMPT_COUNT - select REFCOUNT_FULL select I2C_CHARDEV select STACKDEPOT select DRM_DP_AUX_CHARDEV diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index abc8c42b8b0c1..85583f9146305 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -166,6 +166,12 @@ static void vlv_steal_power_sequencer(struct drm_i915_private *dev_priv, enum pipe pipe); static void intel_dp_unset_edid(struct intel_dp *intel_dp); +static void intel_dp_set_default_sink_rates(struct intel_dp *intel_dp) +{ + intel_dp->sink_rates[0] = 162000; + intel_dp->num_sink_rates = 1; +} + /* update sink rates from dpcd */ static void intel_dp_set_sink_rates(struct intel_dp *intel_dp) { @@ -4261,6 +4267,9 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp) */ intel_psr_init_dpcd(intel_dp); + /* Clear the default sink rates */ + intel_dp->num_sink_rates = 0; + /* Read the eDP 1.4+ supported link rates. */ if (intel_dp->edp_dpcd[0] >= DP_EDP_14) { __le16 sink_rates[DP_MAX_SUPPORTED_RATES]; @@ -7167,6 +7176,8 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, return false; intel_dp_set_source_rates(intel_dp); + intel_dp_set_default_sink_rates(intel_dp); + intel_dp_set_common_rates(intel_dp); intel_dp->reset_link_params = true; intel_dp->pps_pipe = INVALID_PIPE; diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 74d45a0eecb84..c59b43fb34a8c 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -520,6 +520,7 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo ret = drm_connector_init(dev, connector, &intel_dp_mst_connector_funcs, DRM_MODE_CONNECTOR_DisplayPort); if (ret) { + drm_dp_mst_put_port_malloc(port); intel_connector_free(intel_connector); return NULL; } diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 29edfc3437163..aeb7cb651fa4f 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -913,6 +913,9 @@ static int check_overlay_dst(struct intel_overlay *overlay, const struct intel_crtc_state *pipe_config = overlay->crtc->config; + if (rec->dst_height == 0 || rec->dst_width == 0) + return -EINVAL; + if (rec->dst_x < pipe_config->pipe_src_w && rec->dst_x + rec->dst_width <= pipe_config->pipe_src_w && rec->dst_y < pipe_config->pipe_src_h && diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 198a91c765314..5c21c06189c1d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1452,7 +1452,7 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma) urelocs = u64_to_user_ptr(entry->relocs_ptr); remain = entry->relocation_count; - if (unlikely(remain > N_RELOC(ULONG_MAX))) + if (unlikely((unsigned long)remain > N_RELOC(ULONG_MAX))) return -EINVAL; /* diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 08b35587bc6dc..352c102f3459c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -118,6 +118,9 @@ struct drm_i915_gem_object { I915_SELFTEST_DECLARE(struct list_head st_link); + unsigned long flags; +#define I915_BO_WAS_BOUND_BIT 0 + /* * Is the object to be mapped as read-only to the GPU * Only honoured if hardware has relevant pte bit diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 18f0ce0135c17..aa63fa0ab575e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -8,6 +8,8 @@ #include "i915_gem_object.h" #include "i915_scatterlist.h" +#include "gt/intel_gt.h" + void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, struct sg_table *pages, unsigned int sg_page_sizes) @@ -176,6 +178,14 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) __i915_gem_object_reset_page_iter(obj); obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; + if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + intel_wakeref_t wakeref; + + with_intel_runtime_pm_if_in_use(&i915->runtime_pm, wakeref) + intel_gt_invalidate_tlbs(&i915->gt); + } + return pages; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index d48ec9a76ed16..f6d7f5d307d7d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -15,6 +15,8 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) spin_lock_init(>->irq_lock); + mutex_init(>->tlb_invalidate_lock); + INIT_LIST_HEAD(>->closed_vma); spin_lock_init(>->closed_lock); @@ -266,3 +268,113 @@ void intel_gt_driver_late_release(struct intel_gt *gt) intel_uc_driver_late_release(>->uc); intel_gt_fini_reset(gt); } + +struct reg_and_bit { + i915_reg_t reg; + u32 bit; +}; + +static struct reg_and_bit +get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8, + const i915_reg_t *regs, const unsigned int num) +{ + const unsigned int class = engine->class; + struct reg_and_bit rb = { }; + + if (WARN_ON_ONCE(class >= num || !regs[class].reg)) + return rb; + + rb.reg = regs[class]; + if (gen8 && class == VIDEO_DECODE_CLASS) + rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */ + else + rb.bit = engine->instance; + + rb.bit = BIT(rb.bit); + + return rb; +} + +void intel_gt_invalidate_tlbs(struct intel_gt *gt) +{ + static const i915_reg_t gen8_regs[] = { + [RENDER_CLASS] = GEN8_RTCR, + [VIDEO_DECODE_CLASS] = GEN8_M1TCR, /* , GEN8_M2TCR */ + [VIDEO_ENHANCEMENT_CLASS] = GEN8_VTCR, + [COPY_ENGINE_CLASS] = GEN8_BTCR, + }; + static const i915_reg_t gen12_regs[] = { + [RENDER_CLASS] = GEN12_GFX_TLB_INV_CR, + [VIDEO_DECODE_CLASS] = GEN12_VD_TLB_INV_CR, + [VIDEO_ENHANCEMENT_CLASS] = GEN12_VE_TLB_INV_CR, + [COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR, + }; + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + struct intel_engine_cs *engine; + enum intel_engine_id id; + const i915_reg_t *regs; + unsigned int num = 0; + + if (I915_SELFTEST_ONLY(gt->awake == -ENODEV)) + return; + + if (INTEL_GEN(i915) == 12) { + regs = gen12_regs; + num = ARRAY_SIZE(gen12_regs); + } else if (INTEL_GEN(i915) >= 8 && INTEL_GEN(i915) <= 11) { + regs = gen8_regs; + num = ARRAY_SIZE(gen8_regs); + } else if (INTEL_GEN(i915) < 8) { + return; + } + + if (WARN_ONCE(!num, "Platform does not implement TLB invalidation!")) + return; + + GEM_TRACE("\n"); + + assert_rpm_wakelock_held(&i915->runtime_pm); + + mutex_lock(>->tlb_invalidate_lock); + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + + spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */ + + for_each_engine(engine, gt, id) { + struct reg_and_bit rb; + + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); + if (!i915_mmio_reg_offset(rb.reg)) + continue; + + intel_uncore_write_fw(uncore, rb.reg, rb.bit); + } + + spin_unlock_irq(&uncore->lock); + + for_each_engine(engine, gt, id) { + /* + * HW architecture suggest typical invalidation time at 40us, + * with pessimistic cases up to 100us and a recommendation to + * cap at 1ms. We go a bit higher just in case. + */ + const unsigned int timeout_us = 100; + const unsigned int timeout_ms = 4; + struct reg_and_bit rb; + + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); + if (!i915_mmio_reg_offset(rb.reg)) + continue; + + if (__intel_wait_for_register_fw(uncore, + rb.reg, rb.bit, 0, + timeout_us, timeout_ms, + NULL)) + DRM_ERROR_RATELIMITED("%s TLB invalidation did not complete in %ums!\n", + engine->name, timeout_ms); + } + + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); + mutex_unlock(>->tlb_invalidate_lock); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 4920cb351f109..4eab15bdcd97b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -57,4 +57,6 @@ static inline bool intel_gt_is_wedged(struct intel_gt *gt) void intel_gt_queue_hangcheck(struct intel_gt *gt); +void intel_gt_invalidate_tlbs(struct intel_gt *gt); + #endif /* __INTEL_GT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index dc295c196d11c..82a78719b32d5 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -40,6 +40,8 @@ struct intel_gt { struct intel_uc uc; + struct mutex tlb_invalidate_lock; + struct intel_gt_timelines { spinlock_t lock; /* protects active_list */ struct list_head active_list; diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 9cb01d9828f1d..c970e3deb0085 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -289,6 +289,14 @@ void intel_timeline_fini(struct intel_timeline *timeline) i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj); i915_vma_put(timeline->hwsp_ggtt); + + /* + * A small race exists between intel_gt_retire_requests_timeout and + * intel_timeline_exit which could result in the syncmap not getting + * free'd. Rather than work to hard to seal this race, simply cleanup + * the syncmap on fini. + */ + i915_syncmap_free(&timeline->sync); } struct intel_timeline * diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 7b6e68f082f8c..1386d0f5eac63 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2519,6 +2519,12 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING (1 << 28) #define GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT (1 << 24) +#define GEN8_RTCR _MMIO(0x4260) +#define GEN8_M1TCR _MMIO(0x4264) +#define GEN8_M2TCR _MMIO(0x4268) +#define GEN8_BTCR _MMIO(0x426c) +#define GEN8_VTCR _MMIO(0x4270) + #if 0 #define PRB0_TAIL _MMIO(0x2030) #define PRB0_HEAD _MMIO(0x2034) @@ -2602,6 +2608,11 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define FAULT_VA_HIGH_BITS (0xf << 0) #define FAULT_GTT_SEL (1 << 4) +#define GEN12_GFX_TLB_INV_CR _MMIO(0xced8) +#define GEN12_VD_TLB_INV_CR _MMIO(0xcedc) +#define GEN12_VE_TLB_INV_CR _MMIO(0xcee0) +#define GEN12_BLT_TLB_INV_CR _MMIO(0xcee4) + #define FPGA_DBG _MMIO(0x42300) #define FPGA_DBG_RM_NOCLAIM (1 << 31) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index e0e677b2a3a94..c24f49ee10d73 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -341,6 +341,10 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, return ret; vma->flags |= bind_flags; + + if (vma->obj) + set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags); + return 0; } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 91afeca0c6f82..d59455b2d401f 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2822,7 +2822,7 @@ hsw_compute_linetime_wm(const struct intel_crtc_state *crtc_state) } static void intel_read_wm_latency(struct drm_i915_private *dev_priv, - u16 wm[8]) + u16 wm[]) { struct intel_uncore *uncore = &dev_priv->uncore; @@ -3017,9 +3017,9 @@ static void snb_wm_latency_quirk(struct drm_i915_private *dev_priv) * The BIOS provided WM memory latency values are often * inadequate for high resolution displays. Adjust them. */ - changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12) | - ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12) | - ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12); + changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12); + changed |= ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12); + changed |= ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12); if (!changed) return; diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c index 2256c9789fc2c..f19264e91d4db 100644 --- a/drivers/gpu/drm/imx/ipuv3-crtc.c +++ b/drivers/gpu/drm/imx/ipuv3-crtc.c @@ -68,7 +68,7 @@ static void ipu_crtc_disable_planes(struct ipu_crtc *ipu_crtc, drm_atomic_crtc_state_for_each_plane(plane, old_crtc_state) { if (plane == &ipu_crtc->plane[0]->base) disable_full = true; - if (&ipu_crtc->plane[1] && plane == &ipu_crtc->plane[1]->base) + if (ipu_crtc->plane[1] && plane == &ipu_crtc->plane[1]->base) disable_partial = true; } diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c index be55548f352af..e24272428744c 100644 --- a/drivers/gpu/drm/imx/parallel-display.c +++ b/drivers/gpu/drm/imx/parallel-display.c @@ -68,8 +68,10 @@ static int imx_pd_connector_get_modes(struct drm_connector *connector) ret = of_get_drm_display_mode(np, &imxpd->mode, &imxpd->bus_flags, OF_USE_NATIVE_MODE); - if (ret) + if (ret) { + drm_mode_destroy(connector->dev, mode); return ret; + } drm_mode_copy(mode, &imxpd->mode); mode->type |= DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED, diff --git a/drivers/gpu/drm/lima/lima_device.c b/drivers/gpu/drm/lima/lima_device.c index d86b8d81a483a..155971c57b2d5 100644 --- a/drivers/gpu/drm/lima/lima_device.c +++ b/drivers/gpu/drm/lima/lima_device.c @@ -293,6 +293,7 @@ int lima_device_init(struct lima_device *ldev) struct resource *res; dma_set_coherent_mask(ldev->dev, DMA_BIT_MASK(32)); + dma_set_max_seg_size(ldev->dev, UINT_MAX); err = lima_clk_init(ldev); if (err) diff --git a/drivers/gpu/drm/mediatek/mtk_cec.c b/drivers/gpu/drm/mediatek/mtk_cec.c index cb29b649fcdba..12bf937694977 100644 --- a/drivers/gpu/drm/mediatek/mtk_cec.c +++ b/drivers/gpu/drm/mediatek/mtk_cec.c @@ -84,7 +84,7 @@ static void mtk_cec_mask(struct mtk_cec *cec, unsigned int offset, u32 tmp = readl(cec->regs + offset) & ~mask; tmp |= val & mask; - writel(val, cec->regs + offset); + writel(tmp, cec->regs + offset); } void mtk_cec_set_hpd_event(struct device *dev, diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index df2656e579917..a3ae6c1d341bf 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -891,6 +891,7 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) BUG_ON(!node); ret = a6xx_gmu_init(a6xx_gpu, node); + of_node_put(node); if (ret) { a6xx_destroy(&(a6xx_gpu->base.base)); return ERR_PTR(ret); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c index 179e8d52cadb4..a08ca7a47400f 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c @@ -281,10 +281,12 @@ static void dpu_hw_ctl_clear_all_blendstages(struct dpu_hw_ctl *ctx) int i; for (i = 0; i < ctx->mixer_count; i++) { - DPU_REG_WRITE(c, CTL_LAYER(LM_0 + i), 0); - DPU_REG_WRITE(c, CTL_LAYER_EXT(LM_0 + i), 0); - DPU_REG_WRITE(c, CTL_LAYER_EXT2(LM_0 + i), 0); - DPU_REG_WRITE(c, CTL_LAYER_EXT3(LM_0 + i), 0); + enum dpu_lm mixer_id = ctx->mixer_hw_caps[i].id; + + DPU_REG_WRITE(c, CTL_LAYER(mixer_id), 0); + DPU_REG_WRITE(c, CTL_LAYER_EXT(mixer_id), 0); + DPU_REG_WRITE(c, CTL_LAYER_EXT2(mixer_id), 0); + DPU_REG_WRITE(c, CTL_LAYER_EXT3(mixer_id), 0); } } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c index 4f8b813aab810..8256f06218d0f 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c @@ -137,11 +137,13 @@ static int _sspp_subblk_offset(struct dpu_hw_pipe *ctx, u32 *idx) { int rc = 0; - const struct dpu_sspp_sub_blks *sblk = ctx->cap->sblk; + const struct dpu_sspp_sub_blks *sblk; - if (!ctx) + if (!ctx || !ctx->cap || !ctx->cap->sblk) return -EINVAL; + sblk = ctx->cap->sblk; + switch (s_id) { case DPU_SSPP_SRC: *idx = sblk->src_blk.base; @@ -404,7 +406,7 @@ static void _dpu_hw_sspp_setup_scaler3(struct dpu_hw_pipe *ctx, (void)pe; if (_sspp_subblk_offset(ctx, DPU_SSPP_SCALER_QSEED3, &idx) || !sspp - || !scaler3_cfg || !ctx || !ctx->cap || !ctx->cap->sblk) + || !scaler3_cfg) return; dpu_hw_setup_scaler3(&ctx->hw, scaler3_cfg, idx, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index 58b0485dc3750..c08c67338d73d 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -88,8 +88,8 @@ static int _dpu_danger_signal_status(struct seq_file *s, &status); } else { seq_puts(s, "\nSafe signal status:\n"); - if (kms->hw_mdp->ops.get_danger_status) - kms->hw_mdp->ops.get_danger_status(kms->hw_mdp, + if (kms->hw_mdp->ops.get_safe_status) + kms->hw_mdp->ops.get_safe_status(kms->hw_mdp, &status); } pm_runtime_put_sync(&kms->pdev->dev); @@ -599,8 +599,10 @@ static void _dpu_kms_hw_destroy(struct dpu_kms *dpu_kms) for (i = 0; i < dpu_kms->catalog->vbif_count; i++) { u32 vbif_idx = dpu_kms->catalog->vbif[i].id; - if ((vbif_idx < VBIF_MAX) && dpu_kms->hw_vbif[vbif_idx]) + if ((vbif_idx < VBIF_MAX) && dpu_kms->hw_vbif[vbif_idx]) { dpu_hw_vbif_destroy(dpu_kms->hw_vbif[vbif_idx]); + dpu_kms->hw_vbif[vbif_idx] = NULL; + } } } diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c index 20194d86d0339..f0a5767b69f50 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c @@ -108,13 +108,6 @@ static void mdp4_disable_commit(struct msm_kms *kms) static void mdp4_prepare_commit(struct msm_kms *kms, struct drm_atomic_state *state) { - int i; - struct drm_crtc *crtc; - struct drm_crtc_state *crtc_state; - - /* see 119ecb7fd */ - for_each_new_crtc_in_state(state, crtc, crtc_state, i) - drm_crtc_vblank_get(crtc); } static void mdp4_flush_commit(struct msm_kms *kms, unsigned crtc_mask) @@ -133,12 +126,6 @@ static void mdp4_wait_flush(struct msm_kms *kms, unsigned crtc_mask) static void mdp4_complete_commit(struct msm_kms *kms, unsigned crtc_mask) { - struct mdp4_kms *mdp4_kms = to_mdp4_kms(to_mdp_kms(kms)); - struct drm_crtc *crtc; - - /* see 119ecb7fd */ - for_each_crtc_mask(mdp4_kms->dev, crtc, crtc_mask) - drm_crtc_vblank_put(crtc); } static long mdp4_round_pixclk(struct msm_kms *kms, unsigned long rate, @@ -258,6 +245,7 @@ static int mdp4_modeset_init_intf(struct mdp4_kms *mdp4_kms, encoder = mdp4_lcdc_encoder_init(dev, panel_node); if (IS_ERR(encoder)) { DRM_DEV_ERROR(dev->dev, "failed to construct LCDC encoder\n"); + of_node_put(panel_node); return PTR_ERR(encoder); } @@ -267,6 +255,7 @@ static int mdp4_modeset_init_intf(struct mdp4_kms *mdp4_kms, connector = mdp4_lvds_connector_init(dev, panel_node, encoder); if (IS_ERR(connector)) { DRM_DEV_ERROR(dev->dev, "failed to initialize LVDS connector\n"); + of_node_put(panel_node); return PTR_ERR(connector); } @@ -418,6 +407,7 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev) { struct platform_device *pdev = to_platform_device(dev->dev); struct mdp4_platform_config *config = mdp4_get_config(pdev); + struct msm_drm_private *priv = dev->dev_private; struct mdp4_kms *mdp4_kms; struct msm_kms *kms = NULL; struct msm_gem_address_space *aspace; @@ -432,7 +422,8 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev) mdp_kms_init(&mdp4_kms->base, &kms_funcs); - kms = &mdp4_kms->base.base; + priv->kms = &mdp4_kms->base.base; + kms = priv->kms; mdp4_kms->dev = dev; diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c index 395146884a222..03d60eb092577 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c @@ -534,9 +534,15 @@ int mdp5_crtc_setup_pipeline(struct drm_crtc *crtc, if (ret) return ret; - mdp5_mixer_release(new_crtc_state->state, old_mixer); + ret = mdp5_mixer_release(new_crtc_state->state, old_mixer); + if (ret) + return ret; + if (old_r_mixer) { - mdp5_mixer_release(new_crtc_state->state, old_r_mixer); + ret = mdp5_mixer_release(new_crtc_state->state, old_r_mixer); + if (ret) + return ret; + if (!need_right_mixer) pipeline->r_mixer = NULL; } @@ -903,8 +909,10 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc, ret = msm_gem_get_and_pin_iova(cursor_bo, kms->aspace, &mdp5_crtc->cursor.iova); - if (ret) + if (ret) { + drm_gem_object_put(cursor_bo); return -EINVAL; + } pm_runtime_get_sync(&pdev->dev); diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c index 77823ccdd0f8f..39d0082eedcca 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c @@ -698,9 +698,9 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev) pdev = mdp5_kms->pdev; irq = irq_of_parse_and_map(pdev->dev.of_node, 0); - if (irq < 0) { - ret = irq; - DRM_DEV_ERROR(&pdev->dev, "failed to get irq: %d\n", ret); + if (!irq) { + ret = -EINVAL; + DRM_DEV_ERROR(&pdev->dev, "failed to get irq\n"); goto fail; } diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c index 954db683ae444..2536def2a0005 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c @@ -116,21 +116,28 @@ int mdp5_mixer_assign(struct drm_atomic_state *s, struct drm_crtc *crtc, return 0; } -void mdp5_mixer_release(struct drm_atomic_state *s, struct mdp5_hw_mixer *mixer) +int mdp5_mixer_release(struct drm_atomic_state *s, struct mdp5_hw_mixer *mixer) { struct mdp5_global_state *global_state = mdp5_get_global_state(s); - struct mdp5_hw_mixer_state *new_state = &global_state->hwmixer; + struct mdp5_hw_mixer_state *new_state; if (!mixer) - return; + return 0; + + if (IS_ERR(global_state)) + return PTR_ERR(global_state); + + new_state = &global_state->hwmixer; if (WARN_ON(!new_state->hwmixer_to_crtc[mixer->idx])) - return; + return -EINVAL; DBG("%s: release from crtc %s", mixer->name, new_state->hwmixer_to_crtc[mixer->idx]->name); new_state->hwmixer_to_crtc[mixer->idx] = NULL; + + return 0; } void mdp5_mixer_destroy(struct mdp5_hw_mixer *mixer) diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h index 43c9ba43ce185..545ee223b9d74 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h @@ -30,7 +30,7 @@ void mdp5_mixer_destroy(struct mdp5_hw_mixer *lm); int mdp5_mixer_assign(struct drm_atomic_state *s, struct drm_crtc *crtc, uint32_t caps, struct mdp5_hw_mixer **mixer, struct mdp5_hw_mixer **r_mixer); -void mdp5_mixer_release(struct drm_atomic_state *s, - struct mdp5_hw_mixer *mixer); +int mdp5_mixer_release(struct drm_atomic_state *s, + struct mdp5_hw_mixer *mixer); #endif /* __MDP5_LM_H__ */ diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c index ba6695963aa66..a4f5cb90f3e80 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c @@ -119,18 +119,23 @@ int mdp5_pipe_assign(struct drm_atomic_state *s, struct drm_plane *plane, return 0; } -void mdp5_pipe_release(struct drm_atomic_state *s, struct mdp5_hw_pipe *hwpipe) +int mdp5_pipe_release(struct drm_atomic_state *s, struct mdp5_hw_pipe *hwpipe) { struct msm_drm_private *priv = s->dev->dev_private; struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(priv->kms)); struct mdp5_global_state *state = mdp5_get_global_state(s); - struct mdp5_hw_pipe_state *new_state = &state->hwpipe; + struct mdp5_hw_pipe_state *new_state; if (!hwpipe) - return; + return 0; + + if (IS_ERR(state)) + return PTR_ERR(state); + + new_state = &state->hwpipe; if (WARN_ON(!new_state->hwpipe_to_plane[hwpipe->idx])) - return; + return -EINVAL; DBG("%s: release from plane %s", hwpipe->name, new_state->hwpipe_to_plane[hwpipe->idx]->name); @@ -141,6 +146,8 @@ void mdp5_pipe_release(struct drm_atomic_state *s, struct mdp5_hw_pipe *hwpipe) } new_state->hwpipe_to_plane[hwpipe->idx] = NULL; + + return 0; } void mdp5_pipe_destroy(struct mdp5_hw_pipe *hwpipe) diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h index 9b26d0761bd4f..cca67938cab21 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h @@ -37,7 +37,7 @@ int mdp5_pipe_assign(struct drm_atomic_state *s, struct drm_plane *plane, uint32_t caps, uint32_t blkcfg, struct mdp5_hw_pipe **hwpipe, struct mdp5_hw_pipe **r_hwpipe); -void mdp5_pipe_release(struct drm_atomic_state *s, struct mdp5_hw_pipe *hwpipe); +int mdp5_pipe_release(struct drm_atomic_state *s, struct mdp5_hw_pipe *hwpipe); struct mdp5_hw_pipe *mdp5_pipe_init(enum mdp5_pipe pipe, uint32_t reg_offset, uint32_t caps); diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c index 83423092de2ff..0dc23c86747e8 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c @@ -179,7 +179,10 @@ static void mdp5_plane_reset(struct drm_plane *plane) drm_framebuffer_put(plane->state->fb); kfree(to_mdp5_plane_state(plane->state)); + plane->state = NULL; mdp5_state = kzalloc(sizeof(*mdp5_state), GFP_KERNEL); + if (!mdp5_state) + return; /* assign default blend parameters */ mdp5_state->alpha = 255; @@ -390,12 +393,24 @@ static int mdp5_plane_atomic_check_with_state(struct drm_crtc_state *crtc_state, mdp5_state->r_hwpipe = NULL; - mdp5_pipe_release(state->state, old_hwpipe); - mdp5_pipe_release(state->state, old_right_hwpipe); + ret = mdp5_pipe_release(state->state, old_hwpipe); + if (ret) + return ret; + + ret = mdp5_pipe_release(state->state, old_right_hwpipe); + if (ret) + return ret; + } } else { - mdp5_pipe_release(state->state, mdp5_state->hwpipe); - mdp5_pipe_release(state->state, mdp5_state->r_hwpipe); + ret = mdp5_pipe_release(state->state, mdp5_state->hwpipe); + if (ret) + return ret; + + ret = mdp5_pipe_release(state->state, mdp5_state->r_hwpipe); + if (ret) + return ret; + mdp5_state->hwpipe = mdp5_state->r_hwpipe = NULL; } diff --git a/drivers/gpu/drm/msm/dsi/dsi.c b/drivers/gpu/drm/msm/dsi/dsi.c index 55ea4bc2ee9cb..16194971a99f9 100644 --- a/drivers/gpu/drm/msm/dsi/dsi.c +++ b/drivers/gpu/drm/msm/dsi/dsi.c @@ -26,17 +26,22 @@ static int dsi_get_phy(struct msm_dsi *msm_dsi) } phy_pdev = of_find_device_by_node(phy_node); - if (phy_pdev) + if (phy_pdev) { msm_dsi->phy = platform_get_drvdata(phy_pdev); + msm_dsi->phy_dev = &phy_pdev->dev; + } of_node_put(phy_node); - if (!phy_pdev || !msm_dsi->phy) { + if (!phy_pdev) { + DRM_DEV_ERROR(&pdev->dev, "%s: phy driver is not ready\n", __func__); + return -EPROBE_DEFER; + } + if (!msm_dsi->phy) { + put_device(&phy_pdev->dev); DRM_DEV_ERROR(&pdev->dev, "%s: phy driver is not ready\n", __func__); return -EPROBE_DEFER; } - - msm_dsi->phy_dev = get_device(&phy_pdev->dev); return 0; } @@ -206,8 +211,10 @@ int msm_dsi_modeset_init(struct msm_dsi *msm_dsi, struct drm_device *dev, goto fail; } - if (!msm_dsi_manager_validate_current_config(msm_dsi->id)) + if (!msm_dsi_manager_validate_current_config(msm_dsi->id)) { + ret = -EINVAL; goto fail; + } msm_dsi->encoder = encoder; diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 1e7b1be25bb07..743142e15b4c1 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -460,7 +460,7 @@ static int dsi_bus_clk_enable(struct msm_dsi_host *msm_host) return 0; err: - for (; i > 0; i--) + while (--i >= 0) clk_disable_unprepare(msm_host->bus_clks[i]); return ret; @@ -1348,10 +1348,10 @@ static int dsi_cmds2buf_tx(struct msm_dsi_host *msm_host, dsi_get_bpp(msm_host->format) / 8; len = dsi_cmd_dma_add(msm_host, msg); - if (!len) { + if (len < 0) { pr_err("%s: failed to add cmd type = 0x%x\n", __func__, msg->type); - return -EINVAL; + return len; } /* for video mode, do not send cmds more than @@ -1370,10 +1370,14 @@ static int dsi_cmds2buf_tx(struct msm_dsi_host *msm_host, } ret = dsi_cmd_dma_tx(msm_host, len); - if (ret < len) { - pr_err("%s: cmd dma tx failed, type=0x%x, data0=0x%x, len=%d\n", - __func__, msg->type, (*(u8 *)(msg->tx_buf)), len); - return -ECOMM; + if (ret < 0) { + pr_err("%s: cmd dma tx failed, type=0x%x, data0=0x%x, len=%d, ret=%d\n", + __func__, msg->type, (*(u8 *)(msg->tx_buf)), len, ret); + return ret; + } else if (ret < len) { + pr_err("%s: cmd dma tx failed, type=0x%x, data0=0x%x, ret=%d len=%d\n", + __func__, msg->type, (*(u8 *)(msg->tx_buf)), ret, len); + return -EIO; } return len; @@ -1669,6 +1673,8 @@ static int dsi_host_parse_lane_data(struct msm_dsi_host *msm_host, if (!prop) { DRM_DEV_DEBUG(dev, "failed to find data lane mapping, using default\n"); + /* Set the number of date lanes to 4 by default. */ + msm_host->num_data_lanes = 4; return 0; } @@ -2097,9 +2103,12 @@ int msm_dsi_host_cmd_rx(struct mipi_dsi_host *host, } ret = dsi_cmds2buf_tx(msm_host, msg); - if (ret < msg->tx_len) { + if (ret < 0) { pr_err("%s: Read cmd Tx failed, %d\n", __func__, ret); return ret; + } else if (ret < msg->tx_len) { + pr_err("%s: Read cmd Tx failed, too short: %d\n", __func__, ret); + return -ECOMM; } /* diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c index 73127948f54d9..f3ff2cdc288ba 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_manager.c +++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c @@ -625,7 +625,7 @@ struct drm_connector *msm_dsi_manager_connector_init(u8 id) return connector; fail: - connector->funcs->destroy(msm_dsi->connector); + connector->funcs->destroy(connector); return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index 21519229fe73a..60d50643d0b5c 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -665,12 +665,14 @@ void __exit msm_dsi_phy_driver_unregister(void) int msm_dsi_phy_enable(struct msm_dsi_phy *phy, int src_pll_id, struct msm_dsi_phy_clk_request *clk_req) { - struct device *dev = &phy->pdev->dev; + struct device *dev; int ret; if (!phy || !phy->cfg->ops.enable) return -EINVAL; + dev = &phy->pdev->dev; + ret = dsi_phy_enable_resource(phy); if (ret) { DRM_DEV_ERROR(dev, "%s: resource enable failed, %d\n", diff --git a/drivers/gpu/drm/msm/edp/edp_ctrl.c b/drivers/gpu/drm/msm/edp/edp_ctrl.c index 7f3dd3ffe2c9b..bbbd96f9eaa06 100644 --- a/drivers/gpu/drm/msm/edp/edp_ctrl.c +++ b/drivers/gpu/drm/msm/edp/edp_ctrl.c @@ -1082,7 +1082,7 @@ void msm_edp_ctrl_power(struct edp_ctrl *ctrl, bool on) int msm_edp_ctrl_init(struct msm_edp *edp) { struct edp_ctrl *ctrl = NULL; - struct device *dev = &edp->pdev->dev; + struct device *dev; int ret; if (!edp) { @@ -1090,6 +1090,7 @@ int msm_edp_ctrl_init(struct msm_edp *edp) return -EINVAL; } + dev = &edp->pdev->dev; ctrl = devm_kzalloc(dev, sizeof(*ctrl), GFP_KERNEL); if (!ctrl) return -ENOMEM; diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c index 355afb936401a..e4c9ff934e5b8 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi.c @@ -97,10 +97,15 @@ static int msm_hdmi_get_phy(struct hdmi *hdmi) of_node_put(phy_node); - if (!phy_pdev || !hdmi->phy) { + if (!phy_pdev) { DRM_DEV_ERROR(&pdev->dev, "phy driver is not ready\n"); return -EPROBE_DEFER; } + if (!hdmi->phy) { + DRM_DEV_ERROR(&pdev->dev, "phy driver is not ready\n"); + put_device(&phy_pdev->dev); + return -EPROBE_DEFER; + } hdmi->phy_dev = get_device(&phy_pdev->dev); @@ -137,6 +142,10 @@ static struct hdmi *msm_hdmi_init(struct platform_device *pdev) /* HDCP needs physical address of hdmi register */ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, config->mmio_name); + if (!res) { + ret = -EINVAL; + goto fail; + } hdmi->mmio_phy_addr = res->start; hdmi->qfprom_mmio = msm_ioremap(pdev, @@ -306,9 +315,9 @@ int msm_hdmi_modeset_init(struct hdmi *hdmi, } hdmi->irq = irq_of_parse_and_map(pdev->dev.of_node, 0); - if (hdmi->irq < 0) { - ret = hdmi->irq; - DRM_DEV_ERROR(dev->dev, "failed to get irq: %d\n", ret); + if (!hdmi->irq) { + ret = -EINVAL; + DRM_DEV_ERROR(dev->dev, "failed to get irq\n"); goto fail; } diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c index 1c74381a4fc9d..08995e981808a 100644 --- a/drivers/gpu/drm/msm/msm_debugfs.c +++ b/drivers/gpu/drm/msm/msm_debugfs.c @@ -77,6 +77,7 @@ static int msm_gpu_open(struct inode *inode, struct file *file) goto free_priv; pm_runtime_get_sync(&gpu->pdev->dev); + msm_gpu_hw_init(gpu); show_priv->state = gpu->funcs->gpu_state_get(gpu); pm_runtime_put_sync(&gpu->pdev->dev); diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 7443df77cadb5..407b51cf67909 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -337,7 +337,7 @@ static int msm_init_vram(struct drm_device *dev) of_node_put(node); if (ret) return ret; - size = r.end - r.start; + size = r.end - r.start + 1; DRM_INFO("using VRAM carveout: %lx@%pa\n", size, &r.start); /* if we have no IOMMU, then we need to use carveout allocator. diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index d92a0ffe2a767..8e6a4d5f3a405 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -1036,7 +1036,7 @@ static struct drm_gem_object *_msm_gem_new(struct drm_device *dev, ret = msm_gem_new_impl(dev, size, flags, &obj); if (ret) - goto fail; + return ERR_PTR(ret); msm_obj = to_msm_bo(obj); @@ -1124,7 +1124,7 @@ struct drm_gem_object *msm_gem_import(struct drm_device *dev, ret = msm_gem_new_impl(dev, size, MSM_BO_WC, &obj); if (ret) - goto fail; + return ERR_PTR(ret); drm_gem_private_object_init(dev, obj, size); diff --git a/drivers/gpu/drm/msm/msm_gem_prime.c b/drivers/gpu/drm/msm/msm_gem_prime.c index d7c8948427fe0..705a834ba1e66 100644 --- a/drivers/gpu/drm/msm/msm_gem_prime.c +++ b/drivers/gpu/drm/msm/msm_gem_prime.c @@ -17,7 +17,7 @@ struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj) int npages = obj->size >> PAGE_SHIFT; if (WARN_ON(!msm_obj->pages)) /* should have already pinned! */ - return NULL; + return ERR_PTR(-ENOMEM); return drm_prime_pages_to_sg(msm_obj->pages, npages); } diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.c b/drivers/gpu/drm/nouveau/dispnv04/disp.c index dc64863b5fd88..f517b39aba56b 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv04/disp.c @@ -179,7 +179,7 @@ nv04_display_destroy(struct drm_device *dev) nvif_notify_fini(&disp->flip); nouveau_display(dev)->priv = NULL; - kfree(disp); + vfree(disp); nvif_object_unmap(&drm->client.device.object); } @@ -197,7 +197,7 @@ nv04_display_create(struct drm_device *dev) struct nv04_display *disp; int i, ret; - disp = kzalloc(sizeof(*disp), GFP_KERNEL); + disp = vzalloc(sizeof(*disp)); if (!disp) return -ENOMEM; diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c index c7a94c94dbf37..f2f3280c3a50e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_backlight.c +++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c @@ -51,8 +51,9 @@ static bool nouveau_get_backlight_name(char backlight_name[BL_NAME_SIZE], struct nouveau_backlight *bl) { - const int nb = ida_simple_get(&bl_ida, 0, 0, GFP_KERNEL); - if (nb < 0 || nb >= 100) + const int nb = ida_alloc_max(&bl_ida, 99, GFP_KERNEL); + + if (nb < 0) return false; if (nb > 0) snprintf(backlight_name, BL_NAME_SIZE, "nv_backlight%d", nb); @@ -280,7 +281,7 @@ nouveau_backlight_init(struct drm_connector *connector) nv_encoder, ops, &props); if (IS_ERR(bl->dev)) { if (bl->id >= 0) - ida_simple_remove(&bl_ida, bl->id); + ida_free(&bl_ida, bl->id); ret = PTR_ERR(bl->dev); goto fail_alloc; } @@ -306,7 +307,7 @@ nouveau_backlight_fini(struct drm_connector *connector) return; if (bl->id >= 0) - ida_simple_remove(&bl_ida, bl->id); + ida_free(&bl_ida, bl->id); backlight_device_unregister(bl->dev); nv_conn->backlight = NULL; diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c index 3b13feca970f7..3c54d61e4fa94 100644 --- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c +++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c @@ -207,6 +207,7 @@ static const struct file_operations nouveau_pstate_fops = { .open = nouveau_debugfs_pstate_open, .read = seq_read, .write = nouveau_debugfs_pstate_set, + .release = single_release, }; static struct drm_info_list nouveau_debugfs_list[] = { diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 5347e5bdee8cc..e18938972a895 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -779,7 +779,7 @@ nouveau_drm_device_remove(struct drm_device *dev) struct nvkm_client *client; struct nvkm_device *device; - drm_dev_unregister(dev); + drm_dev_unplug(dev); dev->irq_enabled = false; client = nvxx_client(&drm->client.base); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.c index b0ece71aefdee..ce774579c89d1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.c @@ -57,7 +57,7 @@ nvkm_control_mthd_pstate_info(struct nvkm_control *ctrl, void *data, u32 size) args->v0.count = 0; args->v0.ustate_ac = NVIF_CONTROL_PSTATE_INFO_V0_USTATE_DISABLE; args->v0.ustate_dc = NVIF_CONTROL_PSTATE_INFO_V0_USTATE_DISABLE; - args->v0.pwrsrc = -ENOSYS; + args->v0.pwrsrc = -ENODEV; args->v0.pstate = NVIF_CONTROL_PSTATE_INFO_V0_PSTATE_UNKNOWN; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c index 0e372a190d3f1..9b6972c953584 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c @@ -123,7 +123,7 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev) mutex_init(&tdev->iommu.mutex); - if (iommu_present(&platform_bus_type)) { + if (device_iommu_mapped(dev)) { tdev->iommu.domain = iommu_domain_alloc(&platform_bus_type); if (!tdev->iommu.domain) goto error; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c index 818d21bd28d31..1d2837c5a8f29 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c @@ -419,7 +419,7 @@ nvkm_dp_train(struct nvkm_dp *dp, u32 dataKBps) return ret; } -static void +void nvkm_dp_disable(struct nvkm_outp *outp, struct nvkm_ior *ior) { struct nvkm_dp *dp = nvkm_dp(outp); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h index 428b3f488f033..e484d0c3b0d42 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h @@ -32,6 +32,7 @@ struct nvkm_dp { int nvkm_dp_new(struct nvkm_disp *, int index, struct dcb_output *, struct nvkm_outp **); +void nvkm_dp_disable(struct nvkm_outp *, struct nvkm_ior *); /* DPCD Receiver Capabilities */ #define DPCD_RC00_DPCD_REV 0x00000 diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigv100.c index 6e3c450eaacef..3ff49344abc77 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigv100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigv100.c @@ -62,7 +62,6 @@ gv100_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packet, nvkm_wr32(device, 0x6f0108 + hdmi, vendor_infoframe.header); nvkm_wr32(device, 0x6f010c + hdmi, vendor_infoframe.subpack0_low); nvkm_wr32(device, 0x6f0110 + hdmi, vendor_infoframe.subpack0_high); - nvkm_wr32(device, 0x6f0110 + hdmi, 0x00000000); nvkm_wr32(device, 0x6f0114 + hdmi, 0x00000000); nvkm_wr32(device, 0x6f0118 + hdmi, 0x00000000); nvkm_wr32(device, 0x6f011c + hdmi, 0x00000000); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c index c62030c96fba0..4b1c72fd8f039 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c @@ -22,6 +22,7 @@ * Authors: Ben Skeggs */ #include "outp.h" +#include "dp.h" #include "ior.h" #include @@ -216,6 +217,14 @@ nvkm_outp_init_route(struct nvkm_outp *outp) if (!ior->arm.head || ior->arm.proto != proto) { OUTP_DBG(outp, "no heads (%x %d %d)", ior->arm.head, ior->arm.proto, proto); + + /* The EFI GOP driver on Ampere can leave unused DP links routed, + * which we don't expect. The DisableLT IED script *should* get + * us back to where we need to be. + */ + if (ior->func->route.get && !ior->arm.head && outp->info.type == DCB_OUTPUT_DP) + nvkm_dp_disable(outp, ior); + return; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c index f3c30b2a788e8..8bff14ae16b0e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c @@ -38,7 +38,7 @@ nvbios_addr(struct nvkm_bios *bios, u32 *addr, u8 size) *addr += bios->imaged_addr; } - if (unlikely(*addr + size >= bios->size)) { + if (unlikely(*addr + size > bios->size)) { nvkm_error(&bios->subdev, "OOB %d %08x %08x\n", size, p, *addr); return false; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c index 40e564524b7a9..93a49cbfb81d6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c @@ -135,10 +135,10 @@ nvkm_cstate_find_best(struct nvkm_clk *clk, struct nvkm_pstate *pstate, list_for_each_entry_from_reverse(cstate, &pstate->list, head) { if (nvkm_cstate_valid(clk, cstate, max_volt, clk->temp)) - break; + return cstate; } - return cstate; + return NULL; } static struct nvkm_cstate * @@ -169,6 +169,8 @@ nvkm_cstate_prog(struct nvkm_clk *clk, struct nvkm_pstate *pstate, int cstatei) if (!list_empty(&pstate->list)) { cstate = nvkm_cstate_get(clk, pstate, cstatei); cstate = nvkm_cstate_find_best(clk, pstate, cstate); + if (!cstate) + return -EINVAL; } else { cstate = &pstate->base; } diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig index f152bc4eeb535..e70ccadf7541d 100644 --- a/drivers/gpu/drm/panel/Kconfig +++ b/drivers/gpu/drm/panel/Kconfig @@ -141,6 +141,7 @@ config DRM_PANEL_OLIMEX_LCD_OLINUXINO depends on OF depends on I2C depends on BACKLIGHT_CLASS_DEVICE + select CRC32 help The panel is used with different sizes LCDs, from 480x272 to 1280x800, and 24 bit per pixel. diff --git a/drivers/gpu/drm/panel/panel-innolux-p079zca.c b/drivers/gpu/drm/panel/panel-innolux-p079zca.c index d92d1c98878c1..df90b66079816 100644 --- a/drivers/gpu/drm/panel/panel-innolux-p079zca.c +++ b/drivers/gpu/drm/panel/panel-innolux-p079zca.c @@ -509,6 +509,7 @@ static void innolux_panel_del(struct innolux_panel *innolux) static int innolux_panel_probe(struct mipi_dsi_device *dsi) { const struct panel_desc *desc; + struct innolux_panel *innolux; int err; desc = of_device_get_match_data(&dsi->dev); @@ -520,7 +521,14 @@ static int innolux_panel_probe(struct mipi_dsi_device *dsi) if (err < 0) return err; - return mipi_dsi_attach(dsi); + err = mipi_dsi_attach(dsi); + if (err < 0) { + innolux = mipi_dsi_get_drvdata(dsi); + innolux_panel_del(innolux); + return err; + } + + return 0; } static int innolux_panel_remove(struct mipi_dsi_device *dsi) diff --git a/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c b/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c index 3ac04eb8d0fe5..1e7fecab72a9f 100644 --- a/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c +++ b/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c @@ -424,7 +424,13 @@ static int kingdisplay_panel_probe(struct mipi_dsi_device *dsi) if (err < 0) return err; - return mipi_dsi_attach(dsi); + err = mipi_dsi_attach(dsi); + if (err < 0) { + kingdisplay_panel_del(kingdisplay); + return err; + } + + return 0; } static int kingdisplay_panel_remove(struct mipi_dsi_device *dsi) diff --git a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c index bdb4d59c81277..a621dd28ff70d 100644 --- a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c +++ b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c @@ -232,7 +232,7 @@ static void rpi_touchscreen_i2c_write(struct rpi_touchscreen *ts, ret = i2c_smbus_write_byte_data(ts->i2c, reg, val); if (ret) - dev_err(&ts->dsi->dev, "I2C write failed: %d\n", ret); + dev_err(&ts->i2c->dev, "I2C write failed: %d\n", ret); } static int rpi_touchscreen_write(struct rpi_touchscreen *ts, u16 reg, u32 val) @@ -268,7 +268,7 @@ static int rpi_touchscreen_noop(struct drm_panel *panel) return 0; } -static int rpi_touchscreen_enable(struct drm_panel *panel) +static int rpi_touchscreen_prepare(struct drm_panel *panel) { struct rpi_touchscreen *ts = panel_to_ts(panel); int i; @@ -298,6 +298,13 @@ static int rpi_touchscreen_enable(struct drm_panel *panel) rpi_touchscreen_write(ts, DSI_STARTDSI, 0x01); msleep(100); + return 0; +} + +static int rpi_touchscreen_enable(struct drm_panel *panel) +{ + struct rpi_touchscreen *ts = panel_to_ts(panel); + /* Turn on the backlight. */ rpi_touchscreen_i2c_write(ts, REG_PWM, 255); @@ -352,7 +359,7 @@ static int rpi_touchscreen_get_modes(struct drm_panel *panel) static const struct drm_panel_funcs rpi_touchscreen_funcs = { .disable = rpi_touchscreen_disable, .unprepare = rpi_touchscreen_noop, - .prepare = rpi_touchscreen_noop, + .prepare = rpi_touchscreen_prepare, .enable = rpi_touchscreen_enable, .get_modes = rpi_touchscreen_get_modes, }; diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index f0ea782df836d..312a3c4e23318 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -1619,7 +1619,7 @@ static const struct display_timing innolux_g070y2_l01_timing = { static const struct panel_desc innolux_g070y2_l01 = { .timings = &innolux_g070y2_l01_timing, .num_timings = 1, - .bpc = 6, + .bpc = 8, .size = { .width = 152, .height = 91, diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c index 238fb6d54df47..413bf314a2bc3 100644 --- a/drivers/gpu/drm/panfrost/panfrost_device.c +++ b/drivers/gpu/drm/panfrost/panfrost_device.c @@ -59,7 +59,8 @@ static int panfrost_clk_init(struct panfrost_device *pfdev) if (IS_ERR(pfdev->bus_clock)) { dev_err(pfdev->dev, "get bus_clock failed %ld\n", PTR_ERR(pfdev->bus_clock)); - return PTR_ERR(pfdev->bus_clock); + err = PTR_ERR(pfdev->bus_clock); + goto disable_clock; } if (pfdev->bus_clock) { diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index f57dd195dfb8e..b4bab8647f763 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -428,8 +428,8 @@ static int panfrost_ioctl_madvise(struct drm_device *dev, void *data, if (args->retained) { if (args->madv == PANFROST_MADV_DONTNEED) - list_add_tail(&bo->base.madv_list, - &pfdev->shrinker_list); + list_move_tail(&bo->base.madv_list, + &pfdev->shrinker_list); else if (args->madv == PANFROST_MADV_WILLNEED) list_del_init(&bo->base.madv_list); } diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index bfd503d220881..8a014dc115712 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -52,25 +52,16 @@ static int write_cmd(struct panfrost_device *pfdev, u32 as_nr, u32 cmd) } static void lock_region(struct panfrost_device *pfdev, u32 as_nr, - u64 iova, size_t size) + u64 iova, u64 size) { u8 region_width; u64 region = iova & PAGE_MASK; - /* - * fls returns: - * 1 .. 32 - * - * 10 + fls(num_pages) - * results in the range (11 .. 42) - */ - - size = round_up(size, PAGE_SIZE); - region_width = 10 + fls(size >> PAGE_SHIFT); - if ((size >> PAGE_SHIFT) != (1ul << (region_width - 11))) { - /* not pow2, so must go up to the next pow2 */ - region_width += 1; - } + /* The size is encoded as ceil(log2) minus(1), which may be calculated + * with fls. The size must be clamped to hardware bounds. + */ + size = max_t(u64, size, AS_LOCK_REGION_MIN_SIZE); + region_width = fls64(size - 1) - 1; region |= region_width; /* Lock the region that needs to be updated */ @@ -81,7 +72,7 @@ static void lock_region(struct panfrost_device *pfdev, u32 as_nr, static int mmu_hw_do_operation_locked(struct panfrost_device *pfdev, int as_nr, - u64 iova, size_t size, u32 op) + u64 iova, u64 size, u32 op) { if (as_nr < 0) return 0; @@ -98,7 +89,7 @@ static int mmu_hw_do_operation_locked(struct panfrost_device *pfdev, int as_nr, static int mmu_hw_do_operation(struct panfrost_device *pfdev, struct panfrost_mmu *mmu, - u64 iova, size_t size, u32 op) + u64 iova, u64 size, u32 op) { int ret; @@ -115,7 +106,7 @@ static void panfrost_mmu_enable(struct panfrost_device *pfdev, struct panfrost_m u64 transtab = cfg->arm_mali_lpae_cfg.transtab; u64 memattr = cfg->arm_mali_lpae_cfg.memattr; - mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0UL, AS_COMMAND_FLUSH_MEM); + mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM); mmu_write(pfdev, AS_TRANSTAB_LO(as_nr), transtab & 0xffffffffUL); mmu_write(pfdev, AS_TRANSTAB_HI(as_nr), transtab >> 32); @@ -131,7 +122,7 @@ static void panfrost_mmu_enable(struct panfrost_device *pfdev, struct panfrost_m static void panfrost_mmu_disable(struct panfrost_device *pfdev, u32 as_nr) { - mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0UL, AS_COMMAND_FLUSH_MEM); + mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM); mmu_write(pfdev, AS_TRANSTAB_LO(as_nr), 0); mmu_write(pfdev, AS_TRANSTAB_HI(as_nr), 0); @@ -231,7 +222,7 @@ static size_t get_pgsize(u64 addr, size_t size) static void panfrost_mmu_flush_range(struct panfrost_device *pfdev, struct panfrost_mmu *mmu, - u64 iova, size_t size) + u64 iova, u64 size) { if (mmu->as < 0) return; diff --git a/drivers/gpu/drm/panfrost/panfrost_regs.h b/drivers/gpu/drm/panfrost/panfrost_regs.h index eddaa62ad8b0e..2ae3a4d301d39 100644 --- a/drivers/gpu/drm/panfrost/panfrost_regs.h +++ b/drivers/gpu/drm/panfrost/panfrost_regs.h @@ -318,6 +318,8 @@ #define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2 << 8) #define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3 << 8) +#define AS_LOCK_REGION_MIN_SIZE (1ULL << 15) + #define gpu_write(dev, reg, data) writel(data, dev->iomem + reg) #define gpu_read(dev, reg) readl(dev->iomem + reg) diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index cc8f32a1b03c6..92ffed5c1d690 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -197,7 +197,8 @@ void radeon_atom_backlight_init(struct radeon_encoder *radeon_encoder, * so don't register a backlight device */ if ((rdev->pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE) && - (rdev->pdev->device == 0x6741)) + (rdev->pdev->device == 0x6741) && + !dmi_match(DMI_PRODUCT_NAME, "iMac12,1")) return; if (!radeon_encoder->enc_priv) diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index bc63f4cecf5d5..ca6ccd69424e0 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -477,6 +477,8 @@ static struct drm_display_mode *radeon_fp_native_mode(struct drm_encoder *encode native_mode->vdisplay != 0 && native_mode->clock != 0) { mode = drm_mode_duplicate(dev, native_mode); + if (!mode) + return NULL; mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER; drm_mode_set_name(mode); @@ -491,6 +493,8 @@ static struct drm_display_mode *radeon_fp_native_mode(struct drm_encoder *encode * simpler. */ mode = drm_cvt_mode(dev, native_mode->hdisplay, native_mode->vdisplay, 60, true, false, false); + if (!mode) + return NULL; mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER; DRM_DEBUG_KMS("Adding cvt approximation of native panel mode %s\n", mode->name); } diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 03d3550ecc7cb..51db8b4f6d551 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -634,6 +634,8 @@ void radeon_driver_lastclose_kms(struct drm_device *dev) int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) { struct radeon_device *rdev = dev->dev_private; + struct radeon_fpriv *fpriv; + struct radeon_vm *vm; int r; file_priv->driver_priv = NULL; @@ -646,48 +648,52 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) /* new gpu have virtual address space support */ if (rdev->family >= CHIP_CAYMAN) { - struct radeon_fpriv *fpriv; - struct radeon_vm *vm; fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL); if (unlikely(!fpriv)) { r = -ENOMEM; - goto out_suspend; + goto err_suspend; } if (rdev->accel_working) { vm = &fpriv->vm; r = radeon_vm_init(rdev, vm); - if (r) { - kfree(fpriv); - goto out_suspend; - } + if (r) + goto err_fpriv; r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); - if (r) { - radeon_vm_fini(rdev, vm); - kfree(fpriv); - goto out_suspend; - } + if (r) + goto err_vm_fini; /* map the ib pool buffer read only into * virtual address space */ vm->ib_bo_va = radeon_vm_bo_add(rdev, vm, rdev->ring_tmp_bo.bo); + if (!vm->ib_bo_va) { + r = -ENOMEM; + goto err_vm_fini; + } + r = radeon_vm_bo_set_addr(rdev, vm->ib_bo_va, RADEON_VA_IB_OFFSET, RADEON_VM_PAGE_READABLE | RADEON_VM_PAGE_SNOOPED); - if (r) { - radeon_vm_fini(rdev, vm); - kfree(fpriv); - goto out_suspend; - } + if (r) + goto err_vm_fini; } file_priv->driver_priv = fpriv; } -out_suspend: + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + return 0; + +err_vm_fini: + radeon_vm_fini(rdev, vm); +err_fpriv: + kfree(fpriv); + +err_suspend: pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); return r; diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c index 8dc91c2d916a8..f7191ae2266fd 100644 --- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c @@ -231,6 +231,8 @@ struct dw_mipi_dsi_rockchip { struct dw_mipi_dsi *dmd; const struct rockchip_dw_dsi_chip_data *cdata; struct dw_mipi_dsi_plat_data pdata; + + bool dsi_bound; }; struct dphy_pll_parameter_map { @@ -625,10 +627,6 @@ static void dw_mipi_dsi_encoder_enable(struct drm_encoder *encoder) if (mux < 0) return; - pm_runtime_get_sync(dsi->dev); - if (dsi->slave) - pm_runtime_get_sync(dsi->slave->dev); - /* * For the RK3399, the clk of grf must be enabled before writing grf * register. And for RK3288 or other soc, this grf_clk must be NULL, @@ -647,20 +645,10 @@ static void dw_mipi_dsi_encoder_enable(struct drm_encoder *encoder) clk_disable_unprepare(dsi->grf_clk); } -static void dw_mipi_dsi_encoder_disable(struct drm_encoder *encoder) -{ - struct dw_mipi_dsi_rockchip *dsi = to_dsi(encoder); - - if (dsi->slave) - pm_runtime_put(dsi->slave->dev); - pm_runtime_put(dsi->dev); -} - static const struct drm_encoder_helper_funcs dw_mipi_dsi_encoder_helper_funcs = { .atomic_check = dw_mipi_dsi_encoder_atomic_check, .enable = dw_mipi_dsi_encoder_enable, - .disable = dw_mipi_dsi_encoder_disable, }; static const struct drm_encoder_funcs dw_mipi_dsi_encoder_funcs = { @@ -795,10 +783,14 @@ static int dw_mipi_dsi_rockchip_bind(struct device *dev, put_device(second); } + pm_runtime_get_sync(dsi->dev); + if (dsi->slave) + pm_runtime_get_sync(dsi->slave->dev); + ret = clk_prepare_enable(dsi->pllref_clk); if (ret) { DRM_DEV_ERROR(dev, "Failed to enable pllref_clk: %d\n", ret); - return ret; + goto out_pm_runtime; } /* @@ -810,7 +802,7 @@ static int dw_mipi_dsi_rockchip_bind(struct device *dev, ret = clk_prepare_enable(dsi->grf_clk); if (ret) { DRM_DEV_ERROR(dsi->dev, "Failed to enable grf_clk: %d\n", ret); - return ret; + goto out_pm_runtime; } dw_mipi_dsi_rockchip_config(dsi); @@ -822,16 +814,25 @@ static int dw_mipi_dsi_rockchip_bind(struct device *dev, ret = rockchip_dsi_drm_create_encoder(dsi, drm_dev); if (ret) { DRM_DEV_ERROR(dev, "Failed to create drm encoder\n"); - return ret; + goto out_pm_runtime; } ret = dw_mipi_dsi_bind(dsi->dmd, &dsi->encoder); if (ret) { DRM_DEV_ERROR(dev, "Failed to bind: %d\n", ret); - return ret; + goto out_pm_runtime; } + dsi->dsi_bound = true; + return 0; + +out_pm_runtime: + pm_runtime_put(dsi->dev); + if (dsi->slave) + pm_runtime_put(dsi->slave->dev); + + return ret; } static void dw_mipi_dsi_rockchip_unbind(struct device *dev, @@ -843,9 +844,15 @@ static void dw_mipi_dsi_rockchip_unbind(struct device *dev, if (dsi->is_slave) return; + dsi->dsi_bound = false; + dw_mipi_dsi_unbind(dsi->dmd); clk_disable_unprepare(dsi->pllref_clk); + + pm_runtime_put(dsi->dev); + if (dsi->slave) + pm_runtime_put(dsi->slave->dev); } static const struct component_ops dw_mipi_dsi_rockchip_ops = { @@ -903,6 +910,36 @@ static const struct dw_mipi_dsi_host_ops dw_mipi_dsi_rockchip_host_ops = { .detach = dw_mipi_dsi_rockchip_host_detach, }; +static int __maybe_unused dw_mipi_dsi_rockchip_resume(struct device *dev) +{ + struct dw_mipi_dsi_rockchip *dsi = dev_get_drvdata(dev); + int ret; + + /* + * Re-configure DSI state, if we were previously initialized. We need + * to do this before rockchip_drm_drv tries to re-enable() any panels. + */ + if (dsi->dsi_bound) { + ret = clk_prepare_enable(dsi->grf_clk); + if (ret) { + DRM_DEV_ERROR(dsi->dev, "Failed to enable grf_clk: %d\n", ret); + return ret; + } + + dw_mipi_dsi_rockchip_config(dsi); + if (dsi->slave) + dw_mipi_dsi_rockchip_config(dsi->slave); + + clk_disable_unprepare(dsi->grf_clk); + } + + return 0; +} + +static const struct dev_pm_ops dw_mipi_dsi_rockchip_pm_ops = { + SET_LATE_SYSTEM_SLEEP_PM_OPS(NULL, dw_mipi_dsi_rockchip_resume) +}; + static int dw_mipi_dsi_rockchip_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -986,14 +1023,10 @@ static int dw_mipi_dsi_rockchip_probe(struct platform_device *pdev) if (ret != -EPROBE_DEFER) DRM_DEV_ERROR(dev, "Failed to probe dw_mipi_dsi: %d\n", ret); - goto err_clkdisable; + return ret; } return 0; - -err_clkdisable: - clk_disable_unprepare(dsi->pllref_clk); - return ret; } static int dw_mipi_dsi_rockchip_remove(struct platform_device *pdev) @@ -1088,6 +1121,7 @@ struct platform_driver dw_mipi_dsi_rockchip_driver = { .remove = dw_mipi_dsi_rockchip_remove, .driver = { .of_match_table = dw_mipi_dsi_rockchip_dt_ids, + .pm = &dw_mipi_dsi_rockchip_pm_ops, .name = "dw-mipi-dsi-rockchip", }, }; diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c index 906891b03a38d..7805091bac32d 100644 --- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c @@ -528,13 +528,6 @@ static int dw_hdmi_rockchip_bind(struct device *dev, struct device *master, return ret; } - ret = clk_prepare_enable(hdmi->vpll_clk); - if (ret) { - DRM_DEV_ERROR(hdmi->dev, "Failed to enable HDMI vpll: %d\n", - ret); - return ret; - } - hdmi->phy = devm_phy_optional_get(dev, "hdmi"); if (IS_ERR(hdmi->phy)) { ret = PTR_ERR(hdmi->phy); @@ -543,6 +536,13 @@ static int dw_hdmi_rockchip_bind(struct device *dev, struct device *master, return ret; } + ret = clk_prepare_enable(hdmi->vpll_clk); + if (ret) { + DRM_DEV_ERROR(hdmi->dev, "Failed to enable HDMI vpll: %d\n", + ret); + return ret; + } + drm_encoder_helper_add(encoder, &dw_hdmi_rockchip_encoder_helper_funcs); drm_encoder_init(drm, encoder, &dw_hdmi_rockchip_encoder_funcs, DRM_MODE_ENCODER_TMDS, NULL); diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index 84e3decb17b1f..2e4e1933a43c1 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -1848,10 +1848,10 @@ static int vop_bind(struct device *dev, struct device *master, void *data) vop_win_init(vop); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - vop->len = resource_size(res); vop->regs = devm_ioremap_resource(dev, res); if (IS_ERR(vop->regs)) return PTR_ERR(vop->regs); + vop->len = resource_size(res); vop->regsbak = devm_kzalloc(dev, vop->len, GFP_KERNEL); if (!vop->regsbak) diff --git a/drivers/gpu/drm/sun4i/Kconfig b/drivers/gpu/drm/sun4i/Kconfig index 37e90e42943f6..0e2d304f0d83f 100644 --- a/drivers/gpu/drm/sun4i/Kconfig +++ b/drivers/gpu/drm/sun4i/Kconfig @@ -46,6 +46,7 @@ config DRM_SUN6I_DSI default MACH_SUN8I select CRC_CCITT select DRM_MIPI_DSI + select RESET_CONTROLLER select PHY_SUN6I_MIPI_DPHY help Choose this option if you want have an Allwinner SoC with diff --git a/drivers/gpu/drm/sun4i/sun8i_mixer.h b/drivers/gpu/drm/sun4i/sun8i_mixer.h index 345b28b0a80a1..dc4300a7b019b 100644 --- a/drivers/gpu/drm/sun4i/sun8i_mixer.h +++ b/drivers/gpu/drm/sun4i/sun8i_mixer.h @@ -114,10 +114,10 @@ /* format 13 is semi-planar YUV411 VUVU */ #define SUN8I_MIXER_FBFMT_YUV411 14 /* format 15 doesn't exist */ -/* format 16 is P010 YVU */ -#define SUN8I_MIXER_FBFMT_P010_YUV 17 -/* format 18 is P210 YVU */ -#define SUN8I_MIXER_FBFMT_P210_YUV 19 +#define SUN8I_MIXER_FBFMT_P010_YUV 16 +/* format 17 is P010 YVU */ +#define SUN8I_MIXER_FBFMT_P210_YUV 18 +/* format 19 is P210 YVU */ /* format 20 is packed YVU444 10-bit */ /* format 21 is packed YUV444 10-bit */ diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c index a5d47e301c5f7..13413d2b26028 100644 --- a/drivers/gpu/drm/tegra/dsi.c +++ b/drivers/gpu/drm/tegra/dsi.c @@ -1455,8 +1455,10 @@ static int tegra_dsi_ganged_probe(struct tegra_dsi *dsi) dsi->slave = platform_get_drvdata(gangster); of_node_put(np); - if (!dsi->slave) + if (!dsi->slave) { + put_device(&gangster->dev); return -EPROBE_DEFER; + } dsi->slave->master = dsi; } diff --git a/drivers/gpu/drm/tilcdc/tilcdc_external.c b/drivers/gpu/drm/tilcdc/tilcdc_external.c index 43d756b7810ee..67e23317c7ded 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_external.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_external.c @@ -58,11 +58,13 @@ struct drm_connector *tilcdc_encoder_find_connector(struct drm_device *ddev, int tilcdc_add_component_encoder(struct drm_device *ddev) { struct tilcdc_drm_private *priv = ddev->dev_private; - struct drm_encoder *encoder; + struct drm_encoder *encoder = NULL, *iter; - list_for_each_entry(encoder, &ddev->mode_config.encoder_list, head) - if (encoder->possible_crtcs & (1 << priv->crtc->index)) + list_for_each_entry(iter, &ddev->mode_config.encoder_list, head) + if (iter->possible_crtcs & (1 << priv->crtc->index)) { + encoder = iter; break; + } if (!encoder) { dev_err(ddev->dev, "%s: No suitable encoder found\n", __func__); diff --git a/drivers/gpu/drm/udl/udl_connector.c b/drivers/gpu/drm/udl/udl_connector.c index ddb61a60c6104..ef69773b5bed5 100644 --- a/drivers/gpu/drm/udl/udl_connector.c +++ b/drivers/gpu/drm/udl/udl_connector.c @@ -29,7 +29,7 @@ static int udl_get_edid_block(void *data, u8 *buf, unsigned int block, ret = usb_control_msg(udl->udev, usb_rcvctrlpipe(udl->udev, 0), (0x02), (0x80 | (0x02 << 5)), bval, - 0xA1, read_buff, 2, HZ); + 0xA1, read_buff, 2, 1000); if (ret < 1) { DRM_ERROR("Read EDID byte %d failed err %x\n", i, ret); kfree(read_buff); diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 19c092d75266b..1609a85429cef 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -195,8 +195,8 @@ v3d_clean_caches(struct v3d_dev *v3d) V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF); if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) & - V3D_L2TCACTL_L2TFLS), 100)) { - DRM_ERROR("Timeout waiting for L1T write combiner flush\n"); + V3D_L2TCACTL_TMUWCF), 100)) { + DRM_ERROR("Timeout waiting for TMU write combiner flush\n"); } mutex_lock(&v3d->cache_clean_lock); diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index 72d30d90b856c..0af246a5609ca 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -389,7 +389,7 @@ struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size) bo = kzalloc(sizeof(*bo), GFP_KERNEL); if (!bo) - return ERR_PTR(-ENOMEM); + return NULL; bo->madv = VC4_MADV_WILLNEED; refcount_set(&bo->usecnt, 0); diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c index c78fa8144776e..0983949cc8c98 100644 --- a/drivers/gpu/drm/vc4/vc4_dsi.c +++ b/drivers/gpu/drm/vc4/vc4_dsi.c @@ -831,7 +831,7 @@ static void vc4_dsi_encoder_enable(struct drm_encoder *encoder) unsigned long phy_clock; int ret; - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret) { DRM_ERROR("Failed to runtime PM enable on DSI%d\n", dsi->port); return; diff --git a/drivers/gpu/drm/vc4/vc4_txp.c b/drivers/gpu/drm/vc4/vc4_txp.c index bf720206727f0..0d9263f65d95b 100644 --- a/drivers/gpu/drm/vc4/vc4_txp.c +++ b/drivers/gpu/drm/vc4/vc4_txp.c @@ -285,12 +285,18 @@ static void vc4_txp_connector_atomic_commit(struct drm_connector *conn, if (WARN_ON(i == ARRAY_SIZE(drm_fmts))) return; - ctrl = TXP_GO | TXP_VSTART_AT_EOF | TXP_EI | + ctrl = TXP_GO | TXP_EI | VC4_SET_FIELD(0xf, TXP_BYTE_ENABLE) | VC4_SET_FIELD(txp_fmts[i], TXP_FORMAT); if (fb->format->has_alpha) ctrl |= TXP_ALPHA_ENABLE; + else + /* + * If TXP_ALPHA_ENABLE isn't set and TXP_ALPHA_INVERT is, the + * hardware will force the output padding to be 0xff. + */ + ctrl |= TXP_ALPHA_INVERT; gem = drm_fb_cma_get_gem_obj(fb, 0); TXP_WRITE(TXP_DST_PTR, gem->paddr + fb->offsets[0]); diff --git a/drivers/gpu/drm/virtio/virtgpu_display.c b/drivers/gpu/drm/virtio/virtgpu_display.c index e622485ae8267..7e34307eb075e 100644 --- a/drivers/gpu/drm/virtio/virtgpu_display.c +++ b/drivers/gpu/drm/virtio/virtgpu_display.c @@ -174,6 +174,8 @@ static int virtio_gpu_conn_get_modes(struct drm_connector *connector) DRM_DEBUG("add mode: %dx%d\n", width, height); mode = drm_cvt_mode(connector->dev, width, height, 60, false, false, false); + if (!mode) + return count; mode->type |= DRM_MODE_TYPE_PREFERRED; drm_mode_probed_add(connector, mode); count++; diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c index bb46e7a0f1b5d..0ca996e6fd5cb 100644 --- a/drivers/gpu/drm/virtio/virtgpu_vq.c +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c @@ -80,9 +80,7 @@ virtio_gpu_get_vbuf(struct virtio_gpu_device *vgdev, { struct virtio_gpu_vbuffer *vbuf; - vbuf = kmem_cache_zalloc(vgdev->vbufs, GFP_KERNEL); - if (!vbuf) - return ERR_PTR(-ENOMEM); + vbuf = kmem_cache_zalloc(vgdev->vbufs, GFP_KERNEL | __GFP_NOFAIL); BUG_ON(size > MAX_INLINE_CMD_SIZE); vbuf->buf = (void *)vbuf + sizeof(*vbuf); @@ -142,10 +140,6 @@ static void *virtio_gpu_alloc_cmd_resp(struct virtio_gpu_device *vgdev, vbuf = virtio_gpu_get_vbuf(vgdev, cmd_size, resp_size, resp_buf, cb); - if (IS_ERR(vbuf)) { - *vbuffer_p = NULL; - return ERR_CAST(vbuf); - } *vbuffer_p = vbuf; return (struct virtio_gpu_command *)vbuf->buf; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 5eb73ded8e07a..765f7a62870db 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1002,15 +1002,14 @@ extern int vmw_execbuf_fence_commands(struct drm_file *file_priv, struct vmw_private *dev_priv, struct vmw_fence_obj **p_fence, uint32_t *p_handle); -extern void vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv, +extern int vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv, struct vmw_fpriv *vmw_fp, int ret, struct drm_vmw_fence_rep __user *user_fence_rep, struct vmw_fence_obj *fence, uint32_t fence_handle, - int32_t out_fence_fd, - struct sync_file *sync_file); + int32_t out_fence_fd); bool vmw_cmd_describe(const void *buf, u32 *size, char const **cmd); /** diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index ff86d49dc5e8a..e3d20048075bf 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -3413,17 +3413,17 @@ int vmw_execbuf_fence_commands(struct drm_file *file_priv, * Also if copying fails, user-space will be unable to signal the fence object * so we wait for it immediately, and then unreference the user-space reference. */ -void +int vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv, struct vmw_fpriv *vmw_fp, int ret, struct drm_vmw_fence_rep __user *user_fence_rep, struct vmw_fence_obj *fence, uint32_t fence_handle, - int32_t out_fence_fd, struct sync_file *sync_file) + int32_t out_fence_fd) { struct drm_vmw_fence_rep fence_rep; if (user_fence_rep == NULL) - return; + return 0; memset(&fence_rep, 0, sizeof(fence_rep)); @@ -3451,20 +3451,14 @@ vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv, * handle. */ if (unlikely(ret != 0) && (fence_rep.error == 0)) { - if (sync_file) - fput(sync_file->file); - - if (fence_rep.fd != -1) { - put_unused_fd(fence_rep.fd); - fence_rep.fd = -1; - } - ttm_ref_object_base_unref(vmw_fp->tfile, fence_handle, TTM_REF_USAGE); VMW_DEBUG_USER("Fence copy error. Syncing.\n"); (void) vmw_fence_obj_wait(fence, false, false, VMW_FENCE_WAIT_TIMEOUT); } + + return ret ? -EFAULT : 0; } /** @@ -3806,16 +3800,23 @@ int vmw_execbuf_process(struct drm_file *file_priv, (void) vmw_fence_obj_wait(fence, false, false, VMW_FENCE_WAIT_TIMEOUT); + } + } + + ret = vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv), ret, + user_fence_rep, fence, handle, out_fence_fd); + + if (sync_file) { + if (ret) { + /* usercopy of fence failed, put the file object */ + fput(sync_file->file); + put_unused_fd(out_fence_fd); } else { /* Link the fence with the FD created earlier */ fd_install(out_fence_fd, sync_file->file); } } - vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv), ret, - user_fence_rep, fence, handle, out_fence_fd, - sync_file); - /* Don't unreference when handing fence out */ if (unlikely(out_fence != NULL)) { *out_fence = fence; @@ -3833,7 +3834,7 @@ int vmw_execbuf_process(struct drm_file *file_priv, */ vmw_validation_unref_lists(&val_ctx); - return 0; + return ret; out_unlock_binding: mutex_unlock(&dev_priv->binding_mutex); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c index ea29953e0b081..8e7b05484ba85 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c @@ -498,7 +498,7 @@ static int vmw_fb_kms_detach(struct vmw_fb_par *par, static int vmw_fb_kms_framebuffer(struct fb_info *info) { - struct drm_mode_fb_cmd2 mode_cmd; + struct drm_mode_fb_cmd2 mode_cmd = {0}; struct vmw_fb_par *par = info->par; struct fb_var_screeninfo *var = &info->var; struct drm_framebuffer *cur_fb; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index 178a6cd1a06fe..874093a0b04f0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -1171,7 +1171,7 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data, } vmw_execbuf_copy_fence_user(dev_priv, vmw_fp, 0, user_fence_rep, fence, - handle, -1, NULL); + handle, -1); vmw_fence_obj_unreference(&fence); return 0; out_no_create: diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 33b1519887474..0b800c3540492 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -2570,7 +2570,7 @@ void vmw_kms_helper_validation_finish(struct vmw_private *dev_priv, if (file_priv) vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv), ret, user_fence_rep, fence, - handle, -1, NULL); + handle, -1); if (out_fence) *out_fence = fence; else diff --git a/drivers/gpu/ipu-v3/ipu-cpmem.c b/drivers/gpu/ipu-v3/ipu-cpmem.c index a1c85d1521f5c..82b244cb313e6 100644 --- a/drivers/gpu/ipu-v3/ipu-cpmem.c +++ b/drivers/gpu/ipu-v3/ipu-cpmem.c @@ -585,21 +585,21 @@ static const struct ipu_rgb def_bgra_16 = { .bits_per_pixel = 16, }; -#define Y_OFFSET(pix, x, y) ((x) + pix->width * (y)) -#define U_OFFSET(pix, x, y) ((pix->width * pix->height) + \ - (pix->width * ((y) / 2) / 2) + (x) / 2) -#define V_OFFSET(pix, x, y) ((pix->width * pix->height) + \ - (pix->width * pix->height / 4) + \ - (pix->width * ((y) / 2) / 2) + (x) / 2) -#define U2_OFFSET(pix, x, y) ((pix->width * pix->height) + \ - (pix->width * (y) / 2) + (x) / 2) -#define V2_OFFSET(pix, x, y) ((pix->width * pix->height) + \ - (pix->width * pix->height / 2) + \ - (pix->width * (y) / 2) + (x) / 2) -#define UV_OFFSET(pix, x, y) ((pix->width * pix->height) + \ - (pix->width * ((y) / 2)) + (x)) -#define UV2_OFFSET(pix, x, y) ((pix->width * pix->height) + \ - (pix->width * y) + (x)) +#define Y_OFFSET(pix, x, y) ((x) + pix->bytesperline * (y)) +#define U_OFFSET(pix, x, y) ((pix->bytesperline * pix->height) + \ + (pix->bytesperline * ((y) / 2) / 2) + (x) / 2) +#define V_OFFSET(pix, x, y) ((pix->bytesperline * pix->height) + \ + (pix->bytesperline * pix->height / 4) + \ + (pix->bytesperline * ((y) / 2) / 2) + (x) / 2) +#define U2_OFFSET(pix, x, y) ((pix->bytesperline * pix->height) + \ + (pix->bytesperline * (y) / 2) + (x) / 2) +#define V2_OFFSET(pix, x, y) ((pix->bytesperline * pix->height) + \ + (pix->bytesperline * pix->height / 2) + \ + (pix->bytesperline * (y) / 2) + (x) / 2) +#define UV_OFFSET(pix, x, y) ((pix->bytesperline * pix->height) + \ + (pix->bytesperline * ((y) / 2)) + (x)) +#define UV2_OFFSET(pix, x, y) ((pix->bytesperline * pix->height) + \ + (pix->bytesperline * y) + (x)) #define NUM_ALPHA_CHANNELS 7 diff --git a/drivers/gpu/ipu-v3/ipu-di.c b/drivers/gpu/ipu-v3/ipu-di.c index b4a31d506fccf..74eca68891add 100644 --- a/drivers/gpu/ipu-v3/ipu-di.c +++ b/drivers/gpu/ipu-v3/ipu-di.c @@ -451,8 +451,9 @@ static void ipu_di_config_clock(struct ipu_di *di, error = rate / (sig->mode.pixelclock / 1000); - dev_dbg(di->ipu->dev, " IPU clock can give %lu with divider %u, error %d.%u%%\n", - rate, div, (signed)(error - 1000) / 10, error % 10); + dev_dbg(di->ipu->dev, " IPU clock can give %lu with divider %u, error %c%d.%d%%\n", + rate, div, error < 1000 ? '-' : '+', + abs(error - 1000) / 10, abs(error - 1000) % 10); /* Allow a 1% error */ if (error < 1010 && error >= 990) { diff --git a/drivers/greybus/svc.c b/drivers/greybus/svc.c index ce7740ef449ba..51d0875a34800 100644 --- a/drivers/greybus/svc.c +++ b/drivers/greybus/svc.c @@ -866,8 +866,14 @@ static int gb_svc_hello(struct gb_operation *op) gb_svc_debugfs_init(svc); - return gb_svc_queue_deferred_request(op); + ret = gb_svc_queue_deferred_request(op); + if (ret) + goto err_remove_debugfs; + + return 0; +err_remove_debugfs: + gb_svc_debugfs_exit(svc); err_unregister_device: gb_svc_watchdog_destroy(svc); device_del(&svc->dev); diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 1ecb5124421c0..5169a38ee47a9 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -149,6 +149,7 @@ config HID_APPLEIR config HID_ASUS tristate "Asus" + depends on USB_HID depends on LEDS_CLASS depends on ASUS_WMI || ASUS_WMI=n select POWER_SUPPLY @@ -206,14 +207,14 @@ config HID_CHERRY config HID_CHICONY tristate "Chicony devices" - depends on HID + depends on USB_HID default !EXPERT ---help--- Support for Chicony Tactical pad and special keys on Chicony keyboards. config HID_CORSAIR tristate "Corsair devices" - depends on HID && USB && LEDS_CLASS + depends on USB_HID && LEDS_CLASS ---help--- Support for Corsair devices that are not fully compliant with the HID standard. @@ -244,7 +245,7 @@ config HID_MACALLY config HID_PRODIKEYS tristate "Prodikeys PC-MIDI Keyboard support" - depends on HID && SND + depends on USB_HID && SND select SND_RAWMIDI ---help--- Support for Prodikeys PC-MIDI Keyboard device support. @@ -524,7 +525,7 @@ config HID_LENOVO config HID_LOGITECH tristate "Logitech devices" - depends on HID + depends on USB_HID default !EXPERT ---help--- Support for Logitech devices that are not fully compliant with HID standard. @@ -871,7 +872,7 @@ config HID_SAITEK config HID_SAMSUNG tristate "Samsung InfraRed remote control or keyboards" - depends on HID + depends on USB_HID ---help--- Support for Samsung InfraRed remote control or keyboards. diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index 6909c045fece1..efce31d035ef5 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -301,12 +301,19 @@ static int apple_event(struct hid_device *hdev, struct hid_field *field, /* * MacBook JIS keyboard has wrong logical maximum + * Magic Keyboard JIS has wrong logical maximum */ static __u8 *apple_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { struct apple_sc *asc = hid_get_drvdata(hdev); + if(*rsize >=71 && rdesc[70] == 0x65 && rdesc[64] == 0x65) { + hid_info(hdev, + "fixing up Magic Keyboard JIS report descriptor\n"); + rdesc[64] = rdesc[70] = 0xe7; + } + if ((asc->quirks & APPLE_RDESC_JIS) && *rsize >= 60 && rdesc[53] == 0x65 && rdesc[59] == 0x65) { hid_info(hdev, @@ -382,7 +389,7 @@ static int apple_input_configured(struct hid_device *hdev, if ((asc->quirks & APPLE_HAS_FN) && !asc->fn_found) { hid_info(hdev, "Fn key not found (Apple Wireless Keyboard clone?), disabling Fn key handling\n"); - asc->quirks = 0; + asc->quirks &= ~APPLE_HAS_FN; } return 0; diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index e6e4c841fb06f..7f84ed0afdfef 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -849,7 +849,7 @@ static int asus_probe(struct hid_device *hdev, const struct hid_device_id *id) if (drvdata->quirks & QUIRK_IS_MULTITOUCH) drvdata->tp = &asus_i2c_tp; - if (drvdata->quirks & QUIRK_T100_KEYBOARD) { + if ((drvdata->quirks & QUIRK_T100_KEYBOARD) && hid_is_usb(hdev)) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); if (intf->altsetting->desc.bInterfaceNumber == T100_TPAD_INTF) { diff --git a/drivers/hid/hid-betopff.c b/drivers/hid/hid-betopff.c index 0790fbd3fc9a2..467d789f9bc2d 100644 --- a/drivers/hid/hid-betopff.c +++ b/drivers/hid/hid-betopff.c @@ -56,15 +56,22 @@ static int betopff_init(struct hid_device *hid) { struct betopff_device *betopff; struct hid_report *report; - struct hid_input *hidinput = - list_first_entry(&hid->inputs, struct hid_input, list); + struct hid_input *hidinput; struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; - struct input_dev *dev = hidinput->input; + struct input_dev *dev; int field_count = 0; int error; int i, j; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + + hidinput = list_first_entry(&hid->inputs, struct hid_input, list); + dev = hidinput->input; + if (list_empty(report_list)) { hid_err(hid, "no output reports found\n"); return -ENODEV; diff --git a/drivers/hid/hid-bigbenff.c b/drivers/hid/hid-bigbenff.c index db6da21ade063..e8c5e3ac9fff1 100644 --- a/drivers/hid/hid-bigbenff.c +++ b/drivers/hid/hid-bigbenff.c @@ -191,7 +191,7 @@ static void bigben_worker(struct work_struct *work) struct bigben_device, worker); struct hid_field *report_field = bigben->report->field[0]; - if (bigben->removed) + if (bigben->removed || !report_field) return; if (bigben->work_led) { @@ -347,6 +347,12 @@ static int bigben_probe(struct hid_device *hid, bigben->report = list_entry(report_list->next, struct hid_report, list); + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + error = -ENODEV; + goto error_hw_stop; + } + hidinput = list_first_entry(&hid->inputs, struct hid_input, list); set_bit(FF_RUMBLE, hidinput->input->ffbit); diff --git a/drivers/hid/hid-chicony.c b/drivers/hid/hid-chicony.c index 3f0ed6a952234..e19e2b5973396 100644 --- a/drivers/hid/hid-chicony.c +++ b/drivers/hid/hid-chicony.c @@ -58,8 +58,12 @@ static int ch_input_mapping(struct hid_device *hdev, struct hid_input *hi, static __u8 *ch_switch12_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { - struct usb_interface *intf = to_usb_interface(hdev->dev.parent); - + struct usb_interface *intf; + + if (!hid_is_usb(hdev)) + return rdesc; + + intf = to_usb_interface(hdev->dev.parent); if (intf->cur_altsetting->desc.bInterfaceNumber == 1) { /* Change usage maximum and logical maximum from 0x7fff to * 0x2fff, so they don't exceed HID_MAX_USAGES */ diff --git a/drivers/hid/hid-corsair.c b/drivers/hid/hid-corsair.c index 902a60e249ed2..8c895c820b672 100644 --- a/drivers/hid/hid-corsair.c +++ b/drivers/hid/hid-corsair.c @@ -553,7 +553,12 @@ static int corsair_probe(struct hid_device *dev, const struct hid_device_id *id) int ret; unsigned long quirks = id->driver_data; struct corsair_drvdata *drvdata; - struct usb_interface *usbif = to_usb_interface(dev->dev.parent); + struct usb_interface *usbif; + + if (!hid_is_usb(dev)) + return -EINVAL; + + usbif = to_usb_interface(dev->dev.parent); drvdata = devm_kzalloc(&dev->dev, sizeof(struct corsair_drvdata), GFP_KERNEL); diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index 01135713e8f9b..419d8dec7e498 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -823,7 +823,9 @@ static const char *keys[KEY_MAX + 1] = { [KEY_F22] = "F22", [KEY_F23] = "F23", [KEY_F24] = "F24", [KEY_PLAYCD] = "PlayCD", [KEY_PAUSECD] = "PauseCD", [KEY_PROG3] = "Prog3", - [KEY_PROG4] = "Prog4", [KEY_SUSPEND] = "Suspend", + [KEY_PROG4] = "Prog4", + [KEY_ALL_APPLICATIONS] = "AllApplications", + [KEY_SUSPEND] = "Suspend", [KEY_CLOSE] = "Close", [KEY_PLAY] = "Play", [KEY_FASTFORWARD] = "FastForward", [KEY_BASSBOOST] = "BassBoost", [KEY_PRINT] = "Print", [KEY_HP] = "HP", @@ -930,6 +932,7 @@ static const char *keys[KEY_MAX + 1] = { [KEY_SCREENSAVER] = "ScreenSaver", [KEY_VOICECOMMAND] = "VoiceCommand", [KEY_EMOJI_PICKER] = "EmojiPicker", + [KEY_DICTATE] = "Dictate", [KEY_BRIGHTNESS_MIN] = "BrightnessMin", [KEY_BRIGHTNESS_MAX] = "BrightnessMax", [KEY_BRIGHTNESS_AUTO] = "BrightnessAuto", diff --git a/drivers/hid/hid-elan.c b/drivers/hid/hid-elan.c index dae193749d443..838673303f77f 100644 --- a/drivers/hid/hid-elan.c +++ b/drivers/hid/hid-elan.c @@ -50,7 +50,7 @@ struct elan_drvdata { static int is_not_elan_touchpad(struct hid_device *hdev) { - if (hdev->bus == BUS_USB) { + if (hid_is_usb(hdev)) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); return (intf->altsetting->desc.bInterfaceNumber != @@ -188,7 +188,6 @@ static int elan_input_configured(struct hid_device *hdev, struct hid_input *hi) ret = input_mt_init_slots(input, ELAN_MAX_FINGERS, INPUT_MT_POINTER); if (ret) { hid_err(hdev, "Failed to init elan MT slots: %d\n", ret); - input_free_device(input); return ret; } @@ -200,7 +199,6 @@ static int elan_input_configured(struct hid_device *hdev, struct hid_input *hi) hid_err(hdev, "Failed to register elan input device: %d\n", ret); input_mt_destroy_slots(input); - input_free_device(input); return ret; } diff --git a/drivers/hid/hid-elo.c b/drivers/hid/hid-elo.c index 0d22713a38742..2876cb6a7dcab 100644 --- a/drivers/hid/hid-elo.c +++ b/drivers/hid/hid-elo.c @@ -229,6 +229,9 @@ static int elo_probe(struct hid_device *hdev, const struct hid_device_id *id) struct elo_priv *priv; int ret; + if (!hid_is_usb(hdev)) + return -EINVAL; + priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c index 505ed76a830e8..b6947d7573473 100644 --- a/drivers/hid/hid-google-hammer.c +++ b/drivers/hid/hid-google-hammer.c @@ -469,6 +469,8 @@ static int hammer_probe(struct hid_device *hdev, static const struct hid_device_id hammer_devices[] = { { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_DON) }, + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, + USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_EEL) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_HAMMER) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, diff --git a/drivers/hid/hid-holtek-kbd.c b/drivers/hid/hid-holtek-kbd.c index 0a38e8e9bc783..403506b9697e7 100644 --- a/drivers/hid/hid-holtek-kbd.c +++ b/drivers/hid/hid-holtek-kbd.c @@ -140,12 +140,17 @@ static int holtek_kbd_input_event(struct input_dev *dev, unsigned int type, static int holtek_kbd_probe(struct hid_device *hdev, const struct hid_device_id *id) { - struct usb_interface *intf = to_usb_interface(hdev->dev.parent); - int ret = hid_parse(hdev); + struct usb_interface *intf; + int ret; + + if (!hid_is_usb(hdev)) + return -EINVAL; + ret = hid_parse(hdev); if (!ret) ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); + intf = to_usb_interface(hdev->dev.parent); if (!ret && intf->cur_altsetting->desc.bInterfaceNumber == 1) { struct hid_input *hidinput; list_for_each_entry(hidinput, &hdev->inputs, list) { diff --git a/drivers/hid/hid-holtek-mouse.c b/drivers/hid/hid-holtek-mouse.c index 195b735b001d0..7c907939bfae1 100644 --- a/drivers/hid/hid-holtek-mouse.c +++ b/drivers/hid/hid-holtek-mouse.c @@ -62,6 +62,29 @@ static __u8 *holtek_mouse_report_fixup(struct hid_device *hdev, __u8 *rdesc, return rdesc; } +static int holtek_mouse_probe(struct hid_device *hdev, + const struct hid_device_id *id) +{ + int ret; + + if (!hid_is_usb(hdev)) + return -EINVAL; + + ret = hid_parse(hdev); + if (ret) { + hid_err(hdev, "hid parse failed: %d\n", ret); + return ret; + } + + ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); + if (ret) { + hid_err(hdev, "hw start failed: %d\n", ret); + return ret; + } + + return 0; +} + static const struct hid_device_id holtek_mouse_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A067) }, @@ -83,6 +106,7 @@ static struct hid_driver holtek_mouse_driver = { .name = "holtek_mouse", .id_table = holtek_mouse_devices, .report_fixup = holtek_mouse_report_fixup, + .probe = holtek_mouse_probe, }; module_hid_driver(holtek_mouse_driver); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 6ed6158d4f732..c587a77d493c1 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -489,6 +489,7 @@ #define USB_DEVICE_ID_GOOGLE_MAGNEMITE 0x503d #define USB_DEVICE_ID_GOOGLE_MOONBALL 0x5044 #define USB_DEVICE_ID_GOOGLE_DON 0x5050 +#define USB_DEVICE_ID_GOOGLE_EEL 0x5057 #define USB_VENDOR_ID_GOTOP 0x08f2 #define USB_DEVICE_ID_SUPER_Q2 0x007f @@ -858,6 +859,7 @@ #define USB_DEVICE_ID_MS_TOUCH_COVER_2 0x07a7 #define USB_DEVICE_ID_MS_TYPE_COVER_2 0x07a9 #define USB_DEVICE_ID_MS_POWER_COVER 0x07da +#define USB_DEVICE_ID_MS_SURFACE3_COVER 0x07de #define USB_DEVICE_ID_MS_XBOX_ONE_S_CONTROLLER 0x02fd #define USB_DEVICE_ID_MS_PIXART_MOUSE 0x00cb #define USB_DEVICE_ID_8BITDO_SN30_PRO_PLUS 0x02e0 @@ -1309,6 +1311,7 @@ #define USB_VENDOR_ID_UGTIZER 0x2179 #define USB_DEVICE_ID_UGTIZER_TABLET_GP0610 0x0053 #define USB_DEVICE_ID_UGTIZER_TABLET_GT5040 0x0077 +#define USB_DEVICE_ID_UGTIZER_TABLET_WP5540 0x0004 #define USB_VENDOR_ID_VIEWSONIC 0x0543 #define USB_DEVICE_ID_VIEWSONIC_PD1011 0xe621 diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 6d551ae251c0a..d1ba6fafe960f 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -415,8 +415,6 @@ static int hidinput_get_battery_property(struct power_supply *psy, if (dev->battery_status == HID_BATTERY_UNKNOWN) val->intval = POWER_SUPPLY_STATUS_UNKNOWN; - else if (dev->battery_capacity == 100) - val->intval = POWER_SUPPLY_STATUS_FULL; else val->intval = POWER_SUPPLY_STATUS_DISCHARGING; break; @@ -958,6 +956,7 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x0cd: map_key_clear(KEY_PLAYPAUSE); break; case 0x0cf: map_key_clear(KEY_VOICECOMMAND); break; + case 0x0d8: map_key_clear(KEY_DICTATE); break; case 0x0d9: map_key_clear(KEY_EMOJI_PICKER); break; case 0x0e0: map_abs_clear(ABS_VOLUME); break; @@ -1049,6 +1048,8 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x29d: map_key_clear(KEY_KBD_LAYOUT_NEXT); break; + case 0x2a2: map_key_clear(KEY_ALL_APPLICATIONS); break; + case 0x2c7: map_key_clear(KEY_KBDINPUTASSIST_PREV); break; case 0x2c8: map_key_clear(KEY_KBDINPUTASSIST_NEXT); break; case 0x2c9: map_key_clear(KEY_KBDINPUTASSIST_PREVGROUP); break; @@ -1290,6 +1291,12 @@ void hidinput_hid_event(struct hid_device *hid, struct hid_field *field, struct input = field->hidinput->input; + if (usage->type == EV_ABS && + (((*quirks & HID_QUIRK_X_INVERT) && usage->code == ABS_X) || + ((*quirks & HID_QUIRK_Y_INVERT) && usage->code == ABS_Y))) { + value = field->logical_maximum - value; + } + if (usage->hat_min < usage->hat_max || usage->hat_dir) { int hat_dir = usage->hat_dir; if (!hat_dir) diff --git a/drivers/hid/hid-led.c b/drivers/hid/hid-led.c index c2c66ceca1327..7d82f8d426bbc 100644 --- a/drivers/hid/hid-led.c +++ b/drivers/hid/hid-led.c @@ -366,7 +366,7 @@ static const struct hidled_config hidled_configs[] = { .type = DREAM_CHEEKY, .name = "Dream Cheeky Webmail Notifier", .short_name = "dream_cheeky", - .max_brightness = 31, + .max_brightness = 63, .num_leds = 1, .report_size = 9, .report_type = RAW_REQUEST, diff --git a/drivers/hid/hid-lg.c b/drivers/hid/hid-lg.c index 0dc7cdfc56f77..2c7e7c089bf99 100644 --- a/drivers/hid/hid-lg.c +++ b/drivers/hid/hid-lg.c @@ -769,12 +769,18 @@ static int lg_raw_event(struct hid_device *hdev, struct hid_report *report, static int lg_probe(struct hid_device *hdev, const struct hid_device_id *id) { - struct usb_interface *iface = to_usb_interface(hdev->dev.parent); - __u8 iface_num = iface->cur_altsetting->desc.bInterfaceNumber; + struct usb_interface *iface; + __u8 iface_num; unsigned int connect_mask = HID_CONNECT_DEFAULT; struct lg_drv_data *drv_data; int ret; + if (!hid_is_usb(hdev)) + return -EINVAL; + + iface = to_usb_interface(hdev->dev.parent); + iface_num = iface->cur_altsetting->desc.bInterfaceNumber; + /* G29 only work with the 1st interface */ if ((hdev->product == USB_DEVICE_ID_LOGITECH_G29_WHEEL) && (iface_num != 0)) { diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index b499ac37dc7b0..a663cbb7b6832 100644 --- a/drivers/hid/hid-logitech-dj.c +++ b/drivers/hid/hid-logitech-dj.c @@ -1000,6 +1000,7 @@ static void logi_hidpp_recv_queue_notif(struct hid_device *hdev, workitem.reports_supported |= STD_KEYBOARD; break; case 0x0f: + case 0x11: device_type = "eQUAD Lightspeed 1.2"; logi_hidpp_dev_conn_notif_equad(hdev, hidpp_report, &workitem); workitem.reports_supported |= STD_KEYBOARD; @@ -1686,7 +1687,7 @@ static int logi_dj_probe(struct hid_device *hdev, case recvr_type_27mhz: no_dj_interfaces = 2; break; case recvr_type_bluetooth: no_dj_interfaces = 2; break; } - if (hid_is_using_ll_driver(hdev, &usb_hid_driver)) { + if (hid_is_usb(hdev)) { intf = to_usb_interface(hdev->dev.parent); if (intf && intf->altsetting->desc.bInterfaceNumber >= no_dj_interfaces) { diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 37270b8f4e58b..653f436aa4593 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -2158,6 +2158,9 @@ static const struct hid_device_id mt_devices[] = { { .driver_data = MT_CLS_GOOGLE, HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_TOUCH_ROSE) }, + { .driver_data = MT_CLS_GOOGLE, + HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_GOOGLE, + USB_DEVICE_ID_GOOGLE_WHISKERS) }, /* Generic MT device */ { HID_DEVICE(HID_BUS_ANY, HID_GROUP_MULTITOUCH, HID_ANY_ID, HID_ANY_ID) }, diff --git a/drivers/hid/hid-prodikeys.c b/drivers/hid/hid-prodikeys.c index 2666af02d5c1a..e4e9471d0f1e9 100644 --- a/drivers/hid/hid-prodikeys.c +++ b/drivers/hid/hid-prodikeys.c @@ -798,12 +798,18 @@ static int pk_raw_event(struct hid_device *hdev, struct hid_report *report, static int pk_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; - struct usb_interface *intf = to_usb_interface(hdev->dev.parent); - unsigned short ifnum = intf->cur_altsetting->desc.bInterfaceNumber; + struct usb_interface *intf; + unsigned short ifnum; unsigned long quirks = id->driver_data; struct pk_device *pk; struct pcmidi_snd *pm = NULL; + if (!hid_is_usb(hdev)) + return -EINVAL; + + intf = to_usb_interface(hdev->dev.parent); + ifnum = intf->cur_altsetting->desc.bInterfaceNumber; + pk = kzalloc(sizeof(*pk), GFP_KERNEL); if (pk == NULL) { hid_err(hdev, "can't alloc descriptor\n"); diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index efc9d0d281709..45eba224cdc77 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -124,6 +124,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_MCS, USB_DEVICE_ID_MCS_GAMEPADBLOCK), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PIXART_MOUSE), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_POWER_COVER), HID_QUIRK_NO_INIT_REPORTS }, + { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_SURFACE3_COVER), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_SURFACE_PRO_2), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TOUCH_COVER_2), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TYPE_COVER_2), HID_QUIRK_NO_INIT_REPORTS }, @@ -186,6 +187,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_TURBOX, USB_DEVICE_ID_TURBOX_KEYBOARD), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_KNA5), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_TWA60), HID_QUIRK_MULTI_INPUT }, + { HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_WP5540), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_10_6_INCH), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_14_1_INCH), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SIRIUS_BATTERY_FREE_TABLET), HID_QUIRK_MULTI_INPUT }, diff --git a/drivers/hid/hid-roccat-arvo.c b/drivers/hid/hid-roccat-arvo.c index ffcd444ae2ba6..4b18e1a4fc7ac 100644 --- a/drivers/hid/hid-roccat-arvo.c +++ b/drivers/hid/hid-roccat-arvo.c @@ -344,6 +344,9 @@ static int arvo_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-isku.c b/drivers/hid/hid-roccat-isku.c index ce5f22519956a..e95d59cd8d075 100644 --- a/drivers/hid/hid-roccat-isku.c +++ b/drivers/hid/hid-roccat-isku.c @@ -324,6 +324,9 @@ static int isku_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-kone.c b/drivers/hid/hid-roccat-kone.c index 509b9bb1362cb..6cf59b5e5484b 100644 --- a/drivers/hid/hid-roccat-kone.c +++ b/drivers/hid/hid-roccat-kone.c @@ -749,6 +749,9 @@ static int kone_probe(struct hid_device *hdev, const struct hid_device_id *id) { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-koneplus.c b/drivers/hid/hid-roccat-koneplus.c index 0316edf8c5bb4..1896c69ea512f 100644 --- a/drivers/hid/hid-roccat-koneplus.c +++ b/drivers/hid/hid-roccat-koneplus.c @@ -431,6 +431,9 @@ static int koneplus_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-konepure.c b/drivers/hid/hid-roccat-konepure.c index 5248b3c7cf785..cf8eeb33a1257 100644 --- a/drivers/hid/hid-roccat-konepure.c +++ b/drivers/hid/hid-roccat-konepure.c @@ -133,6 +133,9 @@ static int konepure_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-kovaplus.c b/drivers/hid/hid-roccat-kovaplus.c index 9600128815705..6fb9b9563769d 100644 --- a/drivers/hid/hid-roccat-kovaplus.c +++ b/drivers/hid/hid-roccat-kovaplus.c @@ -501,6 +501,9 @@ static int kovaplus_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-lua.c b/drivers/hid/hid-roccat-lua.c index 4a88a76d5c622..d5ddf0d68346b 100644 --- a/drivers/hid/hid-roccat-lua.c +++ b/drivers/hid/hid-roccat-lua.c @@ -160,6 +160,9 @@ static int lua_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-pyra.c b/drivers/hid/hid-roccat-pyra.c index 989927defe8db..4fcc8e7d276f2 100644 --- a/drivers/hid/hid-roccat-pyra.c +++ b/drivers/hid/hid-roccat-pyra.c @@ -449,6 +449,9 @@ static int pyra_probe(struct hid_device *hdev, const struct hid_device_id *id) { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-ryos.c b/drivers/hid/hid-roccat-ryos.c index 3956a6c9c5217..5bf1971a2b14d 100644 --- a/drivers/hid/hid-roccat-ryos.c +++ b/drivers/hid/hid-roccat-ryos.c @@ -141,6 +141,9 @@ static int ryos_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-savu.c b/drivers/hid/hid-roccat-savu.c index 818701f7a0281..a784bb4ee6512 100644 --- a/drivers/hid/hid-roccat-savu.c +++ b/drivers/hid/hid-roccat-savu.c @@ -113,6 +113,9 @@ static int savu_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-samsung.c b/drivers/hid/hid-samsung.c index 2e1c31156eca0..cf5992e970940 100644 --- a/drivers/hid/hid-samsung.c +++ b/drivers/hid/hid-samsung.c @@ -152,6 +152,9 @@ static int samsung_probe(struct hid_device *hdev, int ret; unsigned int cmask = HID_CONNECT_DEFAULT; + if (!hid_is_usb(hdev)) + return -EINVAL; + ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-u2fzero.c b/drivers/hid/hid-u2fzero.c index 95e0807878c7e..ac3fd870673d2 100644 --- a/drivers/hid/hid-u2fzero.c +++ b/drivers/hid/hid-u2fzero.c @@ -132,7 +132,7 @@ static int u2fzero_recv(struct u2fzero_device *dev, ret = (wait_for_completion_timeout( &ctx.done, msecs_to_jiffies(USB_CTRL_SET_TIMEOUT))); - if (ret < 0) { + if (ret == 0) { usb_kill_urb(dev->urb); hid_err(hdev, "urb submission timed out"); } else { @@ -191,6 +191,8 @@ static int u2fzero_rng_read(struct hwrng *rng, void *data, struct u2f_hid_msg resp; int ret; size_t actual_length; + /* valid packets must have a correct header */ + int min_length = offsetof(struct u2f_hid_msg, init.data); if (!dev->present) { hid_dbg(dev->hdev, "device not present"); @@ -198,12 +200,14 @@ static int u2fzero_rng_read(struct hwrng *rng, void *data, } ret = u2fzero_recv(dev, &req, &resp); - if (ret < 0) + + /* ignore errors or packets without data */ + if (ret < min_length) return 0; /* only take the minimum amount of data it is safe to take */ - actual_length = min3((size_t)ret - offsetof(struct u2f_hid_msg, - init.data), U2F_HID_MSG_LEN(resp), max); + actual_length = min3((size_t)ret - min_length, + U2F_HID_MSG_LEN(resp), max); memcpy(data, resp.init.data, actual_length); @@ -286,7 +290,7 @@ static int u2fzero_probe(struct hid_device *hdev, unsigned int minor; int ret; - if (!hid_is_using_ll_driver(hdev, &usb_hid_driver)) + if (!hid_is_usb(hdev)) return -EINVAL; dev = devm_kzalloc(&hdev->dev, sizeof(*dev), GFP_KERNEL); diff --git a/drivers/hid/hid-uclogic-core.c b/drivers/hid/hid-uclogic-core.c index 8e9c9e646cb7d..4edb241957040 100644 --- a/drivers/hid/hid-uclogic-core.c +++ b/drivers/hid/hid-uclogic-core.c @@ -164,6 +164,9 @@ static int uclogic_probe(struct hid_device *hdev, struct uclogic_drvdata *drvdata = NULL; bool params_initialized = false; + if (!hid_is_usb(hdev)) + return -EINVAL; + /* * libinput requires the pad interface to be on a different node * than the pen, so use QUIRK_MULTI_INPUT for all tablets. diff --git a/drivers/hid/hid-uclogic-params.c b/drivers/hid/hid-uclogic-params.c index e80c812f44a77..191aba9f6b497 100644 --- a/drivers/hid/hid-uclogic-params.c +++ b/drivers/hid/hid-uclogic-params.c @@ -65,7 +65,7 @@ static int uclogic_params_get_str_desc(__u8 **pbuf, struct hid_device *hdev, __u8 idx, size_t len) { int rc; - struct usb_device *udev = hid_to_usb_dev(hdev); + struct usb_device *udev; __u8 *buf = NULL; /* Check arguments */ @@ -74,6 +74,8 @@ static int uclogic_params_get_str_desc(__u8 **pbuf, struct hid_device *hdev, goto cleanup; } + udev = hid_to_usb_dev(hdev); + buf = kmalloc(len, GFP_KERNEL); if (buf == NULL) { rc = -ENOMEM; @@ -449,7 +451,7 @@ static int uclogic_params_frame_init_v1_buttonpad( { int rc; bool found = false; - struct usb_device *usb_dev = hid_to_usb_dev(hdev); + struct usb_device *usb_dev; char *str_buf = NULL; const size_t str_len = 16; @@ -459,6 +461,8 @@ static int uclogic_params_frame_init_v1_buttonpad( goto cleanup; } + usb_dev = hid_to_usb_dev(hdev); + /* * Enable generic button mode */ @@ -705,9 +709,9 @@ static int uclogic_params_huion_init(struct uclogic_params *params, struct hid_device *hdev) { int rc; - struct usb_device *udev = hid_to_usb_dev(hdev); - struct usb_interface *iface = to_usb_interface(hdev->dev.parent); - __u8 bInterfaceNumber = iface->cur_altsetting->desc.bInterfaceNumber; + struct usb_device *udev; + struct usb_interface *iface; + __u8 bInterfaceNumber; bool found; /* The resulting parameters (noop) */ struct uclogic_params p = {0, }; @@ -721,6 +725,10 @@ static int uclogic_params_huion_init(struct uclogic_params *params, goto cleanup; } + udev = hid_to_usb_dev(hdev); + iface = to_usb_interface(hdev->dev.parent); + bInterfaceNumber = iface->cur_altsetting->desc.bInterfaceNumber; + /* If it's not a pen interface */ if (bInterfaceNumber != 0) { /* TODO: Consider marking the interface invalid */ @@ -832,21 +840,25 @@ int uclogic_params_init(struct uclogic_params *params, struct hid_device *hdev) { int rc; - struct usb_device *udev = hid_to_usb_dev(hdev); - __u8 bNumInterfaces = udev->config->desc.bNumInterfaces; - struct usb_interface *iface = to_usb_interface(hdev->dev.parent); - __u8 bInterfaceNumber = iface->cur_altsetting->desc.bInterfaceNumber; + struct usb_device *udev; + __u8 bNumInterfaces; + struct usb_interface *iface; + __u8 bInterfaceNumber; bool found; /* The resulting parameters (noop) */ struct uclogic_params p = {0, }; /* Check arguments */ - if (params == NULL || hdev == NULL || - !hid_is_using_ll_driver(hdev, &usb_hid_driver)) { + if (params == NULL || hdev == NULL || !hid_is_usb(hdev)) { rc = -EINVAL; goto cleanup; } + udev = hid_to_usb_dev(hdev); + bNumInterfaces = udev->config->desc.bNumInterfaces; + iface = to_usb_interface(hdev->dev.parent); + bInterfaceNumber = iface->cur_altsetting->desc.bInterfaceNumber; + /* * Set replacement report descriptor if the original matches the * specified size. Otherwise keep interface unchanged. diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 6f7a3702b5fba..1547b0ec6a113 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -178,8 +178,6 @@ static const struct i2c_hid_quirks { I2C_HID_QUIRK_NO_IRQ_AFTER_RESET }, { I2C_VENDOR_ID_RAYDIUM, I2C_PRODUCT_ID_RAYDIUM_3118, I2C_HID_QUIRK_NO_IRQ_AFTER_RESET }, - { USB_VENDOR_ID_ELAN, HID_ANY_ID, - I2C_HID_QUIRK_BOGUS_IRQ }, { USB_VENDOR_ID_ALPS_JP, HID_ANY_ID, I2C_HID_QUIRK_RESET_ON_RESUME }, { I2C_VENDOR_ID_SYNAPTICS, I2C_PRODUCT_ID_SYNAPTICS_SYNA2393, @@ -190,7 +188,8 @@ static const struct i2c_hid_quirks { * Sending the wakeup after reset actually break ELAN touchscreen controller */ { USB_VENDOR_ID_ELAN, HID_ANY_ID, - I2C_HID_QUIRK_NO_WAKEUP_AFTER_RESET }, + I2C_HID_QUIRK_NO_WAKEUP_AFTER_RESET | + I2C_HID_QUIRK_BOGUS_IRQ }, { 0, 0 } }; @@ -623,6 +622,17 @@ static int i2c_hid_get_raw_report(struct hid_device *hid, if (report_type == HID_OUTPUT_REPORT) return -EINVAL; + /* + * In case of unnumbered reports the response from the device will + * not have the report ID that the upper layers expect, so we need + * to stash it the buffer ourselves and adjust the data size. + */ + if (!report_number) { + buf[0] = 0; + buf++; + count--; + } + /* +2 bytes to include the size of the reply in the query buffer */ ask_count = min(count + 2, (size_t)ihid->bufsize); @@ -644,6 +654,9 @@ static int i2c_hid_get_raw_report(struct hid_device *hid, count = min(count, ret_count - 2); memcpy(buf, ihid->rawbuf + 2, count); + if (!report_number) + count++; + return count; } @@ -660,17 +673,19 @@ static int i2c_hid_output_raw_report(struct hid_device *hid, __u8 *buf, mutex_lock(&ihid->reset_lock); - if (report_id) { - buf++; - count--; - } - + /* + * Note that both numbered and unnumbered reports passed here + * are supposed to have report ID stored in the 1st byte of the + * buffer, so we strip it off unconditionally before passing payload + * to i2c_hid_set_or_send_report which takes care of encoding + * everything properly. + */ ret = i2c_hid_set_or_send_report(client, report_type == HID_FEATURE_REPORT ? 0x03 : 0x02, - report_id, buf, count, use_data); + report_id, buf + 1, count - 1, use_data); - if (report_id && ret >= 0) - ret++; /* add report_id to the number of transfered bytes */ + if (ret >= 0) + ret++; /* add report_id to the number of transferred bytes */ mutex_unlock(&ihid->reset_lock); diff --git a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c index 6cf59fd26ad78..b6d6d119035ca 100644 --- a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c +++ b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c @@ -656,21 +656,12 @@ static int ish_fw_xfer_direct_dma(struct ishtp_cl_data *client_data, */ payload_max_size &= ~(L1_CACHE_BYTES - 1); - dma_buf = kmalloc(payload_max_size, GFP_KERNEL | GFP_DMA32); + dma_buf = dma_alloc_coherent(devc, payload_max_size, &dma_buf_phy, GFP_KERNEL); if (!dma_buf) { client_data->flag_retry = true; return -ENOMEM; } - dma_buf_phy = dma_map_single(devc, dma_buf, payload_max_size, - DMA_TO_DEVICE); - if (dma_mapping_error(devc, dma_buf_phy)) { - dev_err(cl_data_to_dev(client_data), "DMA map failed\n"); - client_data->flag_retry = true; - rv = -ENOMEM; - goto end_err_dma_buf_release; - } - ldr_xfer_dma_frag.fragment.hdr.command = LOADER_CMD_XFER_FRAGMENT; ldr_xfer_dma_frag.fragment.xfer_mode = LOADER_XFER_MODE_DIRECT_DMA; ldr_xfer_dma_frag.ddr_phys_addr = (u64)dma_buf_phy; @@ -690,14 +681,7 @@ static int ish_fw_xfer_direct_dma(struct ishtp_cl_data *client_data, ldr_xfer_dma_frag.fragment.size = fragment_size; memcpy(dma_buf, &fw->data[fragment_offset], fragment_size); - dma_sync_single_for_device(devc, dma_buf_phy, - payload_max_size, - DMA_TO_DEVICE); - - /* - * Flush cache here because the dma_sync_single_for_device() - * does not do for x86. - */ + /* Flush cache to be sure the data is in main memory. */ clflush_cache_range(dma_buf, payload_max_size); dev_dbg(cl_data_to_dev(client_data), @@ -720,15 +704,8 @@ static int ish_fw_xfer_direct_dma(struct ishtp_cl_data *client_data, fragment_offset += fragment_size; } - dma_unmap_single(devc, dma_buf_phy, payload_max_size, DMA_TO_DEVICE); - kfree(dma_buf); - return 0; - end_err_resp_buf_release: - /* Free ISH buffer if not done already, in error case */ - dma_unmap_single(devc, dma_buf_phy, payload_max_size, DMA_TO_DEVICE); -end_err_dma_buf_release: - kfree(dma_buf); + dma_free_coherent(devc, payload_max_size, dma_buf, dma_buf_phy); return rv; } diff --git a/drivers/hid/uhid.c b/drivers/hid/uhid.c index 8fe3efcb83271..fc06d8bb42e0f 100644 --- a/drivers/hid/uhid.c +++ b/drivers/hid/uhid.c @@ -28,11 +28,22 @@ struct uhid_device { struct mutex devlock; + + /* This flag tracks whether the HID device is usable for commands from + * userspace. The flag is already set before hid_add_device(), which + * runs in workqueue context, to allow hid_add_device() to communicate + * with userspace. + * However, if hid_add_device() fails, the flag is cleared without + * holding devlock. + * We guarantee that if @running changes from true to false while you're + * holding @devlock, it's still fine to access @hid. + */ bool running; __u8 *rd_data; uint rd_size; + /* When this is NULL, userspace may use UHID_CREATE/UHID_CREATE2. */ struct hid_device *hid; struct uhid_event input_buf; @@ -63,9 +74,18 @@ static void uhid_device_add_worker(struct work_struct *work) if (ret) { hid_err(uhid->hid, "Cannot register HID device: error %d\n", ret); - hid_destroy_device(uhid->hid); - uhid->hid = NULL; + /* We used to call hid_destroy_device() here, but that's really + * messy to get right because we have to coordinate with + * concurrent writes from userspace that might be in the middle + * of using uhid->hid. + * Just leave uhid->hid as-is for now, and clean it up when + * userspace tries to close or reinitialize the uhid instance. + * + * However, we do have to clear the ->running flag and do a + * wakeup to make sure userspace knows that the device is gone. + */ uhid->running = false; + wake_up_interruptible(&uhid->report_wait); } } @@ -474,7 +494,7 @@ static int uhid_dev_create2(struct uhid_device *uhid, void *rd_data; int ret; - if (uhid->running) + if (uhid->hid) return -EALREADY; rd_size = ev->u.create2.rd_size; @@ -556,7 +576,7 @@ static int uhid_dev_create(struct uhid_device *uhid, static int uhid_dev_destroy(struct uhid_device *uhid) { - if (!uhid->running) + if (!uhid->hid) return -EINVAL; uhid->running = false; @@ -565,6 +585,7 @@ static int uhid_dev_destroy(struct uhid_device *uhid) cancel_work_sync(&uhid->worker); hid_destroy_device(uhid->hid); + uhid->hid = NULL; kfree(uhid->rd_data); return 0; diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index 1cfbbaf6901da..8537fcdb456df 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -503,7 +503,7 @@ static void hid_ctrl(struct urb *urb) if (unplug) { usbhid->ctrltail = usbhid->ctrlhead; - } else { + } else if (usbhid->ctrlhead != usbhid->ctrltail) { usbhid->ctrltail = (usbhid->ctrltail + 1) & (HID_CONTROL_FIFO_SIZE - 1); if (usbhid->ctrlhead != usbhid->ctrltail && @@ -1221,9 +1221,20 @@ static void usbhid_stop(struct hid_device *hid) mutex_lock(&usbhid->mutex); clear_bit(HID_STARTED, &usbhid->iofl); + spin_lock_irq(&usbhid->lock); /* Sync with error and led handlers */ set_bit(HID_DISCONNECTED, &usbhid->iofl); + while (usbhid->ctrltail != usbhid->ctrlhead) { + if (usbhid->ctrl[usbhid->ctrltail].dir == USB_DIR_OUT) { + kfree(usbhid->ctrl[usbhid->ctrltail].raw_report); + usbhid->ctrl[usbhid->ctrltail].raw_report = NULL; + } + + usbhid->ctrltail = (usbhid->ctrltail + 1) & + (HID_CONTROL_FIFO_SIZE - 1); + } spin_unlock_irq(&usbhid->lock); + usb_kill_urb(usbhid->urbin); usb_kill_urb(usbhid->urbout); usb_kill_urb(usbhid->urbctrl); diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index 73dafa60080f1..329bb1a46f90e 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -726,7 +726,7 @@ static void wacom_retrieve_hid_descriptor(struct hid_device *hdev, * Skip the query for this type and modify defaults based on * interface number. */ - if (features->type == WIRELESS) { + if (features->type == WIRELESS && intf) { if (intf->cur_altsetting->desc.bInterfaceNumber == 0) features->device_type = WACOM_DEVICETYPE_WL_MONITOR; else @@ -2217,7 +2217,7 @@ static void wacom_update_name(struct wacom *wacom, const char *suffix) if ((features->type == HID_GENERIC) && !strcmp("Wacom HID", features->name)) { char *product_name = wacom->hdev->name; - if (hid_is_using_ll_driver(wacom->hdev, &usb_hid_driver)) { + if (hid_is_usb(wacom->hdev)) { struct usb_interface *intf = to_usb_interface(wacom->hdev->dev.parent); struct usb_device *dev = interface_to_usbdev(intf); product_name = dev->product; @@ -2448,6 +2448,9 @@ static void wacom_wireless_work(struct work_struct *work) wacom_destroy_battery(wacom); + if (!usbdev) + return; + /* Stylus interface */ hdev1 = usb_get_intfdata(usbdev->config->interface[1]); wacom1 = hid_get_drvdata(hdev1); @@ -2727,8 +2730,6 @@ static void wacom_mode_change_work(struct work_struct *work) static int wacom_probe(struct hid_device *hdev, const struct hid_device_id *id) { - struct usb_interface *intf = to_usb_interface(hdev->dev.parent); - struct usb_device *dev = interface_to_usbdev(intf); struct wacom *wacom; struct wacom_wac *wacom_wac; struct wacom_features *features; @@ -2763,8 +2764,14 @@ static int wacom_probe(struct hid_device *hdev, wacom_wac->hid_data.inputmode = -1; wacom_wac->mode_report = -1; - wacom->usbdev = dev; - wacom->intf = intf; + if (hid_is_usb(hdev)) { + struct usb_interface *intf = to_usb_interface(hdev->dev.parent); + struct usb_device *dev = interface_to_usbdev(intf); + + wacom->usbdev = dev; + wacom->intf = intf; + } + mutex_init(&wacom->lock); INIT_DELAYED_WORK(&wacom->init_work, wacom_init_work); INIT_WORK(&wacom->wireless_work, wacom_wireless_work); diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index d5425bc1ad61a..de69ea5f5a4be 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -2566,6 +2566,24 @@ static void wacom_wac_finger_slot(struct wacom_wac *wacom_wac, } } +static bool wacom_wac_slot_is_active(struct input_dev *dev, int key) +{ + struct input_mt *mt = dev->mt; + struct input_mt_slot *s; + + if (!mt) + return false; + + for (s = mt->slots; s != mt->slots + mt->num_slots; s++) { + if (s->key == key && + input_mt_get_value(s, ABS_MT_TRACKING_ID) >= 0) { + return true; + } + } + + return false; +} + static void wacom_wac_finger_event(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage, __s32 value) { @@ -2578,6 +2596,9 @@ static void wacom_wac_finger_event(struct hid_device *hdev, return; switch (equivalent_usage) { + case HID_DG_CONFIDENCE: + wacom_wac->hid_data.confidence = value; + break; case HID_GD_X: wacom_wac->hid_data.x = value; break; @@ -2610,8 +2631,14 @@ static void wacom_wac_finger_event(struct hid_device *hdev, } if (usage->usage_index + 1 == field->report_count) { - if (equivalent_usage == wacom_wac->hid_data.last_slot_field) - wacom_wac_finger_slot(wacom_wac, wacom_wac->touch_input); + if (equivalent_usage == wacom_wac->hid_data.last_slot_field) { + bool touch_removed = wacom_wac_slot_is_active(wacom_wac->touch_input, + wacom_wac->hid_data.id) && !wacom_wac->hid_data.tipswitch; + + if (wacom_wac->hid_data.confidence || touch_removed) { + wacom_wac_finger_slot(wacom_wac, wacom_wac->touch_input); + } + } } } @@ -2625,6 +2652,12 @@ static void wacom_wac_finger_pre_report(struct hid_device *hdev, wacom_wac->is_invalid_bt_frame = false; + hid_data->confidence = true; + + hid_data->cc_report = 0; + hid_data->cc_index = -1; + hid_data->cc_value_index = -1; + for (i = 0; i < report->maxfield; i++) { struct hid_field *field = report->field[i]; int j; @@ -2658,11 +2691,14 @@ static void wacom_wac_finger_pre_report(struct hid_device *hdev, hid_data->cc_index >= 0) { struct hid_field *field = report->field[hid_data->cc_index]; int value = field->value[hid_data->cc_value_index]; - if (value) + if (value) { hid_data->num_expected = value; + hid_data->num_received = 0; + } } else { hid_data->num_expected = wacom_wac->features.touch_max; + hid_data->num_received = 0; } } @@ -2686,6 +2722,7 @@ static void wacom_wac_finger_report(struct hid_device *hdev, input_sync(input); wacom_wac->hid_data.num_received = 0; + wacom_wac->hid_data.num_expected = 0; /* keep touch state for pen event */ wacom_wac->shared->touch_down = wacom_wac_finger_count_touches(wacom_wac); @@ -4715,6 +4752,12 @@ static const struct wacom_features wacom_features_0x393 = { "Wacom Intuos Pro S", 31920, 19950, 8191, 63, INTUOSP2S_BT, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 7, .touch_max = 10 }; +static const struct wacom_features wacom_features_0x3c6 = + { "Wacom Intuos BT S", 15200, 9500, 4095, 63, + INTUOSHT3_BT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 4 }; +static const struct wacom_features wacom_features_0x3c8 = + { "Wacom Intuos BT M", 21600, 13500, 4095, 63, + INTUOSHT3_BT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 4 }; static const struct wacom_features wacom_features_HID_ANY_ID = { "Wacom HID", .type = HID_GENERIC, .oVid = HID_ANY_ID, .oPid = HID_ANY_ID }; @@ -4888,6 +4931,8 @@ const struct hid_device_id wacom_ids[] = { { USB_DEVICE_WACOM(0x37A) }, { USB_DEVICE_WACOM(0x37B) }, { BT_DEVICE_WACOM(0x393) }, + { BT_DEVICE_WACOM(0x3c6) }, + { BT_DEVICE_WACOM(0x3c8) }, { USB_DEVICE_WACOM(0x4001) }, { USB_DEVICE_WACOM(0x4004) }, { USB_DEVICE_WACOM(0x5000) }, diff --git a/drivers/hid/wacom_wac.h b/drivers/hid/wacom_wac.h index e3835407e8d23..8dea7cb298e69 100644 --- a/drivers/hid/wacom_wac.h +++ b/drivers/hid/wacom_wac.h @@ -300,6 +300,7 @@ struct hid_data { bool tipswitch; bool barrelswitch; bool barrelswitch2; + bool confidence; int x; int y; int pressure; diff --git a/drivers/hsi/hsi_core.c b/drivers/hsi/hsi_core.c index a5f92e2889cb8..a330f58d45fc6 100644 --- a/drivers/hsi/hsi_core.c +++ b/drivers/hsi/hsi_core.c @@ -102,6 +102,7 @@ struct hsi_client *hsi_new_client(struct hsi_port *port, if (device_register(&cl->device) < 0) { pr_err("hsi: failed to register client: %s\n", info->name); put_device(&cl->device); + goto err; } return cl; diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index bd4e72f6dfd49..44d6de6fc03da 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -1550,7 +1550,7 @@ static void balloon_onchannelcallback(void *context) break; default: - pr_warn("Unhandled message: type: %d\n", dm_hdr->type); + pr_warn_ratelimited("Unhandled message: type: %d\n", dm_hdr->type); } } diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index cabcb66e7c5ef..356382a340b2c 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -13,6 +13,7 @@ #define _HYPERV_VMBUS_H #include +#include #include #include #include diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 9a03b163cbbda..59f1e64908b1d 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -378,7 +378,16 @@ int hv_ringbuffer_read(struct vmbus_channel *channel, static u32 hv_pkt_iter_avail(const struct hv_ring_buffer_info *rbi) { u32 priv_read_loc = rbi->priv_read_index; - u32 write_loc = READ_ONCE(rbi->ring_buffer->write_index); + u32 write_loc; + + /* + * The Hyper-V host writes the packet data, then uses + * store_release() to update the write_index. Use load_acquire() + * here to prevent loads of the packet data from being re-ordered + * before the read of the write_index and potentially getting + * stale data. + */ + write_loc = virt_load_acquire(&rbi->ring_buffer->write_index); if (write_loc >= priv_read_loc) return write_loc - priv_read_loc; diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 2d2568dac2a66..56918274c48cf 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1303,7 +1303,7 @@ static void vmbus_isr(void) tasklet_schedule(&hv_cpu->msg_dpc); } - add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0); + add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR); } /* @@ -1787,8 +1787,10 @@ int vmbus_add_channel_kobj(struct hv_device *dev, struct vmbus_channel *channel) kobj->kset = dev->channels_kset; ret = kobject_init_and_add(kobj, &vmbus_chan_ktype, NULL, "%u", relid); - if (ret) + if (ret) { + kobject_put(kobj); return ret; + } ret = sysfs_create_group(kobj, &vmbus_chan_group); @@ -1797,6 +1799,7 @@ int vmbus_add_channel_kobj(struct hv_device *dev, struct vmbus_channel *channel) * The calling functions' error handling paths will cleanup the * empty channel directory. */ + kobject_put(kobj); dev_err(device, "Unable to set up channel sysfs files\n"); return ret; } @@ -2490,10 +2493,15 @@ static void __exit vmbus_exit(void) if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { kmsg_dump_unregister(&hv_kmsg_dumper); unregister_die_notifier(&hyperv_die_block); - atomic_notifier_chain_unregister(&panic_notifier_list, - &hyperv_panic_block); } + /* + * The panic notifier is always registered, hence we should + * also unconditionally unregister it here as well. + */ + atomic_notifier_chain_unregister(&panic_notifier_list, + &hyperv_panic_block); + free_page((unsigned long)hv_panic_page); unregister_sysctl_table(hv_ctl_table_hdr); hv_ctl_table_hdr = NULL; diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 1f0b52e56a560..242475209a479 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -812,7 +812,7 @@ config SENSORS_LTC4261 config SENSORS_LTQ_CPUTEMP bool "Lantiq cpu temperature sensor driver" - depends on LANTIQ + depends on SOC_XWAY help If you say yes here you get support for the temperature sensor inside your CPU. @@ -1142,10 +1142,11 @@ config SENSORS_LM90 help If you say yes here you get support for National Semiconductor LM90, LM86, LM89 and LM99, Analog Devices ADM1032, ADT7461, and ADT7461A, - Maxim MAX6646, MAX6647, MAX6648, MAX6649, MAX6657, MAX6658, MAX6659, - MAX6680, MAX6681, MAX6692, MAX6695, MAX6696, ON Semiconductor NCT1008, - Winbond/Nuvoton W83L771W/G/AWG/ASG, Philips SA56004, GMT G781, and - Texas Instruments TMP451 sensor chips. + Maxim MAX6646, MAX6647, MAX6648, MAX6649, MAX6654, MAX6657, MAX6658, + MAX6659, MAX6680, MAX6681, MAX6692, MAX6695, MAX6696, + ON Semiconductor NCT1008, Winbond/Nuvoton W83L771W/G/AWG/ASG, + Philips SA56004, GMT G781, Texas Instruments TMP451 and TMP461 + sensor chips. This driver can also be built as a module. If so, the module will be called lm90. diff --git a/drivers/hwmon/adt7470.c b/drivers/hwmon/adt7470.c index a30f34cf512c2..95e06886991dc 100644 --- a/drivers/hwmon/adt7470.c +++ b/drivers/hwmon/adt7470.c @@ -20,6 +20,7 @@ #include #include #include +#include /* Addresses to scan */ static const unsigned short normal_i2c[] = { 0x2C, 0x2E, 0x2F, I2C_CLIENT_END }; @@ -260,11 +261,10 @@ static int adt7470_update_thread(void *p) adt7470_read_temperatures(client, data); mutex_unlock(&data->lock); - set_current_state(TASK_INTERRUPTIBLE); if (kthread_should_stop()) break; - schedule_timeout(msecs_to_jiffies(data->auto_update_interval)); + schedule_timeout_interruptible(msecs_to_jiffies(data->auto_update_interval)); } return 0; diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c index 35c00420d855b..785e7a07bb291 100644 --- a/drivers/hwmon/dell-smm-hwmon.c +++ b/drivers/hwmon/dell-smm-hwmon.c @@ -301,7 +301,7 @@ static int i8k_get_fan_nominal_speed(int fan, int speed) } /* - * Set the fan speed (off, low, high). Returns the new fan status. + * Set the fan speed (off, low, high, ...). */ static int i8k_set_fan(int fan, int speed) { @@ -313,7 +313,7 @@ static int i8k_set_fan(int fan, int speed) speed = (speed < 0) ? 0 : ((speed > i8k_fan_max) ? i8k_fan_max : speed); regs.ebx = (fan & 0xff) | (speed << 8); - return i8k_smm(®s) ? : i8k_get_fan_status(fan); + return i8k_smm(®s); } static int i8k_get_temp_type(int sensor) @@ -427,7 +427,7 @@ static int i8k_ioctl_unlocked(struct file *fp, unsigned int cmd, unsigned long arg) { int val = 0; - int speed; + int speed, err; unsigned char buff[16]; int __user *argp = (int __user *)arg; @@ -488,7 +488,11 @@ i8k_ioctl_unlocked(struct file *fp, unsigned int cmd, unsigned long arg) if (copy_from_user(&speed, argp + 1, sizeof(int))) return -EFAULT; - val = i8k_set_fan(val, speed); + err = i8k_set_fan(val, speed); + if (err < 0) + return err; + + val = i8k_get_fan_status(val); break; default: @@ -588,15 +592,18 @@ static const struct file_operations i8k_fops = { .unlocked_ioctl = i8k_ioctl, }; +static struct proc_dir_entry *entry; + static void __init i8k_init_procfs(void) { /* Register the proc entry */ - proc_create("i8k", 0, NULL, &i8k_fops); + entry = proc_create("i8k", 0, NULL, &i8k_fops); } static void __exit i8k_exit_procfs(void) { - remove_proc_entry("i8k", NULL); + if (entry) + remove_proc_entry("i8k", NULL); } #else diff --git a/drivers/hwmon/f71882fg.c b/drivers/hwmon/f71882fg.c index d09deb409de7a..3336ff9e306b2 100644 --- a/drivers/hwmon/f71882fg.c +++ b/drivers/hwmon/f71882fg.c @@ -1577,8 +1577,9 @@ static ssize_t show_temp(struct device *dev, struct device_attribute *devattr, temp *= 125; if (sign) temp -= 128000; - } else - temp = data->temp[nr] * 1000; + } else { + temp = ((s8)data->temp[nr]) * 1000; + } return sprintf(buf, "%d\n", temp); } diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index d018b20089ecd..a2175394cd253 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -645,8 +645,10 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata, dev_set_drvdata(hdev, drvdata); dev_set_name(hdev, HWMON_ID_FORMAT, id); err = device_register(hdev); - if (err) - goto free_hwmon; + if (err) { + put_device(hdev); + goto ida_remove; + } if (dev && dev->of_node && chip && chip->ops->read && chip->info[0]->type == hwmon_chip && diff --git a/drivers/hwmon/ibmaem.c b/drivers/hwmon/ibmaem.c index d05ab713566df..00a4ffe010393 100644 --- a/drivers/hwmon/ibmaem.c +++ b/drivers/hwmon/ibmaem.c @@ -550,7 +550,7 @@ static int aem_init_aem1_inst(struct aem_ipmi_data *probe, u8 module_handle) res = platform_device_add(data->pdev); if (res) - goto ipmi_err; + goto dev_add_err; platform_set_drvdata(data->pdev, data); @@ -598,7 +598,9 @@ static int aem_init_aem1_inst(struct aem_ipmi_data *probe, u8 module_handle) ipmi_destroy_user(data->ipmi.user); ipmi_err: platform_set_drvdata(data->pdev, NULL); - platform_device_unregister(data->pdev); + platform_device_del(data->pdev); +dev_add_err: + platform_device_put(data->pdev); dev_err: ida_simple_remove(&aem_ida, data->id); id_err: @@ -690,7 +692,7 @@ static int aem_init_aem2_inst(struct aem_ipmi_data *probe, res = platform_device_add(data->pdev); if (res) - goto ipmi_err; + goto dev_add_err; platform_set_drvdata(data->pdev, data); @@ -738,7 +740,9 @@ static int aem_init_aem2_inst(struct aem_ipmi_data *probe, ipmi_destroy_user(data->ipmi.user); ipmi_err: platform_set_drvdata(data->pdev, NULL); - platform_device_unregister(data->pdev); + platform_device_del(data->pdev); +dev_add_err: + platform_device_put(data->pdev); dev_err: ida_simple_remove(&aem_ida, data->id); id_err: diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c index f96fd8efb45a1..4e239bd75b1da 100644 --- a/drivers/hwmon/k10temp.c +++ b/drivers/hwmon/k10temp.c @@ -1,13 +1,21 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * k10temp.c - AMD Family 10h/11h/12h/14h/15h/16h processor hardware monitoring + * k10temp.c - AMD Family 10h/11h/12h/14h/15h/16h/17h + * processor hardware monitoring * * Copyright (c) 2009 Clemens Ladisch + * Copyright (c) 2020 Guenter Roeck + * + * Implementation notes: + * - CCD register address information as well as the calculation to + * convert raw register values is from https://github.com/ocerman/zenpower. + * The information is not confirmed from chip datasheets, but experiments + * suggest that it provides reasonable temperature values. */ +#include #include #include -#include #include #include #include @@ -31,22 +39,22 @@ static DEFINE_MUTEX(nb_smu_ind_mutex); #endif /* CPUID function 0x80000001, ebx */ -#define CPUID_PKGTYPE_MASK 0xf0000000 +#define CPUID_PKGTYPE_MASK GENMASK(31, 28) #define CPUID_PKGTYPE_F 0x00000000 #define CPUID_PKGTYPE_AM2R2_AM3 0x10000000 /* DRAM controller (PCI function 2) */ #define REG_DCT0_CONFIG_HIGH 0x094 -#define DDR3_MODE 0x00000100 +#define DDR3_MODE BIT(8) /* miscellaneous (PCI function 3) */ #define REG_HARDWARE_THERMAL_CONTROL 0x64 -#define HTC_ENABLE 0x00000001 +#define HTC_ENABLE BIT(0) #define REG_REPORTED_TEMPERATURE 0xa4 #define REG_NORTHBRIDGE_CAPABILITIES 0xe8 -#define NB_CAP_HTC 0x00000400 +#define NB_CAP_HTC BIT(10) /* * For F15h M60h and M70h, REG_HARDWARE_THERMAL_CONTROL @@ -57,8 +65,16 @@ static DEFINE_MUTEX(nb_smu_ind_mutex); #define F15H_M60H_HARDWARE_TEMP_CTRL_OFFSET 0xd8200c64 #define F15H_M60H_REPORTED_TEMP_CTRL_OFFSET 0xd8200ca4 -/* F17h M01h Access througn SMN */ -#define F17H_M01H_REPORTED_TEMP_CTRL_OFFSET 0x00059800 +/* Common for Zen CPU families (Family 17h and 18h and 19h) */ +#define ZEN_REPORTED_TEMP_CTRL_BASE 0x00059800 + +#define ZEN_CCD_TEMP(offset, x) (ZEN_REPORTED_TEMP_CTRL_BASE + \ + (offset) + ((x) * 4)) +#define ZEN_CCD_TEMP_VALID BIT(11) +#define ZEN_CCD_TEMP_MASK GENMASK(10, 0) + +#define ZEN_CUR_TEMP_SHIFT 21 +#define ZEN_CUR_TEMP_RANGE_SEL_MASK BIT(19) struct k10temp_data { struct pci_dev *pdev; @@ -66,9 +82,18 @@ struct k10temp_data { void (*read_tempreg)(struct pci_dev *pdev, u32 *regval); int temp_offset; u32 temp_adjust_mask; - bool show_tdie; + u32 show_temp; + bool is_zen; + u32 ccd_offset; }; +#define TCTL_BIT 0 +#define TDIE_BIT 1 +#define TCCD_BIT(x) ((x) + 2) + +#define HAVE_TEMP(d, channel) ((d)->show_temp & BIT(channel)) +#define HAVE_TDIE(d) HAVE_TEMP(d, TDIE_BIT) + struct tctl_offset { u8 model; char const *id; @@ -117,136 +142,162 @@ static void read_tempreg_nb_f15(struct pci_dev *pdev, u32 *regval) F15H_M60H_REPORTED_TEMP_CTRL_OFFSET, regval); } -static void read_tempreg_nb_f17(struct pci_dev *pdev, u32 *regval) +static void read_tempreg_nb_zen(struct pci_dev *pdev, u32 *regval) { amd_smn_read(amd_pci_dev_to_node_id(pdev), - F17H_M01H_REPORTED_TEMP_CTRL_OFFSET, regval); + ZEN_REPORTED_TEMP_CTRL_BASE, regval); } -static unsigned int get_raw_temp(struct k10temp_data *data) +static long get_raw_temp(struct k10temp_data *data) { - unsigned int temp; u32 regval; + long temp; data->read_tempreg(data->pdev, ®val); - temp = (regval >> 21) * 125; + temp = (regval >> ZEN_CUR_TEMP_SHIFT) * 125; if (regval & data->temp_adjust_mask) temp -= 49000; return temp; } -static ssize_t temp1_input_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct k10temp_data *data = dev_get_drvdata(dev); - unsigned int temp = get_raw_temp(data); - - if (temp > data->temp_offset) - temp -= data->temp_offset; - else - temp = 0; - - return sprintf(buf, "%u\n", temp); -} - -static ssize_t temp2_input_show(struct device *dev, - struct device_attribute *devattr, char *buf) -{ - struct k10temp_data *data = dev_get_drvdata(dev); - unsigned int temp = get_raw_temp(data); - - return sprintf(buf, "%u\n", temp); -} - -static ssize_t temp_label_show(struct device *dev, - struct device_attribute *devattr, char *buf) -{ - struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); - - return sprintf(buf, "%s\n", attr->index ? "Tctl" : "Tdie"); -} +static const char *k10temp_temp_label[] = { + "Tctl", + "Tdie", + "Tccd1", + "Tccd2", + "Tccd3", + "Tccd4", + "Tccd5", + "Tccd6", + "Tccd7", + "Tccd8", + "Tccd9", + "Tccd10", + "Tccd11", + "Tccd12", +}; -static ssize_t temp1_max_show(struct device *dev, - struct device_attribute *attr, char *buf) +static int k10temp_read_labels(struct device *dev, + enum hwmon_sensor_types type, + u32 attr, int channel, const char **str) { - return sprintf(buf, "%d\n", 70 * 1000); + switch (type) { + case hwmon_temp: + *str = k10temp_temp_label[channel]; + break; + default: + return -EOPNOTSUPP; + } + return 0; } -static ssize_t temp_crit_show(struct device *dev, - struct device_attribute *devattr, char *buf) +static int k10temp_read_temp(struct device *dev, u32 attr, int channel, + long *val) { - struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); struct k10temp_data *data = dev_get_drvdata(dev); - int show_hyst = attr->index; u32 regval; - int value; - data->read_htcreg(data->pdev, ®val); - value = ((regval >> 16) & 0x7f) * 500 + 52000; - if (show_hyst) - value -= ((regval >> 24) & 0xf) * 500; - return sprintf(buf, "%d\n", value); + switch (attr) { + case hwmon_temp_input: + switch (channel) { + case 0: /* Tctl */ + *val = get_raw_temp(data); + if (*val < 0) + *val = 0; + break; + case 1: /* Tdie */ + *val = get_raw_temp(data) - data->temp_offset; + if (*val < 0) + *val = 0; + break; + case 2 ... 13: /* Tccd{1-12} */ + amd_smn_read(amd_pci_dev_to_node_id(data->pdev), + ZEN_CCD_TEMP(data->ccd_offset, channel - 2), + ®val); + *val = (regval & ZEN_CCD_TEMP_MASK) * 125 - 49000; + break; + default: + return -EOPNOTSUPP; + } + break; + case hwmon_temp_max: + *val = 70 * 1000; + break; + case hwmon_temp_crit: + data->read_htcreg(data->pdev, ®val); + *val = ((regval >> 16) & 0x7f) * 500 + 52000; + break; + case hwmon_temp_crit_hyst: + data->read_htcreg(data->pdev, ®val); + *val = (((regval >> 16) & 0x7f) + - ((regval >> 24) & 0xf)) * 500 + 52000; + break; + default: + return -EOPNOTSUPP; + } + return 0; } -static DEVICE_ATTR_RO(temp1_input); -static DEVICE_ATTR_RO(temp1_max); -static SENSOR_DEVICE_ATTR_RO(temp1_crit, temp_crit, 0); -static SENSOR_DEVICE_ATTR_RO(temp1_crit_hyst, temp_crit, 1); - -static SENSOR_DEVICE_ATTR_RO(temp1_label, temp_label, 0); -static DEVICE_ATTR_RO(temp2_input); -static SENSOR_DEVICE_ATTR_RO(temp2_label, temp_label, 1); +static int k10temp_read(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long *val) +{ + switch (type) { + case hwmon_temp: + return k10temp_read_temp(dev, attr, channel, val); + default: + return -EOPNOTSUPP; + } +} -static umode_t k10temp_is_visible(struct kobject *kobj, - struct attribute *attr, int index) +static umode_t k10temp_is_visible(const void *_data, + enum hwmon_sensor_types type, + u32 attr, int channel) { - struct device *dev = container_of(kobj, struct device, kobj); - struct k10temp_data *data = dev_get_drvdata(dev); + const struct k10temp_data *data = _data; struct pci_dev *pdev = data->pdev; u32 reg; - switch (index) { - case 0 ... 1: /* temp1_input, temp1_max */ - default: - break; - case 2 ... 3: /* temp1_crit, temp1_crit_hyst */ - if (!data->read_htcreg) - return 0; - - pci_read_config_dword(pdev, REG_NORTHBRIDGE_CAPABILITIES, - ®); - if (!(reg & NB_CAP_HTC)) - return 0; - - data->read_htcreg(data->pdev, ®); - if (!(reg & HTC_ENABLE)) - return 0; - break; - case 4 ... 6: /* temp1_label, temp2_input, temp2_label */ - if (!data->show_tdie) + switch (type) { + case hwmon_temp: + switch (attr) { + case hwmon_temp_input: + if (!HAVE_TEMP(data, channel)) + return 0; + break; + case hwmon_temp_max: + if (channel || data->is_zen) + return 0; + break; + case hwmon_temp_crit: + case hwmon_temp_crit_hyst: + if (channel || !data->read_htcreg) + return 0; + + pci_read_config_dword(pdev, + REG_NORTHBRIDGE_CAPABILITIES, + ®); + if (!(reg & NB_CAP_HTC)) + return 0; + + data->read_htcreg(data->pdev, ®); + if (!(reg & HTC_ENABLE)) + return 0; + break; + case hwmon_temp_label: + /* Show temperature labels only on Zen CPUs */ + if (!data->is_zen || !HAVE_TEMP(data, channel)) + return 0; + break; + default: return 0; + } break; + default: + return 0; } - return attr->mode; + return 0444; } -static struct attribute *k10temp_attrs[] = { - &dev_attr_temp1_input.attr, - &dev_attr_temp1_max.attr, - &sensor_dev_attr_temp1_crit.dev_attr.attr, - &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr, - &sensor_dev_attr_temp1_label.dev_attr.attr, - &dev_attr_temp2_input.attr, - &sensor_dev_attr_temp2_label.dev_attr.attr, - NULL -}; - -static const struct attribute_group k10temp_group = { - .attrs = k10temp_attrs, - .is_visible = k10temp_is_visible, -}; -__ATTRIBUTE_GROUPS(k10temp); - static bool has_erratum_319(struct pci_dev *pdev) { u32 pkg_type, reg_dram_cfg; @@ -281,8 +332,53 @@ static bool has_erratum_319(struct pci_dev *pdev) (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_stepping <= 2); } -static int k10temp_probe(struct pci_dev *pdev, - const struct pci_device_id *id) +static const struct hwmon_channel_info *k10temp_info[] = { + HWMON_CHANNEL_INFO(temp, + HWMON_T_INPUT | HWMON_T_MAX | + HWMON_T_CRIT | HWMON_T_CRIT_HYST | + HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL), + NULL +}; + +static const struct hwmon_ops k10temp_hwmon_ops = { + .is_visible = k10temp_is_visible, + .read = k10temp_read, + .read_string = k10temp_read_labels, +}; + +static const struct hwmon_chip_info k10temp_chip_info = { + .ops = &k10temp_hwmon_ops, + .info = k10temp_info, +}; + +static void k10temp_get_ccd_support(struct pci_dev *pdev, + struct k10temp_data *data, int limit) +{ + u32 regval; + int i; + + for (i = 0; i < limit; i++) { + amd_smn_read(amd_pci_dev_to_node_id(pdev), + ZEN_CCD_TEMP(data->ccd_offset, i), ®val); + if (regval & ZEN_CCD_TEMP_VALID) + data->show_temp |= BIT(TCCD_BIT(i)); + } +} + +static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id) { int unreliable = has_erratum_319(pdev); struct device *dev = &pdev->dev; @@ -305,6 +401,7 @@ static int k10temp_probe(struct pci_dev *pdev, return -ENOMEM; data->pdev = pdev; + data->show_temp |= BIT(TCTL_BIT); /* Always show Tctl */ if (boot_cpu_data.x86 == 0x15 && ((boot_cpu_data.x86_model & 0xf0) == 0x60 || @@ -312,9 +409,48 @@ static int k10temp_probe(struct pci_dev *pdev, data->read_htcreg = read_htcreg_nb_f15; data->read_tempreg = read_tempreg_nb_f15; } else if (boot_cpu_data.x86 == 0x17 || boot_cpu_data.x86 == 0x18) { - data->temp_adjust_mask = 0x80000; - data->read_tempreg = read_tempreg_nb_f17; - data->show_tdie = true; + data->temp_adjust_mask = ZEN_CUR_TEMP_RANGE_SEL_MASK; + data->read_tempreg = read_tempreg_nb_zen; + data->is_zen = true; + + switch (boot_cpu_data.x86_model) { + case 0x1: /* Zen */ + case 0x8: /* Zen+ */ + case 0x11: /* Zen APU */ + case 0x18: /* Zen+ APU */ + data->ccd_offset = 0x154; + k10temp_get_ccd_support(pdev, data, 4); + break; + case 0x31: /* Zen2 Threadripper */ + case 0x60: /* Renoir */ + case 0x68: /* Lucienne */ + case 0x71: /* Zen2 */ + data->ccd_offset = 0x154; + k10temp_get_ccd_support(pdev, data, 8); + break; + } + } else if (boot_cpu_data.x86 == 0x19) { + data->temp_adjust_mask = ZEN_CUR_TEMP_RANGE_SEL_MASK; + data->read_tempreg = read_tempreg_nb_zen; + data->is_zen = true; + + switch (boot_cpu_data.x86_model) { + case 0x0 ... 0x1: /* Zen3 SP3/TR */ + case 0x21: /* Zen3 Ryzen Desktop */ + case 0x50 ... 0x5f: /* Green Sardine */ + data->ccd_offset = 0x154; + k10temp_get_ccd_support(pdev, data, 8); + break; + case 0x40 ... 0x4f: /* Yellow Carp */ + data->ccd_offset = 0x300; + k10temp_get_ccd_support(pdev, data, 8); + break; + case 0x10 ... 0x1f: + case 0xa0 ... 0xaf: + data->ccd_offset = 0x300; + k10temp_get_ccd_support(pdev, data, 12); + break; + } } else { data->read_htcreg = read_htcreg_pci; data->read_tempreg = read_tempreg_pci; @@ -325,13 +461,15 @@ static int k10temp_probe(struct pci_dev *pdev, if (boot_cpu_data.x86 == entry->model && strstr(boot_cpu_data.x86_model_id, entry->id)) { + data->show_temp |= BIT(TDIE_BIT); /* show Tdie */ data->temp_offset = entry->offset; break; } } - hwmon_dev = devm_hwmon_device_register_with_groups(dev, "k10temp", data, - k10temp_groups); + hwmon_dev = devm_hwmon_device_register_with_info(dev, "k10temp", data, + &k10temp_chip_info, + NULL); return PTR_ERR_OR_ZERO(hwmon_dev); } @@ -351,6 +489,10 @@ static const struct pci_device_id k10temp_id_table[] = { { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F3) }, { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F3) }, { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) }, + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) }, + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_M10H_DF_F3) }, + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_M40H_DF_F3) }, + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F3) }, { PCI_VDEVICE(HYGON, PCI_DEVICE_ID_AMD_17H_DF_F3) }, {} }; diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index 9b3c9f390ef81..b62763a85d6e4 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -35,6 +35,15 @@ * explicitly as max6659, or if its address is not 0x4c. * These chips lack the remote temperature offset feature. * + * This driver also supports the MAX6654 chip made by Maxim. This chip can be + * at 9 different addresses, similar to MAX6680/MAX6681. The MAX6654 is similar + * to MAX6657/MAX6658/MAX6659, but does not support critical temperature + * limits. Extended range is available by setting the configuration register + * accordingly, and is done during initialization. Extended precision is only + * available at conversion rates of 1 Hz and slower. Note that extended + * precision is not enabled by default, as this driver initializes all chips + * to 2 Hz by design. + * * This driver also supports the MAX6646, MAX6647, MAX6648, MAX6649 and * MAX6692 chips made by Maxim. These are again similar to the LM86, * but they use unsigned temperature values and can report temperatures @@ -61,10 +70,10 @@ * This driver also supports the G781 from GMT. This device is compatible * with the ADM1032. * - * This driver also supports TMP451 from Texas Instruments. This device is - * supported in both compatibility and extended mode. It's mostly compatible - * with ADT7461 except for local temperature low byte register and max - * conversion rate. + * This driver also supports TMP451 and TMP461 from Texas Instruments. + * Those devices are supported in both compatibility and extended mode. + * They are mostly compatible with ADT7461 except for local temperature + * low byte register and max conversion rate. * * Since the LM90 was the first chipset supported by this driver, most * comments will refer to this chipset, but are actually general and @@ -94,8 +103,8 @@ * have address 0x4d. * MAX6647 has address 0x4e. * MAX6659 can have address 0x4c, 0x4d or 0x4e. - * MAX6680 and MAX6681 can have address 0x18, 0x19, 0x1a, 0x29, 0x2a, 0x2b, - * 0x4c, 0x4d or 0x4e. + * MAX6654, MAX6680, and MAX6681 can have address 0x18, 0x19, 0x1a, 0x29, + * 0x2a, 0x2b, 0x4c, 0x4d or 0x4e. * SA56004 can have address 0x48 through 0x4F. */ @@ -104,7 +113,7 @@ static const unsigned short normal_i2c[] = { 0x4d, 0x4e, 0x4f, I2C_CLIENT_END }; enum chips { lm90, adm1032, lm99, lm86, max6657, max6659, adt7461, max6680, - max6646, w83l771, max6696, sa56004, g781, tmp451 }; + max6646, w83l771, max6696, sa56004, g781, tmp451, tmp461, max6654 }; /* * The LM90 registers @@ -145,7 +154,7 @@ enum chips { lm90, adm1032, lm99, lm86, max6657, max6659, adt7461, max6680, #define LM90_REG_R_TCRIT_HYST 0x21 #define LM90_REG_W_TCRIT_HYST 0x21 -/* MAX6646/6647/6649/6657/6658/6659/6695/6696 registers */ +/* MAX6646/6647/6649/6654/6657/6658/6659/6695/6696 registers */ #define MAX6657_REG_R_LOCAL_TEMPL 0x11 #define MAX6696_REG_R_STATUS2 0x12 @@ -160,8 +169,12 @@ enum chips { lm90, adm1032, lm99, lm86, max6657, max6659, adt7461, max6680, #define LM90_MAX_CONVRATE_MS 16000 /* Maximum conversion rate in ms */ -/* TMP451 registers */ +/* TMP451/TMP461 registers */ #define TMP451_REG_R_LOCAL_TEMPL 0x15 +#define TMP451_REG_CONALERT 0x22 + +#define TMP461_REG_CHEN 0x16 +#define TMP461_REG_DFC 0x24 /* * Device flags @@ -174,7 +187,10 @@ enum chips { lm90, adm1032, lm99, lm86, max6657, max6659, adt7461, max6680, #define LM90_HAVE_EMERGENCY_ALARM (1 << 5)/* emergency alarm */ #define LM90_HAVE_TEMP3 (1 << 6) /* 3rd temperature sensor */ #define LM90_HAVE_BROKEN_ALERT (1 << 7) /* Broken alert */ -#define LM90_PAUSE_FOR_CONFIG (1 << 8) /* Pause conversion for config */ +#define LM90_HAVE_EXTENDED_TEMP (1 << 8) /* extended temperature support*/ +#define LM90_PAUSE_FOR_CONFIG (1 << 9) /* Pause conversion for config */ +#define LM90_HAVE_CRIT (1 << 10)/* Chip supports CRIT/OVERT register */ +#define LM90_HAVE_CRIT_ALRM_SWP (1 << 11)/* critical alarm bits swapped */ /* LM90 status */ #define LM90_STATUS_LTHRM (1 << 0) /* local THERM limit tripped */ @@ -184,6 +200,7 @@ enum chips { lm90, adm1032, lm99, lm86, max6657, max6659, adt7461, max6680, #define LM90_STATUS_RHIGH (1 << 4) /* remote high temp limit tripped */ #define LM90_STATUS_LLOW (1 << 5) /* local low temp limit tripped */ #define LM90_STATUS_LHIGH (1 << 6) /* local high temp limit tripped */ +#define LM90_STATUS_BUSY (1 << 7) /* conversion is ongoing */ #define MAX6696_STATUS2_R2THRM (1 << 1) /* remote2 THERM limit tripped */ #define MAX6696_STATUS2_R2OPEN (1 << 2) /* remote2 is an open circuit */ @@ -209,6 +226,7 @@ static const struct i2c_device_id lm90_id[] = { { "max6646", max6646 }, { "max6647", max6646 }, { "max6649", max6646 }, + { "max6654", max6654 }, { "max6657", max6657 }, { "max6658", max6657 }, { "max6659", max6659 }, @@ -220,6 +238,7 @@ static const struct i2c_device_id lm90_id[] = { { "w83l771", w83l771 }, { "sa56004", sa56004 }, { "tmp451", tmp451 }, + { "tmp461", tmp461 }, { } }; MODULE_DEVICE_TABLE(i2c, lm90_id); @@ -269,6 +288,10 @@ static const struct of_device_id __maybe_unused lm90_of_match[] = { .compatible = "dallas,max6649", .data = (void *)max6646 }, + { + .compatible = "dallas,max6654", + .data = (void *)max6654 + }, { .compatible = "dallas,max6657", .data = (void *)max6657 @@ -313,6 +336,10 @@ static const struct of_device_id __maybe_unused lm90_of_match[] = { .compatible = "ti,tmp451", .data = (void *)tmp451 }, + { + .compatible = "ti,tmp461", + .data = (void *)tmp461 + }, { }, }; MODULE_DEVICE_TABLE(of, lm90_of_match); @@ -331,80 +358,99 @@ struct lm90_params { static const struct lm90_params lm90_params[] = { [adm1032] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT - | LM90_HAVE_BROKEN_ALERT, + | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 10, }, [adt7461] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT - | LM90_HAVE_BROKEN_ALERT, + | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_EXTENDED_TEMP + | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 10, }, [g781] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT - | LM90_HAVE_BROKEN_ALERT, + | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_CRIT, .alert_alarms = 0x7c, - .max_convrate = 8, + .max_convrate = 7, }, [lm86] = { - .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT + | LM90_HAVE_CRIT, .alert_alarms = 0x7b, .max_convrate = 9, }, [lm90] = { - .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT + | LM90_HAVE_CRIT, .alert_alarms = 0x7b, .max_convrate = 9, }, [lm99] = { - .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT + | LM90_HAVE_CRIT, .alert_alarms = 0x7b, .max_convrate = 9, }, [max6646] = { + .flags = LM90_HAVE_CRIT | LM90_HAVE_BROKEN_ALERT, .alert_alarms = 0x7c, .max_convrate = 6, .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, }, + [max6654] = { + .flags = LM90_HAVE_BROKEN_ALERT, + .alert_alarms = 0x7c, + .max_convrate = 7, + .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, + }, [max6657] = { - .flags = LM90_PAUSE_FOR_CONFIG, + .flags = LM90_PAUSE_FOR_CONFIG | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 8, .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, }, [max6659] = { - .flags = LM90_HAVE_EMERGENCY, + .flags = LM90_HAVE_EMERGENCY | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 8, .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, }, [max6680] = { - .flags = LM90_HAVE_OFFSET, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_CRIT + | LM90_HAVE_CRIT_ALRM_SWP | LM90_HAVE_BROKEN_ALERT, .alert_alarms = 0x7c, .max_convrate = 7, }, [max6696] = { .flags = LM90_HAVE_EMERGENCY - | LM90_HAVE_EMERGENCY_ALARM | LM90_HAVE_TEMP3, + | LM90_HAVE_EMERGENCY_ALARM | LM90_HAVE_TEMP3 | LM90_HAVE_CRIT, .alert_alarms = 0x1c7c, .max_convrate = 6, .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, }, [w83l771] = { - .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 8, }, [sa56004] = { - .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT | LM90_HAVE_CRIT, .alert_alarms = 0x7b, .max_convrate = 9, .reg_local_ext = SA56004_REG_R_LOCAL_TEMPL, }, [tmp451] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT - | LM90_HAVE_BROKEN_ALERT, + | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_EXTENDED_TEMP | LM90_HAVE_CRIT, + .alert_alarms = 0x7c, + .max_convrate = 9, + .reg_local_ext = TMP451_REG_R_LOCAL_TEMPL, + }, + [tmp461] = { + .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT + | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_EXTENDED_TEMP | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 9, .reg_local_ext = TMP451_REG_R_LOCAL_TEMPL, @@ -632,20 +678,22 @@ static int lm90_update_limits(struct device *dev) struct i2c_client *client = data->client; int val; - val = lm90_read_reg(client, LM90_REG_R_LOCAL_CRIT); - if (val < 0) - return val; - data->temp8[LOCAL_CRIT] = val; + if (data->flags & LM90_HAVE_CRIT) { + val = lm90_read_reg(client, LM90_REG_R_LOCAL_CRIT); + if (val < 0) + return val; + data->temp8[LOCAL_CRIT] = val; - val = lm90_read_reg(client, LM90_REG_R_REMOTE_CRIT); - if (val < 0) - return val; - data->temp8[REMOTE_CRIT] = val; + val = lm90_read_reg(client, LM90_REG_R_REMOTE_CRIT); + if (val < 0) + return val; + data->temp8[REMOTE_CRIT] = val; - val = lm90_read_reg(client, LM90_REG_R_TCRIT_HYST); - if (val < 0) - return val; - data->temp_hyst = val; + val = lm90_read_reg(client, LM90_REG_R_TCRIT_HYST); + if (val < 0) + return val; + data->temp_hyst = val; + } val = lm90_read_reg(client, LM90_REG_R_REMOTE_LOWH); if (val < 0) @@ -773,7 +821,7 @@ static int lm90_update_device(struct device *dev) val = lm90_read_reg(client, LM90_REG_R_STATUS); if (val < 0) return val; - data->alarms = val; /* lower 8 bit of alarms */ + data->alarms = val & ~LM90_STATUS_BUSY; if (data->kind == max6696) { val = lm90_select_remote_channel(data, 1); @@ -979,7 +1027,7 @@ static int lm90_get_temp11(struct lm90_data *data, int index) s16 temp11 = data->temp11[index]; int temp; - if (data->kind == adt7461 || data->kind == tmp451) + if (data->flags & LM90_HAVE_EXTENDED_TEMP) temp = temp_from_u16_adt7461(data, temp11); else if (data->kind == max6646) temp = temp_from_u16(temp11); @@ -1013,7 +1061,7 @@ static int lm90_set_temp11(struct lm90_data *data, int index, long val) if (data->kind == lm99 && index <= 2) val -= 16000; - if (data->kind == adt7461 || data->kind == tmp451) + if (data->flags & LM90_HAVE_EXTENDED_TEMP) data->temp11[index] = temp_to_u16_adt7461(data, val); else if (data->kind == max6646) data->temp11[index] = temp_to_u8(val) << 8; @@ -1040,7 +1088,7 @@ static int lm90_get_temp8(struct lm90_data *data, int index) s8 temp8 = data->temp8[index]; int temp; - if (data->kind == adt7461 || data->kind == tmp451) + if (data->flags & LM90_HAVE_EXTENDED_TEMP) temp = temp_from_u8_adt7461(data, temp8); else if (data->kind == max6646) temp = temp_from_u8(temp8); @@ -1073,7 +1121,7 @@ static int lm90_set_temp8(struct lm90_data *data, int index, long val) if (data->kind == lm99 && index == 3) val -= 16000; - if (data->kind == adt7461 || data->kind == tmp451) + if (data->flags & LM90_HAVE_EXTENDED_TEMP) data->temp8[index] = temp_to_u8_adt7461(data, val); else if (data->kind == max6646) data->temp8[index] = temp_to_u8(val); @@ -1091,7 +1139,7 @@ static int lm90_get_temphyst(struct lm90_data *data, int index) { int temp; - if (data->kind == adt7461 || data->kind == tmp451) + if (data->flags & LM90_HAVE_EXTENDED_TEMP) temp = temp_from_u8_adt7461(data, data->temp8[index]); else if (data->kind == max6646) temp = temp_from_u8(data->temp8[index]); @@ -1111,7 +1159,7 @@ static int lm90_set_temphyst(struct lm90_data *data, long val) int temp; int err; - if (data->kind == adt7461 || data->kind == tmp451) + if (data->flags & LM90_HAVE_EXTENDED_TEMP) temp = temp_from_u8_adt7461(data, data->temp8[LOCAL_CRIT]); else if (data->kind == max6646) temp = temp_from_u8(data->temp8[LOCAL_CRIT]); @@ -1147,6 +1195,7 @@ static const u8 lm90_temp_emerg_index[3] = { static const u8 lm90_min_alarm_bits[3] = { 5, 3, 11 }; static const u8 lm90_max_alarm_bits[3] = { 6, 4, 12 }; static const u8 lm90_crit_alarm_bits[3] = { 0, 1, 9 }; +static const u8 lm90_crit_alarm_bits_swapped[3] = { 1, 0, 9 }; static const u8 lm90_emergency_alarm_bits[3] = { 15, 13, 14 }; static const u8 lm90_fault_bits[3] = { 0, 2, 10 }; @@ -1172,7 +1221,10 @@ static int lm90_temp_read(struct device *dev, u32 attr, int channel, long *val) *val = (data->alarms >> lm90_max_alarm_bits[channel]) & 1; break; case hwmon_temp_crit_alarm: - *val = (data->alarms >> lm90_crit_alarm_bits[channel]) & 1; + if (data->flags & LM90_HAVE_CRIT_ALRM_SWP) + *val = (data->alarms >> lm90_crit_alarm_bits_swapped[channel]) & 1; + else + *val = (data->alarms >> lm90_crit_alarm_bits[channel]) & 1; break; case hwmon_temp_emergency_alarm: *val = (data->alarms >> lm90_emergency_alarm_bits[channel]) & 1; @@ -1420,12 +1472,11 @@ static int lm90_detect(struct i2c_client *client, if (man_id < 0 || chip_id < 0 || config1 < 0 || convrate < 0) return -ENODEV; - if (man_id == 0x01 || man_id == 0x5C || man_id == 0x41) { + if (man_id == 0x01 || man_id == 0x5C || man_id == 0xA1) { config2 = i2c_smbus_read_byte_data(client, LM90_REG_R_CONFIG2); if (config2 < 0) return -ENODEV; - } else - config2 = 0; /* Make compiler happy */ + } if ((address == 0x4C || address == 0x4D) && man_id == 0x01) { /* National Semiconductor */ @@ -1557,6 +1608,16 @@ static int lm90_detect(struct i2c_client *client, && (config1 & 0x3f) == 0x00 && convrate <= 0x07) { name = "max6646"; + } else + /* + * The chip_id of the MAX6654 holds the revision of the chip. + * The lowest 3 bits of the config1 register are unused and + * should return zero when read. + */ + if (chip_id == 0x08 + && (config1 & 0x07) == 0x00 + && convrate <= 0x07) { + name = "max6654"; } } else if (address == 0x4C @@ -1589,18 +1650,26 @@ static int lm90_detect(struct i2c_client *client, && convrate <= 0x08) name = "g781"; } else - if (address == 0x4C - && man_id == 0x55) { /* Texas Instruments */ - int local_ext; + if (man_id == 0x55 && chip_id == 0x00 && + (config1 & 0x1B) == 0x00 && convrate <= 0x09) { + int local_ext, conalert, chen, dfc; local_ext = i2c_smbus_read_byte_data(client, TMP451_REG_R_LOCAL_TEMPL); - - if (chip_id == 0x00 /* TMP451 */ - && (config1 & 0x1B) == 0x00 - && convrate <= 0x09 - && (local_ext & 0x0F) == 0x00) - name = "tmp451"; + conalert = i2c_smbus_read_byte_data(client, + TMP451_REG_CONALERT); + chen = i2c_smbus_read_byte_data(client, TMP461_REG_CHEN); + dfc = i2c_smbus_read_byte_data(client, TMP461_REG_DFC); + + if ((local_ext & 0x0F) == 0x00 && + (conalert & 0xf1) == 0x01 && + (chen & 0xfc) == 0x00 && + (dfc & 0xfc) == 0x00) { + if (address == 0x4c && !(chen & 0x03)) + name = "tmp451"; + else if (address >= 0x48 && address <= 0x4f) + name = "tmp461"; + } } if (!name) { /* identification failed */ @@ -1647,7 +1716,7 @@ static int lm90_init_client(struct i2c_client *client, struct lm90_data *data) lm90_set_convrate(client, data, 500); /* 500ms; 2Hz conversion rate */ /* Check Temperature Range Select */ - if (data->kind == adt7461 || data->kind == tmp451) { + if (data->flags & LM90_HAVE_EXTENDED_TEMP) { if (config & 0x04) data->flags |= LM90_FLAG_ADT7461_EXT; } @@ -1660,6 +1729,15 @@ static int lm90_init_client(struct i2c_client *client, struct lm90_data *data) if (data->kind == max6680) config |= 0x18; + /* + * Put MAX6654 into extended range (0x20, extend minimum range from + * 0 degrees to -64 degrees). Note that extended resolution is not + * possible on the MAX6654 unless conversion rate is set to 1 Hz or + * slower, which is intentionally not done by default. + */ + if (data->kind == max6654) + config |= 0x20; + /* * Select external channel 0 for max6695/96 */ @@ -1806,11 +1884,14 @@ static int lm90_probe(struct i2c_client *client, info->config = data->channel_config; data->channel_config[0] = HWMON_T_INPUT | HWMON_T_MIN | HWMON_T_MAX | - HWMON_T_CRIT | HWMON_T_CRIT_HYST | HWMON_T_MIN_ALARM | - HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM; + HWMON_T_MIN_ALARM | HWMON_T_MAX_ALARM; data->channel_config[1] = HWMON_T_INPUT | HWMON_T_MIN | HWMON_T_MAX | - HWMON_T_CRIT | HWMON_T_CRIT_HYST | HWMON_T_MIN_ALARM | - HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM | HWMON_T_FAULT; + HWMON_T_MIN_ALARM | HWMON_T_MAX_ALARM | HWMON_T_FAULT; + + if (data->flags & LM90_HAVE_CRIT) { + data->channel_config[0] |= HWMON_T_CRIT | HWMON_T_CRIT_ALARM | HWMON_T_CRIT_HYST; + data->channel_config[1] |= HWMON_T_CRIT | HWMON_T_CRIT_ALARM | HWMON_T_CRIT_HYST; + } if (data->flags & LM90_HAVE_OFFSET) data->channel_config[1] |= HWMON_T_OFFSET; diff --git a/drivers/hwmon/mlxreg-fan.c b/drivers/hwmon/mlxreg-fan.c index ed8d59d4eecb3..bd8f5a3aaad9c 100644 --- a/drivers/hwmon/mlxreg-fan.c +++ b/drivers/hwmon/mlxreg-fan.c @@ -291,8 +291,8 @@ static int mlxreg_fan_set_cur_state(struct thermal_cooling_device *cdev, { struct mlxreg_fan *fan = cdev->devdata; unsigned long cur_state; + int i, config = 0; u32 regval; - int i; int err; /* @@ -305,6 +305,12 @@ static int mlxreg_fan_set_cur_state(struct thermal_cooling_device *cdev, * overwritten. */ if (state >= MLXREG_FAN_SPEED_MIN && state <= MLXREG_FAN_SPEED_MAX) { + /* + * This is configuration change, which is only supported through sysfs. + * For configuration non-zero value is to be returned to avoid thermal + * statistics update. + */ + config = 1; state -= MLXREG_FAN_MAX_STATE; for (i = 0; i < state; i++) fan->cooling_levels[i] = state; @@ -319,7 +325,7 @@ static int mlxreg_fan_set_cur_state(struct thermal_cooling_device *cdev, cur_state = MLXREG_FAN_PWM_DUTY2STATE(regval); if (state < cur_state) - return 0; + return config; state = cur_state; } @@ -335,7 +341,7 @@ static int mlxreg_fan_set_cur_state(struct thermal_cooling_device *cdev, dev_err(fan->dev, "Failed to write PWM duty\n"); return err; } - return 0; + return config; } static const struct thermal_cooling_device_ops mlxreg_fan_cooling_ops = { diff --git a/drivers/hwmon/pmbus/lm25066.c b/drivers/hwmon/pmbus/lm25066.c index 05fce86f1f817..41c4bbb9c0572 100644 --- a/drivers/hwmon/pmbus/lm25066.c +++ b/drivers/hwmon/pmbus/lm25066.c @@ -51,26 +51,31 @@ struct __coeff { #define PSC_CURRENT_IN_L (PSC_NUM_CLASSES) #define PSC_POWER_L (PSC_NUM_CLASSES + 1) -static struct __coeff lm25066_coeff[6][PSC_NUM_CLASSES + 2] = { +static struct __coeff lm25066_coeff[][PSC_NUM_CLASSES + 2] = { [lm25056] = { [PSC_VOLTAGE_IN] = { .m = 16296, + .b = 1343, .R = -2, }, [PSC_CURRENT_IN] = { .m = 13797, + .b = -1833, .R = -2, }, [PSC_CURRENT_IN_L] = { .m = 6726, + .b = -537, .R = -2, }, [PSC_POWER] = { .m = 5501, + .b = -2908, .R = -3, }, [PSC_POWER_L] = { .m = 26882, + .b = -5646, .R = -4, }, [PSC_TEMPERATURE] = { @@ -82,26 +87,32 @@ static struct __coeff lm25066_coeff[6][PSC_NUM_CLASSES + 2] = { [lm25066] = { [PSC_VOLTAGE_IN] = { .m = 22070, + .b = -1800, .R = -2, }, [PSC_VOLTAGE_OUT] = { .m = 22070, + .b = -1800, .R = -2, }, [PSC_CURRENT_IN] = { .m = 13661, + .b = -5200, .R = -2, }, [PSC_CURRENT_IN_L] = { .m = 6852, + .b = -3100, .R = -2, }, [PSC_POWER] = { .m = 736, + .b = -3300, .R = -2, }, [PSC_POWER_L] = { .m = 369, + .b = -1900, .R = -2, }, [PSC_TEMPERATURE] = { @@ -111,26 +122,32 @@ static struct __coeff lm25066_coeff[6][PSC_NUM_CLASSES + 2] = { [lm5064] = { [PSC_VOLTAGE_IN] = { .m = 4611, + .b = -642, .R = -2, }, [PSC_VOLTAGE_OUT] = { .m = 4621, + .b = 423, .R = -2, }, [PSC_CURRENT_IN] = { .m = 10742, + .b = 1552, .R = -2, }, [PSC_CURRENT_IN_L] = { .m = 5456, + .b = 2118, .R = -2, }, [PSC_POWER] = { .m = 1204, + .b = 8524, .R = -3, }, [PSC_POWER_L] = { .m = 612, + .b = 11202, .R = -3, }, [PSC_TEMPERATURE] = { @@ -140,26 +157,32 @@ static struct __coeff lm25066_coeff[6][PSC_NUM_CLASSES + 2] = { [lm5066] = { [PSC_VOLTAGE_IN] = { .m = 4587, + .b = -1200, .R = -2, }, [PSC_VOLTAGE_OUT] = { .m = 4587, + .b = -2400, .R = -2, }, [PSC_CURRENT_IN] = { .m = 10753, + .b = -1200, .R = -2, }, [PSC_CURRENT_IN_L] = { .m = 5405, + .b = -600, .R = -2, }, [PSC_POWER] = { .m = 1204, + .b = -6000, .R = -3, }, [PSC_POWER_L] = { .m = 605, + .b = -8000, .R = -3, }, [PSC_TEMPERATURE] = { diff --git a/drivers/hwmon/pmbus/pmbus.h b/drivers/hwmon/pmbus/pmbus.h index d198af3a92b6f..9731f1f830b16 100644 --- a/drivers/hwmon/pmbus/pmbus.h +++ b/drivers/hwmon/pmbus/pmbus.h @@ -292,6 +292,7 @@ enum pmbus_fan_mode { percent = 0, rpm }; /* * STATUS_VOUT, STATUS_INPUT */ +#define PB_VOLTAGE_VIN_OFF BIT(3) #define PB_VOLTAGE_UV_FAULT BIT(4) #define PB_VOLTAGE_UV_WARNING BIT(5) #define PB_VOLTAGE_OV_WARNING BIT(6) diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index 8470097907bc2..beb443d020c20 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -1324,7 +1324,7 @@ static const struct pmbus_limit_attr vin_limit_attrs[] = { .reg = PMBUS_VIN_UV_FAULT_LIMIT, .attr = "lcrit", .alarm = "lcrit_alarm", - .sbit = PB_VOLTAGE_UV_FAULT, + .sbit = PB_VOLTAGE_UV_FAULT | PB_VOLTAGE_VIN_OFF, }, { .reg = PMBUS_VIN_OV_WARN_LIMIT, .attr = "max", @@ -2174,10 +2174,14 @@ static int pmbus_regulator_is_enabled(struct regulator_dev *rdev) { struct device *dev = rdev_get_dev(rdev); struct i2c_client *client = to_i2c_client(dev->parent); + struct pmbus_data *data = i2c_get_clientdata(client); u8 page = rdev_get_id(rdev); int ret; + mutex_lock(&data->update_lock); ret = pmbus_read_byte_data(client, page, PMBUS_OPERATION); + mutex_unlock(&data->update_lock); + if (ret < 0) return ret; @@ -2188,11 +2192,17 @@ static int _pmbus_regulator_on_off(struct regulator_dev *rdev, bool enable) { struct device *dev = rdev_get_dev(rdev); struct i2c_client *client = to_i2c_client(dev->parent); + struct pmbus_data *data = i2c_get_clientdata(client); u8 page = rdev_get_id(rdev); + int ret; - return pmbus_update_byte_data(client, page, PMBUS_OPERATION, - PB_OPERATION_CONTROL_ON, - enable ? PB_OPERATION_CONTROL_ON : 0); + mutex_lock(&data->update_lock); + ret = pmbus_update_byte_data(client, page, PMBUS_OPERATION, + PB_OPERATION_CONTROL_ON, + enable ? PB_OPERATION_CONTROL_ON : 0); + mutex_unlock(&data->update_lock); + + return ret; } static int pmbus_regulator_enable(struct regulator_dev *rdev) diff --git a/drivers/hwmon/sch56xx-common.c b/drivers/hwmon/sch56xx-common.c index 6c84780e358e8..066b12990fbfb 100644 --- a/drivers/hwmon/sch56xx-common.c +++ b/drivers/hwmon/sch56xx-common.c @@ -424,7 +424,7 @@ struct sch56xx_watchdog_data *sch56xx_watchdog_register(struct device *parent, if (nowayout) set_bit(WDOG_NO_WAY_OUT, &data->wddev.status); if (output_enable & SCH56XX_WDOG_OUTPUT_ENABLE) - set_bit(WDOG_ACTIVE, &data->wddev.status); + set_bit(WDOG_HW_RUNNING, &data->wddev.status); /* Since the watchdog uses a downcounter there is no register to read the BIOS set timeout from (if any was set at all) -> diff --git a/drivers/hwmon/tmp401.c b/drivers/hwmon/tmp401.c index fa361d9949db7..debcece02dd33 100644 --- a/drivers/hwmon/tmp401.c +++ b/drivers/hwmon/tmp401.c @@ -731,10 +731,21 @@ static int tmp401_probe(struct i2c_client *client, return 0; } +static const struct of_device_id __maybe_unused tmp4xx_of_match[] = { + { .compatible = "ti,tmp401", }, + { .compatible = "ti,tmp411", }, + { .compatible = "ti,tmp431", }, + { .compatible = "ti,tmp432", }, + { .compatible = "ti,tmp435", }, + { }, +}; +MODULE_DEVICE_TABLE(of, tmp4xx_of_match); + static struct i2c_driver tmp401_driver = { .class = I2C_CLASS_HWMON, .driver = { .name = "tmp401", + .of_match_table = of_match_ptr(tmp4xx_of_match), }, .probe = tmp401_probe, .id_table = tmp401_id, diff --git a/drivers/hwmon/tmp421.c b/drivers/hwmon/tmp421.c index a94e35cff3e5f..876ccf77a825a 100644 --- a/drivers/hwmon/tmp421.c +++ b/drivers/hwmon/tmp421.c @@ -100,23 +100,17 @@ struct tmp421_data { s16 temp[4]; }; -static int temp_from_s16(s16 reg) +static int temp_from_raw(u16 reg, bool extended) { /* Mask out status bits */ int temp = reg & ~0xf; - return (temp * 1000 + 128) / 256; -} - -static int temp_from_u16(u16 reg) -{ - /* Mask out status bits */ - int temp = reg & ~0xf; - - /* Add offset for extended temperature range. */ - temp -= 64 * 256; + if (extended) + temp = temp - 64 * 256; + else + temp = (s16)temp; - return (temp * 1000 + 128) / 256; + return DIV_ROUND_CLOSEST(temp * 1000, 256); } static struct tmp421_data *tmp421_update_device(struct device *dev) @@ -153,17 +147,15 @@ static int tmp421_read(struct device *dev, enum hwmon_sensor_types type, switch (attr) { case hwmon_temp_input: - if (tmp421->config & TMP421_CONFIG_RANGE) - *val = temp_from_u16(tmp421->temp[channel]); - else - *val = temp_from_s16(tmp421->temp[channel]); + *val = temp_from_raw(tmp421->temp[channel], + tmp421->config & TMP421_CONFIG_RANGE); return 0; case hwmon_temp_fault: /* - * The OPEN bit signals a fault. This is bit 0 of the temperature - * register (low byte). + * Any of OPEN or /PVLD bits indicate a hardware mulfunction + * and the conversion result may be incorrect */ - *val = tmp421->temp[channel] & 0x01; + *val = !!(tmp421->temp[channel] & 0x03); return 0; default: return -EOPNOTSUPP; @@ -176,9 +168,6 @@ static umode_t tmp421_is_visible(const void *data, enum hwmon_sensor_types type, { switch (attr) { case hwmon_temp_fault: - if (channel == 0) - return 0; - return 0444; case hwmon_temp_input: return 0444; default: diff --git a/drivers/hwmon/w83791d.c b/drivers/hwmon/w83791d.c index aad8d4da5802b..b586e917e6ede 100644 --- a/drivers/hwmon/w83791d.c +++ b/drivers/hwmon/w83791d.c @@ -273,9 +273,6 @@ struct w83791d_data { char valid; /* !=0 if following fields are valid */ unsigned long last_updated; /* In jiffies */ - /* array of 2 pointers to subclients */ - struct i2c_client *lm75[2]; - /* volts */ u8 in[NUMBER_OF_VIN]; /* Register value */ u8 in_max[NUMBER_OF_VIN]; /* Register value */ @@ -1258,7 +1255,6 @@ static const struct attribute_group w83791d_group_fanpwm45 = { static int w83791d_detect_subclients(struct i2c_client *client) { struct i2c_adapter *adapter = client->adapter; - struct w83791d_data *data = i2c_get_clientdata(client); int address = client->addr; int i, id; u8 val; @@ -1281,22 +1277,19 @@ static int w83791d_detect_subclients(struct i2c_client *client) } val = w83791d_read(client, W83791D_REG_I2C_SUBADDR); - if (!(val & 0x08)) - data->lm75[0] = devm_i2c_new_dummy_device(&client->dev, adapter, - 0x48 + (val & 0x7)); - if (!(val & 0x80)) { - if (!IS_ERR(data->lm75[0]) && - ((val & 0x7) == ((val >> 4) & 0x7))) { - dev_err(&client->dev, - "duplicate addresses 0x%x, " - "use force_subclient\n", - data->lm75[0]->addr); - return -ENODEV; - } - data->lm75[1] = devm_i2c_new_dummy_device(&client->dev, adapter, - 0x48 + ((val >> 4) & 0x7)); + + if (!(val & 0x88) && (val & 0x7) == ((val >> 4) & 0x7)) { + dev_err(&client->dev, + "duplicate addresses 0x%x, use force_subclient\n", 0x48 + (val & 0x7)); + return -ENODEV; } + if (!(val & 0x08)) + devm_i2c_new_dummy_device(&client->dev, adapter, 0x48 + (val & 0x7)); + + if (!(val & 0x80)) + devm_i2c_new_dummy_device(&client->dev, adapter, 0x48 + ((val >> 4) & 0x7)); + return 0; } diff --git a/drivers/hwmon/w83792d.c b/drivers/hwmon/w83792d.c index 7fc8a1160c8f9..f91db4be2fc79 100644 --- a/drivers/hwmon/w83792d.c +++ b/drivers/hwmon/w83792d.c @@ -264,9 +264,6 @@ struct w83792d_data { char valid; /* !=0 if following fields are valid */ unsigned long last_updated; /* In jiffies */ - /* array of 2 pointers to subclients */ - struct i2c_client *lm75[2]; - u8 in[9]; /* Register value */ u8 in_max[9]; /* Register value */ u8 in_min[9]; /* Register value */ @@ -928,7 +925,6 @@ w83792d_detect_subclients(struct i2c_client *new_client) int address = new_client->addr; u8 val; struct i2c_adapter *adapter = new_client->adapter; - struct w83792d_data *data = i2c_get_clientdata(new_client); id = i2c_adapter_id(adapter); if (force_subclients[0] == id && force_subclients[1] == address) { @@ -947,21 +943,19 @@ w83792d_detect_subclients(struct i2c_client *new_client) } val = w83792d_read_value(new_client, W83792D_REG_I2C_SUBADDR); - if (!(val & 0x08)) - data->lm75[0] = devm_i2c_new_dummy_device(&new_client->dev, adapter, - 0x48 + (val & 0x7)); - if (!(val & 0x80)) { - if (!IS_ERR(data->lm75[0]) && - ((val & 0x7) == ((val >> 4) & 0x7))) { - dev_err(&new_client->dev, - "duplicate addresses 0x%x, use force_subclient\n", - data->lm75[0]->addr); - return -ENODEV; - } - data->lm75[1] = devm_i2c_new_dummy_device(&new_client->dev, adapter, - 0x48 + ((val >> 4) & 0x7)); + + if (!(val & 0x88) && (val & 0x7) == ((val >> 4) & 0x7)) { + dev_err(&new_client->dev, + "duplicate addresses 0x%x, use force_subclient\n", 0x48 + (val & 0x7)); + return -ENODEV; } + if (!(val & 0x08)) + devm_i2c_new_dummy_device(&new_client->dev, adapter, 0x48 + (val & 0x7)); + + if (!(val & 0x80)) + devm_i2c_new_dummy_device(&new_client->dev, adapter, 0x48 + ((val >> 4) & 0x7)); + return 0; } diff --git a/drivers/hwmon/w83793.c b/drivers/hwmon/w83793.c index 9df48b70c70c7..640330a3a8dc4 100644 --- a/drivers/hwmon/w83793.c +++ b/drivers/hwmon/w83793.c @@ -202,7 +202,6 @@ static inline s8 TEMP_TO_REG(long val, s8 min, s8 max) } struct w83793_data { - struct i2c_client *lm75[2]; struct device *hwmon_dev; struct mutex update_lock; char valid; /* !=0 if following fields are valid */ @@ -1566,7 +1565,6 @@ w83793_detect_subclients(struct i2c_client *client) int address = client->addr; u8 tmp; struct i2c_adapter *adapter = client->adapter; - struct w83793_data *data = i2c_get_clientdata(client); id = i2c_adapter_id(adapter); if (force_subclients[0] == id && force_subclients[1] == address) { @@ -1586,21 +1584,19 @@ w83793_detect_subclients(struct i2c_client *client) } tmp = w83793_read_value(client, W83793_REG_I2C_SUBADDR); - if (!(tmp & 0x08)) - data->lm75[0] = devm_i2c_new_dummy_device(&client->dev, adapter, - 0x48 + (tmp & 0x7)); - if (!(tmp & 0x80)) { - if (!IS_ERR(data->lm75[0]) - && ((tmp & 0x7) == ((tmp >> 4) & 0x7))) { - dev_err(&client->dev, - "duplicate addresses 0x%x, " - "use force_subclients\n", data->lm75[0]->addr); - return -ENODEV; - } - data->lm75[1] = devm_i2c_new_dummy_device(&client->dev, adapter, - 0x48 + ((tmp >> 4) & 0x7)); + + if (!(tmp & 0x88) && (tmp & 0x7) == ((tmp >> 4) & 0x7)) { + dev_err(&client->dev, + "duplicate addresses 0x%x, use force_subclient\n", 0x48 + (tmp & 0x7)); + return -ENODEV; } + if (!(tmp & 0x08)) + devm_i2c_new_dummy_device(&client->dev, adapter, 0x48 + (tmp & 0x7)); + + if (!(tmp & 0x80)) + devm_i2c_new_dummy_device(&client->dev, adapter, 0x48 + ((tmp >> 4) & 0x7)); + return 0; } diff --git a/drivers/hwtracing/coresight/coresight-cpu-debug.c b/drivers/hwtracing/coresight/coresight-cpu-debug.c index 96544b348c273..ebe34fd6adb0a 100644 --- a/drivers/hwtracing/coresight/coresight-cpu-debug.c +++ b/drivers/hwtracing/coresight/coresight-cpu-debug.c @@ -379,9 +379,10 @@ static int debug_notifier_call(struct notifier_block *self, int cpu; struct debug_drvdata *drvdata; - mutex_lock(&debug_lock); + /* Bail out if we can't acquire the mutex or the functionality is off */ + if (!mutex_trylock(&debug_lock)) + return NOTIFY_DONE; - /* Bail out if the functionality is disabled */ if (!debug_enable) goto skip_dump; @@ -400,7 +401,7 @@ static int debug_notifier_call(struct notifier_block *self, skip_dump: mutex_unlock(&debug_lock); - return 0; + return NOTIFY_DONE; } static struct notifier_block debug_notifier = { diff --git a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c index ee44640edeb52..4c9e2fc533529 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c @@ -364,8 +364,12 @@ static ssize_t mode_store(struct device *dev, mode = ETM_MODE_QELEM(config->mode); /* start by clearing QE bits */ config->cfg &= ~(BIT(13) | BIT(14)); - /* if supported, Q elements with instruction counts are enabled */ - if ((mode & BIT(0)) && (drvdata->q_support & BIT(0))) + /* + * if supported, Q elements with instruction counts are enabled. + * Always set the low bit for any requested mode. Valid combos are + * 0b00, 0b01 and 0b11. + */ + if (mode && drvdata->q_support) config->cfg |= BIT(13); /* * if supported, Q elements with and without instruction diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 2d08a8719506c..647f4e054be69 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -145,6 +145,7 @@ config I2C_I801 Comet Lake (PCH) Elkhart Lake (PCH) Tiger Lake (PCH) + Emmitsburg (PCH) This driver can also be built as a module. If so, the module will be called i2c-i801. @@ -483,7 +484,7 @@ config I2C_BRCMSTB config I2C_CADENCE tristate "Cadence I2C Controller" - depends on ARCH_ZYNQ || ARM64 || XTENSA + depends on ARCH_ZYNQ || ARM64 || XTENSA || COMPILE_TEST help Say yes here to select Cadence I2C Host Controller. This controller is e.g. used by Xilinx Zynq. @@ -894,7 +895,7 @@ config I2C_QCOM_GENI config I2C_QUP tristate "Qualcomm QUP based I2C controller" - depends on ARCH_QCOM + depends on ARCH_QCOM || COMPILE_TEST help If you say yes to this option, support will be included for the built-in I2C interface on the Qualcomm SoCs. diff --git a/drivers/i2c/busses/i2c-at91-master.c b/drivers/i2c/busses/i2c-at91-master.c index a3fcc35ffd3b6..f74d5ad2f1faa 100644 --- a/drivers/i2c/busses/i2c-at91-master.c +++ b/drivers/i2c/busses/i2c-at91-master.c @@ -609,6 +609,7 @@ static int at91_twi_xfer(struct i2c_adapter *adap, struct i2c_msg *msg, int num) unsigned int_addr_flag = 0; struct i2c_msg *m_start = msg; bool is_read; + u8 *dma_buf = NULL; dev_dbg(&adap->dev, "at91_xfer: processing %d messages:\n", num); @@ -656,7 +657,17 @@ static int at91_twi_xfer(struct i2c_adapter *adap, struct i2c_msg *msg, int num) dev->msg = m_start; dev->recv_len_abort = false; + if (dev->use_dma) { + dma_buf = i2c_get_dma_safe_msg_buf(m_start, 1); + if (!dma_buf) { + ret = -ENOMEM; + goto out; + } + dev->buf = dma_buf; + } + ret = at91_do_twi_transfer(dev); + i2c_put_dma_safe_msg_buf(dma_buf, m_start, !ret); ret = (ret < 0) ? ret : num; out: diff --git a/drivers/i2c/busses/i2c-bcm2835.c b/drivers/i2c/busses/i2c-bcm2835.c index 5ab901ad615dd..c265fe4621621 100644 --- a/drivers/i2c/busses/i2c-bcm2835.c +++ b/drivers/i2c/busses/i2c-bcm2835.c @@ -23,6 +23,11 @@ #define BCM2835_I2C_FIFO 0x10 #define BCM2835_I2C_DIV 0x14 #define BCM2835_I2C_DEL 0x18 +/* + * 16-bit field for the number of SCL cycles to wait after rising SCL + * before deciding the slave is not responding. 0 disables the + * timeout detection. + */ #define BCM2835_I2C_CLKT 0x1c #define BCM2835_I2C_C_READ BIT(0) @@ -479,6 +484,12 @@ static int bcm2835_i2c_probe(struct platform_device *pdev) adap->dev.of_node = pdev->dev.of_node; adap->quirks = of_device_get_match_data(&pdev->dev); + /* + * Disable the hardware clock stretching timeout. SMBUS + * specifies a limit for how long the device can stretch the + * clock, but core I2C doesn't. + */ + bcm2835_i2c_writel(i2c_dev, BCM2835_I2C_CLKT, 0); bcm2835_i2c_writel(i2c_dev, BCM2835_I2C_C, 0); ret = i2c_add_adapter(adap); diff --git a/drivers/i2c/busses/i2c-brcmstb.c b/drivers/i2c/busses/i2c-brcmstb.c index 5e89cd6b690ce..36b8aceb972fc 100644 --- a/drivers/i2c/busses/i2c-brcmstb.c +++ b/drivers/i2c/busses/i2c-brcmstb.c @@ -640,7 +640,7 @@ static int brcmstb_i2c_probe(struct platform_device *pdev) /* set the data in/out register size for compatible SoCs */ if (of_device_is_compatible(dev->device->of_node, - "brcmstb,brcmper-i2c")) + "brcm,brcmper-i2c")) dev->data_regsz = sizeof(u8); else dev->data_regsz = sizeof(u32); diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index 17f0dd1f891e2..8750e444f4492 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -198,9 +198,9 @@ static inline bool cdns_is_holdquirk(struct cdns_i2c *id, bool hold_wrkaround) */ static irqreturn_t cdns_i2c_isr(int irq, void *ptr) { - unsigned int isr_status, avail_bytes, updatetx; + unsigned int isr_status, avail_bytes; unsigned int bytes_to_send; - bool hold_quirk; + bool updatetx; struct cdns_i2c *id = ptr; /* Signal completion only after everything is updated */ int done_flag = 0; @@ -219,11 +219,7 @@ static irqreturn_t cdns_i2c_isr(int irq, void *ptr) * Check if transfer size register needs to be updated again for a * large data receive operation. */ - updatetx = 0; - if (id->recv_count > id->curr_recv_count) - updatetx = 1; - - hold_quirk = (id->quirks & CDNS_I2C_BROKEN_HOLD_BIT) && updatetx; + updatetx = id->recv_count > id->curr_recv_count; /* When receiving, handle data interrupt and completion interrupt */ if (id->p_recv_buf && @@ -246,7 +242,7 @@ static irqreturn_t cdns_i2c_isr(int irq, void *ptr) id->recv_count--; id->curr_recv_count--; - if (cdns_is_holdquirk(id, hold_quirk)) + if (cdns_is_holdquirk(id, updatetx)) break; } @@ -257,7 +253,7 @@ static irqreturn_t cdns_i2c_isr(int irq, void *ptr) * maintain transfer size non-zero while performing a large * receive operation. */ - if (cdns_is_holdquirk(id, hold_quirk)) { + if (cdns_is_holdquirk(id, updatetx)) { /* wait while fifo is full */ while (cdns_i2c_readreg(CDNS_I2C_XFER_SIZE_OFFSET) != (id->curr_recv_count - CDNS_I2C_FIFO_DEPTH)) @@ -279,22 +275,6 @@ static irqreturn_t cdns_i2c_isr(int irq, void *ptr) CDNS_I2C_XFER_SIZE_OFFSET); id->curr_recv_count = id->recv_count; } - } else if (id->recv_count && !hold_quirk && - !id->curr_recv_count) { - - /* Set the slave address in address register*/ - cdns_i2c_writereg(id->p_msg->addr & CDNS_I2C_ADDR_MASK, - CDNS_I2C_ADDR_OFFSET); - - if (id->recv_count > CDNS_I2C_TRANSFER_SIZE) { - cdns_i2c_writereg(CDNS_I2C_TRANSFER_SIZE, - CDNS_I2C_XFER_SIZE_OFFSET); - id->curr_recv_count = CDNS_I2C_TRANSFER_SIZE; - } else { - cdns_i2c_writereg(id->recv_count, - CDNS_I2C_XFER_SIZE_OFFSET); - id->curr_recv_count = id->recv_count; - } } /* Clear hold (if not repeated start) and signal completion */ @@ -506,7 +486,7 @@ static void cdns_i2c_master_reset(struct i2c_adapter *adap) static int cdns_i2c_process_msg(struct cdns_i2c *id, struct i2c_msg *msg, struct i2c_adapter *adap) { - unsigned long time_left; + unsigned long time_left, msg_timeout; u32 reg; id->p_msg = msg; @@ -531,8 +511,16 @@ static int cdns_i2c_process_msg(struct cdns_i2c *id, struct i2c_msg *msg, else cdns_i2c_msend(id); + /* Minimal time to execute this message */ + msg_timeout = msecs_to_jiffies((1000 * msg->len * BITS_PER_BYTE) / id->i2c_clk); + /* Plus some wiggle room */ + msg_timeout += msecs_to_jiffies(500); + + if (msg_timeout < adap->timeout) + msg_timeout = adap->timeout; + /* Wait for the signal of completion */ - time_left = wait_for_completion_timeout(&id->xfer_done, adap->timeout); + time_left = wait_for_completion_timeout(&id->xfer_done, msg_timeout); if (time_left == 0) { cdns_i2c_master_reset(adap); dev_err(id->adap.dev.parent, @@ -977,6 +965,7 @@ static int cdns_i2c_probe(struct platform_device *pdev) return 0; err_clk_dis: + clk_notifier_unregister(id->clk, &id->clk_rate_change_nb); clk_disable_unprepare(id->clk); pm_runtime_set_suspended(&pdev->dev); pm_runtime_disable(&pdev->dev); diff --git a/drivers/i2c/busses/i2c-cbus-gpio.c b/drivers/i2c/busses/i2c-cbus-gpio.c index 72df563477b1c..f8639a4457d23 100644 --- a/drivers/i2c/busses/i2c-cbus-gpio.c +++ b/drivers/i2c/busses/i2c-cbus-gpio.c @@ -195,8 +195,9 @@ static u32 cbus_i2c_func(struct i2c_adapter *adapter) } static const struct i2c_algorithm cbus_i2c_algo = { - .smbus_xfer = cbus_i2c_smbus_xfer, - .functionality = cbus_i2c_func, + .smbus_xfer = cbus_i2c_smbus_xfer, + .smbus_xfer_atomic = cbus_i2c_smbus_xfer, + .functionality = cbus_i2c_func, }; static int cbus_i2c_remove(struct platform_device *pdev) diff --git a/drivers/i2c/busses/i2c-designware-common.c b/drivers/i2c/busses/i2c-designware-common.c index 77eb3b849216e..283ac9074c389 100644 --- a/drivers/i2c/busses/i2c-designware-common.c +++ b/drivers/i2c/busses/i2c-designware-common.c @@ -253,9 +253,6 @@ int i2c_dw_prepare_clk(struct dw_i2c_dev *dev, bool prepare) { int ret; - if (IS_ERR(dev->clk)) - return PTR_ERR(dev->clk); - if (prepare) { /* Optional interface clock */ ret = clk_prepare_enable(dev->pclk); diff --git a/drivers/i2c/busses/i2c-designware-pcidrv.c b/drivers/i2c/busses/i2c-designware-pcidrv.c index 05b35ac33ce33..735326e5eb8cf 100644 --- a/drivers/i2c/busses/i2c-designware-pcidrv.c +++ b/drivers/i2c/busses/i2c-designware-pcidrv.c @@ -37,10 +37,10 @@ enum dw_pci_ctl_id_t { }; struct dw_scl_sda_cfg { - u32 ss_hcnt; - u32 fs_hcnt; - u32 ss_lcnt; - u32 fs_lcnt; + u16 ss_hcnt; + u16 fs_hcnt; + u16 ss_lcnt; + u16 fs_lcnt; u32 sda_hold; }; diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index 0c55c54372d7e..75313c80f132f 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -349,8 +349,17 @@ static int dw_i2c_plat_probe(struct platform_device *pdev) goto exit_reset; } - dev->clk = devm_clk_get(&pdev->dev, NULL); - if (!i2c_dw_prepare_clk(dev, true)) { + dev->clk = devm_clk_get_optional(&pdev->dev, NULL); + if (IS_ERR(dev->clk)) { + ret = PTR_ERR(dev->clk); + goto exit_reset; + } + + ret = i2c_dw_prepare_clk(dev, true); + if (ret) + goto exit_reset; + + if (dev->clk) { u64 clk_khz; dev->get_clk_rate_khz = i2c_dw_get_clk_rate_khz; diff --git a/drivers/i2c/busses/i2c-highlander.c b/drivers/i2c/busses/i2c-highlander.c index ff340d7ae2e52..6a880c2623808 100644 --- a/drivers/i2c/busses/i2c-highlander.c +++ b/drivers/i2c/busses/i2c-highlander.c @@ -379,7 +379,7 @@ static int highlander_i2c_probe(struct platform_device *pdev) platform_set_drvdata(pdev, dev); dev->irq = platform_get_irq(pdev, 0); - if (iic_force_poll) + if (dev->irq < 0 || iic_force_poll) dev->irq = 0; if (dev->irq) { diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index a959062ded4f8..7edcb11fcf981 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -54,6 +54,7 @@ * Sunrise Point-H (PCH) 0xa123 32 hard yes yes yes * Sunrise Point-LP (PCH) 0x9d23 32 hard yes yes yes * DNV (SOC) 0x19df 32 hard yes yes yes + * Emmitsburg (PCH) 0x1bc9 32 hard yes yes yes * Broxton (SOC) 0x5ad4 32 hard yes yes yes * Lewisburg (PCH) 0xa1a3 32 hard yes yes yes * Lewisburg Supersku (PCH) 0xa223 32 hard yes yes yes @@ -203,6 +204,7 @@ #define PCI_DEVICE_ID_INTEL_BAYTRAIL_SMBUS 0x0f12 #define PCI_DEVICE_ID_INTEL_CDF_SMBUS 0x18df #define PCI_DEVICE_ID_INTEL_DNV_SMBUS 0x19df +#define PCI_DEVICE_ID_INTEL_EBG_SMBUS 0x1bc9 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_SMBUS 0x1c22 #define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS 0x1d22 /* Patsburg also has three 'Integrated Device Function' SMBus controllers */ @@ -785,6 +787,11 @@ static int i801_block_transaction(struct i801_priv *priv, int result = 0; unsigned char hostc; + if (read_write == I2C_SMBUS_READ && command == I2C_SMBUS_BLOCK_DATA) + data->block[0] = I2C_SMBUS_BLOCK_MAX; + else if (data->block[0] < 1 || data->block[0] > I2C_SMBUS_BLOCK_MAX) + return -EPROTO; + if (command == I2C_SMBUS_I2C_BLOCK_DATA) { if (read_write == I2C_SMBUS_WRITE) { /* set I2C_EN bit in configuration register */ @@ -798,16 +805,6 @@ static int i801_block_transaction(struct i801_priv *priv, } } - if (read_write == I2C_SMBUS_WRITE - || command == I2C_SMBUS_I2C_BLOCK_DATA) { - if (data->block[0] < 1) - data->block[0] = 1; - if (data->block[0] > I2C_SMBUS_BLOCK_MAX) - data->block[0] = I2C_SMBUS_BLOCK_MAX; - } else { - data->block[0] = 32; /* max for SMBus block reads */ - } - /* Experience has shown that the block buffer can only be used for SMBus (not I2C) block transactions, even though the datasheet doesn't mention this limitation. */ @@ -1054,6 +1051,7 @@ static const struct pci_device_id i801_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CDF_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_DNV_SMBUS) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_EBG_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BROXTON_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LEWISBURG_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS) }, @@ -1739,6 +1737,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) case PCI_DEVICE_ID_INTEL_COMETLAKE_SMBUS: case PCI_DEVICE_ID_INTEL_ELKHART_LAKE_SMBUS: case PCI_DEVICE_ID_INTEL_TIGERLAKE_LP_SMBUS: + case PCI_DEVICE_ID_INTEL_EBG_SMBUS: priv->features |= FEATURE_BLOCK_PROC; priv->features |= FEATURE_I2C_BLOCK_READ; priv->features |= FEATURE_IRQ; diff --git a/drivers/i2c/busses/i2c-iop3xx.c b/drivers/i2c/busses/i2c-iop3xx.c index 2f8b8050a2233..899624721c1ea 100644 --- a/drivers/i2c/busses/i2c-iop3xx.c +++ b/drivers/i2c/busses/i2c-iop3xx.c @@ -467,16 +467,14 @@ iop3xx_i2c_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); if (irq < 0) { - ret = -ENXIO; + ret = irq; goto unmap; } ret = request_irq(irq, iop3xx_i2c_irq_handler, 0, pdev->name, adapter_data); - if (ret) { - ret = -EIO; + if (ret) goto unmap; - } memcpy(new_adapter->name, pdev->name, strlen(pdev->name)); new_adapter->owner = THIS_MODULE; diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c index 2f95e25a10f7c..41dd3f6df4dc5 100644 --- a/drivers/i2c/busses/i2c-ismt.c +++ b/drivers/i2c/busses/i2c-ismt.c @@ -77,10 +77,12 @@ #define PCI_DEVICE_ID_INTEL_S1200_SMT1 0x0c5a #define PCI_DEVICE_ID_INTEL_CDF_SMT 0x18ac #define PCI_DEVICE_ID_INTEL_DNV_SMT 0x19ac +#define PCI_DEVICE_ID_INTEL_EBG_SMT 0x1bff #define PCI_DEVICE_ID_INTEL_AVOTON_SMT 0x1f15 #define ISMT_DESC_ENTRIES 2 /* number of descriptor entries */ #define ISMT_MAX_RETRIES 3 /* number of SMBus retries to attempt */ +#define ISMT_LOG_ENTRIES 3 /* number of interrupt cause log entries */ /* Hardware Descriptor Constants - Control Field */ #define ISMT_DESC_CWRL 0x01 /* Command/Write Length */ @@ -174,6 +176,8 @@ struct ismt_priv { u8 head; /* ring buffer head pointer */ struct completion cmp; /* interrupt completion */ u8 buffer[I2C_SMBUS_BLOCK_MAX + 16]; /* temp R/W data buffer */ + dma_addr_t log_dma; + u32 *log; }; /** @@ -184,6 +188,7 @@ static const struct pci_device_id ismt_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_S1200_SMT1) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CDF_SMT) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_DNV_SMT) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_EBG_SMT) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_AVOTON_SMT) }, { 0, } }; @@ -408,6 +413,9 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr, memset(desc, 0, sizeof(struct ismt_desc)); desc->tgtaddr_rw = ISMT_DESC_ADDR_RW(addr, read_write); + /* Always clear the log entries */ + memset(priv->log, 0, ISMT_LOG_ENTRIES * sizeof(u32)); + /* Initialize common control bits */ if (likely(pci_dev_msi_enabled(priv->pci_dev))) desc->control = ISMT_DESC_INT | ISMT_DESC_FAIR; @@ -697,6 +705,8 @@ static void ismt_hw_init(struct ismt_priv *priv) /* initialize the Master Descriptor Base Address (MDBA) */ writeq(priv->io_rng_dma, priv->smba + ISMT_MSTR_MDBA); + writeq(priv->log_dma, priv->smba + ISMT_GR_SMTICL); + /* initialize the Master Control Register (MCTRL) */ writel(ISMT_MCTRL_MEIE, priv->smba + ISMT_MSTR_MCTRL); @@ -784,6 +794,12 @@ static int ismt_dev_init(struct ismt_priv *priv) priv->head = 0; init_completion(&priv->cmp); + priv->log = dmam_alloc_coherent(&priv->pci_dev->dev, + ISMT_LOG_ENTRIES * sizeof(u32), + &priv->log_dma, GFP_KERNEL); + if (!priv->log) + return -ENOMEM; + return 0; } diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c index af349661fd769..8de8296d25831 100644 --- a/drivers/i2c/busses/i2c-mpc.c +++ b/drivers/i2c/busses/i2c-mpc.c @@ -105,23 +105,30 @@ static irqreturn_t mpc_i2c_isr(int irq, void *dev_id) /* Sometimes 9th clock pulse isn't generated, and slave doesn't release * the bus, because it wants to send ACK. * Following sequence of enabling/disabling and sending start/stop generates - * the 9 pulses, so it's all OK. + * the 9 pulses, each with a START then ending with STOP, so it's all OK. */ static void mpc_i2c_fixup(struct mpc_i2c *i2c) { int k; - u32 delay_val = 1000000 / i2c->real_clk + 1; - - if (delay_val < 2) - delay_val = 2; + unsigned long flags; for (k = 9; k; k--) { writeccr(i2c, 0); - writeccr(i2c, CCR_MSTA | CCR_MTX | CCR_MEN); + writeb(0, i2c->base + MPC_I2C_SR); /* clear any status bits */ + writeccr(i2c, CCR_MEN | CCR_MSTA); /* START */ + readb(i2c->base + MPC_I2C_DR); /* init xfer */ + udelay(15); /* let it hit the bus */ + local_irq_save(flags); /* should not be delayed further */ + writeccr(i2c, CCR_MEN | CCR_MSTA | CCR_RSTA); /* delay SDA */ readb(i2c->base + MPC_I2C_DR); - writeccr(i2c, CCR_MEN); - udelay(delay_val << 1); + if (k != 1) + udelay(5); + local_irq_restore(flags); } + writeccr(i2c, CCR_MEN); /* Initiate STOP */ + readb(i2c->base + MPC_I2C_DR); + udelay(15); /* Let STOP propagate */ + writeccr(i2c, 0); } static int i2c_wait(struct mpc_i2c *i2c, unsigned timeout, int writing) diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c index e1ef0122ef759..5587e7c549c4f 100644 --- a/drivers/i2c/busses/i2c-mt65xx.c +++ b/drivers/i2c/busses/i2c-mt65xx.c @@ -932,7 +932,7 @@ static int mtk_i2c_probe(struct platform_device *pdev) return PTR_ERR(i2c->pdmabase); irq = platform_get_irq(pdev, 0); - if (irq <= 0) + if (irq < 0) return irq; init_completion(&i2c->msg_complete); diff --git a/drivers/i2c/busses/i2c-mt7621.c b/drivers/i2c/busses/i2c-mt7621.c index 62df8379bc895..65e72101b393b 100644 --- a/drivers/i2c/busses/i2c-mt7621.c +++ b/drivers/i2c/busses/i2c-mt7621.c @@ -304,7 +304,8 @@ static int mtk_i2c_probe(struct platform_device *pdev) if (i2c->bus_freq == 0) { dev_warn(i2c->dev, "clock-frequency 0 not supported\n"); - return -EINVAL; + ret = -EINVAL; + goto err_disable_clk; } adap = &i2c->adap; @@ -322,10 +323,15 @@ static int mtk_i2c_probe(struct platform_device *pdev) ret = i2c_add_adapter(adap); if (ret < 0) - return ret; + goto err_disable_clk; dev_info(&pdev->dev, "clock %u kHz\n", i2c->bus_freq / 1000); + return 0; + +err_disable_clk: + clk_disable_unprepare(i2c->clk); + return ret; } diff --git a/drivers/i2c/busses/i2c-pasemi.c b/drivers/i2c/busses/i2c-pasemi.c index 20f2772c0e79b..2c909522f0f38 100644 --- a/drivers/i2c/busses/i2c-pasemi.c +++ b/drivers/i2c/busses/i2c-pasemi.c @@ -137,6 +137,12 @@ static int pasemi_i2c_xfer_msg(struct i2c_adapter *adapter, TXFIFO_WR(smbus, msg->buf[msg->len-1] | (stop ? MTXFIFO_STOP : 0)); + + if (stop) { + err = pasemi_smb_waitready(smbus); + if (err) + goto reset_out; + } } return 0; diff --git a/drivers/i2c/busses/i2c-rk3x.c b/drivers/i2c/busses/i2c-rk3x.c index 1a33007b03e9e..1107a5e7229e4 100644 --- a/drivers/i2c/busses/i2c-rk3x.c +++ b/drivers/i2c/busses/i2c-rk3x.c @@ -422,8 +422,8 @@ static void rk3x_i2c_handle_read(struct rk3x_i2c *i2c, unsigned int ipd) if (!(ipd & REG_INT_MBRF)) return; - /* ack interrupt */ - i2c_writel(i2c, REG_INT_MBRF, REG_IPD); + /* ack interrupt (read also produces a spurious START flag, clear it too) */ + i2c_writel(i2c, REG_INT_MBRF | REG_INT_START, REG_IPD); /* Can only handle a maximum of 32 bytes at a time */ if (len > 32) diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c index d6322698b2458..e6f927c6f8af9 100644 --- a/drivers/i2c/busses/i2c-s3c2410.c +++ b/drivers/i2c/busses/i2c-s3c2410.c @@ -1141,7 +1141,7 @@ static int s3c24xx_i2c_probe(struct platform_device *pdev) */ if (!(i2c->quirks & QUIRK_POLL)) { i2c->irq = ret = platform_get_irq(pdev, 0); - if (ret <= 0) { + if (ret < 0) { dev_err(&pdev->dev, "cannot find IRQ\n"); clk_unprepare(i2c->clk); return ret; diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index a7977eef2ead5..0f0c41174dddc 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -1394,6 +1394,7 @@ static irqreturn_t stm32f7_i2c_isr_event(int irq, void *data) { struct stm32f7_i2c_dev *i2c_dev = data; struct stm32f7_i2c_msg *f7_msg = &i2c_dev->f7_msg; + struct stm32_i2c_dma *dma = i2c_dev->dma; void __iomem *base = i2c_dev->base; u32 status, mask; int ret = IRQ_HANDLED; @@ -1418,6 +1419,10 @@ static irqreturn_t stm32f7_i2c_isr_event(int irq, void *data) if (status & STM32F7_I2C_ISR_NACKF) { dev_dbg(i2c_dev->dev, "<%s>: Receive NACK\n", __func__); writel_relaxed(STM32F7_I2C_ICR_NACKCF, base + STM32F7_I2C_ICR); + if (i2c_dev->use_dma) { + stm32f7_i2c_disable_dma_req(i2c_dev); + dmaengine_terminate_all(dma->chan_using); + } f7_msg->result = -ENXIO; } @@ -1433,7 +1438,7 @@ static irqreturn_t stm32f7_i2c_isr_event(int irq, void *data) /* Clear STOP flag */ writel_relaxed(STM32F7_I2C_ICR_STOPCF, base + STM32F7_I2C_ICR); - if (i2c_dev->use_dma) { + if (i2c_dev->use_dma && !f7_msg->result) { ret = IRQ_WAKE_THREAD; } else { i2c_dev->master_mode = false; @@ -1446,7 +1451,7 @@ static irqreturn_t stm32f7_i2c_isr_event(int irq, void *data) if (f7_msg->stop) { mask = STM32F7_I2C_CR2_STOP; stm32f7_i2c_set_bits(base + STM32F7_I2C_CR2, mask); - } else if (i2c_dev->use_dma) { + } else if (i2c_dev->use_dma && !f7_msg->result) { ret = IRQ_WAKE_THREAD; } else if (f7_msg->smbus) { stm32f7_i2c_smbus_rep_start(i2c_dev); @@ -1586,12 +1591,23 @@ static int stm32f7_i2c_xfer(struct i2c_adapter *i2c_adap, time_left = wait_for_completion_timeout(&i2c_dev->complete, i2c_dev->adap.timeout); ret = f7_msg->result; + if (ret) { + /* + * It is possible that some unsent data have already been + * written into TXDR. To avoid sending old data in a + * further transfer, flush TXDR in case of any error + */ + writel_relaxed(STM32F7_I2C_ISR_TXE, + i2c_dev->base + STM32F7_I2C_ISR); + goto pm_free; + } if (!time_left) { dev_dbg(i2c_dev->dev, "Access to slave 0x%x timed out\n", i2c_dev->msg->addr); if (i2c_dev->use_dma) dmaengine_terminate_all(dma->chan_using); + stm32f7_i2c_wait_free_bus(i2c_dev); ret = -ETIMEDOUT; } @@ -1634,13 +1650,22 @@ static int stm32f7_i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, timeout = wait_for_completion_timeout(&i2c_dev->complete, i2c_dev->adap.timeout); ret = f7_msg->result; - if (ret) + if (ret) { + /* + * It is possible that some unsent data have already been + * written into TXDR. To avoid sending old data in a + * further transfer, flush TXDR in case of any error + */ + writel_relaxed(STM32F7_I2C_ISR_TXE, + i2c_dev->base + STM32F7_I2C_ISR); goto pm_free; + } if (!timeout) { dev_dbg(dev, "Access to slave 0x%x timed out\n", f7_msg->addr); if (i2c_dev->use_dma) dmaengine_terminate_all(dma->chan_using); + stm32f7_i2c_wait_free_bus(i2c_dev); ret = -ETIMEDOUT; goto pm_free; } diff --git a/drivers/i2c/busses/i2c-thunderx-pcidrv.c b/drivers/i2c/busses/i2c-thunderx-pcidrv.c index 19f8eec387172..107aeb8b54da4 100644 --- a/drivers/i2c/busses/i2c-thunderx-pcidrv.c +++ b/drivers/i2c/busses/i2c-thunderx-pcidrv.c @@ -208,6 +208,7 @@ static int thunder_i2c_probe_pci(struct pci_dev *pdev, i2c->adap.bus_recovery_info = &octeon_i2c_recovery_info; i2c->adap.dev.parent = dev; i2c->adap.dev.of_node = pdev->dev.of_node; + i2c->adap.dev.fwnode = dev->fwnode; snprintf(i2c->adap.name, sizeof(i2c->adap.name), "Cavium ThunderX i2c adapter at %s", dev_name(dev)); i2c_set_adapdata(&i2c->adap, i2c); diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index 37b3b9307d076..a48bee59dcdec 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -715,7 +715,6 @@ static const struct i2c_adapter_quirks xiic_quirks = { static const struct i2c_adapter xiic_adapter = { .owner = THIS_MODULE, - .name = DRIVER_NAME, .class = I2C_CLASS_DEPRECATED, .algo = &xiic_algorithm, .quirks = &xiic_quirks, @@ -752,6 +751,8 @@ static int xiic_i2c_probe(struct platform_device *pdev) i2c_set_adapdata(&i2c->adap, i2c); i2c->adap.dev.parent = &pdev->dev; i2c->adap.dev.of_node = pdev->dev.of_node; + snprintf(i2c->adap.name, sizeof(i2c->adap.name), + DRIVER_NAME " %s", pdev->name); mutex_init(&i2c->lock); init_waitqueue_head(&i2c->wait); diff --git a/drivers/i2c/busses/i2c-xlr.c b/drivers/i2c/busses/i2c-xlr.c index 34cd4b3085402..dda6cb848405b 100644 --- a/drivers/i2c/busses/i2c-xlr.c +++ b/drivers/i2c/busses/i2c-xlr.c @@ -433,11 +433,15 @@ static int xlr_i2c_probe(struct platform_device *pdev) i2c_set_adapdata(&priv->adap, priv); ret = i2c_add_numbered_adapter(&priv->adap); if (ret < 0) - return ret; + goto err_unprepare_clk; platform_set_drvdata(pdev, priv); dev_info(&priv->adap.dev, "Added I2C Bus.\n"); return 0; + +err_unprepare_clk: + clk_unprepare(clk); + return ret; } static int xlr_i2c_remove(struct platform_device *pdev) diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c index c70983780ae79..fe466ee4c49bf 100644 --- a/drivers/i2c/i2c-core-acpi.c +++ b/drivers/i2c/i2c-core-acpi.c @@ -436,6 +436,7 @@ static int i2c_acpi_notify(struct notifier_block *nb, unsigned long value, break; i2c_acpi_register_device(adapter, adev, &info); + put_device(&adapter->dev); break; case ACPI_RECONFIG_DEVICE_REMOVE: if (!acpi_device_enumerated(adev)) diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index c9ae1895cd48a..7da6ca26a5f56 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -536,6 +536,9 @@ static long compat_i2cdev_ioctl(struct file *file, unsigned int cmd, unsigned lo sizeof(rdwr_arg))) return -EFAULT; + if (!rdwr_arg.msgs || rdwr_arg.nmsgs == 0) + return -EINVAL; + if (rdwr_arg.nmsgs > I2C_RDWR_IOCTL_MAX_MSGS) return -EINVAL; diff --git a/drivers/i2c/muxes/i2c-demux-pinctrl.c b/drivers/i2c/muxes/i2c-demux-pinctrl.c index 5365199a31f41..f7a7405d4350a 100644 --- a/drivers/i2c/muxes/i2c-demux-pinctrl.c +++ b/drivers/i2c/muxes/i2c-demux-pinctrl.c @@ -261,7 +261,7 @@ static int i2c_demux_pinctrl_probe(struct platform_device *pdev) err = device_create_file(&pdev->dev, &dev_attr_available_masters); if (err) - goto err_rollback; + goto err_rollback_activation; err = device_create_file(&pdev->dev, &dev_attr_current_master); if (err) @@ -271,8 +271,9 @@ static int i2c_demux_pinctrl_probe(struct platform_device *pdev) err_rollback_available: device_remove_file(&pdev->dev, &dev_attr_available_masters); -err_rollback: +err_rollback_activation: i2c_demux_deactivate_master(priv); +err_rollback: for (j = 0; j < i; j++) { of_node_put(priv->chan[j].parent_np); of_changeset_destroy(&priv->chan[j].chgset); diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 4b0ad1864d76e..9d029d8979e94 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1256,14 +1256,13 @@ static bool intel_idle_acpi_cst_extract(void) if (!intel_idle_cst_usable()) continue; - if (!acpi_processor_claim_cst_control()) { - acpi_state_table.count = 0; - return false; - } + if (!acpi_processor_claim_cst_control()) + break; return true; } + acpi_state_table.count = 0; pr_debug("ACPI _CST not found or not usable\n"); return false; } @@ -1280,7 +1279,7 @@ static void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) struct acpi_processor_cx *cx; struct cpuidle_state *state; - if (intel_idle_max_cstate_reached(cstate)) + if (intel_idle_max_cstate_reached(cstate - 1)) break; cx = &acpi_state_table.states[cstate]; diff --git a/drivers/iio/accel/bma180.c b/drivers/iio/accel/bma180.c index acf1cc2bee07b..014788ea493ac 100644 --- a/drivers/iio/accel/bma180.c +++ b/drivers/iio/accel/bma180.c @@ -793,11 +793,12 @@ static int bma180_probe(struct i2c_client *client, data->trig->dev.parent = &client->dev; data->trig->ops = &bma180_trigger_ops; iio_trigger_set_drvdata(data->trig, indio_dev); - indio_dev->trig = iio_trigger_get(data->trig); ret = iio_trigger_register(data->trig); if (ret) goto err_trigger_free; + + indio_dev->trig = iio_trigger_get(data->trig); } ret = iio_triggered_buffer_setup(indio_dev, NULL, diff --git a/drivers/iio/accel/bmc150-accel-core.c b/drivers/iio/accel/bmc150-accel-core.c index bcdf25f32e220..a05d55125d13a 100644 --- a/drivers/iio/accel/bmc150-accel-core.c +++ b/drivers/iio/accel/bmc150-accel-core.c @@ -1649,11 +1649,14 @@ int bmc150_accel_core_probe(struct device *dev, struct regmap *regmap, int irq, ret = iio_device_register(indio_dev); if (ret < 0) { dev_err(dev, "Unable to register iio device\n"); - goto err_trigger_unregister; + goto err_pm_cleanup; } return 0; +err_pm_cleanup: + pm_runtime_dont_use_autosuspend(dev); + pm_runtime_disable(dev); err_trigger_unregister: bmc150_accel_unregister_triggers(data, BMC150_ACCEL_TRIGGERS - 1); err_buffer_cleanup: diff --git a/drivers/iio/accel/kxcjk-1013.c b/drivers/iio/accel/kxcjk-1013.c index 57db60bf2d4ca..bd762976a3815 100644 --- a/drivers/iio/accel/kxcjk-1013.c +++ b/drivers/iio/accel/kxcjk-1013.c @@ -1409,14 +1409,16 @@ static int kxcjk1013_probe(struct i2c_client *client, ret = iio_device_register(indio_dev); if (ret < 0) { dev_err(&client->dev, "unable to register iio device\n"); - goto err_buffer_cleanup; + goto err_pm_cleanup; } return 0; +err_pm_cleanup: + pm_runtime_dont_use_autosuspend(&client->dev); + pm_runtime_disable(&client->dev); err_buffer_cleanup: - if (data->dready_trig) - iio_triggered_buffer_cleanup(indio_dev); + iio_triggered_buffer_cleanup(indio_dev); err_trigger_unregister: if (data->dready_trig) iio_trigger_unregister(data->dready_trig); @@ -1439,8 +1441,8 @@ static int kxcjk1013_remove(struct i2c_client *client) pm_runtime_set_suspended(&client->dev); pm_runtime_put_noidle(&client->dev); + iio_triggered_buffer_cleanup(indio_dev); if (data->dready_trig) { - iio_triggered_buffer_cleanup(indio_dev); iio_trigger_unregister(data->dready_trig); iio_trigger_unregister(data->motion_trig); } diff --git a/drivers/iio/accel/kxsd9.c b/drivers/iio/accel/kxsd9.c index 76429e2a6fb8f..bc18f33c53e14 100644 --- a/drivers/iio/accel/kxsd9.c +++ b/drivers/iio/accel/kxsd9.c @@ -224,14 +224,14 @@ static irqreturn_t kxsd9_trigger_handler(int irq, void *p) hw_values.chan, sizeof(hw_values.chan)); if (ret) { - dev_err(st->dev, - "error reading data\n"); - return ret; + dev_err(st->dev, "error reading data: %d\n", ret); + goto out; } iio_push_to_buffers_with_timestamp(indio_dev, &hw_values, iio_get_time_ns(indio_dev)); +out: iio_trigger_notify_done(indio_dev->trig); return IRQ_HANDLED; diff --git a/drivers/iio/accel/mma8452.c b/drivers/iio/accel/mma8452.c index 85d453b3f5ec1..74942bfc676a1 100644 --- a/drivers/iio/accel/mma8452.c +++ b/drivers/iio/accel/mma8452.c @@ -1473,7 +1473,7 @@ static int mma8452_trigger_setup(struct iio_dev *indio_dev) if (ret) return ret; - indio_dev->trig = trig; + indio_dev->trig = iio_trigger_get(trig); return 0; } @@ -1489,10 +1489,14 @@ static int mma8452_reset(struct i2c_client *client) int i; int ret; - ret = i2c_smbus_write_byte_data(client, MMA8452_CTRL_REG2, + /* + * Find on fxls8471, after config reset bit, it reset immediately, + * and will not give ACK, so here do not check the return value. + * The following code will read the reset register, and check whether + * this reset works. + */ + i2c_smbus_write_byte_data(client, MMA8452_CTRL_REG2, MMA8452_CTRL_REG2_RST); - if (ret < 0) - return ret; for (i = 0; i < 10; i++) { usleep_range(100, 200); diff --git a/drivers/iio/accel/mma9551.c b/drivers/iio/accel/mma9551.c index 99e4a21ca9421..8315c7ee66cf3 100644 --- a/drivers/iio/accel/mma9551.c +++ b/drivers/iio/accel/mma9551.c @@ -496,11 +496,14 @@ static int mma9551_probe(struct i2c_client *client, ret = iio_device_register(indio_dev); if (ret < 0) { dev_err(&client->dev, "unable to register iio device\n"); - goto out_poweroff; + goto err_pm_cleanup; } return 0; +err_pm_cleanup: + pm_runtime_dont_use_autosuspend(&client->dev); + pm_runtime_disable(&client->dev); out_poweroff: mma9551_set_device_state(client, false); diff --git a/drivers/iio/accel/mma9553.c b/drivers/iio/accel/mma9553.c index 312070dcf035a..73e85196d0bd3 100644 --- a/drivers/iio/accel/mma9553.c +++ b/drivers/iio/accel/mma9553.c @@ -1135,12 +1135,15 @@ static int mma9553_probe(struct i2c_client *client, ret = iio_device_register(indio_dev); if (ret < 0) { dev_err(&client->dev, "unable to register iio device\n"); - goto out_poweroff; + goto err_pm_cleanup; } dev_dbg(&indio_dev->dev, "Registered device %s\n", name); return 0; +err_pm_cleanup: + pm_runtime_dont_use_autosuspend(&client->dev); + pm_runtime_disable(&client->dev); out_poweroff: mma9551_set_device_state(client, false); return ret; diff --git a/drivers/iio/accel/mxc4005.c b/drivers/iio/accel/mxc4005.c index d81b02642a0b3..110b7ea76ef2f 100644 --- a/drivers/iio/accel/mxc4005.c +++ b/drivers/iio/accel/mxc4005.c @@ -462,8 +462,6 @@ static int mxc4005_probe(struct i2c_client *client, data->dready_trig->dev.parent = &client->dev; data->dready_trig->ops = &mxc4005_trigger_ops; iio_trigger_set_drvdata(data->dready_trig, indio_dev); - indio_dev->trig = data->dready_trig; - iio_trigger_get(indio_dev->trig); ret = devm_iio_trigger_register(&client->dev, data->dready_trig); if (ret) { @@ -471,6 +469,8 @@ static int mxc4005_probe(struct i2c_client *client, "failed to register trigger\n"); return ret; } + + indio_dev->trig = iio_trigger_get(data->dready_trig); } return devm_iio_device_register(&client->dev, indio_dev); diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c index fa808f9c0d9af..793a803919c52 100644 --- a/drivers/iio/adc/ad7124.c +++ b/drivers/iio/adc/ad7124.c @@ -63,7 +63,7 @@ #define AD7124_CONFIG_REF_SEL(x) FIELD_PREP(AD7124_CONFIG_REF_SEL_MSK, x) #define AD7124_CONFIG_PGA_MSK GENMASK(2, 0) #define AD7124_CONFIG_PGA(x) FIELD_PREP(AD7124_CONFIG_PGA_MSK, x) -#define AD7124_CONFIG_IN_BUFF_MSK GENMASK(7, 6) +#define AD7124_CONFIG_IN_BUFF_MSK GENMASK(6, 5) #define AD7124_CONFIG_IN_BUFF(x) FIELD_PREP(AD7124_CONFIG_IN_BUFF_MSK, x) /* AD7124_FILTER_X */ @@ -142,7 +142,6 @@ static const struct iio_chan_spec ad7124_channel_template = { .sign = 'u', .realbits = 24, .storagebits = 32, - .shift = 8, .endianness = IIO_BE, }, }; diff --git a/drivers/iio/adc/ad7768-1.c b/drivers/iio/adc/ad7768-1.c index 0f6c1be1cda2c..9e26517870595 100644 --- a/drivers/iio/adc/ad7768-1.c +++ b/drivers/iio/adc/ad7768-1.c @@ -470,8 +470,8 @@ static irqreturn_t ad7768_trigger_handler(int irq, void *p) iio_push_to_buffers_with_timestamp(indio_dev, &st->data.scan, iio_get_time_ns(indio_dev)); - iio_trigger_notify_done(indio_dev->trig); err_unlock: + iio_trigger_notify_done(indio_dev->trig); mutex_unlock(&st->lock); return IRQ_HANDLED; diff --git a/drivers/iio/adc/aspeed_adc.c b/drivers/iio/adc/aspeed_adc.c index d3fc39df535dc..552baf327d0c4 100644 --- a/drivers/iio/adc/aspeed_adc.c +++ b/drivers/iio/adc/aspeed_adc.c @@ -184,6 +184,7 @@ static int aspeed_adc_probe(struct platform_device *pdev) data = iio_priv(indio_dev); data->dev = &pdev->dev; + platform_set_drvdata(pdev, indio_dev); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); data->base = devm_ioremap_resource(&pdev->dev, res); diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c index def4abeb47cae..8854da453669c 100644 --- a/drivers/iio/adc/at91-sama5d2_adc.c +++ b/drivers/iio/adc/at91-sama5d2_adc.c @@ -1369,7 +1369,8 @@ static int at91_adc_read_info_raw(struct iio_dev *indio_dev, *val = st->conversion_value; ret = at91_adc_adjust_val_osr(st, val); if (chan->scan_type.sign == 's') - *val = sign_extend32(*val, 11); + *val = sign_extend32(*val, + chan->scan_type.realbits - 1); st->conversion_done = false; } diff --git a/drivers/iio/adc/axp20x_adc.c b/drivers/iio/adc/axp20x_adc.c index 88059480da17c..e526c7d6e044e 100644 --- a/drivers/iio/adc/axp20x_adc.c +++ b/drivers/iio/adc/axp20x_adc.c @@ -251,19 +251,8 @@ static int axp22x_adc_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int *val) { struct axp20x_adc_iio *info = iio_priv(indio_dev); - int size; - /* - * N.B.: Unlike the Chinese datasheets tell, the charging current is - * stored on 12 bits, not 13 bits. Only discharging current is on 13 - * bits. - */ - if (chan->type == IIO_CURRENT && chan->channel == AXP22X_BATT_DISCHRG_I) - size = 13; - else - size = 12; - - *val = axp20x_read_variable_width(info->regmap, chan->address, size); + *val = axp20x_read_variable_width(info->regmap, chan->address, 12); if (*val < 0) return *val; @@ -386,9 +375,8 @@ static int axp22x_adc_scale(struct iio_chan_spec const *chan, int *val, return IIO_VAL_INT_PLUS_MICRO; case IIO_CURRENT: - *val = 0; - *val2 = 500000; - return IIO_VAL_INT_PLUS_MICRO; + *val = 1; + return IIO_VAL_INT; case IIO_TEMP: *val = 100; diff --git a/drivers/iio/adc/axp288_adc.c b/drivers/iio/adc/axp288_adc.c index 8ea2aed6d6f5a..99ceaa09ef392 100644 --- a/drivers/iio/adc/axp288_adc.c +++ b/drivers/iio/adc/axp288_adc.c @@ -196,6 +196,14 @@ static const struct dmi_system_id axp288_adc_ts_bias_override[] = { }, .driver_data = (void *)(uintptr_t)AXP288_ADC_TS_BIAS_80UA, }, + { + /* Nuvision Solo 10 Draw */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TMAX"), + DMI_MATCH(DMI_PRODUCT_NAME, "TM101W610L"), + }, + .driver_data = (void *)(uintptr_t)AXP288_ADC_TS_BIAS_80UA, + }, {} }; diff --git a/drivers/iio/adc/dln2-adc.c b/drivers/iio/adc/dln2-adc.c index 65c7c9329b1c3..2e37834633ff8 100644 --- a/drivers/iio/adc/dln2-adc.c +++ b/drivers/iio/adc/dln2-adc.c @@ -248,7 +248,6 @@ static int dln2_adc_set_chan_period(struct dln2_adc *dln2, static int dln2_adc_read(struct dln2_adc *dln2, unsigned int channel) { int ret, i; - struct iio_dev *indio_dev = platform_get_drvdata(dln2->pdev); u16 conflict; __le16 value; int olen = sizeof(value); @@ -257,13 +256,9 @@ static int dln2_adc_read(struct dln2_adc *dln2, unsigned int channel) .chan = channel, }; - ret = iio_device_claim_direct_mode(indio_dev); - if (ret < 0) - return ret; - ret = dln2_adc_set_chan_enabled(dln2, channel, true); if (ret < 0) - goto release_direct; + return ret; ret = dln2_adc_set_port_enabled(dln2, true, &conflict); if (ret < 0) { @@ -300,8 +295,6 @@ static int dln2_adc_read(struct dln2_adc *dln2, unsigned int channel) dln2_adc_set_port_enabled(dln2, false, NULL); disable_chan: dln2_adc_set_chan_enabled(dln2, channel, false); -release_direct: - iio_device_release_direct_mode(indio_dev); return ret; } @@ -337,10 +330,16 @@ static int dln2_adc_read_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: + ret = iio_device_claim_direct_mode(indio_dev); + if (ret < 0) + return ret; + mutex_lock(&dln2->mutex); ret = dln2_adc_read(dln2, chan->channel); mutex_unlock(&dln2->mutex); + iio_device_release_direct_mode(indio_dev); + if (ret < 0) return ret; @@ -666,7 +665,11 @@ static int dln2_adc_probe(struct platform_device *pdev) return -ENOMEM; } iio_trigger_set_drvdata(dln2->trig, dln2); - devm_iio_trigger_register(dev, dln2->trig); + ret = devm_iio_trigger_register(dev, dln2->trig); + if (ret) { + dev_err(dev, "failed to register trigger: %d\n", ret); + return ret; + } iio_trigger_set_immutable(indio_dev, dln2->trig); ret = devm_iio_triggered_buffer_setup(dev, indio_dev, NULL, diff --git a/drivers/iio/adc/men_z188_adc.c b/drivers/iio/adc/men_z188_adc.c index 3b2fbb7ce4310..26caee73b7641 100644 --- a/drivers/iio/adc/men_z188_adc.c +++ b/drivers/iio/adc/men_z188_adc.c @@ -103,6 +103,7 @@ static int men_z188_probe(struct mcb_device *dev, struct z188_adc *adc; struct iio_dev *indio_dev; struct resource *mem; + int ret; indio_dev = devm_iio_device_alloc(&dev->dev, sizeof(struct z188_adc)); if (!indio_dev) @@ -129,8 +130,14 @@ static int men_z188_probe(struct mcb_device *dev, adc->mem = mem; mcb_set_drvdata(dev, indio_dev); - return iio_device_register(indio_dev); + ret = iio_device_register(indio_dev); + if (ret) + goto err_unmap; + + return 0; +err_unmap: + iounmap(adc->base); err: mcb_release_mem(mem); return -ENXIO; diff --git a/drivers/iio/adc/mt6577_auxadc.c b/drivers/iio/adc/mt6577_auxadc.c index 2449d91e47665..9cdb9084c64e8 100644 --- a/drivers/iio/adc/mt6577_auxadc.c +++ b/drivers/iio/adc/mt6577_auxadc.c @@ -82,6 +82,10 @@ static const struct iio_chan_spec mt6577_auxadc_iio_channels[] = { MT6577_AUXADC_CHANNEL(15), }; +/* For Voltage calculation */ +#define VOLTAGE_FULL_RANGE 1500 /* VA voltage */ +#define AUXADC_PRECISE 4096 /* 12 bits */ + static int mt_auxadc_get_cali_data(int rawdata, bool enable_cali) { return rawdata; @@ -191,6 +195,10 @@ static int mt6577_auxadc_read_raw(struct iio_dev *indio_dev, } if (adc_dev->dev_comp->sample_data_cali) *val = mt_auxadc_get_cali_data(*val, true); + + /* Convert adc raw data to voltage: 0 - 1500 mV */ + *val = *val * VOLTAGE_FULL_RANGE / AUXADC_PRECISE; + return IIO_VAL_INT; default: diff --git a/drivers/iio/adc/sc27xx_adc.c b/drivers/iio/adc/sc27xx_adc.c index a6c046575ec3a..5b79c8b9ccde1 100644 --- a/drivers/iio/adc/sc27xx_adc.c +++ b/drivers/iio/adc/sc27xx_adc.c @@ -36,8 +36,8 @@ /* Bits and mask definition for SC27XX_ADC_CH_CFG register */ #define SC27XX_ADC_CHN_ID_MASK GENMASK(4, 0) -#define SC27XX_ADC_SCALE_MASK GENMASK(10, 8) -#define SC27XX_ADC_SCALE_SHIFT 8 +#define SC27XX_ADC_SCALE_MASK GENMASK(10, 9) +#define SC27XX_ADC_SCALE_SHIFT 9 /* Bits definitions for SC27XX_ADC_INT_EN registers */ #define SC27XX_ADC_IRQ_EN BIT(0) @@ -103,14 +103,14 @@ static struct sc27xx_adc_linear_graph small_scale_graph = { 100, 341, }; -static const struct sc27xx_adc_linear_graph big_scale_graph_calib = { - 4200, 856, - 3600, 733, +static const struct sc27xx_adc_linear_graph sc2731_big_scale_graph_calib = { + 4200, 850, + 3600, 728, }; -static const struct sc27xx_adc_linear_graph small_scale_graph_calib = { - 1000, 833, - 100, 80, +static const struct sc27xx_adc_linear_graph sc2731_small_scale_graph_calib = { + 1000, 838, + 100, 84, }; static int sc27xx_adc_get_calib_data(u32 calib_data, int calib_adc) @@ -130,11 +130,11 @@ static int sc27xx_adc_scale_calibration(struct sc27xx_adc_data *data, size_t len; if (big_scale) { - calib_graph = &big_scale_graph_calib; + calib_graph = &sc2731_big_scale_graph_calib; graph = &big_scale_graph; cell_name = "big_scale_calib"; } else { - calib_graph = &small_scale_graph_calib; + calib_graph = &sc2731_small_scale_graph_calib; graph = &small_scale_graph; cell_name = "small_scale_calib"; } diff --git a/drivers/iio/adc/stm32-adc-core.c b/drivers/iio/adc/stm32-adc-core.c index 14d6a537289cb..0ecec5a15fbf0 100644 --- a/drivers/iio/adc/stm32-adc-core.c +++ b/drivers/iio/adc/stm32-adc-core.c @@ -815,7 +815,7 @@ static const struct stm32_adc_priv_cfg stm32h7_adc_priv_cfg = { static const struct stm32_adc_priv_cfg stm32mp1_adc_priv_cfg = { .regs = &stm32h7_adc_common_regs, .clk_sel = stm32h7_adc_clk_sel, - .max_clk_rate_hz = 40000000, + .max_clk_rate_hz = 36000000, .has_syscfg = HAS_VBOOSTER | HAS_ANASWVDD, .num_irqs = 2, }; diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c index 94fde39d9ff7a..d1bbd2b197fc6 100644 --- a/drivers/iio/adc/stm32-adc.c +++ b/drivers/iio/adc/stm32-adc.c @@ -933,6 +933,7 @@ static int stm32h7_adc_prepare(struct stm32_adc *adc) static void stm32h7_adc_unprepare(struct stm32_adc *adc) { + stm32_adc_writel(adc, STM32H7_ADC_PCSEL, 0); stm32h7_adc_disable(adc); stm32h7_adc_enter_pwr_down(adc); } diff --git a/drivers/iio/adc/stmpe-adc.c b/drivers/iio/adc/stmpe-adc.c index bd72727fc417a..35ae801c4d35f 100644 --- a/drivers/iio/adc/stmpe-adc.c +++ b/drivers/iio/adc/stmpe-adc.c @@ -61,7 +61,7 @@ struct stmpe_adc { static int stmpe_read_voltage(struct stmpe_adc *info, struct iio_chan_spec const *chan, int *val) { - long ret; + unsigned long ret; mutex_lock(&info->lock); @@ -79,7 +79,7 @@ static int stmpe_read_voltage(struct stmpe_adc *info, ret = wait_for_completion_timeout(&info->completion, STMPE_ADC_TIMEOUT); - if (ret <= 0) { + if (ret == 0) { stmpe_reg_write(info->stmpe, STMPE_REG_ADC_INT_STA, STMPE_ADC_CH(info->channel)); mutex_unlock(&info->lock); @@ -96,7 +96,7 @@ static int stmpe_read_voltage(struct stmpe_adc *info, static int stmpe_read_temp(struct stmpe_adc *info, struct iio_chan_spec const *chan, int *val) { - long ret; + unsigned long ret; mutex_lock(&info->lock); @@ -114,7 +114,7 @@ static int stmpe_read_temp(struct stmpe_adc *info, ret = wait_for_completion_timeout(&info->completion, STMPE_ADC_TIMEOUT); - if (ret <= 0) { + if (ret == 0) { mutex_unlock(&info->lock); return -ETIMEDOUT; } diff --git a/drivers/iio/adc/ti-adc128s052.c b/drivers/iio/adc/ti-adc128s052.c index 1e5a936b5b6ad..4267d756cd50e 100644 --- a/drivers/iio/adc/ti-adc128s052.c +++ b/drivers/iio/adc/ti-adc128s052.c @@ -172,7 +172,13 @@ static int adc128_probe(struct spi_device *spi) mutex_init(&adc->lock); ret = iio_device_register(indio_dev); + if (ret) + goto err_disable_regulator; + return 0; + +err_disable_regulator: + regulator_disable(adc->reg); return ret; } diff --git a/drivers/iio/adc/twl6030-gpadc.c b/drivers/iio/adc/twl6030-gpadc.c index f24148bd15de4..fb77c3ff5a3e4 100644 --- a/drivers/iio/adc/twl6030-gpadc.c +++ b/drivers/iio/adc/twl6030-gpadc.c @@ -911,6 +911,8 @@ static int twl6030_gpadc_probe(struct platform_device *pdev) ret = devm_request_threaded_irq(dev, irq, NULL, twl6030_gpadc_irq_handler, IRQF_ONESHOT, "twl6030_gpadc", indio_dev); + if (ret) + return ret; ret = twl6030_gpadc_enable_irq(TWL6030_GPADC_RT_SW1_EOC_MASK); if (ret < 0) { diff --git a/drivers/iio/afe/iio-rescale.c b/drivers/iio/afe/iio-rescale.c index e9ceee66d1e7c..d9c41db8155f9 100644 --- a/drivers/iio/afe/iio-rescale.c +++ b/drivers/iio/afe/iio-rescale.c @@ -38,7 +38,7 @@ static int rescale_read_raw(struct iio_dev *indio_dev, int *val, int *val2, long mask) { struct rescale *rescale = iio_priv(indio_dev); - unsigned long long tmp; + s64 tmp; int ret; switch (mask) { @@ -59,10 +59,10 @@ static int rescale_read_raw(struct iio_dev *indio_dev, *val2 = rescale->denominator; return IIO_VAL_FRACTIONAL; case IIO_VAL_FRACTIONAL_LOG2: - tmp = *val * 1000000000LL; - do_div(tmp, rescale->denominator); + tmp = (s64)*val * 1000000000LL; + tmp = div_s64(tmp, rescale->denominator); tmp *= rescale->numerator; - do_div(tmp, 1000000000LL); + tmp = div_s64(tmp, 1000000000LL); *val = tmp; return ret; default: diff --git a/drivers/iio/chemical/ccs811.c b/drivers/iio/chemical/ccs811.c index 7bf4e9a16a6ae..a2ee6bbda7f4d 100644 --- a/drivers/iio/chemical/ccs811.c +++ b/drivers/iio/chemical/ccs811.c @@ -418,11 +418,11 @@ static int ccs811_probe(struct i2c_client *client, data->drdy_trig->dev.parent = &client->dev; data->drdy_trig->ops = &ccs811_trigger_ops; iio_trigger_set_drvdata(data->drdy_trig, indio_dev); - indio_dev->trig = data->drdy_trig; - iio_trigger_get(indio_dev->trig); ret = iio_trigger_register(data->drdy_trig); if (ret) goto err_poweroff; + + indio_dev->trig = iio_trigger_get(data->drdy_trig); } ret = iio_triggered_buffer_setup(indio_dev, NULL, diff --git a/drivers/iio/common/ssp_sensors/ssp_spi.c b/drivers/iio/common/ssp_sensors/ssp_spi.c index 7db3d5886e3ee..d98e0a04add05 100644 --- a/drivers/iio/common/ssp_sensors/ssp_spi.c +++ b/drivers/iio/common/ssp_sensors/ssp_spi.c @@ -137,7 +137,7 @@ static int ssp_print_mcu_debug(char *data_frame, int *data_index, if (length > received_len - *data_index || length <= 0) { ssp_dbg("[SSP]: MSG From MCU-invalid debug length(%d/%d)\n", length, received_len); - return length ? length : -EPROTO; + return -EPROTO; } ssp_dbg("[SSP]: MSG From MCU - %s\n", &data_frame[*data_index]); @@ -273,6 +273,8 @@ static int ssp_parse_dataframe(struct ssp_data *data, char *dataframe, int len) for (idx = 0; idx < len;) { switch (dataframe[idx++]) { case SSP_MSG2AP_INST_BYPASS_DATA: + if (idx >= len) + return -EPROTO; sd = dataframe[idx++]; if (sd < 0 || sd >= SSP_SENSOR_MAX) { dev_err(SSP_DEV, @@ -282,10 +284,13 @@ static int ssp_parse_dataframe(struct ssp_data *data, char *dataframe, int len) if (indio_devs[sd]) { spd = iio_priv(indio_devs[sd]); - if (spd->process_data) + if (spd->process_data) { + if (idx >= len) + return -EPROTO; spd->process_data(indio_devs[sd], &dataframe[idx], data->timestamp); + } } else { dev_err(SSP_DEV, "no client for frame\n"); } @@ -293,6 +298,8 @@ static int ssp_parse_dataframe(struct ssp_data *data, char *dataframe, int len) idx += ssp_offset_map[sd]; break; case SSP_MSG2AP_INST_DEBUG_DATA: + if (idx >= len) + return -EPROTO; sd = ssp_print_mcu_debug(dataframe, &idx, len); if (sd) { dev_err(SSP_DEV, diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c index 364683783ae52..c25b0bc89b0c2 100644 --- a/drivers/iio/common/st_sensors/st_sensors_core.c +++ b/drivers/iio/common/st_sensors/st_sensors_core.c @@ -76,16 +76,18 @@ static int st_sensors_match_odr(struct st_sensor_settings *sensor_settings, int st_sensors_set_odr(struct iio_dev *indio_dev, unsigned int odr) { - int err; + int err = 0; struct st_sensor_odr_avl odr_out = {0, 0}; struct st_sensor_data *sdata = iio_priv(indio_dev); + mutex_lock(&sdata->odr_lock); + if (!sdata->sensor_settings->odr.mask) - return 0; + goto unlock_mutex; err = st_sensors_match_odr(sdata->sensor_settings, odr, &odr_out); if (err < 0) - goto st_sensors_match_odr_error; + goto unlock_mutex; if ((sdata->sensor_settings->odr.addr == sdata->sensor_settings->pw.addr) && @@ -108,7 +110,9 @@ int st_sensors_set_odr(struct iio_dev *indio_dev, unsigned int odr) if (err >= 0) sdata->odr = odr_out.hz; -st_sensors_match_odr_error: +unlock_mutex: + mutex_unlock(&sdata->odr_lock); + return err; } EXPORT_SYMBOL(st_sensors_set_odr); @@ -384,6 +388,8 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev, struct st_sensors_platform_data *of_pdata; int err = 0; + mutex_init(&sdata->odr_lock); + /* If OF/DT pdata exists, it will take precedence of anything else */ of_pdata = st_sensors_of_probe(indio_dev->dev.parent, pdata); if (of_pdata) @@ -575,18 +581,24 @@ int st_sensors_read_info_raw(struct iio_dev *indio_dev, err = -EBUSY; goto out; } else { + mutex_lock(&sdata->odr_lock); err = st_sensors_set_enable(indio_dev, true); - if (err < 0) + if (err < 0) { + mutex_unlock(&sdata->odr_lock); goto out; + } msleep((sdata->sensor_settings->bootime * 1000) / sdata->odr); err = st_sensors_read_axis_data(indio_dev, ch, val); - if (err < 0) + if (err < 0) { + mutex_unlock(&sdata->odr_lock); goto out; + } *val = *val >> ch->scan_type.shift; err = st_sensors_set_enable(indio_dev, false); + mutex_unlock(&sdata->odr_lock); } out: mutex_unlock(&indio_dev->mlock); diff --git a/drivers/iio/dac/ad5446.c b/drivers/iio/dac/ad5446.c index 61c670f7fc5f1..5fbda0431d6ab 100644 --- a/drivers/iio/dac/ad5446.c +++ b/drivers/iio/dac/ad5446.c @@ -170,7 +170,7 @@ static int ad5446_read_raw(struct iio_dev *indio_dev, switch (m) { case IIO_CHAN_INFO_RAW: - *val = st->cached_val; + *val = st->cached_val >> chan->scan_type.shift; return IIO_VAL_INT; case IIO_CHAN_INFO_SCALE: *val = st->vref_mv; @@ -527,8 +527,15 @@ static int ad5622_write(struct ad5446_state *st, unsigned val) { struct i2c_client *client = to_i2c_client(st->dev); __be16 data = cpu_to_be16(val); + int ret; + + ret = i2c_master_send(client, (char *)&data, sizeof(data)); + if (ret < 0) + return ret; + if (ret != sizeof(data)) + return -EIO; - return i2c_master_send(client, (char *)&data, sizeof(data)); + return 0; } /** diff --git a/drivers/iio/dac/ad5592r-base.c b/drivers/iio/dac/ad5592r-base.c index 424922cad1e39..87a51a85a642d 100644 --- a/drivers/iio/dac/ad5592r-base.c +++ b/drivers/iio/dac/ad5592r-base.c @@ -530,7 +530,7 @@ static int ad5592r_alloc_channels(struct ad5592r_state *st) if (!ret) st->channel_modes[reg] = tmp; - fwnode_property_read_u32(child, "adi,off-state", &tmp); + ret = fwnode_property_read_u32(child, "adi,off-state", &tmp); if (!ret) st->channel_offstate[reg] = tmp; } diff --git a/drivers/iio/dac/ad5624r_spi.c b/drivers/iio/dac/ad5624r_spi.c index e6c022e1dc1cf..17cc8b3fc5d82 100644 --- a/drivers/iio/dac/ad5624r_spi.c +++ b/drivers/iio/dac/ad5624r_spi.c @@ -229,7 +229,7 @@ static int ad5624r_probe(struct spi_device *spi) if (!indio_dev) return -ENOMEM; st = iio_priv(indio_dev); - st->reg = devm_regulator_get(&spi->dev, "vcc"); + st->reg = devm_regulator_get_optional(&spi->dev, "vref"); if (!IS_ERR(st->reg)) { ret = regulator_enable(st->reg); if (ret) @@ -240,6 +240,22 @@ static int ad5624r_probe(struct spi_device *spi) goto error_disable_reg; voltage_uv = ret; + } else { + if (PTR_ERR(st->reg) != -ENODEV) + return PTR_ERR(st->reg); + /* Backwards compatibility. This naming is not correct */ + st->reg = devm_regulator_get_optional(&spi->dev, "vcc"); + if (!IS_ERR(st->reg)) { + ret = regulator_enable(st->reg); + if (ret) + return ret; + + ret = regulator_get_voltage(st->reg); + if (ret < 0) + goto error_disable_reg; + + voltage_uv = ret; + } } spi_set_drvdata(spi, indio_dev); diff --git a/drivers/iio/dac/ti-dac5571.c b/drivers/iio/dac/ti-dac5571.c index 3a2bb0efe50de..51c41ccf00ad9 100644 --- a/drivers/iio/dac/ti-dac5571.c +++ b/drivers/iio/dac/ti-dac5571.c @@ -352,6 +352,7 @@ static int dac5571_probe(struct i2c_client *client, data->dac5571_pwrdwn = dac5571_pwrdwn_quad; break; default: + ret = -EINVAL; goto err; } diff --git a/drivers/iio/dummy/iio_simple_dummy.c b/drivers/iio/dummy/iio_simple_dummy.c index 6cb02299a2152..18cfe1cb7a408 100644 --- a/drivers/iio/dummy/iio_simple_dummy.c +++ b/drivers/iio/dummy/iio_simple_dummy.c @@ -568,10 +568,9 @@ static struct iio_sw_device *iio_dummy_probe(const char *name) struct iio_sw_device *swd; swd = kzalloc(sizeof(*swd), GFP_KERNEL); - if (!swd) { - ret = -ENOMEM; - goto error_kzalloc; - } + if (!swd) + return ERR_PTR(-ENOMEM); + /* * Allocate an IIO device. * @@ -583,7 +582,7 @@ static struct iio_sw_device *iio_dummy_probe(const char *name) indio_dev = iio_device_alloc(sizeof(*st)); if (!indio_dev) { ret = -ENOMEM; - goto error_ret; + goto error_free_swd; } st = iio_priv(indio_dev); @@ -614,6 +613,10 @@ static struct iio_sw_device *iio_dummy_probe(const char *name) * indio_dev->name = spi_get_device_id(spi)->name; */ indio_dev->name = kstrdup(name, GFP_KERNEL); + if (!indio_dev->name) { + ret = -ENOMEM; + goto error_free_device; + } /* Provide description of available channels */ indio_dev->channels = iio_dummy_channels; @@ -630,7 +633,7 @@ static struct iio_sw_device *iio_dummy_probe(const char *name) ret = iio_simple_dummy_events_register(indio_dev); if (ret < 0) - goto error_free_device; + goto error_free_name; ret = iio_simple_dummy_configure_buffer(indio_dev); if (ret < 0) @@ -647,11 +650,12 @@ static struct iio_sw_device *iio_dummy_probe(const char *name) iio_simple_dummy_unconfigure_buffer(indio_dev); error_unregister_events: iio_simple_dummy_events_unregister(indio_dev); +error_free_name: + kfree(indio_dev->name); error_free_device: iio_device_free(indio_dev); -error_ret: +error_free_swd: kfree(swd); -error_kzalloc: return ERR_PTR(ret); } diff --git a/drivers/iio/gyro/bmg160_core.c b/drivers/iio/gyro/bmg160_core.c index 276bed47e8d66..bf1355360c518 100644 --- a/drivers/iio/gyro/bmg160_core.c +++ b/drivers/iio/gyro/bmg160_core.c @@ -1173,11 +1173,14 @@ int bmg160_core_probe(struct device *dev, struct regmap *regmap, int irq, ret = iio_device_register(indio_dev); if (ret < 0) { dev_err(dev, "unable to register iio device\n"); - goto err_buffer_cleanup; + goto err_pm_cleanup; } return 0; +err_pm_cleanup: + pm_runtime_dont_use_autosuspend(dev); + pm_runtime_disable(dev); err_buffer_cleanup: iio_triggered_buffer_cleanup(indio_dev); err_trigger_unregister: diff --git a/drivers/iio/gyro/itg3200_buffer.c b/drivers/iio/gyro/itg3200_buffer.c index 1c3c1bd53374a..98b3f021f0bec 100644 --- a/drivers/iio/gyro/itg3200_buffer.c +++ b/drivers/iio/gyro/itg3200_buffer.c @@ -61,9 +61,9 @@ static irqreturn_t itg3200_trigger_handler(int irq, void *p) iio_push_to_buffers_with_timestamp(indio_dev, &scan, pf->timestamp); +error_ret: iio_trigger_notify_done(indio_dev->trig); -error_ret: return IRQ_HANDLED; } diff --git a/drivers/iio/gyro/mpu3050-core.c b/drivers/iio/gyro/mpu3050-core.c index 75db410b5054e..0f5de4afe33f6 100644 --- a/drivers/iio/gyro/mpu3050-core.c +++ b/drivers/iio/gyro/mpu3050-core.c @@ -874,6 +874,7 @@ static int mpu3050_power_up(struct mpu3050 *mpu3050) ret = regmap_update_bits(mpu3050->map, MPU3050_PWR_MGM, MPU3050_PWR_MGM_SLEEP, 0); if (ret) { + regulator_bulk_disable(ARRAY_SIZE(mpu3050->regs), mpu3050->regs); dev_err(mpu3050->dev, "error setting power mode\n"); return ret; } diff --git a/drivers/iio/imu/kmx61.c b/drivers/iio/imu/kmx61.c index e67466100aff4..c7d19f9ca7652 100644 --- a/drivers/iio/imu/kmx61.c +++ b/drivers/iio/imu/kmx61.c @@ -1393,7 +1393,7 @@ static int kmx61_probe(struct i2c_client *client, ret = iio_device_register(data->acc_indio_dev); if (ret < 0) { dev_err(&client->dev, "Failed to register acc iio device\n"); - goto err_buffer_cleanup_mag; + goto err_pm_cleanup; } ret = iio_device_register(data->mag_indio_dev); @@ -1406,6 +1406,9 @@ static int kmx61_probe(struct i2c_client *client, err_iio_unregister_acc: iio_device_unregister(data->acc_indio_dev); +err_pm_cleanup: + pm_runtime_dont_use_autosuspend(&client->dev); + pm_runtime_disable(&client->dev); err_buffer_cleanup_mag: if (client->irq > 0) iio_triggered_buffer_cleanup(data->mag_indio_dev); diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c index 057a4b0100106..8850da8e25d69 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c @@ -1015,6 +1015,8 @@ static int st_lsm6dsx_set_odr(struct st_lsm6dsx_sensor *sensor, u16 req_odr) int err; switch (sensor->id) { + case ST_LSM6DSX_ID_GYRO: + break; case ST_LSM6DSX_ID_EXT0: case ST_LSM6DSX_ID_EXT1: case ST_LSM6DSX_ID_EXT2: @@ -1040,8 +1042,8 @@ static int st_lsm6dsx_set_odr(struct st_lsm6dsx_sensor *sensor, u16 req_odr) } break; } - default: - break; + default: /* should never occur */ + return -EINVAL; } if (req_odr > 0) { diff --git a/drivers/iio/industrialio-trigger.c b/drivers/iio/industrialio-trigger.c index 3908a9a900354..65de21d9dda15 100644 --- a/drivers/iio/industrialio-trigger.c +++ b/drivers/iio/industrialio-trigger.c @@ -549,7 +549,6 @@ static struct iio_trigger *viio_trigger_alloc(const char *fmt, va_list vargs) irq_modify_status(trig->subirq_base + i, IRQ_NOREQUEST | IRQ_NOAUTOEN, IRQ_NOPROBE); } - get_device(&trig->dev); return trig; diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c index 5a8351c9a4265..ca0fe902a7db4 100644 --- a/drivers/iio/inkern.c +++ b/drivers/iio/inkern.c @@ -588,28 +588,50 @@ EXPORT_SYMBOL_GPL(iio_read_channel_average_raw); static int iio_convert_raw_to_processed_unlocked(struct iio_channel *chan, int raw, int *processed, unsigned int scale) { - int scale_type, scale_val, scale_val2, offset; + int scale_type, scale_val, scale_val2; + int offset_type, offset_val, offset_val2; s64 raw64 = raw; - int ret; - ret = iio_channel_read(chan, &offset, NULL, IIO_CHAN_INFO_OFFSET); - if (ret >= 0) - raw64 += offset; + offset_type = iio_channel_read(chan, &offset_val, &offset_val2, + IIO_CHAN_INFO_OFFSET); + if (offset_type >= 0) { + switch (offset_type) { + case IIO_VAL_INT: + break; + case IIO_VAL_INT_PLUS_MICRO: + case IIO_VAL_INT_PLUS_NANO: + /* + * Both IIO_VAL_INT_PLUS_MICRO and IIO_VAL_INT_PLUS_NANO + * implicitely truncate the offset to it's integer form. + */ + break; + case IIO_VAL_FRACTIONAL: + offset_val /= offset_val2; + break; + case IIO_VAL_FRACTIONAL_LOG2: + offset_val >>= offset_val2; + break; + default: + return -EINVAL; + } + + raw64 += offset_val; + } scale_type = iio_channel_read(chan, &scale_val, &scale_val2, IIO_CHAN_INFO_SCALE); if (scale_type < 0) { /* - * Just pass raw values as processed if no scaling is - * available. + * If no channel scaling is available apply consumer scale to + * raw value and return. */ - *processed = raw; + *processed = raw * scale; return 0; } switch (scale_type) { case IIO_VAL_INT: - *processed = raw64 * scale_val; + *processed = raw64 * scale_val * scale; break; case IIO_VAL_INT_PLUS_MICRO: if (scale_val2 < 0) diff --git a/drivers/iio/light/ltr501.c b/drivers/iio/light/ltr501.c index 8bc01e8a424b0..d84d6895ad30d 100644 --- a/drivers/iio/light/ltr501.c +++ b/drivers/iio/light/ltr501.c @@ -1272,7 +1272,7 @@ static irqreturn_t ltr501_trigger_handler(int irq, void *p) ret = regmap_bulk_read(data->regmap, LTR501_ALS_DATA1, (u8 *)als_buf, sizeof(als_buf)); if (ret < 0) - return ret; + goto done; if (test_bit(0, indio_dev->active_scan_mask)) scan.channels[j++] = le16_to_cpu(als_buf[1]); if (test_bit(1, indio_dev->active_scan_mask)) diff --git a/drivers/iio/light/opt3001.c b/drivers/iio/light/opt3001.c index 92004a2563ea8..5588066c83246 100644 --- a/drivers/iio/light/opt3001.c +++ b/drivers/iio/light/opt3001.c @@ -275,6 +275,8 @@ static int opt3001_get_lux(struct opt3001 *opt, int *val, int *val2) ret = wait_event_timeout(opt->result_ready_queue, opt->result_ready, msecs_to_jiffies(OPT3001_RESULT_READY_LONG)); + if (ret == 0) + return -ETIMEDOUT; } else { /* Sleep for result ready time */ timeout = (opt->int_time == OPT3001_INT_TIME_SHORT) ? @@ -311,9 +313,7 @@ static int opt3001_get_lux(struct opt3001 *opt, int *val, int *val2) /* Disallow IRQ to access the device while lock is active */ opt->ok_to_ignore_lock = false; - if (ret == 0) - return -ETIMEDOUT; - else if (ret < 0) + if (ret < 0) return ret; if (opt->use_irq) { diff --git a/drivers/iio/light/stk3310.c b/drivers/iio/light/stk3310.c index 185c24a75ae6f..d9d32ada635bb 100644 --- a/drivers/iio/light/stk3310.c +++ b/drivers/iio/light/stk3310.c @@ -544,9 +544,8 @@ static irqreturn_t stk3310_irq_event_handler(int irq, void *private) mutex_lock(&data->lock); ret = regmap_field_read(data->reg_flag_nf, &dir); if (ret < 0) { - dev_err(&data->client->dev, "register read failed\n"); - mutex_unlock(&data->lock); - return ret; + dev_err(&data->client->dev, "register read failed: %d\n", ret); + goto out; } event = IIO_UNMOD_EVENT_CODE(IIO_PROXIMITY, 1, IIO_EV_TYPE_THRESH, @@ -558,6 +557,7 @@ static irqreturn_t stk3310_irq_event_handler(int irq, void *private) ret = regmap_field_write(data->reg_flag_psint, 0); if (ret < 0) dev_err(&data->client->dev, "failed to reset interrupts\n"); +out: mutex_unlock(&data->lock); return IRQ_HANDLED; diff --git a/drivers/iio/magnetometer/ak8975.c b/drivers/iio/magnetometer/ak8975.c index 82af903a765b2..c54eed3f4edce 100644 --- a/drivers/iio/magnetometer/ak8975.c +++ b/drivers/iio/magnetometer/ak8975.c @@ -391,6 +391,7 @@ static int ak8975_power_on(const struct ak8975_data *data) if (ret) { dev_warn(&data->client->dev, "Failed to enable specified Vid supply\n"); + regulator_disable(data->vdd); return ret; } /* diff --git a/drivers/iio/magnetometer/bmc150_magn.c b/drivers/iio/magnetometer/bmc150_magn.c index 087dc16c2185c..ef8f429cc96f0 100644 --- a/drivers/iio/magnetometer/bmc150_magn.c +++ b/drivers/iio/magnetometer/bmc150_magn.c @@ -944,13 +944,14 @@ int bmc150_magn_probe(struct device *dev, struct regmap *regmap, ret = iio_device_register(indio_dev); if (ret < 0) { dev_err(dev, "unable to register iio device\n"); - goto err_disable_runtime_pm; + goto err_pm_cleanup; } dev_dbg(dev, "Registered device %s\n", name); return 0; -err_disable_runtime_pm: +err_pm_cleanup: + pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); err_buffer_cleanup: iio_triggered_buffer_cleanup(indio_dev); diff --git a/drivers/iio/trigger/iio-trig-sysfs.c b/drivers/iio/trigger/iio-trig-sysfs.c index e09e58072872c..2277d6336ac06 100644 --- a/drivers/iio/trigger/iio-trig-sysfs.c +++ b/drivers/iio/trigger/iio-trig-sysfs.c @@ -196,6 +196,7 @@ static int iio_sysfs_trigger_remove(int id) } iio_trigger_unregister(t->trig); + irq_work_sync(&t->work); iio_trigger_free(t->trig); list_del(&t->l); diff --git a/drivers/iio/trigger/stm32-timer-trigger.c b/drivers/iio/trigger/stm32-timer-trigger.c index f98510c714b57..4ca9daadb3f16 100644 --- a/drivers/iio/trigger/stm32-timer-trigger.c +++ b/drivers/iio/trigger/stm32-timer-trigger.c @@ -800,6 +800,6 @@ static struct platform_driver stm32_timer_trigger_driver = { }; module_platform_driver(stm32_timer_trigger_driver); -MODULE_ALIAS("platform: stm32-timer-trigger"); +MODULE_ALIAS("platform:stm32-timer-trigger"); MODULE_DESCRIPTION("STMicroelectronics STM32 Timer Trigger driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index ec9e9598894f6..de7df5ab06f3b 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -820,6 +820,7 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) u16 pkey, index; u8 p; enum ib_port_state port_state; + int ret; int i; cma_dev = NULL; @@ -838,9 +839,14 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) if (ib_get_cached_port_state(cur_dev->device, p, &port_state)) continue; - for (i = 0; !rdma_query_gid(cur_dev->device, - p, i, &gid); - i++) { + + for (i = 0; i < cur_dev->device->port_data[p].immutable.gid_tbl_len; + ++i) { + ret = rdma_query_gid(cur_dev->device, p, i, + &gid); + if (ret) + continue; + if (!memcmp(&gid, dgid, sizeof(gid))) { cma_dev = cur_dev; sgid = gid; @@ -2553,7 +2559,7 @@ int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout) { struct rdma_id_private *id_priv; - if (id->qp_type != IB_QPT_RC) + if (id->qp_type != IB_QPT_RC && id->qp_type != IB_QPT_XRC_INI) return -EINVAL; id_priv = container_of(id, struct rdma_id_private, id); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 256d379bba676..de66d7da1bf6e 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2438,7 +2438,8 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid, ++i) { ret = rdma_query_gid(device, port, i, &tmp_gid); if (ret) - return ret; + continue; + if (!memcmp(&tmp_gid, gid, sizeof *gid)) { *port_num = port; if (index) diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index da8adadf47559..75b6da00065a3 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -1187,29 +1187,34 @@ static int __init iw_cm_init(void) ret = iwpm_init(RDMA_NL_IWCM); if (ret) - pr_err("iw_cm: couldn't init iwpm\n"); - else - rdma_nl_register(RDMA_NL_IWCM, iwcm_nl_cb_table); + return ret; + iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", 0); if (!iwcm_wq) - return -ENOMEM; + goto err_alloc; iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm", iwcm_ctl_table); if (!iwcm_ctl_table_hdr) { pr_err("iw_cm: couldn't register sysctl paths\n"); - destroy_workqueue(iwcm_wq); - return -ENOMEM; + goto err_sysctl; } + rdma_nl_register(RDMA_NL_IWCM, iwcm_nl_cb_table); return 0; + +err_sysctl: + destroy_workqueue(iwcm_wq); +err_alloc: + iwpm_exit(RDMA_NL_IWCM); + return -ENOMEM; } static void __exit iw_cm_cleanup(void) { + rdma_nl_unregister(RDMA_NL_IWCM); unregister_net_sysctl_table(iwcm_ctl_table_hdr); destroy_workqueue(iwcm_wq); - rdma_nl_unregister(RDMA_NL_IWCM); iwpm_exit(RDMA_NL_IWCM); } diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index d2d70c89193ff..11ab6390eda4d 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -760,8 +760,9 @@ static void ib_nl_set_path_rec_attrs(struct sk_buff *skb, /* Construct the family header first */ header = skb_put(skb, NLMSG_ALIGN(sizeof(*header))); - memcpy(header->device_name, dev_name(&query->port->agent->device->dev), - LS_DEVICE_NAME_MAX); + strscpy_pad(header->device_name, + dev_name(&query->port->agent->device->dev), + LS_DEVICE_NAME_MAX); header->port_num = query->port->port_num; if ((comp_mask & IB_SA_PATH_REC_REVERSIBLE) && diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c index b8d715c68ca44..11a0806469162 100644 --- a/drivers/infiniband/core/uverbs_marshall.c +++ b/drivers/infiniband/core/uverbs_marshall.c @@ -66,7 +66,7 @@ void ib_copy_ah_attr_to_user(struct ib_device *device, struct rdma_ah_attr *src = ah_attr; struct rdma_ah_attr conv_ah; - memset(&dst->grh.reserved, 0, sizeof(dst->grh.reserved)); + memset(&dst->grh, 0, sizeof(dst->grh)); if ((ah_attr->type == RDMA_AH_ATTR_TYPE_OPA) && (rdma_ah_get_dlid(ah_attr) > be16_to_cpu(IB_LID_PERMISSIVE)) && diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c index 00c5478871322..818699b855c5c 100644 --- a/drivers/infiniband/core/uverbs_uapi.c +++ b/drivers/infiniband/core/uverbs_uapi.c @@ -450,6 +450,9 @@ static int uapi_finalize(struct uverbs_api *uapi) uapi->num_write_ex = max_write_ex + 1; data = kmalloc_array(uapi->num_write + uapi->num_write_ex, sizeof(*uapi->write_methods), GFP_KERNEL); + if (!data) + return -ENOMEM; + for (i = 0; i != uapi->num_write + uapi->num_write_ex; i++) data[i] = &uapi->notsupp_method; uapi->write_methods = data; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 58c021648b7c8..dd006b177b544 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1404,6 +1404,7 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, if (nq) nq->budget++; atomic_inc(&rdev->srq_count); + spin_lock_init(&srq->lock); return 0; @@ -3080,8 +3081,11 @@ static void bnxt_re_process_res_ud_wc(struct bnxt_re_qp *qp, struct ib_wc *wc, struct bnxt_qplib_cqe *cqe) { + struct bnxt_re_dev *rdev; + u16 vlan_id = 0; u8 nw_type; + rdev = qp->rdev; wc->opcode = IB_WC_RECV; wc->status = __rc_to_ib_wc_status(cqe->status); @@ -3093,9 +3097,12 @@ static void bnxt_re_process_res_ud_wc(struct bnxt_re_qp *qp, memcpy(wc->smac, cqe->smac, ETH_ALEN); wc->wc_flags |= IB_WC_WITH_SMAC; if (cqe->flags & CQ_RES_UD_FLAGS_META_FORMAT_VLAN) { - wc->vlan_id = (cqe->cfa_meta & 0xFFF); - if (wc->vlan_id < 0x1000) - wc->wc_flags |= IB_WC_WITH_VLAN; + vlan_id = (cqe->cfa_meta & 0xFFF); + } + /* Mark only if vlan_id is non zero */ + if (vlan_id && bnxt_re_check_if_vlan_valid(rdev, vlan_id)) { + wc->vlan_id = vlan_id; + wc->wc_flags |= IB_WC_WITH_VLAN; } nw_type = (cqe->flags & CQ_RES_UD_FLAGS_ROCE_IP_VER_MASK) >> CQ_RES_UD_FLAGS_ROCE_IP_VER_SFT; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 4d07d22bfa7b1..5fc5ab7813c0f 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -642,12 +642,13 @@ int bnxt_qplib_query_srq(struct bnxt_qplib_res *res, int rc = 0; RCFW_CMD_PREP(req, QUERY_SRQ, cmd_flags); - req.srq_cid = cpu_to_le32(srq->id); /* Configure the request */ sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb)); if (!sbuf) return -ENOMEM; + req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS; + req.srq_cid = cpu_to_le32(srq->id); sb = sbuf->sb; rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp, (void *)sbuf, 0); diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 3ac08f47a8ce4..b3fbafbf66555 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2469,6 +2469,7 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, memset(attr, 0, sizeof(*attr)); memset(init_attr, 0, sizeof(*init_attr)); attr->qp_state = to_ib_qp_state(qhp->attr.state); + attr->cur_qp_state = to_ib_qp_state(qhp->attr.state); init_attr->cap.max_send_wr = qhp->attr.sq_num_entries; init_attr->cap.max_recv_wr = qhp->attr.rq_num_entries; init_attr->cap.max_send_sge = qhp->attr.sq_max_sges; diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index 83858f7e83d0f..75dfe9d1564c1 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -340,6 +340,7 @@ static int efa_enable_msix(struct efa_dev *dev) } if (irq_num != msix_vecs) { + efa_disable_msix(dev); dev_err(&dev->pdev->dev, "Allocated %d MSI-X (out of %d requested)\n", irq_num, msix_vecs); diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 4edae89e8e3ca..17f1e59ab12ee 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -745,7 +745,6 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd, rq_entry_inserted = true; qp->qp_handle = create_qp_resp.qp_handle; qp->ibqp.qp_num = create_qp_resp.qp_num; - qp->ibqp.qp_type = init_attr->qp_type; qp->max_send_wr = init_attr->cap.max_send_wr; qp->max_recv_wr = init_attr->cap.max_recv_wr; qp->max_send_sge = init_attr->cap.max_send_sge; diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 89e1dfd07a1bf..8c7ba7bad42b9 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -308,6 +308,8 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from) unsigned long dim = from->nr_segs; int idx; + if (!HFI1_CAP_IS_KSET(SDMA)) + return -EINVAL; idx = srcu_read_lock(&fd->pq_srcu); pq = srcu_dereference(fd->pq, &fd->pq_srcu); if (!cq || !pq) { diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index fbff6b2f00e71..d698c26282ea1 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -543,7 +543,7 @@ void set_link_ipg(struct hfi1_pportdata *ppd) u16 shift, mult; u64 src; u32 current_egress_rate; /* Mbits /sec */ - u32 max_pkt_time; + u64 max_pkt_time; /* * max_pkt_time is the maximum packet egress time in units * of the fabric clock period 1/(805 MHz). @@ -664,12 +664,7 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, ppd->pkeys[default_pkey_idx] = DEFAULT_P_KEY; ppd->part_enforce |= HFI1_PART_ENFORCE_IN; - - if (loopback) { - dd_dev_err(dd, "Faking data partition 0x8001 in idx %u\n", - !default_pkey_idx); - ppd->pkeys[!default_pkey_idx] = 0x8001; - } + ppd->pkeys[0] = 0x8001; INIT_WORK(&ppd->link_vc_work, handle_verify_cap); INIT_WORK(&ppd->link_up_work, handle_link_up); @@ -1180,7 +1175,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) rcd->egrbufs.rcvtids = NULL; for (e = 0; e < rcd->egrbufs.alloced; e++) { - if (rcd->egrbufs.buffers[e].dma) + if (rcd->egrbufs.buffers[e].addr) dma_free_coherent(&dd->pcidev->dev, rcd->egrbufs.buffers[e].len, rcd->egrbufs.buffers[e].addr, diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 79126b2b14ab0..1a82ea73a0fc2 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -920,6 +920,7 @@ void sc_disable(struct send_context *sc) { u64 reg; struct pio_buf *pbuf; + LIST_HEAD(wake_list); if (!sc) return; @@ -954,19 +955,21 @@ void sc_disable(struct send_context *sc) spin_unlock(&sc->release_lock); write_seqlock(&sc->waitlock); - while (!list_empty(&sc->piowait)) { + if (!list_empty(&sc->piowait)) + list_move(&sc->piowait, &wake_list); + write_sequnlock(&sc->waitlock); + while (!list_empty(&wake_list)) { struct iowait *wait; struct rvt_qp *qp; struct hfi1_qp_priv *priv; - wait = list_first_entry(&sc->piowait, struct iowait, list); + wait = list_first_entry(&wake_list, struct iowait, list); qp = iowait_to_qp(wait); priv = qp->priv; list_del_init(&priv->s_iowait.list); priv->s_iowait.lock = NULL; hfi1_qp_wakeup(qp, RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN); } - write_sequnlock(&sc->waitlock); spin_unlock_irq(&sc->alloc_lock); } diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index c61b6022575e1..2a684fc6056e1 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1329,11 +1329,13 @@ void sdma_clean(struct hfi1_devdata *dd, size_t num_engines) kvfree(sde->tx_ring); sde->tx_ring = NULL; } - spin_lock_irq(&dd->sde_map_lock); - sdma_map_free(rcu_access_pointer(dd->sdma_map)); - RCU_INIT_POINTER(dd->sdma_map, NULL); - spin_unlock_irq(&dd->sde_map_lock); - synchronize_rcu(); + if (rcu_access_pointer(dd->sdma_map)) { + spin_lock_irq(&dd->sde_map_lock); + sdma_map_free(rcu_access_pointer(dd->sdma_map)); + RCU_INIT_POINTER(dd->sdma_map, NULL); + spin_unlock_irq(&dd->sde_map_lock); + synchronize_rcu(); + } kfree(dd->per_sdma); dd->per_sdma = NULL; @@ -3056,6 +3058,7 @@ static void __sdma_process_event(struct sdma_engine *sde, static int _extend_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx) { int i; + struct sdma_desc *descp; /* Handle last descriptor */ if (unlikely((tx->num_desc == (MAX_DESC - 1)))) { @@ -3076,12 +3079,10 @@ static int _extend_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx) if (unlikely(tx->num_desc == MAX_DESC)) goto enomem; - tx->descp = kmalloc_array( - MAX_DESC, - sizeof(struct sdma_desc), - GFP_ATOMIC); - if (!tx->descp) + descp = kmalloc_array(MAX_DESC, sizeof(struct sdma_desc), GFP_ATOMIC); + if (!descp) goto enomem; + tx->descp = descp; /* reserve last descriptor for coalescing */ tx->desc_limit = MAX_DESC - 1; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index f23a341400c06..70991649dc693 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -279,6 +279,9 @@ static enum rdma_link_layer hns_roce_get_link_layer(struct ib_device *device, static int hns_roce_query_pkey(struct ib_device *ib_dev, u8 port, u16 index, u16 *pkey) { + if (index > 0) + return -EINVAL; + *pkey = PKEY_ID; return 0; @@ -359,7 +362,7 @@ static int hns_roce_mmap(struct ib_ucontext *context, return rdma_user_mmap_io(context, vma, to_hr_ucontext(context)->uar.pfn, PAGE_SIZE, - pgprot_noncached(vma->vm_page_prot)); + pgprot_device(vma->vm_page_prot)); /* vm_pgoff: 1 -- TPTR */ case 1: diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 48864269f042a..baa129a6c9127 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -3291,7 +3291,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, case MLX4_DEV_EVENT_PORT_MGMT_CHANGE: ew = kmalloc(sizeof *ew, GFP_ATOMIC); if (!ew) - break; + return; INIT_WORK(&ew->work, handle_port_mgmt_change_event); memcpy(&ew->ib_eqe, eqe, sizeof *eqe); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 17ce928e41bde..bca5358f3ef29 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1149,8 +1149,10 @@ static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) qp->flags |= MLX4_IB_QP_NETIF; - else + else { + err = -EINVAL; goto err; + } } err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp); diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 23ce0126b268e..c68a2faa82f31 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -921,7 +921,6 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, u32 *cqb = NULL; void *cqc; int cqe_size; - unsigned int irqn; int eqn; int err; @@ -960,7 +959,7 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, INIT_WORK(&cq->notify_work, notify_soft_wc_handler); } - err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn); + err = mlx5_vector2eqn(dev->mdev, vector, &eqn); if (err) goto err_cqb; @@ -983,7 +982,6 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, goto err_cqb; mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn); - cq->mcq.irqn = irqn; if (udata) cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp; else diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 664e0f374ac00..9ee3267a66ecc 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -957,7 +957,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)( struct mlx5_ib_dev *dev; int user_vector; int dev_eqn; - unsigned int irqn; int err; if (uverbs_copy_from(&user_vector, attrs, @@ -969,7 +968,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)( return PTR_ERR(c); dev = to_mdev(c->ibucontext.device); - err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn, &irqn); + err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn); if (err < 0) return err; @@ -1844,8 +1843,10 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table, key_level2, obj_event, GFP_KERNEL); - if (err) + if (err) { + kfree(obj_event); return err; + } INIT_LIST_HEAD(&obj_event->obj_sub_list); } diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 4540835e05bda..634f29cb7395c 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3865,6 +3865,8 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, MLX5_SET(dctc, dctc, mtu, attr->path_mtu); MLX5_SET(dctc, dctc, my_addr_index, attr->ah_attr.grh.sgid_index); MLX5_SET(dctc, dctc, hop_limit, attr->ah_attr.grh.hop_limit); + if (attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE) + MLX5_SET(dctc, dctc, eth_prio, attr->ah_attr.sl & 0x7); err = mlx5_core_create_dct(dev->mdev, &qp->dct.mdct, qp->dct.in, MLX5_ST_SZ_BYTES(create_dct_in), out, diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index ed56df319d2df..f4b60a2d8aa91 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -416,6 +416,7 @@ struct qedr_qp { u32 sq_psn; u32 qkey; u32 dest_qp_num; + u8 timeout; /* Relevant to qps created from kernel space only (ULPs) */ u8 prev_wqe_size; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 4408d33646647..9b9a4c820ee06 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -2259,6 +2259,8 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 1 << max_t(int, attr->timeout - 8, 0); else qp_params.ack_timeout = 0; + + qp->timeout = attr->timeout; } if (attr_mask & IB_QP_RETRY_CNT) { @@ -2383,15 +2385,18 @@ int qedr_query_qp(struct ib_qp *ibqp, int rc = 0; memset(¶ms, 0, sizeof(params)); - - rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, ¶ms); - if (rc) - goto err; - memset(qp_attr, 0, sizeof(*qp_attr)); memset(qp_init_attr, 0, sizeof(*qp_init_attr)); - qp_attr->qp_state = qedr_get_ibqp_state(params.state); + if (qp->qp_type != IB_QPT_GSI) { + rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, ¶ms); + if (rc) + goto err; + qp_attr->qp_state = qedr_get_ibqp_state(params.state); + } else { + qp_attr->qp_state = qedr_get_ibqp_state(QED_ROCE_QP_STATE_RTS); + } + qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state); qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu); qp_attr->path_mig_state = IB_MIG_MIGRATED; @@ -2415,7 +2420,7 @@ int qedr_query_qp(struct ib_qp *ibqp, rdma_ah_set_dgid_raw(&qp_attr->ah_attr, ¶ms.dgid.bytes[0]); rdma_ah_set_port_num(&qp_attr->ah_attr, 1); rdma_ah_set_sl(&qp_attr->ah_attr, 0); - qp_attr->timeout = params.timeout; + qp_attr->timeout = qp->timeout; qp_attr->rnr_retry = params.rnr_retry; qp_attr->retry_cnt = params.retry_cnt; qp_attr->min_rnr_timer = params.min_rnr_nak_timer; @@ -3533,10 +3538,10 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, * in first 4 bytes and need to update WQE producer in * next 4 bytes. */ - srq->hw_srq.virt_prod_pair_addr->sge_prod = hw_srq->sge_prod; + srq->hw_srq.virt_prod_pair_addr->sge_prod = cpu_to_le32(hw_srq->sge_prod); /* Make sure sge producer is updated first */ dma_wmb(); - srq->hw_srq.virt_prod_pair_addr->wqe_prod = hw_srq->wqe_prod; + srq->hw_srq.virt_prod_pair_addr->wqe_prod = cpu_to_le32(hw_srq->wqe_prod); wr = wr->next; } diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c index 05190edc2611e..fde848b00202b 100644 --- a/drivers/infiniband/hw/qib/qib_user_sdma.c +++ b/drivers/infiniband/hw/qib/qib_user_sdma.c @@ -602,7 +602,7 @@ static int qib_user_sdma_coalesce(const struct qib_devdata *dd, /* * How many pages in this iovec element? */ -static int qib_user_sdma_num_pages(const struct iovec *iov) +static size_t qib_user_sdma_num_pages(const struct iovec *iov) { const unsigned long addr = (unsigned long) iov->iov_base; const unsigned long len = iov->iov_len; @@ -658,7 +658,7 @@ static void qib_user_sdma_free_pkt_frag(struct device *dev, static int qib_user_sdma_pin_pages(const struct qib_devdata *dd, struct qib_user_sdma_queue *pq, struct qib_user_sdma_pkt *pkt, - unsigned long addr, int tlen, int npages) + unsigned long addr, int tlen, size_t npages) { struct page *pages[8]; int i, j; @@ -722,7 +722,7 @@ static int qib_user_sdma_pin_pkt(const struct qib_devdata *dd, unsigned long idx; for (idx = 0; idx < niov; idx++) { - const int npages = qib_user_sdma_num_pages(iov + idx); + const size_t npages = qib_user_sdma_num_pages(iov + idx); const unsigned long addr = (unsigned long) iov[idx].iov_base; ret = qib_user_sdma_pin_pages(dd, pq, pkt, addr, @@ -824,8 +824,8 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, unsigned pktnw; unsigned pktnwc; int nfrags = 0; - int npages = 0; - int bytes_togo = 0; + size_t npages = 0; + size_t bytes_togo = 0; int tiddma = 0; int cfur; @@ -885,7 +885,11 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, npages += qib_user_sdma_num_pages(&iov[idx]); - bytes_togo += slen; + if (check_add_overflow(bytes_togo, slen, &bytes_togo) || + bytes_togo > type_max(typeof(pkt->bytes_togo))) { + ret = -EINVAL; + goto free_pbc; + } pktnwc += slen >> 2; idx++; nfrags++; @@ -904,8 +908,7 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, } if (frag_size) { - int tidsmsize, n; - size_t pktsize; + size_t tidsmsize, n, pktsize, sz, addrlimit; n = npages*((2*PAGE_SIZE/frag_size)+1); pktsize = struct_size(pkt, addr, n); @@ -923,14 +926,24 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, else tidsmsize = 0; - pkt = kmalloc(pktsize+tidsmsize, GFP_KERNEL); + if (check_add_overflow(pktsize, tidsmsize, &sz)) { + ret = -EINVAL; + goto free_pbc; + } + pkt = kmalloc(sz, GFP_KERNEL); if (!pkt) { ret = -ENOMEM; goto free_pbc; } pkt->largepkt = 1; pkt->frag_size = frag_size; - pkt->addrlimit = n + ARRAY_SIZE(pkt->addr); + if (check_add_overflow(n, ARRAY_SIZE(pkt->addr), + &addrlimit) || + addrlimit > type_max(typeof(pkt->addrlimit))) { + ret = -EINVAL; + goto free_pkt; + } + pkt->addrlimit = addrlimit; if (tiddma) { char *tidsm = (char *)pkt + pktsize; diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index d14ad523f96c8..e97c13967174c 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -2812,7 +2812,7 @@ void rvt_qp_iter(struct rvt_dev_info *rdi, EXPORT_SYMBOL(rvt_qp_iter); /* - * This should be called with s_lock held. + * This should be called with s_lock and r_lock held. */ void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_status status) @@ -3110,6 +3110,8 @@ void rvt_ruc_loopback(struct rvt_qp *sqp) case IB_WR_ATOMIC_FETCH_AND_ADD: if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) goto inv_err; + if (unlikely(wqe->atomic_wr.remote_addr & (sizeof(u64) - 1))) + goto inv_err; if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), wqe->atomic_wr.remote_addr, wqe->atomic_wr.rkey, @@ -3169,7 +3171,9 @@ void rvt_ruc_loopback(struct rvt_qp *sqp) rvp->n_loop_pkts++; flush_send: sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; + spin_lock(&sqp->r_lock); rvt_send_complete(sqp, wqe, send_status); + spin_unlock(&sqp->r_lock); if (local_ops) { atomic_dec(&sqp->local_ops_pending); local_ops = 0; @@ -3223,9 +3227,15 @@ void rvt_ruc_loopback(struct rvt_qp *sqp) spin_unlock_irqrestore(&qp->r_lock, flags); serr_no_r_lock: spin_lock_irqsave(&sqp->s_lock, flags); + spin_lock(&sqp->r_lock); rvt_send_complete(sqp, wqe, send_status); + spin_unlock(&sqp->r_lock); if (sqp->ibqp.qp_type == IB_QPT_RC) { - int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR); + int lastwqe; + + spin_lock(&sqp->r_lock); + lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR); + spin_unlock(&sqp->r_lock); sqp->s_flags &= ~RVT_S_BUSY; spin_unlock_irqrestore(&sqp->s_lock, flags); diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c index 4cf11063e0b59..0f166d6d0ccb0 100644 --- a/drivers/infiniband/sw/rxe/rxe_opcode.c +++ b/drivers/infiniband/sw/rxe/rxe_opcode.c @@ -137,7 +137,7 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { } }, [IB_OPCODE_RC_SEND_MIDDLE] = { - .name = "IB_OPCODE_RC_SEND_MIDDLE]", + .name = "IB_OPCODE_RC_SEND_MIDDLE", .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_SEND_MASK | RXE_MIDDLE_MASK, .length = RXE_BTH_BYTES, diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index fe52073867006..8d3f6d93dfb8d 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -140,7 +140,7 @@ enum rxe_device_param { /* default/initial rxe port parameters */ enum rxe_port_param { RXE_PORT_GID_TBL_LEN = 1024, - RXE_PORT_PORT_CAP_FLAGS = RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP, + RXE_PORT_PORT_CAP_FLAGS = IB_PORT_CM_SUP, RXE_PORT_MAX_MSG_SZ = 0x800000, RXE_PORT_BAD_PKEY_CNTR = 0, RXE_PORT_QKEY_VIOL_CNTR = 0, diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index a4d6e0b7901e9..87702478eb99b 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -680,7 +680,7 @@ int rxe_requester(void *arg) opcode = next_opcode(qp, wqe, wqe->wr.opcode); if (unlikely(opcode < 0)) { wqe->status = IB_WC_LOC_QP_OP_ERR; - goto exit; + goto err; } mask = rxe_opcode[opcode].mask; diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index 4d8bc995b4503..eaa9863dd1e00 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -658,14 +658,9 @@ static inline struct siw_sqe *orq_get_current(struct siw_qp *qp) return &qp->orq[qp->orq_get % qp->attrs.orq_size]; } -static inline struct siw_sqe *orq_get_tail(struct siw_qp *qp) -{ - return &qp->orq[qp->orq_put % qp->attrs.orq_size]; -} - static inline struct siw_sqe *orq_get_free(struct siw_qp *qp) { - struct siw_sqe *orq_e = orq_get_tail(qp); + struct siw_sqe *orq_e = &qp->orq[qp->orq_put % qp->attrs.orq_size]; if (READ_ONCE(orq_e->flags) == 0) return orq_e; diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index e3bac1a877bb7..3aed597103d3d 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -976,14 +976,15 @@ static void siw_accept_newconn(struct siw_cep *cep) siw_cep_set_inuse(new_cep); rv = siw_proc_mpareq(new_cep); - siw_cep_set_free(new_cep); - if (rv != -EAGAIN) { siw_cep_put(cep); new_cep->listen_cep = NULL; - if (rv) + if (rv) { + siw_cep_set_free(new_cep); goto error; + } } + siw_cep_set_free(new_cep); } return; diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c index c7c38f7fd29d6..5f94c716301fa 100644 --- a/drivers/infiniband/sw/siw/siw_qp_rx.c +++ b/drivers/infiniband/sw/siw/siw_qp_rx.c @@ -1153,11 +1153,12 @@ static int siw_check_tx_fence(struct siw_qp *qp) spin_lock_irqsave(&qp->orq_lock, flags); - rreq = orq_get_current(qp); - /* free current orq entry */ + rreq = orq_get_current(qp); WRITE_ONCE(rreq->flags, 0); + qp->orq_get++; + if (qp->tx_ctx.orq_fence) { if (unlikely(tx_waiting->wr_status != SIW_WR_QUEUED)) { pr_warn("siw: [QP %u]: fence resume: bad status %d\n", @@ -1165,10 +1166,12 @@ static int siw_check_tx_fence(struct siw_qp *qp) rv = -EPROTO; goto out; } - /* resume SQ processing */ + /* resume SQ processing, if possible */ if (tx_waiting->sqe.opcode == SIW_OP_READ || tx_waiting->sqe.opcode == SIW_OP_READ_LOCAL_INV) { - rreq = orq_get_tail(qp); + + /* SQ processing was stopped because of a full ORQ */ + rreq = orq_get_free(qp); if (unlikely(!rreq)) { pr_warn("siw: [QP %u]: no ORQE\n", qp_id(qp)); rv = -EPROTO; @@ -1181,15 +1184,14 @@ static int siw_check_tx_fence(struct siw_qp *qp) resume_tx = 1; } else if (siw_orq_empty(qp)) { + /* + * SQ processing was stopped by fenced work request. + * Resume since all previous Read's are now completed. + */ qp->tx_ctx.orq_fence = 0; resume_tx = 1; - } else { - pr_warn("siw: [QP %u]: fence resume: orq idx: %d:%d\n", - qp_id(qp), qp->orq_get, qp->orq_put); - rv = -EPROTO; } } - qp->orq_get++; out: spin_unlock_irqrestore(&qp->orq_lock, flags); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 8708ed5477e99..dac806b715afa 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -4222,9 +4222,11 @@ static void srp_remove_one(struct ib_device *device, void *client_data) spin_unlock(&host->target_lock); /* - * Wait for tl_err and target port removal tasks. + * srp_queue_remove_work() queues a call to + * srp_remove_target(). The latter function cancels + * target->tl_err_work so waiting for the remove works to + * finish is sufficient. */ - flush_workqueue(system_long_wq); flush_workqueue(srp_remove_wq); kfree(host); diff --git a/drivers/input/input.c b/drivers/input/input.c index e2eb9b9b8363d..0e16a9980c6a1 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -47,6 +47,17 @@ static DEFINE_MUTEX(input_mutex); static const struct input_value input_value_sync = { EV_SYN, SYN_REPORT, 1 }; +static const unsigned int input_max_code[EV_CNT] = { + [EV_KEY] = KEY_MAX, + [EV_REL] = REL_MAX, + [EV_ABS] = ABS_MAX, + [EV_MSC] = MSC_MAX, + [EV_SW] = SW_MAX, + [EV_LED] = LED_MAX, + [EV_SND] = SND_MAX, + [EV_FF] = FF_MAX, +}; + static inline int is_event_supported(unsigned int code, unsigned long *bm, unsigned int max) { @@ -1978,6 +1989,14 @@ EXPORT_SYMBOL(input_get_timestamp); */ void input_set_capability(struct input_dev *dev, unsigned int type, unsigned int code) { + if (type < EV_CNT && input_max_code[type] && + code > input_max_code[type]) { + pr_err("%s: invalid code %u for type %u\n", __func__, code, + type); + dump_stack(); + return; + } + switch (type) { case EV_KEY: __set_bit(code, dev->keybit); diff --git a/drivers/input/joystick/iforce/iforce-usb.c b/drivers/input/joystick/iforce/iforce-usb.c index 6c554c11a7ac3..ea58805c480fa 100644 --- a/drivers/input/joystick/iforce/iforce-usb.c +++ b/drivers/input/joystick/iforce/iforce-usb.c @@ -92,7 +92,7 @@ static int iforce_usb_get_id(struct iforce *iforce, u8 id, id, USB_TYPE_VENDOR | USB_DIR_IN | USB_RECIP_INTERFACE, - 0, 0, buf, IFORCE_MAX_LENGTH, HZ); + 0, 0, buf, IFORCE_MAX_LENGTH, 1000); if (status < 0) { dev_err(&iforce_usb->intf->dev, "usb_submit_urb failed: %d\n", status); diff --git a/drivers/input/joystick/spaceball.c b/drivers/input/joystick/spaceball.c index cf7cbcd0c29df..6971412990695 100644 --- a/drivers/input/joystick/spaceball.c +++ b/drivers/input/joystick/spaceball.c @@ -19,6 +19,7 @@ #include #include #include +#include #define DRIVER_DESC "SpaceTec SpaceBall 2003/3003/4000 FLX driver" @@ -75,9 +76,15 @@ static void spaceball_process_packet(struct spaceball* spaceball) case 'D': /* Ball data */ if (spaceball->idx != 15) return; - for (i = 0; i < 6; i++) + /* + * Skip first three bytes; read six axes worth of data. + * Axis values are signed 16-bit big-endian. + */ + data += 3; + for (i = 0; i < ARRAY_SIZE(spaceball_axes); i++) { input_report_abs(dev, spaceball_axes[i], - (__s16)((data[2 * i + 3] << 8) | data[2 * i + 2])); + (__s16)get_unaligned_be16(&data[i * 2])); + } break; case 'K': /* Button data */ diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index e5f1e3cf9179f..ba101afcfc27f 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -331,6 +331,7 @@ static const struct xpad_device { { 0x24c6, 0x5b03, "Thrustmaster Ferrari 458 Racing Wheel", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5d04, "Razer Sabertooth", 0, XTYPE_XBOX360 }, { 0x24c6, 0xfafe, "Rock Candy Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, + { 0x3285, 0x0607, "Nacon GC-100", 0, XTYPE_XBOX360 }, { 0x3767, 0x0101, "Fanatec Speedster 3 Forceshock Wheel", 0, XTYPE_XBOX }, { 0xffff, 0xffff, "Chinese-made Xbox Controller", 0, XTYPE_XBOX }, { 0x0000, 0x0000, "Generic X-Box pad", 0, XTYPE_UNKNOWN } @@ -447,6 +448,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOXONE_VENDOR(0x24c6), /* PowerA Controllers */ XPAD_XBOXONE_VENDOR(0x2e24), /* Hyperkin Duke X-Box One pad */ XPAD_XBOX360_VENDOR(0x2f24), /* GameSir Controllers */ + XPAD_XBOX360_VENDOR(0x3285), /* Nacon GC-100 */ { } }; diff --git a/drivers/input/keyboard/snvs_pwrkey.c b/drivers/input/keyboard/snvs_pwrkey.c index e76b7a400a1c1..248bb86f4b3f4 100644 --- a/drivers/input/keyboard/snvs_pwrkey.c +++ b/drivers/input/keyboard/snvs_pwrkey.c @@ -3,6 +3,7 @@ // Driver for the IMX SNVS ON/OFF Power Key // Copyright (C) 2015 Freescale Semiconductor, Inc. All Rights Reserved. +#include #include #include #include @@ -81,6 +82,11 @@ static irqreturn_t imx_snvs_pwrkey_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } +static void imx_snvs_pwrkey_disable_clk(void *data) +{ + clk_disable_unprepare(data); +} + static void imx_snvs_pwrkey_act(void *pdata) { struct pwrkey_drv_data *pd = pdata; @@ -93,6 +99,7 @@ static int imx_snvs_pwrkey_probe(struct platform_device *pdev) struct pwrkey_drv_data *pdata = NULL; struct input_dev *input = NULL; struct device_node *np; + struct clk *clk; int error; /* Get SNVS register Page */ @@ -115,6 +122,28 @@ static int imx_snvs_pwrkey_probe(struct platform_device *pdev) dev_warn(&pdev->dev, "KEY_POWER without setting in dts\n"); } + clk = devm_clk_get_optional(&pdev->dev, NULL); + if (IS_ERR(clk)) { + dev_err(&pdev->dev, "Failed to get snvs clock (%pe)\n", clk); + return PTR_ERR(clk); + } + + error = clk_prepare_enable(clk); + if (error) { + dev_err(&pdev->dev, "Failed to enable snvs clock (%pe)\n", + ERR_PTR(error)); + return error; + } + + error = devm_add_action_or_reset(&pdev->dev, + imx_snvs_pwrkey_disable_clk, clk); + if (error) { + dev_err(&pdev->dev, + "Failed to register clock cleanup handler (%pe)\n", + ERR_PTR(error)); + return error; + } + pdata->wakeup = of_property_read_bool(np, "wakeup-source"); pdata->irq = platform_get_irq(pdev, 0); diff --git a/drivers/input/misc/sparcspkr.c b/drivers/input/misc/sparcspkr.c index fe43e5557ed72..cdcb7737c46aa 100644 --- a/drivers/input/misc/sparcspkr.c +++ b/drivers/input/misc/sparcspkr.c @@ -205,6 +205,7 @@ static int bbc_beep_probe(struct platform_device *op) info = &state->u.bbc; info->clock_freq = of_getintprop_default(dp, "clock-frequency", 0); + of_node_put(dp); if (!info->clock_freq) goto out_free; diff --git a/drivers/input/mouse/appletouch.c b/drivers/input/mouse/appletouch.c index 3f06e8a495d80..ff94388416645 100644 --- a/drivers/input/mouse/appletouch.c +++ b/drivers/input/mouse/appletouch.c @@ -916,6 +916,8 @@ static int atp_probe(struct usb_interface *iface, set_bit(BTN_TOOL_TRIPLETAP, input_dev->keybit); set_bit(BTN_LEFT, input_dev->keybit); + INIT_WORK(&dev->work, atp_reinit); + error = input_register_device(dev->input); if (error) goto err_free_buffer; @@ -923,8 +925,6 @@ static int atp_probe(struct usb_interface *iface, /* save our data pointer in this interface device */ usb_set_intfdata(iface, dev); - INIT_WORK(&dev->work, atp_reinit); - return 0; err_free_buffer: diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c index 59a14505b9cd1..ca150618d32f1 100644 --- a/drivers/input/mouse/bcm5974.c +++ b/drivers/input/mouse/bcm5974.c @@ -942,17 +942,22 @@ static int bcm5974_probe(struct usb_interface *iface, if (!dev->tp_data) goto err_free_bt_buffer; - if (dev->bt_urb) + if (dev->bt_urb) { usb_fill_int_urb(dev->bt_urb, udev, usb_rcvintpipe(udev, cfg->bt_ep), dev->bt_data, dev->cfg.bt_datalen, bcm5974_irq_button, dev, 1); + dev->bt_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; + } + usb_fill_int_urb(dev->tp_urb, udev, usb_rcvintpipe(udev, cfg->tp_ep), dev->tp_data, dev->cfg.tp_datalen, bcm5974_irq_trackpad, dev, 1); + dev->tp_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; + /* create bcm5974 device */ usb_make_path(udev, dev->phys, sizeof(dev->phys)); strlcat(dev->phys, "/input0", sizeof(dev->phys)); diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 196e8505dd8d7..aaef8847f8862 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -139,55 +139,21 @@ static int elan_get_fwinfo(u16 ic_type, u16 *validpage_count, return 0; } -static int elan_enable_power(struct elan_tp_data *data) +static int elan_set_power(struct elan_tp_data *data, bool on) { int repeat = ETP_RETRY_COUNT; int error; - error = regulator_enable(data->vcc); - if (error) { - dev_err(&data->client->dev, - "failed to enable regulator: %d\n", error); - return error; - } - do { - error = data->ops->power_control(data->client, true); + error = data->ops->power_control(data->client, on); if (error >= 0) return 0; msleep(30); } while (--repeat > 0); - dev_err(&data->client->dev, "failed to enable power: %d\n", error); - return error; -} - -static int elan_disable_power(struct elan_tp_data *data) -{ - int repeat = ETP_RETRY_COUNT; - int error; - - do { - error = data->ops->power_control(data->client, false); - if (!error) { - error = regulator_disable(data->vcc); - if (error) { - dev_err(&data->client->dev, - "failed to disable regulator: %d\n", - error); - /* Attempt to power the chip back up */ - data->ops->power_control(data->client, true); - break; - } - - return 0; - } - - msleep(30); - } while (--repeat > 0); - - dev_err(&data->client->dev, "failed to disable power: %d\n", error); + dev_err(&data->client->dev, "failed to set power %s: %d\n", + on ? "on" : "off", error); return error; } @@ -1316,9 +1282,19 @@ static int __maybe_unused elan_suspend(struct device *dev) /* Enable wake from IRQ */ data->irq_wake = (enable_irq_wake(client->irq) == 0); } else { - ret = elan_disable_power(data); + ret = elan_set_power(data, false); + if (ret) + goto err; + + ret = regulator_disable(data->vcc); + if (ret) { + dev_err(dev, "error %d disabling regulator\n", ret); + /* Attempt to power the chip back up */ + elan_set_power(data, true); + } } +err: mutex_unlock(&data->sysfs_mutex); return ret; } @@ -1329,12 +1305,18 @@ static int __maybe_unused elan_resume(struct device *dev) struct elan_tp_data *data = i2c_get_clientdata(client); int error; - if (device_may_wakeup(dev) && data->irq_wake) { + if (!device_may_wakeup(dev)) { + error = regulator_enable(data->vcc); + if (error) { + dev_err(dev, "error %d enabling regulator\n", error); + goto err; + } + } else if (data->irq_wake) { disable_irq_wake(client->irq); data->irq_wake = false; } - error = elan_enable_power(data); + error = elan_set_power(data, true); if (error) { dev_err(dev, "power up when resuming failed: %d\n", error); goto err; diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 053fe2da1e08f..3e78c26025815 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -517,6 +517,19 @@ static void elantech_report_trackpoint(struct psmouse *psmouse, case 0x16008020U: case 0x26800010U: case 0x36808000U: + + /* + * This firmware misreport coordinates for trackpoint + * occasionally. Discard packets outside of [-127, 127] range + * to prevent cursor jumps. + */ + if (packet[4] == 0x80 || packet[5] == 0x80 || + packet[1] >> 7 == packet[4] >> 7 || + packet[2] >> 7 == packet[5] >> 7) { + elantech_debug("discarding packet [%6ph]\n", packet); + break; + + } x = packet[4] - (int)((packet[1]^0x80) << 1); y = (int)((packet[2]^0x80) << 1) - packet[5]; @@ -1575,7 +1588,13 @@ static const struct dmi_system_id no_hw_res_dmi_table[] = { */ static int elantech_change_report_id(struct psmouse *psmouse) { - unsigned char param[2] = { 0x10, 0x03 }; + /* + * NOTE: the code is expecting to receive param[] as an array of 3 + * items (see __ps2_command()), even if in this case only 2 are + * actually needed. Make sure the array size is 3 to avoid potential + * stack out-of-bound accesses. + */ + unsigned char param[3] = { 0x10, 0x03 }; if (elantech_write_reg_params(psmouse, 0x7, param) || elantech_read_reg_params(psmouse, 0x7, param) || diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 23442a144b834..0282c4c55e9da 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -272,6 +272,13 @@ static const struct dmi_system_id __initconst i8042_dmi_nomux_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "LifeBook S6230"), }, }, + { + /* Fujitsu Lifebook T725 laptop */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK T725"), + }, + }, { /* Fujitsu Lifebook U745 */ .matches = { @@ -840,6 +847,13 @@ static const struct dmi_system_id __initconst i8042_dmi_notimeout_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK AH544"), }, }, + { + /* Fujitsu Lifebook T725 laptop */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK T725"), + }, + }, { /* Fujitsu U574 laptop */ /* https://bugzilla.kernel.org/show_bug.cgi?id=69731 */ @@ -981,6 +995,24 @@ static const struct dmi_system_id __initconst i8042_dmi_kbdreset_table[] = { { } }; +static const struct dmi_system_id i8042_dmi_probe_defer_table[] __initconst = { + { + /* ASUS ZenBook UX425UA */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX425UA"), + }, + }, + { + /* ASUS ZenBook UM325UA */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX325UA_UM325UA"), + }, + }, + { } +}; + #endif /* CONFIG_X86 */ #ifdef CONFIG_PNP @@ -1300,6 +1332,9 @@ static int __init i8042_platform_init(void) if (dmi_check_system(i8042_dmi_kbdreset_table)) i8042_kbdreset = true; + if (dmi_check_system(i8042_dmi_probe_defer_table)) + i8042_probe_defer = true; + /* * A20 was already enabled during early kernel init. But some buggy * BIOSes (in MSI Laptops) require A20 to be enabled using 8042 to diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index 6ff6b5710dd4e..bb76ff2f6b1d8 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -44,6 +44,10 @@ static bool i8042_unlock; module_param_named(unlock, i8042_unlock, bool, 0); MODULE_PARM_DESC(unlock, "Ignore keyboard lock."); +static bool i8042_probe_defer; +module_param_named(probe_defer, i8042_probe_defer, bool, 0); +MODULE_PARM_DESC(probe_defer, "Allow deferred probing."); + enum i8042_controller_reset_mode { I8042_RESET_NEVER, I8042_RESET_ALWAYS, @@ -709,7 +713,7 @@ static int i8042_set_mux_mode(bool multiplex, unsigned char *mux_version) * LCS/Telegraphics. */ -static int __init i8042_check_mux(void) +static int i8042_check_mux(void) { unsigned char mux_version; @@ -738,10 +742,10 @@ static int __init i8042_check_mux(void) /* * The following is used to test AUX IRQ delivery. */ -static struct completion i8042_aux_irq_delivered __initdata; -static bool i8042_irq_being_tested __initdata; +static struct completion i8042_aux_irq_delivered; +static bool i8042_irq_being_tested; -static irqreturn_t __init i8042_aux_test_irq(int irq, void *dev_id) +static irqreturn_t i8042_aux_test_irq(int irq, void *dev_id) { unsigned long flags; unsigned char str, data; @@ -768,7 +772,7 @@ static irqreturn_t __init i8042_aux_test_irq(int irq, void *dev_id) * verifies success by readinng CTR. Used when testing for presence of AUX * port. */ -static int __init i8042_toggle_aux(bool on) +static int i8042_toggle_aux(bool on) { unsigned char param; int i; @@ -796,7 +800,7 @@ static int __init i8042_toggle_aux(bool on) * the presence of an AUX interface. */ -static int __init i8042_check_aux(void) +static int i8042_check_aux(void) { int retval = -1; bool irq_registered = false; @@ -1003,7 +1007,7 @@ static int i8042_controller_init(void) if (i8042_command(&ctr[n++ % 2], I8042_CMD_CTL_RCTR)) { pr_err("Can't read CTR while initializing i8042\n"); - return -EIO; + return i8042_probe_defer ? -EPROBE_DEFER : -EIO; } } while (n < 2 || ctr[0] != ctr[1]); @@ -1318,7 +1322,7 @@ static void i8042_shutdown(struct platform_device *dev) i8042_controller_reset(false); } -static int __init i8042_create_kbd_port(void) +static int i8042_create_kbd_port(void) { struct serio *serio; struct i8042_port *port = &i8042_ports[I8042_KBD_PORT_NO]; @@ -1346,7 +1350,7 @@ static int __init i8042_create_kbd_port(void) return 0; } -static int __init i8042_create_aux_port(int idx) +static int i8042_create_aux_port(int idx) { struct serio *serio; int port_no = idx < 0 ? I8042_AUX_PORT_NO : I8042_MUX_PORT_NO + idx; @@ -1383,13 +1387,13 @@ static int __init i8042_create_aux_port(int idx) return 0; } -static void __init i8042_free_kbd_port(void) +static void i8042_free_kbd_port(void) { kfree(i8042_ports[I8042_KBD_PORT_NO].serio); i8042_ports[I8042_KBD_PORT_NO].serio = NULL; } -static void __init i8042_free_aux_ports(void) +static void i8042_free_aux_ports(void) { int i; @@ -1399,7 +1403,7 @@ static void __init i8042_free_aux_ports(void) } } -static void __init i8042_register_ports(void) +static void i8042_register_ports(void) { int i; @@ -1440,7 +1444,7 @@ static void i8042_free_irqs(void) i8042_aux_irq_registered = i8042_kbd_irq_registered = false; } -static int __init i8042_setup_aux(void) +static int i8042_setup_aux(void) { int (*aux_enable)(void); int error; @@ -1482,7 +1486,7 @@ static int __init i8042_setup_aux(void) return error; } -static int __init i8042_setup_kbd(void) +static int i8042_setup_kbd(void) { int error; @@ -1532,7 +1536,7 @@ static int i8042_kbd_bind_notifier(struct notifier_block *nb, return 0; } -static int __init i8042_probe(struct platform_device *dev) +static int i8042_probe(struct platform_device *dev) { int error; @@ -1597,6 +1601,7 @@ static struct platform_driver i8042_driver = { .pm = &i8042_pm_ops, #endif }, + .probe = i8042_probe, .remove = i8042_remove, .shutdown = i8042_shutdown, }; @@ -1607,7 +1612,6 @@ static struct notifier_block i8042_kbd_bind_notifier_block = { static int __init i8042_init(void) { - struct platform_device *pdev; int err; dbg_init(); @@ -1623,17 +1627,29 @@ static int __init i8042_init(void) /* Set this before creating the dev to allow i8042_command to work right away */ i8042_present = true; - pdev = platform_create_bundle(&i8042_driver, i8042_probe, NULL, 0, NULL, 0); - if (IS_ERR(pdev)) { - err = PTR_ERR(pdev); + err = platform_driver_register(&i8042_driver); + if (err) goto err_platform_exit; + + i8042_platform_device = platform_device_alloc("i8042", -1); + if (!i8042_platform_device) { + err = -ENOMEM; + goto err_unregister_driver; } + err = platform_device_add(i8042_platform_device); + if (err) + goto err_free_device; + bus_register_notifier(&serio_bus, &i8042_kbd_bind_notifier_block); panic_blink = i8042_panic_blink; return 0; +err_free_device: + platform_device_put(i8042_platform_device); +err_unregister_driver: + platform_driver_unregister(&i8042_driver); err_platform_exit: i8042_platform_exit(); return err; diff --git a/drivers/input/tablet/aiptek.c b/drivers/input/tablet/aiptek.c index 06d0ffef4a171..acaf8c045f19a 100644 --- a/drivers/input/tablet/aiptek.c +++ b/drivers/input/tablet/aiptek.c @@ -1801,15 +1801,13 @@ aiptek_probe(struct usb_interface *intf, const struct usb_device_id *id) input_set_abs_params(inputdev, ABS_TILT_Y, AIPTEK_TILT_MIN, AIPTEK_TILT_MAX, 0, 0); input_set_abs_params(inputdev, ABS_WHEEL, AIPTEK_WHEEL_MIN, AIPTEK_WHEEL_MAX - 1, 0, 0); - /* Verify that a device really has an endpoint */ - if (intf->cur_altsetting->desc.bNumEndpoints < 1) { + err = usb_find_common_endpoints(intf->cur_altsetting, + NULL, NULL, &endpoint, NULL); + if (err) { dev_err(&intf->dev, - "interface has %d endpoints, but must have minimum 1\n", - intf->cur_altsetting->desc.bNumEndpoints); - err = -EINVAL; + "interface has no int in endpoints, but must have minimum 1\n"); goto fail3; } - endpoint = &intf->cur_altsetting->endpoint[0].desc; /* Go set up our URB, which is called when the tablet receives * input. diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index ae60442efda0d..fb31a9d43162b 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -1794,7 +1794,7 @@ static int mxt_read_info_block(struct mxt_data *data) if (error) { dev_err(&client->dev, "Error %d parsing object table\n", error); mxt_free_object_table(data); - goto err_free_mem; + return error; } data->object_table = (struct mxt_object *)(id_buf + MXT_OBJECT_START); diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index bfb945fc33a17..3c9cdb87770f2 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -335,7 +335,7 @@ static int goodix_ts_read_input_report(struct goodix_ts_data *ts, u8 *data) * The Goodix panel will send spurious interrupts after a * 'finger up' event, which will always cause a timeout. */ - return 0; + return -ENOMSG; } static void goodix_ts_report_touch_8b(struct goodix_ts_data *ts, u8 *coor_data) diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c index 22839dde1d09f..bc6f6a2ac4b9d 100644 --- a/drivers/input/touchscreen/ili210x.c +++ b/drivers/input/touchscreen/ili210x.c @@ -290,9 +290,9 @@ static int ili210x_i2c_probe(struct i2c_client *client, if (error) return error; - usleep_range(50, 100); + usleep_range(12000, 15000); gpiod_set_value_cansleep(reset_gpio, 0); - msleep(100); + msleep(160); } priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); diff --git a/drivers/input/touchscreen/of_touchscreen.c b/drivers/input/touchscreen/of_touchscreen.c index e16ec4c7043a4..ed5fbcb40e3f0 100644 --- a/drivers/input/touchscreen/of_touchscreen.c +++ b/drivers/input/touchscreen/of_touchscreen.c @@ -77,12 +77,12 @@ void touchscreen_parse_properties(struct input_dev *input, bool multitouch, axis = multitouch ? ABS_MT_POSITION_X : ABS_X; data_present = touchscreen_get_prop_u32(dev, "touchscreen-min-x", input_abs_get_min(input, axis), - &minimum) | - touchscreen_get_prop_u32(dev, "touchscreen-size-x", + &minimum); + data_present |= touchscreen_get_prop_u32(dev, "touchscreen-size-x", input_abs_get_max(input, axis) + 1, - &maximum) | - touchscreen_get_prop_u32(dev, "touchscreen-fuzz-x", + &maximum); + data_present |= touchscreen_get_prop_u32(dev, "touchscreen-fuzz-x", input_abs_get_fuzz(input, axis), &fuzz); if (data_present) @@ -91,12 +91,12 @@ void touchscreen_parse_properties(struct input_dev *input, bool multitouch, axis = multitouch ? ABS_MT_POSITION_Y : ABS_Y; data_present = touchscreen_get_prop_u32(dev, "touchscreen-min-y", input_abs_get_min(input, axis), - &minimum) | - touchscreen_get_prop_u32(dev, "touchscreen-size-y", + &minimum); + data_present |= touchscreen_get_prop_u32(dev, "touchscreen-size-y", input_abs_get_max(input, axis) + 1, - &maximum) | - touchscreen_get_prop_u32(dev, "touchscreen-fuzz-y", + &maximum); + data_present |= touchscreen_get_prop_u32(dev, "touchscreen-fuzz-y", input_abs_get_fuzz(input, axis), &fuzz); if (data_present) @@ -106,11 +106,11 @@ void touchscreen_parse_properties(struct input_dev *input, bool multitouch, data_present = touchscreen_get_prop_u32(dev, "touchscreen-max-pressure", input_abs_get_max(input, axis), - &maximum) | - touchscreen_get_prop_u32(dev, - "touchscreen-fuzz-pressure", - input_abs_get_fuzz(input, axis), - &fuzz); + &maximum); + data_present |= touchscreen_get_prop_u32(dev, + "touchscreen-fuzz-pressure", + input_abs_get_fuzz(input, axis), + &fuzz); if (data_present) touchscreen_set_params(input, axis, 0, maximum, fuzz); diff --git a/drivers/input/touchscreen/stmfts.c b/drivers/input/touchscreen/stmfts.c index cd8805d71d977..20bc2279a2f24 100644 --- a/drivers/input/touchscreen/stmfts.c +++ b/drivers/input/touchscreen/stmfts.c @@ -337,13 +337,15 @@ static int stmfts_input_open(struct input_dev *dev) struct stmfts_data *sdata = input_get_drvdata(dev); int err; - err = pm_runtime_get_sync(&sdata->client->dev); - if (err < 0) + err = pm_runtime_resume_and_get(&sdata->client->dev); + if (err) return err; err = i2c_smbus_write_byte(sdata->client, STMFTS_MS_MT_SENSE_ON); - if (err) + if (err) { + pm_runtime_put_sync(&sdata->client->dev); return err; + } mutex_lock(&sdata->mutex); sdata->running = true; diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 4989c0149732b..db83204bc9a68 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -84,7 +85,7 @@ #define ACPI_DEVFLAG_LINT1 0x80 #define ACPI_DEVFLAG_ATSDIS 0x10000000 -#define LOOP_TIMEOUT 100000 +#define LOOP_TIMEOUT 2000000 /* * ACPI table definitions * @@ -761,6 +762,7 @@ static int iommu_ga_log_enable(struct amd_iommu *iommu) status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); if (status & (MMIO_STATUS_GALOG_RUN_MASK)) break; + udelay(10); } if (i >= LOOP_TIMEOUT) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 9fe657e7fd870..db665d87b5210 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1708,6 +1708,7 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) dev_info(smmu->dev, "\t0x%016llx\n", (unsigned long long)evt[i]); + cond_resched(); } /* diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 24616525c90dc..9f7c1e29e86a3 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -363,7 +363,7 @@ static int dmar_pci_bus_notifier(struct notifier_block *nb, static struct notifier_block dmar_pci_bus_nb = { .notifier_call = dmar_pci_bus_notifier, - .priority = INT_MIN, + .priority = 1, }; static struct dmar_drhd_unit * diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index a2a03df977046..469041cfc47d1 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -361,10 +361,12 @@ int intel_iommu_sm; int intel_iommu_enabled = 0; EXPORT_SYMBOL_GPL(intel_iommu_enabled); +static int dmar_mpt3sas_pt; static int dmar_map_gfx = 1; static int dmar_forcedac; static int intel_iommu_strict; static int intel_iommu_superpage = 1; +static int cx6_2M_limitation __ro_after_init; static int iommu_identity_mapping; static int intel_no_bounce; @@ -468,6 +470,12 @@ static int __init intel_iommu_setup(char *str) } else if (!strncmp(str, "nobounce", 8)) { pr_info("Intel-IOMMU: No bounce buffer. This could expose security risks of DMA attacks\n"); intel_no_bounce = 1; + } else if (!strncmp(str, "mpt3sas_pt", 10)) { + pr_info("Intel-IOMMU: Enable mpt3sas device passthrough\n"); + dmar_mpt3sas_pt = 1; + } else if (!strncmp(str, "cx6_2M_limitation", 17)) { + pr_info("Intel-IOMMU: enable cx6_2M_limitation\n"); + cx6_2M_limitation = 1; } str += strcspn(str, ","); @@ -681,7 +689,7 @@ static int domain_update_iommu_superpage(struct intel_iommu *skip) rcu_read_lock(); for_each_active_iommu(iommu, drhd) { if (iommu != skip) { - mask &= cap_super_page_val(iommu->cap); + mask &= cap_super_page_val(iommu->cap) & (cx6_2M_limitation ? 1 : ~0); if (!mask) break; } @@ -1894,7 +1902,7 @@ static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu, domain->iommu_snooping = 0; if (intel_iommu_superpage) - domain->iommu_superpage = fls(cap_super_page_val(iommu->cap)); + domain->iommu_superpage = fls(cap_super_page_val(iommu->cap) & (cx6_2M_limitation ? 1 : ~0)); else domain->iommu_superpage = 0; @@ -4140,6 +4148,13 @@ static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev) } DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu); +static void mpt3sas_force_bypass_iommu(struct pci_dev *pdev) +{ + if (dmar_mpt3sas_pt) + pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; +} +DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_NCR, 0x0097, mpt3sas_force_bypass_iommu); + static void __init init_no_remapping_devices(void) { struct dmar_drhd_unit *drhd; diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 5dcc81b1df623..f3b299b5c1c06 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -570,9 +570,8 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) fn, &intel_ir_domain_ops, iommu); if (!iommu->ir_domain) { - irq_domain_free_fwnode(fn); pr_err("IR%d: failed to allocate irqdomain\n", iommu->seq_id); - goto out_free_bitmap; + goto out_free_fwnode; } iommu->ir_msi_domain = arch_create_remap_msi_irq_domain(iommu->ir_domain, @@ -596,7 +595,7 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) if (dmar_enable_qi(iommu)) { pr_err("Failed to enable queued invalidation\n"); - goto out_free_bitmap; + goto out_free_ir_domain; } } @@ -620,6 +619,14 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) return 0; +out_free_ir_domain: + if (iommu->ir_msi_domain) + irq_domain_remove(iommu->ir_msi_domain); + iommu->ir_msi_domain = NULL; + irq_domain_remove(iommu->ir_domain); + iommu->ir_domain = NULL; +out_free_fwnode: + irq_domain_free_fwnode(fn); out_free_bitmap: bitmap_free(bitmap); out_free_pages: diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 4cb394937700c..909be90d26662 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -244,13 +244,17 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp, __GFP_ZERO | ARM_V7S_TABLE_GFP_DMA, get_order(size)); else if (lvl == 2) table = kmem_cache_zalloc(data->l2_tables, gfp); + + if (!table) + return NULL; + phys = virt_to_phys(table); if (phys != (arm_v7s_iopte)phys) { /* Doesn't fit in PTE */ dev_err(dev, "Page table does not fit in PTE: %pa", &phys); goto out_free; } - if (table && !cfg->coherent_walk) { + if (!cfg->coherent_walk) { dma = dma_map_single(dev, table, size, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma)) goto out_free; diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index ca51036aa53c7..975237ca03267 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -351,11 +351,12 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table, arm_lpae_iopte *ptep, arm_lpae_iopte curr, - struct io_pgtable_cfg *cfg) + struct arm_lpae_io_pgtable *data) { arm_lpae_iopte old, new; + struct io_pgtable_cfg *cfg = &data->iop.cfg; - new = __pa(table) | ARM_LPAE_PTE_TYPE_TABLE; + new = paddr_to_iopte(__pa(table), data) | ARM_LPAE_PTE_TYPE_TABLE; if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) new |= ARM_LPAE_PTE_NSTABLE; @@ -406,7 +407,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, if (!cptep) return -ENOMEM; - pte = arm_lpae_install_table(cptep, ptep, 0, cfg); + pte = arm_lpae_install_table(cptep, ptep, 0, data); if (pte) __arm_lpae_free_pages(cptep, tblsz, cfg); } else if (!cfg->coherent_walk && !(pte & ARM_LPAE_PTE_SW_SYNC)) { @@ -575,7 +576,7 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data, __arm_lpae_init_pte(data, blk_paddr, pte, lvl, &tablep[i]); } - pte = arm_lpae_install_table(tablep, ptep, blk_pte, cfg); + pte = arm_lpae_install_table(tablep, ptep, blk_pte, data); if (pte != blk_pte) { __arm_lpae_free_pages(tablep, tablesz, cfg); /* diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index a0ef2143719fb..492eb441e290c 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -65,8 +65,7 @@ static void free_iova_flush_queue(struct iova_domain *iovad) if (!has_iova_flush_queue(iovad)) return; - if (timer_pending(&iovad->fq_timer)) - del_timer(&iovad->fq_timer); + del_timer_sync(&iovad->fq_timer); fq_destroy_all_entries(iovad); @@ -140,10 +139,11 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) cached_iova = rb_entry(iovad->cached32_node, struct iova, node); if (free == cached_iova || (free->pfn_hi < iovad->dma_32bit_pfn && - free->pfn_lo >= cached_iova->pfn_lo)) { + free->pfn_lo >= cached_iova->pfn_lo)) iovad->cached32_node = rb_next(&free->node); + + if (free->pfn_lo < iovad->dma_32bit_pfn) iovad->max32_alloc_size = iovad->dma_32bit_pfn; - } cached_iova = rb_entry(iovad->cached_node, struct iova, node); if (free->pfn_lo >= cached_iova->pfn_lo) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 2639fc7181171..584eefab1dbcd 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -1061,7 +1061,9 @@ static int ipmmu_probe(struct platform_device *pdev) bitmap_zero(mmu->ctx, IPMMU_CTX_MAX); mmu->features = of_device_get_match_data(&pdev->dev); memset(mmu->utlb_ctx, IPMMU_CTX_INVALID, mmu->features->num_utlbs); - dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)); + if (ret) + return ret; /* Map I/O memory and request IRQ. */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index be99d408cf35d..cba0097eba39c 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -636,16 +636,19 @@ static void insert_iommu_master(struct device *dev, static int qcom_iommu_of_xlate(struct device *dev, struct of_phandle_args *spec) { - struct msm_iommu_dev *iommu; + struct msm_iommu_dev *iommu = NULL, *iter; unsigned long flags; int ret = 0; spin_lock_irqsave(&msm_iommu_lock, flags); - list_for_each_entry(iommu, &qcom_iommu_devices, dev_node) - if (iommu->dev->of_node == spec->np) + list_for_each_entry(iter, &qcom_iommu_devices, dev_node) { + if (iter->dev->of_node == spec->np) { + iommu = iter; break; + } + } - if (!iommu || iommu->dev->of_node != spec->np) { + if (!iommu) { ret = -ENODEV; goto fail; } diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index c2f6c78fee444..18d7c818a174c 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -769,8 +769,7 @@ static int mtk_iommu_remove(struct platform_device *pdev) iommu_device_sysfs_remove(&data->iommu); iommu_device_unregister(&data->iommu); - if (iommu_present(&platform_bus_type)) - bus_set_iommu(&platform_bus_type, NULL); + list_del(&data->list); clk_disable_unprepare(data->bclk); devm_free_irq(&pdev->dev, data->irq, data); diff --git a/drivers/ipack/devices/ipoctal.c b/drivers/ipack/devices/ipoctal.c index 9c2a4b5d30cfc..0f1dca623b5a1 100644 --- a/drivers/ipack/devices/ipoctal.c +++ b/drivers/ipack/devices/ipoctal.c @@ -35,6 +35,7 @@ struct ipoctal_channel { unsigned int pointer_read; unsigned int pointer_write; struct tty_port tty_port; + bool tty_registered; union scc2698_channel __iomem *regs; union scc2698_block __iomem *block_regs; unsigned int board_id; @@ -83,22 +84,34 @@ static int ipoctal_port_activate(struct tty_port *port, struct tty_struct *tty) return 0; } -static int ipoctal_open(struct tty_struct *tty, struct file *file) +static int ipoctal_install(struct tty_driver *driver, struct tty_struct *tty) { struct ipoctal_channel *channel = dev_get_drvdata(tty->dev); struct ipoctal *ipoctal = chan_to_ipoctal(channel, tty->index); - int err; - - tty->driver_data = channel; + int res; if (!ipack_get_carrier(ipoctal->dev)) return -EBUSY; - err = tty_port_open(&channel->tty_port, tty, file); - if (err) - ipack_put_carrier(ipoctal->dev); + res = tty_standard_install(driver, tty); + if (res) + goto err_put_carrier; + + tty->driver_data = channel; + + return 0; + +err_put_carrier: + ipack_put_carrier(ipoctal->dev); + + return res; +} + +static int ipoctal_open(struct tty_struct *tty, struct file *file) +{ + struct ipoctal_channel *channel = tty->driver_data; - return err; + return tty_port_open(&channel->tty_port, tty, file); } static void ipoctal_reset_stats(struct ipoctal_stats *stats) @@ -266,7 +279,6 @@ static int ipoctal_inst_slot(struct ipoctal *ipoctal, unsigned int bus_nr, int res; int i; struct tty_driver *tty; - char name[20]; struct ipoctal_channel *channel; struct ipack_region *region; void __iomem *addr; @@ -357,8 +369,11 @@ static int ipoctal_inst_slot(struct ipoctal *ipoctal, unsigned int bus_nr, /* Fill struct tty_driver with ipoctal data */ tty->owner = THIS_MODULE; tty->driver_name = KBUILD_MODNAME; - sprintf(name, KBUILD_MODNAME ".%d.%d.", bus_nr, slot); - tty->name = name; + tty->name = kasprintf(GFP_KERNEL, KBUILD_MODNAME ".%d.%d.", bus_nr, slot); + if (!tty->name) { + res = -ENOMEM; + goto err_put_driver; + } tty->major = 0; tty->minor_start = 0; @@ -374,8 +389,7 @@ static int ipoctal_inst_slot(struct ipoctal *ipoctal, unsigned int bus_nr, res = tty_register_driver(tty); if (res) { dev_err(&ipoctal->dev->dev, "Can't register tty driver.\n"); - put_tty_driver(tty); - return res; + goto err_free_name; } /* Save struct tty_driver for use it when uninstalling the device */ @@ -386,7 +400,9 @@ static int ipoctal_inst_slot(struct ipoctal *ipoctal, unsigned int bus_nr, channel = &ipoctal->channel[i]; tty_port_init(&channel->tty_port); - tty_port_alloc_xmit_buf(&channel->tty_port); + res = tty_port_alloc_xmit_buf(&channel->tty_port); + if (res) + continue; channel->tty_port.ops = &ipoctal_tty_port_ops; ipoctal_reset_stats(&channel->stats); @@ -394,13 +410,15 @@ static int ipoctal_inst_slot(struct ipoctal *ipoctal, unsigned int bus_nr, spin_lock_init(&channel->lock); channel->pointer_read = 0; channel->pointer_write = 0; - tty_dev = tty_port_register_device(&channel->tty_port, tty, i, NULL); + tty_dev = tty_port_register_device_attr(&channel->tty_port, tty, + i, NULL, channel, NULL); if (IS_ERR(tty_dev)) { dev_err(&ipoctal->dev->dev, "Failed to register tty device.\n"); + tty_port_free_xmit_buf(&channel->tty_port); tty_port_destroy(&channel->tty_port); continue; } - dev_set_drvdata(tty_dev, channel); + channel->tty_registered = true; } /* @@ -412,6 +430,13 @@ static int ipoctal_inst_slot(struct ipoctal *ipoctal, unsigned int bus_nr, ipoctal_irq_handler, ipoctal); return 0; + +err_free_name: + kfree(tty->name); +err_put_driver: + put_tty_driver(tty); + + return res; } static inline int ipoctal_copy_write_buffer(struct ipoctal_channel *channel, @@ -652,6 +677,7 @@ static void ipoctal_cleanup(struct tty_struct *tty) static const struct tty_operations ipoctal_fops = { .ioctl = NULL, + .install = ipoctal_install, .open = ipoctal_open, .close = ipoctal_close, .write = ipoctal_write_tty, @@ -694,12 +720,17 @@ static void __ipoctal_remove(struct ipoctal *ipoctal) for (i = 0; i < NR_CHANNELS; i++) { struct ipoctal_channel *channel = &ipoctal->channel[i]; + + if (!channel->tty_registered) + continue; + tty_unregister_device(ipoctal->tty_drv, i); tty_port_free_xmit_buf(&channel->tty_port); tty_port_destroy(&channel->tty_port); } tty_unregister_driver(ipoctal->tty_drv); + kfree(ipoctal->tty_drv->name); put_tty_driver(ipoctal->tty_drv); kfree(ipoctal); } diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 97f9c001d8ff7..20f44ef9c4c9b 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -415,6 +415,7 @@ config MESON_IRQ_GPIO config GOLDFISH_PIC bool "Goldfish programmable interrupt controller" depends on MIPS && (GOLDFISH || COMPILE_TEST) + select GENERIC_IRQ_CHIP select IRQ_DOMAIN help Say yes here to enable Goldfish interrupt controller driver used diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index c9bdc5221b82f..0fd428db3aa4d 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -232,16 +232,12 @@ static int armada_370_xp_msi_alloc(struct irq_domain *domain, unsigned int virq, int hwirq, i; mutex_lock(&msi_used_lock); + hwirq = bitmap_find_free_region(msi_used, PCI_MSI_DOORBELL_NR, + order_base_2(nr_irqs)); + mutex_unlock(&msi_used_lock); - hwirq = bitmap_find_next_zero_area(msi_used, PCI_MSI_DOORBELL_NR, - 0, nr_irqs, 0); - if (hwirq >= PCI_MSI_DOORBELL_NR) { - mutex_unlock(&msi_used_lock); + if (hwirq < 0) return -ENOSPC; - } - - bitmap_set(msi_used, hwirq, nr_irqs); - mutex_unlock(&msi_used_lock); for (i = 0; i < nr_irqs; i++) { irq_domain_set_info(domain, virq + i, hwirq + i, @@ -250,7 +246,7 @@ static int armada_370_xp_msi_alloc(struct irq_domain *domain, unsigned int virq, NULL, NULL); } - return hwirq; + return 0; } static void armada_370_xp_msi_free(struct irq_domain *domain, @@ -259,7 +255,7 @@ static void armada_370_xp_msi_free(struct irq_domain *domain, struct irq_data *d = irq_domain_get_irq_data(domain, virq); mutex_lock(&msi_used_lock); - bitmap_clear(msi_used, d->hwirq, nr_irqs); + bitmap_release_region(msi_used, d->hwirq, order_base_2(nr_irqs)); mutex_unlock(&msi_used_lock); } @@ -396,7 +392,16 @@ static void armada_xp_mpic_smp_cpu_init(void) static void armada_xp_mpic_perf_init(void) { - unsigned long cpuid = cpu_logical_map(smp_processor_id()); + unsigned long cpuid; + + /* + * This Performance Counter Overflow interrupt is specific for + * Armada 370 and XP. It is not available on Armada 375, 38x and 39x. + */ + if (!of_machine_is_compatible("marvell,armada-370-xp")) + return; + + cpuid = cpu_logical_map(smp_processor_id()); /* Enable Performance Counter Overflow interrupts */ writel(ARMADA_370_XP_INT_CAUSE_PERF(cpuid), diff --git a/drivers/irqchip/irq-aspeed-i2c-ic.c b/drivers/irqchip/irq-aspeed-i2c-ic.c index 8d591c179f812..3d3210828e9bf 100644 --- a/drivers/irqchip/irq-aspeed-i2c-ic.c +++ b/drivers/irqchip/irq-aspeed-i2c-ic.c @@ -79,8 +79,8 @@ static int __init aspeed_i2c_ic_of_init(struct device_node *node, } i2c_ic->parent_irq = irq_of_parse_and_map(node, 0); - if (i2c_ic->parent_irq < 0) { - ret = i2c_ic->parent_irq; + if (!i2c_ic->parent_irq) { + ret = -EINVAL; goto err_iounmap; } diff --git a/drivers/irqchip/irq-bcm6345-l1.c b/drivers/irqchip/irq-bcm6345-l1.c index e3483789f4df3..1bd0621c4ce2a 100644 --- a/drivers/irqchip/irq-bcm6345-l1.c +++ b/drivers/irqchip/irq-bcm6345-l1.c @@ -140,7 +140,7 @@ static void bcm6345_l1_irq_handle(struct irq_desc *desc) for_each_set_bit(hwirq, &pending, IRQS_PER_WORD) { irq = irq_linear_revmap(intc->domain, base + hwirq); if (irq) - do_IRQ(irq); + generic_handle_irq(irq); else spurious_interrupt(); } diff --git a/drivers/irqchip/irq-gic-realview.c b/drivers/irqchip/irq-gic-realview.c index b4c1924f02554..38fab02ffe9d0 100644 --- a/drivers/irqchip/irq-gic-realview.c +++ b/drivers/irqchip/irq-gic-realview.c @@ -57,6 +57,7 @@ realview_gic_of_init(struct device_node *node, struct device_node *parent) /* The PB11MPCore GIC needs to be configured in the syscon */ map = syscon_node_to_regmap(np); + of_node_put(np); if (!IS_ERR(map)) { /* new irq mode with no DCC */ regmap_write(map, REALVIEW_SYS_LOCK_OFFSET, diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index f298313b87ac7..fadd48882ef4c 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -574,7 +574,7 @@ static struct its_collection *its_build_invall_cmd(struct its_node *its, its_fixup_cmd(cmd); - return NULL; + return desc->its_invall_cmd.col; } static struct its_vpe *its_build_vinvall_cmd(struct its_node *its, @@ -3123,7 +3123,7 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq if (err) { if (i > 0) - its_vpe_irq_domain_free(domain, virq, i - 1); + its_vpe_irq_domain_free(domain, virq, i); its_lpi_free(bitmap, base, nr_ids); its_free_prop_table(vprop_page); diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 9d0b42cb9903e..71534e0fbf9e3 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -87,6 +87,15 @@ static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key); */ static DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis); +/* + * Global static key controlling whether an update to PMR allowing more + * interrupts requires to be propagated to the redistributor (DSB SY). + * And this needs to be exported for modules to be able to enable + * interrupts... + */ +DEFINE_STATIC_KEY_FALSE(gic_pmr_sync); +EXPORT_SYMBOL(gic_pmr_sync); + /* ppi_nmi_refs[n] == number of cpus having ppi[n + 16] set as NMI */ static refcount_t *ppi_nmi_refs; @@ -162,11 +171,11 @@ static inline void __iomem *gic_dist_base(struct irq_data *d) } } -static void gic_do_wait_for_rwp(void __iomem *base) +static void gic_do_wait_for_rwp(void __iomem *base, u32 bit) { u32 count = 1000000; /* 1s! */ - while (readl_relaxed(base + GICD_CTLR) & GICD_CTLR_RWP) { + while (readl_relaxed(base + GICD_CTLR) & bit) { count--; if (!count) { pr_err_ratelimited("RWP timeout, gone fishing\n"); @@ -180,13 +189,13 @@ static void gic_do_wait_for_rwp(void __iomem *base) /* Wait for completion of a distributor change */ static void gic_dist_wait_for_rwp(void) { - gic_do_wait_for_rwp(gic_data.dist_base); + gic_do_wait_for_rwp(gic_data.dist_base, GICD_CTLR_RWP); } /* Wait for completion of a redistributor change */ static void gic_redist_wait_for_rwp(void) { - gic_do_wait_for_rwp(gic_data_rdist_rd_base()); + gic_do_wait_for_rwp(gic_data_rdist_rd_base(), GICR_CTLR_RWP); } #ifdef CONFIG_ARM64 @@ -1502,6 +1511,17 @@ static void gic_enable_nmi_support(void) for (i = 0; i < gic_data.ppi_nr; i++) refcount_set(&ppi_nmi_refs[i], 0); + /* + * Linux itself doesn't use 1:N distribution, so has no need to + * set PMHE. The only reason to have it set is if EL3 requires it + * (and we can't change it). + */ + if (gic_read_ctlr() & ICC_CTLR_EL1_PMHE_MASK) + static_branch_enable(&gic_pmr_sync); + + pr_info("%s ICC_PMR_EL1 synchronisation\n", + static_branch_unlikely(&gic_pmr_sync) ? "Forcing" : "Relaxing"); + static_branch_enable(&supports_pseudo_nmis); if (static_branch_likely(&supports_deactivate_key)) @@ -1616,7 +1636,7 @@ static void __init gic_populate_ppi_partitions(struct device_node *gic_node) gic_data.ppi_descs = kcalloc(gic_data.ppi_nr, sizeof(*gic_data.ppi_descs), GFP_KERNEL); if (!gic_data.ppi_descs) - return; + goto out_put_node; nr_parts = of_get_child_count(parts_node); @@ -1657,12 +1677,15 @@ static void __init gic_populate_ppi_partitions(struct device_node *gic_node) continue; cpu = of_cpu_node_to_id(cpu_node); - if (WARN_ON(cpu < 0)) + if (WARN_ON(cpu < 0)) { + of_node_put(cpu_node); continue; + } pr_cont("%pOF[%d] ", cpu_node, cpu); cpumask_set_cpu(cpu, &part->mask); + of_node_put(cpu_node); } pr_cont("}\n"); diff --git a/drivers/irqchip/irq-nvic.c b/drivers/irqchip/irq-nvic.c index a166d30deea26..128a26d219e2c 100644 --- a/drivers/irqchip/irq-nvic.c +++ b/drivers/irqchip/irq-nvic.c @@ -26,7 +26,7 @@ #define NVIC_ISER 0x000 #define NVIC_ICER 0x080 -#define NVIC_IPR 0x300 +#define NVIC_IPR 0x400 #define NVIC_MAX_BANKS 16 /* @@ -105,6 +105,7 @@ static int __init nvic_of_init(struct device_node *node, if (!nvic_irq_domain) { pr_warn("Failed to allocate irq domain\n"); + iounmap(nvic_base); return -ENOMEM; } @@ -114,6 +115,7 @@ static int __init nvic_of_init(struct device_node *node, if (ret) { pr_warn("Failed to allocate irq chips\n"); irq_domain_remove(nvic_irq_domain); + iounmap(nvic_base); return ret; } diff --git a/drivers/irqchip/irq-or1k-pic.c b/drivers/irqchip/irq-or1k-pic.c index 03d2366118dd4..d5f1fabc45d79 100644 --- a/drivers/irqchip/irq-or1k-pic.c +++ b/drivers/irqchip/irq-or1k-pic.c @@ -66,7 +66,6 @@ static struct or1k_pic_dev or1k_pic_level = { .name = "or1k-PIC-level", .irq_unmask = or1k_pic_unmask, .irq_mask = or1k_pic_mask, - .irq_mask_ack = or1k_pic_mask_ack, }, .handle = handle_level_irq, .flags = IRQ_LEVEL | IRQ_NOPROBE, diff --git a/drivers/irqchip/irq-s3c24xx.c b/drivers/irqchip/irq-s3c24xx.c index d2031fecc3861..5e97ae54782d6 100644 --- a/drivers/irqchip/irq-s3c24xx.c +++ b/drivers/irqchip/irq-s3c24xx.c @@ -359,11 +359,25 @@ static inline int s3c24xx_handle_intc(struct s3c_irq_intc *intc, asmlinkage void __exception_irq_entry s3c24xx_handle_irq(struct pt_regs *regs) { do { - if (likely(s3c_intc[0])) - if (s3c24xx_handle_intc(s3c_intc[0], regs, 0)) - continue; + /* + * For platform based machines, neither ERR nor NULL can happen here. + * The s3c24xx_handle_irq() will be set as IRQ handler iff this succeeds: + * + * s3c_intc[0] = s3c24xx_init_intc() + * + * If this fails, the next calls to s3c24xx_init_intc() won't be executed. + * + * For DT machine, s3c_init_intc_of() could set the IRQ handler without + * setting s3c_intc[0] only if it was called with num_ctrl=0. There is no + * such code path, so again the s3c_intc[0] will have a valid pointer if + * set_handle_irq() is called. + * + * Therefore in s3c24xx_handle_irq(), the s3c_intc[0] is always something. + */ + if (s3c24xx_handle_intc(s3c_intc[0], regs, 0)) + continue; - if (s3c_intc[2]) + if (!IS_ERR_OR_NULL(s3c_intc[2])) if (s3c24xx_handle_intc(s3c_intc[2], regs, 64)) continue; diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index 7d0a12fe2714a..9dad45d928bfe 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -138,7 +138,13 @@ static void plic_irq_eoi(struct irq_data *d) { struct plic_handler *handler = this_cpu_ptr(&plic_handlers); - writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); + if (irqd_irq_masked(d)) { + plic_irq_unmask(d); + writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); + plic_irq_mask(d); + } else { + writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); + } } static struct irq_chip plic_chip = { @@ -307,3 +313,4 @@ static int __init plic_init(struct device_node *node, IRQCHIP_DECLARE(sifive_plic, "sifive,plic-1.0.0", plic_init); IRQCHIP_DECLARE(riscv_plic0, "riscv,plic0", plic_init); /* for legacy systems */ +IRQCHIP_DECLARE(thead_c900_plic, "thead,c900-plic", plic_init); /* for firmware driver */ diff --git a/drivers/irqchip/irq-sni-exiu.c b/drivers/irqchip/irq-sni-exiu.c index abd011fcecf4a..c7db617e1a2f6 100644 --- a/drivers/irqchip/irq-sni-exiu.c +++ b/drivers/irqchip/irq-sni-exiu.c @@ -37,11 +37,26 @@ struct exiu_irq_data { u32 spi_base; }; -static void exiu_irq_eoi(struct irq_data *d) +static void exiu_irq_ack(struct irq_data *d) { struct exiu_irq_data *data = irq_data_get_irq_chip_data(d); writel(BIT(d->hwirq), data->base + EIREQCLR); +} + +static void exiu_irq_eoi(struct irq_data *d) +{ + struct exiu_irq_data *data = irq_data_get_irq_chip_data(d); + + /* + * Level triggered interrupts are latched and must be cleared during + * EOI or the interrupt will be jammed on. Of course if a level + * triggered interrupt is still asserted then the write will not clear + * the interrupt. + */ + if (irqd_is_level_type(d)) + writel(BIT(d->hwirq), data->base + EIREQCLR); + irq_chip_eoi_parent(d); } @@ -91,10 +106,13 @@ static int exiu_irq_set_type(struct irq_data *d, unsigned int type) writel_relaxed(val, data->base + EILVL); val = readl_relaxed(data->base + EIEDG); - if (type == IRQ_TYPE_LEVEL_LOW || type == IRQ_TYPE_LEVEL_HIGH) + if (type == IRQ_TYPE_LEVEL_LOW || type == IRQ_TYPE_LEVEL_HIGH) { val &= ~BIT(d->hwirq); - else + irq_set_handler_locked(d, handle_fasteoi_irq); + } else { val |= BIT(d->hwirq); + irq_set_handler_locked(d, handle_fasteoi_ack_irq); + } writel_relaxed(val, data->base + EIEDG); writel_relaxed(BIT(d->hwirq), data->base + EIREQCLR); @@ -104,6 +122,7 @@ static int exiu_irq_set_type(struct irq_data *d, unsigned int type) static struct irq_chip exiu_irq_chip = { .name = "EXIU", + .irq_ack = exiu_irq_ack, .irq_eoi = exiu_irq_eoi, .irq_enable = exiu_irq_enable, .irq_mask = exiu_irq_mask, diff --git a/drivers/irqchip/irq-xtensa-mx.c b/drivers/irqchip/irq-xtensa-mx.c index 27933338f7b36..8c581c985aa7d 100644 --- a/drivers/irqchip/irq-xtensa-mx.c +++ b/drivers/irqchip/irq-xtensa-mx.c @@ -151,14 +151,25 @@ static struct irq_chip xtensa_mx_irq_chip = { .irq_set_affinity = xtensa_mx_irq_set_affinity, }; +static void __init xtensa_mx_init_common(struct irq_domain *root_domain) +{ + unsigned int i; + + irq_set_default_host(root_domain); + secondary_init_irq(); + + /* Initialize default IRQ routing to CPU 0 */ + for (i = 0; i < XCHAL_NUM_EXTINTERRUPTS; ++i) + set_er(1, MIROUT(i)); +} + int __init xtensa_mx_init_legacy(struct device_node *interrupt_parent) { struct irq_domain *root_domain = irq_domain_add_legacy(NULL, NR_IRQS - 1, 1, 0, &xtensa_mx_irq_domain_ops, &xtensa_mx_irq_chip); - irq_set_default_host(root_domain); - secondary_init_irq(); + xtensa_mx_init_common(root_domain); return 0; } @@ -168,8 +179,7 @@ static int __init xtensa_mx_init(struct device_node *np, struct irq_domain *root_domain = irq_domain_add_linear(np, NR_IRQS, &xtensa_mx_irq_domain_ops, &xtensa_mx_irq_chip); - irq_set_default_host(root_domain); - secondary_init_irq(); + xtensa_mx_init_common(root_domain); return 0; } IRQCHIP_DECLARE(xtensa_mx_irq_chip, "cdns,xtensa-mx", xtensa_mx_init); diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c index faa7d61b9d6c4..239a889df6080 100644 --- a/drivers/irqchip/qcom-pdc.c +++ b/drivers/irqchip/qcom-pdc.c @@ -50,17 +50,18 @@ static u32 pdc_reg_read(int reg, u32 i) static void pdc_enable_intr(struct irq_data *d, bool on) { int pin_out = d->hwirq; + unsigned long flags; u32 index, mask; u32 enable; index = pin_out / 32; mask = pin_out % 32; - raw_spin_lock(&pdc_lock); + raw_spin_lock_irqsave(&pdc_lock, flags); enable = pdc_reg_read(IRQ_ENABLE_BANK, index); enable = on ? ENABLE_INTR(enable, mask) : CLEAR_INTR(enable, mask); pdc_reg_write(IRQ_ENABLE_BANK, index, enable); - raw_spin_unlock(&pdc_lock); + raw_spin_unlock_irqrestore(&pdc_lock, flags); } static void qcom_pdc_gic_mask(struct irq_data *d) diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c index 18de41a266ebe..aa625b7ddcce2 100644 --- a/drivers/isdn/capi/kcapi.c +++ b/drivers/isdn/capi/kcapi.c @@ -565,6 +565,11 @@ int detach_capi_ctr(struct capi_ctr *ctr) ctr_down(ctr, CAPI_CTR_DETACHED); + if (ctr->cnr < 1 || ctr->cnr - 1 >= CAPI_MAXCONTR) { + err = -EINVAL; + goto unlock_out; + } + if (capi_controller[ctr->cnr - 1] != ctr) { err = -EINVAL; goto unlock_out; diff --git a/drivers/isdn/hardware/mISDN/netjet.c b/drivers/isdn/hardware/mISDN/netjet.c index 9e6aab04f9d61..8299defff55ae 100644 --- a/drivers/isdn/hardware/mISDN/netjet.c +++ b/drivers/isdn/hardware/mISDN/netjet.c @@ -949,8 +949,8 @@ nj_release(struct tiger_hw *card) nj_disable_hwirq(card); mode_tiger(&card->bc[0], ISDN_P_NONE); mode_tiger(&card->bc[1], ISDN_P_NONE); - card->isac.release(&card->isac); spin_unlock_irqrestore(&card->lock, flags); + card->isac.release(&card->isac); release_region(card->base, card->base_s); card->base_s = 0; } diff --git a/drivers/isdn/mISDN/core.c b/drivers/isdn/mISDN/core.c index 55891e4204460..a41b4b2645941 100644 --- a/drivers/isdn/mISDN/core.c +++ b/drivers/isdn/mISDN/core.c @@ -381,7 +381,7 @@ mISDNInit(void) err = mISDN_inittimer(&debug); if (err) goto error2; - err = l1_init(&debug); + err = Isdnl1_Init(&debug); if (err) goto error3; err = Isdnl2_Init(&debug); @@ -395,7 +395,7 @@ mISDNInit(void) error5: Isdnl2_cleanup(); error4: - l1_cleanup(); + Isdnl1_cleanup(); error3: mISDN_timer_cleanup(); error2: @@ -408,7 +408,7 @@ static void mISDN_cleanup(void) { misdn_sock_cleanup(); Isdnl2_cleanup(); - l1_cleanup(); + Isdnl1_cleanup(); mISDN_timer_cleanup(); class_unregister(&mISDN_class); diff --git a/drivers/isdn/mISDN/core.h b/drivers/isdn/mISDN/core.h index 23b44d3033279..42599f49c189d 100644 --- a/drivers/isdn/mISDN/core.h +++ b/drivers/isdn/mISDN/core.h @@ -60,8 +60,8 @@ struct Bprotocol *get_Bprotocol4id(u_int); extern int mISDN_inittimer(u_int *); extern void mISDN_timer_cleanup(void); -extern int l1_init(u_int *); -extern void l1_cleanup(void); +extern int Isdnl1_Init(u_int *); +extern void Isdnl1_cleanup(void); extern int Isdnl2_Init(u_int *); extern void Isdnl2_cleanup(void); diff --git a/drivers/isdn/mISDN/layer1.c b/drivers/isdn/mISDN/layer1.c index 98a3bc6c17009..7b31c25a550e3 100644 --- a/drivers/isdn/mISDN/layer1.c +++ b/drivers/isdn/mISDN/layer1.c @@ -398,7 +398,7 @@ create_l1(struct dchannel *dch, dchannel_l1callback *dcb) { EXPORT_SYMBOL(create_l1); int -l1_init(u_int *deb) +Isdnl1_Init(u_int *deb) { debug = deb; l1fsm_s.state_count = L1S_STATE_COUNT; @@ -409,7 +409,7 @@ l1_init(u_int *deb) } void -l1_cleanup(void) +Isdnl1_cleanup(void) { mISDN_FsmFree(&l1fsm_s); } diff --git a/drivers/leds/leds-lt3593.c b/drivers/leds/leds-lt3593.c index c94995f0daa2a..03ae33093ce63 100644 --- a/drivers/leds/leds-lt3593.c +++ b/drivers/leds/leds-lt3593.c @@ -103,10 +103,9 @@ static int lt3593_led_probe(struct platform_device *pdev) init_data.default_label = ":"; ret = devm_led_classdev_register_ext(dev, &led_data->cdev, &init_data); - if (ret < 0) { - fwnode_handle_put(child); + fwnode_handle_put(child); + if (ret < 0) return ret; - } led_data->cdev.dev->of_node = dev->of_node; platform_set_drvdata(pdev, led_data); diff --git a/drivers/leds/trigger/ledtrig-audio.c b/drivers/leds/trigger/ledtrig-audio.c index f76621e88482d..c6b437e6369b8 100644 --- a/drivers/leds/trigger/ledtrig-audio.c +++ b/drivers/leds/trigger/ledtrig-audio.c @@ -6,10 +6,33 @@ #include #include #include +#include "../leds.h" -static struct led_trigger *ledtrig_audio[NUM_AUDIO_LEDS]; static enum led_brightness audio_state[NUM_AUDIO_LEDS]; +static int ledtrig_audio_mute_activate(struct led_classdev *led_cdev) +{ + led_set_brightness_nosleep(led_cdev, audio_state[LED_AUDIO_MUTE]); + return 0; +} + +static int ledtrig_audio_micmute_activate(struct led_classdev *led_cdev) +{ + led_set_brightness_nosleep(led_cdev, audio_state[LED_AUDIO_MICMUTE]); + return 0; +} + +static struct led_trigger ledtrig_audio[NUM_AUDIO_LEDS] = { + [LED_AUDIO_MUTE] = { + .name = "audio-mute", + .activate = ledtrig_audio_mute_activate, + }, + [LED_AUDIO_MICMUTE] = { + .name = "audio-micmute", + .activate = ledtrig_audio_micmute_activate, + }, +}; + enum led_brightness ledtrig_audio_get(enum led_audio type) { return audio_state[type]; @@ -19,24 +42,22 @@ EXPORT_SYMBOL_GPL(ledtrig_audio_get); void ledtrig_audio_set(enum led_audio type, enum led_brightness state) { audio_state[type] = state; - led_trigger_event(ledtrig_audio[type], state); + led_trigger_event(&ledtrig_audio[type], state); } EXPORT_SYMBOL_GPL(ledtrig_audio_set); static int __init ledtrig_audio_init(void) { - led_trigger_register_simple("audio-mute", - &ledtrig_audio[LED_AUDIO_MUTE]); - led_trigger_register_simple("audio-micmute", - &ledtrig_audio[LED_AUDIO_MICMUTE]); + led_trigger_register(&ledtrig_audio[LED_AUDIO_MUTE]); + led_trigger_register(&ledtrig_audio[LED_AUDIO_MICMUTE]); return 0; } module_init(ledtrig_audio_init); static void __exit ledtrig_audio_exit(void) { - led_trigger_unregister_simple(ledtrig_audio[LED_AUDIO_MUTE]); - led_trigger_unregister_simple(ledtrig_audio[LED_AUDIO_MICMUTE]); + led_trigger_unregister(&ledtrig_audio[LED_AUDIO_MUTE]); + led_trigger_unregister(&ledtrig_audio[LED_AUDIO_MICMUTE]); } module_exit(ledtrig_audio_exit); diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig index 4c2ce210c1237..95b09148167e8 100644 --- a/drivers/lightnvm/Kconfig +++ b/drivers/lightnvm/Kconfig @@ -5,7 +5,7 @@ menuconfig NVM bool "Open-Channel SSD target support" - depends on BLOCK + depends on BLOCK && BROKEN help Say Y here to get to enable Open-channel SSDs. diff --git a/drivers/macintosh/Kconfig b/drivers/macintosh/Kconfig index 574e122ae1050..b5a534206eddd 100644 --- a/drivers/macintosh/Kconfig +++ b/drivers/macintosh/Kconfig @@ -44,6 +44,7 @@ config ADB_IOP config ADB_CUDA bool "Support for Cuda/Egret based Macs and PowerMacs" depends on (ADB || PPC_PMAC) && !PPC_PMAC64 + select RTC_LIB help This provides support for Cuda/Egret based Macintosh and Power Macintosh systems. This includes most m68k based Macs, @@ -57,6 +58,7 @@ config ADB_CUDA config ADB_PMU bool "Support for PMU based PowerMacs and PowerBooks" depends on PPC_PMAC || MAC + select RTC_LIB help On PowerBooks, iBooks, and recent iMacs and Power Macintoshes, the PMU is an embedded microprocessor whose primary function is to @@ -67,6 +69,10 @@ config ADB_PMU this device; you should do so if your machine is one of those mentioned above. +config ADB_PMU_EVENT + def_bool y + depends on ADB_PMU && INPUT=y + config ADB_PMU_LED bool "Support for the Power/iBook front LED" depends on PPC_PMAC && ADB_PMU diff --git a/drivers/macintosh/Makefile b/drivers/macintosh/Makefile index 49819b1b6f201..712edcb3e0b08 100644 --- a/drivers/macintosh/Makefile +++ b/drivers/macintosh/Makefile @@ -12,7 +12,8 @@ obj-$(CONFIG_MAC_EMUMOUSEBTN) += mac_hid.o obj-$(CONFIG_INPUT_ADBHID) += adbhid.o obj-$(CONFIG_ANSLCD) += ans-lcd.o -obj-$(CONFIG_ADB_PMU) += via-pmu.o via-pmu-event.o +obj-$(CONFIG_ADB_PMU) += via-pmu.o +obj-$(CONFIG_ADB_PMU_EVENT) += via-pmu-event.o obj-$(CONFIG_ADB_PMU_LED) += via-pmu-led.o obj-$(CONFIG_PMAC_BACKLIGHT) += via-pmu-backlight.o obj-$(CONFIG_ADB_CUDA) += via-cuda.o diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c index e49d1f287a175..c37d5fce86f79 100644 --- a/drivers/macintosh/adb.c +++ b/drivers/macintosh/adb.c @@ -647,7 +647,7 @@ do_adb_query(struct adb_request *req) switch(req->data[1]) { case ADB_QUERY_GETDEVINFO: - if (req->nbytes < 3) + if (req->nbytes < 3 || req->data[2] >= 16) break; mutex_lock(&adb_handler_mutex); req->reply[0] = adb_handler[req->data[2]].original_address; diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index 21d532a78fa47..d8b6ac2ec313f 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -1464,7 +1464,7 @@ pmu_handle_data(unsigned char *data, int len) pmu_pass_intr(data, len); /* len == 6 is probably a bad check. But how do I * know what PMU versions send what events here? */ - if (len == 6) { + if (IS_ENABLED(CONFIG_ADB_PMU_EVENT) && len == 6) { via_pmu_event(PMU_EVT_POWER, !!(data[1]&8)); via_pmu_event(PMU_EVT_LID, data[1]&1); } diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c index 3e7d4b20ab34f..4229b9b5da98f 100644 --- a/drivers/mailbox/mailbox.c +++ b/drivers/mailbox/mailbox.c @@ -82,11 +82,11 @@ static void msg_submit(struct mbox_chan *chan) exit: spin_unlock_irqrestore(&chan->lock, flags); - /* kick start the timer immediately to avoid delays */ if (!err && (chan->txdone_method & TXDONE_BY_POLL)) { - /* but only if not already active */ - if (!hrtimer_active(&chan->mbox->poll_hrt)) - hrtimer_start(&chan->mbox->poll_hrt, 0, HRTIMER_MODE_REL); + /* kick start the timer immediately to avoid delays */ + spin_lock_irqsave(&chan->mbox->poll_hrt_lock, flags); + hrtimer_start(&chan->mbox->poll_hrt, 0, HRTIMER_MODE_REL); + spin_unlock_irqrestore(&chan->mbox->poll_hrt_lock, flags); } } @@ -120,20 +120,26 @@ static enum hrtimer_restart txdone_hrtimer(struct hrtimer *hrtimer) container_of(hrtimer, struct mbox_controller, poll_hrt); bool txdone, resched = false; int i; + unsigned long flags; for (i = 0; i < mbox->num_chans; i++) { struct mbox_chan *chan = &mbox->chans[i]; if (chan->active_req && chan->cl) { - resched = true; txdone = chan->mbox->ops->last_tx_done(chan); if (txdone) tx_tick(chan, 0); + else + resched = true; } } if (resched) { - hrtimer_forward_now(hrtimer, ms_to_ktime(mbox->txpoll_period)); + spin_lock_irqsave(&mbox->poll_hrt_lock, flags); + if (!hrtimer_is_queued(hrtimer)) + hrtimer_forward_now(hrtimer, ms_to_ktime(mbox->txpoll_period)); + spin_unlock_irqrestore(&mbox->poll_hrt_lock, flags); + return HRTIMER_RESTART; } return HRTIMER_NORESTART; @@ -500,6 +506,7 @@ int mbox_controller_register(struct mbox_controller *mbox) hrtimer_init(&mbox->poll_hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL); mbox->poll_hrt.function = txdone_hrtimer; + spin_lock_init(&mbox->poll_hrt_lock); } for (i = 0; i < mbox->num_chans; i++) { diff --git a/drivers/mailbox/tegra-hsp.c b/drivers/mailbox/tegra-hsp.c index 834b35dc3b137..e9b392dc09bc2 100644 --- a/drivers/mailbox/tegra-hsp.c +++ b/drivers/mailbox/tegra-hsp.c @@ -403,6 +403,11 @@ static int tegra_hsp_mailbox_flush(struct mbox_chan *chan, value = tegra_hsp_channel_readl(ch, HSP_SM_SHRD_MBOX); if ((value & HSP_SM_SHRD_MBOX_FULL) == 0) { mbox_chan_txdone(chan, 0); + + /* Wait until channel is empty */ + if (chan->active_req != NULL) + continue; + return 0; } diff --git a/drivers/mcb/mcb-core.c b/drivers/mcb/mcb-core.c index b72e82efaee52..c799bb81ab03d 100644 --- a/drivers/mcb/mcb-core.c +++ b/drivers/mcb/mcb-core.c @@ -277,8 +277,8 @@ struct mcb_bus *mcb_alloc_bus(struct device *carrier) bus_nr = ida_simple_get(&mcb_ida, 0, 0, GFP_KERNEL); if (bus_nr < 0) { - rc = bus_nr; - goto err_free; + kfree(bus); + return ERR_PTR(bus_nr); } bus->bus_nr = bus_nr; @@ -293,12 +293,12 @@ struct mcb_bus *mcb_alloc_bus(struct device *carrier) dev_set_name(&bus->dev, "mcb:%d", bus_nr); rc = device_add(&bus->dev); if (rc) - goto err_free; + goto err_put; return bus; -err_free: - put_device(carrier); - kfree(bus); + +err_put: + put_device(&bus->dev); return ERR_PTR(rc); } EXPORT_SYMBOL_GPL(mcb_alloc_bus); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 4045ae748f17e..f5d24620d8182 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -1119,6 +1119,12 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio) * which would call closure_get(&dc->disk.cl) */ ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO); + if (!ddip) { + bio->bi_status = BLK_STS_RESOURCE; + bio->bi_end_io(bio); + return; + } + ddip->d = d; ddip->start_time = jiffies; ddip->bi_end_io = bio->bi_end_io; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index b0d569032dd4e..efdf6ce0443ea 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -839,20 +839,20 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, n = BITS_TO_LONGS(d->nr_stripes) * sizeof(unsigned long); d->full_dirty_stripes = kvzalloc(n, GFP_KERNEL); if (!d->full_dirty_stripes) - return -ENOMEM; + goto out_free_stripe_sectors_dirty; idx = ida_simple_get(&bcache_device_idx, 0, BCACHE_DEVICE_IDX_MAX, GFP_KERNEL); if (idx < 0) - return idx; + goto out_free_full_dirty_stripes; if (bioset_init(&d->bio_split, 4, offsetof(struct bbio, bio), BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER)) - goto err; + goto out_ida_remove; d->disk = alloc_disk(BCACHE_MINORS); if (!d->disk) - goto err; + goto out_bioset_exit; set_capacity(d->disk, sectors); snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", idx); @@ -887,8 +887,14 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, return 0; -err: +out_bioset_exit: + bioset_exit(&d->bio_split); +out_ida_remove: ida_simple_remove(&bcache_device_idx, idx); +out_free_full_dirty_stripes: + kvfree(d->full_dirty_stripes); +out_free_stripe_sectors_dirty: + kvfree(d->stripe_sectors_dirty); return -ENOMEM; } diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index d85648b9c247a..fa674e9b6f23d 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2010,7 +2010,7 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string static int get_key_size(char **key_string) { - return (*key_string[0] == ':') ? -EINVAL : strlen(*key_string) >> 1; + return (*key_string[0] == ':') ? -EINVAL : (int)(strlen(*key_string) >> 1); } #endif @@ -2092,7 +2092,12 @@ static void *crypt_page_alloc(gfp_t gfp_mask, void *pool_data) struct crypt_config *cc = pool_data; struct page *page; - if (unlikely(percpu_counter_compare(&cc->n_allocated_pages, dm_crypt_pages_per_client) >= 0) && + /* + * Note, percpu_counter_read_positive() may over (and under) estimate + * the current usage by at most (batch - 1) * num_online_cpus() pages, + * but avoids potential spinlock contention of an exact result. + */ + if (unlikely(percpu_counter_read_positive(&cc->n_allocated_pages) >= dm_crypt_pages_per_client) && likely(gfp_mask & __GFP_NORETRY)) return NULL; @@ -2812,6 +2817,11 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_SUBMITTED; } +static char hex2asc(unsigned char c) +{ + return c + '0' + ((unsigned)(9 - c) >> 4 & 0x27); +} + static void crypt_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen) { @@ -2830,9 +2840,12 @@ static void crypt_status(struct dm_target *ti, status_type_t type, if (cc->key_size > 0) { if (cc->key_string) DMEMIT(":%u:%s", cc->key_size, cc->key_string); - else - for (i = 0; i < cc->key_size; i++) - DMEMIT("%02x", cc->key[i]); + else { + for (i = 0; i < cc->key_size; i++) { + DMEMIT("%c%c", hex2asc(cc->key[i] >> 4), + hex2asc(cc->key[i] & 0xf)); + } + } } else DMEMIT("-"); diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c index 6b0b3a13ab4a2..418f590b99b8d 100644 --- a/drivers/md/dm-era-target.c +++ b/drivers/md/dm-era-target.c @@ -1397,7 +1397,7 @@ static void start_worker(struct era *era) static void stop_worker(struct era *era) { atomic_set(&era->suspended, 1); - flush_workqueue(era->wq); + drain_workqueue(era->wq); } /*---------------------------------------------------------------- @@ -1581,6 +1581,12 @@ static void era_postsuspend(struct dm_target *ti) } stop_worker(era); + + r = metadata_commit(era->md); + if (r) { + DMERR("%s: metadata_commit failed", __func__); + /* FIXME: fail mode */ + } } static int era_preresume(struct dm_target *ti) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 9f4d657dd36c8..acbda91e7643c 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -4054,6 +4054,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) } if (ic->internal_hash) { + size_t recalc_tags_size; ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1); if (!ic->recalc_wq ) { ti->error = "Cannot allocate workqueue"; @@ -4067,8 +4068,10 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) r = -ENOMEM; goto bad; } - ic->recalc_tags = kvmalloc_array(RECALC_SECTORS >> ic->sb->log2_sectors_per_block, - ic->tag_size, GFP_KERNEL); + recalc_tags_size = (RECALC_SECTORS >> ic->sb->log2_sectors_per_block) * ic->tag_size; + if (crypto_shash_digestsize(ic->internal_hash) > ic->tag_size) + recalc_tags_size += crypto_shash_digestsize(ic->internal_hash) - ic->tag_size; + ic->recalc_tags = kvmalloc(recalc_tags_size, GFP_KERNEL); if (!ic->recalc_tags) { ti->error = "Cannot allocate tags for recalculating"; r = -ENOMEM; @@ -4146,8 +4149,6 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) } if (should_write_sb) { - int r; - init_journal(ic, 0, ic->journal_sections, 0); r = dm_integrity_failed(ic); if (unlikely(r)) { diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 3f15d8dc2b71f..7a73f2fa0ad72 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -1696,6 +1697,7 @@ static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags) if (unlikely(cmd >= ARRAY_SIZE(_ioctls))) return NULL; + cmd = array_index_nospec(cmd, ARRAY_SIZE(_ioctls)); *ioctl_flags = _ioctls[cmd].flags; return _ioctls[cmd].fn; } diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 33e71ea6cc143..fe3a9473f3387 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -415,8 +415,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti, /* * Work out how many "unsigned long"s we need to hold the bitset. */ - bitset_size = dm_round_up(region_count, - sizeof(*lc->clean_bits) << BYTE_SHIFT); + bitset_size = dm_round_up(region_count, BITS_PER_LONG); bitset_size >>= BYTE_SHIFT; lc->bitset_uint32_count = bitset_size / sizeof(*lc->clean_bits); @@ -616,7 +615,7 @@ static int disk_resume(struct dm_dirty_log *log) log_clear_bit(lc, lc->clean_bits, i); /* clear any old bits -- device has shrunk */ - for (i = lc->region_count; i % (sizeof(*lc->clean_bits) << BYTE_SHIFT); i++) + for (i = lc->region_count; i % BITS_PER_LONG; i++) log_clear_bit(lc, lc->clean_bits, i); /* copy clean across to sync */ diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 5e73cc6ad0ce4..42151c9fc6e56 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -998,12 +998,13 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size) static int validate_raid_redundancy(struct raid_set *rs) { unsigned int i, rebuild_cnt = 0; - unsigned int rebuilds_per_group = 0, copies; + unsigned int rebuilds_per_group = 0, copies, raid_disks; unsigned int group_size, last_group_start; - for (i = 0; i < rs->md.raid_disks; i++) - if (!test_bit(In_sync, &rs->dev[i].rdev.flags) || - !rs->dev[i].rdev.sb_page) + for (i = 0; i < rs->raid_disks; i++) + if (!test_bit(FirstUse, &rs->dev[i].rdev.flags) && + ((!test_bit(In_sync, &rs->dev[i].rdev.flags) || + !rs->dev[i].rdev.sb_page))) rebuild_cnt++; switch (rs->md.level) { @@ -1043,8 +1044,9 @@ static int validate_raid_redundancy(struct raid_set *rs) * A A B B C * C D D E E */ + raid_disks = min(rs->raid_disks, rs->md.raid_disks); if (__is_raid10_near(rs->md.new_layout)) { - for (i = 0; i < rs->md.raid_disks; i++) { + for (i = 0; i < raid_disks; i++) { if (!(i % copies)) rebuilds_per_group = 0; if ((!rs->dev[i].rdev.sb_page || @@ -1067,10 +1069,10 @@ static int validate_raid_redundancy(struct raid_set *rs) * results in the need to treat the last (potentially larger) * set differently. */ - group_size = (rs->md.raid_disks / copies); - last_group_start = (rs->md.raid_disks / group_size) - 1; + group_size = (raid_disks / copies); + last_group_start = (raid_disks / group_size) - 1; last_group_start *= group_size; - for (i = 0; i < rs->md.raid_disks; i++) { + for (i = 0; i < raid_disks; i++) { if (!(i % copies) && !(i > last_group_start)) rebuilds_per_group = 0; if ((!rs->dev[i].rdev.sb_page || @@ -1585,7 +1587,7 @@ static sector_t __rdev_sectors(struct raid_set *rs) { int i; - for (i = 0; i < rs->md.raid_disks; i++) { + for (i = 0; i < rs->raid_disks; i++) { struct md_rdev *rdev = &rs->dev[i].rdev; if (!test_bit(Journal, &rdev->flags) && @@ -3746,13 +3748,13 @@ static int raid_iterate_devices(struct dm_target *ti, unsigned int i; int r = 0; - for (i = 0; !r && i < rs->md.raid_disks; i++) - if (rs->dev[i].data_dev) - r = fn(ti, - rs->dev[i].data_dev, - 0, /* No offset on data devs */ - rs->md.dev_sectors, - data); + for (i = 0; !r && i < rs->raid_disks; i++) { + if (rs->dev[i].data_dev) { + r = fn(ti, rs->dev[i].data_dev, + 0, /* No offset on data devs */ + rs->md.dev_sectors, data); + } + } return r; } diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c index 71417048256af..ce6d3bce1b7b0 100644 --- a/drivers/md/dm-stats.c +++ b/drivers/md/dm-stats.c @@ -224,6 +224,7 @@ void dm_stats_cleanup(struct dm_stats *stats) atomic_read(&shared->in_flight[READ]), atomic_read(&shared->in_flight[WRITE])); } + cond_resched(); } dm_stat_free(&s->rcu_head); } @@ -313,6 +314,7 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end, for (ni = 0; ni < n_entries; ni++) { atomic_set(&s->stat_shared[ni].in_flight[READ], 0); atomic_set(&s->stat_shared[ni].in_flight[WRITE], 0); + cond_resched(); } if (s->n_histogram_entries) { @@ -325,6 +327,7 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end, for (ni = 0; ni < n_entries; ni++) { s->stat_shared[ni].tmp.histogram = hi; hi += s->n_histogram_entries + 1; + cond_resched(); } } @@ -345,6 +348,7 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end, for (ni = 0; ni < n_entries; ni++) { p[ni].histogram = hi; hi += s->n_histogram_entries + 1; + cond_resched(); } } } @@ -474,6 +478,7 @@ static int dm_stats_list(struct dm_stats *stats, const char *program, } DMEMIT("\n"); } + cond_resched(); } mutex_unlock(&stats->mutex); @@ -750,6 +755,7 @@ static void __dm_stat_clear(struct dm_stat *s, size_t idx_start, size_t idx_end, local_irq_enable(); } } + cond_resched(); } } @@ -865,6 +871,8 @@ static int dm_stats_print(struct dm_stats *stats, int id, if (unlikely(sz + 1 >= maxlen)) goto buffer_overflow; + + cond_resched(); } if (clear) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 5e61ea526e7ad..bb136ae5b493c 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -872,8 +872,7 @@ EXPORT_SYMBOL(dm_consume_args); static bool __table_type_bio_based(enum dm_queue_mode table_type) { return (table_type == DM_TYPE_BIO_BASED || - table_type == DM_TYPE_DAX_BIO_BASED || - table_type == DM_TYPE_NVME_BIO_BASED); + table_type == DM_TYPE_DAX_BIO_BASED); } static bool __table_type_request_based(enum dm_queue_mode table_type) @@ -929,8 +928,6 @@ bool dm_table_supports_dax(struct dm_table *t, return true; } -static bool dm_table_does_not_support_partial_completion(struct dm_table *t); - static int device_is_rq_stackable(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { @@ -960,7 +957,6 @@ static int dm_table_determine_type(struct dm_table *t) goto verify_bio_based; } BUG_ON(t->type == DM_TYPE_DAX_BIO_BASED); - BUG_ON(t->type == DM_TYPE_NVME_BIO_BASED); goto verify_rq_based; } @@ -999,15 +995,6 @@ static int dm_table_determine_type(struct dm_table *t) if (dm_table_supports_dax(t, device_not_dax_capable, &page_size) || (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) { t->type = DM_TYPE_DAX_BIO_BASED; - } else { - /* Check if upgrading to NVMe bio-based is valid or required */ - tgt = dm_table_get_immutable_target(t); - if (tgt && !tgt->max_io_len && dm_table_does_not_support_partial_completion(t)) { - t->type = DM_TYPE_NVME_BIO_BASED; - goto verify_rq_based; /* must be stacked directly on NVMe (blk-mq) */ - } else if (list_empty(devices) && live_md_type == DM_TYPE_NVME_BIO_BASED) { - t->type = DM_TYPE_NVME_BIO_BASED; - } } return 0; } @@ -1024,8 +1011,7 @@ static int dm_table_determine_type(struct dm_table *t) * (e.g. request completion process for partial completion.) */ if (t->num_targets > 1) { - DMERR("%s DM doesn't support multiple targets", - t->type == DM_TYPE_NVME_BIO_BASED ? "nvme bio-based" : "request-based"); + DMERR("request-based DM doesn't support multiple targets"); return -EINVAL; } @@ -1714,20 +1700,6 @@ static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev, return q && !blk_queue_add_random(q); } -static int device_is_partial_completion(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) -{ - char b[BDEVNAME_SIZE]; - - /* For now, NVMe devices are the only devices of this class */ - return (strncmp(bdevname(dev->bdev, b), "nvme", 4) != 0); -} - -static bool dm_table_does_not_support_partial_completion(struct dm_table *t) -{ - return !dm_table_any_dev_attr(t, device_is_partial_completion, NULL); -} - static int device_not_write_same_capable(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 711f101447e3e..9dcdf34b7e32d 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -1217,6 +1217,7 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) static struct target_type verity_target = { .name = "verity", + .features = DM_TARGET_IMMUTABLE, .version = {1, 5, 0}, .module = THIS_MODULE, .ctr = verity_ctr, diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 1802eaba8ec64..630986bb67c9a 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -676,19 +676,20 @@ static void start_io_acct(struct dm_io *io) false, 0, &io->stats_aux); } -static void end_io_acct(struct dm_io *io) +static void end_io_acct(struct mapped_device *md, struct bio *bio, + unsigned long start_time, struct dm_stats_aux *stats_aux) { - struct mapped_device *md = io->md; - struct bio *bio = io->orig_bio; - unsigned long duration = jiffies - io->start_time; - - generic_end_io_acct(md->queue, bio_op(bio), &dm_disk(md)->part0, - io->start_time); + unsigned long duration = jiffies - start_time; if (unlikely(dm_stats_used(&md->stats))) dm_stats_account_io(&md->stats, bio_data_dir(bio), bio->bi_iter.bi_sector, bio_sectors(bio), - true, duration, &io->stats_aux); + true, duration, stats_aux); + + smp_wmb(); + + generic_end_io_acct(md->queue, bio_op(bio), &dm_disk(md)->part0, + start_time); /* nudge anyone waiting on suspend queue */ if (unlikely(wq_has_sleeper(&md->wait))) @@ -909,6 +910,8 @@ static void dec_pending(struct dm_io *io, blk_status_t error) blk_status_t io_error; struct bio *bio; struct mapped_device *md = io->md; + unsigned long start_time = 0; + struct dm_stats_aux stats_aux; /* Push-back supersedes any I/O errors */ if (unlikely(error)) { @@ -935,8 +938,10 @@ static void dec_pending(struct dm_io *io, blk_status_t error) io_error = io->status; bio = io->orig_bio; - end_io_acct(io); + start_time = io->start_time; + stats_aux = io->stats_aux; free_io(md, io); + end_io_acct(md, bio, start_time, &stats_aux); if (io_error == BLK_STS_DM_REQUEUE) return; @@ -995,7 +1000,7 @@ static void clone_endio(struct bio *bio) struct mapped_device *md = tio->io->md; dm_endio_fn endio = tio->ti->type->end_io; - if (unlikely(error == BLK_STS_TARGET) && md->type != DM_TYPE_NVME_BIO_BASED) { + if (unlikely(error == BLK_STS_TARGET)) { if (bio_op(bio) == REQ_OP_DISCARD && !bio->bi_disk->queue->limits.max_discard_sectors) disable_discard(md); @@ -1320,7 +1325,6 @@ static blk_qc_t __map_bio(struct dm_target_io *tio) sector = clone->bi_iter.bi_sector; if (unlikely(swap_bios_limit(ti, clone))) { - struct mapped_device *md = io->md; int latch = get_swap_bios(); if (unlikely(latch != md->swap_bios)) __set_swap_bios_limit(md, latch); @@ -1335,24 +1339,17 @@ static blk_qc_t __map_bio(struct dm_target_io *tio) /* the bio has been remapped so dispatch it */ trace_block_bio_remap(clone->bi_disk->queue, clone, bio_dev(io->orig_bio), sector); - if (md->type == DM_TYPE_NVME_BIO_BASED) - ret = direct_make_request(clone); - else - ret = generic_make_request(clone); + ret = generic_make_request(clone); break; case DM_MAPIO_KILL: - if (unlikely(swap_bios_limit(ti, clone))) { - struct mapped_device *md = io->md; + if (unlikely(swap_bios_limit(ti, clone))) up(&md->swap_bios_semaphore); - } free_tio(tio); dec_pending(io, BLK_STS_IOERR); break; case DM_MAPIO_REQUEUE: - if (unlikely(swap_bios_limit(ti, clone))) { - struct mapped_device *md = io->md; + if (unlikely(swap_bios_limit(ti, clone))) up(&md->swap_bios_semaphore); - } free_tio(tio); dec_pending(io, BLK_STS_DM_REQUEUE); break; @@ -1727,51 +1724,6 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md, return ret; } -/* - * Optimized variant of __split_and_process_bio that leverages the - * fact that targets that use it do _not_ have a need to split bios. - */ -static blk_qc_t __process_bio(struct mapped_device *md, struct dm_table *map, - struct bio *bio, struct dm_target *ti) -{ - struct clone_info ci; - blk_qc_t ret = BLK_QC_T_NONE; - int error = 0; - - init_clone_info(&ci, md, map, bio); - - if (bio->bi_opf & REQ_PREFLUSH) { - struct bio flush_bio; - - /* - * Use an on-stack bio for this, it's safe since we don't - * need to reference it after submit. It's just used as - * the basis for the clone(s). - */ - bio_init(&flush_bio, NULL, 0); - flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC; - ci.bio = &flush_bio; - ci.sector_count = 0; - error = __send_empty_flush(&ci); - bio_uninit(ci.bio); - /* dec_pending submits any data associated with flush */ - } else { - struct dm_target_io *tio; - - ci.bio = bio; - ci.sector_count = bio_sectors(bio); - if (__process_abnormal_io(&ci, ti, &error)) - goto out; - - tio = alloc_tio(&ci, ti, 0, GFP_NOIO); - ret = __clone_and_map_simple_bio(&ci, tio, NULL); - } -out: - /* drop the extra reference count */ - dec_pending(ci.io, errno_to_blk_status(error)); - return ret; -} - static blk_qc_t dm_process_bio(struct mapped_device *md, struct dm_table *map, struct bio *bio) { @@ -1802,8 +1754,6 @@ static blk_qc_t dm_process_bio(struct mapped_device *md, /* regular IO is split by __split_and_process_bio */ } - if (dm_get_md_type(md) == DM_TYPE_NVME_BIO_BASED) - return __process_bio(md, map, bio, ti); return __split_and_process_bio(md, map, bio); } @@ -2197,12 +2147,10 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, if (request_based) dm_stop_queue(q); - if (request_based || md->type == DM_TYPE_NVME_BIO_BASED) { + if (request_based) { /* - * Leverage the fact that request-based DM targets and - * NVMe bio based targets are immutable singletons - * - used to optimize both dm_request_fn and dm_mq_queue_rq; - * and __process_bio. + * Leverage the fact that request-based DM targets are + * immutable singletons - used to optimize dm_mq_queue_rq. */ md->immutable_target = dm_table_get_immutable_target(t); } @@ -2331,7 +2279,6 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) break; case DM_TYPE_BIO_BASED: case DM_TYPE_DAX_BIO_BASED: - case DM_TYPE_NVME_BIO_BASED: dm_init_congested_fn(md); break; case DM_TYPE_NONE: @@ -2493,6 +2440,8 @@ static int dm_wait_for_completion(struct mapped_device *md, long task_state) } finish_wait(&md->wait, &wait); + smp_rmb(); + return r; } @@ -3065,7 +3014,6 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_qu switch (type) { case DM_TYPE_BIO_BASED: case DM_TYPE_DAX_BIO_BASED: - case DM_TYPE_NVME_BIO_BASED: pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size); front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone); io_front_pad = roundup(front_pad, __alignof__(struct dm_io)) + offsetof(struct dm_io, tio); diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index d7eef5292ae2f..a95e20c3d0d4f 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -642,14 +642,6 @@ static int md_bitmap_read_sb(struct bitmap *bitmap) daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ; write_behind = le32_to_cpu(sb->write_behind); sectors_reserved = le32_to_cpu(sb->sectors_reserved); - /* Setup nodes/clustername only if bitmap version is - * cluster-compatible - */ - if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) { - nodes = le32_to_cpu(sb->nodes); - strlcpy(bitmap->mddev->bitmap_info.cluster_name, - sb->cluster_name, 64); - } /* verify that the bitmap-specific fields are valid */ if (sb->magic != cpu_to_le32(BITMAP_MAGIC)) @@ -671,6 +663,16 @@ static int md_bitmap_read_sb(struct bitmap *bitmap) goto out; } + /* + * Setup nodes/clustername only if bitmap version is + * cluster-compatible + */ + if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) { + nodes = le32_to_cpu(sb->nodes); + strlcpy(bitmap->mddev->bitmap_info.cluster_name, + sb->cluster_name, 64); + } + /* keep the array size field of the bitmap superblock up to date */ sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); @@ -703,9 +705,9 @@ static int md_bitmap_read_sb(struct bitmap *bitmap) out: kunmap_atomic(sb); - /* Assigning chunksize is required for "re_read" */ - bitmap->mddev->bitmap_info.chunksize = chunksize; if (err == 0 && nodes && (bitmap->cluster_slot < 0)) { + /* Assigning chunksize is required for "re_read" */ + bitmap->mddev->bitmap_info.chunksize = chunksize; err = md_setup_cluster(bitmap->mddev, nodes); if (err) { pr_warn("%s: Could not setup cluster service (%d)\n", @@ -716,18 +718,18 @@ static int md_bitmap_read_sb(struct bitmap *bitmap) goto re_read; } - out_no_sb: - if (test_bit(BITMAP_STALE, &bitmap->flags)) - bitmap->events_cleared = bitmap->mddev->events; - bitmap->mddev->bitmap_info.chunksize = chunksize; - bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep; - bitmap->mddev->bitmap_info.max_write_behind = write_behind; - bitmap->mddev->bitmap_info.nodes = nodes; - if (bitmap->mddev->bitmap_info.space == 0 || - bitmap->mddev->bitmap_info.space > sectors_reserved) - bitmap->mddev->bitmap_info.space = sectors_reserved; - if (err) { + if (err == 0) { + if (test_bit(BITMAP_STALE, &bitmap->flags)) + bitmap->events_cleared = bitmap->mddev->events; + bitmap->mddev->bitmap_info.chunksize = chunksize; + bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep; + bitmap->mddev->bitmap_info.max_write_behind = write_behind; + bitmap->mddev->bitmap_info.nodes = nodes; + if (bitmap->mddev->bitmap_info.space == 0 || + bitmap->mddev->bitmap_info.space > sectors_reserved) + bitmap->mddev->bitmap_info.space = sectors_reserved; + } else { md_bitmap_print_sb(bitmap); if (bitmap->cluster_slot < 0) md_cluster_stop(bitmap->mddev); diff --git a/drivers/md/md.c b/drivers/md/md.c index 761d43829b2b7..11fd3b32b5621 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2532,14 +2532,16 @@ static void sync_sbs(struct mddev *mddev, int nospares) static bool does_sb_need_changing(struct mddev *mddev) { - struct md_rdev *rdev; + struct md_rdev *rdev = NULL, *iter; struct mdp_superblock_1 *sb; int role; /* Find a good rdev */ - rdev_for_each(rdev, mddev) - if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags)) + rdev_for_each(iter, mddev) + if ((iter->raid_disk >= 0) && !test_bit(Faulty, &iter->flags)) { + rdev = iter; break; + } /* No good device found. */ if (!rdev) @@ -5535,10 +5537,6 @@ static int md_alloc(dev_t dev, char *name) */ disk->flags |= GENHD_FL_EXT_DEVT; mddev->gendisk = disk; - /* As soon as we call add_disk(), another thread could get - * through to md_open, so make sure it doesn't get too far - */ - mutex_lock(&mddev->open_mutex); add_disk(disk); error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md"); @@ -5553,7 +5551,6 @@ static int md_alloc(dev_t dev, char *name) if (mddev->kobj.sd && sysfs_create_group(&mddev->kobj, &md_bitmap_group)) pr_debug("pointless warning\n"); - mutex_unlock(&mddev->open_mutex); abort: mutex_unlock(&disks_mutex); if (!error && mddev->kobj.sd) { @@ -7780,17 +7777,22 @@ EXPORT_SYMBOL(md_register_thread); void md_unregister_thread(struct md_thread **threadp) { - struct md_thread *thread = *threadp; - if (!thread) - return; - pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk)); - /* Locking ensures that mddev_unlock does not wake_up a + struct md_thread *thread; + + /* + * Locking ensures that mddev_unlock does not wake_up a * non-existent thread */ spin_lock(&pers_lock); + thread = *threadp; + if (!thread) { + spin_unlock(&pers_lock); + return; + } *threadp = NULL; spin_unlock(&pers_lock); + pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk)); kthread_stop(thread->tsk); kfree(thread); } @@ -9534,16 +9536,18 @@ static int read_rdev(struct mddev *mddev, struct md_rdev *rdev) void md_reload_sb(struct mddev *mddev, int nr) { - struct md_rdev *rdev; + struct md_rdev *rdev = NULL, *iter; int err; /* Find the rdev */ - rdev_for_each_rcu(rdev, mddev) { - if (rdev->desc_nr == nr) + rdev_for_each_rcu(iter, mddev) { + if (iter->desc_nr == nr) { + rdev = iter; break; + } } - if (!rdev || rdev->desc_nr != nr) { + if (!rdev) { pr_warn("%s: %d Could not find rdev with nr %d\n", __func__, __LINE__, nr); return; } diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c index 9e4d1212f4c16..63f2baed3c8a6 100644 --- a/drivers/md/persistent-data/dm-btree-remove.c +++ b/drivers/md/persistent-data/dm-btree-remove.c @@ -423,9 +423,9 @@ static int rebalance_children(struct shadow_spine *s, memcpy(n, dm_block_data(child), dm_bm_block_size(dm_tm_get_bm(info->tm))); - dm_tm_unlock(info->tm, child); dm_tm_dec(info->tm, dm_block_location(child)); + dm_tm_unlock(info->tm, child); return 0; } diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index 8aae0624a2971..6383afb88f319 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -83,14 +83,16 @@ void inc_children(struct dm_transaction_manager *tm, struct btree_node *n, } static int insert_at(size_t value_size, struct btree_node *node, unsigned index, - uint64_t key, void *value) - __dm_written_to_disk(value) + uint64_t key, void *value) + __dm_written_to_disk(value) { uint32_t nr_entries = le32_to_cpu(node->header.nr_entries); + uint32_t max_entries = le32_to_cpu(node->header.max_entries); __le64 key_le = cpu_to_le64(key); if (index > nr_entries || - index >= le32_to_cpu(node->header.max_entries)) { + index >= max_entries || + nr_entries >= max_entries) { DMERR("too many entries in btree node for insert"); __dm_unbless_for_disk(value); return -ENOMEM; diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index a213bf11738fb..85853ab629717 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -281,6 +281,11 @@ int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result) struct disk_index_entry ie_disk; struct dm_block *blk; + if (b >= ll->nr_blocks) { + DMERR_LIMIT("metadata block out of bounds"); + return -EINVAL; + } + b = do_div(index, ll->entries_per_block); r = ll->load_ie(ll, index, &ie_disk); if (r < 0) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 322386ff5d225..0ead5a7887f14 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -143,21 +143,6 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) pr_debug("md/raid0:%s: FINAL %d zones\n", mdname(mddev), conf->nr_strip_zones); - if (conf->nr_strip_zones == 1) { - conf->layout = RAID0_ORIG_LAYOUT; - } else if (mddev->layout == RAID0_ORIG_LAYOUT || - mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) { - conf->layout = mddev->layout; - } else if (default_layout == RAID0_ORIG_LAYOUT || - default_layout == RAID0_ALT_MULTIZONE_LAYOUT) { - conf->layout = default_layout; - } else { - pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n", - mdname(mddev)); - pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n"); - err = -ENOTSUPP; - goto abort; - } /* * now since we have the hard sector sizes, we can make sure * chunk size is a multiple of that sector size @@ -288,6 +273,22 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) (unsigned long long)smallest->sectors); } + if (conf->nr_strip_zones == 1 || conf->strip_zone[1].nb_dev == 1) { + conf->layout = RAID0_ORIG_LAYOUT; + } else if (mddev->layout == RAID0_ORIG_LAYOUT || + mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) { + conf->layout = mddev->layout; + } else if (default_layout == RAID0_ORIG_LAYOUT || + default_layout == RAID0_ALT_MULTIZONE_LAYOUT) { + conf->layout = default_layout; + } else { + pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n", + mdname(mddev)); + pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n"); + err = -EOPNOTSUPP; + goto abort; + } + pr_debug("md/raid0:%s: done.\n", mdname(mddev)); *private_conf = conf; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 08a7f97750f7a..474cf6abefea5 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -609,17 +609,17 @@ int raid5_calc_degraded(struct r5conf *conf) return degraded; } -static int has_failed(struct r5conf *conf) +static bool has_failed(struct r5conf *conf) { - int degraded; + int degraded = conf->mddev->degraded; - if (conf->mddev->reshape_position == MaxSector) - return conf->mddev->degraded > conf->max_degraded; + if (test_bit(MD_BROKEN, &conf->mddev->flags)) + return true; - degraded = raid5_calc_degraded(conf); - if (degraded > conf->max_degraded) - return 1; - return 0; + if (conf->mddev->reshape_position != MaxSector) + degraded = raid5_calc_degraded(conf); + + return degraded > conf->max_degraded; } struct stripe_head * @@ -2679,34 +2679,31 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev) unsigned long flags; pr_debug("raid456: error called\n"); + pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n", + mdname(mddev), bdevname(rdev->bdev, b)); + spin_lock_irqsave(&conf->device_lock, flags); + set_bit(Faulty, &rdev->flags); + clear_bit(In_sync, &rdev->flags); + mddev->degraded = raid5_calc_degraded(conf); - if (test_bit(In_sync, &rdev->flags) && - mddev->degraded == conf->max_degraded) { - /* - * Don't allow to achieve failed state - * Don't try to recover this device - */ + if (has_failed(conf)) { + set_bit(MD_BROKEN, &conf->mddev->flags); conf->recovery_disabled = mddev->recovery_disabled; - spin_unlock_irqrestore(&conf->device_lock, flags); - return; + + pr_crit("md/raid:%s: Cannot continue operation (%d/%d failed).\n", + mdname(mddev), mddev->degraded, conf->raid_disks); + } else { + pr_crit("md/raid:%s: Operation continuing on %d devices.\n", + mdname(mddev), conf->raid_disks - mddev->degraded); } - set_bit(Faulty, &rdev->flags); - clear_bit(In_sync, &rdev->flags); - mddev->degraded = raid5_calc_degraded(conf); spin_unlock_irqrestore(&conf->device_lock, flags); set_bit(MD_RECOVERY_INTR, &mddev->recovery); set_bit(Blocked, &rdev->flags); set_mask_bits(&mddev->sb_flags, 0, BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING)); - pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n" - "md/raid:%s: Operation continuing on %d devices.\n", - mdname(mddev), - bdevname(rdev->bdev, b), - mdname(mddev), - conf->raid_disks - mddev->degraded); r5c_update_on_rdev_error(mddev, rdev); } @@ -7725,6 +7722,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev) */ if (rdev->saved_raid_disk >= 0 && rdev->saved_raid_disk >= first && + rdev->saved_raid_disk <= last && conf->disks[rdev->saved_raid_disk].rdev == NULL) first = rdev->saved_raid_disk; diff --git a/drivers/media/cec/cec-adap.c b/drivers/media/cec/cec-adap.c index 06383b26712b6..c665f7d20c448 100644 --- a/drivers/media/cec/cec-adap.c +++ b/drivers/media/cec/cec-adap.c @@ -1191,6 +1191,7 @@ void cec_received_msg_ts(struct cec_adapter *adap, if (abort) dst->rx_status |= CEC_RX_STATUS_FEATURE_ABORT; msg->flags = dst->flags; + msg->sequence = dst->sequence; /* Remove it from the wait_queue */ list_del_init(&data->list); @@ -1262,7 +1263,7 @@ static int cec_config_log_addr(struct cec_adapter *adap, * While trying to poll the physical address was reset * and the adapter was unconfigured, so bail out. */ - if (!adap->is_configuring) + if (adap->phys_addr == CEC_PHYS_ADDR_INVALID) return -EINTR; if (err) @@ -1320,7 +1321,6 @@ static void cec_adap_unconfigure(struct cec_adapter *adap) adap->phys_addr != CEC_PHYS_ADDR_INVALID) WARN_ON(adap->ops->adap_log_addr(adap, CEC_LOG_ADDR_INVALID)); adap->log_addrs.log_addr_mask = 0; - adap->is_configuring = false; adap->is_configured = false; memset(adap->phys_addrs, 0xff, sizeof(adap->phys_addrs)); cec_flush(adap); @@ -1513,9 +1513,10 @@ static int cec_config_thread_func(void *arg) for (i = 0; i < las->num_log_addrs; i++) las->log_addr[i] = CEC_LOG_ADDR_INVALID; cec_adap_unconfigure(adap); + adap->is_configuring = false; adap->kthread_config = NULL; - mutex_unlock(&adap->lock); complete(&adap->config_completion); + mutex_unlock(&adap->lock); return 0; } diff --git a/drivers/media/common/saa7146/saa7146_fops.c b/drivers/media/common/saa7146/saa7146_fops.c index aabb830e74689..4b332ea986168 100644 --- a/drivers/media/common/saa7146/saa7146_fops.c +++ b/drivers/media/common/saa7146/saa7146_fops.c @@ -525,7 +525,7 @@ int saa7146_vv_init(struct saa7146_dev* dev, struct saa7146_ext_vv *ext_vv) ERR("out of memory. aborting.\n"); kfree(vv); v4l2_ctrl_handler_free(hdl); - return -1; + return -ENOMEM; } saa7146_video_uops.init(dev,vv); diff --git a/drivers/media/common/videobuf2/videobuf2-dma-contig.c b/drivers/media/common/videobuf2/videobuf2-dma-contig.c index 44cd0e530bbd3..093ebe6f279f7 100644 --- a/drivers/media/common/videobuf2/videobuf2-dma-contig.c +++ b/drivers/media/common/videobuf2/videobuf2-dma-contig.c @@ -154,7 +154,7 @@ static void *vb2_dc_alloc(struct device *dev, unsigned long attrs, buf->cookie = dma_alloc_attrs(dev, size, &buf->dma_addr, GFP_KERNEL | gfp_flags, buf->attrs); if (!buf->cookie) { - dev_err(dev, "dma_alloc_coherent of size %ld failed\n", size); + dev_err(dev, "dma_alloc_coherent of size %lu failed\n", size); kfree(buf); return ERR_PTR(-ENOMEM); } @@ -200,9 +200,9 @@ static int vb2_dc_mmap(void *buf_priv, struct vm_area_struct *vma) vma->vm_ops->open(vma); - pr_debug("%s: mapped dma addr 0x%08lx at 0x%08lx, size %ld\n", - __func__, (unsigned long)buf->dma_addr, vma->vm_start, - buf->size); + pr_debug("%s: mapped dma addr 0x%08lx at 0x%08lx, size %lu\n", + __func__, (unsigned long)buf->dma_addr, vma->vm_start, + buf->size); return 0; } diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c index f14a872d12687..e58cb8434dafe 100644 --- a/drivers/media/dvb-core/dmxdev.c +++ b/drivers/media/dvb-core/dmxdev.c @@ -1413,7 +1413,7 @@ static const struct dvb_device dvbdev_dvr = { }; int dvb_dmxdev_init(struct dmxdev *dmxdev, struct dvb_adapter *dvb_adapter) { - int i; + int i, ret; if (dmxdev->demux->open(dmxdev->demux) < 0) return -EUSERS; @@ -1432,14 +1432,26 @@ int dvb_dmxdev_init(struct dmxdev *dmxdev, struct dvb_adapter *dvb_adapter) DMXDEV_STATE_FREE); } - dvb_register_device(dvb_adapter, &dmxdev->dvbdev, &dvbdev_demux, dmxdev, + ret = dvb_register_device(dvb_adapter, &dmxdev->dvbdev, &dvbdev_demux, dmxdev, DVB_DEVICE_DEMUX, dmxdev->filternum); - dvb_register_device(dvb_adapter, &dmxdev->dvr_dvbdev, &dvbdev_dvr, + if (ret < 0) + goto err_register_dvbdev; + + ret = dvb_register_device(dvb_adapter, &dmxdev->dvr_dvbdev, &dvbdev_dvr, dmxdev, DVB_DEVICE_DVR, dmxdev->filternum); + if (ret < 0) + goto err_register_dvr_dvbdev; dvb_ringbuffer_init(&dmxdev->dvr_buffer, NULL, 8192); return 0; + +err_register_dvr_dvbdev: + dvb_unregister_device(dmxdev->dvbdev); +err_register_dvbdev: + vfree(dmxdev->filter); + dmxdev->filter = NULL; + return ret; } EXPORT_SYMBOL(dvb_dmxdev_init); diff --git a/drivers/media/dvb-frontends/dib8000.c b/drivers/media/dvb-frontends/dib8000.c index 082796534b0ae..d67f2dd997d06 100644 --- a/drivers/media/dvb-frontends/dib8000.c +++ b/drivers/media/dvb-frontends/dib8000.c @@ -2107,32 +2107,55 @@ static void dib8000_load_ana_fe_coefs(struct dib8000_state *state, const s16 *an dib8000_write_word(state, 117 + mode, ana_fe[mode]); } -static const u16 lut_prbs_2k[14] = { - 0, 0x423, 0x009, 0x5C7, 0x7A6, 0x3D8, 0x527, 0x7FF, 0x79B, 0x3D6, 0x3A2, 0x53B, 0x2F4, 0x213 +static const u16 lut_prbs_2k[13] = { + 0x423, 0x009, 0x5C7, + 0x7A6, 0x3D8, 0x527, + 0x7FF, 0x79B, 0x3D6, + 0x3A2, 0x53B, 0x2F4, + 0x213 }; -static const u16 lut_prbs_4k[14] = { - 0, 0x208, 0x0C3, 0x7B9, 0x423, 0x5C7, 0x3D8, 0x7FF, 0x3D6, 0x53B, 0x213, 0x029, 0x0D0, 0x48E + +static const u16 lut_prbs_4k[13] = { + 0x208, 0x0C3, 0x7B9, + 0x423, 0x5C7, 0x3D8, + 0x7FF, 0x3D6, 0x53B, + 0x213, 0x029, 0x0D0, + 0x48E }; -static const u16 lut_prbs_8k[14] = { - 0, 0x740, 0x069, 0x7DD, 0x208, 0x7B9, 0x5C7, 0x7FF, 0x53B, 0x029, 0x48E, 0x4C4, 0x367, 0x684 + +static const u16 lut_prbs_8k[13] = { + 0x740, 0x069, 0x7DD, + 0x208, 0x7B9, 0x5C7, + 0x7FF, 0x53B, 0x029, + 0x48E, 0x4C4, 0x367, + 0x684 }; static u16 dib8000_get_init_prbs(struct dib8000_state *state, u16 subchannel) { int sub_channel_prbs_group = 0; + int prbs_group; - sub_channel_prbs_group = (subchannel / 3) + 1; - dprintk("sub_channel_prbs_group = %d , subchannel =%d prbs = 0x%04x\n", sub_channel_prbs_group, subchannel, lut_prbs_8k[sub_channel_prbs_group]); + sub_channel_prbs_group = subchannel / 3; + if (sub_channel_prbs_group >= ARRAY_SIZE(lut_prbs_2k)) + return 0; switch (state->fe[0]->dtv_property_cache.transmission_mode) { case TRANSMISSION_MODE_2K: - return lut_prbs_2k[sub_channel_prbs_group]; + prbs_group = lut_prbs_2k[sub_channel_prbs_group]; + break; case TRANSMISSION_MODE_4K: - return lut_prbs_4k[sub_channel_prbs_group]; + prbs_group = lut_prbs_4k[sub_channel_prbs_group]; + break; default: case TRANSMISSION_MODE_8K: - return lut_prbs_8k[sub_channel_prbs_group]; + prbs_group = lut_prbs_8k[sub_channel_prbs_group]; } + + dprintk("sub_channel_prbs_group = %d , subchannel =%d prbs = 0x%04x\n", + sub_channel_prbs_group, subchannel, prbs_group); + + return prbs_group; } static void dib8000_set_13seg_channel(struct dib8000_state *state) @@ -2409,10 +2432,8 @@ static void dib8000_set_isdbt_common_channel(struct dib8000_state *state, u8 seq /* TSB or ISDBT ? apply it now */ if (c->isdbt_sb_mode) { dib8000_set_sb_channel(state); - if (c->isdbt_sb_subchannel < 14) - init_prbs = dib8000_get_init_prbs(state, c->isdbt_sb_subchannel); - else - init_prbs = 0; + init_prbs = dib8000_get_init_prbs(state, + c->isdbt_sb_subchannel); } else { dib8000_set_13seg_channel(state); init_prbs = 0xfff; @@ -3004,6 +3025,7 @@ static int dib8000_tune(struct dvb_frontend *fe) unsigned long *timeout = &state->timeout; unsigned long now = jiffies; + u16 init_prbs; #ifdef DIB8000_AGC_FREEZE u16 agc1, agc2; #endif @@ -3302,8 +3324,10 @@ static int dib8000_tune(struct dvb_frontend *fe) break; case CT_DEMOD_STEP_11: /* 41 : init prbs autosearch */ - if (state->subchannel <= 41) { - dib8000_set_subchannel_prbs(state, dib8000_get_init_prbs(state, state->subchannel)); + init_prbs = dib8000_get_init_prbs(state, state->subchannel); + + if (init_prbs) { + dib8000_set_subchannel_prbs(state, init_prbs); *tune_state = CT_DEMOD_STEP_9; } else { *tune_state = CT_DEMOD_STOP; @@ -4449,8 +4473,10 @@ static struct dvb_frontend *dib8000_init(struct i2c_adapter *i2c_adap, u8 i2c_ad state->timf_default = cfg->pll->timf; - if (dib8000_identify(&state->i2c) == 0) + if (dib8000_identify(&state->i2c) == 0) { + kfree(fe); goto error; + } dibx000_init_i2c_master(&state->i2c_master, DIB8000, state->i2c.adap, state->i2c.addr); diff --git a/drivers/media/dvb-frontends/mn88443x.c b/drivers/media/dvb-frontends/mn88443x.c index e4528784f8477..fff212c0bf3b5 100644 --- a/drivers/media/dvb-frontends/mn88443x.c +++ b/drivers/media/dvb-frontends/mn88443x.c @@ -204,11 +204,18 @@ struct mn88443x_priv { struct regmap *regmap_t; }; -static void mn88443x_cmn_power_on(struct mn88443x_priv *chip) +static int mn88443x_cmn_power_on(struct mn88443x_priv *chip) { + struct device *dev = &chip->client_s->dev; struct regmap *r_t = chip->regmap_t; + int ret; - clk_prepare_enable(chip->mclk); + ret = clk_prepare_enable(chip->mclk); + if (ret) { + dev_err(dev, "Failed to prepare and enable mclk: %d\n", + ret); + return ret; + } gpiod_set_value_cansleep(chip->reset_gpio, 1); usleep_range(100, 1000); @@ -222,6 +229,8 @@ static void mn88443x_cmn_power_on(struct mn88443x_priv *chip) } else { regmap_write(r_t, HIZSET3, 0x8f); } + + return 0; } static void mn88443x_cmn_power_off(struct mn88443x_priv *chip) @@ -738,7 +747,10 @@ static int mn88443x_probe(struct i2c_client *client, chip->fe.demodulator_priv = chip; i2c_set_clientdata(client, chip); - mn88443x_cmn_power_on(chip); + ret = mn88443x_cmn_power_on(chip); + if (ret) + goto err_i2c_t; + mn88443x_s_sleep(chip); mn88443x_t_sleep(chip); diff --git a/drivers/media/firewire/firedtv-avc.c b/drivers/media/firewire/firedtv-avc.c index 2bf9467b917d1..71991f8638e6b 100644 --- a/drivers/media/firewire/firedtv-avc.c +++ b/drivers/media/firewire/firedtv-avc.c @@ -1165,7 +1165,11 @@ int avc_ca_pmt(struct firedtv *fdtv, char *msg, int length) read_pos += program_info_length; write_pos += program_info_length; } - while (read_pos < length) { + while (read_pos + 4 < length) { + if (write_pos + 4 >= sizeof(c->operand) - 4) { + ret = -EINVAL; + goto out; + } c->operand[write_pos++] = msg[read_pos++]; c->operand[write_pos++] = msg[read_pos++]; c->operand[write_pos++] = msg[read_pos++]; @@ -1177,13 +1181,17 @@ int avc_ca_pmt(struct firedtv *fdtv, char *msg, int length) c->operand[write_pos++] = es_info_length >> 8; c->operand[write_pos++] = es_info_length & 0xff; if (es_info_length > 0) { + if (read_pos >= length) { + ret = -EINVAL; + goto out; + } pmt_cmd_id = msg[read_pos++]; if (pmt_cmd_id != 1 && pmt_cmd_id != 4) dev_err(fdtv->device, "invalid pmt_cmd_id %d at stream level\n", pmt_cmd_id); - if (es_info_length > sizeof(c->operand) - 4 - - write_pos) { + if (es_info_length > sizeof(c->operand) - 4 - write_pos || + es_info_length > length - read_pos) { ret = -EINVAL; goto out; } diff --git a/drivers/media/firewire/firedtv-ci.c b/drivers/media/firewire/firedtv-ci.c index 9363d005e2b61..e0d57e09dab0c 100644 --- a/drivers/media/firewire/firedtv-ci.c +++ b/drivers/media/firewire/firedtv-ci.c @@ -134,6 +134,8 @@ static int fdtv_ca_pmt(struct firedtv *fdtv, void *arg) } else { data_length = msg->msg[3]; } + if (data_length > sizeof(msg->msg) - data_pos) + return -EINVAL; return avc_ca_pmt(fdtv, &msg->msg[data_pos], data_length); } diff --git a/drivers/media/i2c/adv7511-v4l2.c b/drivers/media/i2c/adv7511-v4l2.c index 809fa44ed9880..86267e01c2511 100644 --- a/drivers/media/i2c/adv7511-v4l2.c +++ b/drivers/media/i2c/adv7511-v4l2.c @@ -555,7 +555,7 @@ static void log_infoframe(struct v4l2_subdev *sd, const struct adv7511_cfg_read_ buffer[3] = 0; buffer[3] = hdmi_infoframe_checksum(buffer, len + 4); - if (hdmi_infoframe_unpack(&frame, buffer, sizeof(buffer)) < 0) { + if (hdmi_infoframe_unpack(&frame, buffer, len + 4) < 0) { v4l2_err(sd, "%s: unpack of %s infoframe failed\n", __func__, cri->desc); return; } diff --git a/drivers/media/i2c/adv7604.c b/drivers/media/i2c/adv7604.c index b887299ac195f..1cee69919e1b9 100644 --- a/drivers/media/i2c/adv7604.c +++ b/drivers/media/i2c/adv7604.c @@ -2444,7 +2444,7 @@ static int adv76xx_read_infoframe(struct v4l2_subdev *sd, int index, buffer[i + 3] = infoframe_read(sd, adv76xx_cri[index].payload_addr + i); - if (hdmi_infoframe_unpack(frame, buffer, sizeof(buffer)) < 0) { + if (hdmi_infoframe_unpack(frame, buffer, len + 3) < 0) { v4l2_err(sd, "%s: unpack of %s infoframe failed\n", __func__, adv76xx_cri[index].desc); return -ENOENT; diff --git a/drivers/media/i2c/adv7842.c b/drivers/media/i2c/adv7842.c index 02cbab826d0b6..a581e822ce6f5 100644 --- a/drivers/media/i2c/adv7842.c +++ b/drivers/media/i2c/adv7842.c @@ -2574,7 +2574,7 @@ static void log_infoframe(struct v4l2_subdev *sd, struct adv7842_cfg_read_infofr for (i = 0; i < len; i++) buffer[i + 3] = infoframe_read(sd, cri->payload_addr + i); - if (hdmi_infoframe_unpack(&frame, buffer, sizeof(buffer)) < 0) { + if (hdmi_infoframe_unpack(&frame, buffer, len + 3) < 0) { v4l2_err(sd, "%s: unpack of %s infoframe failed\n", __func__, cri->desc); return; } diff --git a/drivers/media/i2c/imx258.c b/drivers/media/i2c/imx258.c index f86ae18bc104b..ffaa4a91e5713 100644 --- a/drivers/media/i2c/imx258.c +++ b/drivers/media/i2c/imx258.c @@ -22,7 +22,7 @@ #define IMX258_CHIP_ID 0x0258 /* V_TIMING internal */ -#define IMX258_VTS_30FPS 0x0c98 +#define IMX258_VTS_30FPS 0x0c50 #define IMX258_VTS_30FPS_2K 0x0638 #define IMX258_VTS_30FPS_VGA 0x034c #define IMX258_VTS_MAX 0xffff @@ -46,7 +46,7 @@ /* Analog gain control */ #define IMX258_REG_ANALOG_GAIN 0x0204 #define IMX258_ANA_GAIN_MIN 0 -#define IMX258_ANA_GAIN_MAX 0x1fff +#define IMX258_ANA_GAIN_MAX 480 #define IMX258_ANA_GAIN_STEP 1 #define IMX258_ANA_GAIN_DEFAULT 0x0 diff --git a/drivers/media/i2c/ir-kbd-i2c.c b/drivers/media/i2c/ir-kbd-i2c.c index 92376592455ee..56674173524fd 100644 --- a/drivers/media/i2c/ir-kbd-i2c.c +++ b/drivers/media/i2c/ir-kbd-i2c.c @@ -791,6 +791,7 @@ static int ir_probe(struct i2c_client *client, const struct i2c_device_id *id) rc_proto = RC_PROTO_BIT_RC5 | RC_PROTO_BIT_RC6_MCE | RC_PROTO_BIT_RC6_6A_32; ir_codes = RC_MAP_HAUPPAUGE; + ir->polling_interval = 125; probe_tx = true; break; } diff --git a/drivers/media/i2c/mt9p031.c b/drivers/media/i2c/mt9p031.c index dc23b9ed510a4..18440c5104ad9 100644 --- a/drivers/media/i2c/mt9p031.c +++ b/drivers/media/i2c/mt9p031.c @@ -78,7 +78,9 @@ #define MT9P031_PIXEL_CLOCK_INVERT (1 << 15) #define MT9P031_PIXEL_CLOCK_SHIFT(n) ((n) << 8) #define MT9P031_PIXEL_CLOCK_DIVIDE(n) ((n) << 0) -#define MT9P031_FRAME_RESTART 0x0b +#define MT9P031_RESTART 0x0b +#define MT9P031_FRAME_PAUSE_RESTART (1 << 1) +#define MT9P031_FRAME_RESTART (1 << 0) #define MT9P031_SHUTTER_DELAY 0x0c #define MT9P031_RST 0x0d #define MT9P031_RST_ENABLE 1 @@ -445,9 +447,23 @@ static int mt9p031_set_params(struct mt9p031 *mt9p031) static int mt9p031_s_stream(struct v4l2_subdev *subdev, int enable) { struct mt9p031 *mt9p031 = to_mt9p031(subdev); + struct i2c_client *client = v4l2_get_subdevdata(subdev); + int val; int ret; if (!enable) { + /* enable pause restart */ + val = MT9P031_FRAME_PAUSE_RESTART; + ret = mt9p031_write(client, MT9P031_RESTART, val); + if (ret < 0) + return ret; + + /* enable restart + keep pause restart set */ + val |= MT9P031_FRAME_RESTART; + ret = mt9p031_write(client, MT9P031_RESTART, val); + if (ret < 0) + return ret; + /* Stop sensor readout */ ret = mt9p031_set_output_control(mt9p031, MT9P031_OUTPUT_CONTROL_CEN, 0); @@ -467,6 +483,16 @@ static int mt9p031_s_stream(struct v4l2_subdev *subdev, int enable) if (ret < 0) return ret; + /* + * - clear pause restart + * - don't clear restart as clearing restart manually can cause + * undefined behavior + */ + val = MT9P031_FRAME_RESTART; + ret = mt9p031_write(client, MT9P031_RESTART, val); + if (ret < 0) + return ret; + return mt9p031_pll_enable(mt9p031); } diff --git a/drivers/media/i2c/ov7670.c b/drivers/media/i2c/ov7670.c index b42b289faaef4..154776d0069ea 100644 --- a/drivers/media/i2c/ov7670.c +++ b/drivers/media/i2c/ov7670.c @@ -2000,7 +2000,6 @@ static int ov7670_remove(struct i2c_client *client) v4l2_async_unregister_subdev(sd); v4l2_ctrl_handler_free(&info->hdl); media_entity_cleanup(&info->sd.entity); - ov7670_power_off(sd); return 0; } diff --git a/drivers/media/i2c/tda1997x.c b/drivers/media/i2c/tda1997x.c index e43d8327b8103..5faffedb0feba 100644 --- a/drivers/media/i2c/tda1997x.c +++ b/drivers/media/i2c/tda1997x.c @@ -1247,13 +1247,13 @@ tda1997x_parse_infoframe(struct tda1997x_state *state, u16 addr) { struct v4l2_subdev *sd = &state->sd; union hdmi_infoframe frame; - u8 buffer[40]; + u8 buffer[40] = { 0 }; u8 reg; int len, err; /* read data */ len = io_readn(sd, addr, sizeof(buffer), buffer); - err = hdmi_infoframe_unpack(&frame, buffer, sizeof(buffer)); + err = hdmi_infoframe_unpack(&frame, buffer, len); if (err) { v4l_err(state->client, "failed parsing %d byte infoframe: 0x%04x/0x%02x\n", @@ -1695,14 +1695,15 @@ static int tda1997x_query_dv_timings(struct v4l2_subdev *sd, struct v4l2_dv_timings *timings) { struct tda1997x_state *state = to_state(sd); + int ret; v4l_dbg(1, debug, state->client, "%s\n", __func__); memset(timings, 0, sizeof(struct v4l2_dv_timings)); mutex_lock(&state->lock); - tda1997x_detect_std(state, timings); + ret = tda1997x_detect_std(state, timings); mutex_unlock(&state->lock); - return 0; + return ret; } static const struct v4l2_subdev_video_ops tda1997x_video_ops = { @@ -1926,13 +1927,13 @@ static int tda1997x_log_infoframe(struct v4l2_subdev *sd, int addr) { struct tda1997x_state *state = to_state(sd); union hdmi_infoframe frame; - u8 buffer[40]; + u8 buffer[40] = { 0 }; int len, err; /* read data */ len = io_readn(sd, addr, sizeof(buffer), buffer); v4l2_dbg(1, debug, sd, "infoframe: addr=%d len=%d\n", addr, len); - err = hdmi_infoframe_unpack(&frame, buffer, sizeof(buffer)); + err = hdmi_infoframe_unpack(&frame, buffer, len); if (err) { v4l_err(state->client, "failed parsing %d byte infoframe: 0x%04x/0x%02x\n", @@ -2233,6 +2234,7 @@ static int tda1997x_core_init(struct v4l2_subdev *sd) /* get initial HDMI status */ state->hdmi_status = io_read(sd, REG_HDMI_FLAGS); + io_write(sd, REG_EDID_ENABLE, EDID_ENABLE_A_EN | EDID_ENABLE_B_EN); return 0; } diff --git a/drivers/media/pci/b2c2/flexcop-pci.c b/drivers/media/pci/b2c2/flexcop-pci.c index a9d9520a94c6d..c9e6c7d663768 100644 --- a/drivers/media/pci/b2c2/flexcop-pci.c +++ b/drivers/media/pci/b2c2/flexcop-pci.c @@ -185,6 +185,8 @@ static irqreturn_t flexcop_pci_isr(int irq, void *dev_id) dma_addr_t cur_addr = fc->read_ibi_reg(fc,dma1_008).dma_0x8.dma_cur_addr << 2; u32 cur_pos = cur_addr - fc_pci->dma[0].dma_addr0; + if (cur_pos > fc_pci->dma[0].size * 2) + goto error; deb_irq("%u irq: %08x cur_addr: %llx: cur_pos: %08x, last_cur_pos: %08x ", jiffies_to_usecs(jiffies - fc_pci->last_irq), @@ -225,6 +227,7 @@ static irqreturn_t flexcop_pci_isr(int irq, void *dev_id) ret = IRQ_NONE; } +error: spin_unlock_irqrestore(&fc_pci->irq_lock, flags); return ret; } diff --git a/drivers/media/pci/bt8xx/bttv-driver.c b/drivers/media/pci/bt8xx/bttv-driver.c index 570530d976d21..6441e7d63d971 100644 --- a/drivers/media/pci/bt8xx/bttv-driver.c +++ b/drivers/media/pci/bt8xx/bttv-driver.c @@ -3898,7 +3898,7 @@ static int bttv_register_video(struct bttv *btv) /* video */ vdev_init(btv, &btv->video_dev, &bttv_video_template, "video"); - btv->video_dev.device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_TUNER | + btv->video_dev.device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_READWRITE | V4L2_CAP_STREAMING; if (btv->tuner_type != TUNER_ABSENT) btv->video_dev.device_caps |= V4L2_CAP_TUNER; @@ -3919,7 +3919,7 @@ static int bttv_register_video(struct bttv *btv) /* vbi */ vdev_init(btv, &btv->vbi_dev, &bttv_video_template, "vbi"); btv->vbi_dev.device_caps = V4L2_CAP_VBI_CAPTURE | V4L2_CAP_READWRITE | - V4L2_CAP_STREAMING | V4L2_CAP_TUNER; + V4L2_CAP_STREAMING; if (btv->tuner_type != TUNER_ABSENT) btv->vbi_dev.device_caps |= V4L2_CAP_TUNER; diff --git a/drivers/media/pci/cx23885/cx23885-alsa.c b/drivers/media/pci/cx23885/cx23885-alsa.c index a8e980c6dacb9..50772c2611cad 100644 --- a/drivers/media/pci/cx23885/cx23885-alsa.c +++ b/drivers/media/pci/cx23885/cx23885-alsa.c @@ -550,7 +550,7 @@ struct cx23885_audio_dev *cx23885_audio_register(struct cx23885_dev *dev) SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1, THIS_MODULE, sizeof(struct cx23885_audio_dev), &card); if (err < 0) - goto error; + goto error_msg; chip = (struct cx23885_audio_dev *) card->private_data; chip->dev = dev; @@ -576,6 +576,7 @@ struct cx23885_audio_dev *cx23885_audio_register(struct cx23885_dev *dev) error: snd_card_free(card); +error_msg: pr_err("%s(): Failed to register analog audio adapter\n", __func__); diff --git a/drivers/media/pci/cx23885/cx23885-core.c b/drivers/media/pci/cx23885/cx23885-core.c index ead0acb7807c8..6747ecb4911b1 100644 --- a/drivers/media/pci/cx23885/cx23885-core.c +++ b/drivers/media/pci/cx23885/cx23885-core.c @@ -2154,7 +2154,7 @@ static int cx23885_initdev(struct pci_dev *pci_dev, err = pci_set_dma_mask(pci_dev, 0xffffffff); if (err) { pr_err("%s/0: Oops: no 32bit PCI DMA ???\n", dev->name); - goto fail_ctrl; + goto fail_dma_set_mask; } err = request_irq(pci_dev->irq, cx23885_irq, @@ -2162,7 +2162,7 @@ static int cx23885_initdev(struct pci_dev *pci_dev, if (err < 0) { pr_err("%s: can't get IRQ %d\n", dev->name, pci_dev->irq); - goto fail_irq; + goto fail_dma_set_mask; } switch (dev->board) { @@ -2184,7 +2184,7 @@ static int cx23885_initdev(struct pci_dev *pci_dev, return 0; -fail_irq: +fail_dma_set_mask: cx23885_dev_unregister(dev); fail_ctrl: v4l2_ctrl_handler_free(hdl); diff --git a/drivers/media/pci/cx25821/cx25821-core.c b/drivers/media/pci/cx25821/cx25821-core.c index 44839a6461e88..534829e352d1d 100644 --- a/drivers/media/pci/cx25821/cx25821-core.c +++ b/drivers/media/pci/cx25821/cx25821-core.c @@ -1340,11 +1340,11 @@ static void cx25821_finidev(struct pci_dev *pci_dev) struct cx25821_dev *dev = get_cx25821(v4l2_dev); cx25821_shutdown(dev); - pci_disable_device(pci_dev); /* unregister stuff */ if (pci_dev->irq) free_irq(pci_dev->irq, dev); + pci_disable_device(pci_dev); cx25821_dev_unregister(dev); v4l2_device_unregister(v4l2_dev); diff --git a/drivers/media/pci/cx88/cx88-mpeg.c b/drivers/media/pci/cx88/cx88-mpeg.c index a57c991b165b1..10d2971ef0624 100644 --- a/drivers/media/pci/cx88/cx88-mpeg.c +++ b/drivers/media/pci/cx88/cx88-mpeg.c @@ -162,6 +162,9 @@ int cx8802_start_dma(struct cx8802_dev *dev, cx_write(MO_TS_GPCNTRL, GP_COUNT_CONTROL_RESET); q->count = 0; + /* clear interrupt status register */ + cx_write(MO_TS_INTSTAT, 0x1f1111); + /* enable irqs */ dprintk(1, "setting the interrupt mask\n"); cx_set(MO_PCI_INTMSK, core->pci_irqmask | PCI_INT_TSINT); diff --git a/drivers/media/pci/ivtv/ivtv-driver.h b/drivers/media/pci/ivtv/ivtv-driver.h index cafba6b1055df..90f38552bd362 100644 --- a/drivers/media/pci/ivtv/ivtv-driver.h +++ b/drivers/media/pci/ivtv/ivtv-driver.h @@ -333,7 +333,6 @@ struct ivtv_stream { struct ivtv *itv; /* for ease of use */ const char *name; /* name of the stream */ int type; /* stream type */ - u32 caps; /* V4L2 capabilities */ struct v4l2_fh *fh; /* pointer to the streaming filehandle */ spinlock_t qlock; /* locks access to the queues */ diff --git a/drivers/media/pci/ivtv/ivtv-ioctl.c b/drivers/media/pci/ivtv/ivtv-ioctl.c index 137853944e461..396cc931f41a2 100644 --- a/drivers/media/pci/ivtv/ivtv-ioctl.c +++ b/drivers/media/pci/ivtv/ivtv-ioctl.c @@ -443,7 +443,7 @@ static int ivtv_g_fmt_vid_out_overlay(struct file *file, void *fh, struct v4l2_f struct ivtv_stream *s = &itv->streams[fh2id(fh)->type]; struct v4l2_window *winfmt = &fmt->fmt.win; - if (!(s->caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY)) + if (!(s->vdev.device_caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY)) return -EINVAL; if (!itv->osd_video_pbase) return -EINVAL; @@ -554,7 +554,7 @@ static int ivtv_try_fmt_vid_out_overlay(struct file *file, void *fh, struct v4l2 u32 chromakey = fmt->fmt.win.chromakey; u8 global_alpha = fmt->fmt.win.global_alpha; - if (!(s->caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY)) + if (!(s->vdev.device_caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY)) return -EINVAL; if (!itv->osd_video_pbase) return -EINVAL; @@ -1386,7 +1386,7 @@ static int ivtv_g_fbuf(struct file *file, void *fh, struct v4l2_framebuffer *fb) 0, }; - if (!(s->caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY)) + if (!(s->vdev.device_caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY)) return -ENOTTY; if (!itv->osd_video_pbase) return -ENOTTY; @@ -1453,7 +1453,7 @@ static int ivtv_s_fbuf(struct file *file, void *fh, const struct v4l2_framebuffe struct ivtv_stream *s = &itv->streams[fh2id(fh)->type]; struct yuv_playback_info *yi = &itv->yuv_info; - if (!(s->caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY)) + if (!(s->vdev.device_caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY)) return -ENOTTY; if (!itv->osd_video_pbase) return -ENOTTY; @@ -1473,7 +1473,7 @@ static int ivtv_overlay(struct file *file, void *fh, unsigned int on) struct ivtv *itv = id->itv; struct ivtv_stream *s = &itv->streams[fh2id(fh)->type]; - if (!(s->caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY)) + if (!(s->vdev.device_caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY)) return -ENOTTY; if (!itv->osd_video_pbase) return -ENOTTY; diff --git a/drivers/media/pci/ivtv/ivtv-streams.c b/drivers/media/pci/ivtv/ivtv-streams.c index f7de9118f609d..200d2100dbffd 100644 --- a/drivers/media/pci/ivtv/ivtv-streams.c +++ b/drivers/media/pci/ivtv/ivtv-streams.c @@ -176,7 +176,7 @@ static void ivtv_stream_init(struct ivtv *itv, int type) s->itv = itv; s->type = type; s->name = ivtv_stream_info[type].name; - s->caps = ivtv_stream_info[type].v4l2_caps; + s->vdev.device_caps = ivtv_stream_info[type].v4l2_caps; if (ivtv_stream_info[type].pio) s->dma = PCI_DMA_NONE; @@ -299,12 +299,9 @@ static int ivtv_reg_dev(struct ivtv *itv, int type) if (s_mpg->vdev.v4l2_dev) num = s_mpg->vdev.num + ivtv_stream_info[type].num_offset; } - s->vdev.device_caps = s->caps; - if (itv->osd_video_pbase) { - itv->streams[IVTV_DEC_STREAM_TYPE_YUV].vdev.device_caps |= - V4L2_CAP_VIDEO_OUTPUT_OVERLAY; - itv->streams[IVTV_DEC_STREAM_TYPE_MPG].vdev.device_caps |= - V4L2_CAP_VIDEO_OUTPUT_OVERLAY; + if (itv->osd_video_pbase && (type == IVTV_DEC_STREAM_TYPE_YUV || + type == IVTV_DEC_STREAM_TYPE_MPG)) { + s->vdev.device_caps |= V4L2_CAP_VIDEO_OUTPUT_OVERLAY; itv->v4l2_cap |= V4L2_CAP_VIDEO_OUTPUT_OVERLAY; } video_set_drvdata(&s->vdev, s); diff --git a/drivers/media/pci/netup_unidvb/netup_unidvb_core.c b/drivers/media/pci/netup_unidvb/netup_unidvb_core.c index 80a7c41baa901..eb5621c9ebf85 100644 --- a/drivers/media/pci/netup_unidvb/netup_unidvb_core.c +++ b/drivers/media/pci/netup_unidvb/netup_unidvb_core.c @@ -258,19 +258,24 @@ static irqreturn_t netup_unidvb_isr(int irq, void *dev_id) if ((reg40 & AVL_IRQ_ASSERTED) != 0) { /* IRQ is being signaled */ reg_isr = readw(ndev->bmmio0 + REG_ISR); - if (reg_isr & NETUP_UNIDVB_IRQ_I2C0) { - iret = netup_i2c_interrupt(&ndev->i2c[0]); - } else if (reg_isr & NETUP_UNIDVB_IRQ_I2C1) { - iret = netup_i2c_interrupt(&ndev->i2c[1]); - } else if (reg_isr & NETUP_UNIDVB_IRQ_SPI) { + if (reg_isr & NETUP_UNIDVB_IRQ_SPI) iret = netup_spi_interrupt(ndev->spi); - } else if (reg_isr & NETUP_UNIDVB_IRQ_DMA1) { - iret = netup_dma_interrupt(&ndev->dma[0]); - } else if (reg_isr & NETUP_UNIDVB_IRQ_DMA2) { - iret = netup_dma_interrupt(&ndev->dma[1]); - } else if (reg_isr & NETUP_UNIDVB_IRQ_CI) { - iret = netup_ci_interrupt(ndev); + else if (!ndev->old_fw) { + if (reg_isr & NETUP_UNIDVB_IRQ_I2C0) { + iret = netup_i2c_interrupt(&ndev->i2c[0]); + } else if (reg_isr & NETUP_UNIDVB_IRQ_I2C1) { + iret = netup_i2c_interrupt(&ndev->i2c[1]); + } else if (reg_isr & NETUP_UNIDVB_IRQ_DMA1) { + iret = netup_dma_interrupt(&ndev->dma[0]); + } else if (reg_isr & NETUP_UNIDVB_IRQ_DMA2) { + iret = netup_dma_interrupt(&ndev->dma[1]); + } else if (reg_isr & NETUP_UNIDVB_IRQ_CI) { + iret = netup_ci_interrupt(ndev); + } else { + goto err; + } } else { +err: dev_err(&pci_dev->dev, "%s(): unknown interrupt 0x%x\n", __func__, reg_isr); diff --git a/drivers/media/pci/ngene/ngene-cards.c b/drivers/media/pci/ngene/ngene-cards.c index 6185806a00e00..8bfb3d8ea6103 100644 --- a/drivers/media/pci/ngene/ngene-cards.c +++ b/drivers/media/pci/ngene/ngene-cards.c @@ -1186,7 +1186,7 @@ MODULE_DEVICE_TABLE(pci, ngene_id_tbl); /****************************************************************************/ static pci_ers_result_t ngene_error_detected(struct pci_dev *dev, - enum pci_channel_state state) + pci_channel_state_t state) { dev_err(&dev->dev, "PCI error\n"); if (state == pci_channel_io_perm_failure) diff --git a/drivers/media/pci/saa7146/hexium_gemini.c b/drivers/media/pci/saa7146/hexium_gemini.c index f962269306707..86d4e2abed82a 100644 --- a/drivers/media/pci/saa7146/hexium_gemini.c +++ b/drivers/media/pci/saa7146/hexium_gemini.c @@ -284,7 +284,12 @@ static int hexium_attach(struct saa7146_dev *dev, struct saa7146_pci_extension_d hexium_set_input(hexium, 0); hexium->cur_input = 0; - saa7146_vv_init(dev, &vv_data); + ret = saa7146_vv_init(dev, &vv_data); + if (ret) { + i2c_del_adapter(&hexium->i2c_adapter); + kfree(hexium); + return ret; + } vv_data.vid_ops.vidioc_enum_input = vidioc_enum_input; vv_data.vid_ops.vidioc_g_input = vidioc_g_input; diff --git a/drivers/media/pci/saa7146/hexium_orion.c b/drivers/media/pci/saa7146/hexium_orion.c index bf5e55348f159..31388597386aa 100644 --- a/drivers/media/pci/saa7146/hexium_orion.c +++ b/drivers/media/pci/saa7146/hexium_orion.c @@ -355,10 +355,16 @@ static struct saa7146_ext_vv vv_data; static int hexium_attach(struct saa7146_dev *dev, struct saa7146_pci_extension_data *info) { struct hexium *hexium = (struct hexium *) dev->ext_priv; + int ret; DEB_EE("\n"); - saa7146_vv_init(dev, &vv_data); + ret = saa7146_vv_init(dev, &vv_data); + if (ret) { + pr_err("Error in saa7146_vv_init()\n"); + return ret; + } + vv_data.vid_ops.vidioc_enum_input = vidioc_enum_input; vv_data.vid_ops.vidioc_g_input = vidioc_g_input; vv_data.vid_ops.vidioc_s_input = vidioc_s_input; diff --git a/drivers/media/pci/saa7146/mxb.c b/drivers/media/pci/saa7146/mxb.c index 952ea250feda0..58fe4c1619eeb 100644 --- a/drivers/media/pci/saa7146/mxb.c +++ b/drivers/media/pci/saa7146/mxb.c @@ -683,10 +683,16 @@ static struct saa7146_ext_vv vv_data; static int mxb_attach(struct saa7146_dev *dev, struct saa7146_pci_extension_data *info) { struct mxb *mxb; + int ret; DEB_EE("dev:%p\n", dev); - saa7146_vv_init(dev, &vv_data); + ret = saa7146_vv_init(dev, &vv_data); + if (ret) { + ERR("Error in saa7146_vv_init()"); + return ret; + } + if (mxb_probe(dev)) { saa7146_vv_release(dev); return -1; diff --git a/drivers/media/platform/aspeed-video.c b/drivers/media/platform/aspeed-video.c index 6dde49d9aa4c2..c3f0b143330a5 100644 --- a/drivers/media/platform/aspeed-video.c +++ b/drivers/media/platform/aspeed-video.c @@ -151,7 +151,7 @@ #define VE_SRC_TB_EDGE_DET_BOT GENMASK(28, VE_SRC_TB_EDGE_DET_BOT_SHF) #define VE_MODE_DETECT_STATUS 0x098 -#define VE_MODE_DETECT_H_PIXELS GENMASK(11, 0) +#define VE_MODE_DETECT_H_PERIOD GENMASK(11, 0) #define VE_MODE_DETECT_V_LINES_SHF 16 #define VE_MODE_DETECT_V_LINES GENMASK(27, VE_MODE_DETECT_V_LINES_SHF) #define VE_MODE_DETECT_STATUS_VSYNC BIT(28) @@ -162,6 +162,8 @@ #define VE_SYNC_STATUS_VSYNC_SHF 16 #define VE_SYNC_STATUS_VSYNC GENMASK(27, VE_SYNC_STATUS_VSYNC_SHF) +#define VE_H_TOTAL_PIXELS 0x0A0 + #define VE_INTERRUPT_CTRL 0x304 #define VE_INTERRUPT_STATUS 0x308 #define VE_INTERRUPT_MODE_DETECT_WD BIT(0) @@ -477,6 +479,10 @@ static void aspeed_video_enable_mode_detect(struct aspeed_video *video) aspeed_video_update(video, VE_INTERRUPT_CTRL, 0, VE_INTERRUPT_MODE_DETECT); + /* Disable mode detect in order to re-trigger */ + aspeed_video_update(video, VE_SEQ_CTRL, + VE_SEQ_CTRL_TRIG_MODE_DET, 0); + /* Trigger mode detect */ aspeed_video_update(video, VE_SEQ_CTRL, 0, VE_SEQ_CTRL_TRIG_MODE_DET); } @@ -529,6 +535,8 @@ static void aspeed_video_irq_res_change(struct aspeed_video *video, ulong delay) set_bit(VIDEO_RES_CHANGE, &video->flags); clear_bit(VIDEO_FRAME_INPRG, &video->flags); + video->v4l2_input_status = V4L2_IN_ST_NO_SIGNAL; + aspeed_video_off(video); aspeed_video_bufs_done(video, VB2_BUF_STATE_ERROR); @@ -737,6 +745,7 @@ static void aspeed_video_get_resolution(struct aspeed_video *video) u32 src_lr_edge; u32 src_tb_edge; u32 sync; + u32 htotal; struct v4l2_bt_timings *det = &video->detected_timings; det->width = MIN_WIDTH; @@ -764,10 +773,6 @@ static void aspeed_video_get_resolution(struct aspeed_video *video) return; } - /* Disable mode detect in order to re-trigger */ - aspeed_video_update(video, VE_SEQ_CTRL, - VE_SEQ_CTRL_TRIG_MODE_DET, 0); - aspeed_video_check_and_set_polarity(video); aspeed_video_enable_mode_detect(video); @@ -785,6 +790,7 @@ static void aspeed_video_get_resolution(struct aspeed_video *video) src_tb_edge = aspeed_video_read(video, VE_SRC_TB_EDGE_DET); mds = aspeed_video_read(video, VE_MODE_DETECT_STATUS); sync = aspeed_video_read(video, VE_SYNC_STATUS); + htotal = aspeed_video_read(video, VE_H_TOTAL_PIXELS); video->frame_bottom = (src_tb_edge & VE_SRC_TB_EDGE_DET_BOT) >> VE_SRC_TB_EDGE_DET_BOT_SHF; @@ -801,8 +807,7 @@ static void aspeed_video_get_resolution(struct aspeed_video *video) VE_SRC_LR_EDGE_DET_RT_SHF; video->frame_left = src_lr_edge & VE_SRC_LR_EDGE_DET_LEFT; det->hfrontporch = video->frame_left; - det->hbackporch = (mds & VE_MODE_DETECT_H_PIXELS) - - video->frame_right; + det->hbackporch = htotal - video->frame_right; det->hsync = sync & VE_SYNC_STATUS_HSYNC; if (video->frame_left > video->frame_right) continue; @@ -1315,7 +1320,6 @@ static void aspeed_video_resolution_work(struct work_struct *work) struct delayed_work *dwork = to_delayed_work(work); struct aspeed_video *video = container_of(dwork, struct aspeed_video, res_work); - u32 input_status = video->v4l2_input_status; aspeed_video_on(video); @@ -1328,8 +1332,7 @@ static void aspeed_video_resolution_work(struct work_struct *work) aspeed_video_get_resolution(video); if (video->detected_timings.width != video->active_timings.width || - video->detected_timings.height != video->active_timings.height || - input_status != video->v4l2_input_status) { + video->detected_timings.height != video->active_timings.height) { static const struct v4l2_event ev = { .type = V4L2_EVENT_SOURCE_CHANGE, .u.src_change.changes = V4L2_EVENT_SRC_CH_RESOLUTION, @@ -1685,6 +1688,7 @@ static int aspeed_video_probe(struct platform_device *pdev) rc = aspeed_video_setup_video(video); if (rc) { + aspeed_video_free_buf(video, &video->jpeg); clk_unprepare(video->vclk); clk_unprepare(video->eclk); return rc; @@ -1712,8 +1716,7 @@ static int aspeed_video_remove(struct platform_device *pdev) v4l2_device_unregister(v4l2_dev); - dma_free_coherent(video->dev, VE_JPEG_HEADER_SIZE, video->jpeg.virt, - video->jpeg.dma); + aspeed_video_free_buf(video, &video->jpeg); of_reserved_mem_device_release(dev); diff --git a/drivers/media/platform/coda/coda-bit.c b/drivers/media/platform/coda/coda-bit.c index 00c7bed3dd572..e6b68be09f8f0 100644 --- a/drivers/media/platform/coda/coda-bit.c +++ b/drivers/media/platform/coda/coda-bit.c @@ -2023,17 +2023,25 @@ static int __coda_start_decoding(struct coda_ctx *ctx) u32 src_fourcc, dst_fourcc; int ret; + q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); + q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); + src_fourcc = q_data_src->fourcc; + dst_fourcc = q_data_dst->fourcc; + if (!ctx->initialized) { ret = __coda_decoder_seq_init(ctx); if (ret < 0) return ret; + } else { + ctx->frame_mem_ctrl &= ~(CODA_FRAME_CHROMA_INTERLEAVE | (0x3 << 9) | + CODA9_FRAME_TILED2LINEAR); + if (dst_fourcc == V4L2_PIX_FMT_NV12 || dst_fourcc == V4L2_PIX_FMT_YUYV) + ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE; + if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP) + ctx->frame_mem_ctrl |= (0x3 << 9) | + ((ctx->use_vdoa) ? 0 : CODA9_FRAME_TILED2LINEAR); } - q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); - q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); - src_fourcc = q_data_src->fourcc; - dst_fourcc = q_data_dst->fourcc; - coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR); ret = coda_alloc_framebuffers(ctx, q_data_dst, src_fourcc); diff --git a/drivers/media/platform/coda/coda-common.c b/drivers/media/platform/coda/coda-common.c index 834f11fe9dc29..ebe5e44b6fd38 100644 --- a/drivers/media/platform/coda/coda-common.c +++ b/drivers/media/platform/coda/coda-common.c @@ -372,6 +372,7 @@ static struct vdoa_data *coda_get_vdoa_data(void) if (!vdoa_data) vdoa_data = ERR_PTR(-EPROBE_DEFER); + put_device(&vdoa_pdev->dev); out: of_node_put(vdoa_node); @@ -1191,7 +1192,8 @@ static int coda_enum_frameintervals(struct file *file, void *fh, struct v4l2_frmivalenum *f) { struct coda_ctx *ctx = fh_to_ctx(fh); - int i; + struct coda_q_data *q_data; + const struct coda_codec *codec; if (f->index) return -EINVAL; @@ -1200,12 +1202,19 @@ static int coda_enum_frameintervals(struct file *file, void *fh, if (!ctx->vdoa && f->pixel_format == V4L2_PIX_FMT_YUYV) return -EINVAL; - for (i = 0; i < CODA_MAX_FORMATS; i++) { - if (f->pixel_format == ctx->cvd->src_formats[i] || - f->pixel_format == ctx->cvd->dst_formats[i]) - break; + if (coda_format_normalize_yuv(f->pixel_format) == V4L2_PIX_FMT_YUV420) { + q_data = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); + codec = coda_find_codec(ctx->dev, f->pixel_format, + q_data->fourcc); + } else { + codec = coda_find_codec(ctx->dev, V4L2_PIX_FMT_YUV420, + f->pixel_format); } - if (i == CODA_MAX_FORMATS) + if (!codec) + return -EINVAL; + + if (f->width < MIN_W || f->width > codec->max_w || + f->height < MIN_H || f->height > codec->max_h) return -EINVAL; f->type = V4L2_FRMIVAL_TYPE_CONTINUOUS; @@ -2163,8 +2172,8 @@ static void coda_encode_ctrls(struct coda_ctx *ctx) V4L2_CID_MPEG_VIDEO_H264_CHROMA_QP_INDEX_OFFSET, -12, 12, 1, 0); v4l2_ctrl_new_std_menu(&ctx->ctrls, &coda_ctrl_ops, V4L2_CID_MPEG_VIDEO_H264_PROFILE, - V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE, 0x0, - V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE); + V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_BASELINE, 0x0, + V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_BASELINE); if (ctx->dev->devtype->product == CODA_HX4 || ctx->dev->devtype->product == CODA_7541) { v4l2_ctrl_new_std_menu(&ctx->ctrls, &coda_ctrl_ops, @@ -2178,12 +2187,15 @@ static void coda_encode_ctrls(struct coda_ctx *ctx) if (ctx->dev->devtype->product == CODA_960) { v4l2_ctrl_new_std_menu(&ctx->ctrls, &coda_ctrl_ops, V4L2_CID_MPEG_VIDEO_H264_LEVEL, - V4L2_MPEG_VIDEO_H264_LEVEL_4_0, - ~((1 << V4L2_MPEG_VIDEO_H264_LEVEL_2_0) | + V4L2_MPEG_VIDEO_H264_LEVEL_4_2, + ~((1 << V4L2_MPEG_VIDEO_H264_LEVEL_1_0) | + (1 << V4L2_MPEG_VIDEO_H264_LEVEL_2_0) | (1 << V4L2_MPEG_VIDEO_H264_LEVEL_3_0) | (1 << V4L2_MPEG_VIDEO_H264_LEVEL_3_1) | (1 << V4L2_MPEG_VIDEO_H264_LEVEL_3_2) | - (1 << V4L2_MPEG_VIDEO_H264_LEVEL_4_0)), + (1 << V4L2_MPEG_VIDEO_H264_LEVEL_4_0) | + (1 << V4L2_MPEG_VIDEO_H264_LEVEL_4_1) | + (1 << V4L2_MPEG_VIDEO_H264_LEVEL_4_2)), V4L2_MPEG_VIDEO_H264_LEVEL_4_0); } v4l2_ctrl_new_std(&ctx->ctrls, &coda_ctrl_ops, @@ -2245,7 +2257,7 @@ static void coda_decode_ctrls(struct coda_ctx *ctx) ctx->h264_profile_ctrl = v4l2_ctrl_new_std_menu(&ctx->ctrls, &coda_ctrl_ops, V4L2_CID_MPEG_VIDEO_H264_PROFILE, V4L2_MPEG_VIDEO_H264_PROFILE_HIGH, - ~((1 << V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE) | + ~((1 << V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_BASELINE) | (1 << V4L2_MPEG_VIDEO_H264_PROFILE_MAIN) | (1 << V4L2_MPEG_VIDEO_H264_PROFILE_HIGH)), V4L2_MPEG_VIDEO_H264_PROFILE_HIGH); diff --git a/drivers/media/platform/coda/imx-vdoa.c b/drivers/media/platform/coda/imx-vdoa.c index 8bc0d83718193..dd6e2e320264e 100644 --- a/drivers/media/platform/coda/imx-vdoa.c +++ b/drivers/media/platform/coda/imx-vdoa.c @@ -287,7 +287,11 @@ static int vdoa_probe(struct platform_device *pdev) struct resource *res; int ret; - dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); + ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); + if (ret) { + dev_err(&pdev->dev, "DMA enable failed\n"); + return ret; + } vdoa = devm_kzalloc(&pdev->dev, sizeof(*vdoa), GFP_KERNEL); if (!vdoa) diff --git a/drivers/media/platform/davinci/vpif.c b/drivers/media/platform/davinci/vpif.c index df66461f5d4f1..e7e8eba048acf 100644 --- a/drivers/media/platform/davinci/vpif.c +++ b/drivers/media/platform/davinci/vpif.c @@ -496,6 +496,7 @@ static int vpif_probe(struct platform_device *pdev) static int vpif_remove(struct platform_device *pdev) { + pm_runtime_put(&pdev->dev); pm_runtime_disable(&pdev->dev); return 0; } diff --git a/drivers/media/platform/exynos4-is/fimc-is.c b/drivers/media/platform/exynos4-is/fimc-is.c index 64148b7e0d986..9bb14bb2e4987 100644 --- a/drivers/media/platform/exynos4-is/fimc-is.c +++ b/drivers/media/platform/exynos4-is/fimc-is.c @@ -141,7 +141,7 @@ static int fimc_is_enable_clocks(struct fimc_is *is) dev_err(&is->pdev->dev, "clock %s enable failed\n", fimc_is_clocks[i]); for (--i; i >= 0; i--) - clk_disable(is->clocks[i]); + clk_disable_unprepare(is->clocks[i]); return ret; } pr_debug("enabled clock: %s\n", fimc_is_clocks[i]); diff --git a/drivers/media/platform/exynos4-is/fimc-isp-video.h b/drivers/media/platform/exynos4-is/fimc-isp-video.h index edcb3a5e3cb90..2dd4ddbc748a1 100644 --- a/drivers/media/platform/exynos4-is/fimc-isp-video.h +++ b/drivers/media/platform/exynos4-is/fimc-isp-video.h @@ -32,7 +32,7 @@ static inline int fimc_isp_video_device_register(struct fimc_isp *isp, return 0; } -void fimc_isp_video_device_unregister(struct fimc_isp *isp, +static inline void fimc_isp_video_device_unregister(struct fimc_isp *isp, enum v4l2_buf_type type) { } diff --git a/drivers/media/platform/imx-pxp.c b/drivers/media/platform/imx-pxp.c index 38d9423223025..3c36cefddec7c 100644 --- a/drivers/media/platform/imx-pxp.c +++ b/drivers/media/platform/imx-pxp.c @@ -1664,6 +1664,8 @@ static int pxp_probe(struct platform_device *pdev) if (irq < 0) return irq; + spin_lock_init(&dev->irqlock); + ret = devm_request_threaded_irq(&pdev->dev, irq, NULL, pxp_irq_handler, IRQF_ONESHOT, dev_name(&pdev->dev), dev); if (ret < 0) { @@ -1681,8 +1683,6 @@ static int pxp_probe(struct platform_device *pdev) goto err_clk; } - spin_lock_init(&dev->irqlock); - ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev); if (ret) goto err_clk; diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c index 1d82aa2b6017c..dea0ee2cb7245 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c @@ -209,11 +209,11 @@ static int fops_vcodec_release(struct file *file) mtk_v4l2_debug(1, "[%d] encoder", ctx->id); mutex_lock(&dev->dev_mutex); + v4l2_m2m_ctx_release(ctx->m2m_ctx); mtk_vcodec_enc_release(ctx); v4l2_fh_del(&ctx->fh); v4l2_fh_exit(&ctx->fh); v4l2_ctrl_handler_free(&ctx->ctrl_hdl); - v4l2_m2m_ctx_release(ctx->m2m_ctx); list_del_init(&ctx->list); kfree(ctx); diff --git a/drivers/media/platform/mtk-vpu/mtk_vpu.c b/drivers/media/platform/mtk-vpu/mtk_vpu.c index cc2ff40d060d1..acf64723f9381 100644 --- a/drivers/media/platform/mtk-vpu/mtk_vpu.c +++ b/drivers/media/platform/mtk-vpu/mtk_vpu.c @@ -809,7 +809,8 @@ static int mtk_vpu_probe(struct platform_device *pdev) vpu->wdt.wq = create_singlethread_workqueue("vpu_wdt"); if (!vpu->wdt.wq) { dev_err(dev, "initialize wdt workqueue failed\n"); - return -ENOMEM; + ret = -ENOMEM; + goto clk_unprepare; } INIT_WORK(&vpu->wdt.ws, vpu_wdt_reset_func); mutex_init(&vpu->vpu_mutex); @@ -908,6 +909,8 @@ static int mtk_vpu_probe(struct platform_device *pdev) vpu_clock_disable(vpu); workqueue_destroy: destroy_workqueue(vpu->wdt.wq); +clk_unprepare: + clk_unprepare(vpu->clk); return ret; } diff --git a/drivers/media/platform/qcom/venus/core.c b/drivers/media/platform/qcom/venus/core.c index bbc430a003443..7b52d3e5d3f89 100644 --- a/drivers/media/platform/qcom/venus/core.c +++ b/drivers/media/platform/qcom/venus/core.c @@ -289,11 +289,11 @@ static int venus_probe(struct platform_device *pdev) ret = venus_firmware_init(core); if (ret) - goto err_runtime_disable; + goto err_of_depopulate; ret = venus_boot(core); if (ret) - goto err_runtime_disable; + goto err_firmware_deinit; ret = hfi_core_resume(core, true); if (ret) @@ -329,6 +329,10 @@ static int venus_probe(struct platform_device *pdev) hfi_core_deinit(core, false); err_venus_shutdown: venus_shutdown(core); +err_firmware_deinit: + venus_firmware_deinit(core); +err_of_depopulate: + of_platform_depopulate(dev); err_runtime_disable: pm_runtime_put_noidle(dev); pm_runtime_set_suspended(dev); diff --git a/drivers/media/platform/qcom/venus/hfi.c b/drivers/media/platform/qcom/venus/hfi.c index 3d8b1284d1f35..68964a80fe619 100644 --- a/drivers/media/platform/qcom/venus/hfi.c +++ b/drivers/media/platform/qcom/venus/hfi.c @@ -104,6 +104,9 @@ int hfi_core_deinit(struct venus_core *core, bool blocking) mutex_lock(&core->lock); } + if (!core->ops) + goto unlock; + ret = core->ops->core_deinit(core); if (!ret) diff --git a/drivers/media/platform/qcom/venus/venc.c b/drivers/media/platform/qcom/venus/venc.c index 30028ceb548b1..766ca497f8565 100644 --- a/drivers/media/platform/qcom/venus/venc.c +++ b/drivers/media/platform/qcom/venus/venc.c @@ -308,6 +308,8 @@ venc_try_fmt_common(struct venus_inst *inst, struct v4l2_format *f) else return NULL; fmt = find_format(inst, pixmp->pixelformat, f->type); + if (!fmt) + return NULL; } pixmp->width = clamp(pixmp->width, frame_width_min(inst), diff --git a/drivers/media/platform/rcar-vin/rcar-csi2.c b/drivers/media/platform/rcar-vin/rcar-csi2.c index d27eccfa57cae..db3d5d463f53e 100644 --- a/drivers/media/platform/rcar-vin/rcar-csi2.c +++ b/drivers/media/platform/rcar-vin/rcar-csi2.c @@ -430,16 +430,23 @@ static int rcsi2_wait_phy_start(struct rcar_csi2 *priv) static int rcsi2_set_phypll(struct rcar_csi2 *priv, unsigned int mbps) { const struct rcsi2_mbps_reg *hsfreq; + const struct rcsi2_mbps_reg *hsfreq_prev = NULL; - for (hsfreq = priv->info->hsfreqrange; hsfreq->mbps != 0; hsfreq++) + for (hsfreq = priv->info->hsfreqrange; hsfreq->mbps != 0; hsfreq++) { if (hsfreq->mbps >= mbps) break; + hsfreq_prev = hsfreq; + } if (!hsfreq->mbps) { dev_err(priv->dev, "Unsupported PHY speed (%u Mbps)", mbps); return -ERANGE; } + if (hsfreq_prev && + ((mbps - hsfreq_prev->mbps) <= (hsfreq->mbps - mbps))) + hsfreq = hsfreq_prev; + rcsi2_write(priv, PHYPLL_REG, PHYPLL_HSFREQRANGE(hsfreq->reg)); return 0; @@ -488,6 +495,8 @@ static int rcsi2_start_receiver(struct rcar_csi2 *priv) /* Code is validated in set_fmt. */ format = rcsi2_code_to_fmt(priv->mf.code); + if (!format) + return -EINVAL; /* * Enable all supported CSI-2 channels with virtual channel and @@ -902,10 +911,17 @@ static int rcsi2_phtw_write_mbps(struct rcar_csi2 *priv, unsigned int mbps, const struct rcsi2_mbps_reg *values, u16 code) { const struct rcsi2_mbps_reg *value; + const struct rcsi2_mbps_reg *prev_value = NULL; - for (value = values; value->mbps; value++) + for (value = values; value->mbps; value++) { if (value->mbps >= mbps) break; + prev_value = value; + } + + if (prev_value && + ((mbps - prev_value->mbps) <= (value->mbps - mbps))) + value = prev_value; if (!value->mbps) { dev_err(priv->dev, "Unsupported PHY speed (%u Mbps)", mbps); diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc.c b/drivers/media/platform/s5p-mfc/s5p_mfc.c index b776f83e395e0..9faecd049002f 100644 --- a/drivers/media/platform/s5p-mfc/s5p_mfc.c +++ b/drivers/media/platform/s5p-mfc/s5p_mfc.c @@ -1279,11 +1279,15 @@ static int s5p_mfc_probe(struct platform_device *pdev) spin_lock_init(&dev->condlock); dev->plat_dev = pdev; if (!dev->plat_dev) { - dev_err(&pdev->dev, "No platform data specified\n"); + mfc_err("No platform data specified\n"); return -ENODEV; } dev->variant = of_device_get_match_data(&pdev->dev); + if (!dev->variant) { + dev_err(&pdev->dev, "Failed to get device MFC hardware variant information\n"); + return -ENOENT; + } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); dev->regs_base = devm_ioremap_resource(&pdev->dev, res); diff --git a/drivers/media/platform/sti/delta/delta-v4l2.c b/drivers/media/platform/sti/delta/delta-v4l2.c index 2791107e641bc..29732b49a2cdb 100644 --- a/drivers/media/platform/sti/delta/delta-v4l2.c +++ b/drivers/media/platform/sti/delta/delta-v4l2.c @@ -1862,7 +1862,7 @@ static int delta_probe(struct platform_device *pdev) if (ret) { dev_err(delta->dev, "%s failed to initialize firmware ipc channel\n", DELTA_PREFIX); - goto err; + goto err_pm_disable; } /* register all available decoders */ @@ -1876,7 +1876,7 @@ static int delta_probe(struct platform_device *pdev) if (ret) { dev_err(delta->dev, "%s failed to register V4L2 device\n", DELTA_PREFIX); - goto err; + goto err_pm_disable; } delta->work_queue = create_workqueue(DELTA_NAME); @@ -1901,6 +1901,8 @@ static int delta_probe(struct platform_device *pdev) destroy_workqueue(delta->work_queue); err_v4l2: v4l2_device_unregister(&delta->v4l2_dev); +err_pm_disable: + pm_runtime_disable(dev); err: return ret; } diff --git a/drivers/media/platform/stm32/stm32-dcmi.c b/drivers/media/platform/stm32/stm32-dcmi.c index d41475f56ab54..72798aae7a628 100644 --- a/drivers/media/platform/stm32/stm32-dcmi.c +++ b/drivers/media/platform/stm32/stm32-dcmi.c @@ -135,6 +135,7 @@ struct stm32_dcmi { int sequence; struct list_head buffers; struct dcmi_buf *active; + int irq; struct v4l2_device v4l2_dev; struct video_device *vdev; @@ -1720,6 +1721,14 @@ static int dcmi_graph_notify_complete(struct v4l2_async_notifier *notifier) return ret; } + ret = devm_request_threaded_irq(dcmi->dev, dcmi->irq, dcmi_irq_callback, + dcmi_irq_thread, IRQF_ONESHOT, + dev_name(dcmi->dev), dcmi); + if (ret) { + dev_err(dcmi->dev, "Unable to request irq %d\n", dcmi->irq); + return ret; + } + return 0; } @@ -1881,6 +1890,8 @@ static int dcmi_probe(struct platform_device *pdev) if (irq <= 0) return irq ? irq : -ENXIO; + dcmi->irq = irq; + dcmi->res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!dcmi->res) { dev_err(&pdev->dev, "Could not get resource\n"); @@ -1893,14 +1904,6 @@ static int dcmi_probe(struct platform_device *pdev) return PTR_ERR(dcmi->regs); } - ret = devm_request_threaded_irq(&pdev->dev, irq, dcmi_irq_callback, - dcmi_irq_thread, IRQF_ONESHOT, - dev_name(&pdev->dev), dcmi); - if (ret) { - dev_err(&pdev->dev, "Unable to request irq %d\n", irq); - return ret; - } - mclk = devm_clk_get(&pdev->dev, "mclk"); if (IS_ERR(mclk)) { if (PTR_ERR(mclk) != -EPROBE_DEFER) diff --git a/drivers/media/platform/tegra-cec/tegra_cec.c b/drivers/media/platform/tegra-cec/tegra_cec.c index a632602131f21..efb80a78d2fa2 100644 --- a/drivers/media/platform/tegra-cec/tegra_cec.c +++ b/drivers/media/platform/tegra-cec/tegra_cec.c @@ -366,7 +366,11 @@ static int tegra_cec_probe(struct platform_device *pdev) return -ENOENT; } - clk_prepare_enable(cec->clk); + ret = clk_prepare_enable(cec->clk); + if (ret) { + dev_err(&pdev->dev, "Unable to prepare clock for CEC\n"); + return ret; + } /* set context info. */ cec->dev = &pdev->dev; @@ -446,9 +450,7 @@ static int tegra_cec_resume(struct platform_device *pdev) dev_notice(&pdev->dev, "Resuming\n"); - clk_prepare_enable(cec->clk); - - return 0; + return clk_prepare_enable(cec->clk); } #endif diff --git a/drivers/media/platform/vim2m.c b/drivers/media/platform/vim2m.c index 8d6b09623d884..6fba00e03c67b 100644 --- a/drivers/media/platform/vim2m.c +++ b/drivers/media/platform/vim2m.c @@ -1333,12 +1333,6 @@ static int vim2m_probe(struct platform_device *pdev) vfd->lock = &dev->dev_mutex; vfd->v4l2_dev = &dev->v4l2_dev; - ret = video_register_device(vfd, VFL_TYPE_GRABBER, 0); - if (ret) { - v4l2_err(&dev->v4l2_dev, "Failed to register video device\n"); - goto error_v4l2; - } - video_set_drvdata(vfd, dev); v4l2_info(&dev->v4l2_dev, "Device registered as /dev/video%d\n", vfd->num); @@ -1361,12 +1355,20 @@ static int vim2m_probe(struct platform_device *pdev) media_device_init(&dev->mdev); dev->mdev.ops = &m2m_media_ops; dev->v4l2_dev.mdev = &dev->mdev; +#endif + + ret = video_register_device(vfd, VFL_TYPE_GRABBER, 0); + if (ret) { + v4l2_err(&dev->v4l2_dev, "Failed to register video device\n"); + goto error_m2m; + } +#ifdef CONFIG_MEDIA_CONTROLLER ret = v4l2_m2m_register_media_controller(dev->m2m_dev, vfd, MEDIA_ENT_F_PROC_VIDEO_SCALER); if (ret) { v4l2_err(&dev->v4l2_dev, "Failed to init mem2mem media controller\n"); - goto error_dev; + goto error_v4l2; } ret = media_device_register(&dev->mdev); @@ -1381,11 +1383,13 @@ static int vim2m_probe(struct platform_device *pdev) error_m2m_mc: v4l2_m2m_unregister_media_controller(dev->m2m_dev); #endif -error_dev: +error_v4l2: video_unregister_device(&dev->vfd); /* vim2m_device_release called by video_unregister_device to release various objects */ return ret; -error_v4l2: +error_m2m: + v4l2_m2m_release(dev->m2m_dev); +error_dev: v4l2_device_unregister(&dev->v4l2_dev); error_free: kfree(dev); diff --git a/drivers/media/platform/vsp1/vsp1_rpf.c b/drivers/media/platform/vsp1/vsp1_rpf.c index 85587c1b6a373..75083cb234fe3 100644 --- a/drivers/media/platform/vsp1/vsp1_rpf.c +++ b/drivers/media/platform/vsp1/vsp1_rpf.c @@ -291,11 +291,11 @@ static void rpf_configure_partition(struct vsp1_entity *entity, + crop.left * fmtinfo->bpp[0] / 8; if (format->num_planes > 1) { + unsigned int bpl = format->plane_fmt[1].bytesperline; unsigned int offset; - offset = crop.top * format->plane_fmt[1].bytesperline - + crop.left / fmtinfo->hsub - * fmtinfo->bpp[1] / 8; + offset = crop.top / fmtinfo->vsub * bpl + + crop.left / fmtinfo->hsub * fmtinfo->bpp[1] / 8; mem.addr[1] += offset; mem.addr[2] += offset; } diff --git a/drivers/media/radio/radio-wl1273.c b/drivers/media/radio/radio-wl1273.c index 1123768731676..484046471c03f 100644 --- a/drivers/media/radio/radio-wl1273.c +++ b/drivers/media/radio/radio-wl1273.c @@ -1279,7 +1279,7 @@ static int wl1273_fm_vidioc_querycap(struct file *file, void *priv, strscpy(capability->driver, WL1273_FM_DRIVER_NAME, sizeof(capability->driver)); - strscpy(capability->card, "Texas Instruments Wl1273 FM Radio", + strscpy(capability->card, "TI Wl1273 FM Radio", sizeof(capability->card)); strscpy(capability->bus_info, radio->bus_type, sizeof(capability->bus_info)); diff --git a/drivers/media/radio/si470x/radio-si470x-i2c.c b/drivers/media/radio/si470x/radio-si470x-i2c.c index f491420d7b538..76d39e2e87706 100644 --- a/drivers/media/radio/si470x/radio-si470x-i2c.c +++ b/drivers/media/radio/si470x/radio-si470x-i2c.c @@ -11,7 +11,7 @@ /* driver definitions */ #define DRIVER_AUTHOR "Joonyoung Shim "; -#define DRIVER_CARD "Silicon Labs Si470x FM Radio Receiver" +#define DRIVER_CARD "Silicon Labs Si470x FM Radio" #define DRIVER_DESC "I2C radio driver for Si470x FM Radio Receivers" #define DRIVER_VERSION "1.0.2" @@ -368,7 +368,7 @@ static int si470x_i2c_probe(struct i2c_client *client) if (radio->hdl.error) { retval = radio->hdl.error; dev_err(&client->dev, "couldn't register control\n"); - goto err_dev; + goto err_all; } /* video device initialization */ @@ -463,7 +463,6 @@ static int si470x_i2c_probe(struct i2c_client *client) return 0; err_all: v4l2_ctrl_handler_free(&radio->hdl); -err_dev: v4l2_device_unregister(&radio->v4l2_dev); err_initial: return retval; diff --git a/drivers/media/radio/si470x/radio-si470x-usb.c b/drivers/media/radio/si470x/radio-si470x-usb.c index fedff68d8c496..3f8634a465730 100644 --- a/drivers/media/radio/si470x/radio-si470x-usb.c +++ b/drivers/media/radio/si470x/radio-si470x-usb.c @@ -16,7 +16,7 @@ /* driver definitions */ #define DRIVER_AUTHOR "Tobias Lorenz " -#define DRIVER_CARD "Silicon Labs Si470x FM Radio Receiver" +#define DRIVER_CARD "Silicon Labs Si470x FM Radio" #define DRIVER_DESC "USB radio driver for Si470x FM Radio Receivers" #define DRIVER_VERSION "1.0.10" diff --git a/drivers/media/rc/igorplugusb.c b/drivers/media/rc/igorplugusb.c index b981f7290c1b2..1e8276040ea5b 100644 --- a/drivers/media/rc/igorplugusb.c +++ b/drivers/media/rc/igorplugusb.c @@ -64,9 +64,11 @@ static void igorplugusb_irdata(struct igorplugusb *ir, unsigned len) if (start >= len) { dev_err(ir->dev, "receive overflow invalid: %u", overflow); } else { - if (overflow > 0) + if (overflow > 0) { dev_warn(ir->dev, "receive overflow, at least %u lost", overflow); + ir_raw_event_reset(ir->rc); + } do { rawir.duration = ir->buf_in[i] * 85333; diff --git a/drivers/media/rc/ite-cir.c b/drivers/media/rc/ite-cir.c index 4b8aee390518d..742b7f41b75f8 100644 --- a/drivers/media/rc/ite-cir.c +++ b/drivers/media/rc/ite-cir.c @@ -283,7 +283,7 @@ static irqreturn_t ite_cir_isr(int irq, void *data) } /* check for the receive interrupt */ - if (iflags & ITE_IRQ_RX_FIFO) { + if (iflags & (ITE_IRQ_RX_FIFO | ITE_IRQ_RX_FIFO_OVERRUN)) { /* read the FIFO bytes */ rx_bytes = dev->params.get_rx_bytes(dev, rx_buf, diff --git a/drivers/media/rc/mceusb.c b/drivers/media/rc/mceusb.c index c68e52c17ae13..cfdd712e2adc3 100644 --- a/drivers/media/rc/mceusb.c +++ b/drivers/media/rc/mceusb.c @@ -1386,6 +1386,7 @@ static void mceusb_dev_recv(struct urb *urb) case -ECONNRESET: case -ENOENT: case -EILSEQ: + case -EPROTO: case -ESHUTDOWN: usb_unlink_urb(urb); return; @@ -1429,7 +1430,7 @@ static void mceusb_gen1_init(struct mceusb_dev *ir) */ ret = usb_control_msg(ir->usbdev, usb_rcvctrlpipe(ir->usbdev, 0), USB_REQ_SET_ADDRESS, USB_TYPE_VENDOR, 0, 0, - data, USB_CTRL_MSG_SZ, HZ * 3); + data, USB_CTRL_MSG_SZ, 3000); dev_dbg(dev, "set address - ret = %d", ret); dev_dbg(dev, "set address - data[0] = %d, data[1] = %d", data[0], data[1]); @@ -1437,20 +1438,20 @@ static void mceusb_gen1_init(struct mceusb_dev *ir) /* set feature: bit rate 38400 bps */ ret = usb_control_msg(ir->usbdev, usb_sndctrlpipe(ir->usbdev, 0), USB_REQ_SET_FEATURE, USB_TYPE_VENDOR, - 0xc04e, 0x0000, NULL, 0, HZ * 3); + 0xc04e, 0x0000, NULL, 0, 3000); dev_dbg(dev, "set feature - ret = %d", ret); /* bRequest 4: set char length to 8 bits */ ret = usb_control_msg(ir->usbdev, usb_sndctrlpipe(ir->usbdev, 0), 4, USB_TYPE_VENDOR, - 0x0808, 0x0000, NULL, 0, HZ * 3); + 0x0808, 0x0000, NULL, 0, 3000); dev_dbg(dev, "set char length - retB = %d", ret); /* bRequest 2: set handshaking to use DTR/DSR */ ret = usb_control_msg(ir->usbdev, usb_sndctrlpipe(ir->usbdev, 0), 2, USB_TYPE_VENDOR, - 0x0000, 0x0100, NULL, 0, HZ * 3); + 0x0000, 0x0100, NULL, 0, 3000); dev_dbg(dev, "set handshake - retC = %d", ret); /* device resume */ diff --git a/drivers/media/rc/rc-loopback.c b/drivers/media/rc/rc-loopback.c index ef8b83b707df0..13ab7312fa3b5 100644 --- a/drivers/media/rc/rc-loopback.c +++ b/drivers/media/rc/rc-loopback.c @@ -42,7 +42,7 @@ static int loop_set_tx_mask(struct rc_dev *dev, u32 mask) if ((mask & (RXMASK_REGULAR | RXMASK_LEARNING)) != mask) { dprintk("invalid tx mask: %u\n", mask); - return -EINVAL; + return 2; } dprintk("setting tx mask: %u\n", mask); diff --git a/drivers/media/rc/redrat3.c b/drivers/media/rc/redrat3.c index aad9526f3754d..c392276610478 100644 --- a/drivers/media/rc/redrat3.c +++ b/drivers/media/rc/redrat3.c @@ -405,7 +405,7 @@ static int redrat3_send_cmd(int cmd, struct redrat3_dev *rr3) udev = rr3->udev; res = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), cmd, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, - 0x0000, 0x0000, data, sizeof(u8), HZ * 10); + 0x0000, 0x0000, data, sizeof(u8), 10000); if (res < 0) { dev_err(rr3->dev, "%s: Error sending rr3 cmd res %d, data %d", @@ -481,7 +481,7 @@ static u32 redrat3_get_timeout(struct redrat3_dev *rr3) pipe = usb_rcvctrlpipe(rr3->udev, 0); ret = usb_control_msg(rr3->udev, pipe, RR3_GET_IR_PARAM, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, - RR3_IR_IO_SIG_TIMEOUT, 0, tmp, len, HZ * 5); + RR3_IR_IO_SIG_TIMEOUT, 0, tmp, len, 5000); if (ret != len) dev_warn(rr3->dev, "Failed to read timeout from hardware\n"); else { @@ -511,7 +511,7 @@ static int redrat3_set_timeout(struct rc_dev *rc_dev, unsigned int timeoutns) ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), RR3_SET_IR_PARAM, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, RR3_IR_IO_SIG_TIMEOUT, 0, timeout, sizeof(*timeout), - HZ * 25); + 25000); dev_dbg(dev, "set ir parm timeout %d ret 0x%02x\n", be32_to_cpu(*timeout), ret); @@ -543,32 +543,32 @@ static void redrat3_reset(struct redrat3_dev *rr3) *val = 0x01; rc = usb_control_msg(udev, rxpipe, RR3_RESET, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, - RR3_CPUCS_REG_ADDR, 0, val, len, HZ * 25); + RR3_CPUCS_REG_ADDR, 0, val, len, 25000); dev_dbg(dev, "reset returned 0x%02x\n", rc); *val = length_fuzz; rc = usb_control_msg(udev, txpipe, RR3_SET_IR_PARAM, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, - RR3_IR_IO_LENGTH_FUZZ, 0, val, len, HZ * 25); + RR3_IR_IO_LENGTH_FUZZ, 0, val, len, 25000); dev_dbg(dev, "set ir parm len fuzz %d rc 0x%02x\n", *val, rc); *val = (65536 - (minimum_pause * 2000)) / 256; rc = usb_control_msg(udev, txpipe, RR3_SET_IR_PARAM, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, - RR3_IR_IO_MIN_PAUSE, 0, val, len, HZ * 25); + RR3_IR_IO_MIN_PAUSE, 0, val, len, 25000); dev_dbg(dev, "set ir parm min pause %d rc 0x%02x\n", *val, rc); *val = periods_measure_carrier; rc = usb_control_msg(udev, txpipe, RR3_SET_IR_PARAM, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, - RR3_IR_IO_PERIODS_MF, 0, val, len, HZ * 25); + RR3_IR_IO_PERIODS_MF, 0, val, len, 25000); dev_dbg(dev, "set ir parm periods measure carrier %d rc 0x%02x", *val, rc); *val = RR3_DRIVER_MAXLENS; rc = usb_control_msg(udev, txpipe, RR3_SET_IR_PARAM, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, - RR3_IR_IO_MAX_LENGTHS, 0, val, len, HZ * 25); + RR3_IR_IO_MAX_LENGTHS, 0, val, len, 25000); dev_dbg(dev, "set ir parm max lens %d rc 0x%02x\n", *val, rc); kfree(val); @@ -586,7 +586,7 @@ static void redrat3_get_firmware_rev(struct redrat3_dev *rr3) rc = usb_control_msg(rr3->udev, usb_rcvctrlpipe(rr3->udev, 0), RR3_FW_VERSION, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, - 0, 0, buffer, RR3_FW_VERSION_LEN, HZ * 5); + 0, 0, buffer, RR3_FW_VERSION_LEN, 5000); if (rc >= 0) dev_info(rr3->dev, "Firmware rev: %s", buffer); @@ -826,14 +826,14 @@ static int redrat3_transmit_ir(struct rc_dev *rcdev, unsigned *txbuf, pipe = usb_sndbulkpipe(rr3->udev, rr3->ep_out->bEndpointAddress); ret = usb_bulk_msg(rr3->udev, pipe, irdata, - sendbuf_len, &ret_len, 10 * HZ); + sendbuf_len, &ret_len, 10000); dev_dbg(dev, "sent %d bytes, (ret %d)\n", ret_len, ret); /* now tell the hardware to transmit what we sent it */ pipe = usb_rcvctrlpipe(rr3->udev, 0); ret = usb_control_msg(rr3->udev, pipe, RR3_TX_SEND_SIGNAL, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, - 0, 0, irdata, 2, HZ * 10); + 0, 0, irdata, 2, 10000); if (ret < 0) dev_err(dev, "Error: control msg send failed, rc %d\n", ret); diff --git a/drivers/media/spi/cxd2880-spi.c b/drivers/media/spi/cxd2880-spi.c index 4077217777f92..11273be702b6e 100644 --- a/drivers/media/spi/cxd2880-spi.c +++ b/drivers/media/spi/cxd2880-spi.c @@ -524,13 +524,13 @@ cxd2880_spi_probe(struct spi_device *spi) if (IS_ERR(dvb_spi->vcc_supply)) { if (PTR_ERR(dvb_spi->vcc_supply) == -EPROBE_DEFER) { ret = -EPROBE_DEFER; - goto fail_adapter; + goto fail_regulator; } dvb_spi->vcc_supply = NULL; } else { ret = regulator_enable(dvb_spi->vcc_supply); if (ret) - goto fail_adapter; + goto fail_regulator; } dvb_spi->spi = spi; @@ -618,6 +618,9 @@ cxd2880_spi_probe(struct spi_device *spi) fail_attach: dvb_unregister_adapter(&dvb_spi->adapter); fail_adapter: + if (dvb_spi->vcc_supply) + regulator_disable(dvb_spi->vcc_supply); +fail_regulator: kfree(dvb_spi); return ret; } diff --git a/drivers/media/tuners/msi001.c b/drivers/media/tuners/msi001.c index 78e6fd600d8ef..44247049a3190 100644 --- a/drivers/media/tuners/msi001.c +++ b/drivers/media/tuners/msi001.c @@ -442,6 +442,13 @@ static int msi001_probe(struct spi_device *spi) V4L2_CID_RF_TUNER_BANDWIDTH_AUTO, 0, 1, 1, 1); dev->bandwidth = v4l2_ctrl_new_std(&dev->hdl, &msi001_ctrl_ops, V4L2_CID_RF_TUNER_BANDWIDTH, 200000, 8000000, 1, 200000); + if (dev->hdl.error) { + ret = dev->hdl.error; + dev_err(&spi->dev, "Could not initialize controls\n"); + /* control init failed, free handler */ + goto err_ctrl_handler_free; + } + v4l2_ctrl_auto_cluster(2, &dev->bandwidth_auto, 0, false); dev->lna_gain = v4l2_ctrl_new_std(&dev->hdl, &msi001_ctrl_ops, V4L2_CID_RF_TUNER_LNA_GAIN, 0, 1, 1, 1); diff --git a/drivers/media/tuners/si2157.c b/drivers/media/tuners/si2157.c index a39e1966816bf..8db9f0eb98b52 100644 --- a/drivers/media/tuners/si2157.c +++ b/drivers/media/tuners/si2157.c @@ -80,7 +80,7 @@ static int si2157_init(struct dvb_frontend *fe) dev_dbg(&client->dev, "\n"); /* Try to get Xtal trim property, to verify tuner still running */ - memcpy(cmd.args, "\x15\x00\x04\x02", 4); + memcpy(cmd.args, "\x15\x00\x02\x04", 4); cmd.wlen = 4; cmd.rlen = 4; ret = si2157_cmd_execute(client, &cmd); diff --git a/drivers/media/usb/b2c2/flexcop-usb.c b/drivers/media/usb/b2c2/flexcop-usb.c index 198ddfb8d2b18..e51338d5eac0c 100644 --- a/drivers/media/usb/b2c2/flexcop-usb.c +++ b/drivers/media/usb/b2c2/flexcop-usb.c @@ -87,7 +87,7 @@ static int flexcop_usb_readwrite_dw(struct flexcop_device *fc, u16 wRegOffsPCI, 0, fc_usb->data, sizeof(u32), - B2C2_WAIT_FOR_OPERATION_RDW * HZ); + B2C2_WAIT_FOR_OPERATION_RDW); if (ret != sizeof(u32)) { err("error while %s dword from %d (%d).", read ? "reading" : @@ -155,7 +155,7 @@ static int flexcop_usb_v8_memory_req(struct flexcop_usb *fc_usb, wIndex, fc_usb->data, buflen, - nWaitTime * HZ); + nWaitTime); if (ret != buflen) ret = -EIO; @@ -249,13 +249,13 @@ static int flexcop_usb_i2c_req(struct flexcop_i2c_adapter *i2c, /* DKT 020208 - add this to support special case of DiSEqC */ case USB_FUNC_I2C_CHECKWRITE: pipe = B2C2_USB_CTRL_PIPE_OUT; - nWaitTime = 2; + nWaitTime = 2000; request_type |= USB_DIR_OUT; break; case USB_FUNC_I2C_READ: case USB_FUNC_I2C_REPEATREAD: pipe = B2C2_USB_CTRL_PIPE_IN; - nWaitTime = 2; + nWaitTime = 2000; request_type |= USB_DIR_IN; break; default: @@ -282,7 +282,7 @@ static int flexcop_usb_i2c_req(struct flexcop_i2c_adapter *i2c, wIndex, fc_usb->data, buflen, - nWaitTime * HZ); + nWaitTime); if (ret != buflen) ret = -EIO; diff --git a/drivers/media/usb/b2c2/flexcop-usb.h b/drivers/media/usb/b2c2/flexcop-usb.h index e86faa0e06ca6..3dfd25fa4750f 100644 --- a/drivers/media/usb/b2c2/flexcop-usb.h +++ b/drivers/media/usb/b2c2/flexcop-usb.h @@ -91,13 +91,13 @@ typedef enum { UTILITY_SRAM_TESTVERIFY = 0x16, } flexcop_usb_utility_function_t; -#define B2C2_WAIT_FOR_OPERATION_RW (1*HZ) -#define B2C2_WAIT_FOR_OPERATION_RDW (3*HZ) -#define B2C2_WAIT_FOR_OPERATION_WDW (1*HZ) +#define B2C2_WAIT_FOR_OPERATION_RW 1000 +#define B2C2_WAIT_FOR_OPERATION_RDW 3000 +#define B2C2_WAIT_FOR_OPERATION_WDW 1000 -#define B2C2_WAIT_FOR_OPERATION_V8READ (3*HZ) -#define B2C2_WAIT_FOR_OPERATION_V8WRITE (3*HZ) -#define B2C2_WAIT_FOR_OPERATION_V8FLASH (3*HZ) +#define B2C2_WAIT_FOR_OPERATION_V8READ 3000 +#define B2C2_WAIT_FOR_OPERATION_V8WRITE 3000 +#define B2C2_WAIT_FOR_OPERATION_V8FLASH 3000 typedef enum { V8_MEMORY_PAGE_DVB_CI = 0x20, diff --git a/drivers/media/usb/cpia2/cpia2_usb.c b/drivers/media/usb/cpia2/cpia2_usb.c index 76aac06f9fb8e..cba03b2864738 100644 --- a/drivers/media/usb/cpia2/cpia2_usb.c +++ b/drivers/media/usb/cpia2/cpia2_usb.c @@ -550,7 +550,7 @@ static int write_packet(struct usb_device *udev, 0, /* index */ buf, /* buffer */ size, - HZ); + 1000); kfree(buf); return ret; @@ -582,7 +582,7 @@ static int read_packet(struct usb_device *udev, 0, /* index */ buf, /* buffer */ size, - HZ); + 1000); if (ret >= 0) memcpy(registers, buf, size); diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf.c b/drivers/media/usb/dvb-usb-v2/mxl111sf.c index 55b4ae7037a4e..5fbce81b64c77 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf.c +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf.c @@ -931,8 +931,6 @@ static int mxl111sf_init(struct dvb_usb_device *d) .len = sizeof(eeprom), .buf = eeprom }, }; - mutex_init(&state->msg_lock); - ret = get_chip_info(state); if (mxl_fail(ret)) pr_err("failed to get chip info during probe"); @@ -1074,6 +1072,14 @@ static int mxl111sf_get_stream_config_dvbt(struct dvb_frontend *fe, return 0; } +static int mxl111sf_probe(struct dvb_usb_device *dev) +{ + struct mxl111sf_state *state = d_to_priv(dev); + + mutex_init(&state->msg_lock); + return 0; +} + static struct dvb_usb_device_properties mxl111sf_props_dvbt = { .driver_name = KBUILD_MODNAME, .owner = THIS_MODULE, @@ -1083,6 +1089,7 @@ static struct dvb_usb_device_properties mxl111sf_props_dvbt = { .generic_bulk_ctrl_endpoint = 0x02, .generic_bulk_ctrl_endpoint_response = 0x81, + .probe = mxl111sf_probe, .i2c_algo = &mxl111sf_i2c_algo, .frontend_attach = mxl111sf_frontend_attach_dvbt, .tuner_attach = mxl111sf_attach_tuner, @@ -1124,6 +1131,7 @@ static struct dvb_usb_device_properties mxl111sf_props_atsc = { .generic_bulk_ctrl_endpoint = 0x02, .generic_bulk_ctrl_endpoint_response = 0x81, + .probe = mxl111sf_probe, .i2c_algo = &mxl111sf_i2c_algo, .frontend_attach = mxl111sf_frontend_attach_atsc, .tuner_attach = mxl111sf_attach_tuner, @@ -1165,6 +1173,7 @@ static struct dvb_usb_device_properties mxl111sf_props_mh = { .generic_bulk_ctrl_endpoint = 0x02, .generic_bulk_ctrl_endpoint_response = 0x81, + .probe = mxl111sf_probe, .i2c_algo = &mxl111sf_i2c_algo, .frontend_attach = mxl111sf_frontend_attach_mh, .tuner_attach = mxl111sf_attach_tuner, @@ -1233,6 +1242,7 @@ static struct dvb_usb_device_properties mxl111sf_props_atsc_mh = { .generic_bulk_ctrl_endpoint = 0x02, .generic_bulk_ctrl_endpoint_response = 0x81, + .probe = mxl111sf_probe, .i2c_algo = &mxl111sf_i2c_algo, .frontend_attach = mxl111sf_frontend_attach_atsc_mh, .tuner_attach = mxl111sf_attach_tuner, @@ -1311,6 +1321,7 @@ static struct dvb_usb_device_properties mxl111sf_props_mercury = { .generic_bulk_ctrl_endpoint = 0x02, .generic_bulk_ctrl_endpoint_response = 0x81, + .probe = mxl111sf_probe, .i2c_algo = &mxl111sf_i2c_algo, .frontend_attach = mxl111sf_frontend_attach_mercury, .tuner_attach = mxl111sf_attach_tuner, @@ -1381,6 +1392,7 @@ static struct dvb_usb_device_properties mxl111sf_props_mercury_mh = { .generic_bulk_ctrl_endpoint = 0x02, .generic_bulk_ctrl_endpoint_response = 0x81, + .probe = mxl111sf_probe, .i2c_algo = &mxl111sf_i2c_algo, .frontend_attach = mxl111sf_frontend_attach_mercury_mh, .tuner_attach = mxl111sf_attach_tuner, diff --git a/drivers/media/usb/dvb-usb/az6027.c b/drivers/media/usb/dvb-usb/az6027.c index 8de18da0c4bd1..5aa9c501ed9c9 100644 --- a/drivers/media/usb/dvb-usb/az6027.c +++ b/drivers/media/usb/dvb-usb/az6027.c @@ -391,6 +391,7 @@ static struct rc_map_table rc_map_az6027_table[] = { /* remote control stuff (does not work with my box) */ static int az6027_rc_query(struct dvb_usb_device *d, u32 *event, int *state) { + *state = REMOTE_NO_KEY_PRESSED; return 0; } diff --git a/drivers/media/usb/dvb-usb/dib0700_core.c b/drivers/media/usb/dvb-usb/dib0700_core.c index ef62dd6c5ae44..07d5e94578775 100644 --- a/drivers/media/usb/dvb-usb/dib0700_core.c +++ b/drivers/media/usb/dvb-usb/dib0700_core.c @@ -616,8 +616,6 @@ int dib0700_streaming_ctrl(struct dvb_usb_adapter *adap, int onoff) deb_info("the endpoint number (%i) is not correct, use the adapter id instead", adap->fe_adap[0].stream.props.endpoint); if (onoff) st->channel_state |= 1 << (adap->id); - else - st->channel_state |= 1 << ~(adap->id); } else { if (onoff) st->channel_state |= 1 << (adap->fe_adap[0].stream.props.endpoint-2); diff --git a/drivers/media/usb/dvb-usb/dibusb-common.c b/drivers/media/usb/dvb-usb/dibusb-common.c index 59ce2dec11e98..9c1ebea68b544 100644 --- a/drivers/media/usb/dvb-usb/dibusb-common.c +++ b/drivers/media/usb/dvb-usb/dibusb-common.c @@ -223,7 +223,7 @@ int dibusb_read_eeprom_byte(struct dvb_usb_device *d, u8 offs, u8 *val) u8 *buf; int rc; - buf = kmalloc(2, GFP_KERNEL); + buf = kzalloc(2, GFP_KERNEL); if (!buf) return -ENOMEM; diff --git a/drivers/media/usb/dvb-usb/dvb-usb-i2c.c b/drivers/media/usb/dvb-usb/dvb-usb-i2c.c index 2e07106f46803..bc4b2abdde1a4 100644 --- a/drivers/media/usb/dvb-usb/dvb-usb-i2c.c +++ b/drivers/media/usb/dvb-usb/dvb-usb-i2c.c @@ -17,7 +17,8 @@ int dvb_usb_i2c_init(struct dvb_usb_device *d) if (d->props.i2c_algo == NULL) { err("no i2c algorithm specified"); - return -EINVAL; + ret = -EINVAL; + goto err; } strscpy(d->i2c_adap.name, d->desc->name, sizeof(d->i2c_adap.name)); @@ -27,11 +28,15 @@ int dvb_usb_i2c_init(struct dvb_usb_device *d) i2c_set_adapdata(&d->i2c_adap, d); - if ((ret = i2c_add_adapter(&d->i2c_adap)) < 0) + ret = i2c_add_adapter(&d->i2c_adap); + if (ret < 0) { err("could not add i2c adapter"); + goto err; + } d->state |= DVB_USB_STATE_I2C; +err: return ret; } diff --git a/drivers/media/usb/dvb-usb/dvb-usb-init.c b/drivers/media/usb/dvb-usb/dvb-usb-init.c index f57c4627624f5..e7720ff11d3d9 100644 --- a/drivers/media/usb/dvb-usb/dvb-usb-init.c +++ b/drivers/media/usb/dvb-usb/dvb-usb-init.c @@ -194,8 +194,8 @@ static int dvb_usb_init(struct dvb_usb_device *d, short *adapter_nums) err_adapter_init: dvb_usb_adapter_exit(d); -err_i2c_init: dvb_usb_i2c_exit(d); +err_i2c_init: if (d->priv && d->props.priv_destroy) d->props.priv_destroy(d); err_priv_init: diff --git a/drivers/media/usb/dvb-usb/dw2102.c b/drivers/media/usb/dvb-usb/dw2102.c index b960abd00d483..8493ebb377c4d 100644 --- a/drivers/media/usb/dvb-usb/dw2102.c +++ b/drivers/media/usb/dvb-usb/dw2102.c @@ -2098,46 +2098,153 @@ static struct dvb_usb_device_properties s6x0_properties = { } }; -static const struct dvb_usb_device_description d1100 = { - "Prof 1100 USB ", - {&dw2102_table[PROF_1100], NULL}, - {NULL}, -}; +static struct dvb_usb_device_properties p1100_properties = { + .caps = DVB_USB_IS_AN_I2C_ADAPTER, + .usb_ctrl = DEVICE_SPECIFIC, + .size_of_priv = sizeof(struct dw2102_state), + .firmware = P1100_FIRMWARE, + .no_reconnect = 1, -static const struct dvb_usb_device_description d660 = { - "TeVii S660 USB", - {&dw2102_table[TEVII_S660], NULL}, - {NULL}, -}; + .i2c_algo = &s6x0_i2c_algo, + .rc.core = { + .rc_interval = 150, + .rc_codes = RC_MAP_TBS_NEC, + .module_name = "dw2102", + .allowed_protos = RC_PROTO_BIT_NEC, + .rc_query = prof_rc_query, + }, -static const struct dvb_usb_device_description d480_1 = { - "TeVii S480.1 USB", - {&dw2102_table[TEVII_S480_1], NULL}, - {NULL}, + .generic_bulk_ctrl_endpoint = 0x81, + .num_adapters = 1, + .download_firmware = dw2102_load_firmware, + .read_mac_address = s6x0_read_mac_address, + .adapter = { + { + .num_frontends = 1, + .fe = {{ + .frontend_attach = stv0288_frontend_attach, + .stream = { + .type = USB_BULK, + .count = 8, + .endpoint = 0x82, + .u = { + .bulk = { + .buffersize = 4096, + } + } + }, + } }, + } + }, + .num_device_descs = 1, + .devices = { + {"Prof 1100 USB ", + {&dw2102_table[PROF_1100], NULL}, + {NULL}, + }, + } }; -static const struct dvb_usb_device_description d480_2 = { - "TeVii S480.2 USB", - {&dw2102_table[TEVII_S480_2], NULL}, - {NULL}, -}; +static struct dvb_usb_device_properties s660_properties = { + .caps = DVB_USB_IS_AN_I2C_ADAPTER, + .usb_ctrl = DEVICE_SPECIFIC, + .size_of_priv = sizeof(struct dw2102_state), + .firmware = S660_FIRMWARE, + .no_reconnect = 1, -static const struct dvb_usb_device_description d7500 = { - "Prof 7500 USB DVB-S2", - {&dw2102_table[PROF_7500], NULL}, - {NULL}, -}; + .i2c_algo = &s6x0_i2c_algo, + .rc.core = { + .rc_interval = 150, + .rc_codes = RC_MAP_TEVII_NEC, + .module_name = "dw2102", + .allowed_protos = RC_PROTO_BIT_NEC, + .rc_query = dw2102_rc_query, + }, -static const struct dvb_usb_device_description d421 = { - "TeVii S421 PCI", - {&dw2102_table[TEVII_S421], NULL}, - {NULL}, + .generic_bulk_ctrl_endpoint = 0x81, + .num_adapters = 1, + .download_firmware = dw2102_load_firmware, + .read_mac_address = s6x0_read_mac_address, + .adapter = { + { + .num_frontends = 1, + .fe = {{ + .frontend_attach = ds3000_frontend_attach, + .stream = { + .type = USB_BULK, + .count = 8, + .endpoint = 0x82, + .u = { + .bulk = { + .buffersize = 4096, + } + } + }, + } }, + } + }, + .num_device_descs = 3, + .devices = { + {"TeVii S660 USB", + {&dw2102_table[TEVII_S660], NULL}, + {NULL}, + }, + {"TeVii S480.1 USB", + {&dw2102_table[TEVII_S480_1], NULL}, + {NULL}, + }, + {"TeVii S480.2 USB", + {&dw2102_table[TEVII_S480_2], NULL}, + {NULL}, + }, + } }; -static const struct dvb_usb_device_description d632 = { - "TeVii S632 USB", - {&dw2102_table[TEVII_S632], NULL}, - {NULL}, +static struct dvb_usb_device_properties p7500_properties = { + .caps = DVB_USB_IS_AN_I2C_ADAPTER, + .usb_ctrl = DEVICE_SPECIFIC, + .size_of_priv = sizeof(struct dw2102_state), + .firmware = P7500_FIRMWARE, + .no_reconnect = 1, + + .i2c_algo = &s6x0_i2c_algo, + .rc.core = { + .rc_interval = 150, + .rc_codes = RC_MAP_TBS_NEC, + .module_name = "dw2102", + .allowed_protos = RC_PROTO_BIT_NEC, + .rc_query = prof_rc_query, + }, + + .generic_bulk_ctrl_endpoint = 0x81, + .num_adapters = 1, + .download_firmware = dw2102_load_firmware, + .read_mac_address = s6x0_read_mac_address, + .adapter = { + { + .num_frontends = 1, + .fe = {{ + .frontend_attach = prof_7500_frontend_attach, + .stream = { + .type = USB_BULK, + .count = 8, + .endpoint = 0x82, + .u = { + .bulk = { + .buffersize = 4096, + } + } + }, + } }, + } + }, + .num_device_descs = 1, + .devices = { + {"Prof 7500 USB DVB-S2", + {&dw2102_table[PROF_7500], NULL}, + {NULL}, + }, + } }; static struct dvb_usb_device_properties su3000_properties = { @@ -2209,6 +2316,59 @@ static struct dvb_usb_device_properties su3000_properties = { } }; +static struct dvb_usb_device_properties s421_properties = { + .caps = DVB_USB_IS_AN_I2C_ADAPTER, + .usb_ctrl = DEVICE_SPECIFIC, + .size_of_priv = sizeof(struct dw2102_state), + .power_ctrl = su3000_power_ctrl, + .num_adapters = 1, + .identify_state = su3000_identify_state, + .i2c_algo = &su3000_i2c_algo, + + .rc.core = { + .rc_interval = 150, + .rc_codes = RC_MAP_SU3000, + .module_name = "dw2102", + .allowed_protos = RC_PROTO_BIT_RC5, + .rc_query = su3000_rc_query, + }, + + .read_mac_address = su3000_read_mac_address, + + .generic_bulk_ctrl_endpoint = 0x01, + + .adapter = { + { + .num_frontends = 1, + .fe = {{ + .streaming_ctrl = su3000_streaming_ctrl, + .frontend_attach = m88rs2000_frontend_attach, + .stream = { + .type = USB_BULK, + .count = 8, + .endpoint = 0x82, + .u = { + .bulk = { + .buffersize = 4096, + } + } + } + } }, + } + }, + .num_device_descs = 2, + .devices = { + { "TeVii S421 PCI", + { &dw2102_table[TEVII_S421], NULL }, + { NULL }, + }, + { "TeVii S632 USB", + { &dw2102_table[TEVII_S632], NULL }, + { NULL }, + }, + } +}; + static struct dvb_usb_device_properties t220_properties = { .caps = DVB_USB_IS_AN_I2C_ADAPTER, .usb_ctrl = DEVICE_SPECIFIC, @@ -2326,101 +2486,33 @@ static struct dvb_usb_device_properties tt_s2_4600_properties = { static int dw2102_probe(struct usb_interface *intf, const struct usb_device_id *id) { - int retval = -ENOMEM; - struct dvb_usb_device_properties *p1100; - struct dvb_usb_device_properties *s660; - struct dvb_usb_device_properties *p7500; - struct dvb_usb_device_properties *s421; - - p1100 = kmemdup(&s6x0_properties, - sizeof(struct dvb_usb_device_properties), GFP_KERNEL); - if (!p1100) - goto err0; - - /* copy default structure */ - /* fill only different fields */ - p1100->firmware = P1100_FIRMWARE; - p1100->devices[0] = d1100; - p1100->rc.core.rc_query = prof_rc_query; - p1100->rc.core.rc_codes = RC_MAP_TBS_NEC; - p1100->adapter->fe[0].frontend_attach = stv0288_frontend_attach; - - s660 = kmemdup(&s6x0_properties, - sizeof(struct dvb_usb_device_properties), GFP_KERNEL); - if (!s660) - goto err1; - - s660->firmware = S660_FIRMWARE; - s660->num_device_descs = 3; - s660->devices[0] = d660; - s660->devices[1] = d480_1; - s660->devices[2] = d480_2; - s660->adapter->fe[0].frontend_attach = ds3000_frontend_attach; - - p7500 = kmemdup(&s6x0_properties, - sizeof(struct dvb_usb_device_properties), GFP_KERNEL); - if (!p7500) - goto err2; - - p7500->firmware = P7500_FIRMWARE; - p7500->devices[0] = d7500; - p7500->rc.core.rc_query = prof_rc_query; - p7500->rc.core.rc_codes = RC_MAP_TBS_NEC; - p7500->adapter->fe[0].frontend_attach = prof_7500_frontend_attach; - - - s421 = kmemdup(&su3000_properties, - sizeof(struct dvb_usb_device_properties), GFP_KERNEL); - if (!s421) - goto err3; - - s421->num_device_descs = 2; - s421->devices[0] = d421; - s421->devices[1] = d632; - s421->adapter->fe[0].frontend_attach = m88rs2000_frontend_attach; - - if (0 == dvb_usb_device_init(intf, &dw2102_properties, - THIS_MODULE, NULL, adapter_nr) || - 0 == dvb_usb_device_init(intf, &dw2104_properties, - THIS_MODULE, NULL, adapter_nr) || - 0 == dvb_usb_device_init(intf, &dw3101_properties, - THIS_MODULE, NULL, adapter_nr) || - 0 == dvb_usb_device_init(intf, &s6x0_properties, - THIS_MODULE, NULL, adapter_nr) || - 0 == dvb_usb_device_init(intf, p1100, - THIS_MODULE, NULL, adapter_nr) || - 0 == dvb_usb_device_init(intf, s660, - THIS_MODULE, NULL, adapter_nr) || - 0 == dvb_usb_device_init(intf, p7500, - THIS_MODULE, NULL, adapter_nr) || - 0 == dvb_usb_device_init(intf, s421, - THIS_MODULE, NULL, adapter_nr) || - 0 == dvb_usb_device_init(intf, &su3000_properties, - THIS_MODULE, NULL, adapter_nr) || - 0 == dvb_usb_device_init(intf, &t220_properties, - THIS_MODULE, NULL, adapter_nr) || - 0 == dvb_usb_device_init(intf, &tt_s2_4600_properties, - THIS_MODULE, NULL, adapter_nr)) { - - /* clean up copied properties */ - kfree(s421); - kfree(p7500); - kfree(s660); - kfree(p1100); + if (!(dvb_usb_device_init(intf, &dw2102_properties, + THIS_MODULE, NULL, adapter_nr) && + dvb_usb_device_init(intf, &dw2104_properties, + THIS_MODULE, NULL, adapter_nr) && + dvb_usb_device_init(intf, &dw3101_properties, + THIS_MODULE, NULL, adapter_nr) && + dvb_usb_device_init(intf, &s6x0_properties, + THIS_MODULE, NULL, adapter_nr) && + dvb_usb_device_init(intf, &p1100_properties, + THIS_MODULE, NULL, adapter_nr) && + dvb_usb_device_init(intf, &s660_properties, + THIS_MODULE, NULL, adapter_nr) && + dvb_usb_device_init(intf, &p7500_properties, + THIS_MODULE, NULL, adapter_nr) && + dvb_usb_device_init(intf, &s421_properties, + THIS_MODULE, NULL, adapter_nr) && + dvb_usb_device_init(intf, &su3000_properties, + THIS_MODULE, NULL, adapter_nr) && + dvb_usb_device_init(intf, &t220_properties, + THIS_MODULE, NULL, adapter_nr) && + dvb_usb_device_init(intf, &tt_s2_4600_properties, + THIS_MODULE, NULL, adapter_nr))) { return 0; } - retval = -ENODEV; - kfree(s421); -err3: - kfree(p7500); -err2: - kfree(s660); -err1: - kfree(p1100); -err0: - return retval; + return -ENODEV; } static void dw2102_disconnect(struct usb_interface *intf) diff --git a/drivers/media/usb/dvb-usb/m920x.c b/drivers/media/usb/dvb-usb/m920x.c index d866a1990a7d2..7282f60226558 100644 --- a/drivers/media/usb/dvb-usb/m920x.c +++ b/drivers/media/usb/dvb-usb/m920x.c @@ -274,6 +274,13 @@ static int m920x_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[], int nu /* Should check for ack here, if we knew how. */ } if (msg[i].flags & I2C_M_RD) { + char *read = kmalloc(1, GFP_KERNEL); + if (!read) { + ret = -ENOMEM; + kfree(read); + goto unlock; + } + for (j = 0; j < msg[i].len; j++) { /* Last byte of transaction? * Send STOP, otherwise send ACK. */ @@ -281,9 +288,12 @@ static int m920x_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[], int nu if ((ret = m920x_read(d->udev, M9206_I2C, 0x0, 0x20 | stop, - &msg[i].buf[j], 1)) != 0) + read, 1)) != 0) goto unlock; + msg[i].buf[j] = read[0]; } + + kfree(read); } else { for (j = 0; j < msg[i].len; j++) { /* Last byte of transaction? Then send STOP. */ diff --git a/drivers/media/usb/dvb-usb/nova-t-usb2.c b/drivers/media/usb/dvb-usb/nova-t-usb2.c index e368935a50894..c16d4f1624952 100644 --- a/drivers/media/usb/dvb-usb/nova-t-usb2.c +++ b/drivers/media/usb/dvb-usb/nova-t-usb2.c @@ -130,7 +130,7 @@ static int nova_t_rc_query(struct dvb_usb_device *d, u32 *event, int *state) static int nova_t_read_mac_address (struct dvb_usb_device *d, u8 mac[6]) { - int i; + int i, ret; u8 b; mac[0] = 0x00; @@ -139,7 +139,9 @@ static int nova_t_read_mac_address (struct dvb_usb_device *d, u8 mac[6]) /* this is a complete guess, but works for my box */ for (i = 136; i < 139; i++) { - dibusb_read_eeprom_byte(d,i, &b); + ret = dibusb_read_eeprom_byte(d, i, &b); + if (ret) + return ret; mac[5 - (i - 136)] = b; } diff --git a/drivers/media/usb/dvb-usb/vp702x.c b/drivers/media/usb/dvb-usb/vp702x.c index 381b5c898a076..b7ee972455e5b 100644 --- a/drivers/media/usb/dvb-usb/vp702x.c +++ b/drivers/media/usb/dvb-usb/vp702x.c @@ -291,16 +291,22 @@ static int vp702x_rc_query(struct dvb_usb_device *d, u32 *event, int *state) static int vp702x_read_mac_addr(struct dvb_usb_device *d,u8 mac[6]) { u8 i, *buf; + int ret; struct vp702x_device_state *st = d->priv; mutex_lock(&st->buf_mutex); buf = st->buf; - for (i = 6; i < 12; i++) - vp702x_usb_in_op(d, READ_EEPROM_REQ, i, 1, &buf[i - 6], 1); + for (i = 6; i < 12; i++) { + ret = vp702x_usb_in_op(d, READ_EEPROM_REQ, i, 1, + &buf[i - 6], 1); + if (ret < 0) + goto err; + } memcpy(mac, buf, 6); +err: mutex_unlock(&st->buf_mutex); - return 0; + return ret; } static int vp702x_frontend_attach(struct dvb_usb_adapter *adap) diff --git a/drivers/media/usb/em28xx/em28xx-cards.c b/drivers/media/usb/em28xx/em28xx-cards.c index 5983e72a0622c..5ae13ee9272d5 100644 --- a/drivers/media/usb/em28xx/em28xx-cards.c +++ b/drivers/media/usb/em28xx/em28xx-cards.c @@ -3515,8 +3515,10 @@ static int em28xx_init_dev(struct em28xx *dev, struct usb_device *udev, if (dev->is_audio_only) { retval = em28xx_audio_setup(dev); - if (retval) - return -ENODEV; + if (retval) { + retval = -ENODEV; + goto err_deinit_media; + } em28xx_init_extension(dev); return 0; @@ -3535,7 +3537,7 @@ static int em28xx_init_dev(struct em28xx *dev, struct usb_device *udev, dev_err(&dev->intf->dev, "%s: em28xx_i2c_register bus 0 - error [%d]!\n", __func__, retval); - return retval; + goto err_deinit_media; } /* register i2c bus 1 */ @@ -3551,9 +3553,7 @@ static int em28xx_init_dev(struct em28xx *dev, struct usb_device *udev, "%s: em28xx_i2c_register bus 1 - error [%d]!\n", __func__, retval); - em28xx_i2c_unregister(dev, 0); - - return retval; + goto err_unreg_i2c; } } @@ -3561,6 +3561,12 @@ static int em28xx_init_dev(struct em28xx *dev, struct usb_device *udev, em28xx_card_setup(dev); return 0; + +err_unreg_i2c: + em28xx_i2c_unregister(dev, 0); +err_deinit_media: + em28xx_unregister_media_device(dev); + return retval; } static int em28xx_duplicate_dev(struct em28xx *dev) @@ -3815,6 +3821,8 @@ static int em28xx_usb_probe(struct usb_interface *intf, goto err_free; } + kref_init(&dev->ref); + dev->devno = nr; dev->model = id->driver_info; dev->alt = -1; @@ -3915,6 +3923,8 @@ static int em28xx_usb_probe(struct usb_interface *intf, } if (dev->board.has_dual_ts && em28xx_duplicate_dev(dev) == 0) { + kref_init(&dev->dev_next->ref); + dev->dev_next->ts = SECONDARY_TS; dev->dev_next->alt = -1; dev->dev_next->is_audio_only = has_vendor_audio && @@ -3969,12 +3979,8 @@ static int em28xx_usb_probe(struct usb_interface *intf, em28xx_write_reg(dev, 0x0b, 0x82); mdelay(100); } - - kref_init(&dev->dev_next->ref); } - kref_init(&dev->ref); - request_modules(dev); /* diff --git a/drivers/media/usb/em28xx/em28xx-core.c b/drivers/media/usb/em28xx/em28xx-core.c index 3daa64bb1e1d9..308bc029099d9 100644 --- a/drivers/media/usb/em28xx/em28xx-core.c +++ b/drivers/media/usb/em28xx/em28xx-core.c @@ -89,7 +89,7 @@ int em28xx_read_reg_req_len(struct em28xx *dev, u8 req, u16 reg, mutex_lock(&dev->ctrl_urb_lock); ret = usb_control_msg(udev, pipe, req, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 0x0000, reg, dev->urb_buf, len, HZ); + 0x0000, reg, dev->urb_buf, len, 1000); if (ret < 0) { em28xx_regdbg("(pipe 0x%08x): IN: %02x %02x %02x %02x %02x %02x %02x %02x failed with error %i\n", pipe, @@ -158,7 +158,7 @@ int em28xx_write_regs_req(struct em28xx *dev, u8 req, u16 reg, char *buf, memcpy(dev->urb_buf, buf, len); ret = usb_control_msg(udev, pipe, req, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 0x0000, reg, dev->urb_buf, len, HZ); + 0x0000, reg, dev->urb_buf, len, 1000); mutex_unlock(&dev->ctrl_urb_lock); if (ret < 0) { @@ -1152,8 +1152,9 @@ int em28xx_suspend_extension(struct em28xx *dev) dev_info(&dev->intf->dev, "Suspending extensions\n"); mutex_lock(&em28xx_devlist_mutex); list_for_each_entry(ops, &em28xx_extension_devlist, next) { - if (ops->suspend) - ops->suspend(dev); + if (!ops->suspend) + continue; + ops->suspend(dev); if (dev->dev_next) ops->suspend(dev->dev_next); } diff --git a/drivers/media/usb/em28xx/em28xx-input.c b/drivers/media/usb/em28xx/em28xx-input.c index 59529cbf9cd0b..0b6d77c3bec86 100644 --- a/drivers/media/usb/em28xx/em28xx-input.c +++ b/drivers/media/usb/em28xx/em28xx-input.c @@ -842,7 +842,6 @@ static int em28xx_ir_init(struct em28xx *dev) kfree(ir); ref_put: em28xx_shutdown_buttons(dev); - kref_put(&dev->ref, em28xx_free_device); return err; } diff --git a/drivers/media/usb/go7007/go7007-driver.c b/drivers/media/usb/go7007/go7007-driver.c index 153a0c3e3da64..b9302d77d6c83 100644 --- a/drivers/media/usb/go7007/go7007-driver.c +++ b/drivers/media/usb/go7007/go7007-driver.c @@ -691,49 +691,23 @@ struct go7007 *go7007_alloc(const struct go7007_board_info *board, struct device *dev) { struct go7007 *go; - int i; go = kzalloc(sizeof(struct go7007), GFP_KERNEL); if (go == NULL) return NULL; go->dev = dev; go->board_info = board; - go->board_id = 0; go->tuner_type = -1; - go->channel_number = 0; - go->name[0] = 0; mutex_init(&go->hw_lock); init_waitqueue_head(&go->frame_waitq); spin_lock_init(&go->spinlock); go->status = STATUS_INIT; - memset(&go->i2c_adapter, 0, sizeof(go->i2c_adapter)); - go->i2c_adapter_online = 0; - go->interrupt_available = 0; init_waitqueue_head(&go->interrupt_waitq); - go->input = 0; go7007_update_board(go); - go->encoder_h_halve = 0; - go->encoder_v_halve = 0; - go->encoder_subsample = 0; go->format = V4L2_PIX_FMT_MJPEG; go->bitrate = 1500000; go->fps_scale = 1; - go->pali = 0; go->aspect_ratio = GO7007_RATIO_1_1; - go->gop_size = 0; - go->ipb = 0; - go->closed_gop = 0; - go->repeat_seqhead = 0; - go->seq_header_enable = 0; - go->gop_header_enable = 0; - go->dvd_mode = 0; - go->interlace_coding = 0; - for (i = 0; i < 4; ++i) - go->modet[i].enable = 0; - for (i = 0; i < 1624; ++i) - go->modet_map[i] = 0; - go->audio_deliver = NULL; - go->audio_enabled = 0; return go; } diff --git a/drivers/media/usb/go7007/s2250-board.c b/drivers/media/usb/go7007/s2250-board.c index 49e75a1a1f3f8..af3b18c6d9e17 100644 --- a/drivers/media/usb/go7007/s2250-board.c +++ b/drivers/media/usb/go7007/s2250-board.c @@ -504,6 +504,7 @@ static int s2250_probe(struct i2c_client *client, u8 *data; struct go7007 *go = i2c_get_adapdata(adapter); struct go7007_usb *usb = go->hpi_context; + int err = -EIO; audio = i2c_new_dummy_device(adapter, TLV320_ADDRESS >> 1); if (IS_ERR(audio)) @@ -532,11 +533,8 @@ static int s2250_probe(struct i2c_client *client, V4L2_CID_HUE, -512, 511, 1, 0); sd->ctrl_handler = &state->hdl; if (state->hdl.error) { - int err = state->hdl.error; - - v4l2_ctrl_handler_free(&state->hdl); - kfree(state); - return err; + err = state->hdl.error; + goto fail; } state->std = V4L2_STD_NTSC; @@ -600,7 +598,7 @@ static int s2250_probe(struct i2c_client *client, i2c_unregister_device(audio); v4l2_ctrl_handler_free(&state->hdl); kfree(state); - return -EIO; + return err; } static int s2250_remove(struct i2c_client *client) diff --git a/drivers/media/usb/hdpvr/hdpvr-video.c b/drivers/media/usb/hdpvr/hdpvr-video.c index bad71d863d395..7849f1fbbcc4d 100644 --- a/drivers/media/usb/hdpvr/hdpvr-video.c +++ b/drivers/media/usb/hdpvr/hdpvr-video.c @@ -308,7 +308,6 @@ static int hdpvr_start_streaming(struct hdpvr_device *dev) dev->status = STATUS_STREAMING; - INIT_WORK(&dev->worker, hdpvr_transmit_buffers); schedule_work(&dev->worker); v4l2_dbg(MSG_BUFFER, hdpvr_debug, &dev->v4l2_dev, @@ -1165,6 +1164,9 @@ int hdpvr_register_videodev(struct hdpvr_device *dev, struct device *parent, bool ac3 = dev->flags & HDPVR_FLAG_AC3_CAP; int res; + // initialize dev->worker + INIT_WORK(&dev->worker, hdpvr_transmit_buffers); + dev->cur_std = V4L2_STD_525_60; dev->width = 720; dev->height = 480; diff --git a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c index 4c991eae53cdf..11e7fcfc3f195 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c @@ -1468,7 +1468,7 @@ static int pvr2_upload_firmware1(struct pvr2_hdw *hdw) for (address = 0; address < fwsize; address += 0x800) { memcpy(fw_ptr, fw_entry->data + address, 0x800); ret += usb_control_msg(hdw->usb_dev, pipe, 0xa0, 0x40, address, - 0, fw_ptr, 0x800, HZ); + 0, fw_ptr, 0x800, 1000); } trace_firmware("Upload done, releasing device's CPU"); @@ -1606,7 +1606,7 @@ int pvr2_upload_firmware2(struct pvr2_hdw *hdw) ((u32 *)fw_ptr)[icnt] = swab32(((u32 *)fw_ptr)[icnt]); ret |= usb_bulk_msg(hdw->usb_dev, pipe, fw_ptr,bcnt, - &actual_length, HZ); + &actual_length, 1000); ret |= (actual_length != bcnt); if (ret) break; fw_done += bcnt; @@ -2570,6 +2570,11 @@ struct pvr2_hdw *pvr2_hdw_create(struct usb_interface *intf, } while (0); mutex_unlock(&pvr2_unit_mtx); + INIT_WORK(&hdw->workpoll, pvr2_hdw_worker_poll); + + if (hdw->unit_number == -1) + goto fail; + cnt1 = 0; cnt2 = scnprintf(hdw->name+cnt1,sizeof(hdw->name)-cnt1,"pvrusb2"); cnt1 += cnt2; @@ -2581,8 +2586,6 @@ struct pvr2_hdw *pvr2_hdw_create(struct usb_interface *intf, if (cnt1 >= sizeof(hdw->name)) cnt1 = sizeof(hdw->name)-1; hdw->name[cnt1] = 0; - INIT_WORK(&hdw->workpoll,pvr2_hdw_worker_poll); - pvr2_trace(PVR2_TRACE_INIT,"Driver unit number is %d, name is %s", hdw->unit_number,hdw->name); @@ -3439,7 +3442,7 @@ void pvr2_hdw_cpufw_set_enabled(struct pvr2_hdw *hdw, 0xa0,0xc0, address,0, hdw->fw_buffer+address, - 0x800,HZ); + 0x800,1000); if (ret < 0) break; } @@ -3978,7 +3981,7 @@ void pvr2_hdw_cpureset_assert(struct pvr2_hdw *hdw,int val) /* Write the CPUCS register on the 8051. The lsb of the register is the reset bit; a 1 asserts reset while a 0 clears it. */ pipe = usb_sndctrlpipe(hdw->usb_dev, 0); - ret = usb_control_msg(hdw->usb_dev,pipe,0xa0,0x40,0xe600,0,da,1,HZ); + ret = usb_control_msg(hdw->usb_dev,pipe,0xa0,0x40,0xe600,0,da,1,1000); if (ret < 0) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "cpureset_assert(%d) error=%d",val,ret); diff --git a/drivers/media/usb/s2255/s2255drv.c b/drivers/media/usb/s2255/s2255drv.c index 329ec80895927..7ed526306816a 100644 --- a/drivers/media/usb/s2255/s2255drv.c +++ b/drivers/media/usb/s2255/s2255drv.c @@ -1884,7 +1884,7 @@ static long s2255_vendor_req(struct s2255_dev *dev, unsigned char Request, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, Value, Index, buf, - TransferBufferLength, HZ * 5); + TransferBufferLength, USB_CTRL_SET_TIMEOUT); if (r >= 0) memcpy(TransferBuffer, buf, TransferBufferLength); @@ -1893,7 +1893,7 @@ static long s2255_vendor_req(struct s2255_dev *dev, unsigned char Request, r = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), Request, USB_TYPE_VENDOR | USB_RECIP_DEVICE, Value, Index, buf, - TransferBufferLength, HZ * 5); + TransferBufferLength, USB_CTRL_SET_TIMEOUT); } kfree(buf); return r; diff --git a/drivers/media/usb/stk1160/stk1160-core.c b/drivers/media/usb/stk1160/stk1160-core.c index b4f8bc5db1389..ce717502ea4c3 100644 --- a/drivers/media/usb/stk1160/stk1160-core.c +++ b/drivers/media/usb/stk1160/stk1160-core.c @@ -65,7 +65,7 @@ int stk1160_read_reg(struct stk1160 *dev, u16 reg, u8 *value) return -ENOMEM; ret = usb_control_msg(dev->udev, pipe, 0x00, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 0x00, reg, buf, sizeof(u8), HZ); + 0x00, reg, buf, sizeof(u8), 1000); if (ret < 0) { stk1160_err("read failed on reg 0x%x (%d)\n", reg, ret); @@ -85,7 +85,7 @@ int stk1160_write_reg(struct stk1160 *dev, u16 reg, u16 value) ret = usb_control_msg(dev->udev, pipe, 0x01, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - value, reg, NULL, 0, HZ); + value, reg, NULL, 0, 1000); if (ret < 0) { stk1160_err("write failed on reg 0x%x (%d)\n", reg, ret); @@ -403,7 +403,7 @@ static void stk1160_disconnect(struct usb_interface *interface) /* Here is the only place where isoc get released */ stk1160_uninit_isoc(dev); - stk1160_clear_queue(dev); + stk1160_clear_queue(dev, VB2_BUF_STATE_ERROR); video_unregister_device(&dev->vdev); v4l2_device_disconnect(&dev->v4l2_dev); diff --git a/drivers/media/usb/stk1160/stk1160-v4l.c b/drivers/media/usb/stk1160/stk1160-v4l.c index bcd14c66e8dfa..a307807571ab9 100644 --- a/drivers/media/usb/stk1160/stk1160-v4l.c +++ b/drivers/media/usb/stk1160/stk1160-v4l.c @@ -258,7 +258,7 @@ static int stk1160_start_streaming(struct stk1160 *dev) stk1160_uninit_isoc(dev); out_stop_hw: usb_set_interface(dev->udev, 0, 0); - stk1160_clear_queue(dev); + stk1160_clear_queue(dev, VB2_BUF_STATE_QUEUED); mutex_unlock(&dev->v4l_lock); @@ -306,7 +306,7 @@ static int stk1160_stop_streaming(struct stk1160 *dev) stk1160_stop_hw(dev); - stk1160_clear_queue(dev); + stk1160_clear_queue(dev, VB2_BUF_STATE_ERROR); stk1160_dbg("streaming stopped\n"); @@ -745,7 +745,7 @@ static const struct video_device v4l_template = { /********************************************************************/ /* Must be called with both v4l_lock and vb_queue_lock hold */ -void stk1160_clear_queue(struct stk1160 *dev) +void stk1160_clear_queue(struct stk1160 *dev, enum vb2_buffer_state vb2_state) { struct stk1160_buffer *buf; unsigned long flags; @@ -756,7 +756,7 @@ void stk1160_clear_queue(struct stk1160 *dev) buf = list_first_entry(&dev->avail_bufs, struct stk1160_buffer, list); list_del(&buf->list); - vb2_buffer_done(&buf->vb.vb2_buf, VB2_BUF_STATE_ERROR); + vb2_buffer_done(&buf->vb.vb2_buf, vb2_state); stk1160_dbg("buffer [%p/%d] aborted\n", buf, buf->vb.vb2_buf.index); } @@ -766,7 +766,7 @@ void stk1160_clear_queue(struct stk1160 *dev) buf = dev->isoc_ctl.buf; dev->isoc_ctl.buf = NULL; - vb2_buffer_done(&buf->vb.vb2_buf, VB2_BUF_STATE_ERROR); + vb2_buffer_done(&buf->vb.vb2_buf, vb2_state); stk1160_dbg("buffer [%p/%d] aborted\n", buf, buf->vb.vb2_buf.index); } diff --git a/drivers/media/usb/stk1160/stk1160.h b/drivers/media/usb/stk1160/stk1160.h index a31ea1c80f255..a70963ce87533 100644 --- a/drivers/media/usb/stk1160/stk1160.h +++ b/drivers/media/usb/stk1160/stk1160.h @@ -166,7 +166,7 @@ struct regval { int stk1160_vb2_setup(struct stk1160 *dev); int stk1160_video_register(struct stk1160 *dev); void stk1160_video_unregister(struct stk1160 *dev); -void stk1160_clear_queue(struct stk1160 *dev); +void stk1160_clear_queue(struct stk1160 *dev, enum vb2_buffer_state vb2_state); /* Provided by stk1160-video.c */ int stk1160_alloc_isoc(struct stk1160 *dev); diff --git a/drivers/media/usb/stkwebcam/stk-webcam.c b/drivers/media/usb/stkwebcam/stk-webcam.c index 21f90a8874852..7d6a4ff6e1421 100644 --- a/drivers/media/usb/stkwebcam/stk-webcam.c +++ b/drivers/media/usb/stkwebcam/stk-webcam.c @@ -1346,7 +1346,7 @@ static int stk_camera_probe(struct usb_interface *interface, if (!dev->isoc_ep) { pr_err("Could not find isoc-in endpoint\n"); err = -ENODEV; - goto error; + goto error_put; } dev->vsettings.palette = V4L2_PIX_FMT_RGB565; dev->vsettings.mode = MODE_VGA; @@ -1359,10 +1359,12 @@ static int stk_camera_probe(struct usb_interface *interface, err = stk_register_video_device(dev); if (err) - goto error; + goto error_put; return 0; +error_put: + usb_put_intf(interface); error: v4l2_ctrl_handler_free(hdl); v4l2_device_unregister(&dev->v4l2_dev); diff --git a/drivers/media/usb/tm6000/tm6000-video.c b/drivers/media/usb/tm6000/tm6000-video.c index c46cbcfafab3f..8874b0b922eee 100644 --- a/drivers/media/usb/tm6000/tm6000-video.c +++ b/drivers/media/usb/tm6000/tm6000-video.c @@ -854,8 +854,7 @@ static int vidioc_querycap(struct file *file, void *priv, struct tm6000_core *dev = ((struct tm6000_fh *)priv)->dev; strscpy(cap->driver, "tm6000", sizeof(cap->driver)); - strscpy(cap->card, "Trident TVMaster TM5600/6000/6010", - sizeof(cap->card)); + strscpy(cap->card, "Trident TM5600/6000/6010", sizeof(cap->card)); usb_make_path(dev->udev, cap->bus_info, sizeof(cap->bus_info)); cap->capabilities = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_READWRITE | V4L2_CAP_DEVICE_CAPS; diff --git a/drivers/media/usb/uvc/uvc_v4l2.c b/drivers/media/usb/uvc/uvc_v4l2.c index 7d60dd3b0bd85..96ef64b6a232b 100644 --- a/drivers/media/usb/uvc/uvc_v4l2.c +++ b/drivers/media/usb/uvc/uvc_v4l2.c @@ -467,10 +467,13 @@ static int uvc_v4l2_set_streamparm(struct uvc_streaming *stream, uvc_simplify_fraction(&timeperframe.numerator, &timeperframe.denominator, 8, 333); - if (parm->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) + if (parm->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) { parm->parm.capture.timeperframe = timeperframe; - else + parm->parm.capture.capability = V4L2_CAP_TIMEPERFRAME; + } else { parm->parm.output.timeperframe = timeperframe; + parm->parm.output.capability = V4L2_CAP_TIMEPERFRAME; + } return 0; } @@ -856,29 +859,31 @@ static int uvc_ioctl_enum_input(struct file *file, void *fh, struct uvc_video_chain *chain = handle->chain; const struct uvc_entity *selector = chain->selector; struct uvc_entity *iterm = NULL; + struct uvc_entity *it; u32 index = input->index; - int pin = 0; if (selector == NULL || (chain->dev->quirks & UVC_QUIRK_IGNORE_SELECTOR_UNIT)) { if (index != 0) return -EINVAL; - list_for_each_entry(iterm, &chain->entities, chain) { - if (UVC_ENTITY_IS_ITERM(iterm)) + list_for_each_entry(it, &chain->entities, chain) { + if (UVC_ENTITY_IS_ITERM(it)) { + iterm = it; break; + } } - pin = iterm->id; } else if (index < selector->bNrInPins) { - pin = selector->baSourceID[index]; - list_for_each_entry(iterm, &chain->entities, chain) { - if (!UVC_ENTITY_IS_ITERM(iterm)) + list_for_each_entry(it, &chain->entities, chain) { + if (!UVC_ENTITY_IS_ITERM(it)) continue; - if (iterm->id == pin) + if (it->id == selector->baSourceID[index]) { + iterm = it; break; + } } } - if (iterm == NULL || iterm->id != pin) + if (iterm == NULL) return -EINVAL; memset(input, 0, sizeof(*input)); @@ -894,8 +899,8 @@ static int uvc_ioctl_g_input(struct file *file, void *fh, unsigned int *input) { struct uvc_fh *handle = fh; struct uvc_video_chain *chain = handle->chain; + u8 *buf; int ret; - u8 i; if (chain->selector == NULL || (chain->dev->quirks & UVC_QUIRK_IGNORE_SELECTOR_UNIT)) { @@ -903,22 +908,27 @@ static int uvc_ioctl_g_input(struct file *file, void *fh, unsigned int *input) return 0; } + buf = kmalloc(1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + ret = uvc_query_ctrl(chain->dev, UVC_GET_CUR, chain->selector->id, chain->dev->intfnum, UVC_SU_INPUT_SELECT_CONTROL, - &i, 1); - if (ret < 0) - return ret; + buf, 1); + if (!ret) + *input = *buf - 1; - *input = i - 1; - return 0; + kfree(buf); + + return ret; } static int uvc_ioctl_s_input(struct file *file, void *fh, unsigned int input) { struct uvc_fh *handle = fh; struct uvc_video_chain *chain = handle->chain; + u8 *buf; int ret; - u32 i; ret = uvc_acquire_privileges(handle); if (ret < 0) @@ -934,10 +944,17 @@ static int uvc_ioctl_s_input(struct file *file, void *fh, unsigned int input) if (input >= chain->selector->bNrInPins) return -EINVAL; - i = input + 1; - return uvc_query_ctrl(chain->dev, UVC_SET_CUR, chain->selector->id, - chain->dev->intfnum, UVC_SU_INPUT_SELECT_CONTROL, - &i, 1); + buf = kmalloc(1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + *buf = input + 1; + ret = uvc_query_ctrl(chain->dev, UVC_SET_CUR, chain->selector->id, + chain->dev->intfnum, UVC_SU_INPUT_SELECT_CONTROL, + buf, 1); + kfree(buf); + + return ret; } static int uvc_ioctl_queryctrl(struct file *file, void *fh, diff --git a/drivers/media/usb/uvc/uvc_video.c b/drivers/media/usb/uvc/uvc_video.c index 5d095b2a03464..fe58723fc5ac7 100644 --- a/drivers/media/usb/uvc/uvc_video.c +++ b/drivers/media/usb/uvc/uvc_video.c @@ -112,6 +112,11 @@ int uvc_query_ctrl(struct uvc_device *dev, u8 query, u8 unit, case 5: /* Invalid unit */ case 6: /* Invalid control */ case 7: /* Invalid Request */ + /* + * The firmware has not properly implemented + * the control or there has been a HW error. + */ + return -EIO; case 8: /* Invalid value within range */ return -EINVAL; default: /* reserved or unknown */ @@ -1910,6 +1915,10 @@ static int uvc_video_start_transfer(struct uvc_streaming *stream, if (ep == NULL) return -EIO; + /* Reject broken descriptors. */ + if (usb_endpoint_maxp(&ep->desc) == 0) + return -EIO; + ret = uvc_init_video_bulk(stream, ep, gfp_flags); } diff --git a/drivers/media/usb/uvc/uvcvideo.h b/drivers/media/usb/uvc/uvcvideo.h index 24e3d8c647e77..5f137400bebd6 100644 --- a/drivers/media/usb/uvc/uvcvideo.h +++ b/drivers/media/usb/uvc/uvcvideo.h @@ -179,7 +179,7 @@ /* Maximum status buffer size in bytes of interrupt URB. */ #define UVC_MAX_STATUS_SIZE 16 -#define UVC_CTRL_CONTROL_TIMEOUT 500 +#define UVC_CTRL_CONTROL_TIMEOUT 5000 #define UVC_CTRL_STREAMING_TIMEOUT 5000 /* Maximum allowed number of control mappings per device */ diff --git a/drivers/media/v4l2-core/v4l2-dv-timings.c b/drivers/media/v4l2-core/v4l2-dv-timings.c index 4f23e939ead0b..60454e1b727e9 100644 --- a/drivers/media/v4l2-core/v4l2-dv-timings.c +++ b/drivers/media/v4l2-core/v4l2-dv-timings.c @@ -196,7 +196,7 @@ bool v4l2_find_dv_timings_cap(struct v4l2_dv_timings *t, if (!v4l2_valid_dv_timings(t, cap, fnc, fnc_handle)) return false; - for (i = 0; i < v4l2_dv_timings_presets[i].bt.width; i++) { + for (i = 0; v4l2_dv_timings_presets[i].bt.width; i++) { if (v4l2_valid_dv_timings(v4l2_dv_timings_presets + i, cap, fnc, fnc_handle) && v4l2_match_dv_timings(t, v4l2_dv_timings_presets + i, @@ -218,7 +218,7 @@ bool v4l2_find_dv_timings_cea861_vic(struct v4l2_dv_timings *t, u8 vic) { unsigned int i; - for (i = 0; i < v4l2_dv_timings_presets[i].bt.width; i++) { + for (i = 0; v4l2_dv_timings_presets[i].bt.width; i++) { const struct v4l2_bt_timings *bt = &v4l2_dv_timings_presets[i].bt; diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index 24db33f803c06..3012e8ecffb94 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -902,7 +902,7 @@ static void v4l_print_default(const void *arg, bool write_only) pr_cont("driver-specific ioctl\n"); } -static int check_ext_ctrls(struct v4l2_ext_controls *c, int allow_priv) +static bool check_ext_ctrls(struct v4l2_ext_controls *c, unsigned long ioctl) { __u32 i; @@ -911,23 +911,41 @@ static int check_ext_ctrls(struct v4l2_ext_controls *c, int allow_priv) for (i = 0; i < c->count; i++) c->controls[i].reserved2[0] = 0; - /* V4L2_CID_PRIVATE_BASE cannot be used as control class - when using extended controls. - Only when passed in through VIDIOC_G_CTRL and VIDIOC_S_CTRL - is it allowed for backwards compatibility. - */ - if (!allow_priv && c->which == V4L2_CID_PRIVATE_BASE) - return 0; - if (!c->which) - return 1; + switch (c->which) { + case V4L2_CID_PRIVATE_BASE: + /* + * V4L2_CID_PRIVATE_BASE cannot be used as control class + * when using extended controls. + * Only when passed in through VIDIOC_G_CTRL and VIDIOC_S_CTRL + * is it allowed for backwards compatibility. + */ + if (ioctl == VIDIOC_G_CTRL || ioctl == VIDIOC_S_CTRL) + return false; + break; + case V4L2_CTRL_WHICH_DEF_VAL: + /* Default value cannot be changed */ + if (ioctl == VIDIOC_S_EXT_CTRLS || + ioctl == VIDIOC_TRY_EXT_CTRLS) { + c->error_idx = c->count; + return false; + } + return true; + case V4L2_CTRL_WHICH_CUR_VAL: + return true; + case V4L2_CTRL_WHICH_REQUEST_VAL: + c->error_idx = c->count; + return false; + } + /* Check that all controls are from the same control class. */ for (i = 0; i < c->count; i++) { if (V4L2_CTRL_ID2WHICH(c->controls[i].id) != c->which) { - c->error_idx = i; - return 0; + c->error_idx = ioctl == VIDIOC_TRY_EXT_CTRLS ? i : + c->count; + return false; } } - return 1; + return true; } static int check_fmt(struct file *file, enum v4l2_buf_type type) @@ -2028,6 +2046,7 @@ static int v4l_prepare_buf(const struct v4l2_ioctl_ops *ops, static int v4l_g_parm(const struct v4l2_ioctl_ops *ops, struct file *file, void *fh, void *arg) { + struct video_device *vfd = video_devdata(file); struct v4l2_streamparm *p = arg; v4l2_std_id std; int ret = check_fmt(file, p->type); @@ -2039,7 +2058,8 @@ static int v4l_g_parm(const struct v4l2_ioctl_ops *ops, if (p->type != V4L2_BUF_TYPE_VIDEO_CAPTURE && p->type != V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) return -EINVAL; - p->parm.capture.readbuffers = 2; + if (vfd->device_caps & V4L2_CAP_READWRITE) + p->parm.capture.readbuffers = 2; ret = ops->vidioc_g_std(file, fh, &std); if (ret == 0) v4l2_video_std_frame_period(std, &p->parm.capture.timeperframe); @@ -2145,7 +2165,7 @@ static int v4l_g_ctrl(const struct v4l2_ioctl_ops *ops, ctrls.controls = &ctrl; ctrl.id = p->id; ctrl.value = p->value; - if (check_ext_ctrls(&ctrls, 1)) { + if (check_ext_ctrls(&ctrls, VIDIOC_G_CTRL)) { int ret = ops->vidioc_g_ext_ctrls(file, fh, &ctrls); if (ret == 0) @@ -2179,7 +2199,7 @@ static int v4l_s_ctrl(const struct v4l2_ioctl_ops *ops, ctrls.controls = &ctrl; ctrl.id = p->id; ctrl.value = p->value; - if (check_ext_ctrls(&ctrls, 1)) + if (check_ext_ctrls(&ctrls, VIDIOC_S_CTRL)) return ops->vidioc_s_ext_ctrls(file, fh, &ctrls); return -EINVAL; } @@ -2201,8 +2221,8 @@ static int v4l_g_ext_ctrls(const struct v4l2_ioctl_ops *ops, vfd, vfd->v4l2_dev->mdev, p); if (ops->vidioc_g_ext_ctrls == NULL) return -ENOTTY; - return check_ext_ctrls(p, 0) ? ops->vidioc_g_ext_ctrls(file, fh, p) : - -EINVAL; + return check_ext_ctrls(p, VIDIOC_G_EXT_CTRLS) ? + ops->vidioc_g_ext_ctrls(file, fh, p) : -EINVAL; } static int v4l_s_ext_ctrls(const struct v4l2_ioctl_ops *ops, @@ -2222,8 +2242,8 @@ static int v4l_s_ext_ctrls(const struct v4l2_ioctl_ops *ops, vfd, vfd->v4l2_dev->mdev, p); if (ops->vidioc_s_ext_ctrls == NULL) return -ENOTTY; - return check_ext_ctrls(p, 0) ? ops->vidioc_s_ext_ctrls(file, fh, p) : - -EINVAL; + return check_ext_ctrls(p, VIDIOC_S_EXT_CTRLS) ? + ops->vidioc_s_ext_ctrls(file, fh, p) : -EINVAL; } static int v4l_try_ext_ctrls(const struct v4l2_ioctl_ops *ops, @@ -2243,8 +2263,8 @@ static int v4l_try_ext_ctrls(const struct v4l2_ioctl_ops *ops, vfd, vfd->v4l2_dev->mdev, p); if (ops->vidioc_try_ext_ctrls == NULL) return -ENOTTY; - return check_ext_ctrls(p, 0) ? ops->vidioc_try_ext_ctrls(file, fh, p) : - -EINVAL; + return check_ext_ctrls(p, VIDIOC_TRY_EXT_CTRLS) ? + ops->vidioc_try_ext_ctrls(file, fh, p) : -EINVAL; } /* diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c index 639dc8d45e603..d56837c04a81a 100644 --- a/drivers/media/v4l2-core/v4l2-mem2mem.c +++ b/drivers/media/v4l2-core/v4l2-mem2mem.c @@ -460,19 +460,14 @@ int v4l2_m2m_reqbufs(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, } EXPORT_SYMBOL_GPL(v4l2_m2m_reqbufs); -int v4l2_m2m_querybuf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, - struct v4l2_buffer *buf) +static void v4l2_m2m_adjust_mem_offset(struct vb2_queue *vq, + struct v4l2_buffer *buf) { - struct vb2_queue *vq; - int ret = 0; - unsigned int i; - - vq = v4l2_m2m_get_vq(m2m_ctx, buf->type); - ret = vb2_querybuf(vq, buf); - /* Adjust MMAP memory offsets for the CAPTURE queue */ if (buf->memory == V4L2_MEMORY_MMAP && !V4L2_TYPE_IS_OUTPUT(vq->type)) { if (V4L2_TYPE_IS_MULTIPLANAR(vq->type)) { + unsigned int i; + for (i = 0; i < buf->length; ++i) buf->m.planes[i].m.mem_offset += DST_QUEUE_OFF_BASE; @@ -480,8 +475,23 @@ int v4l2_m2m_querybuf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, buf->m.offset += DST_QUEUE_OFF_BASE; } } +} - return ret; +int v4l2_m2m_querybuf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, + struct v4l2_buffer *buf) +{ + struct vb2_queue *vq; + int ret; + + vq = v4l2_m2m_get_vq(m2m_ctx, buf->type); + ret = vb2_querybuf(vq, buf); + if (ret) + return ret; + + /* Adjust MMAP memory offsets for the CAPTURE queue */ + v4l2_m2m_adjust_mem_offset(vq, buf); + + return 0; } EXPORT_SYMBOL_GPL(v4l2_m2m_querybuf); @@ -500,10 +510,16 @@ int v4l2_m2m_qbuf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, return -EPERM; } ret = vb2_qbuf(vq, vdev->v4l2_dev->mdev, buf); - if (!ret && !(buf->flags & V4L2_BUF_FLAG_IN_REQUEST)) + if (ret) + return ret; + + /* Adjust MMAP memory offsets for the CAPTURE queue */ + v4l2_m2m_adjust_mem_offset(vq, buf); + + if (!(buf->flags & V4L2_BUF_FLAG_IN_REQUEST)) v4l2_m2m_try_schedule(m2m_ctx); - return ret; + return 0; } EXPORT_SYMBOL_GPL(v4l2_m2m_qbuf); @@ -511,9 +527,17 @@ int v4l2_m2m_dqbuf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, struct v4l2_buffer *buf) { struct vb2_queue *vq; + int ret; vq = v4l2_m2m_get_vq(m2m_ctx, buf->type); - return vb2_dqbuf(vq, buf, file->f_flags & O_NONBLOCK); + ret = vb2_dqbuf(vq, buf, file->f_flags & O_NONBLOCK); + if (ret) + return ret; + + /* Adjust MMAP memory offsets for the CAPTURE queue */ + v4l2_m2m_adjust_mem_offset(vq, buf); + + return 0; } EXPORT_SYMBOL_GPL(v4l2_m2m_dqbuf); @@ -522,9 +546,17 @@ int v4l2_m2m_prepare_buf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, { struct video_device *vdev = video_devdata(file); struct vb2_queue *vq; + int ret; vq = v4l2_m2m_get_vq(m2m_ctx, buf->type); - return vb2_prepare_buf(vq, vdev->v4l2_dev->mdev, buf); + ret = vb2_prepare_buf(vq, vdev->v4l2_dev->mdev, buf); + if (ret) + return ret; + + /* Adjust MMAP memory offsets for the CAPTURE queue */ + v4l2_m2m_adjust_mem_offset(vq, buf); + + return 0; } EXPORT_SYMBOL_GPL(v4l2_m2m_prepare_buf); diff --git a/drivers/memory/atmel-ebi.c b/drivers/memory/atmel-ebi.c index 89646896a1833..6f9cf6270a437 100644 --- a/drivers/memory/atmel-ebi.c +++ b/drivers/memory/atmel-ebi.c @@ -545,20 +545,27 @@ static int atmel_ebi_probe(struct platform_device *pdev) smc_np = of_parse_phandle(dev->of_node, "atmel,smc", 0); ebi->smc.regmap = syscon_node_to_regmap(smc_np); - if (IS_ERR(ebi->smc.regmap)) - return PTR_ERR(ebi->smc.regmap); + if (IS_ERR(ebi->smc.regmap)) { + ret = PTR_ERR(ebi->smc.regmap); + goto put_node; + } ebi->smc.layout = atmel_hsmc_get_reg_layout(smc_np); - if (IS_ERR(ebi->smc.layout)) - return PTR_ERR(ebi->smc.layout); + if (IS_ERR(ebi->smc.layout)) { + ret = PTR_ERR(ebi->smc.layout); + goto put_node; + } ebi->smc.clk = of_clk_get(smc_np, 0); if (IS_ERR(ebi->smc.clk)) { - if (PTR_ERR(ebi->smc.clk) != -ENOENT) - return PTR_ERR(ebi->smc.clk); + if (PTR_ERR(ebi->smc.clk) != -ENOENT) { + ret = PTR_ERR(ebi->smc.clk); + goto put_node; + } ebi->smc.clk = NULL; } + of_node_put(smc_np); ret = clk_prepare_enable(ebi->smc.clk); if (ret) return ret; @@ -609,6 +616,10 @@ static int atmel_ebi_probe(struct platform_device *pdev) } return of_platform_populate(np, NULL, NULL, dev); + +put_node: + of_node_put(smc_np); + return ret; } static __maybe_unused int atmel_ebi_resume(struct device *dev) diff --git a/drivers/memory/emif.c b/drivers/memory/emif.c index af296b6fcbbdc..9c4c668f9a88a 100644 --- a/drivers/memory/emif.c +++ b/drivers/memory/emif.c @@ -1423,7 +1423,7 @@ static struct emif_data *__init_or_module get_device_details( temp = devm_kzalloc(dev, sizeof(*pd), GFP_KERNEL); dev_info = devm_kzalloc(dev, sizeof(*dev_info), GFP_KERNEL); - if (!emif || !pd || !dev_info) { + if (!emif || !temp || !dev_info) { dev_err(dev, "%s:%d: allocation error\n", __func__, __LINE__); goto error; } @@ -1515,7 +1515,7 @@ static int __init_or_module emif_probe(struct platform_device *pdev) { struct emif_data *emif; struct resource *res; - int irq; + int irq, ret; if (pdev->dev.of_node) emif = of_get_memory_device_details(pdev->dev.of_node, &pdev->dev); @@ -1549,7 +1549,9 @@ static int __init_or_module emif_probe(struct platform_device *pdev) emif_onetime_settings(emif); emif_debugfs_init(emif); disable_and_clear_all_interrupts(emif); - setup_interrupts(emif, irq); + ret = setup_interrupts(emif, irq); + if (ret) + goto error; /* One-time actions taken on probing the first device */ if (!emif1) { diff --git a/drivers/memory/fsl_ifc.c b/drivers/memory/fsl_ifc.c index 2790258346070..84fa32f288c80 100644 --- a/drivers/memory/fsl_ifc.c +++ b/drivers/memory/fsl_ifc.c @@ -263,7 +263,7 @@ static int fsl_ifc_ctrl_probe(struct platform_device *dev) ret = fsl_ifc_ctrl_init(fsl_ifc_ctrl_dev); if (ret < 0) - goto err; + goto err_unmap_nandirq; init_waitqueue_head(&fsl_ifc_ctrl_dev->nand_wait); @@ -272,7 +272,7 @@ static int fsl_ifc_ctrl_probe(struct platform_device *dev) if (ret != 0) { dev_err(&dev->dev, "failed to install irq (%d)\n", fsl_ifc_ctrl_dev->irq); - goto err_irq; + goto err_unmap_nandirq; } if (fsl_ifc_ctrl_dev->nand_irq) { @@ -281,17 +281,16 @@ static int fsl_ifc_ctrl_probe(struct platform_device *dev) if (ret != 0) { dev_err(&dev->dev, "failed to install irq (%d)\n", fsl_ifc_ctrl_dev->nand_irq); - goto err_nandirq; + goto err_free_irq; } } return 0; -err_nandirq: - free_irq(fsl_ifc_ctrl_dev->nand_irq, fsl_ifc_ctrl_dev); - irq_dispose_mapping(fsl_ifc_ctrl_dev->nand_irq); -err_irq: +err_free_irq: free_irq(fsl_ifc_ctrl_dev->irq, fsl_ifc_ctrl_dev); +err_unmap_nandirq: + irq_dispose_mapping(fsl_ifc_ctrl_dev->nand_irq); irq_dispose_mapping(fsl_ifc_ctrl_dev->irq); err: iounmap(fsl_ifc_ctrl_dev->gregs); diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c index d9ee8e3dc72da..55907e4c36b18 100644 --- a/drivers/memstick/core/ms_block.c +++ b/drivers/memstick/core/ms_block.c @@ -1727,7 +1727,7 @@ static int msb_init_card(struct memstick_dev *card) msb->pages_in_block = boot_block->attr.block_size * 2; msb->block_size = msb->page_size * msb->pages_in_block; - if (msb->page_size > PAGE_SIZE) { + if ((size_t)msb->page_size > PAGE_SIZE) { /* this isn't supported by linux at all, anyway*/ dbg("device page %d size isn't supported", msb->page_size); return -EINVAL; diff --git a/drivers/memstick/host/jmb38x_ms.c b/drivers/memstick/host/jmb38x_ms.c index 64fff6abe60e8..74d6686b35f77 100644 --- a/drivers/memstick/host/jmb38x_ms.c +++ b/drivers/memstick/host/jmb38x_ms.c @@ -899,7 +899,7 @@ static struct memstick_host *jmb38x_ms_alloc_host(struct jmb38x_ms *jm, int cnt) iounmap(host->addr); err_out_free: - kfree(msh); + memstick_free_host(msh); return NULL; } diff --git a/drivers/memstick/host/r592.c b/drivers/memstick/host/r592.c index d2ef46337191c..eaa2a94d18be4 100644 --- a/drivers/memstick/host/r592.c +++ b/drivers/memstick/host/r592.c @@ -837,15 +837,15 @@ static void r592_remove(struct pci_dev *pdev) } memstick_remove_host(dev->host); + if (dev->dummy_dma_page) + dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->dummy_dma_page, + dev->dummy_dma_page_physical_address); + free_irq(dev->irq, dev); iounmap(dev->mmio); pci_release_regions(pdev); pci_disable_device(pdev); memstick_free_host(dev->host); - - if (dev->dummy_dma_page) - dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->dummy_dma_page, - dev->dummy_dma_page_physical_address); } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c index 3e9dc92cb467b..842de1f352dfc 100644 --- a/drivers/mfd/ab8500-core.c +++ b/drivers/mfd/ab8500-core.c @@ -493,7 +493,7 @@ static int ab8500_handle_hierarchical_line(struct ab8500 *ab8500, if (line == AB8540_INT_GPIO43F || line == AB8540_INT_GPIO44F) line += 1; - handle_nested_irq(irq_create_mapping(ab8500->domain, line)); + handle_nested_irq(irq_find_mapping(ab8500->domain, line)); } return 0; diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c index a6bd2134cea2a..14e4bbe6a9da3 100644 --- a/drivers/mfd/asic3.c +++ b/drivers/mfd/asic3.c @@ -914,14 +914,14 @@ static int __init asic3_mfd_probe(struct platform_device *pdev, ret = mfd_add_devices(&pdev->dev, pdev->id, &asic3_cell_ds1wm, 1, mem, asic->irq_base, NULL); if (ret < 0) - goto out; + goto out_unmap; } if (mem_sdio && (irq >= 0)) { ret = mfd_add_devices(&pdev->dev, pdev->id, &asic3_cell_mmc, 1, mem_sdio, irq, NULL); if (ret < 0) - goto out; + goto out_unmap; } ret = 0; @@ -935,8 +935,12 @@ static int __init asic3_mfd_probe(struct platform_device *pdev, ret = mfd_add_devices(&pdev->dev, 0, asic3_cell_leds, ASIC3_NUM_LEDS, NULL, 0, NULL); } + return ret; - out: +out_unmap: + if (asic->tmio_cnf) + iounmap(asic->tmio_cnf); +out: return ret; } diff --git a/drivers/mfd/axp20x.c b/drivers/mfd/axp20x.c index aa59496e43768..9db1000944c34 100644 --- a/drivers/mfd/axp20x.c +++ b/drivers/mfd/axp20x.c @@ -125,12 +125,13 @@ static const struct regmap_range axp288_writeable_ranges[] = { static const struct regmap_range axp288_volatile_ranges[] = { regmap_reg_range(AXP20X_PWR_INPUT_STATUS, AXP288_POWER_REASON), + regmap_reg_range(AXP22X_PWR_OUT_CTRL1, AXP22X_ALDO3_V_OUT), regmap_reg_range(AXP288_BC_GLOBAL, AXP288_BC_GLOBAL), regmap_reg_range(AXP288_BC_DET_STAT, AXP20X_VBUS_IPSOUT_MGMT), regmap_reg_range(AXP20X_CHRG_BAK_CTRL, AXP20X_CHRG_BAK_CTRL), regmap_reg_range(AXP20X_IRQ1_EN, AXP20X_IPSOUT_V_HIGH_L), regmap_reg_range(AXP20X_TIMER_CTRL, AXP20X_TIMER_CTRL), - regmap_reg_range(AXP22X_GPIO_STATE, AXP22X_GPIO_STATE), + regmap_reg_range(AXP20X_GPIO1_CTRL, AXP22X_GPIO_STATE), regmap_reg_range(AXP288_RT_BATT_V_H, AXP288_RT_BATT_V_L), regmap_reg_range(AXP20X_FG_RES, AXP288_FG_CC_CAP_REG), }; diff --git a/drivers/mfd/davinci_voicecodec.c b/drivers/mfd/davinci_voicecodec.c index e5c8bc998eb4e..965820481f1e1 100644 --- a/drivers/mfd/davinci_voicecodec.c +++ b/drivers/mfd/davinci_voicecodec.c @@ -46,14 +46,12 @@ static int __init davinci_vc_probe(struct platform_device *pdev) } clk_enable(davinci_vc->clk); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - - fifo_base = (dma_addr_t)res->start; - davinci_vc->base = devm_ioremap_resource(&pdev->dev, res); + davinci_vc->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(davinci_vc->base)) { ret = PTR_ERR(davinci_vc->base); goto fail; } + fifo_base = (dma_addr_t)res->start; davinci_vc->regmap = devm_regmap_init_mmio(&pdev->dev, davinci_vc->base, diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c index dfac6afa82ca5..f1f2ad9ff0b34 100644 --- a/drivers/mfd/db8500-prcmu.c +++ b/drivers/mfd/db8500-prcmu.c @@ -1695,22 +1695,20 @@ static long round_clock_rate(u8 clock, unsigned long rate) } static const unsigned long db8500_armss_freqs[] = { - 200000000, - 400000000, - 800000000, + 199680000, + 399360000, + 798720000, 998400000 }; /* The DB8520 has slightly higher ARMSS max frequency */ static const unsigned long db8520_armss_freqs[] = { - 200000000, - 400000000, - 800000000, + 199680000, + 399360000, + 798720000, 1152000000 }; - - static long round_armss_rate(unsigned long rate) { unsigned long freq = 0; diff --git a/drivers/mfd/intel-lpss-acpi.c b/drivers/mfd/intel-lpss-acpi.c index c8fe334b5fe8b..045cbf0cbe53a 100644 --- a/drivers/mfd/intel-lpss-acpi.c +++ b/drivers/mfd/intel-lpss-acpi.c @@ -102,6 +102,7 @@ static int intel_lpss_acpi_probe(struct platform_device *pdev) { struct intel_lpss_platform_info *info; const struct acpi_device_id *id; + int ret; id = acpi_match_device(intel_lpss_acpi_ids, &pdev->dev); if (!id) @@ -115,10 +116,14 @@ static int intel_lpss_acpi_probe(struct platform_device *pdev) info->mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); info->irq = platform_get_irq(pdev, 0); + ret = intel_lpss_probe(&pdev->dev, info); + if (ret) + return ret; + pm_runtime_set_active(&pdev->dev); pm_runtime_enable(&pdev->dev); - return intel_lpss_probe(&pdev->dev, info); + return 0; } static int intel_lpss_acpi_remove(struct platform_device *pdev) diff --git a/drivers/mfd/ipaq-micro.c b/drivers/mfd/ipaq-micro.c index a1d9be82734de..88387c7e74433 100644 --- a/drivers/mfd/ipaq-micro.c +++ b/drivers/mfd/ipaq-micro.c @@ -407,7 +407,7 @@ static int __init micro_probe(struct platform_device *pdev) micro_reset_comm(micro); irq = platform_get_irq(pdev, 0); - if (!irq) + if (irq < 0) return -EINVAL; ret = devm_request_irq(&pdev->dev, irq, micro_serial_isr, IRQF_SHARED, "ipaq-micro", diff --git a/drivers/mfd/mc13xxx-core.c b/drivers/mfd/mc13xxx-core.c index 1abe7432aad82..e281a9202f110 100644 --- a/drivers/mfd/mc13xxx-core.c +++ b/drivers/mfd/mc13xxx-core.c @@ -323,8 +323,10 @@ int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx, unsigned int mode, adc1 |= MC13783_ADC1_ATOX; dev_dbg(mc13xxx->dev, "%s: request irq\n", __func__); - mc13xxx_irq_request(mc13xxx, MC13XXX_IRQ_ADCDONE, + ret = mc13xxx_irq_request(mc13xxx, MC13XXX_IRQ_ADCDONE, mc13xxx_handler_adcdone, __func__, &adcdone_data); + if (ret) + goto out; mc13xxx_reg_write(mc13xxx, MC13XXX_ADC0, adc0); mc13xxx_reg_write(mc13xxx, MC13XXX_ADC1, adc1); diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c index 1aee3b3253fc9..508349399f8af 100644 --- a/drivers/mfd/stmpe.c +++ b/drivers/mfd/stmpe.c @@ -1091,7 +1091,7 @@ static irqreturn_t stmpe_irq(int irq, void *data) if (variant->id_val == STMPE801_ID || variant->id_val == STMPE1600_ID) { - int base = irq_create_mapping(stmpe->domain, 0); + int base = irq_find_mapping(stmpe->domain, 0); handle_nested_irq(base); return IRQ_HANDLED; @@ -1119,7 +1119,7 @@ static irqreturn_t stmpe_irq(int irq, void *data) while (status) { int bit = __ffs(status); int line = bank * 8 + bit; - int nestedirq = irq_create_mapping(stmpe->domain, line); + int nestedirq = irq_find_mapping(stmpe->domain, line); handle_nested_irq(nestedirq); status &= ~(1 << bit); diff --git a/drivers/mfd/tc3589x.c b/drivers/mfd/tc3589x.c index 67c9995bb1aa6..23cfbd050120d 100644 --- a/drivers/mfd/tc3589x.c +++ b/drivers/mfd/tc3589x.c @@ -187,7 +187,7 @@ static irqreturn_t tc3589x_irq(int irq, void *data) while (status) { int bit = __ffs(status); - int virq = irq_create_mapping(tc3589x->domain, bit); + int virq = irq_find_mapping(tc3589x->domain, bit); handle_nested_irq(virq); status &= ~(1 << bit); diff --git a/drivers/mfd/tqmx86.c b/drivers/mfd/tqmx86.c index 22d2f02d855c2..ccc5a9ac788c1 100644 --- a/drivers/mfd/tqmx86.c +++ b/drivers/mfd/tqmx86.c @@ -210,6 +210,8 @@ static int tqmx86_probe(struct platform_device *pdev) /* Assumes the IRQ resource is first. */ tqmx_gpio_resources[0].start = gpio_irq; + } else { + tqmx_gpio_resources[0].flags = 0; } ocores_platfom_data.clock_khz = tqmx86_board_id_to_clk_rate(board_id); diff --git a/drivers/mfd/wm8994-irq.c b/drivers/mfd/wm8994-irq.c index 6c3a619e26286..651a028bc519a 100644 --- a/drivers/mfd/wm8994-irq.c +++ b/drivers/mfd/wm8994-irq.c @@ -154,7 +154,7 @@ static irqreturn_t wm8994_edge_irq(int irq, void *data) struct wm8994 *wm8994 = data; while (gpio_get_value_cansleep(wm8994->pdata.irq_gpio)) - handle_nested_irq(irq_create_mapping(wm8994->edge_irq, 0)); + handle_nested_irq(irq_find_mapping(wm8994->edge_irq, 0)); return IRQ_HANDLED; } diff --git a/drivers/misc/atmel-ssc.c b/drivers/misc/atmel-ssc.c index d6cd5537126c6..69f9b0336410d 100644 --- a/drivers/misc/atmel-ssc.c +++ b/drivers/misc/atmel-ssc.c @@ -232,9 +232,9 @@ static int ssc_probe(struct platform_device *pdev) clk_disable_unprepare(ssc->clk); ssc->irq = platform_get_irq(pdev, 0); - if (!ssc->irq) { + if (ssc->irq < 0) { dev_dbg(&pdev->dev, "could not get irq\n"); - return -ENXIO; + return ssc->irq; } mutex_lock(&user_lock); diff --git a/drivers/misc/cardreader/alcor_pci.c b/drivers/misc/cardreader/alcor_pci.c index 1fadb95b85b09..ba5d5c102b3cc 100644 --- a/drivers/misc/cardreader/alcor_pci.c +++ b/drivers/misc/cardreader/alcor_pci.c @@ -260,7 +260,7 @@ static int alcor_pci_probe(struct pci_dev *pdev, if (!priv) return -ENOMEM; - ret = ida_simple_get(&alcor_pci_idr, 0, 0, GFP_KERNEL); + ret = ida_alloc(&alcor_pci_idr, GFP_KERNEL); if (ret < 0) return ret; priv->id = ret; @@ -274,7 +274,8 @@ static int alcor_pci_probe(struct pci_dev *pdev, ret = pci_request_regions(pdev, DRV_NAME_ALCOR_PCI); if (ret) { dev_err(&pdev->dev, "Cannot request region\n"); - return -ENOMEM; + ret = -ENOMEM; + goto error_free_ida; } if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) { @@ -318,6 +319,8 @@ static int alcor_pci_probe(struct pci_dev *pdev, error_release_regions: pci_release_regions(pdev); +error_free_ida: + ida_free(&alcor_pci_idr, priv->id); return ret; } @@ -331,7 +334,7 @@ static void alcor_pci_remove(struct pci_dev *pdev) mfd_remove_devices(&pdev->dev); - ida_simple_remove(&alcor_pci_idr, priv->id); + ida_free(&alcor_pci_idr, priv->id); pci_release_regions(pdev); pci_set_drvdata(pdev, NULL); diff --git a/drivers/misc/cardreader/rtsx_usb.c b/drivers/misc/cardreader/rtsx_usb.c index a328cab110143..8acf6e6aff316 100644 --- a/drivers/misc/cardreader/rtsx_usb.c +++ b/drivers/misc/cardreader/rtsx_usb.c @@ -631,16 +631,20 @@ static int rtsx_usb_probe(struct usb_interface *intf, ucr->pusb_dev = usb_dev; - ucr->iobuf = usb_alloc_coherent(ucr->pusb_dev, IOBUF_SIZE, - GFP_KERNEL, &ucr->iobuf_dma); - if (!ucr->iobuf) + ucr->cmd_buf = kmalloc(IOBUF_SIZE, GFP_KERNEL); + if (!ucr->cmd_buf) return -ENOMEM; + ucr->rsp_buf = kmalloc(IOBUF_SIZE, GFP_KERNEL); + if (!ucr->rsp_buf) { + ret = -ENOMEM; + goto out_free_cmd_buf; + } + usb_set_intfdata(intf, ucr); ucr->vendor_id = id->idVendor; ucr->product_id = id->idProduct; - ucr->cmd_buf = ucr->rsp_buf = ucr->iobuf; mutex_init(&ucr->dev_mutex); @@ -667,8 +671,12 @@ static int rtsx_usb_probe(struct usb_interface *intf, return 0; out_init_fail: - usb_free_coherent(ucr->pusb_dev, IOBUF_SIZE, ucr->iobuf, - ucr->iobuf_dma); + usb_set_intfdata(ucr->pusb_intf, NULL); + kfree(ucr->rsp_buf); + ucr->rsp_buf = NULL; +out_free_cmd_buf: + kfree(ucr->cmd_buf); + ucr->cmd_buf = NULL; return ret; } @@ -681,8 +689,12 @@ static void rtsx_usb_disconnect(struct usb_interface *intf) mfd_remove_devices(&intf->dev); usb_set_intfdata(ucr->pusb_intf, NULL); - usb_free_coherent(ucr->pusb_dev, IOBUF_SIZE, ucr->iobuf, - ucr->iobuf_dma); + + kfree(ucr->cmd_buf); + ucr->cmd_buf = NULL; + + kfree(ucr->rsp_buf); + ucr->rsp_buf = NULL; } #ifdef CONFIG_PM diff --git a/drivers/misc/cb710/sgbuf2.c b/drivers/misc/cb710/sgbuf2.c index dfd2969e36289..d52e079bb324e 100644 --- a/drivers/misc/cb710/sgbuf2.c +++ b/drivers/misc/cb710/sgbuf2.c @@ -47,7 +47,7 @@ static inline bool needs_unaligned_copy(const void *ptr) #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS return false; #else - return ((ptr - NULL) & 3) != 0; + return ((uintptr_t)ptr & 3) != 0; #endif } diff --git a/drivers/misc/eeprom/ee1004.c b/drivers/misc/eeprom/ee1004.c index b081c67416d77..6b313997085e0 100644 --- a/drivers/misc/eeprom/ee1004.c +++ b/drivers/misc/eeprom/ee1004.c @@ -82,6 +82,9 @@ static ssize_t ee1004_eeprom_read(struct i2c_client *client, char *buf, if (unlikely(offset + count > EE1004_PAGE_SIZE)) count = EE1004_PAGE_SIZE - offset; + if (count > I2C_SMBUS_BLOCK_MAX) + count = I2C_SMBUS_BLOCK_MAX; + status = i2c_smbus_read_i2c_block_data_or_emulated(client, offset, count, buf); dev_dbg(&client->dev, "read %zu@%d --> %d\n", count, offset, status); diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index beaf15807f789..fb5ddf3864fdd 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -693,16 +693,18 @@ static int fastrpc_get_meta_size(struct fastrpc_invoke_ctx *ctx) static u64 fastrpc_get_payload_size(struct fastrpc_invoke_ctx *ctx, int metalen) { u64 size = 0; - int i; + int oix; size = ALIGN(metalen, FASTRPC_ALIGN); - for (i = 0; i < ctx->nscalars; i++) { + for (oix = 0; oix < ctx->nbufs; oix++) { + int i = ctx->olaps[oix].raix; + if (ctx->args[i].fd == 0 || ctx->args[i].fd == -1) { - if (ctx->olaps[i].offset == 0) + if (ctx->olaps[oix].offset == 0) size = ALIGN(size, FASTRPC_ALIGN); - size += (ctx->olaps[i].mend - ctx->olaps[i].mstart); + size += (ctx->olaps[oix].mend - ctx->olaps[oix].mstart); } } @@ -1243,7 +1245,14 @@ static int fastrpc_dmabuf_alloc(struct fastrpc_user *fl, char __user *argp) } if (copy_to_user(argp, &bp, sizeof(bp))) { - dma_buf_put(buf->dmabuf); + /* + * The usercopy failed, but we can't do much about it, as + * dma_buf_fd() already called fd_install() and made the + * file descriptor accessible for the current process. It + * might already be closed and dmabuf no longer valid when + * we reach this point. Therefore "leak" the fd and rely on + * the process exit path to do any required cleanup. + */ return -EFAULT; } diff --git a/drivers/misc/genwqe/card_base.c b/drivers/misc/genwqe/card_base.c index 1dc6c7c5cbce9..97b8ecc423833 100644 --- a/drivers/misc/genwqe/card_base.c +++ b/drivers/misc/genwqe/card_base.c @@ -1240,7 +1240,7 @@ static void genwqe_remove(struct pci_dev *pci_dev) * error is detected. */ static pci_ers_result_t genwqe_err_error_detected(struct pci_dev *pci_dev, - enum pci_channel_state state) + pci_channel_state_t state) { struct genwqe_dev *cd; diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c index 8d18f19c99c4b..2f88581057335 100644 --- a/drivers/misc/kgdbts.c +++ b/drivers/misc/kgdbts.c @@ -1060,10 +1060,10 @@ static int kgdbts_option_setup(char *opt) { if (strlen(opt) >= MAX_CONFIG_LEN) { printk(KERN_ERR "kgdbts: config string too long\n"); - return -ENOSPC; + return 1; } strcpy(config, opt); - return 0; + return 1; } __setup("kgdbts=", kgdbts_option_setup); diff --git a/drivers/misc/lattice-ecp3-config.c b/drivers/misc/lattice-ecp3-config.c index 884485c3f7232..3a0d2b052ed29 100644 --- a/drivers/misc/lattice-ecp3-config.c +++ b/drivers/misc/lattice-ecp3-config.c @@ -77,12 +77,12 @@ static void firmware_load(const struct firmware *fw, void *context) if (fw == NULL) { dev_err(&spi->dev, "Cannot load firmware, aborting\n"); - return; + goto out; } if (fw->size == 0) { dev_err(&spi->dev, "Error: Firmware size is 0!\n"); - return; + goto out; } /* Fill dummy data (24 stuffing bits for commands) */ @@ -104,7 +104,7 @@ static void firmware_load(const struct firmware *fw, void *context) dev_err(&spi->dev, "Error: No supported FPGA detected (JEDEC_ID=%08x)!\n", jedec_id); - return; + goto out; } dev_info(&spi->dev, "FPGA %s detected\n", ecp3_dev[i].name); @@ -117,7 +117,7 @@ static void firmware_load(const struct firmware *fw, void *context) buffer = kzalloc(fw->size + 8, GFP_KERNEL); if (!buffer) { dev_err(&spi->dev, "Error: Can't allocate memory!\n"); - return; + goto out; } /* @@ -156,7 +156,7 @@ static void firmware_load(const struct firmware *fw, void *context) "Error: Timeout waiting for FPGA to clear (status=%08x)!\n", status); kfree(buffer); - return; + goto out; } dev_info(&spi->dev, "Configuring the FPGA...\n"); @@ -182,7 +182,7 @@ static void firmware_load(const struct firmware *fw, void *context) release_firmware(fw); kfree(buffer); - +out: complete(&data->fw_loaded); } diff --git a/drivers/misc/lkdtm/Makefile b/drivers/misc/lkdtm/Makefile index 30c8ac24635d4..4405fb2bc7a00 100644 --- a/drivers/misc/lkdtm/Makefile +++ b/drivers/misc/lkdtm/Makefile @@ -16,7 +16,7 @@ KCOV_INSTRUMENT_rodata.o := n OBJCOPYFLAGS := OBJCOPYFLAGS_rodata_objcopy.o := \ - --rename-section .noinstr.text=.rodata,alloc,readonly,load + --rename-section .noinstr.text=.rodata,alloc,readonly,load,contents targets += rodata.o rodata_objcopy.o $(obj)/rodata_objcopy.o: $(obj)/rodata.o FORCE $(call if_changed,objcopy) diff --git a/drivers/misc/lkdtm/refcount.c b/drivers/misc/lkdtm/refcount.c index 0a146b32da132..abf3b7c1f686c 100644 --- a/drivers/misc/lkdtm/refcount.c +++ b/drivers/misc/lkdtm/refcount.c @@ -6,14 +6,6 @@ #include "lkdtm.h" #include -#ifdef CONFIG_REFCOUNT_FULL -#define REFCOUNT_MAX (UINT_MAX - 1) -#define REFCOUNT_SATURATED UINT_MAX -#else -#define REFCOUNT_MAX INT_MAX -#define REFCOUNT_SATURATED (INT_MIN / 2) -#endif - static void overflow_check(refcount_t *ref) { switch (refcount_read(ref)) { diff --git a/drivers/misc/lkdtm/usercopy.c b/drivers/misc/lkdtm/usercopy.c index e172719dd86d0..4617c63b10260 100644 --- a/drivers/misc/lkdtm/usercopy.c +++ b/drivers/misc/lkdtm/usercopy.c @@ -30,12 +30,12 @@ static const unsigned char test_text[] = "This is a test.\n"; */ static noinline unsigned char *trick_compiler(unsigned char *stack) { - return stack + 0; + return stack + unconst; } static noinline unsigned char *do_usercopy_stack_callee(int value) { - unsigned char buf[32]; + unsigned char buf[128]; int i; /* Exercise stack to avoid everything living in registers. */ @@ -43,7 +43,12 @@ static noinline unsigned char *do_usercopy_stack_callee(int value) buf[i] = value & 0xff; } - return trick_compiler(buf); + /* + * Put the target buffer in the middle of stack allocation + * so that we don't step on future stack users regardless + * of stack growth direction. + */ + return trick_compiler(&buf[(128/2)-32]); } static noinline void do_usercopy_stack(bool to_user, bool bad_frame) @@ -66,6 +71,12 @@ static noinline void do_usercopy_stack(bool to_user, bool bad_frame) bad_stack -= sizeof(unsigned long); } +#ifdef ARCH_HAS_CURRENT_STACK_POINTER + pr_info("stack : %px\n", (void *)current_stack_pointer); +#endif + pr_info("good_stack: %px-%px\n", good_stack, good_stack + sizeof(good_stack)); + pr_info("bad_stack : %px-%px\n", bad_stack, bad_stack + sizeof(good_stack)); + user_addr = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_PRIVATE, 0); diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index e56dc47540646..556133daa25e9 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -90,6 +90,7 @@ #define MEI_DEV_ID_CDF 0x18D3 /* Cedar Fork */ #define MEI_DEV_ID_ICP_LP 0x34E0 /* Ice Lake Point LP */ +#define MEI_DEV_ID_ICP_N 0x38E0 /* Ice Lake Point N */ #define MEI_DEV_ID_TGP_LP 0xA0E0 /* Tiger Lake Point LP */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 75ab2ffbf235f..d7233f2238651 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -103,6 +103,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_H_3, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_ICP_LP, MEI_ME_PCH12_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICP_N, MEI_ME_PCH12_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_TGP_LP, MEI_ME_PCH12_CFG)}, diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig index 90137f2bc8e1d..d308e40921d79 100644 --- a/drivers/misc/mic/Kconfig +++ b/drivers/misc/mic/Kconfig @@ -132,7 +132,7 @@ comment "VOP Driver" config VOP tristate "VOP Driver" - depends on VOP_BUS + depends on VOP_BUS && VHOST_DPN select VHOST_RING select VIRTIO help diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c index 4d1b44de14921..c742ab02ae186 100644 --- a/drivers/misc/ocxl/file.c +++ b/drivers/misc/ocxl/file.c @@ -558,7 +558,9 @@ int ocxl_file_register_afu(struct ocxl_afu *afu) err_unregister: ocxl_sysfs_unregister_afu(info); // safe to call even if register failed + free_minor(info); device_unregister(&info->dev); + return rc; err_put: ocxl_afu_put(afu); free_minor(info); diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c index c2338750313c4..a49782dd903cd 100644 --- a/drivers/misc/vmw_vmci/vmci_queue_pair.c +++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c @@ -2238,7 +2238,8 @@ int vmci_qp_broker_map(struct vmci_handle handle, result = VMCI_SUCCESS; - if (context_id != VMCI_HOST_CONTEXT_ID) { + if (context_id != VMCI_HOST_CONTEXT_ID && + !QPBROKERSTATE_HAS_MEM(entry)) { struct vmci_qp_page_store page_store; page_store.pages = guest_mem; @@ -2345,7 +2346,8 @@ int vmci_qp_broker_unmap(struct vmci_handle handle, goto out; } - if (context_id != VMCI_HOST_CONTEXT_ID) { + if (context_id != VMCI_HOST_CONTEXT_ID && + QPBROKERSTATE_HAS_MEM(entry)) { qp_acquire_queue_mutex(entry->produce_q); result = qp_save_headers(entry); if (result < VMCI_SUCCESS) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 8322d22a59c45..482e01ece0b7f 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -591,6 +591,7 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, } mmc_wait_for_req(card->host, &mrq); + memcpy(&idata->ic.response, cmd.resp, sizeof(cmd.resp)); if (cmd.error) { dev_err(mmc_dev(card->host), "%s: cmd error %d\n", @@ -640,8 +641,6 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, if (idata->ic.postsleep_min_us) usleep_range(idata->ic.postsleep_min_us, idata->ic.postsleep_max_us); - memcpy(&(idata->ic.response), cmd.resp, sizeof(cmd.resp)); - if (idata->rpmb || (cmd.flags & MMC_RSP_R1B) == MMC_RSP_R1B) { /* * Ensure RPMB/R1B command has completed by polling CMD13 @@ -1127,7 +1126,7 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req) card->erase_arg == MMC_TRIM_ARG ? INAND_CMD38_ARG_TRIM : INAND_CMD38_ARG_ERASE, - 0); + card->ext_csd.generic_cmd6_time); } if (!err) err = mmc_erase(card, from, nr, card->erase_arg); @@ -1169,7 +1168,7 @@ static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq, arg == MMC_SECURE_TRIM1_ARG ? INAND_CMD38_ARG_SECTRIM1 : INAND_CMD38_ARG_SECERASE, - 0); + card->ext_csd.generic_cmd6_time); if (err) goto out_retry; } @@ -1187,7 +1186,7 @@ static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq, err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, INAND_CMD38_ARG_EXT_CSD, INAND_CMD38_ARG_SECTRIM2, - 0); + card->ext_csd.generic_cmd6_time); if (err) goto out_retry; } @@ -1493,8 +1492,7 @@ void mmc_blk_cqe_recovery(struct mmc_queue *mq) err = mmc_cqe_recovery(host); if (err) mmc_blk_reset(mq->blkdata, host, MMC_BLK_CQE_RECOVERY); - else - mmc_blk_reset_success(mq->blkdata, MMC_BLK_CQE_RECOVERY); + mmc_blk_reset_success(mq->blkdata, MMC_BLK_CQE_RECOVERY); pr_debug("%s: CQE recovery done\n", mmc_hostname(host)); } @@ -1672,31 +1670,31 @@ static void mmc_blk_read_single(struct mmc_queue *mq, struct request *req) struct mmc_card *card = mq->card; struct mmc_host *host = card->host; blk_status_t error = BLK_STS_OK; - int retries = 0; do { u32 status; int err; + int retries = 0; - mmc_blk_rw_rq_prep(mqrq, card, 1, mq); + while (retries++ <= MMC_READ_SINGLE_RETRIES) { + mmc_blk_rw_rq_prep(mqrq, card, 1, mq); - mmc_wait_for_req(host, mrq); - - err = mmc_send_status(card, &status); - if (err) - goto error_exit; + mmc_wait_for_req(host, mrq); - if (!mmc_host_is_spi(host) && - !mmc_blk_in_tran_state(status)) { - err = mmc_blk_fix_state(card, req); + err = mmc_send_status(card, &status); if (err) goto error_exit; - } - if (mrq->cmd->error && retries++ < MMC_READ_SINGLE_RETRIES) - continue; + if (!mmc_host_is_spi(host) && + !mmc_blk_in_tran_state(status)) { + err = mmc_blk_fix_state(card, req); + if (err) + goto error_exit; + } - retries = 0; + if (!mrq->cmd->error) + break; + } if (mrq->cmd->error || mrq->data->error || diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 8f94c25395080..d7bda1fa0a6e2 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -2364,7 +2364,7 @@ void mmc_start_host(struct mmc_host *host) _mmc_detect_change(host, 0, false); } -void mmc_stop_host(struct mmc_host *host) +void __mmc_stop_host(struct mmc_host *host) { if (host->slot.cd_irq >= 0) { mmc_gpio_set_cd_wake(host, false); @@ -2373,6 +2373,11 @@ void mmc_stop_host(struct mmc_host *host) host->rescan_disable = 1; cancel_delayed_work_sync(&host->detect); +} + +void mmc_stop_host(struct mmc_host *host) +{ + __mmc_stop_host(host); /* clear pm flags now and let card drivers set them as needed */ host->pm_flags = 0; diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h index db3c9c68875d8..a6c814fdbf0a9 100644 --- a/drivers/mmc/core/core.h +++ b/drivers/mmc/core/core.h @@ -69,6 +69,7 @@ static inline void mmc_delay(unsigned int ms) void mmc_rescan(struct work_struct *work); void mmc_start_host(struct mmc_host *host); +void __mmc_stop_host(struct mmc_host *host); void mmc_stop_host(struct mmc_host *host); void _mmc_detect_change(struct mmc_host *host, unsigned long delay, diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index f7339590a84c8..32801639e0be5 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -76,9 +76,18 @@ static void mmc_host_classdev_release(struct device *dev) kfree(host); } +static int mmc_host_classdev_shutdown(struct device *dev) +{ + struct mmc_host *host = cls_dev_to_mmc_host(dev); + + __mmc_stop_host(host); + return 0; +} + static struct class mmc_host_class = { .name = "mmc_host", .dev_release = mmc_host_classdev_release, + .shutdown_pre = mmc_host_classdev_shutdown, .pm = MMC_HOST_CLASS_DEV_PM_OPS, }; @@ -488,6 +497,16 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev) EXPORT_SYMBOL(mmc_alloc_host); +static int mmc_validate_host_caps(struct mmc_host *host) +{ + if (host->caps & MMC_CAP_SDIO_IRQ && !host->ops->enable_sdio_irq) { + dev_warn(host->parent, "missing ->enable_sdio_irq() ops\n"); + return -EINVAL; + } + + return 0; +} + /** * mmc_add_host - initialise host hardware * @host: mmc host @@ -500,8 +519,9 @@ int mmc_add_host(struct mmc_host *host) { int err; - WARN_ON((host->caps & MMC_CAP_SDIO_IRQ) && - !host->ops->enable_sdio_irq); + err = mmc_validate_host_caps(host); + if (err) + return err; err = device_add(&host->class_dev); if (err) diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c index 09311c2bd8588..d495ba2f368cb 100644 --- a/drivers/mmc/core/mmc_ops.c +++ b/drivers/mmc/core/mmc_ops.c @@ -19,7 +19,9 @@ #include "host.h" #include "mmc_ops.h" -#define MMC_OPS_TIMEOUT_MS (10 * 60 * 1000) /* 10 minute timeout */ +#define MMC_OPS_TIMEOUT_MS (10 * 60 * 1000) /* 10min*/ +#define MMC_BKOPS_TIMEOUT_MS (120 * 1000) /* 120s */ +#define MMC_CACHE_FLUSH_TIMEOUT_MS (30 * 1000) /* 30s */ static const u8 tuning_blk_pattern_4bit[] = { 0xff, 0x0f, 0xff, 0x00, 0xff, 0xcc, 0xc3, 0xcc, @@ -458,10 +460,6 @@ static int mmc_poll_for_busy(struct mmc_card *card, unsigned int timeout_ms, bool expired = false; bool busy = false; - /* We have an unspecified cmd timeout, use the fallback value. */ - if (!timeout_ms) - timeout_ms = MMC_OPS_TIMEOUT_MS; - /* * In cases when not allowed to poll by using CMD13 or because we aren't * capable of polling by using ->card_busy(), then rely on waiting the @@ -534,6 +532,12 @@ int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value, mmc_retune_hold(host); + if (!timeout_ms) { + pr_warn("%s: unspecified timeout for CMD6 - use generic\n", + mmc_hostname(host)); + timeout_ms = card->ext_csd.generic_cmd6_time; + } + /* * If the cmd timeout and the max_busy_timeout of the host are both * specified, let's validate them. A failure means we need to prevent @@ -542,7 +546,7 @@ int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value, * which also means they are on their own when it comes to deal with the * busy timeout. */ - if (!(host->caps & MMC_CAP_NEED_RSP_BUSY) && timeout_ms && + if (!(host->caps & MMC_CAP_NEED_RSP_BUSY) && host->max_busy_timeout && (timeout_ms > host->max_busy_timeout)) use_r1b_resp = false; @@ -554,10 +558,6 @@ int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value, cmd.flags = MMC_CMD_AC; if (use_r1b_resp) { cmd.flags |= MMC_RSP_SPI_R1B | MMC_RSP_R1B; - /* - * A busy_timeout of zero means the host can decide to use - * whatever value it finds suitable. - */ cmd.busy_timeout = timeout_ms; } else { cmd.flags |= MMC_RSP_SPI_R1 | MMC_RSP_R1; @@ -943,7 +943,7 @@ void mmc_run_bkops(struct mmc_card *card) * urgent levels by using an asynchronous background task, when idle. */ err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, - EXT_CSD_BKOPS_START, 1, MMC_OPS_TIMEOUT_MS); + EXT_CSD_BKOPS_START, 1, MMC_BKOPS_TIMEOUT_MS); if (err) pr_warn("%s: Error %d starting bkops\n", mmc_hostname(card->host), err); @@ -961,7 +961,8 @@ int mmc_flush_cache(struct mmc_card *card) if (mmc_cache_enabled(card->host)) { err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, - EXT_CSD_FLUSH_CACHE, 1, 0); + EXT_CSD_FLUSH_CACHE, 1, + MMC_CACHE_FLUSH_TIMEOUT_MS); if (err) pr_err("%s: cache flush error %d\n", mmc_hostname(card->host), err); diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 0bf33786fc5c5..9e0791332ef38 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -626,6 +626,8 @@ static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr, if (host->ops->init_card) host->ops->init_card(host, card); + card->ocr = ocr_card; + /* * If the host and card support UHS-I mode request the card * to switch to 1.8V signaling level. No 1.8v signalling if @@ -738,7 +740,7 @@ static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr, goto mismatch; } } - card->ocr = ocr_card; + mmc_fixup_device(card, sdio_fixup_methods); if (card->type == MMC_TYPE_SD_COMBO) { diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 49ea02c467bf1..1b4a40d910cfb 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -449,7 +449,7 @@ config MMC_OMAP_HS config MMC_WBSD tristate "Winbond W83L51xD SD/MMC Card Interface support" - depends on ISA_DMA_API + depends on ISA_DMA_API && !M68K help This selects the Winbond(R) W83L51xD Secure digital and Multimedia card Interface. diff --git a/drivers/mmc/host/cqhci.c b/drivers/mmc/host/cqhci.c index 2d65b32d205a5..ec56464eaf23e 100644 --- a/drivers/mmc/host/cqhci.c +++ b/drivers/mmc/host/cqhci.c @@ -273,6 +273,9 @@ static void __cqhci_enable(struct cqhci_host *cq_host) cqhci_writel(cq_host, cqcfg, CQHCI_CFG); + if (cqhci_readl(cq_host, CQHCI_CTL) & CQHCI_HALT) + cqhci_writel(cq_host, 0, CQHCI_CTL); + mmc->cqe_on = true; if (cq_host->ops->enable) diff --git a/drivers/mmc/host/davinci_mmc.c b/drivers/mmc/host/davinci_mmc.c index ebfaeb33bc8c0..ade7c022a33c0 100644 --- a/drivers/mmc/host/davinci_mmc.c +++ b/drivers/mmc/host/davinci_mmc.c @@ -1376,8 +1376,12 @@ static int davinci_mmcsd_suspend(struct device *dev) static int davinci_mmcsd_resume(struct device *dev) { struct mmc_davinci_host *host = dev_get_drvdata(dev); + int ret; + + ret = clk_enable(host->clk); + if (ret) + return ret; - clk_enable(host->clk); mmc_davinci_reset_ctrl(host, 0); return 0; diff --git a/drivers/mmc/host/dw_mmc-exynos.c b/drivers/mmc/host/dw_mmc-exynos.c index 5e3d95b636769..ae2c74186e1d2 100644 --- a/drivers/mmc/host/dw_mmc-exynos.c +++ b/drivers/mmc/host/dw_mmc-exynos.c @@ -462,6 +462,18 @@ static s8 dw_mci_exynos_get_best_clksmpl(u8 candiates) } } + /* + * If there is no cadiates value, then it needs to return -EIO. + * If there are candiates values and don't find bset clk sample value, + * then use a first candiates clock sample value. + */ + for (i = 0; i < iter; i++) { + __c = ror8(candiates, i); + if ((__c & 0x1) == 0x1) { + loc = i; + goto out; + } + } out: return loc; } @@ -492,6 +504,8 @@ static int dw_mci_exynos_execute_tuning(struct dw_mci_slot *slot, u32 opcode) priv->tuned_sample = found; } else { ret = -EIO; + dev_warn(&mmc->class_dev, + "There is no candiates value about clksmpl!\n"); } return ret; diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 6ace82028667b..ba37be8e423c2 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -782,6 +782,7 @@ static int dw_mci_edmac_start_dma(struct dw_mci *host, int ret = 0; /* Set external dma config: burst size, burst width */ + memset(&cfg, 0, sizeof(cfg)); cfg.dst_addr = host->phy_regs + fifo_offset; cfg.src_addr = cfg.dst_addr; cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; @@ -2012,7 +2013,8 @@ static void dw_mci_tasklet_func(unsigned long priv) * delayed. Allowing the transfer to take place * avoids races and keeps things simple. */ - if (err != -ETIMEDOUT) { + if (err != -ETIMEDOUT && + host->dir_status == DW_MCI_RECV_STATUS) { state = STATE_SENDING_DATA; continue; } diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c index f816c06ef9160..a316c912a118f 100644 --- a/drivers/mmc/host/jz4740_mmc.c +++ b/drivers/mmc/host/jz4740_mmc.c @@ -224,6 +224,26 @@ static int jz4740_mmc_acquire_dma_channels(struct jz4740_mmc_host *host) return PTR_ERR(host->dma_rx); } + /* + * Limit the maximum segment size in any SG entry according to + * the parameters of the DMA engine device. + */ + if (host->dma_tx) { + struct device *dev = host->dma_tx->device->dev; + unsigned int max_seg_size = dma_get_max_seg_size(dev); + + if (max_seg_size < host->mmc->max_seg_size) + host->mmc->max_seg_size = max_seg_size; + } + + if (host->dma_rx) { + struct device *dev = host->dma_rx->device->dev; + unsigned int max_seg_size = dma_get_max_seg_size(dev); + + if (max_seg_size < host->mmc->max_seg_size) + host->mmc->max_seg_size = max_seg_size; + } + return 0; } diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index a3e3b274f0ea3..9044faf0050a1 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -174,6 +174,8 @@ struct meson_host { int irq; bool vqmmc_enabled; + bool needs_pre_post_req; + }; #define CMD_CFG_LENGTH_MASK GENMASK(8, 0) @@ -655,6 +657,8 @@ static void meson_mmc_request_done(struct mmc_host *mmc, struct meson_host *host = mmc_priv(mmc); host->cmd = NULL; + if (host->needs_pre_post_req) + meson_mmc_post_req(mmc, mrq, 0); mmc_request_done(host->mmc, mrq); } @@ -738,7 +742,7 @@ static void meson_mmc_desc_chain_transfer(struct mmc_host *mmc, u32 cmd_cfg) writel(start, host->regs + SD_EMMC_START); } -/* local sg copy to buffer version with _to/fromio usage for dram_access_quirk */ +/* local sg copy for dram_access_quirk */ static void meson_mmc_copy_buffer(struct meson_host *host, struct mmc_data *data, size_t buflen, bool to_buffer) { @@ -756,21 +760,27 @@ static void meson_mmc_copy_buffer(struct meson_host *host, struct mmc_data *data sg_miter_start(&miter, sgl, nents, sg_flags); while ((offset < buflen) && sg_miter_next(&miter)) { - unsigned int len; + unsigned int buf_offset = 0; + unsigned int len, left; + u32 *buf = miter.addr; len = min(miter.length, buflen - offset); + left = len; - /* When dram_access_quirk, the bounce buffer is a iomem mapping */ - if (host->dram_access_quirk) { - if (to_buffer) - memcpy_toio(host->bounce_iomem_buf + offset, miter.addr, len); - else - memcpy_fromio(miter.addr, host->bounce_iomem_buf + offset, len); + if (to_buffer) { + do { + writel(*buf++, host->bounce_iomem_buf + offset + buf_offset); + + buf_offset += 4; + left -= 4; + } while (left); } else { - if (to_buffer) - memcpy(host->bounce_buf + offset, miter.addr, len); - else - memcpy(miter.addr, host->bounce_buf + offset, len); + do { + *buf++ = readl(host->bounce_iomem_buf + offset + buf_offset); + + buf_offset += 4; + left -= 4; + } while (left); } offset += len; @@ -822,7 +832,11 @@ static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd) if (data->flags & MMC_DATA_WRITE) { cmd_cfg |= CMD_CFG_DATA_WR; WARN_ON(xfer_bytes > host->bounce_buf_size); - meson_mmc_copy_buffer(host, data, xfer_bytes, true); + if (host->dram_access_quirk) + meson_mmc_copy_buffer(host, data, xfer_bytes, true); + else + sg_copy_to_buffer(data->sg, data->sg_len, + host->bounce_buf, xfer_bytes); dma_wmb(); } @@ -841,28 +855,56 @@ static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd) writel(cmd->arg, host->regs + SD_EMMC_CMD_ARG); } +static int meson_mmc_validate_dram_access(struct mmc_host *mmc, struct mmc_data *data) +{ + struct scatterlist *sg; + int i; + + /* Reject request if any element offset or size is not 32bit aligned */ + for_each_sg(data->sg, sg, data->sg_len, i) { + if (!IS_ALIGNED(sg->offset, sizeof(u32)) || + !IS_ALIGNED(sg->length, sizeof(u32))) { + dev_err(mmc_dev(mmc), "unaligned sg offset %u len %u\n", + data->sg->offset, data->sg->length); + return -EINVAL; + } + } + + return 0; +} + static void meson_mmc_request(struct mmc_host *mmc, struct mmc_request *mrq) { struct meson_host *host = mmc_priv(mmc); - bool needs_pre_post_req = mrq->data && + host->needs_pre_post_req = mrq->data && !(mrq->data->host_cookie & SD_EMMC_PRE_REQ_DONE); - if (needs_pre_post_req) { + /* + * The memory at the end of the controller used as bounce buffer for + * the dram_access_quirk only accepts 32bit read/write access, + * check the aligment and length of the data before starting the request. + */ + if (host->dram_access_quirk && mrq->data) { + mrq->cmd->error = meson_mmc_validate_dram_access(mmc, mrq->data); + if (mrq->cmd->error) { + mmc_request_done(mmc, mrq); + return; + } + } + + if (host->needs_pre_post_req) { meson_mmc_get_transfer_mode(mmc, mrq); if (!meson_mmc_desc_chain_mode(mrq->data)) - needs_pre_post_req = false; + host->needs_pre_post_req = false; } - if (needs_pre_post_req) + if (host->needs_pre_post_req) meson_mmc_pre_req(mmc, mrq); /* Stop execution */ writel(0, host->regs + SD_EMMC_START); meson_mmc_start_cmd(mmc, mrq->sbc ?: mrq->cmd); - - if (needs_pre_post_req) - meson_mmc_post_req(mmc, mrq, 0); } static void meson_mmc_read_resp(struct mmc_host *mmc, struct mmc_command *cmd) @@ -991,7 +1033,11 @@ static irqreturn_t meson_mmc_irq_thread(int irq, void *dev_id) if (meson_mmc_bounce_buf_read(data)) { xfer_bytes = data->blksz * data->blocks; WARN_ON(xfer_bytes > host->bounce_buf_size); - meson_mmc_copy_buffer(host, data, xfer_bytes, false); + if (host->dram_access_quirk) + meson_mmc_copy_buffer(host, data, xfer_bytes, false); + else + sg_copy_from_buffer(data->sg, data->sg_len, + host->bounce_buf, xfer_bytes); } next_cmd = meson_mmc_get_next_command(cmd); diff --git a/drivers/mmc/host/meson-mx-sdio.c b/drivers/mmc/host/meson-mx-sdio.c index 360d523132bd5..780552a86ec08 100644 --- a/drivers/mmc/host/meson-mx-sdio.c +++ b/drivers/mmc/host/meson-mx-sdio.c @@ -665,6 +665,11 @@ static int meson_mx_mmc_probe(struct platform_device *pdev) } irq = platform_get_irq(pdev, 0); + if (irq < 0) { + ret = irq; + goto error_free_mmc; + } + ret = devm_request_threaded_irq(host->controller_dev, irq, meson_mx_mmc_irq, meson_mx_mmc_irq_thread, IRQF_ONESHOT, diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c index 0953bd8a4f79d..3bb59d670220f 100644 --- a/drivers/mmc/host/mmci_stm32_sdmmc.c +++ b/drivers/mmc/host/mmci_stm32_sdmmc.c @@ -36,8 +36,8 @@ int sdmmc_idma_validate_data(struct mmci_host *host, * excepted the last element which has no constraint on idmasize */ for_each_sg(data->sg, sg, data->sg_len - 1, i) { - if (!IS_ALIGNED(sg_dma_address(data->sg), sizeof(u32)) || - !IS_ALIGNED(sg_dma_len(data->sg), SDMMC_IDMA_BURST)) { + if (!IS_ALIGNED(sg->offset, sizeof(u32)) || + !IS_ALIGNED(sg->length, SDMMC_IDMA_BURST)) { dev_err(mmc_dev(host->mmc), "unaligned scatterlist: ofst:%x length:%d\n", data->sg->offset, data->sg->length); @@ -45,7 +45,7 @@ int sdmmc_idma_validate_data(struct mmci_host *host, } } - if (!IS_ALIGNED(sg_dma_address(data->sg), sizeof(u32))) { + if (!IS_ALIGNED(sg->offset, sizeof(u32))) { dev_err(mmc_dev(host->mmc), "unaligned last scatterlist: ofst:%x length:%d\n", data->sg->offset, data->sg->length); diff --git a/drivers/mmc/host/moxart-mmc.c b/drivers/mmc/host/moxart-mmc.c index a0670e9cd0127..5c81dc7371db7 100644 --- a/drivers/mmc/host/moxart-mmc.c +++ b/drivers/mmc/host/moxart-mmc.c @@ -631,6 +631,7 @@ static int moxart_probe(struct platform_device *pdev) host->dma_chan_tx, host->dma_chan_rx); host->have_dma = true; + memset(&cfg, 0, sizeof(cfg)); cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; @@ -695,12 +696,12 @@ static int moxart_remove(struct platform_device *pdev) if (!IS_ERR(host->dma_chan_rx)) dma_release_channel(host->dma_chan_rx); mmc_remove_host(mmc); - mmc_free_host(mmc); writel(0, host->base + REG_INTERRUPT_MASK); writel(0, host->base + REG_POWER_CONTROL); writel(readl(host->base + REG_CLOCK_CONTROL) | CLK_OFF, host->base + REG_CLOCK_CONTROL); + mmc_free_host(mmc); } return 0; } diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c index 52054931c3507..3a90037254a4d 100644 --- a/drivers/mmc/host/mxs-mmc.c +++ b/drivers/mmc/host/mxs-mmc.c @@ -565,6 +565,11 @@ static const struct of_device_id mxs_mmc_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, mxs_mmc_dt_ids); +static void mxs_mmc_regulator_disable(void *regulator) +{ + regulator_disable(regulator); +} + static int mxs_mmc_probe(struct platform_device *pdev) { const struct of_device_id *of_id = @@ -606,6 +611,11 @@ static int mxs_mmc_probe(struct platform_device *pdev) "Failed to enable vmmc regulator: %d\n", ret); goto out_mmc_free; } + + ret = devm_add_action_or_reset(&pdev->dev, mxs_mmc_regulator_disable, + reg_vmmc); + if (ret) + goto out_mmc_free; } ssp->clk = devm_clk_get(&pdev->dev, NULL); diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index 689eb119d44fc..23fd93407eced 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -349,10 +349,10 @@ static void renesas_sdhi_hs400_complete(struct tmio_mmc_host *host) SH_MOBILE_SDHI_SCC_TMPPORT2_HS400OSEL) | sd_scc_read32(host, priv, SH_MOBILE_SDHI_SCC_TMPPORT2)); - /* Set the sampling clock selection range of HS400 mode */ sd_scc_write32(host, priv, SH_MOBILE_SDHI_SCC_DTCNTL, SH_MOBILE_SDHI_SCC_DTCNTL_TAPEN | - 0x4 << SH_MOBILE_SDHI_SCC_DTCNTL_TAPNUM_SHIFT); + sd_scc_read32(host, priv, + SH_MOBILE_SDHI_SCC_DTCNTL)); if (host->pdata->flags & TMIO_MMC_HAVE_4TAP_HS400) diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c index 11087976ab19c..e5ae3346b05a9 100644 --- a/drivers/mmc/host/rtsx_pci_sdmmc.c +++ b/drivers/mmc/host/rtsx_pci_sdmmc.c @@ -37,10 +37,7 @@ struct realtek_pci_sdmmc { bool double_clk; bool eject; bool initial_mode; - int power_state; -#define SDMMC_POWER_ON 1 -#define SDMMC_POWER_OFF 0 - + int prev_power_state; int sg_count; s32 cookie; int cookie_sg_count; @@ -539,9 +536,22 @@ static int sd_write_long_data(struct realtek_pci_sdmmc *host, return 0; } +static inline void sd_enable_initial_mode(struct realtek_pci_sdmmc *host) +{ + rtsx_pci_write_register(host->pcr, SD_CFG1, + SD_CLK_DIVIDE_MASK, SD_CLK_DIVIDE_128); +} + +static inline void sd_disable_initial_mode(struct realtek_pci_sdmmc *host) +{ + rtsx_pci_write_register(host->pcr, SD_CFG1, + SD_CLK_DIVIDE_MASK, SD_CLK_DIVIDE_0); +} + static int sd_rw_multi(struct realtek_pci_sdmmc *host, struct mmc_request *mrq) { struct mmc_data *data = mrq->data; + int err; if (host->sg_count < 0) { data->error = host->sg_count; @@ -550,22 +560,19 @@ static int sd_rw_multi(struct realtek_pci_sdmmc *host, struct mmc_request *mrq) return data->error; } - if (data->flags & MMC_DATA_READ) - return sd_read_long_data(host, mrq); + if (data->flags & MMC_DATA_READ) { + if (host->initial_mode) + sd_disable_initial_mode(host); - return sd_write_long_data(host, mrq); -} + err = sd_read_long_data(host, mrq); -static inline void sd_enable_initial_mode(struct realtek_pci_sdmmc *host) -{ - rtsx_pci_write_register(host->pcr, SD_CFG1, - SD_CLK_DIVIDE_MASK, SD_CLK_DIVIDE_128); -} + if (host->initial_mode) + sd_enable_initial_mode(host); -static inline void sd_disable_initial_mode(struct realtek_pci_sdmmc *host) -{ - rtsx_pci_write_register(host->pcr, SD_CFG1, - SD_CLK_DIVIDE_MASK, SD_CLK_DIVIDE_0); + return err; + } + + return sd_write_long_data(host, mrq); } static void sd_normal_rw(struct realtek_pci_sdmmc *host, @@ -892,14 +899,21 @@ static int sd_set_bus_width(struct realtek_pci_sdmmc *host, return err; } -static int sd_power_on(struct realtek_pci_sdmmc *host) +static int sd_power_on(struct realtek_pci_sdmmc *host, unsigned char power_mode) { struct rtsx_pcr *pcr = host->pcr; int err; - if (host->power_state == SDMMC_POWER_ON) + if (host->prev_power_state == MMC_POWER_ON) return 0; + if (host->prev_power_state == MMC_POWER_UP) { + rtsx_pci_write_register(pcr, SD_BUS_STAT, SD_CLK_TOGGLE_EN, 0); + goto finish; + } + + msleep(100); + rtsx_pci_init_cmd(pcr); rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_SELECT, 0x07, SD_MOD_SEL); rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_SHARE_MODE, @@ -918,11 +932,17 @@ static int sd_power_on(struct realtek_pci_sdmmc *host) if (err < 0) return err; + mdelay(1); + err = rtsx_pci_write_register(pcr, CARD_OE, SD_OUTPUT_EN, SD_OUTPUT_EN); if (err < 0) return err; - host->power_state = SDMMC_POWER_ON; + /* send at least 74 clocks */ + rtsx_pci_write_register(pcr, SD_BUS_STAT, SD_CLK_TOGGLE_EN, SD_CLK_TOGGLE_EN); + +finish: + host->prev_power_state = power_mode; return 0; } @@ -931,7 +951,7 @@ static int sd_power_off(struct realtek_pci_sdmmc *host) struct rtsx_pcr *pcr = host->pcr; int err; - host->power_state = SDMMC_POWER_OFF; + host->prev_power_state = MMC_POWER_OFF; rtsx_pci_init_cmd(pcr); @@ -957,7 +977,7 @@ static int sd_set_power_mode(struct realtek_pci_sdmmc *host, if (power_mode == MMC_POWER_OFF) err = sd_power_off(host); else - err = sd_power_on(host); + err = sd_power_on(host, power_mode); return err; } @@ -1392,10 +1412,11 @@ static int rtsx_pci_sdmmc_drv_probe(struct platform_device *pdev) host = mmc_priv(mmc); host->pcr = pcr; + mmc->ios.power_delay_ms = 5; host->mmc = mmc; host->pdev = pdev; host->cookie = -1; - host->power_state = SDMMC_POWER_OFF; + host->prev_power_state = MMC_POWER_OFF; INIT_WORK(&host->work, sd_request); platform_set_drvdata(pdev, host); pcr->slots[RTSX_SD_CARD].p_dev = pdev; diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 771676209005b..d97c19ef75830 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "sdhci-pltfm.h" #include "sdhci-esdhc.h" #include "cqhci.h" @@ -217,8 +218,7 @@ static struct esdhc_soc_data usdhc_imx7ulp_data = { static struct esdhc_soc_data usdhc_imx8qxp_data = { .flags = ESDHC_FLAG_USDHC | ESDHC_FLAG_STD_TUNING | ESDHC_FLAG_HAVE_CAP1 | ESDHC_FLAG_HS200 - | ESDHC_FLAG_HS400 | ESDHC_FLAG_HS400_ES - | ESDHC_FLAG_CQHCI, + | ESDHC_FLAG_HS400 | ESDHC_FLAG_HS400_ES, }; struct pltfm_imx_data { @@ -1022,6 +1022,7 @@ static void esdhc_reset_tuning(struct sdhci_host *host) struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host); u32 ctrl; + int ret; /* Reset the tuning circuit */ if (esdhc_is_usdhc(imx_data)) { @@ -1034,7 +1035,22 @@ static void esdhc_reset_tuning(struct sdhci_host *host) } else if (imx_data->socdata->flags & ESDHC_FLAG_STD_TUNING) { ctrl = readl(host->ioaddr + SDHCI_AUTO_CMD_STATUS); ctrl &= ~ESDHC_MIX_CTRL_SMPCLK_SEL; + ctrl &= ~ESDHC_MIX_CTRL_EXE_TUNE; writel(ctrl, host->ioaddr + SDHCI_AUTO_CMD_STATUS); + /* Make sure ESDHC_MIX_CTRL_EXE_TUNE cleared */ + ret = readl_poll_timeout(host->ioaddr + SDHCI_AUTO_CMD_STATUS, + ctrl, !(ctrl & ESDHC_MIX_CTRL_EXE_TUNE), 1, 50); + if (ret == -ETIMEDOUT) + dev_warn(mmc_dev(host->mmc), + "Warning! clear execute tuning bit failed\n"); + /* + * SDHCI_INT_DATA_AVAIL is W1C bit, set this bit will clear the + * usdhc IP internal logic flag execute_tuning_with_clr_buf, which + * will finally make sure the normal data transfer logic correct. + */ + ctrl = readl(host->ioaddr + SDHCI_INT_STATUS); + ctrl |= SDHCI_INT_DATA_AVAIL; + writel(ctrl, host->ioaddr + SDHCI_INT_STATUS); } } } diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c index 8bed81cf03adc..8ab963055238a 100644 --- a/drivers/mmc/host/sdhci-msm.c +++ b/drivers/mmc/host/sdhci-msm.c @@ -1589,6 +1589,23 @@ static void sdhci_msm_set_clock(struct sdhci_host *host, unsigned int clock) __sdhci_msm_set_clock(host, clock); } +static void sdhci_msm_set_timeout(struct sdhci_host *host, struct mmc_command *cmd) +{ + u32 count, start = 15; + + __sdhci_set_timeout(host, cmd); + count = sdhci_readb(host, SDHCI_TIMEOUT_CONTROL); + /* + * Update software timeout value if its value is less than hardware data + * timeout value. Qcom SoC hardware data timeout value was calculated + * using 4 * MCLK * 2^(count + 13). where MCLK = 1 / host->clock. + */ + if (cmd && cmd->data && host->clock > 400000 && + host->clock <= 50000000 && + ((1 << (count + start)) > (10 * host->clock))) + host->data_timeout = 22LL * NSEC_PER_SEC; +} + /* * Platform specific register write functions. This is so that, if any * register write needs to be followed up by platform specific actions, @@ -1753,6 +1770,7 @@ static const struct sdhci_ops sdhci_msm_ops = { .set_uhs_signaling = sdhci_msm_set_uhs_signaling, .write_w = sdhci_msm_writew, .write_b = sdhci_msm_writeb, + .set_timeout = sdhci_msm_set_timeout, }; static const struct sdhci_pltfm_data sdhci_msm_pdata = { diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c index 7023cbec4017b..dd10f7abf5a71 100644 --- a/drivers/mmc/host/sdhci-of-arasan.c +++ b/drivers/mmc/host/sdhci-of-arasan.c @@ -192,7 +192,12 @@ static void sdhci_arasan_set_clock(struct sdhci_host *host, unsigned int clock) * through low speeds without power cycling. */ sdhci_set_clock(host, host->max_clk); - phy_power_on(sdhci_arasan->phy); + if (phy_power_on(sdhci_arasan->phy)) { + pr_err("%s: Cannot power on phy.\n", + mmc_hostname(host->mmc)); + return; + } + sdhci_arasan->is_phy_on = true; /* @@ -228,7 +233,12 @@ static void sdhci_arasan_set_clock(struct sdhci_host *host, unsigned int clock) msleep(20); if (ctrl_phy) { - phy_power_on(sdhci_arasan->phy); + if (phy_power_on(sdhci_arasan->phy)) { + pr_err("%s: Cannot power on phy.\n", + mmc_hostname(host->mmc)); + return; + } + sdhci_arasan->is_phy_on = true; } } @@ -416,7 +426,9 @@ static int sdhci_arasan_suspend(struct device *dev) ret = phy_power_off(sdhci_arasan->phy); if (ret) { dev_err(dev, "Cannot power off phy.\n"); - sdhci_resume_host(host); + if (sdhci_resume_host(host)) + dev_err(dev, "Cannot resume host.\n"); + return ret; } sdhci_arasan->is_phy_on = false; diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c index 5922ae021d869..0ff339004d8a5 100644 --- a/drivers/mmc/host/sdhci-of-esdhc.c +++ b/drivers/mmc/host/sdhci-of-esdhc.c @@ -519,12 +519,16 @@ static void esdhc_of_adma_workaround(struct sdhci_host *host, u32 intmask) static int esdhc_of_enable_dma(struct sdhci_host *host) { + int ret; u32 value; struct device *dev = mmc_dev(host->mmc); if (of_device_is_compatible(dev->of_node, "fsl,ls1043a-esdhc") || - of_device_is_compatible(dev->of_node, "fsl,ls1046a-esdhc")) - dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40)); + of_device_is_compatible(dev->of_node, "fsl,ls1046a-esdhc")) { + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40)); + if (ret) + return ret; + } value = sdhci_readl(host, ESDHC_DMA_SYSCTL); diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c index d3135249b2e40..346ca41b28f8b 100644 --- a/drivers/mmc/host/sdhci-omap.c +++ b/drivers/mmc/host/sdhci-omap.c @@ -675,7 +675,8 @@ static void sdhci_omap_set_power(struct sdhci_host *host, unsigned char mode, { struct mmc_host *mmc = host->mmc; - mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, vdd); + if (!IS_ERR(mmc->supply.vmmc)) + mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, vdd); } static int sdhci_omap_enable_dma(struct sdhci_host *host) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index a9151bd27211a..1e19c0d0bca80 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -1936,6 +1936,7 @@ static const struct pci_device_id pci_ids[] = { SDHCI_PCI_DEVICE(INTEL, JSL_SD, intel_byt_sd), SDHCI_PCI_DEVICE(INTEL, LKF_EMMC, intel_glk_emmc), SDHCI_PCI_DEVICE(INTEL, LKF_SD, intel_byt_sd), + SDHCI_PCI_DEVICE(INTEL, ADL_EMMC, intel_glk_emmc), SDHCI_PCI_DEVICE(O2, 8120, o2), SDHCI_PCI_DEVICE(O2, 8220, o2), SDHCI_PCI_DEVICE(O2, 8221, o2), diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c index 41a2394313dd0..3170c19683c49 100644 --- a/drivers/mmc/host/sdhci-pci-o2micro.c +++ b/drivers/mmc/host/sdhci-pci-o2micro.c @@ -145,6 +145,8 @@ static int sdhci_o2_get_cd(struct mmc_host *mmc) if (!(sdhci_readw(host, O2_PLL_DLL_WDT_CONTROL1) & O2_PLL_LOCK_STATUS)) sdhci_o2_enable_internal_clock(host); + else + sdhci_o2_wait_card_detect_stable(host); return !!(sdhci_readl(host, SDHCI_PRESENT_STATE) & SDHCI_CARD_PRESENT); } diff --git a/drivers/mmc/host/sdhci-pci.h b/drivers/mmc/host/sdhci-pci.h index 779156ce1ee17..b4ac314790b2c 100644 --- a/drivers/mmc/host/sdhci-pci.h +++ b/drivers/mmc/host/sdhci-pci.h @@ -59,6 +59,7 @@ #define PCI_DEVICE_ID_INTEL_JSL_SD 0x4df8 #define PCI_DEVICE_ID_INTEL_LKF_EMMC 0x98c4 #define PCI_DEVICE_ID_INTEL_LKF_SD 0x98f8 +#define PCI_DEVICE_ID_INTEL_ADL_EMMC 0x54c4 #define PCI_DEVICE_ID_SYSKONNECT_8000 0x8000 #define PCI_DEVICE_ID_VIA_95D0 0x95d0 diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c index c105356ad4cb7..65dfe75120ed6 100644 --- a/drivers/mmc/host/sdhci-tegra.c +++ b/drivers/mmc/host/sdhci-tegra.c @@ -340,23 +340,6 @@ static void tegra_sdhci_set_tap(struct sdhci_host *host, unsigned int tap) } } -static void tegra_sdhci_hs400_enhanced_strobe(struct mmc_host *mmc, - struct mmc_ios *ios) -{ - struct sdhci_host *host = mmc_priv(mmc); - u32 val; - - val = sdhci_readl(host, SDHCI_TEGRA_VENDOR_SYS_SW_CTRL); - - if (ios->enhanced_strobe) - val |= SDHCI_TEGRA_SYS_SW_CTRL_ENHANCED_STROBE; - else - val &= ~SDHCI_TEGRA_SYS_SW_CTRL_ENHANCED_STROBE; - - sdhci_writel(host, val, SDHCI_TEGRA_VENDOR_SYS_SW_CTRL); - -} - static void tegra_sdhci_reset(struct sdhci_host *host, u8 mask) { struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); @@ -768,6 +751,32 @@ static void tegra_sdhci_set_clock(struct sdhci_host *host, unsigned int clock) } } +static void tegra_sdhci_hs400_enhanced_strobe(struct mmc_host *mmc, + struct mmc_ios *ios) +{ + struct sdhci_host *host = mmc_priv(mmc); + u32 val; + + val = sdhci_readl(host, SDHCI_TEGRA_VENDOR_SYS_SW_CTRL); + + if (ios->enhanced_strobe) { + val |= SDHCI_TEGRA_SYS_SW_CTRL_ENHANCED_STROBE; + /* + * When CMD13 is sent from mmc_select_hs400es() after + * switching to HS400ES mode, the bus is operating at + * either MMC_HIGH_26_MAX_DTR or MMC_HIGH_52_MAX_DTR. + * To meet Tegra SDHCI requirement at HS400ES mode, force SDHCI + * interface clock to MMC_HS200_MAX_DTR (200 MHz) so that host + * controller CAR clock and the interface clock are rate matched. + */ + tegra_sdhci_set_clock(host, MMC_HS200_MAX_DTR); + } else { + val &= ~SDHCI_TEGRA_SYS_SW_CTRL_ENHANCED_STROBE; + } + + sdhci_writel(host, val, SDHCI_TEGRA_VENDOR_SYS_SW_CTRL); +} + static unsigned int tegra_sdhci_get_max_clock(struct sdhci_host *host) { struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); diff --git a/drivers/mmc/host/sdhci-xenon.c b/drivers/mmc/host/sdhci-xenon.c index 5f57e78e5f13f..6f6fb4c4448f0 100644 --- a/drivers/mmc/host/sdhci-xenon.c +++ b/drivers/mmc/host/sdhci-xenon.c @@ -240,16 +240,6 @@ static void xenon_voltage_switch(struct sdhci_host *host) { /* Wait for 5ms after set 1.8V signal enable bit */ usleep_range(5000, 5500); - - /* - * For some reason the controller's Host Control2 register reports - * the bit representing 1.8V signaling as 0 when read after it was - * written as 1. Subsequent read reports 1. - * - * Since this may cause some issues, do an empty read of the Host - * Control2 register here to circumvent this. - */ - sdhci_readw(host, SDHCI_HOST_CONTROL2); } static const struct sdhci_ops sdhci_xenon_ops = { diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 2ecd9acebb2f0..deafcc56adee6 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -749,7 +749,19 @@ static void sdhci_adma_table_pre(struct sdhci_host *host, len -= offset; } - BUG_ON(len > 65536); + /* + * The block layer forces a minimum segment size of PAGE_SIZE, + * so 'len' can be too big here if PAGE_SIZE >= 64KiB. Write + * multiple descriptors, noting that the ADMA table is sized + * for 4KiB chunks anyway, so it will be big enough. + */ + while (len > host->max_adma) { + int n = 32 * 1024; /* 32KiB*/ + + __sdhci_adma_write_desc(host, &desc, addr, n, ADMA2_TRAN_VALID); + addr += n; + len -= n; + } /* tran, valid */ if (len) @@ -1741,6 +1753,12 @@ void sdhci_set_power_noreg(struct sdhci_host *host, unsigned char mode, break; case MMC_VDD_32_33: case MMC_VDD_33_34: + /* + * 3.4 ~ 3.6V are valid only for those platforms where it's + * known that the voltage range is supported by hardware. + */ + case MMC_VDD_34_35: + case MMC_VDD_35_36: pwr = SDHCI_POWER_330; break; default: @@ -3562,6 +3580,7 @@ struct sdhci_host *sdhci_alloc_host(struct device *dev, * descriptor for each segment, plus 1 for a nop end descriptor. */ host->adma_table_cnt = SDHCI_MAX_SEGS * 2 + 1; + host->max_adma = 65536; return host; } @@ -4215,10 +4234,12 @@ int sdhci_setup_host(struct sdhci_host *host) * be larger than 64 KiB though. */ if (host->flags & SDHCI_USE_ADMA) { - if (host->quirks & SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC) + if (host->quirks & SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC) { + host->max_adma = 65532; /* 32-bit alignment */ mmc->max_seg_size = 65535; - else + } else { mmc->max_seg_size = 65536; + } } else { mmc->max_seg_size = mmc->max_req_size; } diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 96a0a8f97f559..54f9d6720f132 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -349,7 +349,8 @@ struct sdhci_adma2_64_desc { /* * Maximum segments assuming a 512KiB maximum requisition size and a minimum - * 4KiB page size. + * 4KiB page size. Note this also allows enough for multiple descriptors in + * case of PAGE_SIZE >= 64KiB. */ #define SDHCI_MAX_SEGS 128 @@ -547,6 +548,7 @@ struct sdhci_host { unsigned int blocks; /* remaining PIO blocks */ int sg_count; /* Mapped sg entries */ + int max_adma; /* Max. length in ADMA descriptor */ void *adma_table; /* ADMA descriptor table */ void *align_buffer; /* Bounce buffer */ diff --git a/drivers/mmc/host/vub300.c b/drivers/mmc/host/vub300.c index 156046e56a584..5e1d7025dbf78 100644 --- a/drivers/mmc/host/vub300.c +++ b/drivers/mmc/host/vub300.c @@ -576,7 +576,7 @@ static void check_vub300_port_status(struct vub300_mmc_host *vub300) GET_SYSTEM_PORT_STATUS, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x0000, 0x0000, &vub300->system_port_status, - sizeof(vub300->system_port_status), HZ); + sizeof(vub300->system_port_status), 1000); if (sizeof(vub300->system_port_status) == retval) new_system_port_status(vub300); } @@ -1241,7 +1241,7 @@ static void __download_offload_pseudocode(struct vub300_mmc_host *vub300, SET_INTERRUPT_PSEUDOCODE, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x0000, 0x0000, - xfer_buffer, xfer_length, HZ); + xfer_buffer, xfer_length, 1000); kfree(xfer_buffer); if (retval < 0) goto copy_error_message; @@ -1284,7 +1284,7 @@ static void __download_offload_pseudocode(struct vub300_mmc_host *vub300, SET_TRANSFER_PSEUDOCODE, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x0000, 0x0000, - xfer_buffer, xfer_length, HZ); + xfer_buffer, xfer_length, 1000); kfree(xfer_buffer); if (retval < 0) goto copy_error_message; @@ -1991,7 +1991,7 @@ static void __set_clock_speed(struct vub300_mmc_host *vub300, u8 buf[8], usb_control_msg(vub300->udev, usb_sndctrlpipe(vub300->udev, 0), SET_CLOCK_SPEED, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 0x00, 0x00, buf, buf_array_size, HZ); + 0x00, 0x00, buf, buf_array_size, 1000); if (retval != 8) { dev_err(&vub300->udev->dev, "SET_CLOCK_SPEED" " %dkHz failed with retval=%d\n", kHzClock, retval); @@ -2013,14 +2013,14 @@ static void vub300_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) usb_control_msg(vub300->udev, usb_sndctrlpipe(vub300->udev, 0), SET_SD_POWER, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 0x0000, 0x0000, NULL, 0, HZ); + 0x0000, 0x0000, NULL, 0, 1000); /* must wait for the VUB300 u-proc to boot up */ msleep(600); } else if ((ios->power_mode == MMC_POWER_UP) && !vub300->card_powered) { usb_control_msg(vub300->udev, usb_sndctrlpipe(vub300->udev, 0), SET_SD_POWER, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 0x0001, 0x0000, NULL, 0, HZ); + 0x0001, 0x0000, NULL, 0, 1000); msleep(600); vub300->card_powered = 1; } else if (ios->power_mode == MMC_POWER_ON) { @@ -2282,14 +2282,14 @@ static int vub300_probe(struct usb_interface *interface, GET_HC_INF0, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x0000, 0x0000, &vub300->hc_info, - sizeof(vub300->hc_info), HZ); + sizeof(vub300->hc_info), 1000); if (retval < 0) goto error5; retval = usb_control_msg(vub300->udev, usb_sndctrlpipe(vub300->udev, 0), SET_ROM_WAIT_STATES, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - firmware_rom_wait_states, 0x0000, NULL, 0, HZ); + firmware_rom_wait_states, 0x0000, NULL, 0, 1000); if (retval < 0) goto error5; dev_info(&vub300->udev->dev, @@ -2304,7 +2304,7 @@ static int vub300_probe(struct usb_interface *interface, GET_SYSTEM_PORT_STATUS, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x0000, 0x0000, &vub300->system_port_status, - sizeof(vub300->system_port_status), HZ); + sizeof(vub300->system_port_status), 1000); if (retval < 0) { goto error4; } else if (sizeof(vub300->system_port_status) == retval) { diff --git a/drivers/mtd/chips/Kconfig b/drivers/mtd/chips/Kconfig index a7e47e068ad9b..7769a9b556c70 100644 --- a/drivers/mtd/chips/Kconfig +++ b/drivers/mtd/chips/Kconfig @@ -55,12 +55,14 @@ choice LITTLE_ENDIAN_BYTE, if the bytes are reversed. config MTD_CFI_NOSWAP + depends on !ARCH_IXP4XX || CPU_BIG_ENDIAN bool "NO" config MTD_CFI_BE_BYTE_SWAP bool "BIG_ENDIAN_BYTE" config MTD_CFI_LE_BYTE_SWAP + depends on !ARCH_IXP4XX bool "LITTLE_ENDIAN_BYTE" endchoice diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index 9c98ddef0097d..006221284d0ae 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -59,6 +59,10 @@ #define CFI_SR_WBASB BIT(3) #define CFI_SR_SLSB BIT(1) +enum cfi_quirks { + CFI_QUIRK_DQ_TRUE_DATA = BIT(0), +}; + static int cfi_amdstd_read (struct mtd_info *, loff_t, size_t, size_t *, u_char *); static int cfi_amdstd_write_words(struct mtd_info *, loff_t, size_t, size_t *, const u_char *); #if !FORCE_WORD_WRITE @@ -432,6 +436,15 @@ static void fixup_s29ns512p_sectors(struct mtd_info *mtd) mtd->name); } +static void fixup_quirks(struct mtd_info *mtd) +{ + struct map_info *map = mtd->priv; + struct cfi_private *cfi = map->fldrv_priv; + + if (cfi->mfr == CFI_MFR_AMD && cfi->id == 0x0c01) + cfi->quirks |= CFI_QUIRK_DQ_TRUE_DATA; +} + /* Used to fix CFI-Tables of chips without Extended Query Tables */ static struct cfi_fixup cfi_nopri_fixup_table[] = { { CFI_MFR_SST, 0x234a, fixup_sst39vf }, /* SST39VF1602 */ @@ -470,6 +483,7 @@ static struct cfi_fixup cfi_fixup_table[] = { #if !FORCE_WORD_WRITE { CFI_MFR_ANY, CFI_ID_ANY, fixup_use_write_buffers }, #endif + { CFI_MFR_ANY, CFI_ID_ANY, fixup_quirks }, { 0, 0, NULL } }; static struct cfi_fixup jedec_fixup_table[] = { @@ -798,21 +812,25 @@ static struct mtd_info *cfi_amdstd_setup(struct mtd_info *mtd) } /* - * Return true if the chip is ready. + * Return true if the chip is ready and has the correct value. * * Ready is one of: read mode, query mode, erase-suspend-read mode (in any * non-suspended sector) and is indicated by no toggle bits toggling. * + * Error are indicated by toggling bits or bits held with the wrong value, + * or with bits toggling. + * * Note that anything more complicated than checking if no bits are toggling * (including checking DQ5 for an error status) is tricky to get working * correctly and is therefore not done (particularly with interleaved chips * as each chip must be checked independently of the others). */ static int __xipram chip_ready(struct map_info *map, struct flchip *chip, - unsigned long addr) + unsigned long addr, map_word *expected) { struct cfi_private *cfi = map->fldrv_priv; map_word d, t; + int ret; if (cfi_use_status_reg(cfi)) { map_word ready = CMD(CFI_SR_DRB); @@ -822,57 +840,32 @@ static int __xipram chip_ready(struct map_info *map, struct flchip *chip, */ cfi_send_gen_cmd(0x70, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL); - d = map_read(map, addr); + t = map_read(map, addr); - return map_word_andequal(map, d, ready, ready); + return map_word_andequal(map, t, ready, ready); } d = map_read(map, addr); t = map_read(map, addr); - return map_word_equal(map, d, t); + ret = map_word_equal(map, d, t); + + if (!ret || !expected) + return ret; + + return map_word_equal(map, t, *expected); } -/* - * Return true if the chip is ready and has the correct value. - * - * Ready is one of: read mode, query mode, erase-suspend-read mode (in any - * non-suspended sector) and it is indicated by no bits toggling. - * - * Error are indicated by toggling bits or bits held with the wrong value, - * or with bits toggling. - * - * Note that anything more complicated than checking if no bits are toggling - * (including checking DQ5 for an error status) is tricky to get working - * correctly and is therefore not done (particularly with interleaved chips - * as each chip must be checked independently of the others). - * - */ static int __xipram chip_good(struct map_info *map, struct flchip *chip, - unsigned long addr, map_word expected) + unsigned long addr, map_word *expected) { struct cfi_private *cfi = map->fldrv_priv; - map_word oldd, curd; - - if (cfi_use_status_reg(cfi)) { - map_word ready = CMD(CFI_SR_DRB); - - /* - * For chips that support status register, check device - * ready bit - */ - cfi_send_gen_cmd(0x70, cfi->addr_unlock1, chip->start, map, cfi, - cfi->device_type, NULL); - curd = map_read(map, addr); - - return map_word_andequal(map, curd, ready, ready); - } + map_word *datum = expected; - oldd = map_read(map, addr); - curd = map_read(map, addr); + if (cfi->quirks & CFI_QUIRK_DQ_TRUE_DATA) + datum = NULL; - return map_word_equal(map, oldd, curd) && - map_word_equal(map, curd, expected); + return chip_ready(map, chip, addr, datum); } static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr, int mode) @@ -889,7 +882,7 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr case FL_STATUS: for (;;) { - if (chip_ready(map, chip, adr)) + if (chip_ready(map, chip, adr, NULL)) break; if (time_after(jiffies, timeo)) { @@ -927,7 +920,7 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr chip->state = FL_ERASE_SUSPENDING; chip->erase_suspended = 1; for (;;) { - if (chip_ready(map, chip, adr)) + if (chip_ready(map, chip, adr, NULL)) break; if (time_after(jiffies, timeo)) { @@ -1459,7 +1452,7 @@ static int do_otp_lock(struct map_info *map, struct flchip *chip, loff_t adr, /* wait for chip to become ready */ timeo = jiffies + msecs_to_jiffies(2); for (;;) { - if (chip_ready(map, chip, adr)) + if (chip_ready(map, chip, adr, NULL)) break; if (time_after(jiffies, timeo)) { @@ -1695,7 +1688,7 @@ static int __xipram do_write_oneword_once(struct map_info *map, * "chip_good" to avoid the failure due to scheduling. */ if (time_after(jiffies, timeo) && - !chip_good(map, chip, adr, datum)) { + !chip_good(map, chip, adr, &datum)) { xip_enable(map, chip, adr); printk(KERN_WARNING "MTD %s(): software timeout\n", __func__); xip_disable(map, chip, adr); @@ -1703,7 +1696,7 @@ static int __xipram do_write_oneword_once(struct map_info *map, break; } - if (chip_good(map, chip, adr, datum)) { + if (chip_good(map, chip, adr, &datum)) { if (cfi_check_err_status(map, chip, adr)) ret = -EIO; break; @@ -1975,14 +1968,14 @@ static int __xipram do_write_buffer_wait(struct map_info *map, * "chip_good" to avoid the failure due to scheduling. */ if (time_after(jiffies, timeo) && - !chip_good(map, chip, adr, datum)) { + !chip_good(map, chip, adr, &datum)) { pr_err("MTD %s(): software timeout, address:0x%.8lx.\n", __func__, adr); ret = -EIO; break; } - if (chip_good(map, chip, adr, datum)) { + if (chip_good(map, chip, adr, &datum)) { if (cfi_check_err_status(map, chip, adr)) ret = -EIO; break; @@ -2191,7 +2184,7 @@ static int cfi_amdstd_panic_wait(struct map_info *map, struct flchip *chip, * If the driver thinks the chip is idle, and no toggle bits * are changing, then the chip is actually idle for sure. */ - if (chip->state == FL_READY && chip_ready(map, chip, adr)) + if (chip->state == FL_READY && chip_ready(map, chip, adr, NULL)) return 0; /* @@ -2208,7 +2201,7 @@ static int cfi_amdstd_panic_wait(struct map_info *map, struct flchip *chip, /* wait for the chip to become ready */ for (i = 0; i < jiffies_to_usecs(timeo); i++) { - if (chip_ready(map, chip, adr)) + if (chip_ready(map, chip, adr, NULL)) return 0; udelay(1); @@ -2272,13 +2265,13 @@ static int do_panic_write_oneword(struct map_info *map, struct flchip *chip, map_write(map, datum, adr); for (i = 0; i < jiffies_to_usecs(uWriteTimeout); i++) { - if (chip_ready(map, chip, adr)) + if (chip_ready(map, chip, adr, NULL)) break; udelay(1); } - if (!chip_good(map, chip, adr, datum) || + if (!chip_ready(map, chip, adr, &datum) || cfi_check_err_status(map, chip, adr)) { /* reset on all failures. */ map_write(map, CMD(0xF0), chip->start); @@ -2420,6 +2413,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip) DECLARE_WAITQUEUE(wait, current); int ret = 0; int retry_cnt = 0; + map_word datum = map_word_ff(map); adr = cfi->addr_unlock1; @@ -2474,7 +2468,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip) chip->erase_suspended = 0; } - if (chip_good(map, chip, adr, map_word_ff(map))) { + if (chip_ready(map, chip, adr, &datum)) { if (cfi_check_err_status(map, chip, adr)) ret = -EIO; break; @@ -2519,6 +2513,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip, DECLARE_WAITQUEUE(wait, current); int ret = 0; int retry_cnt = 0; + map_word datum = map_word_ff(map); adr += chip->start; @@ -2573,7 +2568,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip, chip->erase_suspended = 0; } - if (chip_good(map, chip, adr, map_word_ff(map))) { + if (chip_ready(map, chip, adr, &datum)) { if (cfi_check_err_status(map, chip, adr)) ret = -EIO; break; @@ -2767,7 +2762,7 @@ static int __maybe_unused do_ppb_xxlock(struct map_info *map, */ timeo = jiffies + msecs_to_jiffies(2000); /* 2s max (un)locking */ for (;;) { - if (chip_ready(map, chip, adr)) + if (chip_ready(map, chip, adr, NULL)) break; if (time_after(jiffies, timeo)) { diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig index bc82305ebb4c2..ffbf4f6cb9cfe 100644 --- a/drivers/mtd/maps/Kconfig +++ b/drivers/mtd/maps/Kconfig @@ -303,7 +303,7 @@ config MTD_DC21285 config MTD_IXP4XX tristate "CFI Flash device mapped on Intel IXP4xx based systems" - depends on MTD_CFI && MTD_COMPLEX_MAPPINGS && ARCH_IXP4XX + depends on MTD_CFI && MTD_COMPLEX_MAPPINGS && ARCH_IXP4XX && MTD_CFI_ADV_OPTIONS help This enables MTD access to flash devices on platforms based on Intel's IXP4xx family of network processors such as the diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 32a76b8feaa5d..ac5d3b6db9b84 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -727,8 +727,6 @@ int del_mtd_device(struct mtd_info *mtd) mutex_lock(&mtd_table_mutex); - debugfs_remove_recursive(mtd->dbg.dfs_dir); - if (idr_find(&mtd_idr, mtd->index) != mtd) { ret = -ENODEV; goto out_error; @@ -744,6 +742,8 @@ int del_mtd_device(struct mtd_info *mtd) mtd->index, mtd->name, mtd->usecount); ret = -EBUSY; } else { + debugfs_remove_recursive(mtd->dbg.dfs_dir); + /* Try to remove the NVMEM provider */ if (mtd->nvmem) nvmem_unregister(mtd->nvmem); diff --git a/drivers/mtd/nand/bbt.c b/drivers/mtd/nand/bbt.c index 044adf9138546..64af6898131d6 100644 --- a/drivers/mtd/nand/bbt.c +++ b/drivers/mtd/nand/bbt.c @@ -123,7 +123,7 @@ int nanddev_bbt_set_block_status(struct nand_device *nand, unsigned int entry, unsigned int rbits = bits_per_block + offs - BITS_PER_LONG; pos[1] &= ~GENMASK(rbits - 1, 0); - pos[1] |= val >> rbits; + pos[1] |= val >> (bits_per_block - rbits); } return 0; diff --git a/drivers/mtd/nand/onenand/generic.c b/drivers/mtd/nand/onenand/generic.c index 8b6f4da5d7201..a4b8b65fe15f5 100644 --- a/drivers/mtd/nand/onenand/generic.c +++ b/drivers/mtd/nand/onenand/generic.c @@ -53,7 +53,12 @@ static int generic_onenand_probe(struct platform_device *pdev) } info->onenand.mmcontrol = pdata ? pdata->mmcontrol : NULL; - info->onenand.irq = platform_get_irq(pdev, 0); + + err = platform_get_irq(pdev, 0); + if (err < 0) + goto out_iounmap; + + info->onenand.irq = err; info->mtd.dev.parent = &pdev->dev; info->mtd.priv = &info->onenand; diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c index 23d11e8b56441..17c751f359d3d 100644 --- a/drivers/mtd/nand/raw/atmel/nand-controller.c +++ b/drivers/mtd/nand/raw/atmel/nand-controller.c @@ -2004,13 +2004,15 @@ static int atmel_nand_controller_init(struct atmel_nand_controller *nc, nc->mck = of_clk_get(dev->parent->of_node, 0); if (IS_ERR(nc->mck)) { dev_err(dev, "Failed to retrieve MCK clk\n"); - return PTR_ERR(nc->mck); + ret = PTR_ERR(nc->mck); + goto out_release_dma; } np = of_parse_phandle(dev->parent->of_node, "atmel,smc", 0); if (!np) { dev_err(dev, "Missing or invalid atmel,smc property\n"); - return -EINVAL; + ret = -EINVAL; + goto out_release_dma; } nc->smc = syscon_node_to_regmap(np); @@ -2018,10 +2020,16 @@ static int atmel_nand_controller_init(struct atmel_nand_controller *nc, if (IS_ERR(nc->smc)) { ret = PTR_ERR(nc->smc); dev_err(dev, "Could not get SMC regmap (err = %d)\n", ret); - return ret; + goto out_release_dma; } return 0; + +out_release_dma: + if (nc->dmac) + dma_release_channel(nc->dmac); + + return ret; } static int diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c index 0f3c09fb9c34f..bd9f45edc9a34 100644 --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c @@ -1756,7 +1756,7 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip, mtd->oobsize / trans, host->hwcfg.sector_size_1k); - if (!ret) { + if (ret != -EBADMSG) { *err_addr = brcmnand_get_uncorrecc_addr(ctrl); if (*err_addr) diff --git a/drivers/mtd/nand/raw/cafe_nand.c b/drivers/mtd/nand/raw/cafe_nand.c index 2d1c22dc88c15..cc5009200cc23 100644 --- a/drivers/mtd/nand/raw/cafe_nand.c +++ b/drivers/mtd/nand/raw/cafe_nand.c @@ -757,7 +757,7 @@ static int cafe_nand_probe(struct pci_dev *pdev, "CAFE NAND", mtd); if (err) { dev_warn(&pdev->dev, "Could not register IRQ %d\n", pdev->irq); - goto out_ior; + goto out_free_rs; } /* Disable master reset, enable NAND clock */ @@ -801,6 +801,8 @@ static int cafe_nand_probe(struct pci_dev *pdev, /* Disable NAND IRQ in global IRQ mask register */ cafe_writel(cafe, ~1 & cafe_readl(cafe, GLOBAL_IRQ_MASK), GLOBAL_IRQ_MASK); free_irq(pdev->irq, mtd); + out_free_rs: + free_rs(cafe->rs); out_ior: pci_iounmap(pdev, cafe->mmio); out_free_mtd: diff --git a/drivers/mtd/nand/raw/fsmc_nand.c b/drivers/mtd/nand/raw/fsmc_nand.c index 81e4b0f466623..cc8369a595de3 100644 --- a/drivers/mtd/nand/raw/fsmc_nand.c +++ b/drivers/mtd/nand/raw/fsmc_nand.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -93,6 +94,14 @@ #define FSMC_BUSY_WAIT_TIMEOUT (1 * HZ) +/* + * According to SPEAr300 Reference Manual (RM0082) + * TOUDEL = 7ns (Output delay from the flip-flops to the board) + * TINDEL = 5ns (Input delay from the board to the flipflop) + */ +#define TOUTDEL 7000 +#define TINDEL 5000 + struct fsmc_nand_timings { u8 tclr; u8 tar; @@ -277,7 +286,7 @@ static int fsmc_calc_timings(struct fsmc_nand_data *host, { unsigned long hclk = clk_get_rate(host->clk); unsigned long hclkn = NSEC_PER_SEC / hclk; - u32 thiz, thold, twait, tset; + u32 thiz, thold, twait, tset, twait_min; if (sdrt->tRC_min < 30000) return -EOPNOTSUPP; @@ -309,13 +318,6 @@ static int fsmc_calc_timings(struct fsmc_nand_data *host, else if (tims->thold > FSMC_THOLD_MASK) tims->thold = FSMC_THOLD_MASK; - twait = max(sdrt->tRP_min, sdrt->tWP_min); - tims->twait = DIV_ROUND_UP(twait / 1000, hclkn) - 1; - if (tims->twait == 0) - tims->twait = 1; - else if (tims->twait > FSMC_TWAIT_MASK) - tims->twait = FSMC_TWAIT_MASK; - tset = max(sdrt->tCS_min - sdrt->tWP_min, sdrt->tCEA_max - sdrt->tREA_max); tims->tset = DIV_ROUND_UP(tset / 1000, hclkn) - 1; @@ -324,6 +326,21 @@ static int fsmc_calc_timings(struct fsmc_nand_data *host, else if (tims->tset > FSMC_TSET_MASK) tims->tset = FSMC_TSET_MASK; + /* + * According to SPEAr300 Reference Manual (RM0082) which gives more + * information related to FSMSC timings than the SPEAr600 one (RM0305), + * twait >= tCEA - (tset * TCLK) + TOUTDEL + TINDEL + */ + twait_min = sdrt->tCEA_max - ((tims->tset + 1) * hclkn * 1000) + + TOUTDEL + TINDEL; + twait = max3(sdrt->tRP_min, sdrt->tWP_min, twait_min); + + tims->twait = DIV_ROUND_UP(twait / 1000, hclkn) - 1; + if (tims->twait == 0) + tims->twait = 1; + else if (tims->twait > FSMC_TWAIT_MASK) + tims->twait = FSMC_TWAIT_MASK; + return 0; } @@ -650,6 +667,9 @@ static int fsmc_exec_op(struct nand_chip *chip, const struct nand_operation *op, instr->ctx.waitrdy.timeout_ms); break; } + + if (instr->delay_ns) + ndelay(instr->delay_ns); } return ret; diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c index 60f146920b9f4..02218c3b548f9 100644 --- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c @@ -645,6 +645,7 @@ static void gpmi_nfc_compute_timings(struct gpmi_nand_data *this, const struct nand_sdr_timings *sdr) { struct gpmi_nfc_hardware_timing *hw = &this->hw; + struct resources *r = &this->resources; unsigned int dll_threshold_ps = this->devdata->max_chain_delay; unsigned int period_ps, reference_period_ps; unsigned int data_setup_cycles, data_hold_cycles, addr_setup_cycles; @@ -668,6 +669,8 @@ static void gpmi_nfc_compute_timings(struct gpmi_nand_data *this, wrn_dly_sel = BV_GPMI_CTRL1_WRN_DLY_SEL_NO_DELAY; } + hw->clk_rate = clk_round_rate(r->clock[0], hw->clk_rate); + /* SDR core timings are given in picoseconds */ period_ps = div_u64((u64)NSEC_PER_SEC * 1000, hw->clk_rate); @@ -710,14 +713,32 @@ static void gpmi_nfc_compute_timings(struct gpmi_nand_data *this, (use_half_period ? BM_GPMI_CTRL1_HALF_PERIOD : 0); } -static void gpmi_nfc_apply_timings(struct gpmi_nand_data *this) +static int gpmi_nfc_apply_timings(struct gpmi_nand_data *this) { struct gpmi_nfc_hardware_timing *hw = &this->hw; struct resources *r = &this->resources; void __iomem *gpmi_regs = r->gpmi_regs; unsigned int dll_wait_time_us; + int ret; + + /* Clock dividers do NOT guarantee a clean clock signal on its output + * during the change of the divide factor on i.MX6Q/UL/SX. On i.MX7/8, + * all clock dividers provide these guarantee. + */ + if (GPMI_IS_MX6Q(this) || GPMI_IS_MX6SX(this)) + clk_disable_unprepare(r->clock[0]); - clk_set_rate(r->clock[0], hw->clk_rate); + ret = clk_set_rate(r->clock[0], hw->clk_rate); + if (ret) { + dev_err(this->dev, "cannot set clock rate to %lu Hz: %d\n", hw->clk_rate, ret); + return ret; + } + + if (GPMI_IS_MX6Q(this) || GPMI_IS_MX6SX(this)) { + ret = clk_prepare_enable(r->clock[0]); + if (ret) + return ret; + } writel(hw->timing0, gpmi_regs + HW_GPMI_TIMING0); writel(hw->timing1, gpmi_regs + HW_GPMI_TIMING1); @@ -736,6 +757,8 @@ static void gpmi_nfc_apply_timings(struct gpmi_nand_data *this) /* Wait for the DLL to settle. */ udelay(dll_wait_time_us); + + return 0; } static int gpmi_setup_data_interface(struct nand_chip *chip, int chipnr, @@ -1184,15 +1207,6 @@ static int gpmi_get_clks(struct gpmi_nand_data *this) r->clock[i] = clk; } - if (GPMI_IS_MX6(this)) - /* - * Set the default value for the gpmi clock. - * - * If you want to use the ONFI nand which is in the - * Synchronous Mode, you should change the clock as you need. - */ - clk_set_rate(r->clock[0], 22000000); - return 0; err_clock: @@ -2429,7 +2443,9 @@ static int gpmi_nfc_exec_op(struct nand_chip *chip, */ if (this->hw.must_apply_timings) { this->hw.must_apply_timings = false; - gpmi_nfc_apply_timings(this); + ret = gpmi_nfc_apply_timings(this); + if (ret) + goto out_pm; } dev_dbg(this->dev, "%s: %d instructions\n", __func__, op->ninstrs); @@ -2558,6 +2574,7 @@ static int gpmi_nfc_exec_op(struct nand_chip *chip, this->bch = false; +out_pm: pm_runtime_mark_last_busy(this->dev); pm_runtime_put_autosuspend(this->dev); diff --git a/drivers/mtd/nand/raw/mpc5121_nfc.c b/drivers/mtd/nand/raw/mpc5121_nfc.c index 8b90def6686fb..a5eb0a1f559c7 100644 --- a/drivers/mtd/nand/raw/mpc5121_nfc.c +++ b/drivers/mtd/nand/raw/mpc5121_nfc.c @@ -290,7 +290,6 @@ static int ads5121_chipselect_init(struct mtd_info *mtd) /* Control chips select signal on ADS5121 board */ static void ads5121_select_chip(struct nand_chip *nand, int chip) { - struct mtd_info *mtd = nand_to_mtd(nand); struct mpc5121_nfc_prv *prv = nand_get_controller_data(nand); u8 v; diff --git a/drivers/mtd/nand/raw/mtk_ecc.c b/drivers/mtd/nand/raw/mtk_ecc.c index 74595b644b7cd..57fa7807cd7d3 100644 --- a/drivers/mtd/nand/raw/mtk_ecc.c +++ b/drivers/mtd/nand/raw/mtk_ecc.c @@ -43,6 +43,7 @@ struct mtk_ecc_caps { u32 err_mask; + u32 err_shift; const u8 *ecc_strength; const u32 *ecc_regs; u8 num_ecc_strength; @@ -76,7 +77,7 @@ static const u8 ecc_strength_mt2712[] = { }; static const u8 ecc_strength_mt7622[] = { - 4, 6, 8, 10, 12, 14, 16 + 4, 6, 8, 10, 12 }; enum mtk_ecc_regs { @@ -221,7 +222,7 @@ void mtk_ecc_get_stats(struct mtk_ecc *ecc, struct mtk_ecc_stats *stats, for (i = 0; i < sectors; i++) { offset = (i >> 2) << 2; err = readl(ecc->regs + ECC_DECENUM0 + offset); - err = err >> ((i % 4) * 8); + err = err >> ((i % 4) * ecc->caps->err_shift); err &= ecc->caps->err_mask; if (err == ecc->caps->err_mask) { /* uncorrectable errors */ @@ -449,6 +450,7 @@ EXPORT_SYMBOL(mtk_ecc_get_parity_bits); static const struct mtk_ecc_caps mtk_ecc_caps_mt2701 = { .err_mask = 0x3f, + .err_shift = 8, .ecc_strength = ecc_strength_mt2701, .ecc_regs = mt2701_ecc_regs, .num_ecc_strength = 20, @@ -459,6 +461,7 @@ static const struct mtk_ecc_caps mtk_ecc_caps_mt2701 = { static const struct mtk_ecc_caps mtk_ecc_caps_mt2712 = { .err_mask = 0x7f, + .err_shift = 8, .ecc_strength = ecc_strength_mt2712, .ecc_regs = mt2712_ecc_regs, .num_ecc_strength = 23, @@ -468,10 +471,11 @@ static const struct mtk_ecc_caps mtk_ecc_caps_mt2712 = { }; static const struct mtk_ecc_caps mtk_ecc_caps_mt7622 = { - .err_mask = 0x3f, + .err_mask = 0x1f, + .err_shift = 5, .ecc_strength = ecc_strength_mt7622, .ecc_regs = mt7622_ecc_regs, - .num_ecc_strength = 7, + .num_ecc_strength = 5, .ecc_mode_shift = 4, .parity_bits = 13, .pg_irq_sel = 0, diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c index c10995ca624a6..5af3bef6c2303 100644 --- a/drivers/mtd/nand/raw/qcom_nandc.c +++ b/drivers/mtd/nand/raw/qcom_nandc.c @@ -2,7 +2,6 @@ /* * Copyright (c) 2016, The Linux Foundation. All rights reserved. */ - #include #include #include @@ -2944,10 +2943,6 @@ static int qcom_nandc_probe(struct platform_device *pdev) if (!nandc->base_dma) return -ENXIO; - ret = qcom_nandc_alloc(nandc); - if (ret) - goto err_nandc_alloc; - ret = clk_prepare_enable(nandc->core_clk); if (ret) goto err_core_clk; @@ -2956,6 +2951,10 @@ static int qcom_nandc_probe(struct platform_device *pdev) if (ret) goto err_aon_clk; + ret = qcom_nandc_alloc(nandc); + if (ret) + goto err_nandc_alloc; + ret = qcom_nandc_setup(nandc); if (ret) goto err_setup; @@ -2967,15 +2966,14 @@ static int qcom_nandc_probe(struct platform_device *pdev) return 0; err_setup: + qcom_nandc_unalloc(nandc); +err_nandc_alloc: clk_disable_unprepare(nandc->aon_clk); err_aon_clk: clk_disable_unprepare(nandc->core_clk); err_core_clk: - qcom_nandc_unalloc(nandc); -err_nandc_alloc: dma_unmap_resource(dev, res->start, resource_size(res), DMA_BIDIRECTIONAL, 0); - return ret; } diff --git a/drivers/mtd/nand/raw/sh_flctl.c b/drivers/mtd/nand/raw/sh_flctl.c index e509c93737c4f..f0e2f2d652829 100644 --- a/drivers/mtd/nand/raw/sh_flctl.c +++ b/drivers/mtd/nand/raw/sh_flctl.c @@ -384,7 +384,8 @@ static int flctl_dma_fifo0_transfer(struct sh_flctl *flctl, unsigned long *buf, dma_addr_t dma_addr; dma_cookie_t cookie; uint32_t reg; - int ret; + int ret = 0; + unsigned long time_left; if (dir == DMA_FROM_DEVICE) { chan = flctl->chan_fifo0_rx; @@ -425,13 +426,14 @@ static int flctl_dma_fifo0_transfer(struct sh_flctl *flctl, unsigned long *buf, goto out; } - ret = + time_left = wait_for_completion_timeout(&flctl->dma_complete, msecs_to_jiffies(3000)); - if (ret <= 0) { + if (time_left == 0) { dmaengine_terminate_all(chan); dev_err(&flctl->pdev->dev, "wait_for_completion_timeout\n"); + ret = -ETIMEDOUT; } out: @@ -441,7 +443,7 @@ static int flctl_dma_fifo0_transfer(struct sh_flctl *flctl, unsigned long *buf, dma_unmap_single(chan->device->dev, dma_addr, len, dir); - /* ret > 0 is success */ + /* ret == 0 is success */ return ret; } @@ -465,7 +467,7 @@ static void read_fiforeg(struct sh_flctl *flctl, int rlen, int offset) /* initiate DMA transfer */ if (flctl->chan_fifo0_rx && rlen >= 32 && - flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_FROM_DEVICE) > 0) + !flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_FROM_DEVICE)) goto convert; /* DMA success */ /* do polling transfer */ @@ -524,7 +526,7 @@ static void write_ec_fiforeg(struct sh_flctl *flctl, int rlen, /* initiate DMA transfer */ if (flctl->chan_fifo0_tx && rlen >= 32 && - flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_TO_DEVICE) > 0) + !flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_TO_DEVICE)) return; /* DMA success */ /* do polling transfer */ diff --git a/drivers/mtd/spi-nor/hisi-sfc.c b/drivers/mtd/spi-nor/hisi-sfc.c index 8fcc48056a8bc..569cfd473c87b 100644 --- a/drivers/mtd/spi-nor/hisi-sfc.c +++ b/drivers/mtd/spi-nor/hisi-sfc.c @@ -474,7 +474,6 @@ static int hisi_spi_nor_remove(struct platform_device *pdev) hisi_spi_nor_unregister_all(host); mutex_destroy(&host->lock); - clk_disable_unprepare(host->clk); return 0; } diff --git a/drivers/mtd/spi-nor/intel-spi-pci.c b/drivers/mtd/spi-nor/intel-spi-pci.c index 3cda8e7a68f81..47ffeda489cc0 100644 --- a/drivers/mtd/spi-nor/intel-spi-pci.c +++ b/drivers/mtd/spi-nor/intel-spi-pci.c @@ -63,6 +63,7 @@ static const struct pci_device_id intel_spi_pci_ids[] = { { PCI_VDEVICE(INTEL, 0x02a4), (unsigned long)&bxt_info }, { PCI_VDEVICE(INTEL, 0x18e0), (unsigned long)&bxt_info }, { PCI_VDEVICE(INTEL, 0x19e0), (unsigned long)&bxt_info }, + { PCI_VDEVICE(INTEL, 0x1bca), (unsigned long)&bxt_info }, { PCI_VDEVICE(INTEL, 0x34a4), (unsigned long)&bxt_info }, { PCI_VDEVICE(INTEL, 0x4b24), (unsigned long)&bxt_info }, { PCI_VDEVICE(INTEL, 0xa0a4), (unsigned long)&bxt_info }, diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index d636bbe214cb9..df521ff0b3280 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -350,9 +350,6 @@ static ssize_t dev_attribute_show(struct device *dev, * we still can use 'ubi->ubi_num'. */ ubi = container_of(dev, struct ubi_device, dev); - ubi = ubi_get_device(ubi->ubi_num); - if (!ubi) - return -ENODEV; if (attr == &dev_eraseblock_size) ret = sprintf(buf, "%d\n", ubi->leb_size); @@ -381,7 +378,6 @@ static ssize_t dev_attribute_show(struct device *dev, else ret = -EINVAL; - ubi_put_device(ubi); return ret; } @@ -956,9 +952,6 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, goto out_detach; } - /* Make device "available" before it becomes accessible via sysfs */ - ubi_devices[ubi_num] = ubi; - err = uif_init(ubi); if (err) goto out_detach; @@ -1003,6 +996,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, wake_up_process(ubi->bgt_thread); spin_unlock(&ubi->wl_lock); + ubi_devices[ubi_num] = ubi; ubi_notify_all(ubi, UBI_VOLUME_ADDED, NULL); return ubi_num; @@ -1011,7 +1005,6 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, out_uif: uif_close(ubi); out_detach: - ubi_devices[ubi_num] = NULL; ubi_wl_close(ubi); ubi_free_internal_volumes(ubi); vfree(ubi->vtbl); diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index 53f448e7433a9..6e95c4b1473e6 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -468,7 +468,9 @@ static int scan_pool(struct ubi_device *ubi, struct ubi_attach_info *ai, if (err == UBI_IO_FF_BITFLIPS) scrub = 1; - add_aeb(ai, free, pnum, ec, scrub); + ret = add_aeb(ai, free, pnum, ec, scrub); + if (ret) + goto out; continue; } else if (err == 0 || err == UBI_IO_BITFLIPS) { dbg_bld("Found non empty PEB:%i in pool", pnum); @@ -638,8 +640,10 @@ static int ubi_attach_fastmap(struct ubi_device *ubi, if (fm_pos >= fm_size) goto fail_bad; - add_aeb(ai, &ai->free, be32_to_cpu(fmec->pnum), - be32_to_cpu(fmec->ec), 0); + ret = add_aeb(ai, &ai->free, be32_to_cpu(fmec->pnum), + be32_to_cpu(fmec->ec), 0); + if (ret) + goto fail; } /* read EC values from used list */ @@ -649,8 +653,10 @@ static int ubi_attach_fastmap(struct ubi_device *ubi, if (fm_pos >= fm_size) goto fail_bad; - add_aeb(ai, &used, be32_to_cpu(fmec->pnum), - be32_to_cpu(fmec->ec), 0); + ret = add_aeb(ai, &used, be32_to_cpu(fmec->pnum), + be32_to_cpu(fmec->ec), 0); + if (ret) + goto fail; } /* read EC values from scrub list */ @@ -660,8 +666,10 @@ static int ubi_attach_fastmap(struct ubi_device *ubi, if (fm_pos >= fm_size) goto fail_bad; - add_aeb(ai, &used, be32_to_cpu(fmec->pnum), - be32_to_cpu(fmec->ec), 1); + ret = add_aeb(ai, &used, be32_to_cpu(fmec->pnum), + be32_to_cpu(fmec->ec), 1); + if (ret) + goto fail; } /* read EC values from erase list */ @@ -671,8 +679,10 @@ static int ubi_attach_fastmap(struct ubi_device *ubi, if (fm_pos >= fm_size) goto fail_bad; - add_aeb(ai, &ai->erase, be32_to_cpu(fmec->pnum), - be32_to_cpu(fmec->ec), 1); + ret = add_aeb(ai, &ai->erase, be32_to_cpu(fmec->pnum), + be32_to_cpu(fmec->ec), 1); + if (ret) + goto fail; } ai->mean_ec = div_u64(ai->ec_sum, ai->ec_count); diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c index 139ee132bfbcf..6ea95ade4ca6b 100644 --- a/drivers/mtd/ubi/vmt.c +++ b/drivers/mtd/ubi/vmt.c @@ -56,16 +56,11 @@ static ssize_t vol_attribute_show(struct device *dev, { int ret; struct ubi_volume *vol = container_of(dev, struct ubi_volume, dev); - struct ubi_device *ubi; - - ubi = ubi_get_device(vol->ubi->ubi_num); - if (!ubi) - return -ENODEV; + struct ubi_device *ubi = vol->ubi; spin_lock(&ubi->volumes_lock); if (!ubi->volumes[vol->vol_id]) { spin_unlock(&ubi->volumes_lock); - ubi_put_device(ubi); return -ENODEV; } /* Take a reference to prevent volume removal */ @@ -103,7 +98,6 @@ static ssize_t vol_attribute_show(struct device *dev, vol->ref_count -= 1; ubi_assert(vol->ref_count >= 0); spin_unlock(&ubi->volumes_lock); - ubi_put_device(ubi); return ret; } @@ -315,7 +309,6 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) ubi->volumes[vol_id] = NULL; ubi->vol_count -= 1; spin_unlock(&ubi->volumes_lock); - ubi_eba_destroy_table(eba_tbl); out_acc: spin_lock(&ubi->volumes_lock); ubi->rsvd_pebs -= vol->reserved_pebs; diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 7def041bbe484..7bbaa49d94815 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -319,7 +319,7 @@ static struct ubi_wl_entry *find_wl_entry(struct ubi_device *ubi, struct rb_root *root, int diff) { struct rb_node *p; - struct ubi_wl_entry *e, *prev_e = NULL; + struct ubi_wl_entry *e; int max; e = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb); @@ -334,7 +334,6 @@ static struct ubi_wl_entry *find_wl_entry(struct ubi_device *ubi, p = p->rb_left; else { p = p->rb_right; - prev_e = e; e = e1; } } diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c index eb7f76753c9c0..9f44e2e458df1 100644 --- a/drivers/net/arcnet/com20020-pci.c +++ b/drivers/net/arcnet/com20020-pci.c @@ -136,6 +136,9 @@ static int com20020pci_probe(struct pci_dev *pdev, return -ENOMEM; ci = (struct com20020_pci_card_info *)id->driver_data; + if (!ci) + return -EINVAL; + priv->ci = ci; mm = &ci->misc_map; diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index e3b25f3109367..31ed7616e84e7 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -233,7 +233,7 @@ static inline int __check_agg_selection_timer(struct port *port) if (bond == NULL) return 0; - return BOND_AD_INFO(bond).agg_select_timer ? 1 : 0; + return atomic_read(&BOND_AD_INFO(bond).agg_select_timer) ? 1 : 0; } /** @@ -1013,8 +1013,8 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr) if (port->aggregator && port->aggregator->is_active && !__port_is_enabled(port)) { - __enable_port(port); + *update_slave_arr = true; } } break; @@ -1770,6 +1770,7 @@ static void ad_agg_selection_logic(struct aggregator *agg, port = port->next_port_in_aggregator) { __enable_port(port); } + *update_slave_arr = true; } } @@ -1984,7 +1985,7 @@ static void ad_marker_response_received(struct bond_marker *marker, */ void bond_3ad_initiate_agg_selection(struct bonding *bond, int timeout) { - BOND_AD_INFO(bond).agg_select_timer = timeout; + atomic_set(&BOND_AD_INFO(bond).agg_select_timer, timeout); } /** @@ -2217,7 +2218,8 @@ void bond_3ad_unbind_slave(struct slave *slave) temp_aggregator->num_of_ports--; if (__agg_active_ports(temp_aggregator) == 0) { select_new_active_agg = temp_aggregator->is_active; - ad_clear_agg(temp_aggregator); + if (temp_aggregator->num_of_ports == 0) + ad_clear_agg(temp_aggregator); if (select_new_active_agg) { slave_info(bond->dev, slave->dev, "Removing an active aggregator\n"); /* select new active aggregator */ @@ -2267,6 +2269,28 @@ void bond_3ad_update_ad_actor_settings(struct bonding *bond) spin_unlock_bh(&bond->mode_lock); } +/** + * bond_agg_timer_advance - advance agg_select_timer + * @bond: bonding structure + * + * Return true when agg_select_timer reaches 0. + */ +static bool bond_agg_timer_advance(struct bonding *bond) +{ + int val, nval; + + while (1) { + val = atomic_read(&BOND_AD_INFO(bond).agg_select_timer); + if (!val) + return false; + nval = val - 1; + if (atomic_cmpxchg(&BOND_AD_INFO(bond).agg_select_timer, + val, nval) == val) + break; + } + return nval == 0; +} + /** * bond_3ad_state_machine_handler - handle state machines timeout * @bond: bonding struct to work on @@ -2302,9 +2326,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work) if (!bond_has_slaves(bond)) goto re_arm; - /* check if agg_select_timer timer after initialize is timed out */ - if (BOND_AD_INFO(bond).agg_select_timer && - !(--BOND_AD_INFO(bond).agg_select_timer)) { + if (bond_agg_timer_advance(bond)) { slave = bond_first_slave_rcu(bond); port = slave ? &(SLAVE_AD_INFO(slave)->port) : NULL; diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index c81698550e5a7..8bee935c8f90f 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1276,12 +1276,12 @@ int bond_alb_initialize(struct bonding *bond, int rlb_enabled) return res; if (rlb_enabled) { - bond->alb_info.rlb_enabled = 1; res = rlb_initialize(bond); if (res) { tlb_deinitialize(bond); return res; } + bond->alb_info.rlb_enabled = 1; } else { bond->alb_info.rlb_enabled = 0; } @@ -1514,14 +1514,14 @@ void bond_alb_monitor(struct work_struct *work) struct slave *slave; if (!bond_has_slaves(bond)) { - bond_info->tx_rebalance_counter = 0; + atomic_set(&bond_info->tx_rebalance_counter, 0); bond_info->lp_counter = 0; goto re_arm; } rcu_read_lock(); - bond_info->tx_rebalance_counter++; + atomic_inc(&bond_info->tx_rebalance_counter); bond_info->lp_counter++; /* send learning packets */ @@ -1543,7 +1543,7 @@ void bond_alb_monitor(struct work_struct *work) } /* rebalance tx traffic */ - if (bond_info->tx_rebalance_counter >= BOND_TLB_REBALANCE_TICKS) { + if (atomic_read(&bond_info->tx_rebalance_counter) >= BOND_TLB_REBALANCE_TICKS) { bond_for_each_slave_rcu(bond, slave, iter) { tlb_clear_slave(bond, slave, 1); if (slave == rcu_access_pointer(bond->curr_active_slave)) { @@ -1553,7 +1553,7 @@ void bond_alb_monitor(struct work_struct *work) bond_info->unbalanced_load = 0; } } - bond_info->tx_rebalance_counter = 0; + atomic_set(&bond_info->tx_rebalance_counter, 0); } if (bond_info->rlb_enabled) { @@ -1623,7 +1623,8 @@ int bond_alb_init_slave(struct bonding *bond, struct slave *slave) tlb_init_slave(slave); /* order a rebalance ASAP */ - bond->alb_info.tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS; + atomic_set(&bond->alb_info.tx_rebalance_counter, + BOND_TLB_REBALANCE_TICKS); if (bond->alb_info.rlb_enabled) bond->alb_info.rlb_rebalance = 1; @@ -1660,7 +1661,8 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char rlb_clear_slave(bond, slave); } else if (link == BOND_LINK_UP) { /* order a rebalance ASAP */ - bond_info->tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS; + atomic_set(&bond_info->tx_rebalance_counter, + BOND_TLB_REBALANCE_TICKS); if (bond->alb_info.rlb_enabled) { bond->alb_info.rlb_rebalance = 1; /* If the updelay module parameter is smaller than the diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index e21643377162b..246bcbd650b4b 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -792,9 +792,6 @@ static bool bond_should_notify_peers(struct bonding *bond) slave = rcu_dereference(bond->curr_active_slave); rcu_read_unlock(); - netdev_dbg(bond->dev, "bond_should_notify_peers: slave %s\n", - slave ? slave->dev->name : "NULL"); - if (!slave || !bond->send_peer_notif || bond->send_peer_notif % max(1, bond->params.peer_notif_delay) != 0 || @@ -802,6 +799,9 @@ static bool bond_should_notify_peers(struct bonding *bond) test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state)) return false; + netdev_dbg(bond->dev, "bond_should_notify_peers: slave %s\n", + slave ? slave->dev->name : "NULL"); + return true; } @@ -1219,7 +1219,7 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb) skb->dev = bond->dev; if (BOND_MODE(bond) == BOND_MODE_ALB && - bond->dev->priv_flags & IFF_BRIDGE_PORT && + netif_is_bridge_port(bond->dev) && skb->pkt_type == PACKET_HOST) { if (unlikely(skb_cow_head(skb, @@ -1926,7 +1926,6 @@ static int __bond_release_one(struct net_device *bond_dev, /* recompute stats just before removing the slave */ bond_get_stats(bond->dev, &bond->bond_stats); - bond_upper_dev_unlink(bond, slave); /* unregister rx_handler early so bond_handle_frame wouldn't be called * for this slave anymore. */ @@ -1935,6 +1934,8 @@ static int __bond_release_one(struct net_device *bond_dev, if (BOND_MODE(bond) == BOND_MODE_8023AD) bond_3ad_unbind_slave(slave); + bond_upper_dev_unlink(bond, slave); + if (bond_mode_can_use_xmit_hash(bond)) bond_update_slave_arr(bond, slave); @@ -1978,10 +1979,9 @@ static int __bond_release_one(struct net_device *bond_dev, bond_select_active_slave(bond); } - if (!bond_has_slaves(bond)) { - bond_set_carrier(bond); + bond_set_carrier(bond); + if (!bond_has_slaves(bond)) eth_hw_addr_random(bond_dev); - } unblock_netpoll_tx(); synchronize_rcu(); @@ -3071,9 +3071,11 @@ static void bond_activebackup_arp_mon(struct bonding *bond) if (!rtnl_trylock()) return; - if (should_notify_peers) + if (should_notify_peers) { + bond->send_peer_notif--; call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev); + } if (should_notify_rtnl) { bond_slave_state_notify(bond); bond_slave_link_notify(bond); diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 215c109232893..933087d85549a 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -1452,7 +1452,7 @@ static int bond_option_ad_actor_system_set(struct bonding *bond, mac = (u8 *)&newval->value; } - if (!is_valid_ether_addr(mac)) + if (is_multicast_ether_addr(mac)) goto err; netdev_dbg(bond->dev, "Setting ad_actor_system to %pM\n", mac); diff --git a/drivers/net/caif/Kconfig b/drivers/net/caif/Kconfig index 50cc051b90d84..45b6ee7e4dd0e 100644 --- a/drivers/net/caif/Kconfig +++ b/drivers/net/caif/Kconfig @@ -44,7 +44,7 @@ config CAIF_HSI config CAIF_VIRTIO tristate "CAIF virtio transport driver" - depends on CAIF && HAS_DMA + depends on CAIF && HAS_DMA && VHOST_DPN select VHOST_RING select VIRTIO select GENERIC_ALLOCATOR diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c index eb426822ad066..7f2c551e5d690 100644 --- a/drivers/net/caif/caif_virtio.c +++ b/drivers/net/caif/caif_virtio.c @@ -723,13 +723,21 @@ static int cfv_probe(struct virtio_device *vdev) /* Carrier is off until netdevice is opened */ netif_carrier_off(netdev); + /* serialize netdev register + virtio_device_ready() with ndo_open() */ + rtnl_lock(); + /* register Netdev */ - err = register_netdev(netdev); + err = register_netdevice(netdev); if (err) { + rtnl_unlock(); dev_err(&vdev->dev, "Unable to register netdev (%d)\n", err); goto err; } + virtio_device_ready(vdev); + + rtnl_unlock(); + debugfs_init(cfv); return 0; diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c index b8f1f2b69dd3e..ca864ece89ae5 100644 --- a/drivers/net/can/grcan.c +++ b/drivers/net/can/grcan.c @@ -241,13 +241,14 @@ struct grcan_device_config { .rxsize = GRCAN_DEFAULT_BUFFER_SIZE, \ } -#define GRCAN_TXBUG_SAFE_GRLIB_VERSION 0x4100 +#define GRCAN_TXBUG_SAFE_GRLIB_VERSION 4100 #define GRLIB_VERSION_MASK 0xffff /* GRCAN private data structure */ struct grcan_priv { struct can_priv can; /* must be the first member */ struct net_device *dev; + struct device *ofdev_dev; struct napi_struct napi; struct grcan_registers __iomem *regs; /* ioremap'ed registers */ @@ -924,7 +925,7 @@ static void grcan_free_dma_buffers(struct net_device *dev) struct grcan_priv *priv = netdev_priv(dev); struct grcan_dma *dma = &priv->dma; - dma_free_coherent(&dev->dev, dma->base_size, dma->base_buf, + dma_free_coherent(priv->ofdev_dev, dma->base_size, dma->base_buf, dma->base_handle); memset(dma, 0, sizeof(*dma)); } @@ -949,7 +950,7 @@ static int grcan_allocate_dma_buffers(struct net_device *dev, /* Extra GRCAN_BUFFER_ALIGNMENT to allow for alignment */ dma->base_size = lsize + ssize + GRCAN_BUFFER_ALIGNMENT; - dma->base_buf = dma_alloc_coherent(&dev->dev, + dma->base_buf = dma_alloc_coherent(priv->ofdev_dev, dma->base_size, &dma->base_handle, GFP_KERNEL); @@ -1113,8 +1114,10 @@ static int grcan_close(struct net_device *dev) priv->closing = true; if (priv->need_txbug_workaround) { + spin_unlock_irqrestore(&priv->lock, flags); del_timer_sync(&priv->hang_timer); del_timer_sync(&priv->rr_timer); + spin_lock_irqsave(&priv->lock, flags); } netif_stop_queue(dev); grcan_stop_hardware(dev); @@ -1134,7 +1137,7 @@ static int grcan_close(struct net_device *dev) return 0; } -static int grcan_transmit_catch_up(struct net_device *dev, int budget) +static void grcan_transmit_catch_up(struct net_device *dev) { struct grcan_priv *priv = netdev_priv(dev); unsigned long flags; @@ -1142,7 +1145,7 @@ static int grcan_transmit_catch_up(struct net_device *dev, int budget) spin_lock_irqsave(&priv->lock, flags); - work_done = catch_up_echo_skb(dev, budget, true); + work_done = catch_up_echo_skb(dev, -1, true); if (work_done) { if (!priv->resetting && !priv->closing && !(priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY)) @@ -1156,8 +1159,6 @@ static int grcan_transmit_catch_up(struct net_device *dev, int budget) } spin_unlock_irqrestore(&priv->lock, flags); - - return work_done; } static int grcan_receive(struct net_device *dev, int budget) @@ -1239,19 +1240,13 @@ static int grcan_poll(struct napi_struct *napi, int budget) struct net_device *dev = priv->dev; struct grcan_registers __iomem *regs = priv->regs; unsigned long flags; - int tx_work_done, rx_work_done; - int rx_budget = budget / 2; - int tx_budget = budget - rx_budget; + int work_done; - /* Half of the budget for receiveing messages */ - rx_work_done = grcan_receive(dev, rx_budget); + work_done = grcan_receive(dev, budget); - /* Half of the budget for transmitting messages as that can trigger echo - * frames being received - */ - tx_work_done = grcan_transmit_catch_up(dev, tx_budget); + grcan_transmit_catch_up(dev); - if (rx_work_done < rx_budget && tx_work_done < tx_budget) { + if (work_done < budget) { napi_complete(napi); /* Guarantee no interference with a running reset that otherwise @@ -1268,7 +1263,7 @@ static int grcan_poll(struct napi_struct *napi, int budget) spin_unlock_irqrestore(&priv->lock, flags); } - return rx_work_done + tx_work_done; + return work_done; } /* Work tx bug by waiting while for the risky situation to clear. If that fails, @@ -1600,6 +1595,7 @@ static int grcan_setup_netdev(struct platform_device *ofdev, memcpy(&priv->config, &grcan_module_config, sizeof(struct grcan_device_config)); priv->dev = dev; + priv->ofdev_dev = &ofdev->dev; priv->regs = base; priv->can.bittiming_const = &grcan_bittiming_const; priv->can.do_set_bittiming = grcan_set_bittiming; @@ -1652,6 +1648,7 @@ static int grcan_setup_netdev(struct platform_device *ofdev, static int grcan_probe(struct platform_device *ofdev) { struct device_node *np = ofdev->dev.of_node; + struct device_node *sysid_parent; struct resource *res; u32 sysid, ambafreq; int irq, err; @@ -1661,10 +1658,14 @@ static int grcan_probe(struct platform_device *ofdev) /* Compare GRLIB version number with the first that does not * have the tx bug (see start_xmit) */ - err = of_property_read_u32(np, "systemid", &sysid); - if (!err && ((sysid & GRLIB_VERSION_MASK) - >= GRCAN_TXBUG_SAFE_GRLIB_VERSION)) - txbug = false; + sysid_parent = of_find_node_by_path("/ambapp0"); + if (sysid_parent) { + err = of_property_read_u32(sysid_parent, "systemid", &sysid); + if (!err && ((sysid & GRLIB_VERSION_MASK) >= + GRCAN_TXBUG_SAFE_GRLIB_VERSION)) + txbug = false; + of_node_put(sysid_parent); + } err = of_property_read_u32(np, "freq", &ambafreq); if (err) { diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c index e7a26ec9bdc11..faa78d38d752b 100644 --- a/drivers/net/can/kvaser_pciefd.c +++ b/drivers/net/can/kvaser_pciefd.c @@ -248,6 +248,9 @@ MODULE_DESCRIPTION("CAN driver for Kvaser CAN/PCIe devices"); #define KVASER_PCIEFD_SPACK_EWLR BIT(23) #define KVASER_PCIEFD_SPACK_EPLR BIT(24) +/* Kvaser KCAN_EPACK second word */ +#define KVASER_PCIEFD_EPACK_DIR_TX BIT(0) + struct kvaser_pciefd; struct kvaser_pciefd_can { @@ -1283,7 +1286,10 @@ static int kvaser_pciefd_rx_error_frame(struct kvaser_pciefd_can *can, can->err_rep_cnt++; can->can.can_stats.bus_error++; - stats->rx_errors++; + if (p->header[1] & KVASER_PCIEFD_EPACK_DIR_TX) + stats->tx_errors++; + else + stats->rx_errors++; can->bec.txerr = bec.txerr; can->bec.rxerr = bec.rxerr; diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index de275ccb4fd0b..26f721664e761 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -206,15 +206,15 @@ enum m_can_reg { /* Interrupts for version 3.0.x */ #define IR_ERR_LEC_30X (IR_STE | IR_FOE | IR_ACKE | IR_BE | IR_CRCE) -#define IR_ERR_BUS_30X (IR_ERR_LEC_30X | IR_WDI | IR_ELO | IR_BEU | \ - IR_BEC | IR_TOO | IR_MRAF | IR_TSW | IR_TEFL | \ - IR_RF1L | IR_RF0L) +#define IR_ERR_BUS_30X (IR_ERR_LEC_30X | IR_WDI | IR_BEU | IR_BEC | \ + IR_TOO | IR_MRAF | IR_TSW | IR_TEFL | IR_RF1L | \ + IR_RF0L) #define IR_ERR_ALL_30X (IR_ERR_STATE | IR_ERR_BUS_30X) /* Interrupts for version >= 3.1.x */ #define IR_ERR_LEC_31X (IR_PED | IR_PEA) -#define IR_ERR_BUS_31X (IR_ERR_LEC_31X | IR_WDI | IR_ELO | IR_BEU | \ - IR_BEC | IR_TOO | IR_MRAF | IR_TSW | IR_TEFL | \ - IR_RF1L | IR_RF0L) +#define IR_ERR_BUS_31X (IR_ERR_LEC_31X | IR_WDI | IR_BEU | IR_BEC | \ + IR_TOO | IR_MRAF | IR_TSW | IR_TEFL | IR_RF1L | \ + IR_RF0L) #define IR_ERR_ALL_31X (IR_ERR_STATE | IR_ERR_BUS_31X) /* Interrupt Line Select (ILS) */ @@ -751,8 +751,6 @@ static void m_can_handle_other_err(struct net_device *dev, u32 irqstatus) { if (irqstatus & IR_WDI) netdev_err(dev, "Message RAM Watchdog event due to missing READY\n"); - if (irqstatus & IR_ELO) - netdev_err(dev, "Error Logging Overflow\n"); if (irqstatus & IR_BEU) netdev_err(dev, "Bit Error Uncorrected\n"); if (irqstatus & IR_BEC) @@ -1445,8 +1443,6 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev) M_CAN_FIFO_DATA(i / 4), *(u32 *)(cf->data + i)); - can_put_echo_skb(skb, dev, 0); - if (cdev->can.ctrlmode & CAN_CTRLMODE_FD) { cccr = m_can_read(cdev, M_CAN_CCCR); cccr &= ~(CCCR_CMR_MASK << CCCR_CMR_SHIFT); @@ -1463,6 +1459,9 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev) m_can_write(cdev, M_CAN_CCCR, cccr); } m_can_write(cdev, M_CAN_TXBTIE, 0x1); + + can_put_echo_skb(skb, dev, 0); + m_can_write(cdev, M_CAN_TXBAR, 0x1); /* End of xmit function for version 3.0.x */ } else { diff --git a/drivers/net/can/pch_can.c b/drivers/net/can/pch_can.c index db41dddd57716..e90651f7b2eaf 100644 --- a/drivers/net/can/pch_can.c +++ b/drivers/net/can/pch_can.c @@ -692,11 +692,11 @@ static int pch_can_rx_normal(struct net_device *ndev, u32 obj_num, int quota) cf->data[i + 1] = data_reg >> 8; } - netif_receive_skb(skb); rcv_pkts++; stats->rx_packets++; quota--; stats->rx_bytes += cf->can_dlc; + netif_receive_skb(skb); pch_fifo_thresh(priv, obj_num); obj_num++; diff --git a/drivers/net/can/rcar/rcar_can.c b/drivers/net/can/rcar/rcar_can.c index bf5adea9c0a38..ac52288fa3bfe 100644 --- a/drivers/net/can/rcar/rcar_can.c +++ b/drivers/net/can/rcar/rcar_can.c @@ -848,10 +848,12 @@ static int __maybe_unused rcar_can_suspend(struct device *dev) struct rcar_can_priv *priv = netdev_priv(ndev); u16 ctlr; - if (netif_running(ndev)) { - netif_stop_queue(ndev); - netif_device_detach(ndev); - } + if (!netif_running(ndev)) + return 0; + + netif_stop_queue(ndev); + netif_device_detach(ndev); + ctlr = readw(&priv->regs->ctlr); ctlr |= RCAR_CAN_CTLR_CANM_HALT; writew(ctlr, &priv->regs->ctlr); @@ -870,6 +872,9 @@ static int __maybe_unused rcar_can_resume(struct device *dev) u16 ctlr; int err; + if (!netif_running(ndev)) + return 0; + err = clk_enable(priv->clk); if (err) { netdev_err(ndev, "clk_enable() failed, error %d\n", err); @@ -883,10 +888,9 @@ static int __maybe_unused rcar_can_resume(struct device *dev) writew(ctlr, &priv->regs->ctlr); priv->can.state = CAN_STATE_ERROR_ACTIVE; - if (netif_running(ndev)) { - netif_device_attach(ndev); - netif_start_queue(ndev); - } + netif_device_attach(ndev); + netif_start_queue(ndev); + return 0; } diff --git a/drivers/net/can/rcar/rcar_canfd.c b/drivers/net/can/rcar/rcar_canfd.c index edaa1ca972c15..d4e9815ca26ff 100644 --- a/drivers/net/can/rcar/rcar_canfd.c +++ b/drivers/net/can/rcar/rcar_canfd.c @@ -1598,15 +1598,15 @@ static int rcar_canfd_channel_probe(struct rcar_canfd_global *gpriv, u32 ch, netif_napi_add(ndev, &priv->napi, rcar_canfd_rx_poll, RCANFD_NAPI_WEIGHT); + spin_lock_init(&priv->tx_lock); + devm_can_led_init(ndev); + gpriv->ch[priv->channel] = priv; err = register_candev(ndev); if (err) { dev_err(&pdev->dev, "register_candev() failed, error %d\n", err); goto fail_candev; } - spin_lock_init(&priv->tx_lock); - devm_can_led_init(ndev); - gpriv->ch[priv->channel] = priv; dev_info(&pdev->dev, "device registered (channel %u)\n", priv->channel); return 0; diff --git a/drivers/net/can/sja1000/ems_pcmcia.c b/drivers/net/can/sja1000/ems_pcmcia.c index 770304eaef950..80b30768d9c6c 100644 --- a/drivers/net/can/sja1000/ems_pcmcia.c +++ b/drivers/net/can/sja1000/ems_pcmcia.c @@ -235,7 +235,12 @@ static int ems_pcmcia_add_card(struct pcmcia_device *pdev, unsigned long base) free_sja1000dev(dev); } - err = request_irq(dev->irq, &ems_pcmcia_interrupt, IRQF_SHARED, + if (!card->channels) { + err = -ENODEV; + goto failure_cleanup; + } + + err = request_irq(pdev->irq, &ems_pcmcia_interrupt, IRQF_SHARED, DRV_NAME, card); if (!err) return 0; diff --git a/drivers/net/can/sja1000/peak_pci.c b/drivers/net/can/sja1000/peak_pci.c index 8c0244f51059e..41a63777030cb 100644 --- a/drivers/net/can/sja1000/peak_pci.c +++ b/drivers/net/can/sja1000/peak_pci.c @@ -731,16 +731,15 @@ static void peak_pci_remove(struct pci_dev *pdev) struct net_device *prev_dev = chan->prev_dev; dev_info(&pdev->dev, "removing device %s\n", dev->name); + /* do that only for first channel */ + if (!prev_dev && chan->pciec_card) + peak_pciec_remove(chan->pciec_card); unregister_sja1000dev(dev); free_sja1000dev(dev); dev = prev_dev; - if (!dev) { - /* do that only for first channel */ - if (chan->pciec_card) - peak_pciec_remove(chan->pciec_card); + if (!dev) break; - } priv = netdev_priv(dev); chan = priv->priv; } diff --git a/drivers/net/can/softing/softing_cs.c b/drivers/net/can/softing/softing_cs.c index 2e93ee7923739..e5c939b63fa65 100644 --- a/drivers/net/can/softing/softing_cs.c +++ b/drivers/net/can/softing/softing_cs.c @@ -293,7 +293,7 @@ static int softingcs_probe(struct pcmcia_device *pcmcia) return 0; platform_failed: - kfree(dev); + platform_device_put(pdev); mem_failed: pcmcia_bad: pcmcia_failed: diff --git a/drivers/net/can/softing/softing_fw.c b/drivers/net/can/softing/softing_fw.c index 8f44fdd8804bf..1c2afa17c26d1 100644 --- a/drivers/net/can/softing/softing_fw.c +++ b/drivers/net/can/softing/softing_fw.c @@ -565,18 +565,19 @@ int softing_startstop(struct net_device *dev, int up) if (ret < 0) goto failed; } - /* enable_error_frame */ - /* + + /* enable_error_frame + * * Error reporting is switched off at the moment since * the receiving of them is not yet 100% verified * This should be enabled sooner or later - * - if (error_reporting) { + */ + if (0 && error_reporting) { ret = softing_fct_cmd(card, 51, "enable_error_frame"); if (ret < 0) goto failed; } - */ + /* initialize interface */ iowrite16(1, &card->dpram[DPRAM_FCT_PARAM + 2]); iowrite16(1, &card->dpram[DPRAM_FCT_PARAM + 4]); diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index 249d2fba28c7f..6458da9c13b95 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -823,7 +823,6 @@ static netdev_tx_t ems_usb_start_xmit(struct sk_buff *skb, struct net_device *ne usb_unanchor_urb(urb); usb_free_coherent(dev->udev, size, buf, urb->transfer_dma); - dev_kfree_skb(skb); atomic_dec(&dev->active_tx_urbs); diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c index 485e20e0dec2c..8847942a8d97e 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ b/drivers/net/can/usb/esd_usb2.c @@ -224,8 +224,8 @@ static void esd_usb2_rx_event(struct esd_usb2_net_priv *priv, if (id == ESD_EV_CAN_ERROR_EXT) { u8 state = msg->msg.rx.data[0]; u8 ecc = msg->msg.rx.data[1]; - u8 txerr = msg->msg.rx.data[2]; - u8 rxerr = msg->msg.rx.data[3]; + u8 rxerr = msg->msg.rx.data[2]; + u8 txerr = msg->msg.rx.data[3]; skb = alloc_can_err_skb(priv->netdev, &cf); if (skb == NULL) { diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 0ad13d78815c5..bf4ab30186aff 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -184,14 +184,16 @@ struct gs_can { struct usb_anchor tx_submitted; atomic_t active_tx_urbs; + void *rxbuf[GS_MAX_RX_URBS]; + dma_addr_t rxbuf_dma[GS_MAX_RX_URBS]; }; /* usb interface struct */ struct gs_usb { struct gs_can *canch[GS_MAX_INTF]; struct usb_anchor rx_submitted; - atomic_t active_channels; struct usb_device *udev; + u8 active_channels; }; /* 'allocate' a tx context. @@ -320,7 +322,7 @@ static void gs_usb_receive_bulk_callback(struct urb *urb) /* device reports out of range channel id */ if (hf->channel >= GS_MAX_INTF) - goto resubmit_urb; + goto device_detach; dev = usbcan->canch[hf->channel]; @@ -405,6 +407,7 @@ static void gs_usb_receive_bulk_callback(struct urb *urb) /* USB failure take down all interfaces */ if (rc == -ENODEV) { + device_detach: for (rc = 0; rc < GS_MAX_INTF; rc++) { if (usbcan->canch[rc]) netif_device_detach(usbcan->canch[rc]->netdev); @@ -506,6 +509,8 @@ static netdev_tx_t gs_can_start_xmit(struct sk_buff *skb, hf->echo_id = idx; hf->channel = dev->channel; + hf->flags = 0; + hf->reserved = 0; cf = (struct can_frame *)skb->data; @@ -585,10 +590,11 @@ static int gs_can_open(struct net_device *netdev) if (rc) return rc; - if (atomic_add_return(1, &parent->active_channels) == 1) { + if (!parent->active_channels) { for (i = 0; i < GS_MAX_RX_URBS; i++) { struct urb *urb; u8 *buf; + dma_addr_t buf_dma; /* alloc rx urb */ urb = usb_alloc_urb(0, GFP_KERNEL); @@ -599,7 +605,7 @@ static int gs_can_open(struct net_device *netdev) buf = usb_alloc_coherent(dev->udev, sizeof(struct gs_host_frame), GFP_KERNEL, - &urb->transfer_dma); + &buf_dma); if (!buf) { netdev_err(netdev, "No memory left for USB buffer\n"); @@ -607,6 +613,8 @@ static int gs_can_open(struct net_device *netdev) return -ENOMEM; } + urb->transfer_dma = buf_dma; + /* fill, anchor, and submit rx urb */ usb_fill_bulk_urb(urb, dev->udev, @@ -630,10 +638,17 @@ static int gs_can_open(struct net_device *netdev) rc); usb_unanchor_urb(urb); + usb_free_coherent(dev->udev, + sizeof(struct gs_host_frame), + buf, + buf_dma); usb_free_urb(urb); break; } + dev->rxbuf[i] = buf; + dev->rxbuf_dma[i] = buf_dma; + /* Drop reference, * USB core will take care of freeing it */ @@ -686,6 +701,7 @@ static int gs_can_open(struct net_device *netdev) dev->can.state = CAN_STATE_ERROR_ACTIVE; + parent->active_channels++; if (!(dev->can.ctrlmode & CAN_CTRLMODE_LISTENONLY)) netif_start_queue(netdev); @@ -697,12 +713,20 @@ static int gs_can_close(struct net_device *netdev) int rc; struct gs_can *dev = netdev_priv(netdev); struct gs_usb *parent = dev->parent; + unsigned int i; netif_stop_queue(netdev); /* Stop polling */ - if (atomic_dec_and_test(&parent->active_channels)) + parent->active_channels--; + if (!parent->active_channels) { usb_kill_anchored_urbs(&parent->rx_submitted); + for (i = 0; i < GS_MAX_RX_URBS; i++) + usb_free_coherent(dev->udev, + sizeof(struct gs_host_frame), + dev->rxbuf[i], + dev->rxbuf_dma[i]); + } /* Stop sending URBs */ usb_kill_anchored_urbs(&dev->tx_submitted); @@ -980,8 +1004,6 @@ static int gs_usb_probe(struct usb_interface *intf, init_usb_anchor(&dev->rx_submitted); - atomic_set(&dev->active_channels, 0); - usb_set_intfdata(intf, dev); dev->udev = interface_to_usbdev(intf); diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb.h b/drivers/net/can/usb/kvaser_usb/kvaser_usb.h index 390b6bde883c8..61e67986b625e 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb.h +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb.h @@ -35,9 +35,10 @@ #define KVASER_USB_RX_BUFFER_SIZE 3072 #define KVASER_USB_MAX_NET_DEVICES 5 -/* USB devices features */ -#define KVASER_USB_HAS_SILENT_MODE BIT(0) -#define KVASER_USB_HAS_TXRX_ERRORS BIT(1) +/* Kvaser USB device quirks */ +#define KVASER_USB_QUIRK_HAS_SILENT_MODE BIT(0) +#define KVASER_USB_QUIRK_HAS_TXRX_ERRORS BIT(1) +#define KVASER_USB_QUIRK_IGNORE_CLK_FREQ BIT(2) /* Device capabilities */ #define KVASER_USB_CAP_BERR_CAP 0x01 @@ -65,12 +66,7 @@ struct kvaser_usb_dev_card_data_hydra { struct kvaser_usb_dev_card_data { u32 ctrlmode_supported; u32 capabilities; - union { - struct { - enum kvaser_usb_leaf_family family; - } leaf; - struct kvaser_usb_dev_card_data_hydra hydra; - }; + struct kvaser_usb_dev_card_data_hydra hydra; }; /* Context for an outstanding, not yet ACKed, transmission */ @@ -84,7 +80,7 @@ struct kvaser_usb { struct usb_device *udev; struct usb_interface *intf; struct kvaser_usb_net_priv *nets[KVASER_USB_MAX_NET_DEVICES]; - const struct kvaser_usb_dev_ops *ops; + const struct kvaser_usb_driver_info *driver_info; const struct kvaser_usb_dev_cfg *cfg; struct usb_endpoint_descriptor *bulk_in, *bulk_out; @@ -166,6 +162,12 @@ struct kvaser_usb_dev_ops { int *cmd_len, u16 transid); }; +struct kvaser_usb_driver_info { + u32 quirks; + enum kvaser_usb_leaf_family family; + const struct kvaser_usb_dev_ops *ops; +}; + struct kvaser_usb_dev_cfg { const struct can_clock clock; const unsigned int timestamp_freq; @@ -185,4 +187,7 @@ int kvaser_usb_send_cmd_async(struct kvaser_usb_net_priv *priv, void *cmd, int len); int kvaser_usb_can_rx_over_error(struct net_device *netdev); + +extern const struct can_bittiming_const kvaser_usb_flexc_bittiming_const; + #endif /* KVASER_USB_H */ diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c index 0f1d3e807d631..416763fd1f11c 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c @@ -79,104 +79,134 @@ #define USB_ATI_MEMO_PRO_2HS_V2_PRODUCT_ID 269 #define USB_HYBRID_PRO_CANLIN_PRODUCT_ID 270 -static inline bool kvaser_is_leaf(const struct usb_device_id *id) -{ - return (id->idProduct >= USB_LEAF_DEVEL_PRODUCT_ID && - id->idProduct <= USB_CAN_R_PRODUCT_ID) || - (id->idProduct >= USB_LEAF_LITE_V2_PRODUCT_ID && - id->idProduct <= USB_MINI_PCIE_2HS_PRODUCT_ID); -} +static const struct kvaser_usb_driver_info kvaser_usb_driver_info_hydra = { + .quirks = 0, + .ops = &kvaser_usb_hydra_dev_ops, +}; -static inline bool kvaser_is_usbcan(const struct usb_device_id *id) -{ - return id->idProduct >= USB_USBCAN_REVB_PRODUCT_ID && - id->idProduct <= USB_MEMORATOR_PRODUCT_ID; -} +static const struct kvaser_usb_driver_info kvaser_usb_driver_info_usbcan = { + .quirks = KVASER_USB_QUIRK_HAS_TXRX_ERRORS | + KVASER_USB_QUIRK_HAS_SILENT_MODE, + .family = KVASER_USBCAN, + .ops = &kvaser_usb_leaf_dev_ops, +}; -static inline bool kvaser_is_hydra(const struct usb_device_id *id) -{ - return id->idProduct >= USB_BLACKBIRD_V2_PRODUCT_ID && - id->idProduct <= USB_HYBRID_PRO_CANLIN_PRODUCT_ID; -} +static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leaf = { + .quirks = KVASER_USB_QUIRK_IGNORE_CLK_FREQ, + .family = KVASER_LEAF, + .ops = &kvaser_usb_leaf_dev_ops, +}; + +static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leaf_err = { + .quirks = KVASER_USB_QUIRK_HAS_TXRX_ERRORS | + KVASER_USB_QUIRK_IGNORE_CLK_FREQ, + .family = KVASER_LEAF, + .ops = &kvaser_usb_leaf_dev_ops, +}; + +static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leaf_err_listen = { + .quirks = KVASER_USB_QUIRK_HAS_TXRX_ERRORS | + KVASER_USB_QUIRK_HAS_SILENT_MODE | + KVASER_USB_QUIRK_IGNORE_CLK_FREQ, + .family = KVASER_LEAF, + .ops = &kvaser_usb_leaf_dev_ops, +}; + +static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leafimx = { + .quirks = 0, + .ops = &kvaser_usb_leaf_dev_ops, +}; static const struct usb_device_id kvaser_usb_table[] = { - /* Leaf USB product IDs */ - { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_DEVEL_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_PRODUCT_ID) }, + /* Leaf M32C USB product IDs */ + { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_DEVEL_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS | - KVASER_USB_HAS_SILENT_MODE }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_SPRO_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS | - KVASER_USB_HAS_SILENT_MODE }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_LS_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS | - KVASER_USB_HAS_SILENT_MODE }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_SWC_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS | - KVASER_USB_HAS_SILENT_MODE }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_LIN_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS | - KVASER_USB_HAS_SILENT_MODE }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_SPRO_LS_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS | - KVASER_USB_HAS_SILENT_MODE }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_SPRO_SWC_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS | - KVASER_USB_HAS_SILENT_MODE }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO2_DEVEL_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS | - KVASER_USB_HAS_SILENT_MODE }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO2_HSHS_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS | - KVASER_USB_HAS_SILENT_MODE }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_UPRO_HSHS_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_GI_PRODUCT_ID) }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_GI_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_OBDII_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS | - KVASER_USB_HAS_SILENT_MODE }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO2_HSLS_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_CH_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err }, { USB_DEVICE(KVASER_VENDOR_ID, USB_BLACKBIRD_SPRO_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err }, { USB_DEVICE(KVASER_VENDOR_ID, USB_OEM_MERCURY_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err }, { USB_DEVICE(KVASER_VENDOR_ID, USB_OEM_LEAF_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err }, { USB_DEVICE(KVASER_VENDOR_ID, USB_CAN_R_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_V2_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_HS_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_HS_V2_OEM_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_LIGHT_2HS_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_2HS_PRODUCT_ID) }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err }, + + /* Leaf i.MX28 USB product IDs */ + { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_V2_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_HS_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_HS_V2_OEM_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_LIGHT_2HS_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_2HS_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx }, /* USBCANII USB product IDs */ { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN2_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_usbcan }, { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_REVB_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_usbcan }, { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMORATOR_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_usbcan }, { USB_DEVICE(KVASER_VENDOR_ID, USB_VCI2_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_usbcan }, /* Minihydra USB product IDs */ - { USB_DEVICE(KVASER_VENDOR_ID, USB_BLACKBIRD_V2_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_PRO_5HS_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_5HS_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_LIGHT_4HS_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_HS_V2_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_2HS_V2_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_2HS_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_PRO_2HS_V2_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_CANLIN_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_ATI_USBCAN_PRO_2HS_V2_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_ATI_MEMO_PRO_2HS_V2_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_PRO_CANLIN_PRODUCT_ID) }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_BLACKBIRD_V2_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_PRO_5HS_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_5HS_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_LIGHT_4HS_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_HS_V2_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_2HS_V2_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_2HS_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_PRO_2HS_V2_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_CANLIN_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_ATI_USBCAN_PRO_2HS_V2_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_ATI_MEMO_PRO_2HS_V2_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_PRO_CANLIN_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, { } }; MODULE_DEVICE_TABLE(usb, kvaser_usb_table); @@ -267,6 +297,7 @@ int kvaser_usb_can_rx_over_error(struct net_device *netdev) static void kvaser_usb_read_bulk_callback(struct urb *urb) { struct kvaser_usb *dev = urb->context; + const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops; int err; unsigned int i; @@ -283,8 +314,8 @@ static void kvaser_usb_read_bulk_callback(struct urb *urb) goto resubmit_urb; } - dev->ops->dev_read_bulk_callback(dev, urb->transfer_buffer, - urb->actual_length); + ops->dev_read_bulk_callback(dev, urb->transfer_buffer, + urb->actual_length); resubmit_urb: usb_fill_bulk_urb(urb, dev->udev, @@ -378,6 +409,7 @@ static int kvaser_usb_open(struct net_device *netdev) { struct kvaser_usb_net_priv *priv = netdev_priv(netdev); struct kvaser_usb *dev = priv->dev; + const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops; int err; err = open_candev(netdev); @@ -388,11 +420,11 @@ static int kvaser_usb_open(struct net_device *netdev) if (err) goto error; - err = dev->ops->dev_set_opt_mode(priv); + err = ops->dev_set_opt_mode(priv); if (err) goto error; - err = dev->ops->dev_start_chip(priv); + err = ops->dev_start_chip(priv); if (err) { netdev_warn(netdev, "Cannot start device, error %d\n", err); goto error; @@ -449,22 +481,23 @@ static int kvaser_usb_close(struct net_device *netdev) { struct kvaser_usb_net_priv *priv = netdev_priv(netdev); struct kvaser_usb *dev = priv->dev; + const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops; int err; netif_stop_queue(netdev); - err = dev->ops->dev_flush_queue(priv); + err = ops->dev_flush_queue(priv); if (err) netdev_warn(netdev, "Cannot flush queue, error %d\n", err); - if (dev->ops->dev_reset_chip) { - err = dev->ops->dev_reset_chip(dev, priv->channel); + if (ops->dev_reset_chip) { + err = ops->dev_reset_chip(dev, priv->channel); if (err) netdev_warn(netdev, "Cannot reset card, error %d\n", err); } - err = dev->ops->dev_stop_chip(priv); + err = ops->dev_stop_chip(priv); if (err) netdev_warn(netdev, "Cannot stop device, error %d\n", err); @@ -503,6 +536,7 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, { struct kvaser_usb_net_priv *priv = netdev_priv(netdev); struct kvaser_usb *dev = priv->dev; + const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops; struct net_device_stats *stats = &netdev->stats; struct kvaser_usb_tx_urb_context *context = NULL; struct urb *urb; @@ -545,8 +579,8 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, goto freeurb; } - buf = dev->ops->dev_frame_to_cmd(priv, skb, &context->dlc, &cmd_len, - context->echo_index); + buf = ops->dev_frame_to_cmd(priv, skb, &context->dlc, &cmd_len, + context->echo_index); if (!buf) { stats->tx_dropped++; dev_kfree_skb(skb); @@ -630,15 +664,16 @@ static void kvaser_usb_remove_interfaces(struct kvaser_usb *dev) } } -static int kvaser_usb_init_one(struct kvaser_usb *dev, - const struct usb_device_id *id, int channel) +static int kvaser_usb_init_one(struct kvaser_usb *dev, int channel) { struct net_device *netdev; struct kvaser_usb_net_priv *priv; + const struct kvaser_usb_driver_info *driver_info = dev->driver_info; + const struct kvaser_usb_dev_ops *ops = driver_info->ops; int err; - if (dev->ops->dev_reset_chip) { - err = dev->ops->dev_reset_chip(dev, channel); + if (ops->dev_reset_chip) { + err = ops->dev_reset_chip(dev, channel); if (err) return err; } @@ -667,20 +702,19 @@ static int kvaser_usb_init_one(struct kvaser_usb *dev, priv->can.state = CAN_STATE_STOPPED; priv->can.clock.freq = dev->cfg->clock.freq; priv->can.bittiming_const = dev->cfg->bittiming_const; - priv->can.do_set_bittiming = dev->ops->dev_set_bittiming; - priv->can.do_set_mode = dev->ops->dev_set_mode; - if ((id->driver_info & KVASER_USB_HAS_TXRX_ERRORS) || + priv->can.do_set_bittiming = ops->dev_set_bittiming; + priv->can.do_set_mode = ops->dev_set_mode; + if ((driver_info->quirks & KVASER_USB_QUIRK_HAS_TXRX_ERRORS) || (priv->dev->card_data.capabilities & KVASER_USB_CAP_BERR_CAP)) - priv->can.do_get_berr_counter = dev->ops->dev_get_berr_counter; - if (id->driver_info & KVASER_USB_HAS_SILENT_MODE) + priv->can.do_get_berr_counter = ops->dev_get_berr_counter; + if (driver_info->quirks & KVASER_USB_QUIRK_HAS_SILENT_MODE) priv->can.ctrlmode_supported |= CAN_CTRLMODE_LISTENONLY; priv->can.ctrlmode_supported |= dev->card_data.ctrlmode_supported; if (priv->can.ctrlmode_supported & CAN_CTRLMODE_FD) { priv->can.data_bittiming_const = dev->cfg->data_bittiming_const; - priv->can.do_set_data_bittiming = - dev->ops->dev_set_data_bittiming; + priv->can.do_set_data_bittiming = ops->dev_set_data_bittiming; } netdev->flags |= IFF_ECHO; @@ -711,29 +745,22 @@ static int kvaser_usb_probe(struct usb_interface *intf, struct kvaser_usb *dev; int err; int i; + const struct kvaser_usb_driver_info *driver_info; + const struct kvaser_usb_dev_ops *ops; + + driver_info = (const struct kvaser_usb_driver_info *)id->driver_info; + if (!driver_info) + return -ENODEV; dev = devm_kzalloc(&intf->dev, sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; - if (kvaser_is_leaf(id)) { - dev->card_data.leaf.family = KVASER_LEAF; - dev->ops = &kvaser_usb_leaf_dev_ops; - } else if (kvaser_is_usbcan(id)) { - dev->card_data.leaf.family = KVASER_USBCAN; - dev->ops = &kvaser_usb_leaf_dev_ops; - } else if (kvaser_is_hydra(id)) { - dev->ops = &kvaser_usb_hydra_dev_ops; - } else { - dev_err(&intf->dev, - "Product ID (%d) is not a supported Kvaser USB device\n", - id->idProduct); - return -ENODEV; - } - dev->intf = intf; + dev->driver_info = driver_info; + ops = driver_info->ops; - err = dev->ops->dev_setup_endpoints(dev); + err = ops->dev_setup_endpoints(dev); if (err) { dev_err(&intf->dev, "Cannot get usb endpoint(s)"); return err; @@ -747,22 +774,22 @@ static int kvaser_usb_probe(struct usb_interface *intf, dev->card_data.ctrlmode_supported = 0; dev->card_data.capabilities = 0; - err = dev->ops->dev_init_card(dev); + err = ops->dev_init_card(dev); if (err) { dev_err(&intf->dev, "Failed to initialize card, error %d\n", err); return err; } - err = dev->ops->dev_get_software_info(dev); + err = ops->dev_get_software_info(dev); if (err) { dev_err(&intf->dev, "Cannot get software info, error %d\n", err); return err; } - if (dev->ops->dev_get_software_details) { - err = dev->ops->dev_get_software_details(dev); + if (ops->dev_get_software_details) { + err = ops->dev_get_software_details(dev); if (err) { dev_err(&intf->dev, "Cannot get software details, error %d\n", err); @@ -780,14 +807,14 @@ static int kvaser_usb_probe(struct usb_interface *intf, dev_dbg(&intf->dev, "Max outstanding tx = %d URBs\n", dev->max_tx_urbs); - err = dev->ops->dev_get_card_info(dev); + err = ops->dev_get_card_info(dev); if (err) { dev_err(&intf->dev, "Cannot get card info, error %d\n", err); return err; } - if (dev->ops->dev_get_capabilities) { - err = dev->ops->dev_get_capabilities(dev); + if (ops->dev_get_capabilities) { + err = ops->dev_get_capabilities(dev); if (err) { dev_err(&intf->dev, "Cannot get capabilities, error %d\n", err); @@ -797,7 +824,7 @@ static int kvaser_usb_probe(struct usb_interface *intf, } for (i = 0; i < dev->nchannels; i++) { - err = kvaser_usb_init_one(dev, id, i); + err = kvaser_usb_init_one(dev, i); if (err) { kvaser_usb_remove_interfaces(dev); return err; diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c index 218fadc911558..a7c408acb0c09 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c @@ -371,7 +371,7 @@ static const struct can_bittiming_const kvaser_usb_hydra_kcan_bittiming_c = { .brp_inc = 1, }; -static const struct can_bittiming_const kvaser_usb_hydra_flexc_bittiming_c = { +const struct can_bittiming_const kvaser_usb_flexc_bittiming_const = { .name = "kvaser_usb_flex", .tseg1_min = 4, .tseg1_max = 16, @@ -2024,5 +2024,5 @@ static const struct kvaser_usb_dev_cfg kvaser_usb_hydra_dev_cfg_flexc = { .freq = 24000000, }, .timestamp_freq = 1, - .bittiming_const = &kvaser_usb_hydra_flexc_bittiming_c, + .bittiming_const = &kvaser_usb_flexc_bittiming_const, }; diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c index 1b9957f12459a..0e0403dd05500 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c @@ -28,10 +28,6 @@ #include "kvaser_usb.h" -/* Forward declaration */ -static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg; - -#define CAN_USB_CLOCK 8000000 #define MAX_USBCAN_NET_DEVICES 2 /* Command header size */ @@ -80,6 +76,12 @@ static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg; #define CMD_LEAF_LOG_MESSAGE 106 +/* Leaf frequency options */ +#define KVASER_USB_LEAF_SWOPTION_FREQ_MASK 0x60 +#define KVASER_USB_LEAF_SWOPTION_FREQ_16_MHZ_CLK 0 +#define KVASER_USB_LEAF_SWOPTION_FREQ_32_MHZ_CLK BIT(5) +#define KVASER_USB_LEAF_SWOPTION_FREQ_24_MHZ_CLK BIT(6) + /* error factors */ #define M16C_EF_ACKE BIT(0) #define M16C_EF_CRCE BIT(1) @@ -98,16 +100,6 @@ static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg; #define USBCAN_ERROR_STATE_RX_ERROR BIT(1) #define USBCAN_ERROR_STATE_BUSERROR BIT(2) -/* bittiming parameters */ -#define KVASER_USB_TSEG1_MIN 1 -#define KVASER_USB_TSEG1_MAX 16 -#define KVASER_USB_TSEG2_MIN 1 -#define KVASER_USB_TSEG2_MAX 8 -#define KVASER_USB_SJW_MAX 4 -#define KVASER_USB_BRP_MIN 1 -#define KVASER_USB_BRP_MAX 64 -#define KVASER_USB_BRP_INC 1 - /* ctrl modes */ #define KVASER_CTRL_MODE_NORMAL 1 #define KVASER_CTRL_MODE_SILENT 2 @@ -340,6 +332,70 @@ struct kvaser_usb_err_summary { }; }; +static const struct can_bittiming_const kvaser_usb_leaf_m16c_bittiming_const = { + .name = "kvaser_usb_ucii", + .tseg1_min = 4, + .tseg1_max = 16, + .tseg2_min = 2, + .tseg2_max = 8, + .sjw_max = 4, + .brp_min = 1, + .brp_max = 16, + .brp_inc = 1, +}; + +static const struct can_bittiming_const kvaser_usb_leaf_m32c_bittiming_const = { + .name = "kvaser_usb_leaf", + .tseg1_min = 3, + .tseg1_max = 16, + .tseg2_min = 2, + .tseg2_max = 8, + .sjw_max = 4, + .brp_min = 2, + .brp_max = 128, + .brp_inc = 2, +}; + +static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_usbcan_dev_cfg = { + .clock = { + .freq = 8000000, + }, + .timestamp_freq = 1, + .bittiming_const = &kvaser_usb_leaf_m16c_bittiming_const, +}; + +static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_m32c_dev_cfg = { + .clock = { + .freq = 16000000, + }, + .timestamp_freq = 1, + .bittiming_const = &kvaser_usb_leaf_m32c_bittiming_const, +}; + +static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_imx_dev_cfg_16mhz = { + .clock = { + .freq = 16000000, + }, + .timestamp_freq = 1, + .bittiming_const = &kvaser_usb_flexc_bittiming_const, +}; + +static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_imx_dev_cfg_24mhz = { + .clock = { + .freq = 24000000, + }, + .timestamp_freq = 1, + .bittiming_const = &kvaser_usb_flexc_bittiming_const, +}; + +static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_imx_dev_cfg_32mhz = { + .clock = { + .freq = 32000000, + }, + .timestamp_freq = 1, + .bittiming_const = &kvaser_usb_flexc_bittiming_const, +}; + static void * kvaser_usb_leaf_frame_to_cmd(const struct kvaser_usb_net_priv *priv, const struct sk_buff *skb, int *frame_len, @@ -359,7 +415,7 @@ kvaser_usb_leaf_frame_to_cmd(const struct kvaser_usb_net_priv *priv, sizeof(struct kvaser_cmd_tx_can); cmd->u.tx_can.channel = priv->channel; - switch (dev->card_data.leaf.family) { + switch (dev->driver_info->family) { case KVASER_LEAF: cmd_tx_can_flags = &cmd->u.tx_can.leaf.flags; break; @@ -471,6 +527,34 @@ static int kvaser_usb_leaf_send_simple_cmd(const struct kvaser_usb *dev, return rc; } +static void kvaser_usb_leaf_get_software_info_leaf(struct kvaser_usb *dev, + const struct leaf_cmd_softinfo *softinfo) +{ + u32 sw_options = le32_to_cpu(softinfo->sw_options); + + dev->fw_version = le32_to_cpu(softinfo->fw_version); + dev->max_tx_urbs = le16_to_cpu(softinfo->max_outstanding_tx); + + if (dev->driver_info->quirks & KVASER_USB_QUIRK_IGNORE_CLK_FREQ) { + /* Firmware expects bittiming parameters calculated for 16MHz + * clock, regardless of the actual clock + */ + dev->cfg = &kvaser_usb_leaf_m32c_dev_cfg; + } else { + switch (sw_options & KVASER_USB_LEAF_SWOPTION_FREQ_MASK) { + case KVASER_USB_LEAF_SWOPTION_FREQ_16_MHZ_CLK: + dev->cfg = &kvaser_usb_leaf_imx_dev_cfg_16mhz; + break; + case KVASER_USB_LEAF_SWOPTION_FREQ_24_MHZ_CLK: + dev->cfg = &kvaser_usb_leaf_imx_dev_cfg_24mhz; + break; + case KVASER_USB_LEAF_SWOPTION_FREQ_32_MHZ_CLK: + dev->cfg = &kvaser_usb_leaf_imx_dev_cfg_32mhz; + break; + } + } +} + static int kvaser_usb_leaf_get_software_info_inner(struct kvaser_usb *dev) { struct kvaser_cmd cmd; @@ -484,16 +568,15 @@ static int kvaser_usb_leaf_get_software_info_inner(struct kvaser_usb *dev) if (err) return err; - switch (dev->card_data.leaf.family) { + switch (dev->driver_info->family) { case KVASER_LEAF: - dev->fw_version = le32_to_cpu(cmd.u.leaf.softinfo.fw_version); - dev->max_tx_urbs = - le16_to_cpu(cmd.u.leaf.softinfo.max_outstanding_tx); + kvaser_usb_leaf_get_software_info_leaf(dev, &cmd.u.leaf.softinfo); break; case KVASER_USBCAN: dev->fw_version = le32_to_cpu(cmd.u.usbcan.softinfo.fw_version); dev->max_tx_urbs = le16_to_cpu(cmd.u.usbcan.softinfo.max_outstanding_tx); + dev->cfg = &kvaser_usb_leaf_usbcan_dev_cfg; break; } @@ -532,7 +615,7 @@ static int kvaser_usb_leaf_get_card_info(struct kvaser_usb *dev) dev->nchannels = cmd.u.cardinfo.nchannels; if (dev->nchannels > KVASER_USB_MAX_NET_DEVICES || - (dev->card_data.leaf.family == KVASER_USBCAN && + (dev->driver_info->family == KVASER_USBCAN && dev->nchannels > MAX_USBCAN_NET_DEVICES)) return -EINVAL; @@ -668,7 +751,7 @@ kvaser_usb_leaf_rx_error_update_can_state(struct kvaser_usb_net_priv *priv, new_state < CAN_STATE_BUS_OFF) priv->can.can_stats.restarts++; - switch (dev->card_data.leaf.family) { + switch (dev->driver_info->family) { case KVASER_LEAF: if (es->leaf.error_factor) { priv->can.can_stats.bus_error++; @@ -747,7 +830,7 @@ static void kvaser_usb_leaf_rx_error(const struct kvaser_usb *dev, } } - switch (dev->card_data.leaf.family) { + switch (dev->driver_info->family) { case KVASER_LEAF: if (es->leaf.error_factor) { cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT; @@ -939,7 +1022,7 @@ static void kvaser_usb_leaf_rx_can_msg(const struct kvaser_usb *dev, stats = &priv->netdev->stats; if ((cmd->u.rx_can_header.flag & MSG_FLAG_ERROR_FRAME) && - (dev->card_data.leaf.family == KVASER_LEAF && + (dev->driver_info->family == KVASER_LEAF && cmd->id == CMD_LEAF_LOG_MESSAGE)) { kvaser_usb_leaf_leaf_rx_error(dev, cmd); return; @@ -955,7 +1038,7 @@ static void kvaser_usb_leaf_rx_can_msg(const struct kvaser_usb *dev, return; } - switch (dev->card_data.leaf.family) { + switch (dev->driver_info->family) { case KVASER_LEAF: rx_data = cmd->u.leaf.rx_can.data; break; @@ -970,7 +1053,7 @@ static void kvaser_usb_leaf_rx_can_msg(const struct kvaser_usb *dev, return; } - if (dev->card_data.leaf.family == KVASER_LEAF && cmd->id == + if (dev->driver_info->family == KVASER_LEAF && cmd->id == CMD_LEAF_LOG_MESSAGE) { cf->can_id = le32_to_cpu(cmd->u.leaf.log_message.id); if (cf->can_id & KVASER_EXTENDED_FRAME) @@ -1067,14 +1150,14 @@ static void kvaser_usb_leaf_handle_command(const struct kvaser_usb *dev, break; case CMD_LEAF_LOG_MESSAGE: - if (dev->card_data.leaf.family != KVASER_LEAF) + if (dev->driver_info->family != KVASER_LEAF) goto warn; kvaser_usb_leaf_rx_can_msg(dev, cmd); break; case CMD_CHIP_STATE_EVENT: case CMD_CAN_ERROR_EVENT: - if (dev->card_data.leaf.family == KVASER_LEAF) + if (dev->driver_info->family == KVASER_LEAF) kvaser_usb_leaf_leaf_rx_error(dev, cmd); else kvaser_usb_leaf_usbcan_rx_error(dev, cmd); @@ -1086,12 +1169,12 @@ static void kvaser_usb_leaf_handle_command(const struct kvaser_usb *dev, /* Ignored commands */ case CMD_USBCAN_CLOCK_OVERFLOW_EVENT: - if (dev->card_data.leaf.family != KVASER_USBCAN) + if (dev->driver_info->family != KVASER_USBCAN) goto warn; break; case CMD_FLUSH_QUEUE_REPLY: - if (dev->card_data.leaf.family != KVASER_LEAF) + if (dev->driver_info->family != KVASER_LEAF) goto warn; break; @@ -1225,24 +1308,11 @@ static int kvaser_usb_leaf_init_card(struct kvaser_usb *dev) { struct kvaser_usb_dev_card_data *card_data = &dev->card_data; - dev->cfg = &kvaser_usb_leaf_dev_cfg; card_data->ctrlmode_supported |= CAN_CTRLMODE_3_SAMPLES; return 0; } -static const struct can_bittiming_const kvaser_usb_leaf_bittiming_const = { - .name = "kvaser_usb", - .tseg1_min = KVASER_USB_TSEG1_MIN, - .tseg1_max = KVASER_USB_TSEG1_MAX, - .tseg2_min = KVASER_USB_TSEG2_MIN, - .tseg2_max = KVASER_USB_TSEG2_MAX, - .sjw_max = KVASER_USB_SJW_MAX, - .brp_min = KVASER_USB_BRP_MIN, - .brp_max = KVASER_USB_BRP_MAX, - .brp_inc = KVASER_USB_BRP_INC, -}; - static int kvaser_usb_leaf_set_bittiming(struct net_device *netdev) { struct kvaser_usb_net_priv *priv = netdev_priv(netdev); @@ -1348,11 +1418,3 @@ const struct kvaser_usb_dev_ops kvaser_usb_leaf_dev_ops = { .dev_read_bulk_callback = kvaser_usb_leaf_read_bulk_callback, .dev_frame_to_cmd = kvaser_usb_leaf_frame_to_cmd, }; - -static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg = { - .clock = { - .freq = CAN_USB_CLOCK, - }, - .timestamp_freq = 1, - .bittiming_const = &kvaser_usb_leaf_bittiming_const, -}; diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index 41eee6f0491c6..957e51a77d4d1 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -33,10 +33,6 @@ #define MCBA_USB_RX_BUFF_SIZE 64 #define MCBA_USB_TX_BUFF_SIZE (sizeof(struct mcba_usb_msg)) -/* MCBA endpoint numbers */ -#define MCBA_USB_EP_IN 1 -#define MCBA_USB_EP_OUT 1 - /* Microchip command id */ #define MBCA_CMD_RECEIVE_MESSAGE 0xE3 #define MBCA_CMD_I_AM_ALIVE_FROM_CAN 0xF5 @@ -84,6 +80,8 @@ struct mcba_priv { atomic_t free_ctx_cnt; void *rxbuf[MCBA_MAX_RX_URBS]; dma_addr_t rxbuf_dma[MCBA_MAX_RX_URBS]; + int rx_pipe; + int tx_pipe; }; /* CAN frame */ @@ -272,10 +270,8 @@ static netdev_tx_t mcba_usb_xmit(struct mcba_priv *priv, memcpy(buf, usb_msg, MCBA_USB_TX_BUFF_SIZE); - usb_fill_bulk_urb(urb, priv->udev, - usb_sndbulkpipe(priv->udev, MCBA_USB_EP_OUT), buf, - MCBA_USB_TX_BUFF_SIZE, mcba_usb_write_bulk_callback, - ctx); + usb_fill_bulk_urb(urb, priv->udev, priv->tx_pipe, buf, MCBA_USB_TX_BUFF_SIZE, + mcba_usb_write_bulk_callback, ctx); urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; usb_anchor_urb(urb, &priv->tx_submitted); @@ -368,7 +364,6 @@ static netdev_tx_t mcba_usb_start_xmit(struct sk_buff *skb, xmit_failed: can_free_echo_skb(priv->netdev, ctx->ndx); mcba_usb_free_ctx(ctx); - dev_kfree_skb(skb); stats->tx_dropped++; return NETDEV_TX_OK; @@ -611,7 +606,7 @@ static void mcba_usb_read_bulk_callback(struct urb *urb) resubmit_urb: usb_fill_bulk_urb(urb, priv->udev, - usb_rcvbulkpipe(priv->udev, MCBA_USB_EP_OUT), + priv->rx_pipe, urb->transfer_buffer, MCBA_USB_RX_BUFF_SIZE, mcba_usb_read_bulk_callback, priv); @@ -656,7 +651,7 @@ static int mcba_usb_start(struct mcba_priv *priv) urb->transfer_dma = buf_dma; usb_fill_bulk_urb(urb, priv->udev, - usb_rcvbulkpipe(priv->udev, MCBA_USB_EP_IN), + priv->rx_pipe, buf, MCBA_USB_RX_BUFF_SIZE, mcba_usb_read_bulk_callback, priv); urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; @@ -810,6 +805,13 @@ static int mcba_usb_probe(struct usb_interface *intf, struct mcba_priv *priv; int err = -ENOMEM; struct usb_device *usbdev = interface_to_usbdev(intf); + struct usb_endpoint_descriptor *in, *out; + + err = usb_find_common_endpoints(intf->cur_altsetting, &in, &out, NULL, NULL); + if (err) { + dev_err(&intf->dev, "Can't find endpoints\n"); + return err; + } netdev = alloc_candev(sizeof(struct mcba_priv), MCBA_MAX_TX_URBS); if (!netdev) { @@ -855,6 +857,9 @@ static int mcba_usb_probe(struct usb_interface *intf, goto cleanup_free_candev; } + priv->rx_pipe = usb_rcvbulkpipe(priv->udev, in->bEndpointAddress); + priv->tx_pipe = usb_sndbulkpipe(priv->udev, out->bEndpointAddress); + devm_can_led_init(netdev); /* Start USB dev only if we have successfully registered CAN device */ diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c index 96bbdef672bc9..571d9b0bfe518 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c @@ -551,11 +551,10 @@ static int pcan_usb_fd_decode_status(struct pcan_usb_fd_if *usb_if, } else if (sm->channel_p_w_b & PUCAN_BUS_WARNING) { new_state = CAN_STATE_ERROR_WARNING; } else { - /* no error bit (so, no error skb, back to active state) */ - dev->can.state = CAN_STATE_ERROR_ACTIVE; + /* back to (or still in) ERROR_ACTIVE state */ + new_state = CAN_STATE_ERROR_ACTIVE; pdev->bec.txerr = 0; pdev->bec.rxerr = 0; - return 0; } /* state hasn't changed */ diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index c43e98bb6e2d7..b514b2eaa3180 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -670,9 +670,20 @@ static netdev_tx_t usb_8dev_start_xmit(struct sk_buff *skb, atomic_inc(&priv->active_tx_urbs); err = usb_submit_urb(urb, GFP_ATOMIC); - if (unlikely(err)) - goto failed; - else if (atomic_read(&priv->active_tx_urbs) >= MAX_TX_URBS) + if (unlikely(err)) { + can_free_echo_skb(netdev, context->echo_index); + + usb_unanchor_urb(urb); + usb_free_coherent(priv->udev, size, buf, urb->transfer_dma); + + atomic_dec(&priv->active_tx_urbs); + + if (err == -ENODEV) + netif_device_detach(netdev); + else + netdev_warn(netdev, "failed tx_urb %d\n", err); + stats->tx_dropped++; + } else if (atomic_read(&priv->active_tx_urbs) >= MAX_TX_URBS) /* Slow down tx path */ netif_stop_queue(netdev); @@ -691,19 +702,6 @@ static netdev_tx_t usb_8dev_start_xmit(struct sk_buff *skb, return NETDEV_TX_BUSY; -failed: - can_free_echo_skb(netdev, context->echo_index); - - usb_unanchor_urb(urb); - usb_free_coherent(priv->udev, size, buf, urb->transfer_dma); - - atomic_dec(&priv->active_tx_urbs); - - if (err == -ENODEV) - netif_device_detach(netdev); - else - netdev_warn(netdev, "failed tx_urb %d\n", err); - nomembuf: usb_free_urb(urb); diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c index 7000c6cd1e48b..282c53ef76d23 100644 --- a/drivers/net/can/vxcan.c +++ b/drivers/net/can/vxcan.c @@ -148,7 +148,7 @@ static void vxcan_setup(struct net_device *dev) dev->hard_header_len = 0; dev->addr_len = 0; dev->tx_queue_len = 0; - dev->flags = (IFF_NOARP|IFF_ECHO); + dev->flags = IFF_NOARP; dev->netdev_ops = &vxcan_netdev_ops; dev->needs_free_netdev = true; diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 0de39ebb35662..be3811311db2d 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -239,7 +239,7 @@ static const struct can_bittiming_const xcan_bittiming_const_canfd = { }; /* AXI CANFD Data Bittiming constants as per AXI CANFD 1.0 specs */ -static struct can_bittiming_const xcan_data_bittiming_const_canfd = { +static const struct can_bittiming_const xcan_data_bittiming_const_canfd = { .name = DRIVER_NAME, .tseg1_min = 1, .tseg1_max = 16, @@ -265,7 +265,7 @@ static const struct can_bittiming_const xcan_bittiming_const_canfd2 = { }; /* AXI CANFD 2.0 Data Bittiming constants as per AXI CANFD 2.0 spec */ -static struct can_bittiming_const xcan_data_bittiming_const_canfd2 = { +static const struct can_bittiming_const xcan_data_bittiming_const_canfd2 = { .name = DRIVER_NAME, .tseg1_min = 1, .tseg1_max = 32, @@ -1753,7 +1753,12 @@ static int xcan_probe(struct platform_device *pdev) spin_lock_init(&priv->tx_lock); /* Get IRQ for the device */ - ndev->irq = platform_get_irq(pdev, 0); + ret = platform_get_irq(pdev, 0); + if (ret < 0) + goto err_free; + + ndev->irq = ret; + ndev->flags |= IFF_ECHO; /* We support local echo */ platform_set_drvdata(pdev, ndev); diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index e78b683f73052..825d840cdb8c3 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2353,9 +2353,8 @@ static int b53_switch_init(struct b53_device *dev) dev->cpu_port = 5; } - /* cpu port is always last */ - dev->num_ports = dev->cpu_port + 1; dev->enabled_ports |= BIT(dev->cpu_port); + dev->num_ports = fls(dev->enabled_ports); /* Include non standard CPU port built-in PHYs to be probed */ if (is539x(dev) || is531x5(dev)) { diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 0ee1c0a7b165b..282e9e2a50d9c 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -602,6 +602,11 @@ static void bcm_sf2_sw_mac_config(struct dsa_switch *ds, int port, reg |= LINK_STS; if (state->duplex == DUPLEX_FULL) reg |= DUPLX_MODE; + if (state->pause & MLO_PAUSE_TXRX_MASK) { + if (state->pause & MLO_PAUSE_TX) + reg |= TXFLOW_CNTL; + reg |= RXFLOW_CNTL; + } core_writel(priv, reg, offset); } diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index e15d18bb981e9..5cc31118b97e9 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -542,14 +542,14 @@ static void bcm_sf2_cfp_slice_ipv6(struct bcm_sf2_priv *priv, static struct cfp_rule *bcm_sf2_cfp_rule_find(struct bcm_sf2_priv *priv, int port, u32 location) { - struct cfp_rule *rule = NULL; + struct cfp_rule *rule; list_for_each_entry(rule, &priv->cfp.rules_list, next) { if (rule->port == port && rule->fs.location == location) - break; + return rule; } - return rule; + return NULL; } static int bcm_sf2_cfp_rule_cmp(struct bcm_sf2_priv *priv, int port, diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index 19d1f1c51f97e..4b9d5b0ce416b 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -1303,7 +1303,7 @@ static int lan9303_probe_reset_gpio(struct lan9303 *chip, struct device_node *np) { chip->reset_gpio = devm_gpiod_get_optional(chip->dev, "reset", - GPIOD_OUT_LOW); + GPIOD_OUT_HIGH); if (IS_ERR(chip->reset_gpio)) return PTR_ERR(chip->reset_gpio); diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index af3d56636a076..b546002e5fd41 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -229,7 +229,7 @@ #define GSWIP_SDMA_PCTRLp(p) (0xBC0 + ((p) * 0x6)) #define GSWIP_SDMA_PCTRL_EN BIT(0) /* SDMA Port Enable */ #define GSWIP_SDMA_PCTRL_FCEN BIT(1) /* Flow Control Enable */ -#define GSWIP_SDMA_PCTRL_PAUFWD BIT(1) /* Pause Frame Forwarding */ +#define GSWIP_SDMA_PCTRL_PAUFWD BIT(3) /* Pause Frame Forwarding */ #define GSWIP_TABLE_ACTIVE_VLAN 0x01 #define GSWIP_TABLE_VLAN_MAPPING 0x02 @@ -837,7 +837,8 @@ static int gswip_setup(struct dsa_switch *ds) gswip_switch_mask(priv, 0, GSWIP_MAC_CTRL_2_MLEN, GSWIP_MAC_CTRL_2p(cpu_port)); - gswip_switch_w(priv, VLAN_ETH_FRAME_LEN + 8, GSWIP_MAC_FLEN); + gswip_switch_w(priv, VLAN_ETH_FRAME_LEN + 8 + ETH_FCS_LEN, + GSWIP_MAC_FLEN); gswip_switch_mask(priv, 0, GSWIP_BM_QUEUE_GCTRL_GL_MOD, GSWIP_BM_QUEUE_GCTRL); @@ -1606,9 +1607,6 @@ static void gswip_phylink_mac_config(struct dsa_switch *ds, int port, break; case PHY_INTERFACE_MODE_RMII: miicfg |= GSWIP_MII_CFG_MODE_RMIIM; - - /* Configure the RMII clock as output: */ - miicfg |= GSWIP_MII_CFG_RMII_CLK; break; case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_ID: @@ -1960,8 +1958,10 @@ static int gswip_gphy_fw_list(struct gswip_priv *priv, for_each_available_child_of_node(gphy_fw_list_np, gphy_fw_np) { err = gswip_gphy_fw_probe(priv, &priv->gphy_fw[i], gphy_fw_np, i); - if (err) + if (err) { + of_node_put(gphy_fw_np); goto remove_gphy; + } i++; } diff --git a/drivers/net/dsa/microchip/ksz8795_spi.c b/drivers/net/dsa/microchip/ksz8795_spi.c index 8b00f8e6c02f4..5639c5c59e255 100644 --- a/drivers/net/dsa/microchip/ksz8795_spi.c +++ b/drivers/net/dsa/microchip/ksz8795_spi.c @@ -86,12 +86,23 @@ static const struct of_device_id ksz8795_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, ksz8795_dt_ids); +static const struct spi_device_id ksz8795_spi_ids[] = { + { "ksz8765" }, + { "ksz8794" }, + { "ksz8795" }, + { "ksz8863" }, + { "ksz8873" }, + { }, +}; +MODULE_DEVICE_TABLE(spi, ksz8795_spi_ids); + static struct spi_driver ksz8795_spi_driver = { .driver = { .name = "ksz8795-switch", .owner = THIS_MODULE, .of_match_table = of_match_ptr(ksz8795_dt_ids), }, + .id_table = ksz8795_spi_ids, .probe = ksz8795_spi_probe, .remove = ksz8795_spi_remove, .shutdown = ksz8795_spi_shutdown, diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c index 1142768969c20..9bda83d063e8e 100644 --- a/drivers/net/dsa/microchip/ksz9477_spi.c +++ b/drivers/net/dsa/microchip/ksz9477_spi.c @@ -88,12 +88,24 @@ static const struct of_device_id ksz9477_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, ksz9477_dt_ids); +static const struct spi_device_id ksz9477_spi_ids[] = { + { "ksz9477" }, + { "ksz9897" }, + { "ksz9893" }, + { "ksz9563" }, + { "ksz8563" }, + { "ksz9567" }, + { }, +}; +MODULE_DEVICE_TABLE(spi, ksz9477_spi_ids); + static struct spi_driver ksz9477_spi_driver = { .driver = { .name = "ksz9477-switch", .owner = THIS_MODULE, .of_match_table = of_match_ptr(ksz9477_dt_ids), }, + .id_table = ksz9477_spi_ids, .probe = ksz9477_spi_probe, .remove = ksz9477_spi_remove, .shutdown = ksz9477_spi_shutdown, diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index e1a3c33fdad90..2d8382eb9add3 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -842,11 +842,8 @@ mt7530_port_bridge_leave(struct dsa_switch *ds, int port, /* Remove this port from the port matrix of the other ports * in the same bridge. If the port is disabled, port matrix * is kept and not being setup until the port becomes enabled. - * And the other port's port matrix cannot be broken when the - * other port is still a VLAN-aware port. */ - if (dsa_is_user_port(ds, i) && i != port && - !dsa_port_is_vlan_filtering(&ds->ports[i])) { + if (dsa_is_user_port(ds, i) && i != port) { if (dsa_to_port(ds, i)->bridge_dev != bridge) continue; if (priv->ports[i].enable) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 40b105daaf9e7..b336ed071fa89 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2910,6 +2910,7 @@ static int mv88e6xxx_mdios_register(struct mv88e6xxx_chip *chip, */ child = of_get_child_by_name(np, "mdio"); err = mv88e6xxx_mdio_register(chip, child, false); + of_node_put(child); if (err) return err; @@ -3062,6 +3063,7 @@ static const struct mv88e6xxx_ops mv88e6097_ops = { .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_speed = mv88e6185_port_set_speed, .port_tag_remap = mv88e6095_port_tag_remap, + .port_set_policy = mv88e6352_port_set_policy, .port_set_frame_mode = mv88e6351_port_set_frame_mode, .port_set_egress_floods = mv88e6352_port_set_egress_floods, .port_set_ether_type = mv88e6351_port_set_ether_type, diff --git a/drivers/net/dsa/rtl8366rb.c b/drivers/net/dsa/rtl8366rb.c index 7f731bf369980..d047004360615 100644 --- a/drivers/net/dsa/rtl8366rb.c +++ b/drivers/net/dsa/rtl8366rb.c @@ -1264,7 +1264,7 @@ static int rtl8366rb_set_mc_index(struct realtek_smi *smi, int port, int index) static bool rtl8366rb_is_vlan_valid(struct realtek_smi *smi, unsigned int vlan) { - unsigned int max = RTL8366RB_NUM_VLANS; + unsigned int max = RTL8366RB_NUM_VLANS - 1; if (smi->vlan4k_enabled) max = RTL8366RB_NUM_VIDS - 1; diff --git a/drivers/net/ethernet/8390/mcf8390.c b/drivers/net/ethernet/8390/mcf8390.c index 4ad8031ab6695..065fdbe66c425 100644 --- a/drivers/net/ethernet/8390/mcf8390.c +++ b/drivers/net/ethernet/8390/mcf8390.c @@ -406,12 +406,12 @@ static int mcf8390_init(struct net_device *dev) static int mcf8390_probe(struct platform_device *pdev) { struct net_device *dev; - struct resource *mem, *irq; + struct resource *mem; resource_size_t msize; - int ret; + int ret, irq; - irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - if (irq == NULL) { + irq = platform_get_irq(pdev, 0); + if (irq < 0) { dev_err(&pdev->dev, "no IRQ specified?\n"); return -ENXIO; } @@ -434,7 +434,7 @@ static int mcf8390_probe(struct platform_device *pdev) SET_NETDEV_DEV(dev, &pdev->dev); platform_set_drvdata(pdev, dev); - dev->irq = irq->start; + dev->irq = irq; dev->base_addr = mem->start; ret = mcf8390_init(dev); diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index e8e9c166185de..2d4eb2f280a12 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -100,6 +100,7 @@ config JME config KORINA tristate "Korina (IDT RC32434) Ethernet support" depends on MIKROTIK_RB532 + select CRC32 ---help--- If you have a Mikrotik RouterBoard 500 or IDT RC32434 based system say Y. Otherwise say N. diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index bb032be7fe31e..f36536114790b 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -163,7 +163,8 @@ static int altera_tse_mdio_create(struct net_device *dev, unsigned int id) mdio = mdiobus_alloc(); if (mdio == NULL) { netdev_err(dev, "Error allocating MDIO bus\n"); - return -ENOMEM; + ret = -ENOMEM; + goto put_node; } mdio->name = ALTERA_TSE_RESOURCE_NAME; @@ -180,6 +181,7 @@ static int altera_tse_mdio_create(struct net_device *dev, unsigned int id) mdio->id); goto out_free_mdio; } + of_node_put(mdio_node); if (netif_msg_drv(priv)) netdev_info(dev, "MDIO bus %s: created\n", mdio->id); @@ -189,6 +191,8 @@ static int altera_tse_mdio_create(struct net_device *dev, unsigned int id) out_free_mdio: mdiobus_free(mdio); mdio = NULL; +put_node: + of_node_put(mdio_node); return ret; } @@ -1431,16 +1435,19 @@ static int altera_tse_probe(struct platform_device *pdev) priv->rxdescmem_busaddr = dma_res->start; } else { + ret = -ENODEV; goto err_free_netdev; } - if (!dma_set_mask(priv->device, DMA_BIT_MASK(priv->dmaops->dmamask))) + if (!dma_set_mask(priv->device, DMA_BIT_MASK(priv->dmaops->dmamask))) { dma_set_coherent_mask(priv->device, DMA_BIT_MASK(priv->dmaops->dmamask)); - else if (!dma_set_mask(priv->device, DMA_BIT_MASK(32))) + } else if (!dma_set_mask(priv->device, DMA_BIT_MASK(32))) { dma_set_coherent_mask(priv->device, DMA_BIT_MASK(32)); - else + } else { + ret = -EIO; goto err_free_netdev; + } /* MAC address space */ ret = request_and_map(pdev, "control_port", &control_port, diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index b2cd3bdba9f89..533b8519ec352 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -1331,6 +1331,10 @@ #define MDIO_VEND2_PMA_CDR_CONTROL 0x8056 #endif +#ifndef MDIO_VEND2_PMA_MISC_CTRL0 +#define MDIO_VEND2_PMA_MISC_CTRL0 0x8090 +#endif + #ifndef MDIO_CTRL1_SPEED1G #define MDIO_CTRL1_SPEED1G (MDIO_CTRL1_SPEED10G & ~BMCR_SPEED100) #endif @@ -1389,6 +1393,10 @@ #define XGBE_PMA_RX_RST_0_RESET_ON 0x10 #define XGBE_PMA_RX_RST_0_RESET_OFF 0x00 +#define XGBE_PMA_PLL_CTRL_MASK BIT(15) +#define XGBE_PMA_PLL_CTRL_ENABLE BIT(15) +#define XGBE_PMA_PLL_CTRL_DISABLE 0x0000 + /* Bit setting and getting macros * The get macro will extract the current bit field value from within * the variable diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index da8c2c4aca7ef..0442d7e1cd20b 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -721,7 +721,9 @@ static void xgbe_stop_timers(struct xgbe_prv_data *pdata) if (!channel->tx_ring) break; + /* Deactivate the Tx timer */ del_timer_sync(&channel->tx_timer); + channel->tx_timer_active = 0; } } @@ -2765,6 +2767,14 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget) buf2_len = xgbe_rx_buf2_len(rdata, packet, len); len += buf2_len; + if (buf2_len > rdata->rx.buf.dma_len) { + /* Hardware inconsistency within the descriptors + * that has resulted in a length underflow. + */ + error = 1; + goto skip_data; + } + if (!skb) { skb = xgbe_create_skb(pdata, napi, rdata, buf1_len); @@ -2794,8 +2804,10 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget) if (!last || context_next) goto read_again; - if (!skb) + if (!skb || error) { + dev_kfree_skb(skb); goto next_packet; + } /* Be sure we don't exceed the configured MTU */ max_len = netdev->mtu + ETH_HLEN; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c index 7b86240ecd5fe..c4f1fc97987ae 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c @@ -418,6 +418,9 @@ static void xgbe_pci_remove(struct pci_dev *pdev) pci_free_irq_vectors(pdata->pcidev); + /* Disable all interrupts in the hardware */ + XP_IOWRITE(pdata, XP_INT_EN, 0x0); + xgbe_free_pdata(pdata); } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index d6f6afb67bcc6..0b325ae875b52 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -1972,12 +1972,26 @@ static void xgbe_phy_rx_reset(struct xgbe_prv_data *pdata) } } +static void xgbe_phy_pll_ctrl(struct xgbe_prv_data *pdata, bool enable) +{ + XMDIO_WRITE_BITS(pdata, MDIO_MMD_PMAPMD, MDIO_VEND2_PMA_MISC_CTRL0, + XGBE_PMA_PLL_CTRL_MASK, + enable ? XGBE_PMA_PLL_CTRL_ENABLE + : XGBE_PMA_PLL_CTRL_DISABLE); + + /* Wait for command to complete */ + usleep_range(100, 200); +} + static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata, unsigned int cmd, unsigned int sub_cmd) { unsigned int s0 = 0; unsigned int wait; + /* Disable PLL re-initialization during FW command processing */ + xgbe_phy_pll_ctrl(pdata, false); + /* Log if a previous command did not complete */ if (XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS)) { netif_dbg(pdata, link, pdata->netdev, @@ -1998,7 +2012,7 @@ static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata, wait = XGBE_RATECHANGE_COUNT; while (wait--) { if (!XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS)) - return; + goto reenable_pll; usleep_range(1000, 2000); } @@ -2008,6 +2022,10 @@ static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata, /* Reset on error */ xgbe_phy_rx_reset(pdata); + +reenable_pll: + /* Enable PLL re-initialization */ + xgbe_phy_pll_ctrl(pdata, true); } static void xgbe_phy_rrc(struct xgbe_prv_data *pdata) diff --git a/drivers/net/ethernet/apm/xgene-v2/main.c b/drivers/net/ethernet/apm/xgene-v2/main.c index 02b4f3af02b54..848be6bf2fd1f 100644 --- a/drivers/net/ethernet/apm/xgene-v2/main.c +++ b/drivers/net/ethernet/apm/xgene-v2/main.c @@ -677,11 +677,13 @@ static int xge_probe(struct platform_device *pdev) ret = register_netdev(ndev); if (ret) { netdev_err(ndev, "Failed to register netdev\n"); - goto err; + goto err_mdio_remove; } return 0; +err_mdio_remove: + xge_mdio_remove(ndev); err: free_netdev(ndev); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index cc8031ae9aa3f..ce4e617a6ec49 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -696,6 +696,12 @@ static int xgene_enet_rx_frame(struct xgene_enet_desc_ring *rx_ring, buf_pool->rx_skb[skb_index] = NULL; datalen = xgene_enet_get_data_len(le64_to_cpu(raw_desc->m1)); + + /* strip off CRC as HW isn't doing this */ + nv = GET_VAL(NV, le64_to_cpu(raw_desc->m0)); + if (!nv) + datalen -= 4; + skb_put(skb, datalen); prefetch(skb->data - NET_IP_ALIGN); skb->protocol = eth_type_trans(skb, ndev); @@ -717,12 +723,8 @@ static int xgene_enet_rx_frame(struct xgene_enet_desc_ring *rx_ring, } } - nv = GET_VAL(NV, le64_to_cpu(raw_desc->m0)); - if (!nv) { - /* strip off CRC as HW isn't doing this */ - datalen -= 4; + if (!nv) goto skip_jumbo; - } slots = page_pool->slots - 1; head = page_pool->head; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 03821b46a8cb4..4c22f119ac62f 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -305,6 +305,10 @@ int aq_ring_rx_clean(struct aq_ring_s *self, if (!buff->is_eop) { buff_ = buff; do { + if (buff_->next >= self->size) { + err = -EIO; + goto err_exit; + } next_ = buff_->next, buff_ = &self->buff_ring[next_]; is_rsc_completed = @@ -327,6 +331,10 @@ int aq_ring_rx_clean(struct aq_ring_s *self, if (buff->is_error || buff->is_cso_err) { buff_ = buff; do { + if (buff_->next >= self->size) { + err = -EIO; + goto err_exit; + } next_ = buff_->next, buff_ = &self->buff_ring[next_]; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 2ad3fa6316ce3..cb5954eeb4090 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -674,6 +674,13 @@ static int hw_atl_b0_hw_ring_tx_head_update(struct aq_hw_s *self, err = -ENXIO; goto err_exit; } + + /* Validate that the new hw_head_ is reasonable. */ + if (hw_head_ >= ring->size) { + err = -ENXIO; + goto err_exit; + } + ring->hw_head = hw_head_; err = aq_hw_err_from_flags(self); diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c index 873f9865f0d15..b7d70c33459fd 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c @@ -481,6 +481,11 @@ int hw_atl_utils_fw_rpc_wait(struct aq_hw_s *self, goto err_exit; if (fw.len == 0xFFFFU) { + if (sw.len > sizeof(self->rpc)) { + printk(KERN_INFO "Invalid sw len: %x\n", sw.len); + err = -EINVAL; + goto err_exit; + } err = hw_atl_utils_fw_rpc_call(self, sw.len); if (err < 0) goto err_exit; @@ -489,6 +494,11 @@ int hw_atl_utils_fw_rpc_wait(struct aq_hw_s *self, if (rpc) { if (fw.len) { + if (fw.len > sizeof(self->rpc)) { + printk(KERN_INFO "Invalid fw len: %x\n", fw.len); + err = -EINVAL; + goto err_exit; + } err = hw_atl_utils_fw_downld_dwords(self, self->rpc_addr, diff --git a/drivers/net/ethernet/arc/Kconfig b/drivers/net/ethernet/arc/Kconfig index 45c663d8b9aab..2cd0e45d0b6c1 100644 --- a/drivers/net/ethernet/arc/Kconfig +++ b/drivers/net/ethernet/arc/Kconfig @@ -21,6 +21,7 @@ config ARC_EMAC_CORE depends on ARC || ARCH_ROCKCHIP || COMPILE_TEST select MII select PHYLIB + select CRC32 config ARC_EMAC tristate "ARC EMAC support" diff --git a/drivers/net/ethernet/broadcom/Makefile b/drivers/net/ethernet/broadcom/Makefile index 7046ad6d3d0e3..ac50da49ca770 100644 --- a/drivers/net/ethernet/broadcom/Makefile +++ b/drivers/net/ethernet/broadcom/Makefile @@ -16,3 +16,8 @@ obj-$(CONFIG_BGMAC_BCMA) += bgmac-bcma.o bgmac-bcma-mdio.o obj-$(CONFIG_BGMAC_PLATFORM) += bgmac-platform.o obj-$(CONFIG_SYSTEMPORT) += bcmsysport.o obj-$(CONFIG_BNXT) += bnxt/ + +# FIXME: temporarily silence -Warray-bounds on non W=1+ builds +ifndef KBUILD_EXTRA_WARN +CFLAGS_tg3.o += -Wno-array-bounds +endif diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 470d12e308814..5a2094a281e15 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1277,11 +1277,11 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb, struct bcm_sysport_priv *priv = netdev_priv(dev); struct device *kdev = &priv->pdev->dev; struct bcm_sysport_tx_ring *ring; + unsigned long flags, desc_flags; struct bcm_sysport_cb *cb; struct netdev_queue *txq; u32 len_status, addr_lo; unsigned int skb_len; - unsigned long flags; dma_addr_t mapping; u16 queue; int ret; @@ -1339,8 +1339,10 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb, ring->desc_count--; /* Ports are latched, so write upper address first */ + spin_lock_irqsave(&priv->desc_lock, desc_flags); tdma_writel(priv, len_status, TDMA_WRITE_PORT_HI(ring->index)); tdma_writel(priv, addr_lo, TDMA_WRITE_PORT_LO(ring->index)); + spin_unlock_irqrestore(&priv->desc_lock, desc_flags); /* Check ring space and update SW control flow */ if (ring->desc_count == 0) @@ -1970,6 +1972,7 @@ static int bcm_sysport_open(struct net_device *dev) } /* Initialize both hardware and software ring */ + spin_lock_init(&priv->desc_lock); for (i = 0; i < dev->num_tx_queues; i++) { ret = bcm_sysport_init_tx_ring(priv, i); if (ret) { diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h index 6d80735fbc7f4..57336ca3f4277 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.h +++ b/drivers/net/ethernet/broadcom/bcmsysport.h @@ -742,6 +742,7 @@ struct bcm_sysport_priv { int wol_irq; /* Transmit rings */ + spinlock_t desc_lock; struct bcm_sysport_tx_ring *tx_rings; /* Receive queue */ diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c index 34d18302b1a38..2d52754afc33a 100644 --- a/drivers/net/ethernet/broadcom/bgmac-bcma.c +++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c @@ -323,7 +323,6 @@ static void bgmac_remove(struct bcma_device *core) bcma_mdio_mii_unregister(bgmac->mii_bus); bgmac_enet_remove(bgmac); bcma_set_drvdata(core, NULL); - kfree(bgmac); } static struct bcma_driver bgmac_bcma_driver = { diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init_ops.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init_ops.h index 1835d2e451c01..fc7fce642666c 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init_ops.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init_ops.h @@ -635,11 +635,13 @@ static int bnx2x_ilt_client_mem_op(struct bnx2x *bp, int cli_num, { int i, rc; struct bnx2x_ilt *ilt = BP_ILT(bp); - struct ilt_client_info *ilt_cli = &ilt->clients[cli_num]; + struct ilt_client_info *ilt_cli; if (!ilt || !ilt->lines) return -1; + ilt_cli = &ilt->clients[cli_num]; + if (ilt_cli->flags & (ILT_CLIENT_SKIP_INIT | ILT_CLIENT_SKIP_MEM)) return 0; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index cff64e43bdd80..b5f58c62e7d20 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -14267,10 +14267,6 @@ static int bnx2x_eeh_nic_unload(struct bnx2x *bp) /* Stop Tx */ bnx2x_tx_disable(bp); - /* Delete all NAPI objects */ - bnx2x_del_all_napi(bp); - if (CNIC_LOADED(bp)) - bnx2x_del_all_napi_cnic(bp); netdev_reset_tc(bp->dev); del_timer_sync(&bp->timer); @@ -14375,6 +14371,11 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev) bnx2x_drain_tx_queues(bp); bnx2x_send_unload_req(bp, UNLOAD_RECOVERY); bnx2x_netif_stop(bp, 1); + bnx2x_del_all_napi(bp); + + if (CNIC_LOADED(bp)) + bnx2x_del_all_napi_cnic(bp); + bnx2x_free_irq(bp); /* Report UNLOAD_DONE to MCP */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index cf39623b828b7..4630998d47fd4 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -1246,7 +1246,7 @@ int bnx2x_iov_init_one(struct bnx2x *bp, int int_mode_param, /* SR-IOV capability was enabled but there are no VFs*/ if (iov->total == 0) { - err = -EINVAL; + err = 0; goto failed; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index e67f07faca789..5a7d5e7f3b238 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -372,7 +372,7 @@ static bool bnxt_txr_netif_try_stop_queue(struct bnxt *bp, * netif_tx_queue_stopped(). */ smp_mb(); - if (bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh) { + if (bnxt_tx_avail(bp, txr) >= bp->tx_wake_thresh) { netif_tx_wake_queue(txq); return false; } @@ -701,7 +701,7 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts) smp_mb(); if (unlikely(netif_tx_queue_stopped(txq)) && - bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh && + bnxt_tx_avail(bp, txr) >= bp->tx_wake_thresh && READ_ONCE(txr->dev_state) != BNXT_DEV_STATE_CLOSING) netif_tx_wake_queue(txq); } @@ -2206,7 +2206,7 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, if (TX_CMP_TYPE(txcmp) == CMP_TYPE_TX_L2_CMP) { tx_pkts++; /* return full budget so NAPI will complete. */ - if (unlikely(tx_pkts > bp->tx_wake_thresh)) { + if (unlikely(tx_pkts >= bp->tx_wake_thresh)) { rx_pkts = budget; raw_cons = NEXT_RAW_CMP(raw_cons); if (budget) @@ -3329,7 +3329,7 @@ static int bnxt_init_tx_rings(struct bnxt *bp) u16 i; bp->tx_wake_thresh = max_t(int, bp->tx_ring_size / 2, - MAX_SKB_FRAGS + 1); + BNXT_MIN_TX_DESC_CNT); for (i = 0; i < bp->tx_nr_rings; i++) { struct bnxt_tx_ring_info *txr = &bp->tx_ring[i]; @@ -9791,7 +9791,7 @@ static bool bnxt_rfs_capable(struct bnxt *bp) if (bp->flags & BNXT_FLAG_CHIP_P5) return bnxt_rfs_supported(bp); - if (!(bp->flags & BNXT_FLAG_MSIX_CAP) || !bnxt_can_reserve_rings(bp)) + if (!(bp->flags & BNXT_FLAG_MSIX_CAP) || !bnxt_can_reserve_rings(bp) || !bp->rx_nr_rings) return false; vnics = 1 + bp->rx_nr_rings; @@ -11725,10 +11725,9 @@ static int bnxt_init_dflt_ring_mode(struct bnxt *bp) goto init_dflt_ring_err; bp->tx_nr_rings_per_tc = bp->tx_nr_rings; - if (bnxt_rfs_supported(bp) && bnxt_rfs_capable(bp)) { - bp->flags |= BNXT_FLAG_RFS; - bp->dev->features |= NETIF_F_NTUPLE; - } + + bnxt_set_dflt_rfs(bp); + init_dflt_ring_err: bnxt_ulp_irq_restart(bp, rc); return rc; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 510ff01bdad8c..9e8a0c772ca93 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -559,7 +559,8 @@ struct nqe_cn { #define BNXT_MAX_MTU 9500 #define BNXT_MAX_PAGE_MODE_MTU \ ((unsigned int)PAGE_SIZE - VLAN_ETH_HLEN - NET_IP_ALIGN - \ - XDP_PACKET_HEADROOM) + XDP_PACKET_HEADROOM - \ + SKB_DATA_ALIGN((unsigned int)sizeof(struct skb_shared_info))) #define BNXT_MIN_PKT_SIZE 52 @@ -601,6 +602,11 @@ struct nqe_cn { #define BNXT_MAX_RX_JUM_DESC_CNT (RX_DESC_CNT * MAX_RX_AGG_PAGES - 1) #define BNXT_MAX_TX_DESC_CNT (TX_DESC_CNT * MAX_TX_PAGES - 1) +/* Minimum TX BDs for a TX packet with MAX_SKB_FRAGS + 1. We need one extra + * BD because the first TX BD is always a long BD. + */ +#define BNXT_MIN_TX_DESC_CNT (MAX_SKB_FRAGS + 2) + #define RX_RING(x) (((x) & ~(RX_DESC_CNT - 1)) >> (BNXT_PAGE_SHIFT - 4)) #define RX_IDX(x) ((x) & (RX_DESC_CNT - 1)) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 2118523782246..d74c6a34b936a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -744,7 +744,7 @@ static int bnxt_set_ringparam(struct net_device *dev, if ((ering->rx_pending > BNXT_MAX_RX_DESC_CNT) || (ering->tx_pending > BNXT_MAX_TX_DESC_CNT) || - (ering->tx_pending <= MAX_SKB_FRAGS)) + (ering->tx_pending < BNXT_MIN_TX_DESC_CNT)) return -EINVAL; if (netif_running(dev)) @@ -1673,9 +1673,7 @@ static int bnxt_set_pauseparam(struct net_device *dev, } link_info->autoneg |= BNXT_AUTONEG_FLOW_CTRL; - if (bp->hwrm_spec_code >= 0x10201) - link_info->req_flow_ctrl = - PORT_PHY_CFG_REQ_AUTO_PAUSE_AUTONEG_PAUSE; + link_info->req_flow_ctrl = 0; } else { /* when transition from auto pause to force pause, * force a link change diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index c2a1fa75b2147..e03e2bfcc6a10 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1547,6 +1547,11 @@ static struct sk_buff *bcmgenet_put_tx_csum(struct net_device *dev, return skb; } +static void bcmgenet_hide_tsb(struct sk_buff *skb) +{ + __skb_pull(skb, sizeof(struct status_64)); +} + static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); @@ -1655,6 +1660,8 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev) } GENET_CB(skb)->last_cb = tx_cb_ptr; + + bcmgenet_hide_tsb(skb); skb_tx_timestamp(skb); /* Decrement total BD count and advance our write pointer */ @@ -3507,10 +3514,12 @@ static int bcmgenet_probe(struct platform_device *pdev) /* Request the WOL interrupt and advertise suspend if available */ priv->wol_irq_disabled = true; - err = devm_request_irq(&pdev->dev, priv->wol_irq, bcmgenet_wol_isr, 0, - dev->name, priv); - if (!err) - device_set_wakeup_capable(&pdev->dev, 1); + if (priv->wol_irq > 0) { + err = devm_request_irq(&pdev->dev, priv->wol_irq, + bcmgenet_wol_isr, 0, dev->name, priv); + if (!err) + device_set_wakeup_capable(&pdev->dev, 1); + } /* Set the needed headroom to account for any possible * features enabling/disabling at runtime diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c index 164988f3b4fab..a2da09da4907b 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c @@ -41,6 +41,13 @@ void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct bcmgenet_priv *priv = netdev_priv(dev); + struct device *kdev = &priv->pdev->dev; + + if (!device_can_wakeup(kdev)) { + wol->supported = 0; + wol->wolopts = 0; + return; + } wol->supported = WAKE_MAGIC | WAKE_MAGICSECURE; wol->wolopts = priv->wolopts; diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index ebd0853a6f313..78219a9943a73 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -927,7 +927,6 @@ static void gem_rx_refill(struct macb_queue *queue) /* Make hw descriptor updates visible to CPU */ rmb(); - queue->rx_prepared_head++; desc = macb_rx_desc(queue, entry); if (!queue->rx_skbuff[entry]) { @@ -966,6 +965,7 @@ static void gem_rx_refill(struct macb_queue *queue) dma_wmb(); desc->addr &= ~MACB_BIT(RX_USED); } + queue->rx_prepared_head++; } /* Make descriptor updates visible to hardware */ @@ -1283,7 +1283,14 @@ static int macb_poll(struct napi_struct *napi, int budget) if (work_done < budget) { napi_complete_done(napi, work_done); - /* Packets received while interrupts were disabled */ + /* RSR bits only seem to propagate to raise interrupts when + * interrupts are enabled at the time, so if bits are already + * set due to packets received while interrupts were disabled, + * they will not cause another interrupt to be generated when + * interrupts are re-enabled. + * Check for this case here. This has been seen to happen + * around 30% of the time under heavy network load. + */ status = macb_readl(bp, RSR); if (status) { if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) @@ -1291,6 +1298,22 @@ static int macb_poll(struct napi_struct *napi, int budget) napi_reschedule(napi); } else { queue_writel(queue, IER, bp->rx_intr_mask); + + /* In rare cases, packets could have been received in + * the window between the check above and re-enabling + * interrupts. Therefore, a double-check is required + * to avoid losing a wakeup. This can potentially race + * with the interrupt handler doing the same actions + * if an interrupt is raised just after enabling them, + * but this should be harmless. + */ + status = macb_readl(bp, RSR); + if (unlikely(status)) { + queue_writel(queue, IDR, bp->rx_intr_mask); + if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) + queue_writel(queue, ISR, MACB_BIT(RCOMP)); + napi_schedule(napi); + } } } @@ -1355,6 +1378,7 @@ static void macb_tx_restart(struct macb_queue *queue) unsigned int head = queue->tx_head; unsigned int tail = queue->tx_tail; struct macb *bp = queue->bp; + unsigned int head_idx, tbqp; if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) queue_writel(queue, ISR, MACB_BIT(TXUBR)); @@ -1362,6 +1386,13 @@ static void macb_tx_restart(struct macb_queue *queue) if (head == tail) return; + tbqp = queue_readl(queue, TBQP) / macb_dma_desc_get_size(bp); + tbqp = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, tbqp)); + head_idx = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, head)); + + if (tbqp == head_idx) + return; + macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART)); } @@ -4284,7 +4315,7 @@ static int macb_probe(struct platform_device *pdev) #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT if (GEM_BFEXT(DAW64, gem_readl(bp, DCFG6))) { - dma_set_mask(&pdev->dev, DMA_BIT_MASK(44)); + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(44)); bp->hw_dma_cap |= HW_DMA_CAP_64B; } #endif diff --git a/drivers/net/ethernet/cadence/macb_pci.c b/drivers/net/ethernet/cadence/macb_pci.c index 617b3b728dd04..94f3babfad309 100644 --- a/drivers/net/ethernet/cadence/macb_pci.c +++ b/drivers/net/ethernet/cadence/macb_pci.c @@ -112,9 +112,9 @@ static void macb_remove(struct pci_dev *pdev) struct platform_device *plat_dev = pci_get_drvdata(pdev); struct macb_platform_data *plat_data = dev_get_platdata(&plat_dev->dev); - platform_device_unregister(plat_dev); clk_unregister(plat_data->pclk); clk_unregister(plat_data->hclk); + platform_device_unregister(plat_dev); } static const struct pci_device_id dev_id_table[] = { diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c index 43a3f0dbf857c..f5aec28c77306 100644 --- a/drivers/net/ethernet/cadence/macb_ptp.c +++ b/drivers/net/ethernet/cadence/macb_ptp.c @@ -275,6 +275,12 @@ void gem_ptp_rxstamp(struct macb *bp, struct sk_buff *skb, if (GEM_BFEXT(DMA_RXVALID, desc->addr)) { desc_ptp = macb_ptp_desc(bp, desc); + /* Unlikely but check */ + if (!desc_ptp) { + dev_warn_ratelimited(&bp->pdev->dev, + "Timestamp not supported in BD\n"); + return; + } gem_hw_timestamp(bp, desc_ptp->ts_1, desc_ptp->ts_2, &ts); memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps)); shhwtstamps->hwtstamp = ktime_set(ts.tv_sec, ts.tv_nsec); @@ -307,8 +313,11 @@ int gem_ptp_txstamp(struct macb_queue *queue, struct sk_buff *skb, if (CIRC_SPACE(head, tail, PTP_TS_BUFFER_SIZE) == 0) return -ENOMEM; - skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; desc_ptp = macb_ptp_desc(queue->bp, desc); + /* Unlikely but check */ + if (!desc_ptp) + return -EINVAL; + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; tx_timestamp = &queue->tx_timestamps[head]; tx_timestamp->skb = skb; /* ensure ts_1/ts_2 is loaded after ctrl (TX_USED check) */ diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index 9361f964bb9b2..816453a4f8d6c 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -1193,7 +1193,7 @@ static int nic_register_interrupts(struct nicpf *nic) dev_err(&nic->pdev->dev, "Request for #%d msix vectors failed, returned %d\n", nic->num_vec, ret); - return 1; + return ret; } /* Register mailbox interrupt handler */ diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 5c45c0c6dd234..27ea528ef4484 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1226,7 +1226,7 @@ static int nicvf_register_misc_interrupt(struct nicvf *nic) if (ret < 0) { netdev_err(nic->netdev, "Req for #%d msix vectors failed\n", nic->num_vec); - return 1; + return ret; } sprintf(nic->irq_name[irq], "%s Mbox", "NICVF"); @@ -1245,7 +1245,7 @@ static int nicvf_register_misc_interrupt(struct nicvf *nic) if (!nicvf_check_pf_ready(nic)) { nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0); nicvf_unregister_interrupts(nic); - return 1; + return -EIO; } return 0; diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c index 0ccdde366ae17..540d99f59226e 100644 --- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c +++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c @@ -1153,6 +1153,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (!adapter->registered_device_map) { pr_err("%s: could not register any net devices\n", pci_name(pdev)); + err = -EINVAL; goto out_release_adapter_res; } diff --git a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c index 0a9f2c5966242..d3e11fe1eabcc 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c @@ -3677,6 +3677,8 @@ int t3_prep_adapter(struct adapter *adapter, const struct adapter_info *ai, MAC_STATS_ACCUM_SECS : (MAC_STATS_ACCUM_SECS * 10); adapter->params.pci.vpd_cap_addr = pci_find_capability(adapter->pdev, PCI_CAP_ID_VPD); + if (!adapter->params.pci.vpd_cap_addr) + return -ENODEV; ret = get_vpd_params(adapter, &adapter->params.vpd); if (ret < 0) return ret; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c index f537be9cb3155..5ba30b8eb1e11 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c @@ -1466,12 +1466,15 @@ static int cxgb4_get_module_info(struct net_device *dev, if (ret) return ret; - if (!sff8472_comp || (sff_diag_type & 4)) { + if (!sff8472_comp || (sff_diag_type & SFP_DIAG_ADDRMODE)) { modinfo->type = ETH_MODULE_SFF_8079; modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; } else { modinfo->type = ETH_MODULE_SFF_8472; - modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + if (sff_diag_type & SFP_DIAG_IMPLEMENTED) + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + else + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN / 2; } break; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h index 002fc62ea7262..63bc956d20376 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h @@ -293,6 +293,8 @@ enum { #define I2C_PAGE_SIZE 0x100 #define SFP_DIAG_TYPE_ADDR 0x5c #define SFP_DIAG_TYPE_LEN 0x1 +#define SFP_DIAG_ADDRMODE BIT(2) +#define SFP_DIAG_IMPLEMENTED BIT(6) #define SFF_8472_COMP_ADDR 0x5e #define SFF_8472_COMP_LEN 0x1 #define SFF_REV_ADDR 0x1 diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c index d04a6c1634452..da8d10475a08e 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c @@ -32,6 +32,7 @@ #include #include +#include #include #include @@ -99,7 +100,7 @@ cxgb_find_route(struct cxgb4_lld_info *lldi, rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip, peer_port, local_port, IPPROTO_TCP, - tos, 0); + tos & ~INET_ECN_MASK, 0); if (IS_ERR(rt)) return NULL; n = dst_neigh_lookup(&rt->dst, &peer_ip); diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index c9fb1ec625d8b..a8a8b77c1611e 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -304,21 +304,21 @@ static void gmac_speed_set(struct net_device *netdev) switch (phydev->speed) { case 1000: status.bits.speed = GMAC_SPEED_1000; - if (phydev->interface == PHY_INTERFACE_MODE_RGMII) + if (phy_interface_mode_is_rgmii(phydev->interface)) status.bits.mii_rmii = GMAC_PHY_RGMII_1000; netdev_dbg(netdev, "connect %s to RGMII @ 1Gbit\n", phydev_name(phydev)); break; case 100: status.bits.speed = GMAC_SPEED_100; - if (phydev->interface == PHY_INTERFACE_MODE_RGMII) + if (phy_interface_mode_is_rgmii(phydev->interface)) status.bits.mii_rmii = GMAC_PHY_RGMII_100_10; netdev_dbg(netdev, "connect %s to RGMII @ 100 Mbit\n", phydev_name(phydev)); break; case 10: status.bits.speed = GMAC_SPEED_10; - if (phydev->interface == PHY_INTERFACE_MODE_RGMII) + if (phy_interface_mode_is_rgmii(phydev->interface)) status.bits.mii_rmii = GMAC_PHY_RGMII_100_10; netdev_dbg(netdev, "connect %s to RGMII @ 10 Mbit\n", phydev_name(phydev)); @@ -388,6 +388,9 @@ static int gmac_setup_phy(struct net_device *netdev) status.bits.mii_rmii = GMAC_PHY_GMII; break; case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_TXID: + case PHY_INTERFACE_MODE_RGMII_RXID: netdev_dbg(netdev, "RGMII: set GMAC0 and GMAC1 to MII/RGMII mode\n"); status.bits.mii_rmii = GMAC_PHY_RGMII_100_10; diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c index c813e6f2b371e..c97fc0e384ca6 100644 --- a/drivers/net/ethernet/dec/tulip/de4x5.c +++ b/drivers/net/ethernet/dec/tulip/de4x5.c @@ -4708,6 +4708,10 @@ type3_infoblock(struct net_device *dev, u_char count, u_char *p) lp->ibn = 3; lp->active = *p++; if (MOTO_SROM_BUG) lp->active = 0; + /* if (MOTO_SROM_BUG) statement indicates lp->active could + * be 8 (i.e. the size of array lp->phy) */ + if (WARN_ON(lp->active >= ARRAY_SIZE(lp->phy))) + return -EINVAL; lp->phy[lp->active].gep = (*p ? p : NULL); p += (2 * (*p) + 1); lp->phy[lp->active].rst = (*p ? p : NULL); p += (2 * (*p) + 1); lp->phy[lp->active].mc = get_unaligned_le16(p); p += 2; @@ -4999,19 +5003,23 @@ mii_get_phy(struct net_device *dev) } if ((j == limit) && (i < DE4X5_MAX_MII)) { for (k=0; k < DE4X5_MAX_PHY && lp->phy[k].id; k++); - lp->phy[k].addr = i; - lp->phy[k].id = id; - lp->phy[k].spd.reg = GENERIC_REG; /* ANLPA register */ - lp->phy[k].spd.mask = GENERIC_MASK; /* 100Mb/s technologies */ - lp->phy[k].spd.value = GENERIC_VALUE; /* TX & T4, H/F Duplex */ - lp->mii_cnt++; - lp->active++; - printk("%s: Using generic MII device control. If the board doesn't operate,\nplease mail the following dump to the author:\n", dev->name); - j = de4x5_debug; - de4x5_debug |= DEBUG_MII; - de4x5_dbg_mii(dev, k); - de4x5_debug = j; - printk("\n"); + if (k < DE4X5_MAX_PHY) { + lp->phy[k].addr = i; + lp->phy[k].id = id; + lp->phy[k].spd.reg = GENERIC_REG; /* ANLPA register */ + lp->phy[k].spd.mask = GENERIC_MASK; /* 100Mb/s technologies */ + lp->phy[k].spd.value = GENERIC_VALUE; /* TX & T4, H/F Duplex */ + lp->mii_cnt++; + lp->active++; + printk("%s: Using generic MII device control. If the board doesn't operate,\nplease mail the following dump to the author:\n", dev->name); + j = de4x5_debug; + de4x5_debug |= DEBUG_MII; + de4x5_dbg_mii(dev, k); + de4x5_debug = j; + printk("\n"); + } else { + goto purgatory; + } } } purgatory: diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index 3e3e08698876b..fea4223ad6f15 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -1410,8 +1410,10 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* alloc_etherdev ensures aligned and zeroed private structures */ dev = alloc_etherdev (sizeof (*tp)); - if (!dev) + if (!dev) { + pci_disable_device(pdev); return -ENOMEM; + } SET_NETDEV_DEV(dev, &pdev->dev); if (pci_resource_len (pdev, 0) < tulip_tbl[chip_idx].io_size) { @@ -1788,6 +1790,7 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) err_out_free_netdev: free_netdev (dev); + pci_disable_device(pdev); return -ENODEV; } diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 649c5c429bd7c..1288b5e3d2201 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -2287,7 +2287,7 @@ int be_cmd_get_beacon_state(struct be_adapter *adapter, u8 port_num, u32 *state) /* Uses sync mcc */ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter, - u8 page_num, u8 *data) + u8 page_num, u32 off, u32 len, u8 *data) { struct be_dma_mem cmd; struct be_mcc_wrb *wrb; @@ -2321,10 +2321,10 @@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter, req->port = cpu_to_le32(adapter->hba_port_num); req->page_num = cpu_to_le32(page_num); status = be_mcc_notify_wait(adapter); - if (!status) { + if (!status && len > 0) { struct be_cmd_resp_port_type *resp = cmd.va; - memcpy(data, resp->page_data, PAGE_DATA_LEN); + memcpy(data, resp->page_data + off, len); } err: mutex_unlock(&adapter->mcc_lock); @@ -2415,7 +2415,7 @@ int be_cmd_query_cable_type(struct be_adapter *adapter) int status; status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, - page_data); + 0, PAGE_DATA_LEN, page_data); if (!status) { switch (adapter->phy.interface_type) { case PHY_TYPE_QSFP: @@ -2440,7 +2440,7 @@ int be_cmd_query_sfp_info(struct be_adapter *adapter) int status; status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, - page_data); + 0, PAGE_DATA_LEN, page_data); if (!status) { strlcpy(adapter->phy.vendor_name, page_data + SFP_VENDOR_NAME_OFFSET, SFP_VENDOR_NAME_LEN - 1); diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index c30d6d6f0f3a0..9e17d6a7ab8cd 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -2427,7 +2427,7 @@ int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num, u8 beacon, int be_cmd_get_beacon_state(struct be_adapter *adapter, u8 port_num, u32 *state); int be_cmd_read_port_transceiver_data(struct be_adapter *adapter, - u8 page_num, u8 *data); + u8 page_num, u32 off, u32 len, u8 *data); int be_cmd_query_cable_type(struct be_adapter *adapter); int be_cmd_query_sfp_info(struct be_adapter *adapter); int lancer_cmd_read_object(struct be_adapter *adapter, struct be_dma_mem *cmd, diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index 5bb5abf995887..7cc1f41971c57 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -1339,7 +1339,7 @@ static int be_get_module_info(struct net_device *netdev, return -EOPNOTSUPP; status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, - page_data); + 0, PAGE_DATA_LEN, page_data); if (!status) { if (!page_data[SFP_PLUS_SFF_8472_COMP]) { modinfo->type = ETH_MODULE_SFF_8079; @@ -1357,25 +1357,32 @@ static int be_get_module_eeprom(struct net_device *netdev, { struct be_adapter *adapter = netdev_priv(netdev); int status; + u32 begin, end; if (!check_privilege(adapter, MAX_PRIVILEGES)) return -EOPNOTSUPP; - status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, - data); - if (status) - goto err; + begin = eeprom->offset; + end = eeprom->offset + eeprom->len; + + if (begin < PAGE_DATA_LEN) { + status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, begin, + min_t(u32, end, PAGE_DATA_LEN) - begin, + data); + if (status) + goto err; + + data += PAGE_DATA_LEN - begin; + begin = PAGE_DATA_LEN; + } - if (eeprom->offset + eeprom->len > PAGE_DATA_LEN) { - status = be_cmd_read_port_transceiver_data(adapter, - TR_PAGE_A2, - data + - PAGE_DATA_LEN); + if (end > PAGE_DATA_LEN) { + status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A2, + begin - PAGE_DATA_LEN, + end - begin, data); if (status) goto err; } - if (eeprom->offset) - memcpy(data, data + eeprom->offset, eeprom->len); err: return be_cmd_status(status); } diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 2c06cdcd3e751..98e94d914597a 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1734,6 +1734,19 @@ static void ftgmac100_setup_clk(struct ftgmac100 *priv) FTGMAC_100MHZ); } +static bool ftgmac100_has_child_node(struct device_node *np, const char *name) +{ + struct device_node *child_np = of_get_child_by_name(np, name); + bool ret = false; + + if (child_np) { + ret = true; + of_node_put(child_np); + } + + return ret; +} + static int ftgmac100_probe(struct platform_device *pdev) { struct resource *res; @@ -1850,7 +1863,7 @@ static int ftgmac100_probe(struct platform_device *pdev) /* Display what we found */ phy_attached_info(phy); - } else if (np && !of_get_child_by_name(np, "mdio")) { + } else if (np && !ftgmac100_has_child_node(np, "mdio")) { /* Support legacy ASPEED devicetree descriptions that decribe a * MAC with an embedded MDIO controller but have no "mdio" * child node. Automatically scan the MDIO bus for available @@ -1880,6 +1893,11 @@ static int ftgmac100_probe(struct platform_device *pdev) /* AST2400 doesn't have working HW checksum generation */ if (np && (of_device_is_compatible(np, "aspeed,ast2400-mac"))) netdev->hw_features &= ~NETIF_F_HW_CSUM; + + /* AST2600 tx checksum with NCSI is broken */ + if (priv->use_ncsi && of_device_is_compatible(np, "aspeed,ast2600-mac")) + netdev->hw_features &= ~NETIF_F_HW_CSUM; + if (np && of_get_property(np, "no-hw-checksum", NULL)) netdev->hw_features &= ~(NETIF_F_HW_CSUM | NETIF_F_RXCSUM); netdev->features |= netdev->hw_features; diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c index 7ce2e99b594d6..0a186d16e73f7 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c @@ -506,11 +506,15 @@ static int dpaa_get_ts_info(struct net_device *net_dev, info->phc_index = -1; fman_node = of_get_parent(mac_node); - if (fman_node) + if (fman_node) { ptp_node = of_parse_phandle(fman_node, "ptimer-handle", 0); + of_node_put(fman_node); + } - if (ptp_node) + if (ptp_node) { ptp_dev = of_find_device_by_node(ptp_node); + of_node_put(ptp_node); + } if (ptp_dev) ptp = platform_get_drvdata(ptp_dev); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 7af7cc7c8669a..34540e604f748 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -3616,10 +3616,10 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev) fsl_mc_portal_free(priv->mc_io); - free_netdev(net_dev); - dev_dbg(net_dev->dev.parent, "Removed interface %s\n", net_dev->name); + free_netdev(net_dev); + return 0; } diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c index 6437fe6b9abf0..a6b8f573ab5b5 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c @@ -148,7 +148,7 @@ static int dpaa2_ptp_probe(struct fsl_mc_device *mc_dev) base = of_iomap(node, 0); if (!base) { err = -ENOMEM; - goto err_close; + goto err_put; } err = fsl_mc_allocate_irqs(mc_dev); @@ -191,6 +191,8 @@ static int dpaa2_ptp_probe(struct fsl_mc_device *mc_dev) fsl_mc_free_irqs(mc_dev); err_unmap: iounmap(base); +err_put: + of_node_put(node); err_close: dprtc_close(mc_dev->mc_io, 0, mc_dev->mc_handle); err_free_mcp: diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index b77eaf31bd4ed..cee77326e7e85 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -1222,7 +1222,6 @@ static void enetc_clear_bdrs(struct enetc_ndev_priv *priv) static int enetc_setup_irqs(struct enetc_ndev_priv *priv) { struct pci_dev *pdev = priv->si->pdev; - cpumask_t cpu_mask; int i, j, err; for (i = 0; i < priv->bdr_int_num; i++) { @@ -1249,9 +1248,7 @@ static int enetc_setup_irqs(struct enetc_ndev_priv *priv) enetc_wr(hw, ENETC_SIMSITRV(idx), entry); } - cpumask_clear(&cpu_mask); - cpumask_set_cpu(i % num_online_cpus(), &cpu_mask); - irq_set_affinity_hint(irq, &cpu_mask); + irq_set_affinity_hint(irq, get_cpu_mask(i % num_online_cpus())); } return 0; diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index 89121d7ce3e6f..636aa6d81d8fe 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -155,7 +155,7 @@ static const struct { { ENETC_PM0_TFRM, "MAC tx frames" }, { ENETC_PM0_TFCS, "MAC tx fcs errors" }, { ENETC_PM0_TVLAN, "MAC tx VLAN frames" }, - { ENETC_PM0_TERR, "MAC tx frames" }, + { ENETC_PM0_TERR, "MAC tx frame errors" }, { ENETC_PM0_TUCA, "MAC tx unicast frames" }, { ENETC_PM0_TMCA, "MAC tx multicast frames" }, { ENETC_PM0_TBCA, "MAC tx broadcast frames" }, diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index d89568f810bc4..5163e06a7dd7d 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -373,6 +373,9 @@ struct bufdesc_ex { #define FEC_ENET_WAKEUP ((uint)0x00020000) /* Wakeup request */ #define FEC_ENET_TXF (FEC_ENET_TXF_0 | FEC_ENET_TXF_1 | FEC_ENET_TXF_2) #define FEC_ENET_RXF (FEC_ENET_RXF_0 | FEC_ENET_RXF_1 | FEC_ENET_RXF_2) +#define FEC_ENET_RXF_GET(X) (((X) == 0) ? FEC_ENET_RXF_0 : \ + (((X) == 1) ? FEC_ENET_RXF_1 : \ + FEC_ENET_RXF_2)) #define FEC_ENET_TS_AVAIL ((uint)0x00010000) #define FEC_ENET_TS_TIMER ((uint)0x00008000) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index a53c2d637a971..a31f891d51fbc 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1444,7 +1444,7 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id) break; pkt_received++; - writel(FEC_ENET_RXF, fep->hwp + FEC_IEVENT); + writel(FEC_ENET_RXF_GET(queue_id), fep->hwp + FEC_IEVENT); /* Check for errors. */ status ^= BD_ENET_RX_LAST; diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c index 47f6fee1f3964..1812434cda847 100644 --- a/drivers/net/ethernet/freescale/fman/fman_port.c +++ b/drivers/net/ethernet/freescale/fman/fman_port.c @@ -1791,7 +1791,7 @@ static int fman_port_probe(struct platform_device *of_dev) fman = dev_get_drvdata(&fm_pdev->dev); if (!fman) { err = -EINVAL; - goto return_err; + goto put_device; } err = of_property_read_u32(port_node, "cell-index", &val); @@ -1799,7 +1799,7 @@ static int fman_port_probe(struct platform_device *of_dev) dev_err(port->dev, "%s: reading cell-index for %pOF failed\n", __func__, port_node); err = -EINVAL; - goto return_err; + goto put_device; } port_id = (u8)val; port->dts_params.id = port_id; @@ -1833,7 +1833,7 @@ static int fman_port_probe(struct platform_device *of_dev) } else { dev_err(port->dev, "%s: Illegal port type\n", __func__); err = -EINVAL; - goto return_err; + goto put_device; } port->dts_params.type = port_type; @@ -1847,7 +1847,7 @@ static int fman_port_probe(struct platform_device *of_dev) dev_err(port->dev, "%s: incorrect qman-channel-id\n", __func__); err = -EINVAL; - goto return_err; + goto put_device; } port->dts_params.qman_channel_id = qman_channel_id; } @@ -1857,7 +1857,7 @@ static int fman_port_probe(struct platform_device *of_dev) dev_err(port->dev, "%s: of_address_to_resource() failed\n", __func__); err = -ENOMEM; - goto return_err; + goto put_device; } port->dts_params.fman = fman; @@ -1882,6 +1882,8 @@ static int fman_port_probe(struct platform_device *of_dev) return 0; +put_device: + put_device(&fm_pdev->dev); return_err: of_node_put(port_node); free_port: diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index 7ab8095db1928..147126e79986c 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -94,14 +94,17 @@ static void mac_exception(void *handle, enum fman_mac_exceptions ex) __func__, ex); } -static void set_fman_mac_params(struct mac_device *mac_dev, - struct fman_mac_params *params) +static int set_fman_mac_params(struct mac_device *mac_dev, + struct fman_mac_params *params) { struct mac_priv_s *priv = mac_dev->priv; params->base_addr = (typeof(params->base_addr)) devm_ioremap(priv->dev, mac_dev->res->start, resource_size(mac_dev->res)); + if (!params->base_addr) + return -ENOMEM; + memcpy(¶ms->addr, mac_dev->addr, sizeof(mac_dev->addr)); params->max_speed = priv->max_speed; params->phy_if = mac_dev->phy_if; @@ -112,6 +115,8 @@ static void set_fman_mac_params(struct mac_device *mac_dev, params->event_cb = mac_exception; params->dev_id = mac_dev; params->internal_phy_node = priv->internal_phy_node; + + return 0; } static int tgec_initialization(struct mac_device *mac_dev) @@ -123,7 +128,9 @@ static int tgec_initialization(struct mac_device *mac_dev) priv = mac_dev->priv; - set_fman_mac_params(mac_dev, ¶ms); + err = set_fman_mac_params(mac_dev, ¶ms); + if (err) + goto _return; mac_dev->fman_mac = tgec_config(¶ms); if (!mac_dev->fman_mac) { @@ -169,7 +176,9 @@ static int dtsec_initialization(struct mac_device *mac_dev) priv = mac_dev->priv; - set_fman_mac_params(mac_dev, ¶ms); + err = set_fman_mac_params(mac_dev, ¶ms); + if (err) + goto _return; mac_dev->fman_mac = dtsec_config(¶ms); if (!mac_dev->fman_mac) { @@ -218,7 +227,9 @@ static int memac_initialization(struct mac_device *mac_dev) priv = mac_dev->priv; - set_fman_mac_params(mac_dev, ¶ms); + err = set_fman_mac_params(mac_dev, ¶ms); + if (err) + goto _return; if (priv->max_speed == SPEED_10000) params.phy_if = PHY_INTERFACE_MODE_XGMII; diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c index 3c8e4e2efc070..01a7255e86c92 100644 --- a/drivers/net/ethernet/freescale/gianfar_ethtool.c +++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c @@ -1489,6 +1489,7 @@ static int gfar_get_ts_info(struct net_device *dev, ptp_node = of_find_compatible_node(NULL, NULL, "fsl,etsec-ptp"); if (ptp_node) { ptp_dev = of_find_device_by_node(ptp_node); + of_node_put(ptp_node); if (ptp_dev) ptp = platform_get_drvdata(ptp_dev); } diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c index c82c85ef5fb34..c37aea7ba8502 100644 --- a/drivers/net/ethernet/freescale/xgmac_mdio.c +++ b/drivers/net/ethernet/freescale/xgmac_mdio.c @@ -301,9 +301,10 @@ static int xgmac_mdio_probe(struct platform_device *pdev) static int xgmac_mdio_remove(struct platform_device *pdev) { struct mii_bus *bus = platform_get_drvdata(pdev); + struct mdio_fsl_priv *priv = bus->priv; mdiobus_unregister(bus); - iounmap(bus->priv); + iounmap(priv->mdio_base); mdiobus_free(bus); return 0; diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index ebc37e2569221..f19edd4c6c5bb 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -391,7 +391,7 @@ struct gve_queue_page_list *gve_assign_rx_qpl(struct gve_priv *priv) gve_num_tx_qpls(priv)); /* we are out of rx qpls */ - if (id == priv->qpl_cfg.qpl_map_size) + if (id == gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv)) return NULL; set_bit(id, priv->qpl_cfg.qpl_id_map); diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index f8dfa7501f65a..5b450c6100add 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -30,6 +30,7 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) { struct gve_priv *priv = netdev_priv(dev); unsigned int start; + u64 packets, bytes; int ring; if (priv->rx) { @@ -37,10 +38,12 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) do { start = u64_stats_fetch_begin(&priv->rx[ring].statss); - s->rx_packets += priv->rx[ring].rpackets; - s->rx_bytes += priv->rx[ring].rbytes; + packets = priv->rx[ring].rpackets; + bytes = priv->rx[ring].rbytes; } while (u64_stats_fetch_retry(&priv->rx[ring].statss, start)); + s->rx_packets += packets; + s->rx_bytes += bytes; } } if (priv->tx) { @@ -48,10 +51,12 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) do { start = u64_stats_fetch_begin(&priv->tx[ring].statss); - s->tx_packets += priv->tx[ring].pkt_done; - s->tx_bytes += priv->tx[ring].bytes_done; + packets = priv->tx[ring].pkt_done; + bytes = priv->tx[ring].bytes_done; } while (u64_stats_fetch_retry(&priv->tx[ring].statss, start)); + s->tx_packets += packets; + s->tx_bytes += bytes; } } } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c index ed3829ae4ef1b..580199fdd0c22 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c @@ -398,6 +398,10 @@ static void hns_dsaf_ge_srst_by_port(struct dsaf_device *dsaf_dev, u32 port, return; if (!HNS_DSAF_IS_DEBUG(dsaf_dev)) { + /* DSAF_MAX_PORT_NUM is 6, but DSAF_GE_NUM is 8. + We need check to prevent array overflow */ + if (port >= DSAF_MAX_PORT_NUM) + return; reg_val_1 = 0x1 << port; port_rst_off = dsaf_dev->mac_cb[port]->port_rst_off; /* there is difference between V1 and V2 in register.*/ diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.c b/drivers/net/ethernet/hisilicon/hns3/hnae3.c index 03ca7d925e8e0..2e38c7d214c45 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.c +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.c @@ -10,6 +10,27 @@ static LIST_HEAD(hnae3_ae_algo_list); static LIST_HEAD(hnae3_client_list); static LIST_HEAD(hnae3_ae_dev_list); +void hnae3_unregister_ae_algo_prepare(struct hnae3_ae_algo *ae_algo) +{ + const struct pci_device_id *pci_id; + struct hnae3_ae_dev *ae_dev; + + if (!ae_algo) + return; + + list_for_each_entry(ae_dev, &hnae3_ae_dev_list, node) { + if (!hnae3_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B)) + continue; + + pci_id = pci_match_id(ae_algo->pdev_id_table, ae_dev->pdev); + if (!pci_id) + continue; + if (IS_ENABLED(CONFIG_PCI_IOV)) + pci_disable_sriov(ae_dev->pdev); + } +} +EXPORT_SYMBOL(hnae3_unregister_ae_algo_prepare); + /* we are keeping things simple and using single lock for all the * list. This is a non-critical code so other updations, if happen * in parallel, can wait. diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 0db835d87d09d..6cf8490110642 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -666,6 +666,7 @@ struct hnae3_handle { int hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev); void hnae3_unregister_ae_dev(struct hnae3_ae_dev *ae_dev); +void hnae3_unregister_ae_algo_prepare(struct hnae3_ae_algo *ae_algo); void hnae3_unregister_ae_algo(struct hnae3_ae_algo *ae_algo); void hnae3_register_ae_algo(struct hnae3_ae_algo *ae_algo); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index e64e175162068..ffd1018d43fbe 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -56,6 +56,7 @@ MODULE_PARM_DESC(debug, " Network interface message level setting"); #define HNS3_OUTER_VLAN_TAG 2 #define HNS3_MIN_TX_LEN 33U +#define HNS3_MIN_TUN_PKT_LEN 65U /* hns3_pci_tbl - PCI Device ID Table * @@ -451,6 +452,11 @@ static int hns3_nic_net_open(struct net_device *netdev) if (hns3_nic_resetting(netdev)) return -EBUSY; + if (!test_bit(HNS3_NIC_STATE_DOWN, &priv->state)) { + netdev_warn(netdev, "net open repeatedly!\n"); + return 0; + } + netif_carrier_off(netdev); ret = hns3_nic_set_real_num_queue(netdev); @@ -931,8 +937,11 @@ static int hns3_set_l2l3l4(struct sk_buff *skb, u8 ol4_proto, l4.tcp->doff); break; case IPPROTO_UDP: - if (hns3_tunnel_csum_bug(skb)) - return skb_checksum_help(skb); + if (hns3_tunnel_csum_bug(skb)) { + int ret = skb_put_padto(skb, HNS3_MIN_TUN_PKT_LEN); + + return ret ? ret : skb_checksum_help(skb); + } hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_L4CS_B, 1); hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_L4T_S, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index e34e0854635c3..d64cded30eeb4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -257,6 +257,9 @@ enum hclge_opcode_type { /* Led command */ HCLGE_OPC_LED_STATUS_CFG = 0xB000, + /* clear hardware resource command */ + HCLGE_OPC_CLEAR_HW_RESOURCE = 0x700B, + /* NCL config command */ HCLGE_OPC_QUERY_NCL_CONFIG = 0x7011, /* M7 stats command */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c index a1790af73096d..bb22d91f6e53e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c @@ -124,7 +124,7 @@ static int hclge_ets_validate(struct hclge_dev *hdev, struct ieee_ets *ets, if (ret) return ret; - for (i = 0; i < hdev->tc_max; i++) { + for (i = 0; i < HNAE3_MAX_TC; i++) { switch (ets->tc_tsa[i]) { case IEEE_8021QAZ_TSA_STRICT: if (hdev->tm_info.tc_info[i].tc_sch_mode != @@ -132,6 +132,15 @@ static int hclge_ets_validate(struct hclge_dev *hdev, struct ieee_ets *ets, *changed = true; break; case IEEE_8021QAZ_TSA_ETS: + /* The hardware will switch to sp mode if bandwidth is + * 0, so limit ets bandwidth must be greater than 0. + */ + if (!ets->tc_tx_bw[i]) { + dev_err(&hdev->pdev->dev, + "tc%u ets bw cannot be 0\n", i); + return -EINVAL; + } + if (hdev->tm_info.tc_info[i].tc_sch_mode != HCLGE_SCH_MODE_DWRR) *changed = true; @@ -281,21 +290,12 @@ static int hclge_ieee_getpfc(struct hnae3_handle *h, struct ieee_pfc *pfc) u64 requests[HNAE3_MAX_TC], indications[HNAE3_MAX_TC]; struct hclge_vport *vport = hclge_get_vport(h); struct hclge_dev *hdev = vport->back; - u8 i, j, pfc_map, *prio_tc; int ret; + u8 i; memset(pfc, 0, sizeof(*pfc)); pfc->pfc_cap = hdev->pfc_max; - prio_tc = hdev->tm_info.prio_tc; - pfc_map = hdev->tm_info.hw_pfc_map; - - /* Pfc setting is based on TC */ - for (i = 0; i < hdev->tm_info.num_tc; i++) { - for (j = 0; j < HNAE3_MAX_USER_PRIO; j++) { - if ((prio_tc[j] == i) && (pfc_map & BIT(i))) - pfc->pfc_en |= BIT(j); - } - } + pfc->pfc_en = hdev->tm_info.pfc_en; ret = hclge_pfc_tx_stats_get(hdev, requests); if (ret) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 93f3865b679bf..d58abdfdb9b7b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1328,9 +1328,10 @@ static void hclge_init_kdump_kernel_config(struct hclge_dev *hdev) static int hclge_configure(struct hclge_dev *hdev) { + const struct cpumask *cpumask = cpu_online_mask; struct hclge_cfg cfg; unsigned int i; - int ret; + int node, ret; ret = hclge_get_cfg(hdev, &cfg); if (ret) { @@ -1390,11 +1391,12 @@ static int hclge_configure(struct hclge_dev *hdev) hclge_init_kdump_kernel_config(hdev); - /* Set the init affinity based on pci func number */ - i = cpumask_weight(cpumask_of_node(dev_to_node(&hdev->pdev->dev))); - i = i ? PCI_FUNC(hdev->pdev->devfn) % i : 0; - cpumask_set_cpu(cpumask_local_spread(i, dev_to_node(&hdev->pdev->dev)), - &hdev->affinity_mask); + /* Set the affinity based on numa node */ + node = dev_to_node(&hdev->pdev->dev); + if (node != NUMA_NO_NODE) + cpumask = cpumask_of_node(node); + + cpumask_copy(&hdev->affinity_mask, cpumask); return ret; } @@ -6683,11 +6685,12 @@ static void hclge_ae_stop(struct hnae3_handle *handle) hclge_clear_arfs_rules(handle); spin_unlock_bh(&hdev->fd_rule_lock); - /* If it is not PF reset, the firmware will disable the MAC, + /* If it is not PF reset or FLR, the firmware will disable the MAC, * so it only need to stop phy here. */ if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) && - hdev->reset_type != HNAE3_FUNC_RESET) { + hdev->reset_type != HNAE3_FUNC_RESET && + hdev->reset_type != HNAE3_FLR_RESET) { hclge_mac_stop_phy(hdev); hclge_update_link_status(hdev); return; @@ -8006,7 +8009,11 @@ static int hclge_init_vlan_config(struct hclge_dev *hdev) static void hclge_add_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id, bool writen_to_tbl) { - struct hclge_vport_vlan_cfg *vlan; + struct hclge_vport_vlan_cfg *vlan, *tmp; + + list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) + if (vlan->vlan_id == vlan_id) + return; vlan = kzalloc(sizeof(*vlan), GFP_KERNEL); if (!vlan) @@ -8337,11 +8344,11 @@ int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto, } if (!ret) { - if (is_kill) - hclge_rm_vport_vlan_table(vport, vlan_id, false); - else + if (!is_kill) hclge_add_vport_vlan_table(vport, vlan_id, writen_to_tbl); + else if (is_kill && vlan_id != 0) + hclge_rm_vport_vlan_table(vport, vlan_id, false); } else if (is_kill) { /* when remove hw vlan filter failed, record the vlan id, * and try to remove it from hw later, to be consistence @@ -9165,6 +9172,28 @@ static void hclge_clear_resetting_state(struct hclge_dev *hdev) } } +static int hclge_clear_hw_resource(struct hclge_dev *hdev) +{ + struct hclge_desc desc; + int ret; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CLEAR_HW_RESOURCE, false); + + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + /* This new command is only supported by new firmware, it will + * fail with older firmware. Error value -EOPNOSUPP can only be + * returned by older firmware running this command, to keep code + * backward compatible we will override this value and return + * success. + */ + if (ret && ret != -EOPNOTSUPP) { + dev_err(&hdev->pdev->dev, + "failed to clear hw resource, ret = %d\n", ret); + return ret; + } + return 0; +} + static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) { struct pci_dev *pdev = ae_dev->pdev; @@ -9206,6 +9235,10 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) if (ret) goto err_cmd_uninit; + ret = hclge_clear_hw_resource(hdev); + if (ret) + goto err_cmd_uninit; + ret = hclge_get_cap(hdev); if (ret) { dev_err(&pdev->dev, "get hw capability error, ret = %d.\n", @@ -10241,6 +10274,7 @@ static int hclge_init(void) static void hclge_exit(void) { + hnae3_unregister_ae_algo_prepare(&ae_algo); hnae3_unregister_ae_algo(&ae_algo); } module_init(hclge_init); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index 23a706a1765a7..410a9bf7bf0ab 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -64,6 +64,13 @@ static int hclge_send_mbx_msg(struct hclge_vport *vport, u8 *msg, u16 msg_len, enum hclge_cmd_status status; struct hclge_desc desc; + if (msg_len > HCLGE_MBX_MAX_MSG_SIZE) { + dev_err(&hdev->pdev->dev, + "msg data length(=%u) exceeds maximum(=%u)\n", + msg_len, HCLGE_MBX_MAX_MSG_SIZE); + return -EMSGSIZE; + } + resp_pf_to_vf = (struct hclge_mbx_pf_to_vf_cmd *)desc.data; hclge_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_PF_TO_VF, false); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 62399cc1c5a63..8448607742a6b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -633,6 +633,8 @@ static void hclge_tm_pg_info_init(struct hclge_dev *hdev) hdev->tm_info.pg_info[i].tc_bit_map = hdev->hw_tc_map; for (k = 0; k < hdev->tm_info.num_tc; k++) hdev->tm_info.pg_info[i].tc_dwrr[k] = BW_PERCENT; + for (; k < HNAE3_MAX_TC; k++) + hdev->tm_info.pg_info[i].tc_dwrr[k] = 0; } } @@ -972,7 +974,6 @@ static int hclge_tm_pri_tc_base_dwrr_cfg(struct hclge_dev *hdev) static int hclge_tm_ets_tc_dwrr_cfg(struct hclge_dev *hdev) { -#define DEFAULT_TC_WEIGHT 1 #define DEFAULT_TC_OFFSET 14 struct hclge_ets_tc_weight_cmd *ets_weight; @@ -985,13 +986,7 @@ static int hclge_tm_ets_tc_dwrr_cfg(struct hclge_dev *hdev) for (i = 0; i < HNAE3_MAX_TC; i++) { struct hclge_pg_info *pg_info; - ets_weight->tc_weight[i] = DEFAULT_TC_WEIGHT; - - if (!(hdev->hw_tc_map & BIT(i))) - continue; - - pg_info = - &hdev->tm_info.pg_info[hdev->tm_info.tc_info[i].pgid]; + pg_info = &hdev->tm_info.pg_info[hdev->tm_info.tc_info[i].pgid]; ets_weight->tc_weight[i] = pg_info->tc_dwrr[i]; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index ea348ebbbf2e9..403c1b9cf6ab8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -644,9 +644,9 @@ static int hclgevf_set_rss_tc_mode(struct hclgevf_dev *hdev, u16 rss_size) roundup_size = ilog2(roundup_size); for (i = 0; i < HCLGEVF_MAX_TC_NUM; i++) { - tc_valid[i] = !!(hdev->hw_tc_map & BIT(i)); + tc_valid[i] = 1; tc_size[i] = roundup_size; - tc_offset[i] = rss_size * i; + tc_offset[i] = (hdev->hw_tc_map & BIT(i)) ? rss_size * i : 0; } hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_RSS_TC_MODE, false); @@ -1956,6 +1956,8 @@ static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data) hclgevf_enable_vector(&hdev->misc_vector, false); event_cause = hclgevf_check_evt_cause(hdev, &clearval); + if (event_cause != HCLGEVF_VECTOR0_EVENT_OTHER) + hclgevf_clear_event_cause(hdev, clearval); switch (event_cause) { case HCLGEVF_VECTOR0_EVENT_RST: @@ -1968,10 +1970,7 @@ static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data) break; } - if (event_cause != HCLGEVF_VECTOR0_EVENT_OTHER) { - hclgevf_clear_event_cause(hdev, clearval); - hclgevf_enable_vector(&hdev->misc_vector, true); - } + hclgevf_enable_vector(&hdev->misc_vector, true); return IRQ_HANDLED; } diff --git a/drivers/net/ethernet/i825xx/82596.c b/drivers/net/ethernet/i825xx/82596.c index 92929750f8325..54d5b402b0e8d 100644 --- a/drivers/net/ethernet/i825xx/82596.c +++ b/drivers/net/ethernet/i825xx/82596.c @@ -1155,7 +1155,7 @@ struct net_device * __init i82596_probe(int unit) err = -ENODEV; goto out; } - memcpy(eth_addr, (void *) 0xfffc1f2c, ETH_ALEN); /* YUCK! Get addr from NOVRAM */ + memcpy(eth_addr, absolute_pointer(0xfffc1f2c), ETH_ALEN); /* YUCK! Get addr from NOVRAM */ dev->base_addr = MVME_I596_BASE; dev->irq = (unsigned) MVME16x_IRQ_I596; goto found; diff --git a/drivers/net/ethernet/i825xx/sni_82596.c b/drivers/net/ethernet/i825xx/sni_82596.c index 6436a98c5953f..90e237fdfbd0e 100644 --- a/drivers/net/ethernet/i825xx/sni_82596.c +++ b/drivers/net/ethernet/i825xx/sni_82596.c @@ -123,9 +123,10 @@ static int sni_82596_probe(struct platform_device *dev) netdevice->dev_addr[5] = readb(eth_addr + 0x06); iounmap(eth_addr); - if (!netdevice->irq) { + if (netdevice->irq < 0) { printk(KERN_ERR "%s: IRQ not found for i82596 at 0x%lx\n", __FILE__, netdevice->base_addr); + retval = netdevice->irq; goto probe_failed; } diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index ecfe588f330ef..bc313d85fe13a 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1462,8 +1462,6 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) netdev_tx_t ret = NETDEV_TX_OK; if (test_bit(0, &adapter->resetting)) { - if (!netif_subqueue_stopped(netdev, skb)) - netif_stop_subqueue(netdev, queue_num); dev_kfree_skb_any(skb); tx_send_failed++; @@ -2250,8 +2248,10 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter, * flush reset queue and process this reset */ if (adapter->force_reset_recovery && !list_empty(&adapter->rwi_list)) { - list_for_each_safe(entry, tmp_entry, &adapter->rwi_list) + list_for_each_safe(entry, tmp_entry, &adapter->rwi_list) { list_del(entry); + kfree(list_entry(entry, struct ibmvnic_rwi, list)); + } } rwi->reset_reason = reason; list_add_tail(&rwi->list, &adapter->rwi_list); @@ -3260,11 +3260,25 @@ static void ibmvnic_send_req_caps(struct ibmvnic_adapter *adapter, int retry) struct device *dev = &adapter->vdev->dev; union ibmvnic_crq crq; int max_entries; + int cap_reqs; + + /* We send out 6 or 7 REQUEST_CAPABILITY CRQs below (depending on + * the PROMISC flag). Initialize this count upfront. When the tasklet + * receives a response to all of these, it will send the next protocol + * message (QUERY_IP_OFFLOAD). + */ + if (!(adapter->netdev->flags & IFF_PROMISC) || + adapter->promisc_supported) + cap_reqs = 7; + else + cap_reqs = 6; if (!retry) { /* Sub-CRQ entries are 32 byte long */ int entries_page = 4 * PAGE_SIZE / (sizeof(u64) * 4); + atomic_set(&adapter->running_cap_crqs, cap_reqs); + if (adapter->min_tx_entries_per_subcrq > entries_page || adapter->min_rx_add_entries_per_subcrq > entries_page) { dev_err(dev, "Fatal, invalid entries per sub-crq\n"); @@ -3325,44 +3339,45 @@ static void ibmvnic_send_req_caps(struct ibmvnic_adapter *adapter, int retry) adapter->opt_rx_comp_queues; adapter->req_rx_add_queues = adapter->max_rx_add_queues; + } else { + atomic_add(cap_reqs, &adapter->running_cap_crqs); } - memset(&crq, 0, sizeof(crq)); crq.request_capability.first = IBMVNIC_CRQ_CMD; crq.request_capability.cmd = REQUEST_CAPABILITY; crq.request_capability.capability = cpu_to_be16(REQ_TX_QUEUES); crq.request_capability.number = cpu_to_be64(adapter->req_tx_queues); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); crq.request_capability.capability = cpu_to_be16(REQ_RX_QUEUES); crq.request_capability.number = cpu_to_be64(adapter->req_rx_queues); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); crq.request_capability.capability = cpu_to_be16(REQ_RX_ADD_QUEUES); crq.request_capability.number = cpu_to_be64(adapter->req_rx_add_queues); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); crq.request_capability.capability = cpu_to_be16(REQ_TX_ENTRIES_PER_SUBCRQ); crq.request_capability.number = cpu_to_be64(adapter->req_tx_entries_per_subcrq); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); crq.request_capability.capability = cpu_to_be16(REQ_RX_ADD_ENTRIES_PER_SUBCRQ); crq.request_capability.number = cpu_to_be64(adapter->req_rx_add_entries_per_subcrq); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); crq.request_capability.capability = cpu_to_be16(REQ_MTU); crq.request_capability.number = cpu_to_be64(adapter->req_mtu); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); if (adapter->netdev->flags & IFF_PROMISC) { @@ -3370,16 +3385,21 @@ static void ibmvnic_send_req_caps(struct ibmvnic_adapter *adapter, int retry) crq.request_capability.capability = cpu_to_be16(PROMISC_REQUESTED); crq.request_capability.number = cpu_to_be64(1); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); } } else { crq.request_capability.capability = cpu_to_be16(PROMISC_REQUESTED); crq.request_capability.number = cpu_to_be64(0); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); } + + /* Keep at end to catch any discrepancy between expected and actual + * CRQs sent. + */ + WARN_ON(cap_reqs != 0); } static int pending_scrq(struct ibmvnic_adapter *adapter, @@ -3784,118 +3804,132 @@ static void send_map_query(struct ibmvnic_adapter *adapter) static void send_cap_queries(struct ibmvnic_adapter *adapter) { union ibmvnic_crq crq; + int cap_reqs; + + /* We send out 25 QUERY_CAPABILITY CRQs below. Initialize this count + * upfront. When the tasklet receives a response to all of these, it + * can send out the next protocol messaage (REQUEST_CAPABILITY). + */ + cap_reqs = 25; + + atomic_set(&adapter->running_cap_crqs, cap_reqs); - atomic_set(&adapter->running_cap_crqs, 0); memset(&crq, 0, sizeof(crq)); crq.query_capability.first = IBMVNIC_CRQ_CMD; crq.query_capability.cmd = QUERY_CAPABILITY; crq.query_capability.capability = cpu_to_be16(MIN_TX_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MIN_RX_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MIN_RX_ADD_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_TX_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_RX_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_RX_ADD_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MIN_TX_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MIN_RX_ADD_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_TX_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_RX_ADD_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(TCP_IP_OFFLOAD); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(PROMISC_SUPPORTED); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MIN_MTU); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_MTU); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_MULTICAST_FILTERS); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(VLAN_HEADER_INSERTION); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(RX_VLAN_HEADER_INSERTION); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_TX_SG_ENTRIES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(RX_SG_SUPPORTED); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_TX_COMP_SUB_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_RX_COMP_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_RX_BUFADD_Q_PER_RX_COMP_Q); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_TX_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_RXBA_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(TX_RX_DESC_REQ); - atomic_inc(&adapter->running_cap_crqs); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + /* Keep at end to catch any discrepancy between expected and actual + * CRQs sent. + */ + WARN_ON(cap_reqs != 0); } static void handle_vpd_size_rsp(union ibmvnic_crq *crq, @@ -4162,6 +4196,8 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq, char *name; atomic_dec(&adapter->running_cap_crqs); + netdev_dbg(adapter->netdev, "Outstanding request-caps: %d\n", + atomic_read(&adapter->running_cap_crqs)); switch (be16_to_cpu(crq->request_capability_rsp.capability)) { case REQ_TX_QUEUES: req_value = &adapter->req_tx_queues; @@ -4277,6 +4313,14 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, return 0; } + if (adapter->failover_pending) { + adapter->init_done_rc = -EAGAIN; + netdev_dbg(netdev, "Failover pending, ignoring login response\n"); + complete(&adapter->init_done); + /* login response buffer will be released on reset */ + return 0; + } + netdev->mtu = adapter->req_mtu - ETH_HLEN; netdev_dbg(adapter->netdev, "Login Response Buffer:\n"); @@ -4781,12 +4825,6 @@ static void ibmvnic_tasklet(void *data) ibmvnic_handle_crq(crq, adapter); crq->generic.first = 0; } - - /* remain in tasklet until all - * capabilities responses are received - */ - if (!adapter->wait_capability) - done = true; } /* if capabilities CRQ's were sent in this tasklet, the following * tasklet must wait until all responses are received @@ -4928,6 +4966,9 @@ static int init_crq_queue(struct ibmvnic_adapter *adapter) crq->cur = 0; spin_lock_init(&crq->lock); + /* process any CRQs that were queued before we enabled interrupts */ + tasklet_schedule(&adapter->tasklet); + return retrc; req_irq_failed: @@ -4981,6 +5022,15 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter) release_sub_crqs(adapter, 0); rc = init_sub_crqs(adapter); } else { + /* no need to reinitialize completely, but we do + * need to clean up transmits that were in flight + * when we processed the reset. Failure to do so + * will confound the upper layer, usually TCP, by + * creating the illusion of transmits that are + * awaiting completion. + */ + clean_tx_pools(adapter); + rc = reset_sub_crq_queues(adapter); } } else { diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 911b3d2a94e1c..70962967d7141 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -2435,11 +2435,15 @@ static void e100_get_drvinfo(struct net_device *netdev, sizeof(info->bus_info)); } -#define E100_PHY_REGS 0x1C +#define E100_PHY_REGS 0x1D static int e100_get_regs_len(struct net_device *netdev) { struct nic *nic = netdev_priv(netdev); - return 1 + E100_PHY_REGS + sizeof(nic->mem->dump_buf); + + /* We know the number of registers, and the size of the dump buffer. + * Calculate the total size in bytes. + */ + return (1 + E100_PHY_REGS) * sizeof(u32) + sizeof(nic->mem->dump_buf); } static void e100_get_regs(struct net_device *netdev, @@ -2453,14 +2457,18 @@ static void e100_get_regs(struct net_device *netdev, buff[0] = ioread8(&nic->csr->scb.cmd_hi) << 24 | ioread8(&nic->csr->scb.cmd_lo) << 16 | ioread16(&nic->csr->scb.status); - for (i = E100_PHY_REGS; i >= 0; i--) - buff[1 + E100_PHY_REGS - i] = - mdio_read(netdev, nic->mii.phy_id, i); + for (i = 0; i < E100_PHY_REGS; i++) + /* Note that we read the registers in reverse order. This + * ordering is the ABI apparently used by ethtool and other + * applications. + */ + buff[1 + i] = mdio_read(netdev, nic->mii.phy_id, + E100_PHY_REGS - 1 - i); memset(nic->mem->dump_buf, 0, sizeof(nic->mem->dump_buf)); e100_exec_cb(nic, NULL, e100_dump); msleep(10); - memcpy(&buff[2 + E100_PHY_REGS], nic->mem->dump_buf, - sizeof(nic->mem->dump_buf)); + memcpy(&buff[1 + E100_PHY_REGS], nic->mem->dump_buf, + sizeof(nic->mem->dump_buf)); } static void e100_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index a1fab77b2096a..1241b4734896f 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -995,6 +995,8 @@ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link) { u32 reg = link << (E1000_LTRV_REQ_SHIFT + E1000_LTRV_NOSNOOP_SHIFT) | link << E1000_LTRV_REQ_SHIFT | E1000_LTRV_SEND; + u32 max_ltr_enc_d = 0; /* maximum LTR decoded by platform */ + u32 lat_enc_d = 0; /* latency decoded */ u16 lat_enc = 0; /* latency encoded */ if (link) { @@ -1048,7 +1050,17 @@ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link) E1000_PCI_LTR_CAP_LPT + 2, &max_nosnoop); max_ltr_enc = max_t(u16, max_snoop, max_nosnoop); - if (lat_enc > max_ltr_enc) + lat_enc_d = (lat_enc & E1000_LTRV_VALUE_MASK) * + (1U << (E1000_LTRV_SCALE_FACTOR * + ((lat_enc & E1000_LTRV_SCALE_MASK) + >> E1000_LTRV_SCALE_SHIFT))); + + max_ltr_enc_d = (max_ltr_enc & E1000_LTRV_VALUE_MASK) * + (1U << (E1000_LTRV_SCALE_FACTOR * + ((max_ltr_enc & E1000_LTRV_SCALE_MASK) + >> E1000_LTRV_SCALE_SHIFT))); + + if (lat_enc_d > max_ltr_enc_d) lat_enc = max_ltr_enc; } diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h index 1502895eb45dd..e757896287eba 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.h +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h @@ -274,8 +274,11 @@ /* Latency Tolerance Reporting */ #define E1000_LTRV 0x000F8 +#define E1000_LTRV_VALUE_MASK 0x000003FF #define E1000_LTRV_SCALE_MAX 5 #define E1000_LTRV_SCALE_FACTOR 5 +#define E1000_LTRV_SCALE_SHIFT 10 +#define E1000_LTRV_SCALE_MASK 0x00001C00 #define E1000_LTRV_REQ_SHIFT 15 #define E1000_LTRV_NOSNOOP_SHIFT 16 #define E1000_LTRV_SEND (1 << 30) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index e571c6116c4b7..4c8c31692e9e0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -169,6 +169,7 @@ enum i40e_vsi_state_t { __I40E_VSI_OVERFLOW_PROMISC, __I40E_VSI_REINIT_REQUESTED, __I40E_VSI_DOWN_REQUESTED, + __I40E_VSI_RELEASING, /* This must be last as it determines the size of the BITMAP */ __I40E_VSI_STATE_SIZE__, }; @@ -181,7 +182,6 @@ enum i40e_interrupt_policy { struct i40e_lump_tracking { u16 num_entries; - u16 search_hint; u16 list[0]; #define I40E_PILE_VALID_BIT 0x8000 #define I40E_IWARP_IRQ_PILE_ID (I40E_PILE_VALID_BIT - 2) @@ -756,12 +756,12 @@ struct i40e_vsi { struct rtnl_link_stats64 net_stats_offsets; struct i40e_eth_stats eth_stats; struct i40e_eth_stats eth_stats_offsets; - u32 tx_restart; - u32 tx_busy; + u64 tx_restart; + u64 tx_busy; u64 tx_linearize; u64 tx_force_wb; - u32 rx_buf_failed; - u32 rx_page_failed; + u64 rx_buf_failed; + u64 rx_page_failed; /* These are containers of ring pointers, allocated at run-time */ struct i40e_ring **rx_rings; @@ -1146,6 +1146,7 @@ void i40e_ptp_save_hw_time(struct i40e_pf *pf); void i40e_ptp_restore_hw_time(struct i40e_pf *pf); void i40e_ptp_init(struct i40e_pf *pf); void i40e_ptp_stop(struct i40e_pf *pf); +int i40e_update_adq_vsi_queues(struct i40e_vsi *vsi, int vsi_offset); int i40e_is_vsi_uplink_mode_veb(struct i40e_vsi *vsi); i40e_status i40e_get_partition_bw_setting(struct i40e_pf *pf); i40e_status i40e_set_partition_bw_setting(struct i40e_pf *pf); diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 99ea543dd2453..276f04c0e51d6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -234,7 +234,7 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) (unsigned long int)vsi->net_stats_offsets.rx_compressed, (unsigned long int)vsi->net_stats_offsets.tx_compressed); dev_info(&pf->pdev->dev, - " tx_restart = %d, tx_busy = %d, rx_buf_failed = %d, rx_page_failed = %d\n", + " tx_restart = %llu, tx_busy = %llu, rx_buf_failed = %llu, rx_page_failed = %llu\n", vsi->tx_restart, vsi->tx_busy, vsi->rx_buf_failed, vsi->rx_page_failed); rcu_read_lock(); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 2cc4f63b1e910..689deecb4e1a3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2578,15 +2578,16 @@ static void i40e_diag_test(struct net_device *netdev, set_bit(__I40E_TESTING, pf->state); + if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) || + test_bit(__I40E_RESET_INTR_RECEIVED, pf->state)) { + dev_warn(&pf->pdev->dev, + "Cannot start offline testing when PF is in reset state.\n"); + goto skip_ol_tests; + } + if (i40e_active_vfs(pf) || i40e_active_vmdqs(pf)) { dev_warn(&pf->pdev->dev, "Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n"); - data[I40E_ETH_TEST_REG] = 1; - data[I40E_ETH_TEST_EEPROM] = 1; - data[I40E_ETH_TEST_INTR] = 1; - data[I40E_ETH_TEST_LINK] = 1; - eth_test->flags |= ETH_TEST_FL_FAILED; - clear_bit(__I40E_TESTING, pf->state); goto skip_ol_tests; } @@ -2633,9 +2634,17 @@ static void i40e_diag_test(struct net_device *netdev, data[I40E_ETH_TEST_INTR] = 0; } -skip_ol_tests: - netif_info(pf, drv, netdev, "testing finished\n"); + return; + +skip_ol_tests: + data[I40E_ETH_TEST_REG] = 1; + data[I40E_ETH_TEST_EEPROM] = 1; + data[I40E_ETH_TEST_INTR] = 1; + data[I40E_ETH_TEST_LINK] = 1; + eth_test->flags |= ETH_TEST_FL_FAILED; + clear_bit(__I40E_TESTING, pf->state); + netif_info(pf, drv, netdev, "testing failed\n"); } static void i40e_get_wol(struct net_device *netdev, diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index bfc59813e6d18..66f28daa05a45 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -107,6 +107,24 @@ MODULE_VERSION(DRV_VERSION); static struct workqueue_struct *i40e_wq; +static void netdev_hw_addr_refcnt(struct i40e_mac_filter *f, + struct net_device *netdev, int delta) +{ + struct netdev_hw_addr *ha; + + if (!f || !netdev) + return; + + netdev_for_each_mc_addr(ha, netdev) { + if (ether_addr_equal(ha->addr, f->macaddr)) { + ha->refcount += delta; + if (ha->refcount <= 0) + ha->refcount = 1; + break; + } + } +} + /** * i40e_allocate_dma_mem_d - OS specific memory alloc for shared code * @hw: pointer to the HW structure @@ -186,10 +204,6 @@ int i40e_free_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem) * @id: an owner id to stick on the items assigned * * Returns the base item index of the lump, or negative for error - * - * The search_hint trick and lack of advanced fit-finding only work - * because we're highly likely to have all the same size lump requests. - * Linear search time and any fragmentation should be minimal. **/ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile, u16 needed, u16 id) @@ -204,8 +218,21 @@ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile, return -EINVAL; } - /* start the linear search with an imperfect hint */ - i = pile->search_hint; + /* Allocate last queue in the pile for FDIR VSI queue + * so it doesn't fragment the qp_pile + */ + if (pile == pf->qp_pile && pf->vsi[id]->type == I40E_VSI_FDIR) { + if (pile->list[pile->num_entries - 1] & I40E_PILE_VALID_BIT) { + dev_err(&pf->pdev->dev, + "Cannot allocate queue %d for I40E_VSI_FDIR\n", + pile->num_entries - 1); + return -ENOMEM; + } + pile->list[pile->num_entries - 1] = id | I40E_PILE_VALID_BIT; + return pile->num_entries - 1; + } + + i = 0; while (i < pile->num_entries) { /* skip already allocated entries */ if (pile->list[i] & I40E_PILE_VALID_BIT) { @@ -224,7 +251,6 @@ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile, for (j = 0; j < needed; j++) pile->list[i+j] = id | I40E_PILE_VALID_BIT; ret = i; - pile->search_hint = i + j; break; } @@ -247,7 +273,7 @@ static int i40e_put_lump(struct i40e_lump_tracking *pile, u16 index, u16 id) { int valid_id = (id | I40E_PILE_VALID_BIT); int count = 0; - int i; + u16 i; if (!pile || index >= pile->num_entries) return -EINVAL; @@ -259,8 +285,6 @@ static int i40e_put_lump(struct i40e_lump_tracking *pile, u16 index, u16 id) count++; } - if (count && index < pile->search_hint) - pile->search_hint = index; return count; } @@ -780,9 +804,9 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) struct rtnl_link_stats64 *ns; /* netdev stats */ struct i40e_eth_stats *oes; struct i40e_eth_stats *es; /* device's eth stats */ - u32 tx_restart, tx_busy; + u64 tx_restart, tx_busy; struct i40e_ring *p; - u32 rx_page, rx_buf; + u64 rx_page, rx_buf; u64 bytes, packets; unsigned int start; u64 tx_linearize; @@ -1776,6 +1800,7 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, bool is_add) { struct i40e_pf *pf = vsi->back; + u16 num_tc_qps = 0; u16 sections = 0; u8 netdev_tc = 0; u16 numtc = 1; @@ -1783,13 +1808,37 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, u8 offset; u16 qmap; int i; - u16 num_tc_qps = 0; sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID; offset = 0; + /* zero out queue mapping, it will get updated on the end of the function */ + memset(ctxt->info.queue_mapping, 0, sizeof(ctxt->info.queue_mapping)); + + if (vsi->type == I40E_VSI_MAIN) { + /* This code helps add more queue to the VSI if we have + * more cores than RSS can support, the higher cores will + * be served by ATR or other filters. Furthermore, the + * non-zero req_queue_pairs says that user requested a new + * queue count via ethtool's set_channels, so use this + * value for queues distribution across traffic classes + * We need at least one queue pair for the interface + * to be usable as we see in else statement. + */ + if (vsi->req_queue_pairs > 0) + vsi->num_queue_pairs = vsi->req_queue_pairs; + else if (pf->flags & I40E_FLAG_MSIX_ENABLED) + vsi->num_queue_pairs = pf->num_lan_msix; + else + vsi->num_queue_pairs = 1; + } /* Number of queues per enabled TC */ - num_tc_qps = vsi->alloc_queue_pairs; + if (vsi->type == I40E_VSI_MAIN || + (vsi->type == I40E_VSI_SRIOV && vsi->num_queue_pairs != 0)) + num_tc_qps = vsi->num_queue_pairs; + else + num_tc_qps = vsi->alloc_queue_pairs; + if (enabled_tc && (vsi->back->flags & I40E_FLAG_DCB_ENABLED)) { /* Find numtc from enabled TC bitmap */ for (i = 0, numtc = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { @@ -1867,15 +1916,11 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, } ctxt->info.tc_mapping[i] = cpu_to_le16(qmap); } - - /* Set actual Tx/Rx queue pairs */ - vsi->num_queue_pairs = offset; - if ((vsi->type == I40E_VSI_MAIN) && (numtc == 1)) { - if (vsi->req_queue_pairs > 0) - vsi->num_queue_pairs = vsi->req_queue_pairs; - else if (pf->flags & I40E_FLAG_MSIX_ENABLED) - vsi->num_queue_pairs = pf->num_lan_msix; - } + /* Do not change previously set num_queue_pairs for PFs and VFs*/ + if ((vsi->type == I40E_VSI_MAIN && numtc != 1) || + (vsi->type == I40E_VSI_SRIOV && vsi->num_queue_pairs == 0) || + (vsi->type != I40E_VSI_MAIN && vsi->type != I40E_VSI_SRIOV)) + vsi->num_queue_pairs = offset; /* Scheduler section valid can only be set for ADD VSI */ if (is_add) { @@ -2005,6 +2050,7 @@ static void i40e_undo_add_filter_entries(struct i40e_vsi *vsi, hlist_for_each_entry_safe(new, h, from, hlist) { /* We can simply free the wrapper structure */ hlist_del(&new->hlist); + netdev_hw_addr_refcnt(new->f, vsi->netdev, -1); kfree(new); } } @@ -2352,6 +2398,10 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) &tmp_add_list, &tmp_del_list, vlan_filters); + + hlist_for_each_entry(new, &tmp_add_list, hlist) + netdev_hw_addr_refcnt(new->f, vsi->netdev, 1); + if (retval) goto err_no_memory_locked; @@ -2484,6 +2534,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) if (new->f->state == I40E_FILTER_NEW) new->f->state = new->state; hlist_del(&new->hlist); + netdev_hw_addr_refcnt(new->f, vsi->netdev, -1); kfree(new); } spin_unlock_bh(&vsi->mac_filter_hash_lock); @@ -2609,7 +2660,8 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf) for (v = 0; v < pf->num_alloc_vsi; v++) { if (pf->vsi[v] && - (pf->vsi[v]->flags & I40E_VSI_FLAG_FILTER_CHANGED)) { + (pf->vsi[v]->flags & I40E_VSI_FLAG_FILTER_CHANGED) && + !test_bit(__I40E_VSI_RELEASING, pf->vsi[v]->state)) { int ret = i40e_sync_vsi_filters(pf->vsi[v]); if (ret) { @@ -4817,7 +4869,8 @@ static void i40e_clear_interrupt_scheme(struct i40e_pf *pf) { int i; - i40e_free_misc_vector(pf); + if (test_bit(__I40E_MISC_IRQ_REQUESTED, pf->state)) + i40e_free_misc_vector(pf); i40e_put_lump(pf->irq_pile, pf->iwarp_base_vector, I40E_IWARP_IRQ_PILE_ID); @@ -5370,6 +5423,58 @@ static void i40e_vsi_update_queue_map(struct i40e_vsi *vsi, sizeof(vsi->info.tc_mapping)); } +/** + * i40e_update_adq_vsi_queues - update queue mapping for ADq VSI + * @vsi: the VSI being reconfigured + * @vsi_offset: offset from main VF VSI + */ +int i40e_update_adq_vsi_queues(struct i40e_vsi *vsi, int vsi_offset) +{ + struct i40e_vsi_context ctxt = {}; + struct i40e_pf *pf; + struct i40e_hw *hw; + int ret; + + if (!vsi) + return I40E_ERR_PARAM; + pf = vsi->back; + hw = &pf->hw; + + ctxt.seid = vsi->seid; + ctxt.pf_num = hw->pf_id; + ctxt.vf_num = vsi->vf_id + hw->func_caps.vf_base_id + vsi_offset; + ctxt.uplink_seid = vsi->uplink_seid; + ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL; + ctxt.flags = I40E_AQ_VSI_TYPE_VF; + ctxt.info = vsi->info; + + i40e_vsi_setup_queue_map(vsi, &ctxt, vsi->tc_config.enabled_tc, + false); + if (vsi->reconfig_rss) { + vsi->rss_size = min_t(int, pf->alloc_rss_size, + vsi->num_queue_pairs); + ret = i40e_vsi_config_rss(vsi); + if (ret) { + dev_info(&pf->pdev->dev, "Failed to reconfig rss for num_queues\n"); + return ret; + } + vsi->reconfig_rss = false; + } + + ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); + if (ret) { + dev_info(&pf->pdev->dev, "Update vsi config failed, err %s aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, hw->aq.asq_last_status)); + return ret; + } + /* update the local VSI info with updated queue map */ + i40e_vsi_update_queue_map(vsi, &ctxt); + vsi->info.valid_sections = 0; + + return ret; +} + /** * i40e_vsi_config_tc - Configure VSI Tx Scheduler for given TC map * @vsi: VSI to be configured @@ -5660,24 +5765,6 @@ static void i40e_remove_queue_channels(struct i40e_vsi *vsi) INIT_LIST_HEAD(&vsi->ch_list); } -/** - * i40e_is_any_channel - channel exist or not - * @vsi: ptr to VSI to which channels are associated with - * - * Returns true or false if channel(s) exist for associated VSI or not - **/ -static bool i40e_is_any_channel(struct i40e_vsi *vsi) -{ - struct i40e_channel *ch, *ch_tmp; - - list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) { - if (ch->initialized) - return true; - } - - return false; -} - /** * i40e_get_max_queues_for_channel * @vsi: ptr to VSI to which channels are associated with @@ -6185,26 +6272,15 @@ int i40e_create_queue_channel(struct i40e_vsi *vsi, /* By default we are in VEPA mode, if this is the first VF/VMDq * VSI to be added switch to VEB mode. */ - if ((!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) || - (!i40e_is_any_channel(vsi))) { - if (!is_power_of_2(vsi->tc_config.tc_info[0].qcount)) { - dev_dbg(&pf->pdev->dev, - "Failed to create channel. Override queues (%u) not power of 2\n", - vsi->tc_config.tc_info[0].qcount); - return -EINVAL; - } - if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) { - pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; + if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) { + pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; - if (vsi->type == I40E_VSI_MAIN) { - if (pf->flags & I40E_FLAG_TC_MQPRIO) - i40e_do_reset(pf, I40E_PF_RESET_FLAG, - true); - else - i40e_do_reset_safe(pf, - I40E_PF_RESET_FLAG); - } + if (vsi->type == I40E_VSI_MAIN) { + if (pf->flags & I40E_FLAG_TC_MQPRIO) + i40e_do_reset(pf, I40E_PF_RESET_FLAG, true); + else + i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG); } /* now onwards for main VSI, number of queues will be value * of TC0's queue count @@ -7048,42 +7124,43 @@ static void i40e_free_macvlan_channels(struct i40e_vsi *vsi) static int i40e_fwd_ring_up(struct i40e_vsi *vsi, struct net_device *vdev, struct i40e_fwd_adapter *fwd) { + struct i40e_channel *ch = NULL, *ch_tmp, *iter; int ret = 0, num_tc = 1, i, aq_err; - struct i40e_channel *ch, *ch_tmp; struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; - if (list_empty(&vsi->macvlan_list)) - return -EINVAL; - /* Go through the list and find an available channel */ - list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) { - if (!i40e_is_channel_macvlan(ch)) { - ch->fwd = fwd; + list_for_each_entry_safe(iter, ch_tmp, &vsi->macvlan_list, list) { + if (!i40e_is_channel_macvlan(iter)) { + iter->fwd = fwd; /* record configuration for macvlan interface in vdev */ for (i = 0; i < num_tc; i++) netdev_bind_sb_channel_queue(vsi->netdev, vdev, i, - ch->num_queue_pairs, - ch->base_queue); - for (i = 0; i < ch->num_queue_pairs; i++) { + iter->num_queue_pairs, + iter->base_queue); + for (i = 0; i < iter->num_queue_pairs; i++) { struct i40e_ring *tx_ring, *rx_ring; u16 pf_q; - pf_q = ch->base_queue + i; + pf_q = iter->base_queue + i; /* Get to TX ring ptr */ tx_ring = vsi->tx_rings[pf_q]; - tx_ring->ch = ch; + tx_ring->ch = iter; /* Get the RX ring ptr */ rx_ring = vsi->rx_rings[pf_q]; - rx_ring->ch = ch; + rx_ring->ch = iter; } + ch = iter; break; } } + if (!ch) + return -EINVAL; + /* Guarantee all rings are updated before we update the * MAC address filter. */ @@ -7496,12 +7573,20 @@ static int i40e_setup_tc(struct net_device *netdev, void *type_data) vsi->seid); need_reset = true; goto exit; - } else { - dev_info(&vsi->back->pdev->dev, - "Setup channel (id:%u) utilizing num_queues %d\n", - vsi->seid, vsi->tc_config.tc_info[0].qcount); + } else if (enabled_tc && + (!is_power_of_2(vsi->tc_config.tc_info[0].qcount))) { + netdev_info(netdev, + "Failed to create channel. Override queues (%u) not power of 2\n", + vsi->tc_config.tc_info[0].qcount); + ret = -EINVAL; + need_reset = true; + goto exit; } + dev_info(&vsi->back->pdev->dev, + "Setup channel (id:%u) utilizing num_queues %d\n", + vsi->seid, vsi->tc_config.tc_info[0].qcount); + if (pf->flags & I40E_FLAG_TC_MQPRIO) { if (vsi->mqprio_qopt.max_rate[0]) { u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0]; @@ -8027,6 +8112,11 @@ static int i40e_configure_clsflower(struct i40e_vsi *vsi, return -EOPNOTSUPP; } + if (!tc) { + dev_err(&pf->pdev->dev, "Unable to add filter because of invalid destination"); + return -EINVAL; + } + if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) || test_bit(__I40E_RESET_INTR_RECEIVED, pf->state)) return -EBUSY; @@ -8066,9 +8156,8 @@ static int i40e_configure_clsflower(struct i40e_vsi *vsi, err = i40e_add_del_cloud_filter(vsi, filter, true); if (err) { - dev_err(&pf->pdev->dev, - "Failed to add cloud filter, err %s\n", - i40e_stat_str(&pf->hw, err)); + dev_err(&pf->pdev->dev, "Failed to add cloud filter, err %d\n", + err); goto err; } @@ -8253,6 +8342,27 @@ int i40e_open(struct net_device *netdev) return 0; } +/** + * i40e_netif_set_realnum_tx_rx_queues - Update number of tx/rx queues + * @vsi: vsi structure + * + * This updates netdev's number of tx/rx queues + * + * Returns status of setting tx/rx queues + **/ +static int i40e_netif_set_realnum_tx_rx_queues(struct i40e_vsi *vsi) +{ + int ret; + + ret = netif_set_real_num_rx_queues(vsi->netdev, + vsi->num_queue_pairs); + if (ret) + return ret; + + return netif_set_real_num_tx_queues(vsi->netdev, + vsi->num_queue_pairs); +} + /** * i40e_vsi_open - * @vsi: the VSI to open @@ -8289,13 +8399,7 @@ int i40e_vsi_open(struct i40e_vsi *vsi) goto err_setup_rx; /* Notify the stack of the actual queue counts. */ - err = netif_set_real_num_tx_queues(vsi->netdev, - vsi->num_queue_pairs); - if (err) - goto err_set_queues; - - err = netif_set_real_num_rx_queues(vsi->netdev, - vsi->num_queue_pairs); + err = i40e_netif_set_realnum_tx_rx_queues(vsi); if (err) goto err_set_queues; @@ -9616,7 +9720,7 @@ static int i40e_get_capabilities(struct i40e_pf *pf, if (pf->hw.aq.asq_last_status == I40E_AQ_RC_ENOMEM) { /* retry with a larger buffer */ buf_len = data_size; - } else if (pf->hw.aq.asq_last_status != I40E_AQ_RC_OK) { + } else if (pf->hw.aq.asq_last_status != I40E_AQ_RC_OK || err) { dev_info(&pf->pdev->dev, "capability discovery failed, err %s aq_err %s\n", i40e_stat_str(&pf->hw, err), @@ -9968,7 +10072,7 @@ static int i40e_reset(struct i40e_pf *pf) **/ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) { - int old_recovery_mode_bit = test_bit(__I40E_RECOVERY_MODE, pf->state); + const bool is_recovery_mode_reported = i40e_check_recovery_mode(pf); struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; struct i40e_hw *hw = &pf->hw; i40e_status ret; @@ -9976,13 +10080,11 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) int v; if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) && - i40e_check_recovery_mode(pf)) { + is_recovery_mode_reported) i40e_set_ethtool_ops(pf->vsi[pf->lan_vsi]->netdev); - } if (test_bit(__I40E_DOWN, pf->state) && - !test_bit(__I40E_RECOVERY_MODE, pf->state) && - !old_recovery_mode_bit) + !test_bit(__I40E_RECOVERY_MODE, pf->state)) goto clear_recovery; dev_dbg(&pf->pdev->dev, "Rebuilding internal switch\n"); @@ -9996,15 +10098,9 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) } i40e_get_oem_version(&pf->hw); - if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) && - ((hw->aq.fw_maj_ver == 4 && hw->aq.fw_min_ver <= 33) || - hw->aq.fw_maj_ver < 4) && hw->mac.type == I40E_MAC_XL710) { - /* The following delay is necessary for 4.33 firmware and older - * to recover after EMP reset. 200 ms should suffice but we - * put here 300 ms to be sure that FW is ready to operate - * after reset. - */ - mdelay(300); + if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state)) { + /* The following delay is necessary for firmware update. */ + mdelay(1000); } /* re-verify the eeprom if we just had an EMP reset */ @@ -10015,13 +10111,12 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) * accordingly with regard to resources initialization * and deinitialization */ - if (test_bit(__I40E_RECOVERY_MODE, pf->state) || - old_recovery_mode_bit) { + if (test_bit(__I40E_RECOVERY_MODE, pf->state)) { if (i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities)) goto end_unlock; - if (test_bit(__I40E_RECOVERY_MODE, pf->state)) { + if (is_recovery_mode_reported) { /* we're staying in recovery mode so we'll reinitialize * misc vector here */ @@ -11300,7 +11395,6 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf) return -ENOMEM; pf->irq_pile->num_entries = vectors; - pf->irq_pile->search_hint = 0; /* track first vector for misc interrupts, ignore return */ (void)i40e_get_lump(pf, pf->irq_pile, 1, I40E_PILE_VALID_BIT - 1); @@ -12051,7 +12145,6 @@ static int i40e_sw_init(struct i40e_pf *pf) goto sw_init_done; } pf->qp_pile->num_entries = pf->hw.func_caps.num_tx_qp; - pf->qp_pile->search_hint = 0; pf->tx_timeout_recovery_level = 1; @@ -13388,7 +13481,7 @@ int i40e_vsi_release(struct i40e_vsi *vsi) dev_info(&pf->pdev->dev, "Can't remove PF VSI\n"); return -ENODEV; } - + set_bit(__I40E_VSI_RELEASING, vsi->state); uplink_seid = vsi->uplink_seid; if (vsi->type != I40E_VSI_SRIOV) { if (vsi->netdev_registered) { @@ -13718,6 +13811,9 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type, case I40E_VSI_MAIN: case I40E_VSI_VMDQ2: ret = i40e_config_netdev(vsi); + if (ret) + goto err_netdev; + ret = i40e_netif_set_realnum_tx_rx_queues(vsi); if (ret) goto err_netdev; ret = register_netdev(vsi->netdev); @@ -14964,8 +15060,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && hw->aq.api_min_ver > I40E_FW_MINOR_VERSION(hw)) - dev_info(&pdev->dev, - "The driver for the device detected a newer version of the NVM image v%u.%u than expected v%u.%u. Please install the most recent version of the network driver.\n", + dev_dbg(&pdev->dev, + "The driver for the device detected a newer version of the NVM image v%u.%u than v%u.%u.\n", hw->aq.api_maj_ver, hw->aq.api_min_ver, I40E_FW_API_VERSION_MAJOR, @@ -15490,7 +15586,7 @@ static void i40e_remove(struct pci_dev *pdev) * remediation. **/ static pci_ers_result_t i40e_pci_error_detected(struct pci_dev *pdev, - enum pci_channel_state error) + pci_channel_state_t error) { struct i40e_pf *pf = pci_get_drvdata(pdev); diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index e561073054865..4080fdacca4cc 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -621,14 +621,13 @@ static int i40e_config_vsi_rx_queue(struct i40e_vf *vf, u16 vsi_id, u16 vsi_queue_id, struct virtchnl_rxq_info *info) { + u16 pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id); struct i40e_pf *pf = vf->pf; + struct i40e_vsi *vsi = pf->vsi[vf->lan_vsi_idx]; struct i40e_hw *hw = &pf->hw; struct i40e_hmc_obj_rxq rx_ctx; - u16 pf_queue_id; int ret = 0; - pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id); - /* clear the context structure first */ memset(&rx_ctx, 0, sizeof(struct i40e_hmc_obj_rxq)); @@ -666,6 +665,10 @@ static int i40e_config_vsi_rx_queue(struct i40e_vf *vf, u16 vsi_id, } rx_ctx.rxmax = info->max_pkt_size; + /* if port VLAN is configured increase the max packet size */ + if (vsi->info.pvid) + rx_ctx.rxmax += VLAN_HLEN; + /* enable 32bytes desc always */ rx_ctx.dsize = 1; @@ -1801,6 +1804,32 @@ static int i40e_vc_send_resp_to_vf(struct i40e_vf *vf, return i40e_vc_send_msg_to_vf(vf, opcode, retval, NULL, 0); } +/** + * i40e_sync_vf_state + * @vf: pointer to the VF info + * @state: VF state + * + * Called from a VF message to synchronize the service with a potential + * VF reset state + **/ +static bool i40e_sync_vf_state(struct i40e_vf *vf, enum i40e_vf_states state) +{ + int i; + + /* When handling some messages, it needs VF state to be set. + * It is possible that this flag is cleared during VF reset, + * so there is a need to wait until the end of the reset to + * handle the request message correctly. + */ + for (i = 0; i < I40E_VF_STATE_WAIT_COUNT; i++) { + if (test_bit(state, &vf->vf_states)) + return true; + usleep_range(10000, 20000); + } + + return test_bit(state, &vf->vf_states); +} + /** * i40e_vc_get_version_msg * @vf: pointer to the VF info @@ -1861,7 +1890,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) size_t len = 0; int ret; - if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_INIT)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -2016,7 +2045,7 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf, u8 *msg) bool allmulti = false; bool alluni = false; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err_out; } @@ -2097,13 +2126,14 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg) struct virtchnl_vsi_queue_config_info *qci = (struct virtchnl_vsi_queue_config_info *)msg; struct virtchnl_queue_pair_info *qpi; - struct i40e_pf *pf = vf->pf; u16 vsi_id, vsi_queue_id = 0; - u16 num_qps_all = 0; + struct i40e_pf *pf = vf->pf; i40e_status aq_ret = 0; int i, j = 0, idx = 0; + struct i40e_vsi *vsi; + u16 num_qps_all = 0; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -2119,7 +2149,7 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg) } if (vf->adq_enabled) { - for (i = 0; i < I40E_MAX_VF_VSI; i++) + for (i = 0; i < vf->num_tc; i++) num_qps_all += vf->ch[i].num_qps; if (num_qps_all != qci->num_queue_pairs) { aq_ret = I40E_ERR_PARAM; @@ -2190,9 +2220,15 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg) pf->vsi[vf->lan_vsi_idx]->num_queue_pairs = qci->num_queue_pairs; } else { - for (i = 0; i < vf->num_tc; i++) - pf->vsi[vf->ch[i].vsi_idx]->num_queue_pairs = - vf->ch[i].num_qps; + for (i = 0; i < vf->num_tc; i++) { + vsi = pf->vsi[vf->ch[i].vsi_idx]; + vsi->num_queue_pairs = vf->ch[i].num_qps; + + if (i40e_update_adq_vsi_queues(vsi, i)) { + aq_ret = I40E_ERR_CONFIG; + goto error_param; + } + } } error_param: @@ -2245,7 +2281,7 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; int i; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -2417,7 +2453,7 @@ static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg) struct i40e_pf *pf = vf->pf; i40e_status aq_ret = 0; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -2449,6 +2485,59 @@ static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg) aq_ret); } +/** + * i40e_check_enough_queue - find big enough queue number + * @vf: pointer to the VF info + * @needed: the number of items needed + * + * Returns the base item index of the queue, or negative for error + **/ +static int i40e_check_enough_queue(struct i40e_vf *vf, u16 needed) +{ + unsigned int i, cur_queues, more, pool_size; + struct i40e_lump_tracking *pile; + struct i40e_pf *pf = vf->pf; + struct i40e_vsi *vsi; + + vsi = pf->vsi[vf->lan_vsi_idx]; + cur_queues = vsi->alloc_queue_pairs; + + /* if current allocated queues are enough for need */ + if (cur_queues >= needed) + return vsi->base_queue; + + pile = pf->qp_pile; + if (cur_queues > 0) { + /* if the allocated queues are not zero + * just check if there are enough queues for more + * behind the allocated queues. + */ + more = needed - cur_queues; + for (i = vsi->base_queue + cur_queues; + i < pile->num_entries; i++) { + if (pile->list[i] & I40E_PILE_VALID_BIT) + break; + + if (more-- == 1) + /* there is enough */ + return vsi->base_queue; + } + } + + pool_size = 0; + for (i = 0; i < pile->num_entries; i++) { + if (pile->list[i] & I40E_PILE_VALID_BIT) { + pool_size = 0; + continue; + } + if (needed <= ++pool_size) + /* there is enough */ + return i; + } + + return -ENOMEM; +} + /** * i40e_vc_request_queues_msg * @vf: pointer to the VF info @@ -2467,7 +2556,7 @@ static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg) u8 cur_pairs = vf->num_queue_pairs; struct i40e_pf *pf = vf->pf; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) return -EINVAL; if (req_pairs > I40E_MAX_VF_QUEUES) { @@ -2483,6 +2572,12 @@ static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg) req_pairs - cur_pairs, pf->queues_left); vfres->num_queue_pairs = pf->queues_left + cur_pairs; + } else if (i40e_check_enough_queue(vf, req_pairs) < 0) { + dev_warn(&pf->pdev->dev, + "VF %d requested %d more queues, but there is not enough for it.\n", + vf->vf_id, + req_pairs - cur_pairs); + vfres->num_queue_pairs = cur_pairs; } else { /* successful request */ vf->num_req_queues = req_pairs; @@ -2513,7 +2608,7 @@ static int i40e_vc_get_stats_msg(struct i40e_vf *vf, u8 *msg) memset(&stats, 0, sizeof(struct i40e_eth_stats)); - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -2622,7 +2717,7 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg) i40e_status ret = 0; int i; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE) || !i40e_vc_isvalid_vsi_id(vf, al->vsi_id)) { ret = I40E_ERR_PARAM; goto error_param; @@ -2691,7 +2786,7 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg) i40e_status ret = 0; int i; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE) || !i40e_vc_isvalid_vsi_id(vf, al->vsi_id)) { ret = I40E_ERR_PARAM; goto error_param; @@ -2830,7 +2925,7 @@ static int i40e_vc_remove_vlan_msg(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; int i; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE) || !i40e_vc_isvalid_vsi_id(vf, vfl->vsi_id)) { aq_ret = I40E_ERR_PARAM; goto error_param; @@ -2950,9 +3045,9 @@ static int i40e_vc_config_rss_key(struct i40e_vf *vf, u8 *msg) struct i40e_vsi *vsi = NULL; i40e_status aq_ret = 0; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE) || !i40e_vc_isvalid_vsi_id(vf, vrk->vsi_id) || - (vrk->key_len != I40E_HKEY_ARRAY_SIZE)) { + vrk->key_len != I40E_HKEY_ARRAY_SIZE) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -2981,9 +3076,9 @@ static int i40e_vc_config_rss_lut(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; u16 i; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE) || !i40e_vc_isvalid_vsi_id(vf, vrl->vsi_id) || - (vrl->lut_entries != I40E_VF_HLUT_ARRAY_SIZE)) { + vrl->lut_entries != I40E_VF_HLUT_ARRAY_SIZE) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3016,7 +3111,7 @@ static int i40e_vc_get_rss_hena(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; int len = 0; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3052,7 +3147,7 @@ static int i40e_vc_set_rss_hena(struct i40e_vf *vf, u8 *msg) struct i40e_hw *hw = &pf->hw; i40e_status aq_ret = 0; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3077,7 +3172,7 @@ static int i40e_vc_enable_vlan_stripping(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; struct i40e_vsi *vsi; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3103,7 +3198,7 @@ static int i40e_vc_disable_vlan_stripping(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; struct i40e_vsi *vsi; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3330,7 +3425,7 @@ static int i40e_vc_del_cloud_filter(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; int i, ret; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3461,7 +3556,7 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; int i, ret; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err_out; } @@ -3570,7 +3665,7 @@ static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; u64 speed = 0; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3677,11 +3772,6 @@ static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg) /* set this flag only after making sure all inputs are sane */ vf->adq_enabled = true; - /* num_req_queues is set when user changes number of queues via ethtool - * and this causes issue for default VSI(which depends on this variable) - * when ADq is enabled, hence reset it. - */ - vf->num_req_queues = 0; /* reset the VF in order to allocate resources */ i40e_vc_notify_vf_reset(vf); @@ -3705,7 +3795,7 @@ static int i40e_vc_del_qch_msg(struct i40e_vf *vf, u8 *msg) struct i40e_pf *pf = vf->pf; i40e_status aq_ret = 0; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -4050,34 +4140,6 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) return ret; } -/** - * i40e_vsi_has_vlans - True if VSI has configured VLANs - * @vsi: pointer to the vsi - * - * Check if a VSI has configured any VLANs. False if we have a port VLAN or if - * we have no configured VLANs. Do not call while holding the - * mac_filter_hash_lock. - */ -static bool i40e_vsi_has_vlans(struct i40e_vsi *vsi) -{ - bool have_vlans; - - /* If we have a port VLAN, then the VSI cannot have any VLANs - * configured, as all MAC/VLAN filters will be assigned to the PVID. - */ - if (vsi->info.pvid) - return false; - - /* Since we don't have a PVID, we know that if the device is in VLAN - * mode it must be because of a VLAN filter configured on this VSI. - */ - spin_lock_bh(&vsi->mac_filter_hash_lock); - have_vlans = i40e_is_vsi_in_vlan(vsi); - spin_unlock_bh(&vsi->mac_filter_hash_lock); - - return have_vlans; -} - /** * i40e_ndo_set_vf_port_vlan * @netdev: network interface device structure @@ -4134,19 +4196,9 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, /* duplicate request, so just return success */ goto error_pvid; - if (i40e_vsi_has_vlans(vsi)) { - dev_err(&pf->pdev->dev, - "VF %d has already configured VLAN filters and the administrator is requesting a port VLAN override.\nPlease unload and reload the VF driver for this change to take effect.\n", - vf_id); - /* Administrator Error - knock the VF offline until he does - * the right thing by reconfiguring his network correctly - * and then reloading the VF driver. - */ - i40e_vc_disable_vf(vf); - /* During reset the VF got a new VSI, so refresh the pointer. */ - vsi = pf->vsi[vf->lan_vsi_idx]; - } - + i40e_vc_disable_vf(vf); + /* During reset the VF got a new VSI, so refresh a pointer. */ + vsi = pf->vsi[vf->lan_vsi_idx]; /* Locked once because multiple functions below iterate list */ spin_lock_bh(&vsi->mac_filter_hash_lock); diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index f65cc0c165502..7df3e5833c5d2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -19,6 +19,8 @@ #define I40E_MAX_VF_PROMISC_FLAGS 3 +#define I40E_VF_STATE_WAIT_COUNT 20 + /* Various queue ctrls */ enum i40e_queue_ctrl { I40E_QUEUE_CTRL_UNKNOWN = 0, diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index a9ad788c4913d..fd2da58c71401 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -505,13 +505,11 @@ static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring, struct sk_buff *skb; /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, - xdp->data_end - xdp->data_hard_start, + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, datasize, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) return NULL; - skb_reserve(skb, xdp->data - xdp->data_hard_start); memcpy(__skb_put(skb, datasize), xdp->data, datasize); if (metasize) skb_metadata_set(skb, metasize); diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 758bef02a2a86..ed45f3c8338e8 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -612,23 +612,44 @@ static int iavf_set_ringparam(struct net_device *netdev, if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending)) return -EINVAL; - new_tx_count = clamp_t(u32, ring->tx_pending, - IAVF_MIN_TXD, - IAVF_MAX_TXD); - new_tx_count = ALIGN(new_tx_count, IAVF_REQ_DESCRIPTOR_MULTIPLE); + if (ring->tx_pending > IAVF_MAX_TXD || + ring->tx_pending < IAVF_MIN_TXD || + ring->rx_pending > IAVF_MAX_RXD || + ring->rx_pending < IAVF_MIN_RXD) { + netdev_err(netdev, "Descriptors requested (Tx: %d / Rx: %d) out of range [%d-%d] (increment %d)\n", + ring->tx_pending, ring->rx_pending, IAVF_MIN_TXD, + IAVF_MAX_RXD, IAVF_REQ_DESCRIPTOR_MULTIPLE); + return -EINVAL; + } + + new_tx_count = ALIGN(ring->tx_pending, IAVF_REQ_DESCRIPTOR_MULTIPLE); + if (new_tx_count != ring->tx_pending) + netdev_info(netdev, "Requested Tx descriptor count rounded up to %d\n", + new_tx_count); - new_rx_count = clamp_t(u32, ring->rx_pending, - IAVF_MIN_RXD, - IAVF_MAX_RXD); - new_rx_count = ALIGN(new_rx_count, IAVF_REQ_DESCRIPTOR_MULTIPLE); + new_rx_count = ALIGN(ring->rx_pending, IAVF_REQ_DESCRIPTOR_MULTIPLE); + if (new_rx_count != ring->rx_pending) + netdev_info(netdev, "Requested Rx descriptor count rounded up to %d\n", + new_rx_count); /* if nothing to do return success */ if ((new_tx_count == adapter->tx_desc_count) && - (new_rx_count == adapter->rx_desc_count)) + (new_rx_count == adapter->rx_desc_count)) { + netdev_dbg(netdev, "Nothing to change, descriptor count is same as requested\n"); return 0; + } - adapter->tx_desc_count = new_tx_count; - adapter->rx_desc_count = new_rx_count; + if (new_tx_count != adapter->tx_desc_count) { + netdev_dbg(netdev, "Changing Tx descriptor count from %d to %d\n", + adapter->tx_desc_count, new_tx_count); + adapter->tx_desc_count = new_tx_count; + } + + if (new_rx_count != adapter->rx_desc_count) { + netdev_dbg(netdev, "Changing Rx descriptor count from %d to %d\n", + adapter->rx_desc_count, new_rx_count); + adapter->rx_desc_count = new_rx_count; + } if (netif_running(netdev)) { adapter->flags |= IAVF_FLAG_RESET_NEEDED; @@ -719,12 +740,31 @@ static int iavf_get_per_queue_coalesce(struct net_device *netdev, u32 queue, * * Change the ITR settings for a specific queue. **/ -static void iavf_set_itr_per_queue(struct iavf_adapter *adapter, - struct ethtool_coalesce *ec, int queue) +static int iavf_set_itr_per_queue(struct iavf_adapter *adapter, + struct ethtool_coalesce *ec, int queue) { struct iavf_ring *rx_ring = &adapter->rx_rings[queue]; struct iavf_ring *tx_ring = &adapter->tx_rings[queue]; struct iavf_q_vector *q_vector; + u16 itr_setting; + + itr_setting = rx_ring->itr_setting & ~IAVF_ITR_DYNAMIC; + + if (ec->rx_coalesce_usecs != itr_setting && + ec->use_adaptive_rx_coalesce) { + netif_info(adapter, drv, adapter->netdev, + "Rx interrupt throttling cannot be changed if adaptive-rx is enabled\n"); + return -EINVAL; + } + + itr_setting = tx_ring->itr_setting & ~IAVF_ITR_DYNAMIC; + + if (ec->tx_coalesce_usecs != itr_setting && + ec->use_adaptive_tx_coalesce) { + netif_info(adapter, drv, adapter->netdev, + "Tx interrupt throttling cannot be changed if adaptive-tx is enabled\n"); + return -EINVAL; + } rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs); tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs); @@ -747,6 +787,7 @@ static void iavf_set_itr_per_queue(struct iavf_adapter *adapter, * the Tx and Rx ITR values based on the values we have entered * into the q_vector, no need to write the values now. */ + return 0; } /** @@ -788,9 +829,11 @@ static int __iavf_set_coalesce(struct net_device *netdev, */ if (queue < 0) { for (i = 0; i < adapter->num_active_queues; i++) - iavf_set_itr_per_queue(adapter, ec, i); + if (iavf_set_itr_per_queue(adapter, ec, i)) + return -EINVAL; } else if (queue < adapter->num_active_queues) { - iavf_set_itr_per_queue(adapter, ec, queue); + if (iavf_set_itr_per_queue(adapter, ec, queue)) + return -EINVAL; } else { netif_info(adapter, drv, netdev, "Invalid queue value, queue range is 0 - %d\n", adapter->num_active_queues - 1); @@ -962,14 +1005,13 @@ static int iavf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, if (hfunc) *hfunc = ETH_RSS_HASH_TOP; - if (!indir) - return 0; - - memcpy(key, adapter->rss_key, adapter->rss_key_size); + if (key) + memcpy(key, adapter->rss_key, adapter->rss_key_size); - /* Each 32 bits pointed by 'indir' is stored with a lut entry */ - for (i = 0; i < adapter->rss_lut_size; i++) - indir[i] = (u32)adapter->rss_lut[i]; + if (indir) + /* Each 32 bits pointed by 'indir' is stored with a lut entry */ + for (i = 0; i < adapter->rss_lut_size; i++) + indir[i] = (u32)adapter->rss_lut[i]; return 0; } diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 94a3f000e999b..e8850ba5604c4 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -142,6 +142,30 @@ enum iavf_status iavf_free_virt_mem_d(struct iavf_hw *hw, return 0; } +/** + * iavf_lock_timeout - try to set bit but give up after timeout + * @adapter: board private structure + * @bit: bit to set + * @msecs: timeout in msecs + * + * Returns 0 on success, negative on failure + **/ +static int iavf_lock_timeout(struct iavf_adapter *adapter, + enum iavf_critical_section_t bit, + unsigned int msecs) +{ + unsigned int wait, delay = 10; + + for (wait = 0; wait < msecs; wait += delay) { + if (!test_and_set_bit(bit, &adapter->crit_section)) + return 0; + + msleep(delay); + } + + return -1; +} + /** * iavf_schedule_reset - Set the flags and schedule a reset event * @adapter: board private structure @@ -1602,8 +1626,7 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter) iavf_set_promiscuous(adapter, FLAG_VF_MULTICAST_PROMISC); return 0; } - - if ((adapter->aq_required & IAVF_FLAG_AQ_RELEASE_PROMISC) && + if ((adapter->aq_required & IAVF_FLAG_AQ_RELEASE_PROMISC) || (adapter->aq_required & IAVF_FLAG_AQ_RELEASE_ALLMULTI)) { iavf_set_promiscuous(adapter, 0); return 0; @@ -1961,7 +1984,6 @@ static void iavf_watchdog_task(struct work_struct *work) /* check for hw reset */ reg_val = rd32(hw, IAVF_VF_ARQLEN1) & IAVF_VF_ARQLEN1_ARQENABLE_MASK; if (!reg_val) { - adapter->state = __IAVF_RESETTING; adapter->flags |= IAVF_FLAG_RESET_PENDING; adapter->aq_required = 0; adapter->current_op = VIRTCHNL_OP_UNKNOWN; @@ -2034,8 +2056,8 @@ static void iavf_disable_vf(struct iavf_adapter *adapter) iavf_free_misc_irq(adapter); iavf_reset_interrupt_capability(adapter); - iavf_free_queues(adapter); iavf_free_q_vectors(adapter); + iavf_free_queues(adapter); memset(adapter->vf_res, 0, IAVF_VIRTCHNL_VF_RESOURCE_SIZE); iavf_shutdown_adminq(&adapter->hw); adapter->netdev->flags &= ~IFF_UP; @@ -2077,6 +2099,10 @@ static void iavf_reset_task(struct work_struct *work) if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) return; + if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 200)) { + schedule_work(&adapter->reset_task); + return; + } while (test_and_set_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section)) usleep_range(500, 1000); @@ -2125,6 +2151,7 @@ static void iavf_reset_task(struct work_struct *work) } pci_set_master(adapter->pdev); + pci_restore_msi_state(adapter->pdev); if (i == IAVF_RESET_WAIT_COUNT) { dev_err(&adapter->pdev->dev, "Reset never finished (%x)\n", @@ -2291,6 +2318,8 @@ static void iavf_adminq_task(struct work_struct *work) if (!event.msg_buf) goto out; + if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 200)) + goto freedom; do { ret = iavf_clean_arq_element(hw, &event, &pending); v_op = (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high); @@ -2304,6 +2333,7 @@ static void iavf_adminq_task(struct work_struct *work) if (pending != 0) memset(event.msg_buf, 0, IAVF_MAX_AQ_BUF_SIZE); } while (pending); + clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); if ((adapter->flags & (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED)) || @@ -2312,7 +2342,7 @@ static void iavf_adminq_task(struct work_struct *work) /* check for error indications */ val = rd32(hw, hw->aq.arq.len); - if (val == 0xdeadbeef) /* indicates device in reset */ + if (val == 0xdeadbeef || val == 0xffffffff) /* device in reset */ goto freedom; oldval = val; if (val & IAVF_VF_ARQLEN1_ARQVFE_MASK) { @@ -2574,8 +2604,11 @@ static int iavf_validate_ch_config(struct iavf_adapter *adapter, total_max_rate += tx_rate; num_qps += mqprio_qopt->qopt.count[i]; } - if (num_qps > IAVF_MAX_REQ_QUEUES) + if (num_qps > adapter->num_active_queues) { + dev_err(&adapter->pdev->dev, + "Cannot support requested number of queues\n"); return -EINVAL; + } ret = iavf_validate_tx_bandwidth(adapter, total_max_rate); return ret; @@ -3004,11 +3037,11 @@ static int iavf_configure_clsflower(struct iavf_adapter *adapter, /* start out with flow type and eth type IPv4 to begin with */ filter->f.flow_type = VIRTCHNL_TCP_V4_FLOW; err = iavf_parse_cls_flower(adapter, cls_flower, filter); - if (err < 0) + if (err) goto err; err = iavf_handle_tclass(adapter, tc, filter); - if (err < 0) + if (err) goto err; /* add filter to the list */ @@ -3290,8 +3323,11 @@ static int iavf_change_mtu(struct net_device *netdev, int new_mtu) iavf_notify_client_l2_params(&adapter->vsi); adapter->flags |= IAVF_FLAG_SERVICE_CLIENT_REQUESTED; } - adapter->flags |= IAVF_FLAG_RESET_NEEDED; - queue_work(iavf_wq, &adapter->reset_task); + + if (netif_running(netdev)) { + adapter->flags |= IAVF_FLAG_RESET_NEEDED; + queue_work(iavf_wq, &adapter->reset_task); + } return 0; } @@ -3395,7 +3431,8 @@ static netdev_features_t iavf_fix_features(struct net_device *netdev, { struct iavf_adapter *adapter = netdev_priv(netdev); - if (!(adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)) + if (adapter->vf_res && + !(adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)) features &= ~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER); @@ -3600,6 +3637,10 @@ static void iavf_init_task(struct work_struct *work) init_task.work); struct iavf_hw *hw = &adapter->hw; + if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 5000)) { + dev_warn(&adapter->pdev->dev, "failed to set __IAVF_IN_CRITICAL_TASK in %s\n", __FUNCTION__); + return; + } switch (adapter->state) { case __IAVF_STARTUP: if (iavf_startup(adapter) < 0) @@ -3612,14 +3653,14 @@ static void iavf_init_task(struct work_struct *work) case __IAVF_INIT_GET_RESOURCES: if (iavf_init_get_resources(adapter) < 0) goto init_failed; - return; + goto out; default: goto init_failed; } queue_delayed_work(iavf_wq, &adapter->init_task, msecs_to_jiffies(30)); - return; + goto out; init_failed: if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) { dev_err(&adapter->pdev->dev, @@ -3628,9 +3669,11 @@ static void iavf_init_task(struct work_struct *work) iavf_shutdown_adminq(hw); adapter->state = __IAVF_STARTUP; queue_delayed_work(iavf_wq, &adapter->init_task, HZ * 5); - return; + goto out; } queue_delayed_work(iavf_wq, &adapter->init_task, HZ); +out: + clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); } /** @@ -3647,9 +3690,12 @@ static void iavf_shutdown(struct pci_dev *pdev) if (netif_running(netdev)) iavf_close(netdev); + if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 5000)) + dev_warn(&adapter->pdev->dev, "failed to set __IAVF_IN_CRITICAL_TASK in %s\n", __FUNCTION__); /* Prevent the watchdog from running. */ adapter->state = __IAVF_REMOVE; adapter->aq_required = 0; + clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); #ifdef CONFIG_PM pci_save_state(pdev); @@ -3878,10 +3924,6 @@ static void iavf_remove(struct pci_dev *pdev) err); } - /* Shut down all the garbage mashers on the detention level */ - adapter->state = __IAVF_REMOVE; - adapter->aq_required = 0; - adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; iavf_request_reset(adapter); msleep(50); /* If the FW isn't responding, kick it once, but only once. */ @@ -3889,6 +3931,13 @@ static void iavf_remove(struct pci_dev *pdev) iavf_request_reset(adapter); msleep(50); } + if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 5000)) + dev_warn(&adapter->pdev->dev, "failed to set __IAVF_IN_CRITICAL_TASK in %s\n", __FUNCTION__); + + /* Shut down all the garbage mashers on the detention level */ + adapter->state = __IAVF_REMOVE; + adapter->aq_required = 0; + adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; iavf_free_all_tx_resources(adapter); iavf_free_all_rx_resources(adapter); iavf_misc_irq_disable(adapter); diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index 7a30d5d5ef53a..c6905d1b6182c 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -1263,11 +1263,10 @@ static struct iavf_rx_buffer *iavf_get_rx_buffer(struct iavf_ring *rx_ring, { struct iavf_rx_buffer *rx_buffer; - if (!size) - return NULL; - rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean]; prefetchw(rx_buffer->page); + if (!size) + return rx_buffer; /* we are reusing so sync this buffer for CPU use */ dma_sync_single_range_for_cpu(rx_ring->dev, diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index fc9ff985a62bd..83678120573ec 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -619,7 +619,8 @@ static int ice_lbtest_receive_frames(struct ice_ring *rx_ring) rx_desc = ICE_RX_DESC(rx_ring, i); if (!(rx_desc->wb.status_error0 & - cpu_to_le16(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS))) + (cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S)) | + cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S))))) continue; rx_buf = &rx_ring->rx_buf[i]; @@ -2337,6 +2338,42 @@ ice_setup_autoneg(struct ice_port_info *p, struct ethtool_link_ksettings *ks, return err; } +/** + * ice_set_phy_type_from_speed - set phy_types based on speeds + * and advertised modes + * @ks: ethtool link ksettings struct + * @phy_type_low: pointer to the lower part of phy_type + * @phy_type_high: pointer to the higher part of phy_type + * @adv_link_speed: targeted link speeds bitmap + */ +static void +ice_set_phy_type_from_speed(const struct ethtool_link_ksettings *ks, + u64 *phy_type_low, u64 *phy_type_high, + u16 adv_link_speed) +{ + /* Handle 1000M speed in a special way because ice_update_phy_type + * enables all link modes, but having mixed copper and optical + * standards is not supported. + */ + adv_link_speed &= ~ICE_AQ_LINK_SPEED_1000MB; + + if (ethtool_link_ksettings_test_link_mode(ks, advertising, + 1000baseT_Full)) + *phy_type_low |= ICE_PHY_TYPE_LOW_1000BASE_T | + ICE_PHY_TYPE_LOW_1G_SGMII; + + if (ethtool_link_ksettings_test_link_mode(ks, advertising, + 1000baseKX_Full)) + *phy_type_low |= ICE_PHY_TYPE_LOW_1000BASE_KX; + + if (ethtool_link_ksettings_test_link_mode(ks, advertising, + 1000baseX_Full)) + *phy_type_low |= ICE_PHY_TYPE_LOW_1000BASE_SX | + ICE_PHY_TYPE_LOW_1000BASE_LX; + + ice_update_phy_type(phy_type_low, phy_type_high, adv_link_speed); +} + /** * ice_set_link_ksettings - Set Speed and Duplex * @netdev: network interface device structure @@ -2472,7 +2509,8 @@ ice_set_link_ksettings(struct net_device *netdev, adv_link_speed = curr_link_speed; /* Convert the advertise link speeds to their corresponded PHY_TYPE */ - ice_update_phy_type(&phy_type_low, &phy_type_high, adv_link_speed); + ice_set_phy_type_from_speed(ks, &phy_type_low, &phy_type_high, + adv_link_speed); if (!autoneg_changed && adv_link_speed == curr_link_speed) { netdev_info(netdev, "Nothing changed, exiting without setting anything.\n"); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index d0ccb7ad447b1..2d8cf57fc2f4c 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3005,9 +3005,6 @@ static void ice_remove(struct pci_dev *pdev) struct ice_pf *pf = pci_get_drvdata(pdev); int i; - if (!pf) - return; - for (i = 0; i < ICE_MAX_RESET_WAIT; i++) { if (!ice_is_reset_in_progress(pf->state)) break; @@ -3046,7 +3043,7 @@ static void ice_remove(struct pci_dev *pdev) * is in progress. Allows the driver to gracefully prepare/handle PCI errors. */ static pci_ers_result_t -ice_pci_err_detected(struct pci_dev *pdev, enum pci_channel_state err) +ice_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t err) { struct ice_pf *pf = pci_get_drvdata(pdev); @@ -3498,10 +3495,12 @@ int ice_vsi_cfg(struct ice_vsi *vsi) if (vsi->netdev) { ice_set_rx_mode(vsi->netdev); - err = ice_vsi_vlan_setup(vsi); + if (vsi->type != ICE_VSI_LB) { + err = ice_vsi_vlan_setup(vsi); - if (err) - return err; + if (err) + return err; + } } ice_vsi_cfg_dcb_rings(vsi); @@ -3564,6 +3563,10 @@ static int ice_up_complete(struct ice_vsi *vsi) netif_carrier_on(vsi->netdev); } + /* Perform an initial read of the statistics registers now to + * set the baseline so counters are ready when interface is up + */ + ice_update_eth_stats(vsi); ice_service_task_schedule(pf); return 0; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 158feb0ab2739..8734dfd001bbd 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -5318,7 +5318,8 @@ static void igb_watchdog_task(struct work_struct *work) break; } - if (adapter->link_speed != SPEED_1000) + if (adapter->link_speed != SPEED_1000 || + !hw->phy.ops.read_reg) goto no_wait; /* wait for Remote receiver status OK */ @@ -7374,6 +7375,20 @@ static int igb_set_vf_mac_filter(struct igb_adapter *adapter, const int vf, struct vf_mac_filter *entry = NULL; int ret = 0; + if ((vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) && + !vf_data->trusted) { + dev_warn(&pdev->dev, + "VF %d requested MAC filter but is administratively denied\n", + vf); + return -EINVAL; + } + if (!is_valid_ether_addr(addr)) { + dev_warn(&pdev->dev, + "VF %d attempted to set invalid MAC filter\n", + vf); + return -EINVAL; + } + switch (info) { case E1000_VF_MAC_FILTER_CLR: /* remove all unicast MAC filters related to the current VF */ @@ -7387,20 +7402,6 @@ static int igb_set_vf_mac_filter(struct igb_adapter *adapter, const int vf, } break; case E1000_VF_MAC_FILTER_ADD: - if ((vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) && - !vf_data->trusted) { - dev_warn(&pdev->dev, - "VF %d requested MAC filter but is administratively denied\n", - vf); - return -EINVAL; - } - if (!is_valid_ether_addr(addr)) { - dev_warn(&pdev->dev, - "VF %d attempted to set invalid MAC filter\n", - vf); - return -EINVAL; - } - /* try to find empty slot in the list */ list_for_each(pos, &adapter->vf_macs.l) { entry = list_entry(pos, struct vf_mac_filter, l); @@ -7752,7 +7753,7 @@ static int igb_poll(struct napi_struct *napi, int budget) if (likely(napi_complete_done(napi, work_done))) igb_ring_irq_enable(q_vector); - return min(work_done, budget - 1); + return work_done; } /** @@ -9403,11 +9404,10 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) struct e1000_hw *hw = &adapter->hw; u32 dmac_thr; u16 hwm; + u32 reg; if (hw->mac.type > e1000_82580) { if (adapter->flags & IGB_FLAG_DMAC) { - u32 reg; - /* force threshold to 0. */ wr32(E1000_DMCTXTH, 0); @@ -9440,7 +9440,6 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) /* Disable BMC-to-OS Watchdog Enable */ if (hw->mac.type != e1000_i354) reg &= ~E1000_DMACR_DC_BMC2OSW_EN; - wr32(E1000_DMACR, reg); /* no lower threshold to disable @@ -9457,12 +9456,12 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) */ wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE - (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6); + } - /* make low power state decision controlled - * by DMA coal - */ + if (hw->mac.type >= e1000_i210 || + (adapter->flags & IGB_FLAG_DMAC)) { reg = rd32(E1000_PCIEMISC); - reg &= ~E1000_PCIEMISC_LX_DECISION; + reg |= E1000_PCIEMISC_LX_DECISION; wr32(E1000_PCIEMISC, reg); } /* endif adapter->dmac is not disabled */ } else if (hw->mac.type == e1000_82580) { diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c index ed5d09c11c389..79252ca9e2133 100644 --- a/drivers/net/ethernet/intel/igc/igc_i225.c +++ b/drivers/net/ethernet/intel/igc/igc_i225.c @@ -156,8 +156,15 @@ void igc_release_swfw_sync_i225(struct igc_hw *hw, u16 mask) { u32 swfw_sync; - while (igc_get_hw_semaphore_i225(hw)) - ; /* Empty */ + /* Releasing the resource requires first getting the HW semaphore. + * If we fail to get the semaphore, there is nothing we can do, + * except log an error and quit. We are not allowed to hang here + * indefinitely, as it may cause denial of service or system crash. + */ + if (igc_get_hw_semaphore_i225(hw)) { + hw_dbg("Failed to release SW_FW_SYNC.\n"); + return; + } swfw_sync = rd32(IGC_SW_FW_SYNC); swfw_sync &= ~mask; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 084cf4a4114ad..9ba05d9aa8e08 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -2693,6 +2693,7 @@ static irqreturn_t igc_msix_ring(int irq, void *data) */ static int igc_request_msix(struct igc_adapter *adapter) { + unsigned int num_q_vectors = adapter->num_q_vectors; int i = 0, err = 0, vector = 0, free_vector = 0; struct net_device *netdev = adapter->netdev; @@ -2701,7 +2702,13 @@ static int igc_request_msix(struct igc_adapter *adapter) if (err) goto err_out; - for (i = 0; i < adapter->num_q_vectors; i++) { + if (num_q_vectors > MAX_Q_VECTORS) { + num_q_vectors = MAX_Q_VECTORS; + dev_warn(&adapter->pdev->dev, + "The number of queue vectors (%d) is higher than max allowed (%d)\n", + adapter->num_q_vectors, MAX_Q_VECTORS); + } + for (i = 0; i < num_q_vectors; i++) { struct igc_q_vector *q_vector = adapter->q_vector[i]; vector++; diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c index f4b05af0dd2f6..6156c76d765ff 100644 --- a/drivers/net/ethernet/intel/igc/igc_phy.c +++ b/drivers/net/ethernet/intel/igc/igc_phy.c @@ -569,7 +569,7 @@ static s32 igc_read_phy_reg_mdic(struct igc_hw *hw, u32 offset, u16 *data) * the lower time out */ for (i = 0; i < IGC_GEN_POLL_TIMEOUT; i++) { - usleep_range(500, 1000); + udelay(50); mdic = rd32(IGC_MDIC); if (mdic & IGC_MDIC_READY) break; @@ -626,7 +626,7 @@ static s32 igc_write_phy_reg_mdic(struct igc_hw *hw, u32 offset, u16 data) * the lower time out */ for (i = 0; i < IGC_GEN_POLL_TIMEOUT; i++) { - usleep_range(500, 1000); + udelay(50); mdic = rd32(IGC_MDIC); if (mdic & IGC_MDIC_READY) break; @@ -734,8 +734,6 @@ s32 igc_write_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 data) if (ret_val) return ret_val; ret_val = igc_write_phy_reg_mdic(hw, offset, data); - if (ret_val) - return ret_val; hw->phy.ops.release(hw); } else { ret_val = igc_write_xmdio_reg(hw, (u16)offset, dev_addr, @@ -767,8 +765,6 @@ s32 igc_read_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 *data) if (ret_val) return ret_val; ret_val = igc_read_phy_reg_mdic(hw, offset, data); - if (ret_val) - return ret_val; hw->phy.ops.release(hw); } else { ret_val = igc_read_xmdio_reg(hw, (u16)offset, dev_addr, diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h index 50d7c04dccf59..7bc7d7618fe1e 100644 --- a/drivers/net/ethernet/intel/igc/igc_regs.h +++ b/drivers/net/ethernet/intel/igc/igc_regs.h @@ -236,4 +236,6 @@ do { \ #define array_rd32(reg, offset) (igc_rd32(hw, (reg) + ((offset) << 2))) +#define IGC_REMOVED(h) unlikely(!(h)) + #endif diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index 0940a0da16f28..fb37566bf3730 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -82,7 +82,7 @@ static int ixgb_vlan_rx_kill_vid(struct net_device *netdev, static void ixgb_restore_vlan(struct ixgb_adapter *adapter); static pci_ers_result_t ixgb_io_error_detected (struct pci_dev *pdev, - enum pci_channel_state state); + pci_channel_state_t state); static pci_ers_result_t ixgb_io_slot_reset (struct pci_dev *pdev); static void ixgb_io_resume (struct pci_dev *pdev); @@ -2194,7 +2194,7 @@ ixgb_restore_vlan(struct ixgb_adapter *adapter) * a PCI bus error is detected. */ static pci_ers_result_t ixgb_io_error_detected(struct pci_dev *pdev, - enum pci_channel_state state) + pci_channel_state_t state) { struct net_device *netdev = pci_get_drvdata(pdev); struct ixgb_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 39e73ad60352f..fa49ef2afde5f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -773,6 +773,7 @@ struct ixgbe_adapter { #ifdef CONFIG_IXGBE_IPSEC struct ixgbe_ipsec *ipsec; #endif /* CONFIG_IXGBE_IPSEC */ + spinlock_t vfs_lock; }; static inline u8 ixgbe_max_rss_indices(struct ixgbe_adapter *adapter) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c index b14b164c9601f..5799b434165e0 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c @@ -903,7 +903,8 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) /* Tx IPsec offload doesn't seem to work on this * device, so block these requests for now. */ - if (!(sam->flags & XFRM_OFFLOAD_INBOUND)) { + sam->flags = sam->flags & ~XFRM_OFFLOAD_IPV6; + if (sam->flags != XFRM_OFFLOAD_INBOUND) { err = -EOPNOTSUPP; goto err_out; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index cb6dbcfdac082..b288eb5251b12 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6396,6 +6396,9 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter, /* n-tuple support exists, always init our spinlock */ spin_lock_init(&adapter->fdir_perfect_lock); + /* init spinlock to avoid concurrency of VF resources */ + spin_lock_init(&adapter->vfs_lock); + #ifdef CONFIG_IXGBE_DCB ixgbe_init_dcb(adapter); #endif diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 47a920128760e..0e73e3b1af19a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -204,10 +204,13 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter, unsigned int max_vfs) int ixgbe_disable_sriov(struct ixgbe_adapter *adapter) { unsigned int num_vfs = adapter->num_vfs, vf; + unsigned long flags; int rss; + spin_lock_irqsave(&adapter->vfs_lock, flags); /* set num VFs to 0 to prevent access to vfinfo */ adapter->num_vfs = 0; + spin_unlock_irqrestore(&adapter->vfs_lock, flags); /* put the reference to all of the vf devices */ for (vf = 0; vf < num_vfs; ++vf) { @@ -1157,9 +1160,9 @@ static int ixgbe_update_vf_xcast_mode(struct ixgbe_adapter *adapter, switch (xcast_mode) { case IXGBEVF_XCAST_MODE_NONE: - disable = IXGBE_VMOLR_BAM | IXGBE_VMOLR_ROMPE | + disable = IXGBE_VMOLR_ROMPE | IXGBE_VMOLR_MPE | IXGBE_VMOLR_UPE | IXGBE_VMOLR_VPE; - enable = 0; + enable = IXGBE_VMOLR_BAM; break; case IXGBEVF_XCAST_MODE_MULTI: disable = IXGBE_VMOLR_MPE | IXGBE_VMOLR_UPE | IXGBE_VMOLR_VPE; @@ -1181,9 +1184,9 @@ static int ixgbe_update_vf_xcast_mode(struct ixgbe_adapter *adapter, return -EPERM; } - disable = 0; + disable = IXGBE_VMOLR_VPE; enable = IXGBE_VMOLR_BAM | IXGBE_VMOLR_ROMPE | - IXGBE_VMOLR_MPE | IXGBE_VMOLR_UPE | IXGBE_VMOLR_VPE; + IXGBE_VMOLR_MPE | IXGBE_VMOLR_UPE; break; default: return -EOPNOTSUPP; @@ -1305,8 +1308,10 @@ static void ixgbe_rcv_ack_from_vf(struct ixgbe_adapter *adapter, u32 vf) void ixgbe_msg_task(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; + unsigned long flags; u32 vf; + spin_lock_irqsave(&adapter->vfs_lock, flags); for (vf = 0; vf < adapter->num_vfs; vf++) { /* process any reset requests */ if (!ixgbe_check_for_rst(hw, vf)) @@ -1320,6 +1325,7 @@ void ixgbe_msg_task(struct ixgbe_adapter *adapter) if (!ixgbe_check_for_ack(hw, vf)) ixgbe_rcv_ack_from_vf(adapter, vf); } + spin_unlock_irqrestore(&adapter->vfs_lock, flags); } void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 9c42f741ed5ef..74728c0a44a81 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -3405,6 +3405,9 @@ static s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw) /* flush pending Tx transactions */ ixgbe_clear_tx_pending(hw); + /* set MDIO speed before talking to the PHY in case it's the 1st time */ + ixgbe_set_mdio_speed(hw); + /* PHY ops must be identified and initialized prior to reset */ status = hw->phy.ops.init(hw); if (status == IXGBE_ERR_SFP_NOT_SUPPORTED || diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index b43be9f141053..921a2ddb497e1 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -583,12 +583,14 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget) u32 cmd_type; while (budget-- > 0) { - if (unlikely(!ixgbe_desc_unused(xdp_ring)) || - !netif_carrier_ok(xdp_ring->netdev)) { + if (unlikely(!ixgbe_desc_unused(xdp_ring))) { work_done = false; break; } + if (!netif_carrier_ok(xdp_ring->netdev)) + break; + if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc)) break; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index be8e6d4e376ec..9bd02766a4bcc 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1979,14 +1979,15 @@ static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter, if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX) return; - set_ring_build_skb_enabled(rx_ring); + if (PAGE_SIZE < 8192) + if (max_frame > IXGBEVF_MAX_FRAME_BUILD_SKB) + set_ring_uses_large_buffer(rx_ring); - if (PAGE_SIZE < 8192) { - if (max_frame <= IXGBEVF_MAX_FRAME_BUILD_SKB) - return; + /* 82599 can't rely on RXDCTL.RLPML to restrict the size of the frame */ + if (adapter->hw.mac.type == ixgbe_mac_82599_vf && !ring_uses_large_buffer(rx_ring)) + return; - set_ring_uses_large_buffer(rx_ring); - } + set_ring_build_skb_enabled(rx_ring); } /** diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index 6e504854571cf..94541bf889a23 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -209,7 +209,7 @@ static int xrx200_hw_receive(struct xrx200_chan *ch) skb->protocol = eth_type_trans(skb, net_dev); netif_receive_skb(skb); net_dev->stats.rx_packets++; - net_dev->stats.rx_bytes += len - ETH_FCS_LEN; + net_dev->stats.rx_bytes += len; return 0; } diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 7b0543056b101..64aa5510e61a6 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -101,7 +101,7 @@ #define MVNETA_DESC_SWAP BIT(6) #define MVNETA_TX_BRST_SZ_MASK(burst) ((burst) << 22) #define MVNETA_PORT_STATUS 0x2444 -#define MVNETA_TX_IN_PRGRS BIT(1) +#define MVNETA_TX_IN_PRGRS BIT(0) #define MVNETA_TX_FIFO_EMPTY BIT(8) #define MVNETA_RX_MIN_FRAME_SIZE 0x247c /* Only exists on Armada XP and Armada 370 */ diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index dac0e51e6aafd..d700f1b5a4bf7 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -4545,7 +4545,7 @@ static int mvpp2_port_init(struct mvpp2_port *port) struct mvpp2 *priv = port->priv; struct mvpp2_txq_pcpu *txq_pcpu; unsigned int thread; - int queue, err, val; + int queue, err; /* Checks for hardware constraints */ if (port->first_rxq + port->nrxqs > @@ -4559,18 +4559,6 @@ static int mvpp2_port_init(struct mvpp2_port *port) mvpp2_egress_disable(port); mvpp2_port_disable(port); - if (mvpp2_is_xlg(port->phy_interface)) { - val = readl(port->base + MVPP22_XLG_CTRL0_REG); - val &= ~MVPP22_XLG_CTRL0_FORCE_LINK_PASS; - val |= MVPP22_XLG_CTRL0_FORCE_LINK_DOWN; - writel(val, port->base + MVPP22_XLG_CTRL0_REG); - } else { - val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG); - val &= ~MVPP2_GMAC_FORCE_LINK_PASS; - val |= MVPP2_GMAC_FORCE_LINK_DOWN; - writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG); - } - port->tx_time_coal = MVPP2_TXDONE_COAL_USEC; port->txqs = devm_kcalloc(dev, port->ntxqs, sizeof(*port->txqs), @@ -5785,7 +5773,7 @@ static int mvpp2_probe(struct platform_device *pdev) shared = num_present_cpus() - priv->nthreads; if (shared > 0) - bitmap_fill(&priv->lock_map, + bitmap_set(&priv->lock_map, 0, min_t(int, shared, MVPP2_MAX_THREADS)); for (i = 0; i < MVPP2_MAX_THREADS; i++) { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index d82a519a0cd9a..f9f246c82c974 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -2013,10 +2013,11 @@ int rvu_mbox_handler_npc_mcam_unmap_counter(struct rvu *rvu, index = find_next_bit(mcam->bmap, mcam->bmap_entries, entry); if (index >= mcam->bmap_entries) break; + entry = index + 1; + if (mcam->entry2cntr_map[index] != req->cntr) continue; - entry = index + 1; npc_unmap_mcam_entry_and_cntr(rvu, mcam, blkaddr, index, req->cntr); } diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 48b395b9c15ad..f9139150a8a26 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -215,7 +215,7 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode, phylink_config); struct mtk_eth *eth = mac->hw; u32 mcr_cur, mcr_new, sid, i; - int val, ge_mode, err; + int val, ge_mode, err = 0; /* MT76x8 has no hardware settings between for the MAC */ if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628) && @@ -802,6 +802,17 @@ static inline void mtk_rx_get_desc(struct mtk_rx_dma *rxd, rxd->rxd4 = READ_ONCE(dma_rxd->rxd4); } +static void *mtk_max_lro_buf_alloc(gfp_t gfp_mask) +{ + unsigned int size = mtk_max_frag_size(MTK_MAX_LRO_RX_LENGTH); + unsigned long data; + + data = __get_free_pages(gfp_mask | __GFP_COMP | __GFP_NOWARN, + get_order(size)); + + return (void *)data; +} + /* the qdma core needs scratch memory to be setup */ static int mtk_init_fq_dma(struct mtk_eth *eth) { @@ -1299,7 +1310,10 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, goto release_desc; /* alloc new buffer */ - new_data = napi_alloc_frag(ring->frag_size); + if (ring->frag_size <= PAGE_SIZE) + new_data = napi_alloc_frag(ring->frag_size); + else + new_data = mtk_max_lro_buf_alloc(GFP_ATOMIC); if (unlikely(!new_data)) { netdev->stats.rx_dropped++; goto release_desc; @@ -1696,7 +1710,10 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag) return -ENOMEM; for (i = 0; i < rx_dma_size; i++) { - ring->data[i] = netdev_alloc_frag(ring->frag_size); + if (ring->frag_size <= PAGE_SIZE) + ring->data[i] = netdev_alloc_frag(ring->frag_size); + else + ring->data[i] = mtk_max_lro_buf_alloc(GFP_KERNEL); if (!ring->data[i]) return -ENOMEM; } @@ -1962,6 +1979,9 @@ static int mtk_hwlro_get_fdir_entry(struct net_device *dev, struct ethtool_rx_flow_spec *fsp = (struct ethtool_rx_flow_spec *)&cmd->fs; + if (fsp->location >= ARRAY_SIZE(mac->hwlro_ip)) + return -EINVAL; + /* only tcp dst ipv4 is meaningful, others are meaningless */ fsp->flow_type = TCP_V4_FLOW; fsp->h_u.tcp_ip4_spec.ip4dst = ntohl(mac->hwlro_ip[fsp->location]); diff --git a/drivers/net/ethernet/mediatek/mtk_sgmii.c b/drivers/net/ethernet/mediatek/mtk_sgmii.c index 4db27dfc7ec1f..6702d77030885 100644 --- a/drivers/net/ethernet/mediatek/mtk_sgmii.c +++ b/drivers/net/ethernet/mediatek/mtk_sgmii.c @@ -26,6 +26,7 @@ int mtk_sgmii_init(struct mtk_sgmii *ss, struct device_node *r, u32 ana_rgc3) break; ss->regmap[i] = syscon_node_to_regmap(np); + of_node_put(np); if (IS_ERR(ss->regmap[i])) return PTR_ERR(ss->regmap[i]); } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 426786a349c3c..b711148a9d503 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -663,7 +663,7 @@ void __init mlx4_en_init_ptys2ethtool_map(void) MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_1000BASE_T, SPEED_1000, ETHTOOL_LINK_MODE_1000baseT_Full_BIT); MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_1000BASE_CX_SGMII, SPEED_1000, - ETHTOOL_LINK_MODE_1000baseKX_Full_BIT); + ETHTOOL_LINK_MODE_1000baseX_Full_BIT); MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_1000BASE_KX, SPEED_1000, ETHTOOL_LINK_MODE_1000baseKX_Full_BIT); MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_T, SPEED_10000, @@ -675,9 +675,9 @@ void __init mlx4_en_init_ptys2ethtool_map(void) MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_KR, SPEED_10000, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_CR, SPEED_10000, - ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + ETHTOOL_LINK_MODE_10000baseCR_Full_BIT); MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_SR, SPEED_10000, - ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + ETHTOOL_LINK_MODE_10000baseSR_Full_BIT); MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_20GBASE_KR2, SPEED_20000, ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT, ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT); @@ -2083,7 +2083,7 @@ static int mlx4_en_get_module_eeprom(struct net_device *dev, en_err(priv, "mlx4_get_module_info i(%d) offset(%d) bytes_to_read(%d) - FAILED (0x%x)\n", i, offset, ee->len - i, ret); - return 0; + return ret; } i += ret; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index b3e3c4d903c9d..91334229c1205 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -371,6 +371,9 @@ mlx4_en_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, int nhoff = skb_network_offset(skb); int ret = 0; + if (skb->encapsulation) + return -EPROTONOSUPPORT; + if (skb->protocol != htons(ETH_P_IP)) return -EPROTONOSUPPORT; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 76547d35cd0e1..6c7b364d0bf03 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -130,11 +130,8 @@ static int cmd_alloc_index(struct mlx5_cmd *cmd) static void cmd_free_index(struct mlx5_cmd *cmd, int idx) { - unsigned long flags; - - spin_lock_irqsave(&cmd->alloc_lock, flags); + lockdep_assert_held(&cmd->alloc_lock); set_bit(idx, &cmd->bitmask); - spin_unlock_irqrestore(&cmd->alloc_lock, flags); } static void cmd_ent_get(struct mlx5_cmd_work_ent *ent) @@ -144,13 +141,21 @@ static void cmd_ent_get(struct mlx5_cmd_work_ent *ent) static void cmd_ent_put(struct mlx5_cmd_work_ent *ent) { + struct mlx5_cmd *cmd = ent->cmd; + unsigned long flags; + + spin_lock_irqsave(&cmd->alloc_lock, flags); if (!refcount_dec_and_test(&ent->refcnt)) - return; + goto out; - if (ent->idx >= 0) - cmd_free_index(ent->cmd, ent->idx); + if (ent->idx >= 0) { + cmd_free_index(cmd, ent->idx); + up(ent->page_queue ? &cmd->pages_sem : &cmd->sem); + } cmd_free_ent(ent); +out: + spin_unlock_irqrestore(&cmd->alloc_lock, flags); } static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx) @@ -865,7 +870,7 @@ static void cb_timeout_handler(struct work_struct *work) ent->ret = -ETIMEDOUT; mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, timeout. Will cause a leak of a command resource\n", ent->idx, mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); - mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); + mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true); out: cmd_ent_put(ent); /* for the cmd_ent_get() took on schedule delayed work */ @@ -883,25 +888,6 @@ static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode) return cmd->allowed_opcode == opcode; } -static int cmd_alloc_index_retry(struct mlx5_cmd *cmd) -{ - unsigned long alloc_end = jiffies + msecs_to_jiffies(1000); - int idx; - -retry: - idx = cmd_alloc_index(cmd); - if (idx < 0 && time_before(jiffies, alloc_end)) { - /* Index allocation can fail on heavy load of commands. This is a temporary - * situation as the current command already holds the semaphore, meaning that - * another command completion is being handled and it is expected to release - * the entry index soon. - */ - cpu_relax(); - goto retry; - } - return idx; -} - static void cmd_work_handler(struct work_struct *work) { struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work); @@ -919,7 +905,7 @@ static void cmd_work_handler(struct work_struct *work) sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem; down(sem); if (!ent->page_queue) { - alloc_ret = cmd_alloc_index_retry(cmd); + alloc_ret = cmd_alloc_index(cmd); if (alloc_ret < 0) { mlx5_core_err(dev, "failed to allocate command entry\n"); if (ent->callback) { @@ -977,7 +963,7 @@ static void cmd_work_handler(struct work_struct *work) MLX5_SET(mbox_out, ent->out, status, status); MLX5_SET(mbox_out, ent->out, syndrome, drv_synd); - mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); + mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true); return; } @@ -991,7 +977,7 @@ static void cmd_work_handler(struct work_struct *work) poll_timeout(ent); /* make sure we read the descriptor after ownership is SW */ rmb(); - mlx5_cmd_comp_handler(dev, 1UL << ent->idx, (ent->ret == -ETIMEDOUT)); + mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, (ent->ret == -ETIMEDOUT)); } } @@ -1051,7 +1037,7 @@ static void wait_func_handle_exec_timeout(struct mlx5_core_dev *dev, mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); ent->ret = -ETIMEDOUT; - mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); + mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true); } static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) @@ -1577,8 +1563,6 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force vector = vec & 0xffffffff; for (i = 0; i < (1 << cmd->log_sz); i++) { if (test_bit(i, &vector)) { - struct semaphore *sem; - ent = cmd->ent_arr[i]; /* if we already completed the command, ignore it */ @@ -1601,10 +1585,6 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) cmd_ent_put(ent); - if (ent->page_queue) - sem = &cmd->pages_sem; - else - sem = &cmd->sem; ent->ts2 = ktime_get_ns(); memcpy(ent->out->first.data, ent->lay->out, sizeof(ent->lay->out)); dump_command(dev, ent, 0); @@ -1658,7 +1638,6 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force */ complete(&ent->done); } - up(sem); } } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 818edc63e428f..c8f26480d1456 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -136,6 +136,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, cq->cqn); cq->uar = dev->priv.uar; + cq->irqn = eq->core.irqn; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index dc36b0db37222..f8144ce7e476d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -673,6 +673,9 @@ static void mlx5_fw_tracer_handle_traces(struct work_struct *work) if (!tracer->owner) return; + if (unlikely(!tracer->str_db.loaded)) + goto arm; + block_count = tracer->buff.size / TRACER_BLOCK_SIZE_BYTE; start_offset = tracer->buff.consumer_index * TRACER_BLOCK_SIZE_BYTE; @@ -730,6 +733,7 @@ static void mlx5_fw_tracer_handle_traces(struct work_struct *work) &tmp_trace_block[TRACES_PER_BLOCK - 1]); } +arm: mlx5_fw_tracer_arm(dev); } @@ -1005,7 +1009,7 @@ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer) err = mlx5_core_alloc_pd(dev, &tracer->buff.pdn); if (err) { mlx5_core_warn(dev, "FWTracer: Failed to allocate PD %d\n", err); - return err; + goto err_cancel_work; } err = mlx5_fw_tracer_create_mkey(tracer); @@ -1029,6 +1033,7 @@ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer) mlx5_core_destroy_mkey(dev, &tracer->buff.mkey); err_dealloc_pd: mlx5_core_dealloc_pd(dev, tracer->buff.pdn); +err_cancel_work: cancel_work_sync(&tracer->read_fw_strings_work); return err; } @@ -1083,8 +1088,7 @@ static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void queue_work(tracer->work_queue, &tracer->ownership_change_work); break; case MLX5_TRACER_SUBTYPE_TRACES_AVAILABLE: - if (likely(tracer->str_db.loaded)) - queue_work(tracer->work_queue, &tracer->handle_traces_work); + queue_work(tracer->work_queue, &tracer->handle_traces_work); break; default: mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index d48292ccda294..9239d767443f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -234,18 +234,12 @@ struct ttc_params { void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv, struct ttc_params *ttc_params); void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params); -void mlx5e_set_inner_ttc_ft_params(struct ttc_params *ttc_params); int mlx5e_create_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params, struct mlx5e_ttc_table *ttc); void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv, struct mlx5e_ttc_table *ttc); -int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params, - struct mlx5e_ttc_table *ttc); -void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv, - struct mlx5e_ttc_table *ttc); - void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft); void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index e3dc2cbdc9f6c..e92cc60eade3f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1683,7 +1683,7 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev, if (size_read < 0) { netdev_err(priv->netdev, "%s: mlx5_query_eeprom failed:0x%x\n", __func__, size_read); - return 0; + return size_read; } i += size_read; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index c4ac7a9968d16..024251e276a1c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "en.h" #include "lib/mpfs.h" @@ -1123,7 +1124,7 @@ void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv, ttc_params->inner_ttc = &priv->fs.inner_ttc; } -void mlx5e_set_inner_ttc_ft_params(struct ttc_params *ttc_params) +static void mlx5e_set_inner_ttc_ft_params(struct ttc_params *ttc_params) { struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr; @@ -1142,8 +1143,8 @@ void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params) ft_attr->prio = MLX5E_NIC_PRIO; } -int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params, - struct mlx5e_ttc_table *ttc) +static int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params, + struct mlx5e_ttc_table *ttc) { struct mlx5e_flow_table *ft = &ttc->ft; int err; @@ -1173,8 +1174,8 @@ int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv, struct ttc_params *par return err; } -void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv, - struct mlx5e_ttc_table *ttc) +static void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv, + struct mlx5e_ttc_table *ttc) { if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 24c49a84947f3..fe29e9ca6abd4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1582,15 +1582,9 @@ static int mlx5e_alloc_cq_common(struct mlx5_core_dev *mdev, struct mlx5e_cq *cq) { struct mlx5_core_cq *mcq = &cq->mcq; - int eqn_not_used; - unsigned int irqn; int err; u32 i; - err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn); - if (err) - return err; - err = mlx5_cqwq_create(mdev, ¶m->wq, param->cqc, &cq->wq, &cq->wq_ctrl); if (err) @@ -1604,7 +1598,6 @@ static int mlx5e_alloc_cq_common(struct mlx5_core_dev *mdev, mcq->vector = param->eq_ix; mcq->comp = mlx5e_completion_event; mcq->event = mlx5e_cq_error_event; - mcq->irqn = irqn; for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i); @@ -1650,11 +1643,10 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) void *in; void *cqc; int inlen; - unsigned int irqn_not_used; int eqn; int err; - err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used); + err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn); if (err) return err; @@ -2018,9 +2010,8 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_channel *c; unsigned int irq; int err; - int eqn; - err = mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq); + err = mlx5_vector2irqn(priv->mdev, ix, &irq); if (err) return err; @@ -3805,20 +3796,67 @@ static int set_feature_rx_all(struct net_device *netdev, bool enable) return mlx5_set_port_fcs(mdev, !enable); } +static int mlx5e_set_rx_port_ts(struct mlx5_core_dev *mdev, bool enable) +{ + u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {}; + bool supported, curr_state; + int err; + + if (!MLX5_CAP_GEN(mdev, ports_check)) + return 0; + + err = mlx5_query_ports_check(mdev, in, sizeof(in)); + if (err) + return err; + + supported = MLX5_GET(pcmr_reg, in, rx_ts_over_crc_cap); + curr_state = MLX5_GET(pcmr_reg, in, rx_ts_over_crc); + + if (!supported || enable == curr_state) + return 0; + + MLX5_SET(pcmr_reg, in, local_port, 1); + MLX5_SET(pcmr_reg, in, rx_ts_over_crc, enable); + + return mlx5_set_ports_check(mdev, in, sizeof(in)); +} + static int set_feature_rx_fcs(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_channels *chs = &priv->channels; + struct mlx5_core_dev *mdev = priv->mdev; int err; mutex_lock(&priv->state_lock); - priv->channels.params.scatter_fcs_en = enable; - err = mlx5e_modify_channels_scatter_fcs(&priv->channels, enable); - if (err) - priv->channels.params.scatter_fcs_en = !enable; + if (enable) { + err = mlx5e_set_rx_port_ts(mdev, false); + if (err) + goto out; - mutex_unlock(&priv->state_lock); + chs->params.scatter_fcs_en = true; + err = mlx5e_modify_channels_scatter_fcs(chs, true); + if (err) { + chs->params.scatter_fcs_en = false; + mlx5e_set_rx_port_ts(mdev, true); + } + } else { + chs->params.scatter_fcs_en = false; + err = mlx5e_modify_channels_scatter_fcs(chs, false); + if (err) { + chs->params.scatter_fcs_en = true; + goto out; + } + err = mlx5e_set_rx_port_ts(mdev, true); + if (err) { + mlx5_core_warn(mdev, "Failed to set RX port timestamp %d\n", err); + err = 0; + } + } +out: + mutex_unlock(&priv->state_lock); return err; } @@ -3860,12 +3898,11 @@ static int set_feature_arfs(struct net_device *netdev, bool enable) static int mlx5e_handle_feature(struct net_device *netdev, netdev_features_t *features, - netdev_features_t wanted_features, netdev_features_t feature, mlx5e_feature_handler feature_handler) { - netdev_features_t changes = wanted_features ^ netdev->features; - bool enable = !!(wanted_features & feature); + netdev_features_t changes = *features ^ netdev->features; + bool enable = !!(*features & feature); int err; if (!(changes & feature)) @@ -3873,22 +3910,22 @@ static int mlx5e_handle_feature(struct net_device *netdev, err = feature_handler(netdev, enable); if (err) { + MLX5E_SET_FEATURE(features, feature, !enable); netdev_err(netdev, "%s feature %pNF failed, err %d\n", enable ? "Enable" : "Disable", &feature, err); return err; } - MLX5E_SET_FEATURE(features, feature, enable); return 0; } int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) { - netdev_features_t oper_features = netdev->features; + netdev_features_t oper_features = features; int err = 0; #define MLX5E_HANDLE_FEATURE(feature, handler) \ - mlx5e_handle_feature(netdev, &oper_features, features, feature, handler) + mlx5e_handle_feature(netdev, &oper_features, feature, handler) err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro); err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER, @@ -3934,6 +3971,13 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, } } + if (params->xdp_prog) { + if (features & NETIF_F_LRO) { + netdev_warn(netdev, "LRO is incompatible with XDP\n"); + features &= ~NETIF_F_LRO; + } + } + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { features &= ~NETIF_F_RXHASH; if (netdev->features & NETIF_F_RXHASH) @@ -4585,6 +4629,11 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) unlock: mutex_unlock(&priv->state_lock); + + /* Need to fix some features. */ + if (!err) + netdev_update_features(netdev); + return err; } @@ -5007,9 +5056,13 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) } if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)) { - netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; - netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL; - netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL; + netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; } if (mlx5e_tunnel_proto_supported(mdev, IPPROTO_GRE)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 30a2ee3c40a00..e2d7f821c956b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -859,8 +859,8 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) return err; } -int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, - unsigned int *irqn) +static int vector2eqnirqn(struct mlx5_core_dev *dev, int vector, int *eqn, + unsigned int *irqn) { struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq_comp *eq, *n; @@ -869,8 +869,10 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { if (i++ == vector) { - *eqn = eq->core.eqn; - *irqn = eq->core.irqn; + if (irqn) + *irqn = eq->core.irqn; + if (eqn) + *eqn = eq->core.eqn; err = 0; break; } @@ -878,8 +880,18 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, return err; } + +int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn) +{ + return vector2eqnirqn(dev, vector, eqn, NULL); +} EXPORT_SYMBOL(mlx5_vector2eqn); +int mlx5_vector2irqn(struct mlx5_core_dev *dev, int vector, unsigned int *irqn) +{ + return vector2eqnirqn(dev, vector, NULL, irqn); +} + unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev) { return dev->priv.eq_table->num_comp_eqs; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 791fc14cc6d65..c12073d35e277 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "mlx5_core.h" #include "lib/eq.h" #include "eswitch.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 65bdc6ae8430a..fd3d6c05ab263 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1978,10 +1978,6 @@ esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw) if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source)) return false; - if (mlx5_core_is_ecpf_esw_manager(esw->dev) || - mlx5_ecpf_vport_exists(esw->dev)) - return false; - return true; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index 61021133029e6..6819d8c3979ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -434,7 +434,6 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) struct mlx5_wq_param wqp; struct mlx5_cqe64 *cqe; int inlen, err, eqn; - unsigned int irqn; void *cqc, *in; __be64 *pas; u32 i; @@ -463,7 +462,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) goto err_cqwq; } - err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn, &irqn); + err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn); if (err) { kvfree(in); goto err_cqwq; @@ -494,7 +493,6 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) conn->cq.mcq.vector = 0; conn->cq.mcq.comp = mlx5_fpga_conn_cq_complete; conn->cq.mcq.event = mlx5_fpga_conn_cq_event; - conn->cq.mcq.irqn = irqn; conn->cq.mcq.uar = fdev->conn_res.uar; tasklet_init(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet, (unsigned long)conn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index cb25e091777b8..e9b6f1f9883d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1441,9 +1441,22 @@ static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte, return NULL; } -static bool check_conflicting_actions(u32 action1, u32 action2) +static bool check_conflicting_actions_vlan(const struct mlx5_fs_vlan *vlan0, + const struct mlx5_fs_vlan *vlan1) { - u32 xored_actions = action1 ^ action2; + return vlan0->ethtype != vlan1->ethtype || + vlan0->vid != vlan1->vid || + vlan0->prio != vlan1->prio; +} + +static bool check_conflicting_actions(const struct mlx5_flow_act *act1, + const struct mlx5_flow_act *act2) +{ + u32 action1 = act1->action; + u32 action2 = act2->action; + u32 xored_actions; + + xored_actions = action1 ^ action2; /* if one rule only wants to count, it's ok */ if (action1 == MLX5_FLOW_CONTEXT_ACTION_COUNT || @@ -1460,6 +1473,22 @@ static bool check_conflicting_actions(u32 action1, u32 action2) MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2)) return true; + if (action1 & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT && + act1->pkt_reformat != act2->pkt_reformat) + return true; + + if (action1 & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && + act1->modify_hdr != act2->modify_hdr) + return true; + + if (action1 & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH && + check_conflicting_actions_vlan(&act1->vlan[0], &act2->vlan[0])) + return true; + + if (action1 & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2 && + check_conflicting_actions_vlan(&act1->vlan[1], &act2->vlan[1])) + return true; + return false; } @@ -1467,7 +1496,7 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_context *flow_context, const struct mlx5_flow_act *flow_act) { - if (check_conflicting_actions(flow_act->action, fte->action.action)) { + if (check_conflicting_actions(flow_act, &fte->action)) { mlx5_core_warn(get_dev(&fte->node), "Found two FTEs with conflicting actions\n"); return -EEXIST; @@ -1597,9 +1626,9 @@ static int build_match_list(struct match_list_head *match_head, curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC); if (!curr_match) { + rcu_read_unlock(); free_match_list(match_head, ft_locked); - err = -ENOMEM; - goto out; + return -ENOMEM; } if (!tree_get_node(&g->node)) { kfree(curr_match); @@ -1608,7 +1637,6 @@ static int build_match_list(struct match_list_head *match_head, curr_match->g = g; list_add_tail(&curr_match->list, &match_head->list); } -out: rcu_read_unlock(); return err; } @@ -1929,16 +1957,18 @@ void mlx5_del_flow_rules(struct mlx5_flow_handle *handle) down_write_ref_node(&fte->node, false); for (i = handle->num_rules - 1; i >= 0; i--) tree_remove_node(&handle->rule[i]->node, true); - if (fte->dests_size) { - if (fte->modify_mask) - modify_fte(fte); - up_write_ref_node(&fte->node, false); - } else if (list_empty(&fte->node.children)) { + if (list_empty(&fte->node.children)) { del_hw_fte(&fte->node); /* Avoid double call to del_hw_fte */ fte->node.del_hw_func = NULL; up_write_ref_node(&fte->node, false); tree_put_node(&fte->node, false); + } else if (fte->dests_size) { + if (fte->modify_mask) + modify_fte(fte); + up_write_ref_node(&fte->node, false); + } else { + up_write_ref_node(&fte->node, false); } kfree(handle); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 0fed2419623d1..1f3d12faa2a5b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -319,17 +319,6 @@ static int mlx5i_create_flow_steering(struct mlx5e_priv *priv) } mlx5e_set_ttc_basic_params(priv, &ttc_params); - mlx5e_set_inner_ttc_ft_params(&ttc_params); - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - ttc_params.indir_tirn[tt] = priv->inner_indir_tir[tt].tirn; - - err = mlx5e_create_inner_ttc_table(priv, &ttc_params, &priv->fs.inner_ttc); - if (err) { - netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n", - err); - goto err_destroy_arfs_tables; - } - mlx5e_set_ttc_ft_params(&ttc_params); for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) ttc_params.indir_tirn[tt] = priv->indir_tir[tt].tirn; @@ -338,13 +327,11 @@ static int mlx5i_create_flow_steering(struct mlx5e_priv *priv) if (err) { netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", err); - goto err_destroy_inner_ttc_table; + goto err_destroy_arfs_tables; } return 0; -err_destroy_inner_ttc_table: - mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc); err_destroy_arfs_tables: mlx5e_arfs_destroy_tables(priv); @@ -354,7 +341,6 @@ static int mlx5i_create_flow_steering(struct mlx5e_priv *priv) static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv) { mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); - mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc); mlx5e_arfs_destroy_tables(priv); } @@ -379,7 +365,7 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_indirect_rqts; - err = mlx5e_create_indirect_tirs(priv, true); + err = mlx5e_create_indirect_tirs(priv, false); if (err) goto err_destroy_direct_rqts; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index bdc7f915d80e3..101667c6b5843 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -265,10 +265,8 @@ static int mlx5_lag_fib_event(struct notifier_block *nb, fen_info = container_of(info, struct fib_entry_notifier_info, info); fi = fen_info->fi; - if (fi->nh) { - NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported"); - return notifier_from_errno(-EINVAL); - } + if (fi->nh) + return NOTIFY_DONE; fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; if (fib_dev != ldev->pf[0].netdev && fib_dev != ldev->pf[1].netdev) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index 9aaf0eab7c2e1..0f7d501e2dbd9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -99,4 +99,6 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev); struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev); #endif +int mlx5_vector2irqn(struct mlx5_core_dev *dev, int vector, unsigned int *irqn); + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c index 3118e8d664072..91315b0d48fe1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include "mlx5_core.h" #include "lib/mpfs.h" @@ -177,6 +178,7 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) mutex_unlock(&mpfs->lock); return err; } +EXPORT_SYMBOL(mlx5_mpfs_add_mac); int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) { @@ -208,3 +210,4 @@ int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) mutex_unlock(&mpfs->lock); return err; } +EXPORT_SYMBOL(mlx5_mpfs_del_mac); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h index 4a7b2c3203a7e..4a293542a7aa1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h @@ -84,12 +84,9 @@ struct l2addr_node { #ifdef CONFIG_MLX5_MPFS int mlx5_mpfs_init(struct mlx5_core_dev *dev); void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev); -int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac); -int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac); #else /* #ifndef CONFIG_MLX5_MPFS */ static inline int mlx5_mpfs_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) {} -static inline int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; } -static inline int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; } #endif + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c index 56bf900eb753f..dbdb6a9592f09 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019 Mellanox Technologies. */ #include +#include #include "dr_types.h" static int dr_domain_init_cache(struct mlx5dr_domain *dmn) @@ -64,9 +65,9 @@ static int dr_domain_init_resources(struct mlx5dr_domain *dmn) } dmn->uar = mlx5_get_uars_page(dmn->mdev); - if (!dmn->uar) { + if (IS_ERR(dmn->uar)) { mlx5dr_err(dmn, "Couldn't allocate UAR\n"); - ret = -ENOMEM; + ret = PTR_ERR(dmn->uar); goto clean_pd; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c index f012aac83b10e..e253afa5e7419 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -603,6 +603,7 @@ static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev, MLX5_SET(qpc, qpc, log_ack_req_freq, 0); MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt); MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry); + MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x8); /* ~1ms */ return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, qpc, &dr_qp->mqp); @@ -705,7 +706,6 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, struct mlx5_cqe64 *cqe; struct mlx5dr_cq *cq; int inlen, err, eqn; - unsigned int irqn; void *cqc, *in; __be64 *pas; int vector; @@ -738,7 +738,7 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, goto err_cqwq; vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev); - err = mlx5_vector2eqn(mdev, vector, &eqn, &irqn); + err = mlx5_vector2eqn(mdev, vector, &eqn); if (err) { kvfree(in); goto err_cqwq; @@ -775,7 +775,6 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, *cq->mcq.arm_db = cpu_to_be32(2 << 28); cq->mcq.vector = 0; - cq->mcq.irqn = irqn; cq->mcq.uar = uar; return cq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index 348f02e336f68..d643685067541 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -43,11 +43,10 @@ static int set_miss_action(struct mlx5_flow_root_namespace *ns, err = mlx5dr_table_set_miss_action(ft->fs_dr_table.dr_table, action); if (err && action) { err = mlx5dr_action_destroy(action); - if (err) { - action = NULL; - mlx5_core_err(ns->dev, "Failed to destroy action (%d)\n", - err); - } + if (err) + mlx5_core_err(ns->dev, + "Failed to destroy action (%d)\n", err); + action = NULL; } ft->fs_dr_table.miss_action = action; if (old_miss_action) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index c3fc1cc4bb2b9..882ea12098b7a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -25,16 +25,8 @@ #define MLXSW_THERMAL_ZONE_MAX_NAME 16 #define MLXSW_THERMAL_TEMP_SCORE_MAX GENMASK(31, 0) #define MLXSW_THERMAL_MAX_STATE 10 +#define MLXSW_THERMAL_MIN_STATE 2 #define MLXSW_THERMAL_MAX_DUTY 255 -/* Minimum and maximum fan allowed speed in percent: from 20% to 100%. Values - * MLXSW_THERMAL_MAX_STATE + x, where x is between 2 and 10 are used for - * setting fan speed dynamic minimum. For example, if value is set to 14 (40%) - * cooling levels vector will be set to 4, 4, 4, 4, 4, 5, 6, 7, 8, 9, 10 to - * introduce PWM speed in percent: 40, 40, 40, 40, 40, 50, 60. 70, 80, 90, 100. - */ -#define MLXSW_THERMAL_SPEED_MIN (MLXSW_THERMAL_MAX_STATE + 2) -#define MLXSW_THERMAL_SPEED_MAX (MLXSW_THERMAL_MAX_STATE * 2) -#define MLXSW_THERMAL_SPEED_MIN_LEVEL 2 /* 20% */ /* External cooling devices, allowed for binding to mlxsw thermal zones. */ static char * const mlxsw_thermal_external_allowed_cdev[] = { @@ -703,49 +695,16 @@ static int mlxsw_thermal_set_cur_state(struct thermal_cooling_device *cdev, struct mlxsw_thermal *thermal = cdev->devdata; struct device *dev = thermal->bus_info->dev; char mfsc_pl[MLXSW_REG_MFSC_LEN]; - unsigned long cur_state, i; int idx; - u8 duty; int err; + if (state > MLXSW_THERMAL_MAX_STATE) + return -EINVAL; + idx = mlxsw_get_cooling_device_idx(thermal, cdev); if (idx < 0) return idx; - /* Verify if this request is for changing allowed fan dynamical - * minimum. If it is - update cooling levels accordingly and update - * state, if current state is below the newly requested minimum state. - * For example, if current state is 5, and minimal state is to be - * changed from 4 to 6, thermal->cooling_levels[0 to 5] will be changed - * all from 4 to 6. And state 5 (thermal->cooling_levels[4]) should be - * overwritten. - */ - if (state >= MLXSW_THERMAL_SPEED_MIN && - state <= MLXSW_THERMAL_SPEED_MAX) { - state -= MLXSW_THERMAL_MAX_STATE; - for (i = 0; i <= MLXSW_THERMAL_MAX_STATE; i++) - thermal->cooling_levels[i] = max(state, i); - - mlxsw_reg_mfsc_pack(mfsc_pl, idx, 0); - err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsc), mfsc_pl); - if (err) - return err; - - duty = mlxsw_reg_mfsc_pwm_duty_cycle_get(mfsc_pl); - cur_state = mlxsw_duty_to_state(duty); - - /* If current fan state is lower than requested dynamical - * minimum, increase fan speed up to dynamical minimum. - */ - if (state < cur_state) - return 0; - - state = cur_state; - } - - if (state > MLXSW_THERMAL_MAX_STATE) - return -EINVAL; - /* Normalize the state to the valid speed range. */ state = thermal->cooling_levels[state]; mlxsw_reg_mfsc_pack(mfsc_pl, idx, mlxsw_state_to_duty(state)); @@ -1040,8 +999,7 @@ int mlxsw_thermal_init(struct mlxsw_core *core, /* Initialize cooling levels per PWM state. */ for (i = 0; i < MLXSW_THERMAL_MAX_STATE; i++) - thermal->cooling_levels[i] = max(MLXSW_THERMAL_SPEED_MIN_LEVEL, - i); + thermal->cooling_levels[i] = max(MLXSW_THERMAL_MIN_STATE, i); thermal->polling_delay = bus_info->low_frequency ? MLXSW_THERMAL_SLOW_POLL_INT : diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c index 95f408d0e103c..7cc4c30af1a71 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c +++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c @@ -649,6 +649,7 @@ static int mlxsw_i2c_probe(struct i2c_client *client, return 0; errout: + mutex_destroy(&mlxsw_i2c->cmd.lock); i2c_set_clientdata(client, NULL); return err; diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index aa4fef7890841..ff331251a019a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1876,6 +1876,7 @@ int mlxsw_pci_driver_register(struct pci_driver *pci_driver) { pci_driver->probe = mlxsw_pci_probe; pci_driver->remove = mlxsw_pci_remove; + pci_driver->shutdown = mlxsw_pci_remove; return pci_register_driver(pci_driver); } EXPORT_SYMBOL(mlxsw_pci_driver_register); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h index 81465e267b100..b7eb3674e2856 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h @@ -7,8 +7,8 @@ #include "spectrum.h" enum mlxsw_sp_counter_sub_pool_id { - MLXSW_SP_COUNTER_SUB_POOL_FLOW, MLXSW_SP_COUNTER_SUB_POOL_RIF, + MLXSW_SP_COUNTER_SUB_POOL_FLOW, }; int mlxsw_sp_counter_alloc(struct mlxsw_sp *mlxsw_sp, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c index 21296fa7f7fbf..bf51ed94952c5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c @@ -227,8 +227,6 @@ static int mlxsw_sp_dcbnl_ieee_setets(struct net_device *dev, static int mlxsw_sp_dcbnl_app_validate(struct net_device *dev, struct dcb_app *app) { - int prio; - if (app->priority >= IEEE_8021QAZ_MAX_TCS) { netdev_err(dev, "APP entry with priority value %u is invalid\n", app->priority); @@ -242,17 +240,6 @@ static int mlxsw_sp_dcbnl_app_validate(struct net_device *dev, app->protocol); return -EINVAL; } - - /* Warn about any DSCP APP entries with the same PID. */ - prio = fls(dcb_ieee_getapp_mask(dev, app)); - if (prio--) { - if (prio < app->priority) - netdev_warn(dev, "Choosing priority %d for DSCP %d in favor of previously-active value of %d\n", - app->priority, app->protocol, prio); - else if (prio > app->priority) - netdev_warn(dev, "Ignoring new priority %d for DSCP %d in favor of current value of %d\n", - app->priority, app->protocol, prio); - } break; case IEEE_8021QAZ_APP_SEL_ETHERTYPE: diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 2f013fc716985..91214cce874b1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3871,7 +3871,7 @@ static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp, { const struct fib_nh *nh = fib_info_nh(fi, 0); - return nh->fib_nh_scope == RT_SCOPE_LINK || + return nh->fib_nh_gw_family || mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL); } diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c index 7dc451fdaf35e..2431723bc2fb4 100644 --- a/drivers/net/ethernet/micrel/ksz884x.c +++ b/drivers/net/ethernet/micrel/ksz884x.c @@ -5693,7 +5693,7 @@ static void dev_set_promiscuous(struct net_device *dev, struct dev_priv *priv, * from the bridge. */ if ((hw->features & STP_SUPPORT) && !promiscuous && - (dev->priv_flags & IFF_BRIDGE_PORT)) { + netif_is_bridge_port(dev)) { struct ksz_switch *sw = hw->ksz_switch; int port = priv->port.first_port; diff --git a/drivers/net/ethernet/microchip/encx24j600-regmap.c b/drivers/net/ethernet/microchip/encx24j600-regmap.c index 1f496fac70332..e2528633c09a1 100644 --- a/drivers/net/ethernet/microchip/encx24j600-regmap.c +++ b/drivers/net/ethernet/microchip/encx24j600-regmap.c @@ -502,13 +502,19 @@ static struct regmap_bus phymap_encx24j600 = { .reg_read = regmap_encx24j600_phy_reg_read, }; -void devm_regmap_init_encx24j600(struct device *dev, - struct encx24j600_context *ctx) +int devm_regmap_init_encx24j600(struct device *dev, + struct encx24j600_context *ctx) { mutex_init(&ctx->mutex); regcfg.lock_arg = ctx; ctx->regmap = devm_regmap_init(dev, ®map_encx24j600, ctx, ®cfg); + if (IS_ERR(ctx->regmap)) + return PTR_ERR(ctx->regmap); ctx->phymap = devm_regmap_init(dev, &phymap_encx24j600, ctx, &phycfg); + if (IS_ERR(ctx->phymap)) + return PTR_ERR(ctx->phymap); + + return 0; } EXPORT_SYMBOL_GPL(devm_regmap_init_encx24j600); diff --git a/drivers/net/ethernet/microchip/encx24j600.c b/drivers/net/ethernet/microchip/encx24j600.c index c3a6edc0ddf62..6b43afc4e3246 100644 --- a/drivers/net/ethernet/microchip/encx24j600.c +++ b/drivers/net/ethernet/microchip/encx24j600.c @@ -1027,10 +1027,13 @@ static int encx24j600_spi_probe(struct spi_device *spi) priv->speed = SPEED_100; priv->ctx.spi = spi; - devm_regmap_init_encx24j600(&spi->dev, &priv->ctx); ndev->irq = spi->irq; ndev->netdev_ops = &encx24j600_netdev_ops; + ret = devm_regmap_init_encx24j600(&spi->dev, &priv->ctx); + if (ret) + goto out_free; + mutex_init(&priv->lock); /* Reset device and check if it is connected */ diff --git a/drivers/net/ethernet/microchip/encx24j600_hw.h b/drivers/net/ethernet/microchip/encx24j600_hw.h index f604a260ede79..711147a159aa9 100644 --- a/drivers/net/ethernet/microchip/encx24j600_hw.h +++ b/drivers/net/ethernet/microchip/encx24j600_hw.h @@ -15,8 +15,8 @@ struct encx24j600_context { int bank; }; -void devm_regmap_init_encx24j600(struct device *dev, - struct encx24j600_context *ctx); +int devm_regmap_init_encx24j600(struct device *dev, + struct encx24j600_context *ctx); /* Single-byte instructions */ #define BANK_SELECT(bank) (0xC0 | ((bank & (BANK_MASK >> BANK_SHIFT)) << 1)) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index dfa0ded169ee9..c69ffcfe61689 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -916,8 +916,7 @@ static int lan743x_phy_reset(struct lan743x_adapter *adapter) } static void lan743x_phy_update_flowcontrol(struct lan743x_adapter *adapter, - u8 duplex, u16 local_adv, - u16 remote_adv) + u16 local_adv, u16 remote_adv) { struct lan743x_phy *phy = &adapter->phy; u8 cap; @@ -944,22 +943,17 @@ static void lan743x_phy_link_status_change(struct net_device *netdev) phy_print_status(phydev); if (phydev->state == PHY_RUNNING) { - struct ethtool_link_ksettings ksettings; int remote_advertisement = 0; int local_advertisement = 0; - memset(&ksettings, 0, sizeof(ksettings)); - phy_ethtool_get_link_ksettings(netdev, &ksettings); local_advertisement = linkmode_adv_to_mii_adv_t(phydev->advertising); remote_advertisement = linkmode_adv_to_mii_adv_t(phydev->lp_advertising); - lan743x_phy_update_flowcontrol(adapter, - ksettings.base.duplex, - local_advertisement, + lan743x_phy_update_flowcontrol(adapter, local_advertisement, remote_advertisement); - lan743x_ptp_update_latency(adapter, ksettings.base.speed); + lan743x_ptp_update_latency(adapter, phydev->speed); } } @@ -1706,6 +1700,16 @@ static int lan743x_tx_ring_init(struct lan743x_tx *tx) ret = -EINVAL; goto cleanup; } + if (dma_set_mask_and_coherent(&tx->adapter->pdev->dev, + DMA_BIT_MASK(64))) { + if (dma_set_mask_and_coherent(&tx->adapter->pdev->dev, + DMA_BIT_MASK(32))) { + dev_warn(&tx->adapter->pdev->dev, + "lan743x_: No suitable DMA available\n"); + ret = -ENOMEM; + goto cleanup; + } + } ring_allocation_size = ALIGN(tx->ring_size * sizeof(struct lan743x_tx_descriptor), PAGE_SIZE); @@ -1888,13 +1892,13 @@ static int lan743x_rx_next_index(struct lan743x_rx *rx, int index) return ((++index) % rx->ring_size); } -static struct sk_buff *lan743x_rx_allocate_skb(struct lan743x_rx *rx) +static struct sk_buff *lan743x_rx_allocate_skb(struct lan743x_rx *rx, gfp_t gfp) { int length = 0; length = (LAN743X_MAX_FRAME_SIZE + ETH_HLEN + 4 + RX_HEAD_PADDING); return __netdev_alloc_skb(rx->adapter->netdev, - length, GFP_ATOMIC | GFP_DMA); + length, gfp); } static void lan743x_rx_update_tail(struct lan743x_rx *rx, int index) @@ -2067,7 +2071,8 @@ static int lan743x_rx_process_packet(struct lan743x_rx *rx) struct sk_buff *new_skb = NULL; int packet_length; - new_skb = lan743x_rx_allocate_skb(rx); + new_skb = lan743x_rx_allocate_skb(rx, + GFP_ATOMIC | GFP_DMA); if (!new_skb) { /* failed to allocate next skb. * Memory is very low. @@ -2256,6 +2261,16 @@ static int lan743x_rx_ring_init(struct lan743x_rx *rx) ret = -EINVAL; goto cleanup; } + if (dma_set_mask_and_coherent(&rx->adapter->pdev->dev, + DMA_BIT_MASK(64))) { + if (dma_set_mask_and_coherent(&rx->adapter->pdev->dev, + DMA_BIT_MASK(32))) { + dev_warn(&rx->adapter->pdev->dev, + "lan743x_: No suitable DMA available\n"); + ret = -ENOMEM; + goto cleanup; + } + } ring_allocation_size = ALIGN(rx->ring_size * sizeof(struct lan743x_rx_descriptor), PAGE_SIZE); @@ -2294,7 +2309,8 @@ static int lan743x_rx_ring_init(struct lan743x_rx *rx) rx->last_head = 0; for (index = 0; index < rx->ring_size; index++) { - struct sk_buff *new_skb = lan743x_rx_allocate_skb(rx); + struct sk_buff *new_skb = lan743x_rx_allocate_skb(rx, + GFP_KERNEL); ret = lan743x_rx_init_ring_element(rx, index, new_skb); if (ret) @@ -3001,6 +3017,8 @@ static int lan743x_pm_resume(struct device *dev) if (ret) { netif_err(adapter, probe, adapter->netdev, "lan743x_hardware_init returned %d\n", ret); + lan743x_pci_cleanup(adapter); + return ret; } /* open netdev when netdev is at running state while resume. diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 6030c90d50ccb..acddb3aa53de4 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1024,12 +1024,6 @@ static int ocelot_hwstamp_set(struct ocelot_port *port, struct ifreq *ifr) switch (cfg.rx_filter) { case HWTSTAMP_FILTER_NONE: break; - case HWTSTAMP_FILTER_ALL: - case HWTSTAMP_FILTER_SOME: - case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: - case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: - case HWTSTAMP_FILTER_NTP_ALL: case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: @@ -1189,7 +1183,10 @@ static int ocelot_get_ts_info(struct net_device *dev, SOF_TIMESTAMPING_RAW_HARDWARE; info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON) | BIT(HWTSTAMP_TX_ONESTEP_SYNC); - info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | BIT(HWTSTAMP_FILTER_ALL); + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | + BIT(HWTSTAMP_FILTER_PTP_V2_EVENT) | + BIT(HWTSTAMP_FILTER_PTP_V2_L2_EVENT) | + BIT(HWTSTAMP_FILTER_PTP_V2_L4_EVENT); return 0; } @@ -2156,8 +2153,12 @@ int ocelot_init(struct ocelot *ocelot) ocelot_write_rix(ocelot, ANA_PGID_PGID_PGID(GENMASK(ocelot->num_phys_ports, 0)), ANA_PGID_PGID, PGID_MC); - ocelot_write_rix(ocelot, 0, ANA_PGID_PGID, PGID_MCIPV4); - ocelot_write_rix(ocelot, 0, ANA_PGID_PGID, PGID_MCIPV6); + ocelot_write_rix(ocelot, + ANA_PGID_PGID_PGID(GENMASK(ocelot->num_phys_ports, 0)), + ANA_PGID_PGID, PGID_MCIPV4); + ocelot_write_rix(ocelot, + ANA_PGID_PGID_PGID(GENMASK(ocelot->num_phys_ports, 0)), + ANA_PGID_PGID, PGID_MCIPV6); /* CPU port Injection/Extraction configuration */ ocelot_write_rix(ocelot, QSYS_SWITCH_PORT_MODE_INGRESS_DROP_MODE | diff --git a/drivers/net/ethernet/natsemi/xtsonic.c b/drivers/net/ethernet/natsemi/xtsonic.c index 44171d7bb434c..5117864738a7e 100644 --- a/drivers/net/ethernet/natsemi/xtsonic.c +++ b/drivers/net/ethernet/natsemi/xtsonic.c @@ -120,7 +120,7 @@ static const struct net_device_ops xtsonic_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, }; -static int __init sonic_probe1(struct net_device *dev) +static int sonic_probe1(struct net_device *dev) { unsigned int silicon_revision; struct sonic_local *lp = netdev_priv(dev); diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index e0b2bf3279053..71ab4e9c9a171 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -8565,7 +8565,7 @@ static void s2io_io_resume(struct pci_dev *pdev) return; } - if (s2io_set_mac_addr(netdev, netdev->dev_addr) == FAILURE) { + if (do_s2io_prog_unicast(netdev, netdev->dev_addr) == FAILURE) { s2io_card_down(sp); pr_err("Can't restore mac addr after reset.\n"); return; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 8f732771d3fad..bd2966eb6b7d3 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -182,15 +182,21 @@ static int nfp_bpf_check_mtu(struct nfp_app *app, struct net_device *netdev, int new_mtu) { struct nfp_net *nn = netdev_priv(netdev); - unsigned int max_mtu; + struct nfp_bpf_vnic *bv; + struct bpf_prog *prog; if (~nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) return 0; - max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; - if (new_mtu > max_mtu) { - nn_info(nn, "BPF offload active, MTU over %u not supported\n", - max_mtu); + if (nn->xdp_hw.prog) { + prog = nn->xdp_hw.prog; + } else { + bv = nn->app_priv; + prog = bv->tc_prog; + } + + if (nfp_bpf_offload_check_mtu(nn, prog, new_mtu)) { + nn_info(nn, "BPF offload active, potential packet access beyond hardware packet boundary"); return -EBUSY; } return 0; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index fac9c6f9e197b..c74620fcc539c 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -560,6 +560,8 @@ bool nfp_is_subprog_start(struct nfp_insn_meta *meta); void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog); int nfp_bpf_jit(struct nfp_prog *prog); bool nfp_bpf_supported_opcode(u8 code); +bool nfp_bpf_offload_check_mtu(struct nfp_net *nn, struct bpf_prog *prog, + unsigned int mtu); int nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx); diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 88fab6a82acff..7ff388ecc7e3a 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -477,19 +477,28 @@ int nfp_bpf_event_output(struct nfp_app_bpf *bpf, const void *data, return 0; } +bool nfp_bpf_offload_check_mtu(struct nfp_net *nn, struct bpf_prog *prog, + unsigned int mtu) +{ + unsigned int fw_mtu, pkt_off; + + fw_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; + pkt_off = min(prog->aux->max_pkt_offset, mtu); + + return fw_mtu < pkt_off; +} + static int nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog, struct netlink_ext_ack *extack) { struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; - unsigned int fw_mtu, pkt_off, max_stack, max_prog_len; + unsigned int max_stack, max_prog_len; dma_addr_t dma_addr; void *img; int err; - fw_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; - pkt_off = min(prog->aux->max_pkt_offset, nn->dp.netdev->mtu); - if (fw_mtu < pkt_off) { + if (nfp_bpf_offload_check_mtu(nn, prog, nn->dp.netdev->mtu)) { NL_SET_ERR_MSG_MOD(extack, "BPF offload not supported with potential packet access beyond HW packet split boundary"); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 2600ce476d6b2..26772c3310f09 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -588,8 +588,8 @@ nfp_tunnel_add_shared_mac(struct nfp_app *app, struct net_device *netdev, int port, bool mod) { struct nfp_flower_priv *priv = app->priv; - int ida_idx = NFP_MAX_MAC_INDEX, err; struct nfp_tun_offloaded_mac *entry; + int ida_idx = -1, err; u16 nfp_mac_idx = 0; entry = nfp_tunnel_lookup_offloaded_macs(app, netdev->dev_addr); @@ -663,7 +663,7 @@ nfp_tunnel_add_shared_mac(struct nfp_app *app, struct net_device *netdev, err_free_entry: kfree(entry); err_free_ida: - if (ida_idx != NFP_MAX_MAC_INDEX) + if (ida_idx != -1) ida_simple_remove(&priv->tun.mac_off_ids, ida_idx); return err; @@ -677,6 +677,7 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev, struct nfp_flower_repr_priv *repr_priv; struct nfp_tun_offloaded_mac *entry; struct nfp_repr *repr; + u16 nfp_mac_idx; int ida_idx; entry = nfp_tunnel_lookup_offloaded_macs(app, mac); @@ -695,8 +696,6 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev, entry->bridge_count--; if (!entry->bridge_count && entry->ref_count) { - u16 nfp_mac_idx; - nfp_mac_idx = entry->index & ~NFP_TUN_PRE_TUN_IDX_BIT; if (__nfp_tunnel_offload_mac(app, mac, nfp_mac_idx, false)) { @@ -712,7 +711,6 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev, /* If MAC is now used by 1 repr set the offloaded MAC index to port. */ if (entry->ref_count == 1 && list_is_singular(&entry->repr_list)) { - u16 nfp_mac_idx; int port, err; repr_priv = list_first_entry(&entry->repr_list, @@ -740,8 +738,14 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev, WARN_ON_ONCE(rhashtable_remove_fast(&priv->tun.offloaded_macs, &entry->ht_node, offloaded_macs_params)); + + if (nfp_flower_is_supported_bridge(netdev)) + nfp_mac_idx = entry->index & ~NFP_TUN_PRE_TUN_IDX_BIT; + else + nfp_mac_idx = entry->index; + /* If MAC has global ID then extract and free the ida entry. */ - if (nfp_tunnel_is_mac_idx_global(entry->index)) { + if (nfp_tunnel_is_mac_idx_global(nfp_mac_idx)) { ida_idx = nfp_tunnel_get_ida_from_global_mac_idx(entry->index); ida_simple_remove(&priv->tun.mac_off_ids, ida_idx); } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.c b/drivers/net/ethernet/netronome/nfp/nfp_asm.c index b04b83687fe21..488615434e4ae 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.c @@ -196,7 +196,7 @@ int swreg_to_unrestricted(swreg dst, swreg lreg, swreg rreg, } reg->dst_lmextn = swreg_lmextn(dst); - reg->src_lmextn = swreg_lmextn(lreg) | swreg_lmextn(rreg); + reg->src_lmextn = swreg_lmextn(lreg) || swreg_lmextn(rreg); return 0; } @@ -277,7 +277,7 @@ int swreg_to_restricted(swreg dst, swreg lreg, swreg rreg, } reg->dst_lmextn = swreg_lmextn(dst); - reg->src_lmextn = swreg_lmextn(lreg) | swreg_lmextn(rreg); + reg->src_lmextn = swreg_lmextn(lreg) || swreg_lmextn(rreg); return 0; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 250f510b1d212..3dcb09f17b77f 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -557,7 +557,6 @@ struct nfp_net_dp { * @exn_name: Name for Exception interrupt * @shared_handler: Handler for shared interrupts * @shared_name: Name for shared interrupt - * @me_freq_mhz: ME clock_freq (MHz) * @reconfig_lock: Protects @reconfig_posted, @reconfig_timer_active, * @reconfig_sync_present and HW reconfiguration request * regs/machinery from async requests (sync must take @@ -639,8 +638,6 @@ struct nfp_net { irq_handler_t shared_handler; char shared_name[IFNAMSIZ + 8]; - u32 me_freq_mhz; - bool link_up; spinlock_t link_status_lock; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 2354dec994184..10857914c552b 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -266,8 +266,6 @@ nfp_net_get_link_ksettings(struct net_device *netdev, /* Init to unknowns */ ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); - ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); - ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause); cmd->base.port = PORT_OTHER; cmd->base.speed = SPEED_UNKNOWN; cmd->base.duplex = DUPLEX_UNKNOWN; @@ -275,6 +273,8 @@ nfp_net_get_link_ksettings(struct net_device *netdev, port = nfp_port_from_netdev(netdev); eth_port = nfp_port_get_eth_port(port); if (eth_port) { + ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); + ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause); cmd->base.autoneg = eth_port->aneg != NFP_ANEG_DISABLED ? AUTONEG_ENABLE : AUTONEG_DISABLE; nfp_net_set_fec_link_mode(eth_port, cmd); @@ -1269,7 +1269,7 @@ static int nfp_net_set_coalesce(struct net_device *netdev, * ME timestamp ticks. There are 16 ME clock cycles for each timestamp * count. */ - factor = nn->me_freq_mhz / 16; + factor = nn->tlv_caps.me_freq_mhz / 16; /* Each pair of (usecs, max_frames) fields specifies that interrupts * should be coalesced until diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c index 94994a939277b..6ef48eb3a77d4 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c @@ -803,8 +803,10 @@ int nfp_cpp_area_cache_add(struct nfp_cpp *cpp, size_t size) return -ENOMEM; cache = kzalloc(sizeof(*cache), GFP_KERNEL); - if (!cache) + if (!cache) { + nfp_cpp_area_free(area); return -ENOMEM; + } cache->id = 0; cache->addr = 0; diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index 1d59ef367a85c..d2e220a94a57c 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -1007,9 +1007,6 @@ static int lpc_eth_close(struct net_device *ndev) napi_disable(&pldat->napi); netif_stop_queue(ndev); - if (ndev->phydev) - phy_stop(ndev->phydev); - spin_lock_irqsave(&pldat->lock, flags); __lpc_eth_reset(pldat); netif_carrier_off(ndev); @@ -1017,6 +1014,8 @@ static int lpc_eth_close(struct net_device *ndev) writel(0, LPC_ENET_MAC2(pldat->net_base)); spin_unlock_irqrestore(&pldat->lock, flags); + if (ndev->phydev) + phy_stop(ndev->phydev); clk_disable_unprepare(pldat->clk); return 0; @@ -1471,6 +1470,7 @@ static int lpc_eth_drv_resume(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct netdata_local *pldat; + int ret; if (device_may_wakeup(&pdev->dev)) disable_irq_wake(ndev->irq); @@ -1480,7 +1480,9 @@ static int lpc_eth_drv_resume(struct platform_device *pdev) pldat = netdev_priv(ndev); /* Enable interface clock */ - clk_enable(pldat->clk); + ret = clk_enable(pldat->clk); + if (ret) + return ret; /* Reset and initialize */ __lpc_eth_reset(pldat); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index e66002251596b..f9c303d76658a 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -912,6 +912,10 @@ static int ionic_addr_add(struct net_device *netdev, const u8 *addr) static int ionic_addr_del(struct net_device *netdev, const u8 *addr) { + /* Don't delete our own address from the uc list */ + if (ether_addr_equal(addr, netdev->dev_addr)) + return 0; + return ionic_lif_addr(netdev_priv(netdev), addr, false); } @@ -1991,7 +1995,7 @@ static int ionic_lif_init(struct ionic_lif *lif) return -EINVAL; } - lif->dbid_inuse = bitmap_alloc(lif->dbid_count, GFP_KERNEL); + lif->dbid_inuse = bitmap_zalloc(lif->dbid_count, GFP_KERNEL); if (!lif->dbid_inuse) { dev_err(dev, "Failed alloc doorbell id bitmap, aborting\n"); return -ENOMEM; diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index 9adbaccd0c5ed..934740d604709 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -1307,6 +1307,14 @@ qed_iwarp_wait_cid_map_cleared(struct qed_hwfn *p_hwfn, struct qed_bmap *bmap) prev_weight = weight; while (weight) { + /* If the HW device is during recovery, all resources are + * immediately reset without receiving a per-cid indication + * from HW. In this case we don't expect the cid_map to be + * cleared. + */ + if (p_hwfn->cdev->recov_in_prog) + return 0; + msleep(QED_IWARP_MAX_CID_CLEAN_TIME); weight = bitmap_weight(bmap->bitmap, bmap->max_count); diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 19a1a58d60f89..c449ecc0add23 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -353,6 +353,9 @@ static int qed_ll2_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) unsigned long flags; int rc = -EINVAL; + if (!p_ll2_conn) + return rc; + spin_lock_irqsave(&p_tx->lock, flags); if (p_tx->b_completing_packet) { rc = -EBUSY; @@ -526,7 +529,16 @@ static int qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie) unsigned long flags = 0; int rc = 0; + if (!p_ll2_conn) + return rc; + spin_lock_irqsave(&p_rx->lock, flags); + + if (!QED_LL2_RX_REGISTERED(p_ll2_conn)) { + spin_unlock_irqrestore(&p_rx->lock, flags); + return 0; + } + cq_new_idx = le16_to_cpu(*p_rx->p_fw_cons); cq_old_idx = qed_chain_get_cons_idx(&p_rx->rcq_chain); @@ -847,6 +859,9 @@ static int qed_ll2_lb_rxq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) struct qed_ll2_info *p_ll2_conn = (struct qed_ll2_info *)p_cookie; int rc; + if (!p_ll2_conn) + return 0; + if (!QED_LL2_RX_REGISTERED(p_ll2_conn)) return 0; @@ -870,6 +885,9 @@ static int qed_ll2_lb_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) u16 new_idx = 0, num_bds = 0; int rc; + if (!p_ll2_conn) + return 0; + if (!QED_LL2_TX_REGISTERED(p_ll2_conn)) return 0; @@ -1642,6 +1660,8 @@ int qed_ll2_post_rx_buffer(void *cxt, if (!p_ll2_conn) return -EINVAL; p_rx = &p_ll2_conn->rx_queue; + if (!p_rx->set_prod_addr) + return -EIO; spin_lock_irqsave(&p_rx->lock, flags); if (!list_empty(&p_rx->free_descq)) diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index bc1f5b36b5bf2..1cbcc27602780 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -559,7 +559,12 @@ static int qed_enable_msix(struct qed_dev *cdev, rc = cnt; } - if (rc > 0) { + /* For VFs, we should return with an error in case we didn't get the + * exact number of msix vectors as we requested. + * Not doing that will lead to a crash when starting queues for + * this VF. + */ + if ((IS_PF(cdev) && rc > 0) || (IS_VF(cdev) && rc == cnt)) { /* MSI-x configuration was achieved */ int_params->out.int_mode = QED_INT_MODE_MSIX; int_params->out.num_vectors = rc; @@ -1233,6 +1238,7 @@ static int qed_slowpath_start(struct qed_dev *cdev, } else { DP_NOTICE(cdev, "Failed to acquire PTT for aRFS\n"); + rc = -EINVAL; goto err; } } diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 5d85ae59bc51e..74aec5c2732a0 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -498,18 +498,14 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); - if (!qed_mcp_has_pending_cmd(p_hwfn)) { - spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); + if (!qed_mcp_has_pending_cmd(p_hwfn)) break; - } rc = qed_mcp_update_pending_cmd(p_hwfn, p_ptt); - if (!rc) { - spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); + if (!rc) break; - } else if (rc != -EAGAIN) { + else if (rc != -EAGAIN) goto err; - } spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); @@ -526,8 +522,6 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, return -EAGAIN; } - spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); - /* Send the mailbox command */ qed_mcp_reread_offsets(p_hwfn, p_ptt); seq_num = ++p_hwfn->mcp_info->drv_mb_seq; @@ -554,18 +548,14 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); - if (p_cmd_elem->b_is_completed) { - spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); + if (p_cmd_elem->b_is_completed) break; - } rc = qed_mcp_update_pending_cmd(p_hwfn, p_ptt); - if (!rc) { - spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); + if (!rc) break; - } else if (rc != -EAGAIN) { + else if (rc != -EAGAIN) goto err; - } spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); } while (++cnt < max_retries); @@ -586,7 +576,6 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, return -EAGAIN; } - spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); qed_mcp_cmd_del_elem(p_hwfn, p_cmd_elem); spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); @@ -3173,6 +3162,7 @@ qed_mcp_get_nvm_image_att(struct qed_hwfn *p_hwfn, struct qed_nvm_image_att *p_image_att) { enum nvm_image_type type; + int rc; u32 i; /* Translate image_id into MFW definitions */ @@ -3198,7 +3188,10 @@ qed_mcp_get_nvm_image_att(struct qed_hwfn *p_hwfn, return -EINVAL; } - qed_mcp_nvm_info_populate(p_hwfn); + rc = qed_mcp_nvm_info_populate(p_hwfn); + if (rc) + return rc; + for (i = 0; i < p_hwfn->nvm_info.num_images; i++) if (type == p_hwfn->nvm_info.image_att[i].image_type) break; diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index 38b1f402f7ed2..b291971bcf926 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -1245,8 +1245,7 @@ qed_rdma_create_qp(void *rdma_cxt, if (!rdma_cxt || !in_params || !out_params || !p_hwfn->p_rdma_info->active) { - DP_ERR(p_hwfn->cdev, - "qed roce create qp failed due to NULL entry (rdma_cxt=%p, in=%p, out=%p, roce_info=?\n", + pr_err("qed roce create qp failed due to NULL entry (rdma_cxt=%p, in=%p, out=%p, roce_info=?\n", rdma_cxt, in_params, out_params); return NULL; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index 83817bb50e9fa..6e6563b51d688 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -107,6 +107,14 @@ void qed_roce_stop(struct qed_hwfn *p_hwfn) * Beyond the added delay we clear the bitmap anyway. */ while (bitmap_weight(rcid_map->bitmap, rcid_map->max_count)) { + /* If the HW device is during recovery, all resources are + * immediately reset without receiving a per-cid indication + * from HW. In this case we don't expect the cid bitmap to be + * cleared. + */ + if (p_hwfn->cdev->recov_in_prog) + return; + msleep(100); if (wait_count++ > 20) { DP_NOTICE(p_hwfn, "cid bitmap wait timed out\n"); diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index fb9c3ca5d36cc..20f840ea05030 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -3003,12 +3003,16 @@ static int qed_iov_pre_update_vport(struct qed_hwfn *hwfn, u8 mask = QED_ACCEPT_UCAST_UNMATCHED | QED_ACCEPT_MCAST_UNMATCHED; struct qed_filter_accept_flags *flags = ¶ms->accept_flags; struct qed_public_vf_info *vf_info; + u16 tlv_mask; + + tlv_mask = BIT(QED_IOV_VP_UPDATE_ACCEPT_PARAM) | + BIT(QED_IOV_VP_UPDATE_ACCEPT_ANY_VLAN); /* Untrusted VFs can't even be trusted to know that fact. * Simply indicate everything is configured fine, and trace * configuration 'behind their back'. */ - if (!(*tlvs & BIT(QED_IOV_VP_UPDATE_ACCEPT_PARAM))) + if (!(*tlvs & tlv_mask)) return 0; vf_info = qed_iov_get_public_vf_info(hwfn, vfid, true); @@ -3025,6 +3029,13 @@ static int qed_iov_pre_update_vport(struct qed_hwfn *hwfn, flags->tx_accept_filter &= ~mask; } + if (params->update_accept_any_vlan_flg) { + vf_info->accept_any_vlan = params->accept_any_vlan; + + if (vf_info->forced_vlan && !vf_info->is_trusted_configured) + params->accept_any_vlan = false; + } + return 0; } @@ -3801,11 +3812,11 @@ bool qed_iov_mark_vf_flr(struct qed_hwfn *p_hwfn, u32 *p_disabled_vfs) return found; } -static void qed_iov_get_link(struct qed_hwfn *p_hwfn, - u16 vfid, - struct qed_mcp_link_params *p_params, - struct qed_mcp_link_state *p_link, - struct qed_mcp_link_capabilities *p_caps) +static int qed_iov_get_link(struct qed_hwfn *p_hwfn, + u16 vfid, + struct qed_mcp_link_params *p_params, + struct qed_mcp_link_state *p_link, + struct qed_mcp_link_capabilities *p_caps) { struct qed_vf_info *p_vf = qed_iov_get_vf_info(p_hwfn, vfid, @@ -3813,7 +3824,7 @@ static void qed_iov_get_link(struct qed_hwfn *p_hwfn, struct qed_bulletin_content *p_bulletin; if (!p_vf) - return; + return -EINVAL; p_bulletin = p_vf->bulletin.p_virt; @@ -3823,6 +3834,7 @@ static void qed_iov_get_link(struct qed_hwfn *p_hwfn, __qed_vf_get_link_state(p_hwfn, p_link, p_bulletin); if (p_caps) __qed_vf_get_link_caps(p_hwfn, p_caps, p_bulletin); + return 0; } static int @@ -4684,6 +4696,7 @@ static int qed_get_vf_config(struct qed_dev *cdev, struct qed_public_vf_info *vf_info; struct qed_mcp_link_state link; u32 tx_rate; + int ret; /* Sanitize request */ if (IS_VF(cdev)) @@ -4697,7 +4710,9 @@ static int qed_get_vf_config(struct qed_dev *cdev, vf_info = qed_iov_get_public_vf_info(hwfn, vf_id, true); - qed_iov_get_link(hwfn, vf_id, NULL, &link, NULL); + ret = qed_iov_get_link(hwfn, vf_id, NULL, &link, NULL); + if (ret) + return ret; /* Fill information about VF */ ivi->vf = vf_id; @@ -4713,6 +4728,7 @@ static int qed_get_vf_config(struct qed_dev *cdev, tx_rate = vf_info->tx_rate; ivi->max_tx_rate = tx_rate ? tx_rate : link.speed; ivi->min_tx_rate = qed_iov_get_vf_min_rate(hwfn, vf_id); + ivi->trusted = vf_info->is_trusted_request; return 0; } @@ -5143,6 +5159,12 @@ static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn) params.update_ctl_frame_check = 1; params.mac_chk_en = !vf_info->is_trusted_configured; + params.update_accept_any_vlan_flg = 0; + + if (vf_info->accept_any_vlan && vf_info->forced_vlan) { + params.update_accept_any_vlan_flg = 1; + params.accept_any_vlan = vf_info->accept_any_vlan; + } if (vf_info->rx_accept_mode & mask) { flags->update_rx_mode_config = 1; @@ -5158,13 +5180,20 @@ static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn) if (!vf_info->is_trusted_configured) { flags->rx_accept_filter &= ~mask; flags->tx_accept_filter &= ~mask; + params.accept_any_vlan = false; } if (flags->update_rx_mode_config || flags->update_tx_mode_config || - params.update_ctl_frame_check) + params.update_ctl_frame_check || + params.update_accept_any_vlan_flg) { + DP_VERBOSE(hwfn, QED_MSG_IOV, + "vport update config for %s VF[abs 0x%x rel 0x%x]\n", + vf_info->is_trusted_configured ? "trusted" : "untrusted", + vf->abs_vf_id, vf->relative_vf_id); qed_sp_vport_update(hwfn, ¶ms, QED_SPQ_MODE_EBLOCK, NULL); + } } } diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.h b/drivers/net/ethernet/qlogic/qed/qed_sriov.h index 9a8fd79611f24..853be06bccdf6 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.h @@ -88,6 +88,7 @@ struct qed_public_vf_info { bool is_trusted_request; u8 rx_accept_mode; u8 tx_accept_mode; + bool accept_any_vlan; }; struct qed_iov_vf_init_params { diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index adc2c8f3d48ef..62e4511db8575 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -539,6 +539,9 @@ int qed_vf_hw_prepare(struct qed_hwfn *p_hwfn) p_iov->bulletin.size, &p_iov->bulletin.phys, GFP_KERNEL); + if (!p_iov->bulletin.p_virt) + goto free_pf2vf_reply; + DP_VERBOSE(p_hwfn, QED_MSG_IOV, "VF's bulletin Board [%p virt 0x%llx phys 0x%08x bytes]\n", p_iov->bulletin.p_virt, @@ -578,6 +581,10 @@ int qed_vf_hw_prepare(struct qed_hwfn *p_hwfn) return rc; +free_pf2vf_reply: + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(union pfvf_tlvs), + p_iov->pf2vf_reply, p_iov->pf2vf_reply_phys); free_vf2pf_request: dma_free_coherent(&p_hwfn->cdev->pdev->dev, sizeof(union vfpf_tlvs), diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c index f310a94e04898..f16032635ba73 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -723,6 +723,9 @@ qede_build_skb(struct qede_rx_queue *rxq, buf = page_address(bd->data) + bd->page_offset; skb = build_skb(buf, rxq->rx_buf_seg_size); + if (unlikely(!skb)) + return NULL; + skb_reserve(skb, pad); skb_put(skb, len); @@ -1597,6 +1600,13 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, struct net_device *ndev) data_split = true; } } else { + if (unlikely(skb->len > ETH_TX_MAX_NON_LSO_PKT_LEN)) { + DP_ERR(edev, "Unexpected non LSO skb length = 0x%x\n", skb->len); + qede_free_failed_tx_pkt(txq, first_bd, 0, false); + qede_update_tx_producer(txq); + return NETDEV_TX_OK; + } + val |= ((skb->len & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK) << ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT); } diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index ce3e62e73e4cd..1133f6fe21a0e 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -1773,6 +1773,7 @@ static void qede_sync_free_irqs(struct qede_dev *edev) } edev->int_info.used_cnt = 0; + edev->int_info.msix_cnt = 0; } static int qede_req_msix_irqs(struct qede_dev *edev) @@ -2317,7 +2318,6 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode, goto out; err4: qede_sync_free_irqs(edev); - memset(&edev->int_info.msix_cnt, 0, sizeof(struct qed_int_info)); err3: qede_napi_disable_remove(edev); err2: diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 6ca2216e40585..5e81cd317a32f 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -3495,20 +3495,19 @@ static int ql_adapter_up(struct ql3_adapter *qdev) spin_lock_irqsave(&qdev->hw_lock, hw_flags); - err = ql_wait_for_drvr_lock(qdev); - if (err) { - err = ql_adapter_initialize(qdev); - if (err) { - netdev_err(ndev, "Unable to initialize adapter\n"); - goto err_init; - } - netdev_err(ndev, "Releasing driver lock\n"); - ql_sem_unlock(qdev, QL_DRVR_SEM_MASK); - } else { + if (!ql_wait_for_drvr_lock(qdev)) { netdev_err(ndev, "Could not acquire driver lock\n"); + err = -ENODEV; goto err_lock; } + err = ql_adapter_initialize(qdev); + if (err) { + netdev_err(ndev, "Unable to initialize adapter\n"); + goto err_init; + } + ql_sem_unlock(qdev, QL_DRVR_SEM_MASK); + spin_unlock_irqrestore(&qdev->hw_lock, hw_flags); set_bit(QL_ADAPTER_UP, &qdev->flags); @@ -3630,7 +3629,8 @@ static void ql_reset_work(struct work_struct *work) qdev->mem_map_registers; unsigned long hw_flags; - if (test_bit((QL_RESET_PER_SCSI | QL_RESET_START), &qdev->flags)) { + if (test_bit(QL_RESET_PER_SCSI, &qdev->flags) || + test_bit(QL_RESET_START, &qdev->flags)) { clear_bit(QL_LINK_MASTER, &qdev->flags); /* diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index f2014c10f7c97..df43d364e6a41 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -1079,8 +1079,14 @@ static int qlcnic_83xx_add_rings(struct qlcnic_adapter *adapter) sds_mbx_size = sizeof(struct qlcnic_sds_mbx); context_id = recv_ctx->context_id; num_sds = adapter->drv_sds_rings - QLCNIC_MAX_SDS_RINGS; - ahw->hw_ops->alloc_mbx_args(&cmd, adapter, - QLCNIC_CMD_ADD_RCV_RINGS); + err = ahw->hw_ops->alloc_mbx_args(&cmd, adapter, + QLCNIC_CMD_ADD_RCV_RINGS); + if (err) { + dev_err(&adapter->pdev->dev, + "Failed to alloc mbx args %d\n", err); + return err; + } + cmd.req.arg[1] = 0 | (num_sds << 8) | (context_id << 16); /* set up status rings, mbx 2-81 */ diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h index f4aa6331b367b..0a9d24e86715d 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h @@ -52,7 +52,7 @@ static inline int qlcnic_dcb_get_hw_capability(struct qlcnic_dcb *dcb) if (dcb && dcb->ops->get_hw_capability) return dcb->ops->get_hw_capability(dcb); - return 0; + return -EOPNOTSUPP; } static inline void qlcnic_dcb_free(struct qlcnic_dcb *dcb) @@ -66,7 +66,7 @@ static inline int qlcnic_dcb_attach(struct qlcnic_dcb *dcb) if (dcb && dcb->ops->attach) return dcb->ops->attach(dcb); - return 0; + return -EOPNOTSUPP; } static inline int @@ -75,7 +75,7 @@ qlcnic_dcb_query_hw_capability(struct qlcnic_dcb *dcb, char *buf) if (dcb && dcb->ops->query_hw_capability) return dcb->ops->query_hw_capability(dcb, buf); - return 0; + return -EOPNOTSUPP; } static inline void qlcnic_dcb_get_info(struct qlcnic_dcb *dcb) @@ -90,7 +90,7 @@ qlcnic_dcb_query_cee_param(struct qlcnic_dcb *dcb, char *buf, u8 type) if (dcb && dcb->ops->query_cee_param) return dcb->ops->query_cee_param(dcb, buf, type); - return 0; + return -EOPNOTSUPP; } static inline int qlcnic_dcb_get_cee_cfg(struct qlcnic_dcb *dcb) @@ -98,7 +98,7 @@ static inline int qlcnic_dcb_get_cee_cfg(struct qlcnic_dcb *dcb) if (dcb && dcb->ops->get_cee_cfg) return dcb->ops->get_cee_cfg(dcb); - return 0; + return -EOPNOTSUPP; } static inline void qlcnic_dcb_aen_handler(struct qlcnic_dcb *dcb, void *msg) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c index c48a0e2d4d7ef..6a009d51ec510 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c @@ -440,7 +440,6 @@ int qlcnic_pinit_from_rom(struct qlcnic_adapter *adapter) QLCWR32(adapter, QLCNIC_CRB_PEG_NET_4 + 0x3c, 1); msleep(20); - qlcnic_rom_unlock(adapter); /* big hammer don't reset CAM block on reset */ QLCWR32(adapter, QLCNIC_ROMUSB_GLB_SW_RESET, 0xfeffffff); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h index 5f327659efa7a..85b688f60b876 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h @@ -202,7 +202,7 @@ int qlcnic_sriov_get_vf_vport_info(struct qlcnic_adapter *, struct qlcnic_info *, u16); int qlcnic_sriov_cfg_vf_guest_vlan(struct qlcnic_adapter *, u16, u8); void qlcnic_sriov_free_vlans(struct qlcnic_adapter *); -void qlcnic_sriov_alloc_vlans(struct qlcnic_adapter *); +int qlcnic_sriov_alloc_vlans(struct qlcnic_adapter *); bool qlcnic_sriov_check_any_vlan(struct qlcnic_vf_info *); void qlcnic_sriov_del_vlan_id(struct qlcnic_sriov *, struct qlcnic_vf_info *, u16); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c index f7c2f32237cb0..400bc2c3f222e 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c @@ -433,7 +433,7 @@ static int qlcnic_sriov_set_guest_vlan_mode(struct qlcnic_adapter *adapter, struct qlcnic_cmd_args *cmd) { struct qlcnic_sriov *sriov = adapter->ahw->sriov; - int i, num_vlans; + int i, num_vlans, ret; u16 *vlans; if (sriov->allowed_vlans) @@ -444,7 +444,9 @@ static int qlcnic_sriov_set_guest_vlan_mode(struct qlcnic_adapter *adapter, dev_info(&adapter->pdev->dev, "Number of allowed Guest VLANs = %d\n", sriov->num_allowed_vlans); - qlcnic_sriov_alloc_vlans(adapter); + ret = qlcnic_sriov_alloc_vlans(adapter); + if (ret) + return ret; if (!sriov->any_vlan) return 0; @@ -2160,7 +2162,7 @@ static int qlcnic_sriov_vf_resume(struct qlcnic_adapter *adapter) return err; } -void qlcnic_sriov_alloc_vlans(struct qlcnic_adapter *adapter) +int qlcnic_sriov_alloc_vlans(struct qlcnic_adapter *adapter) { struct qlcnic_sriov *sriov = adapter->ahw->sriov; struct qlcnic_vf_info *vf; @@ -2170,7 +2172,11 @@ void qlcnic_sriov_alloc_vlans(struct qlcnic_adapter *adapter) vf = &sriov->vf_info[i]; vf->sriov_vlans = kcalloc(sriov->num_allowed_vlans, sizeof(*vf->sriov_vlans), GFP_KERNEL); + if (!vf->sriov_vlans) + return -ENOMEM; } + + return 0; } void qlcnic_sriov_free_vlans(struct qlcnic_adapter *adapter) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c index 5632da05145a5..ed218ed2f466d 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c @@ -598,7 +598,9 @@ static int __qlcnic_pci_sriov_enable(struct qlcnic_adapter *adapter, if (err) goto del_flr_queue; - qlcnic_sriov_alloc_vlans(adapter); + err = qlcnic_sriov_alloc_vlans(adapter); + if (err) + goto del_flr_queue; return err; diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index baac016f3ec0b..15591ad5fe4ea 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -434,7 +434,7 @@ qcaspi_receive(struct qcaspi *qca) skb_put(qca->rx_skb, retcode); qca->rx_skb->protocol = eth_type_trans( qca->rx_skb, qca->rx_skb->dev); - qca->rx_skb->ip_summed = CHECKSUM_UNNECESSARY; + skb_checksum_none_assert(qca->rx_skb); netif_rx_ni(qca->rx_skb); qca->rx_skb = netdev_alloc_skb_ip_align(net_dev, net_dev->mtu + VLAN_ETH_HLEN); diff --git a/drivers/net/ethernet/qualcomm/qca_uart.c b/drivers/net/ethernet/qualcomm/qca_uart.c index 0981068504fa7..ade70f5df496c 100644 --- a/drivers/net/ethernet/qualcomm/qca_uart.c +++ b/drivers/net/ethernet/qualcomm/qca_uart.c @@ -107,7 +107,7 @@ qca_tty_receive(struct serdev_device *serdev, const unsigned char *data, skb_put(qca->rx_skb, retcode); qca->rx_skb->protocol = eth_type_trans( qca->rx_skb, qca->rx_skb->dev); - qca->rx_skb->ip_summed = CHECKSUM_UNNECESSARY; + skb_checksum_none_assert(qca->rx_skb); netif_rx_ni(qca->rx_skb); qca->rx_skb = netdev_alloc_skb_ip_align(netdev, netdev->mtu + diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c index 274e5b4bc4ac8..f158fdf3aab2c 100644 --- a/drivers/net/ethernet/rdc/r6040.c +++ b/drivers/net/ethernet/rdc/r6040.c @@ -119,6 +119,8 @@ #define PHY_ST 0x8A /* PHY status register */ #define MAC_SM 0xAC /* MAC status machine */ #define MAC_SM_RST 0x0002 /* MAC status machine reset */ +#define MD_CSC 0xb6 /* MDC speed control register */ +#define MD_CSC_DEFAULT 0x0030 #define MAC_ID 0xBE /* Identifier register */ #define TX_DCNT 0x80 /* TX descriptor count */ @@ -354,8 +356,9 @@ static void r6040_reset_mac(struct r6040_private *lp) { void __iomem *ioaddr = lp->base; int limit = MAC_DEF_TIMEOUT; - u16 cmd; + u16 cmd, md_csc; + md_csc = ioread16(ioaddr + MD_CSC); iowrite16(MAC_RST, ioaddr + MCR1); while (limit--) { cmd = ioread16(ioaddr + MCR1); @@ -367,6 +370,10 @@ static void r6040_reset_mac(struct r6040_private *lp) iowrite16(MAC_SM_RST, ioaddr + MAC_SM); iowrite16(0, ioaddr + MAC_SM); mdelay(5); + + /* Restore MDIO clock frequency */ + if (md_csc != MD_CSC_DEFAULT) + iowrite16(md_csc, ioaddr + MD_CSC); } static void r6040_init_mac_regs(struct net_device *dev) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index fb51548c57e94..8d2b184ff5758 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -212,6 +212,7 @@ static const struct pci_device_id rtl8169_pci_tbl[] = { { PCI_VDEVICE(REALTEK, 0x8129) }, { PCI_VDEVICE(REALTEK, 0x8136), RTL_CFG_NO_GBIT }, { PCI_VDEVICE(REALTEK, 0x8161) }, + { PCI_VDEVICE(REALTEK, 0x8162) }, { PCI_VDEVICE(REALTEK, 0x8167) }, { PCI_VDEVICE(REALTEK, 0x8168) }, { PCI_VDEVICE(NCUBE, 0x8168) }, @@ -4713,6 +4714,7 @@ static void rtl_hw_start_8168g(struct rtl8169_private *tp) rtl_eri_clear_bits(tp, 0x1b0, ERIAR_MASK_0011, BIT(12)); rtl_pcie_state_l2l3_disable(tp); + rtl_hw_aspm_clkreq_enable(tp, true); } static void rtl_hw_start_8168g_1(struct rtl8169_private *tp) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 931a44fe7afe8..50d85d0372302 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2567,6 +2567,7 @@ static int sh_eth_start_xmit(struct sk_buff *skb, struct net_device *ndev) else txdesc->status |= cpu_to_le32(TD_TACT); + wmb(); /* cur_tx must be incremented after TACT bit was set */ mdp->cur_tx++; if (!(sh_eth_read(ndev, EDTRR) & mdp->cd->edtrr_trns)) diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index 7072b249c8bd6..8157666209798 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -2795,7 +2795,8 @@ static void ofdpa_fib4_abort(struct rocker *rocker) if (!ofdpa_port) continue; nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; - ofdpa_flow_tbl_del(ofdpa_port, OFDPA_OP_FLAG_REMOVE, + ofdpa_flow_tbl_del(ofdpa_port, + OFDPA_OP_FLAG_REMOVE | OFDPA_OP_FLAG_NOWAIT, flow_entry); } spin_unlock_irqrestore(&ofdpa->flow_tbl_lock, flags); diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c index 38767d7979147..431146b391d0c 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c @@ -2277,18 +2277,18 @@ static int __init sxgbe_cmdline_opt(char *str) char *opt; if (!str || !*str) - return -EINVAL; + return 1; while ((opt = strsep(&str, ",")) != NULL) { if (!strncmp(opt, "eee_timer:", 10)) { if (kstrtoint(opt + 10, 0, &eee_timer)) goto err; } } - return 0; + return 1; err: pr_err("%s: ERROR broken module parameter conversion\n", __func__); - return -EINVAL; + return 1; } __setup("sxgbeeth=", sxgbe_cmdline_opt); diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 0ec13f520e907..b23741d3c9be3 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -2056,7 +2056,10 @@ static int efx_ef10_try_update_nic_stats_vf(struct efx_nic *efx) efx_update_sw_stats(efx, stats); out: + /* releasing a DMA coherent buffer with BH disabled can panic */ + spin_unlock_bh(&efx->stats_lock); efx_nic_free_buffer(efx, &stats_buf); + spin_lock_bh(&efx->stats_lock); return rc; } @@ -6160,6 +6163,11 @@ static int efx_ef10_mtd_probe(struct efx_nic *efx) n_parts++; } + if (!n_parts) { + kfree(parts); + return 0; + } + rc = efx_mtd_add(efx, &parts[0].common, n_parts, sizeof(*parts)); fail: if (rc) diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c index e7c6aa29d3232..b318ce83f5bf4 100644 --- a/drivers/net/ethernet/sfc/ef10_sriov.c +++ b/drivers/net/ethernet/sfc/ef10_sriov.c @@ -412,8 +412,9 @@ static int efx_ef10_pci_sriov_enable(struct efx_nic *efx, int num_vfs) static int efx_ef10_pci_sriov_disable(struct efx_nic *efx, bool force) { struct pci_dev *dev = efx->pci_dev; + struct efx_ef10_nic_data *nic_data = efx->nic_data; unsigned int vfs_assigned = pci_vfs_assigned(dev); - int rc = 0; + int i, rc = 0; if (vfs_assigned && !force) { netif_info(efx, drv, efx->net_dev, "VFs are assigned to guests; " @@ -421,10 +422,13 @@ static int efx_ef10_pci_sriov_disable(struct efx_nic *efx, bool force) return -EBUSY; } - if (!vfs_assigned) + if (!vfs_assigned) { + for (i = 0; i < efx->vf_count; i++) + nic_data->vf[i].pci_dev = NULL; pci_disable_sriov(dev); - else + } else { rc = -EBUSY; + } efx_ef10_sriov_free_vf_vswitching(efx); efx->vf_count = 0; diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 82391abbd42b4..4430a05d1778c 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -3776,7 +3776,7 @@ static const struct dev_pm_ops efx_pm_ops = { * Stop the software path and request a slot reset. */ static pci_ers_result_t efx_io_error_detected(struct pci_dev *pdev, - enum pci_channel_state state) + pci_channel_state_t state) { pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED; struct efx_nic *efx = pci_get_drvdata(pdev); diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 86b965875540d..d53e945dd08fc 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -128,20 +128,14 @@ efx_ethtool_get_link_ksettings(struct net_device *net_dev, { struct efx_nic *efx = netdev_priv(net_dev); struct efx_link_state *link_state = &efx->link_state; - u32 supported; mutex_lock(&efx->mac_lock); efx->phy_op->get_link_ksettings(efx, cmd); mutex_unlock(&efx->mac_lock); /* Both MACs support pause frames (bidirectional and respond-only) */ - ethtool_convert_link_mode_to_legacy_u32(&supported, - cmd->link_modes.supported); - - supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; - - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, - supported); + ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); + ethtool_link_ksettings_add_link_mode(cmd, supported, Asym_Pause); if (LOOPBACK_INTERNAL(efx)) { cmd->base.speed = link_state->speed; diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c index eecc348b1c32f..d578bbef9604d 100644 --- a/drivers/net/ethernet/sfc/falcon/efx.c +++ b/drivers/net/ethernet/sfc/falcon/efx.c @@ -3118,7 +3118,7 @@ static const struct dev_pm_ops ef4_pm_ops = { * Stop the software path and request a slot reset. */ static pci_ers_result_t ef4_io_error_detected(struct pci_dev *pdev, - enum pci_channel_state state) + pci_channel_state_t state) { pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED; struct ef4_nic *efx = pci_get_drvdata(pdev); diff --git a/drivers/net/ethernet/sfc/falcon/rx.c b/drivers/net/ethernet/sfc/falcon/rx.c index 05ea3523890a9..d516f81ab37a7 100644 --- a/drivers/net/ethernet/sfc/falcon/rx.c +++ b/drivers/net/ethernet/sfc/falcon/rx.c @@ -726,7 +726,10 @@ static void ef4_init_rx_recycle_ring(struct ef4_nic *efx, efx->rx_bufs_per_page); rx_queue->page_ring = kcalloc(page_ring_size, sizeof(*rx_queue->page_ring), GFP_KERNEL); - rx_queue->page_ptr_mask = page_ring_size - 1; + if (!rx_queue->page_ring) + rx_queue->page_ptr_mask = 0; + else + rx_queue->page_ptr_mask = page_ring_size - 1; } void ef4_init_rx_queue(struct ef4_rx_queue *rx_queue) diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index 2713300343c7f..ec551def58359 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c @@ -163,9 +163,9 @@ static void efx_mcdi_send_request(struct efx_nic *efx, unsigned cmd, /* Serialise with efx_mcdi_ev_cpl() and efx_mcdi_ev_death() */ spin_lock_bh(&mcdi->iface_lock); ++mcdi->seqno; + seqno = mcdi->seqno & SEQ_MASK; spin_unlock_bh(&mcdi->iface_lock); - seqno = mcdi->seqno & SEQ_MASK; xflags = 0; if (mcdi->mode == MCDI_MODE_EVENTS) xflags |= MCDI_HEADER_XFLAGS_EVREQ; diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index 59b4f16896a81..ed1140ecca603 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -648,7 +648,7 @@ static int efx_ptp_get_attributes(struct efx_nic *efx) } else if (rc == -EINVAL) { fmt = MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_NANOSECONDS; } else if (rc == -EPERM) { - netif_info(efx, probe, efx->net_dev, "no PTP support\n"); + pci_info(efx->pci_dev, "no PTP support\n"); return rc; } else { efx_mcdi_display_error(efx, MC_CMD_PTP, sizeof(inbuf), @@ -824,7 +824,7 @@ static int efx_ptp_disable(struct efx_nic *efx) * should only have been called during probe. */ if (rc == -ENOSYS || rc == -EPERM) - netif_info(efx, probe, efx->net_dev, "no PTP support\n"); + pci_info(efx->pci_dev, "no PTP support\n"); else if (rc) efx_mcdi_display_error(efx, MC_CMD_PTP, MC_CMD_PTP_IN_DISABLE_LEN, @@ -1093,7 +1093,29 @@ static void efx_ptp_xmit_skb_queue(struct efx_nic *efx, struct sk_buff *skb) tx_queue = &ptp_data->channel->tx_queue[type]; if (tx_queue && tx_queue->timestamping) { + /* This code invokes normal driver TX code which is always + * protected from softirqs when called from generic TX code, + * which in turn disables preemption. Look at __dev_queue_xmit + * which uses rcu_read_lock_bh disabling preemption for RCU + * plus disabling softirqs. We do not need RCU reader + * protection here. + * + * Although it is theoretically safe for current PTP TX/RX code + * running without disabling softirqs, there are three good + * reasond for doing so: + * + * 1) The code invoked is mainly implemented for non-PTP + * packets and it is always executed with softirqs + * disabled. + * 2) This being a single PTP packet, better to not + * interrupt its processing by softirqs which can lead + * to high latencies. + * 3) netdev_xmit_more checks preemption is disabled and + * triggers a BUG_ON if not. + */ + local_bh_disable(); efx_enqueue_skb(tx_queue, skb); + local_bh_enable(); } else { WARN_ONCE(1, "PTP channel has no timestamped tx queue\n"); dev_kfree_skb_any(skb); diff --git a/drivers/net/ethernet/sfc/siena_sriov.c b/drivers/net/ethernet/sfc/siena_sriov.c index dfbdf05dcf794..68f092881d137 100644 --- a/drivers/net/ethernet/sfc/siena_sriov.c +++ b/drivers/net/ethernet/sfc/siena_sriov.c @@ -1056,7 +1056,7 @@ void efx_siena_sriov_probe(struct efx_nic *efx) return; if (efx_siena_sriov_cmd(efx, false, &efx->vi_scale, &count)) { - netif_info(efx, probe, efx->net_dev, "no SR-IOV VFs probed\n"); + pci_info(efx->pci_dev, "no SR-IOV VFs probed\n"); return; } if (count > 0 && count > max_vfs) diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c index 7b65e79d6ae91..82ca29b6781b2 100644 --- a/drivers/net/ethernet/smsc/smc911x.c +++ b/drivers/net/ethernet/smsc/smc911x.c @@ -2069,6 +2069,11 @@ static int smc911x_drv_probe(struct platform_device *pdev) ndev->dma = (unsigned char)-1; ndev->irq = platform_get_irq(pdev, 0); + if (ndev->irq < 0) { + ret = ndev->irq; + goto release_both; + } + lp = netdev_priv(ndev); lp->netdev = ndev; #ifdef SMC_DYNAMIC_BUS_CONFIG diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index c7bdada4d1b97..7767d0ae9ebc1 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -2433,7 +2433,7 @@ static int smsc911x_drv_probe(struct platform_device *pdev) if (irq == -EPROBE_DEFER) { retval = -EPROBE_DEFER; goto out_0; - } else if (irq <= 0) { + } else if (irq < 0) { pr_warn("Could not allocate irq resource\n"); retval = -ENODEV; goto out_0; diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c index cd478d2cd871a..00f6d347eaf75 100644 --- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c +++ b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c @@ -57,10 +57,6 @@ #define TSE_PCS_USE_SGMII_ENA BIT(0) #define TSE_PCS_IF_USE_SGMII 0x03 -#define SGMII_ADAPTER_CTRL_REG 0x00 -#define SGMII_ADAPTER_DISABLE 0x0001 -#define SGMII_ADAPTER_ENABLE 0x0000 - #define AUTONEGO_LINK_TIMER 20 static int tse_pcs_reset(void __iomem *base, struct tse_pcs *pcs) @@ -202,12 +198,8 @@ void tse_pcs_fix_mac_speed(struct tse_pcs *pcs, struct phy_device *phy_dev, unsigned int speed) { void __iomem *tse_pcs_base = pcs->tse_pcs_base; - void __iomem *sgmii_adapter_base = pcs->sgmii_adapter_base; u32 val; - writew(SGMII_ADAPTER_ENABLE, - sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); - pcs->autoneg = phy_dev->autoneg; if (phy_dev->autoneg == AUTONEG_ENABLE) { diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h index 442812c0a4bdc..694ac25ef426b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h +++ b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h @@ -10,6 +10,10 @@ #include #include +#define SGMII_ADAPTER_CTRL_REG 0x00 +#define SGMII_ADAPTER_ENABLE 0x0000 +#define SGMII_ADAPTER_DISABLE 0x0001 + struct tse_pcs { struct device *dev; void __iomem *tse_pcs_base; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index dd9967aeda221..0f0094ced776b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -357,6 +357,7 @@ static void *tegra_eqos_probe(struct platform_device *pdev, data->fix_mac_speed = tegra_eqos_fix_speed; data->init = tegra_eqos_init; data->bsp_priv = eqos; + data->sph_disable = 1; err = tegra_eqos_init(pdev, eqos); if (err < 0) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c index fad503820e040..b3365b34cac7c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c @@ -71,6 +71,7 @@ static int dwmac_generic_probe(struct platform_device *pdev) static const struct of_device_id dwmac_generic_match[] = { { .compatible = "st,spear600-gmac"}, + { .compatible = "snps,dwmac-3.40a"}, { .compatible = "snps,dwmac-3.50a"}, { .compatible = "snps,dwmac-3.610"}, { .compatible = "snps,dwmac-3.70a"}, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c index 0f56f8e336917..03b11f191c262 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c @@ -288,10 +288,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) val &= ~NSS_COMMON_GMAC_CTL_PHY_IFACE_SEL; break; default: - dev_err(&pdev->dev, "Unsupported PHY mode: \"%s\"\n", - phy_modes(gmac->phy_mode)); - err = -EINVAL; - goto err_remove_config_dt; + goto err_unsupported_phy; } regmap_write(gmac->nss_common, NSS_COMMON_GMAC_CTL(gmac->id), val); @@ -308,10 +305,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) NSS_COMMON_CLK_SRC_CTRL_OFFSET(gmac->id); break; default: - dev_err(&pdev->dev, "Unsupported PHY mode: \"%s\"\n", - phy_modes(gmac->phy_mode)); - err = -EINVAL; - goto err_remove_config_dt; + goto err_unsupported_phy; } regmap_write(gmac->nss_common, NSS_COMMON_CLK_SRC_CTRL, val); @@ -328,8 +322,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) NSS_COMMON_CLK_GATE_GMII_TX_EN(gmac->id); break; default: - /* We don't get here; the switch above will have errored out */ - unreachable(); + goto err_unsupported_phy; } regmap_write(gmac->nss_common, NSS_COMMON_CLK_GATE, val); @@ -360,6 +353,11 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) return 0; +err_unsupported_phy: + dev_err(&pdev->dev, "Unsupported PHY mode: \"%s\"\n", + phy_modes(gmac->phy_mode)); + err = -EINVAL; + err_remove_config_dt: stmmac_remove_config_dt(pdev, plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 70d41783329dd..934c34e98d55f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -18,9 +18,6 @@ #include "altr_tse_pcs.h" -#define SGMII_ADAPTER_CTRL_REG 0x00 -#define SGMII_ADAPTER_DISABLE 0x0001 - #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII 0x0 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RGMII 0x1 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RMII 0x2 @@ -62,14 +59,13 @@ static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed) { struct socfpga_dwmac *dwmac = (struct socfpga_dwmac *)priv; void __iomem *splitter_base = dwmac->splitter_base; - void __iomem *tse_pcs_base = dwmac->pcs.tse_pcs_base; void __iomem *sgmii_adapter_base = dwmac->pcs.sgmii_adapter_base; struct device *dev = dwmac->dev; struct net_device *ndev = dev_get_drvdata(dev); struct phy_device *phy_dev = ndev->phydev; u32 val; - if ((tse_pcs_base) && (sgmii_adapter_base)) + if (sgmii_adapter_base) writew(SGMII_ADAPTER_DISABLE, sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); @@ -93,8 +89,11 @@ static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed) writel(val, splitter_base + EMAC_SPLITTER_CTRL_REG); } - if (tse_pcs_base && sgmii_adapter_base) + if (phy_dev && sgmii_adapter_base) { + writew(SGMII_ADAPTER_ENABLE, + sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); tse_pcs_fix_mac_speed(&dwmac->pcs, phy_dev, speed); + } } static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device *dev) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 2f6258ca95155..497ce6e6b16ff 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -718,7 +718,7 @@ static int sun8i_dwmac_reset(struct stmmac_priv *priv) if (err) { dev_err(priv->device, "EMAC reset timeout\n"); - return -EFAULT; + return err; } return 0; } @@ -879,6 +879,7 @@ static int sun8i_dwmac_register_mdio_mux(struct stmmac_priv *priv) ret = mdio_mux_init(priv->device, mdio_mux, mdio_mux_syscon_switch_fn, &gmac->mux_handle, priv, priv->mii); + of_node_put(mdio_mux); return ret; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c index 2bac49b49f739..fbf2deafe8ba3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c @@ -218,11 +218,18 @@ static void dwmac1000_dump_dma_regs(void __iomem *ioaddr, u32 *reg_space) readl(ioaddr + DMA_BUS_MODE + i * 4); } -static void dwmac1000_get_hw_feature(void __iomem *ioaddr, - struct dma_features *dma_cap) +static int dwmac1000_get_hw_feature(void __iomem *ioaddr, + struct dma_features *dma_cap) { u32 hw_cap = readl(ioaddr + DMA_HW_FEATURE); + if (!hw_cap) { + /* 0x00000000 is the value read on old hardware that does not + * implement this register + */ + return -EOPNOTSUPP; + } + dma_cap->mbps_10_100 = (hw_cap & DMA_HW_FEAT_MIISEL); dma_cap->mbps_1000 = (hw_cap & DMA_HW_FEAT_GMIISEL) >> 1; dma_cap->half_duplex = (hw_cap & DMA_HW_FEAT_HDSEL) >> 2; @@ -252,6 +259,8 @@ static void dwmac1000_get_hw_feature(void __iomem *ioaddr, dma_cap->number_tx_channel = (hw_cap & DMA_HW_FEAT_TXCHCNT) >> 22; /* Alternate (enhanced) DESC mode */ dma_cap->enh_desc = (hw_cap & DMA_HW_FEAT_ENHDESSEL) >> 24; + + return 0; } static void dwmac1000_rx_watchdog(void __iomem *ioaddr, u32 riwt, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 66e60c7e98504..c440b192ec715 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -215,6 +215,9 @@ static void dwmac4_map_mtl_dma(struct mac_device_info *hw, u32 queue, u32 chan) if (queue == 0 || queue == 4) { value &= ~MTL_RXQ_DMA_Q04MDMACH_MASK; value |= MTL_RXQ_DMA_Q04MDMACH(chan); + } else if (queue > 4) { + value &= ~MTL_RXQ_DMA_QXMDMACH_MASK(queue - 4); + value |= MTL_RXQ_DMA_QXMDMACH(chan, queue - 4); } else { value &= ~MTL_RXQ_DMA_QXMDMACH_MASK(queue); value |= MTL_RXQ_DMA_QXMDMACH(chan, queue); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c index 0d993f4b701c2..ae473d85b7fb8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c @@ -336,8 +336,8 @@ static void dwmac4_dma_tx_chan_op_mode(void __iomem *ioaddr, int mode, writel(mtl_tx_op, ioaddr + MTL_CHAN_TX_OP_MODE(channel)); } -static void dwmac4_get_hw_feature(void __iomem *ioaddr, - struct dma_features *dma_cap) +static int dwmac4_get_hw_feature(void __iomem *ioaddr, + struct dma_features *dma_cap) { u32 hw_cap = readl(ioaddr + GMAC_HW_FEATURE0); @@ -400,6 +400,8 @@ static void dwmac4_get_hw_feature(void __iomem *ioaddr, dma_cap->frpbs = (hw_cap & GMAC_HW_FEAT_FRPBS) >> 11; dma_cap->frpsel = (hw_cap & GMAC_HW_FEAT_FRPSEL) >> 10; dma_cap->dvlan = (hw_cap & GMAC_HW_FEAT_DVLAN) >> 5; + + return 0; } /* Enable/disable TSO feature and set MSS */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h index 292b880f3f9fc..3587b2e7843ef 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h @@ -128,6 +128,7 @@ #define NUM_DWMAC100_DMA_REGS 9 #define NUM_DWMAC1000_DMA_REGS 23 +#define NUM_DWMAC4_DMA_REGS 27 void dwmac_enable_dma_transmission(void __iomem *ioaddr); void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index 4af7271cea561..07ef0ac725b3e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -356,8 +356,8 @@ static int dwxgmac2_dma_interrupt(void __iomem *ioaddr, return ret; } -static void dwxgmac2_get_hw_feature(void __iomem *ioaddr, - struct dma_features *dma_cap) +static int dwxgmac2_get_hw_feature(void __iomem *ioaddr, + struct dma_features *dma_cap) { u32 hw_cap; @@ -425,6 +425,8 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr, dma_cap->frpes = (hw_cap & XGMAC_HWFEAT_FRPES) >> 11; dma_cap->frpbs = (hw_cap & XGMAC_HWFEAT_FRPPB) >> 9; dma_cap->frpsel = (hw_cap & XGMAC_HWFEAT_FRPSEL) >> 3; + + return 0; } static void dwxgmac2_rx_watchdog(void __iomem *ioaddr, u32 riwt, u32 nchan) diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index 9010d881b12e5..59a1d8c003721 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -196,8 +196,8 @@ struct stmmac_dma_ops { int (*dma_interrupt) (void __iomem *ioaddr, struct stmmac_extra_stats *x, u32 chan); /* If supported then get the optional core features */ - void (*get_hw_feature)(void __iomem *ioaddr, - struct dma_features *dma_cap); + int (*get_hw_feature)(void __iomem *ioaddr, + struct dma_features *dma_cap); /* Program the HW RX Watchdog */ void (*rx_watchdog)(void __iomem *ioaddr, u32 riwt, u32 number_chan); void (*set_tx_ring_len)(void __iomem *ioaddr, u32 len, u32 chan); @@ -247,7 +247,7 @@ struct stmmac_dma_ops { #define stmmac_dma_interrupt_status(__priv, __args...) \ stmmac_do_callback(__priv, dma, dma_interrupt, __args) #define stmmac_get_hw_feature(__priv, __args...) \ - stmmac_do_void_callback(__priv, dma, get_hw_feature, __args) + stmmac_do_callback(__priv, dma, get_hw_feature, __args) #define stmmac_rx_watchdog(__priv, __args...) \ stmmac_do_void_callback(__priv, dma, rx_watchdog, __args) #define stmmac_set_tx_ring_len(__priv, __args...) \ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index ce1346c14b05a..30b9393ae8f2f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -21,10 +21,18 @@ #include "dwxgmac2.h" #define REG_SPACE_SIZE 0x1060 +#define GMAC4_REG_SPACE_SIZE 0x116C #define MAC100_ETHTOOL_NAME "st_mac100" #define GMAC_ETHTOOL_NAME "st_gmac" #define XGMAC_ETHTOOL_NAME "st_xgmac" +/* Same as DMA_CHAN_BASE_ADDR defined in dwmac4_dma.h + * + * It is here because dwmac_dma.h and dwmac4_dam.h can not be included at the + * same time due to the conflicting macro names. + */ +#define GMAC4_DMA_CHAN_BASE_ADDR 0x00001100 + #define ETHTOOL_DMA_OFFSET 55 struct stmmac_stats { @@ -413,6 +421,8 @@ static int stmmac_ethtool_get_regs_len(struct net_device *dev) if (priv->plat->has_xgmac) return XGMAC_REGSIZE * 4; + else if (priv->plat->has_gmac4) + return GMAC4_REG_SPACE_SIZE; return REG_SPACE_SIZE; } @@ -425,8 +435,13 @@ static void stmmac_ethtool_gregs(struct net_device *dev, stmmac_dump_mac_regs(priv, priv->hw, reg_space); stmmac_dump_dma_regs(priv, priv->ioaddr, reg_space); - if (!priv->plat->has_xgmac) { - /* Copy DMA registers to where ethtool expects them */ + /* Copy DMA registers to where ethtool expects them */ + if (priv->plat->has_gmac4) { + /* GMAC4 dumps its DMA registers at its DMA_CHAN_BASE_ADDR */ + memcpy(®_space[ETHTOOL_DMA_OFFSET], + ®_space[GMAC4_DMA_CHAN_BASE_ADDR / 4], + NUM_DWMAC4_DMA_REGS * 4); + } else if (!priv->plat->has_xgmac) { memcpy(®_space[ETHTOOL_DMA_OFFSET], ®_space[DMA_BUS_MODE / 4], NUM_DWMAC1000_DMA_REGS * 4); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index e5d9007c8090b..8c14c99663941 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -149,15 +149,20 @@ static int adjust_systime(void __iomem *ioaddr, u32 sec, u32 nsec, static void get_systime(void __iomem *ioaddr, u64 *systime) { - u64 ns; - - /* Get the TSSS value */ - ns = readl(ioaddr + PTP_STNSR); - /* Get the TSS and convert sec time value to nanosecond */ - ns += readl(ioaddr + PTP_STSR) * 1000000000ULL; + u64 ns, sec0, sec1; + + /* Get the TSS value */ + sec1 = readl_relaxed(ioaddr + PTP_STSR); + do { + sec0 = sec1; + /* Get the TSSS value */ + ns = readl_relaxed(ioaddr + PTP_STNSR); + /* Get the TSS value */ + sec1 = readl_relaxed(ioaddr + PTP_STSR); + } while (sec0 != sec1); if (systime) - *systime = ns; + *systime = ns + (sec1 * 1000000000ULL); } const struct stmmac_hwtimestamp stmmac_ptp = { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 10d28be73f456..9931724c4727d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -225,7 +225,7 @@ static void stmmac_clk_csr_set(struct stmmac_priv *priv) priv->clk_csr = STMMAC_CSR_100_150M; else if ((clk_rate >= CSR_F_150M) && (clk_rate < CSR_F_250M)) priv->clk_csr = STMMAC_CSR_150_250M; - else if ((clk_rate >= CSR_F_250M) && (clk_rate < CSR_F_300M)) + else if ((clk_rate >= CSR_F_250M) && (clk_rate <= CSR_F_300M)) priv->clk_csr = STMMAC_CSR_250_300M; } @@ -604,7 +604,7 @@ static int stmmac_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; ptp_v2 = PTP_TCR_TSVER2ENA; snap_type_sel = PTP_TCR_SNAPTYPSEL_1; - if (priv->synopsys_id != DWMAC_CORE_5_10) + if (priv->synopsys_id < DWMAC_CORE_4_10) ts_event_en = PTP_TCR_TSEVNTENA; ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA; ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA; @@ -1987,6 +1987,8 @@ static void stmmac_tx_err(struct stmmac_priv *priv, u32 chan) tx_q->cur_tx = 0; tx_q->mss = 0; netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, chan)); + stmmac_init_tx_chan(priv, priv->ioaddr, priv->plat->dma_cfg, + tx_q->dma_tx_phy, chan); stmmac_start_tx_dma(priv, chan); priv->dev->stats.tx_errors++; @@ -4529,7 +4531,7 @@ int stmmac_dvr_probe(struct device *device, dev_info(priv->device, "TSO feature enabled\n"); } - if (priv->dma_cap.sphen) { + if (priv->dma_cap.sphen && !priv->plat->sph_disable) { ndev->hw_features |= NETIF_F_GRO; priv->sph = true; dev_info(priv->device, "SPH feature enabled\n"); @@ -4853,8 +4855,6 @@ int stmmac_resume(struct device *dev) stmmac_mdio_reset(priv->mii); } - netif_device_attach(ndev); - mutex_lock(&priv->lock); stmmac_reset_queues_param(priv); @@ -4878,6 +4878,8 @@ int stmmac_resume(struct device *dev) phylink_mac_change(priv->phylink, true); + netif_device_attach(ndev); + return 0; } EXPORT_SYMBOL_GPL(stmmac_resume); @@ -4888,7 +4890,7 @@ static int __init stmmac_cmdline_opt(char *str) char *opt; if (!str || !*str) - return -EINVAL; + return 1; while ((opt = strsep(&str, ",")) != NULL) { if (!strncmp(opt, "debug:", 6)) { if (kstrtoint(opt + 6, 0, &debug)) @@ -4919,11 +4921,11 @@ static int __init stmmac_cmdline_opt(char *str) goto err; } } - return 0; + return 1; err: pr_err("%s: ERROR broken module parameter conversion", __func__); - return -EINVAL; + return 1; } __setup("stmmaceth=", stmmac_cmdline_opt); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index 292045f4581f7..0edcf3f704b74 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -119,6 +119,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, plat->has_gmac4 = 1; plat->force_sf_dma_mode = 0; plat->tso_en = 1; + plat->sph_disable = 1; plat->rx_sched_algorithm = MTL_RX_ALGORITHM_SP; @@ -481,7 +482,7 @@ static int stmmac_pci_probe(struct pci_dev *pdev, return -ENOMEM; /* Enable pci device */ - ret = pci_enable_device(pdev); + ret = pcim_enable_device(pdev); if (ret) { dev_err(&pdev->dev, "%s: ERROR: failed to enable device\n", __func__); @@ -538,8 +539,6 @@ static void stmmac_pci_remove(struct pci_dev *pdev) pcim_iounmap_regions(pdev, BIT(i)); break; } - - pci_disable_device(pdev); } static int __maybe_unused stmmac_pci_suspend(struct device *dev) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 678aa2b001e01..70cbf48c2c038 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -428,8 +428,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) plat->phylink_node = np; /* Get max speed of operation from device tree */ - if (of_property_read_u32(np, "max-speed", &plat->max_speed)) - plat->max_speed = -1; + of_property_read_u32(np, "max-speed", &plat->max_speed); plat->bus_id = of_alias_get_id(np, "ethernet"); if (plat->bus_id < 0) @@ -505,6 +504,14 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) plat->pmt = 1; } + if (of_device_is_compatible(np, "snps,dwmac-3.40a")) { + plat->has_gmac = 1; + plat->enh_desc = 1; + plat->tx_coe = 1; + plat->bugged_jumbo = 1; + plat->pmt = 1; + } + if (of_device_is_compatible(np, "snps,dwmac-4.00") || of_device_is_compatible(np, "snps,dwmac-4.10a") || of_device_is_compatible(np, "snps,dwmac-4.20a")) { diff --git a/drivers/net/ethernet/sun/Kconfig b/drivers/net/ethernet/sun/Kconfig index 7b982e02ea3a4..1080a2a3e13a2 100644 --- a/drivers/net/ethernet/sun/Kconfig +++ b/drivers/net/ethernet/sun/Kconfig @@ -73,6 +73,7 @@ config CASSINI config SUNVNET_COMMON tristate "Common routines to support Sun Virtual Networking" depends on SUN_LDOMS + depends on INET default m config SUNVNET diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c index d007dfeba5c35..3133f903279ce 100644 --- a/drivers/net/ethernet/sun/sunhme.c +++ b/drivers/net/ethernet/sun/sunhme.c @@ -3164,7 +3164,7 @@ static int happy_meal_pci_probe(struct pci_dev *pdev, if (err) { printk(KERN_ERR "happymeal(PCI): Cannot register net device, " "aborting.\n"); - goto err_out_iounmap; + goto err_out_free_coherent; } pci_set_drvdata(pdev, hp); @@ -3197,6 +3197,10 @@ static int happy_meal_pci_probe(struct pci_dev *pdev, return 0; +err_out_free_coherent: + dma_free_coherent(hp->dma_dev, PAGE_SIZE, + hp->happy_block, hp->hblock_dvma); + err_out_iounmap: iounmap(hp->gregs); diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c index 26cfe3f7ed8df..453ad1247288d 100644 --- a/drivers/net/ethernet/ti/cpts.c +++ b/drivers/net/ethernet/ti/cpts.c @@ -454,7 +454,9 @@ int cpts_register(struct cpts *cpts) for (i = 0; i < CPTS_MAX_EVENTS; i++) list_add(&cpts->pool_data[i].list, &cpts->pool); - clk_enable(cpts->refclk); + err = clk_enable(cpts->refclk); + if (err) + return err; cpts_write32(cpts, CPTS_EN, control); cpts_write32(cpts, TS_PEND_EN, int_enable); diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 6869c5c74b9f7..fac59032bf83a 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -412,8 +412,20 @@ static int emac_set_coalesce(struct net_device *ndev, u32 int_ctrl, num_interrupts = 0; u32 prescale = 0, addnl_dvdr = 1, coal_intvl = 0; - if (!coal->rx_coalesce_usecs) - return -EINVAL; + if (!coal->rx_coalesce_usecs) { + priv->coal_intvl = 0; + + switch (priv->version) { + case EMAC_VERSION_2: + emac_ctrl_write(EMAC_DM646X_CMINTCTRL, 0); + break; + default: + emac_ctrl_write(EMAC_CTRL_EWINTTCNT, 0); + break; + } + + return 0; + } coal_intvl = coal->rx_coalesce_usecs; diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c index bede1ff289c59..a65b7291e12a2 100644 --- a/drivers/net/ethernet/wiznet/w5100.c +++ b/drivers/net/ethernet/wiznet/w5100.c @@ -1052,6 +1052,8 @@ static int w5100_mmio_probe(struct platform_device *pdev) mac_addr = data->mac_addr; mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!mem) + return -EINVAL; if (resource_size(mem) < W5100_BUS_DIRECT_SIZE) ops = &w5100_mmio_indirect_ops; else diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 9a7af7dda70dc..a109438f4a78e 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -939,10 +939,8 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev) wmb(); lp->dma_out(lp, TX_TAILDESC_PTR, tail_p); /* DMA start */ - if (temac_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1)) { - netdev_info(ndev, "%s -> netif_stop_queue\n", __func__); + if (temac_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1)) netif_stop_queue(ndev); - } return NETDEV_TX_OK; } @@ -1347,6 +1345,8 @@ static int temac_probe(struct platform_device *pdev) lp->indirect_lock = devm_kmalloc(&pdev->dev, sizeof(*lp->indirect_lock), GFP_KERNEL); + if (!lp->indirect_lock) + return -ENOMEM; spin_lock_init(lp->indirect_lock); } diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index f98318d93ce72..0ef806ea18327 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -41,8 +41,9 @@ #include "xilinx_axienet.h" /* Descriptors defines for Tx and Rx DMA */ -#define TX_BD_NUM_DEFAULT 64 +#define TX_BD_NUM_DEFAULT 128 #define RX_BD_NUM_DEFAULT 1024 +#define TX_BD_NUM_MIN (MAX_SKB_FRAGS + 1) #define TX_BD_NUM_MAX 4096 #define RX_BD_NUM_MAX 4096 @@ -635,7 +636,7 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) num_frag = skb_shinfo(skb)->nr_frags; cur_p = &lp->tx_bd_v[lp->tx_bd_tail]; - if (axienet_check_tx_bd_space(lp, num_frag)) { + if (axienet_check_tx_bd_space(lp, num_frag + 1)) { if (netif_queue_stopped(ndev)) return NETDEV_TX_BUSY; @@ -645,7 +646,7 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) smp_mb(); /* Space might have just been freed - check again */ - if (axienet_check_tx_bd_space(lp, num_frag)) + if (axienet_check_tx_bd_space(lp, num_frag + 1)) return NETDEV_TX_BUSY; netif_wake_queue(ndev); @@ -1223,7 +1224,8 @@ static int axienet_ethtools_set_ringparam(struct net_device *ndev, if (ering->rx_pending > RX_BD_NUM_MAX || ering->rx_mini_pending || ering->rx_jumbo_pending || - ering->rx_pending > TX_BD_NUM_MAX) + ering->tx_pending < TX_BD_NUM_MIN || + ering->tx_pending > TX_BD_NUM_MAX) return -EINVAL; if (netif_running(ndev)) diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index 53dbf3e28f1ef..6e5ea68b6a7e6 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -820,10 +820,10 @@ static int xemaclite_mdio_write(struct mii_bus *bus, int phy_id, int reg, static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev) { struct mii_bus *bus; - int rc; struct resource res; struct device_node *np = of_get_parent(lp->phy_node); struct device_node *npp; + int rc, ret; /* Don't register the MDIO bus if the phy_node or its parent node * can't be found. @@ -833,8 +833,14 @@ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev) return -ENODEV; } npp = of_get_parent(np); - - of_address_to_resource(npp, 0, &res); + ret = of_address_to_resource(npp, 0, &res); + of_node_put(npp); + if (ret) { + dev_err(dev, "%s resource error!\n", + dev->of_node->full_name); + of_node_put(np); + return ret; + } if (lp->ndev->mem_start != res.start) { struct phy_device *phydev; phydev = of_phy_find_device(lp->phy_node); @@ -843,6 +849,7 @@ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev) "MDIO of the phy is not registered yet\n"); else put_device(&phydev->mdio.dev); + of_node_put(np); return 0; } @@ -855,6 +862,7 @@ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev) bus = mdiobus_alloc(); if (!bus) { dev_err(dev, "Failed to allocate mdiobus\n"); + of_node_put(np); return -ENOMEM; } @@ -867,6 +875,7 @@ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev) bus->parent = dev; rc = of_mdiobus_register(bus, np); + of_node_put(np); if (rc) { dev_err(dev, "Failed to register mdio bus.\n"); goto err_register; @@ -923,8 +932,6 @@ static int xemaclite_open(struct net_device *dev) xemaclite_disable_interrupts(lp); if (lp->phy_node) { - u32 bmcr; - lp->phy_dev = of_phy_connect(lp->ndev, lp->phy_node, xemaclite_adjust_link, 0, PHY_INTERFACE_MODE_MII); @@ -935,19 +942,6 @@ static int xemaclite_open(struct net_device *dev) /* EmacLite doesn't support giga-bit speeds */ phy_set_max_speed(lp->phy_dev, SPEED_100); - - /* Don't advertise 1000BASE-T Full/Half duplex speeds */ - phy_write(lp->phy_dev, MII_CTRL1000, 0); - - /* Advertise only 10 and 100mbps full/half duplex speeds */ - phy_write(lp->phy_dev, MII_ADVERTISE, ADVERTISE_ALL | - ADVERTISE_CSMA); - - /* Restart auto negotiation */ - bmcr = phy_read(lp->phy_dev, MII_BMCR); - bmcr |= (BMCR_ANENABLE | BMCR_ANRESTART); - phy_write(lp->phy_dev, MII_BMCR, bmcr); - phy_start(lp->phy_dev); } @@ -1187,7 +1181,7 @@ static int xemaclite_of_probe(struct platform_device *ofdev) if (rc) { dev_err(dev, "Cannot register network device, aborting\n"); - goto error; + goto put_node; } dev_info(dev, @@ -1195,6 +1189,8 @@ static int xemaclite_of_probe(struct platform_device *ofdev) (unsigned int __force)ndev->mem_start, lp->base_addr, ndev->irq); return 0; +put_node: + of_node_put(lp->phy_node); error: free_netdev(ndev); return rc; diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index b89b4a3800a4d..bef0133696c31 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -1269,6 +1269,11 @@ static int fjes_probe(struct platform_device *plat_dev) hw->hw_res.start = res->start; hw->hw_res.size = resource_size(res); hw->hw_res.irq = platform_get_irq(plat_dev, 0); + if (hw->hw_res.irq < 0) { + err = hw->hw_res.irq; + goto err_free_control_wq; + } + err = fjes_hw_init(&adapter->hw); if (err) goto err_free_control_wq; diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index da13683d52d1a..83dc1c2c3b84b 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -68,9 +68,9 @@ #define SIXP_DAMA_OFF 0 /* default level 2 parameters */ -#define SIXP_TXDELAY (HZ/4) /* in 1 s */ +#define SIXP_TXDELAY 25 /* 250 ms */ #define SIXP_PERSIST 50 /* in 256ths */ -#define SIXP_SLOTTIME (HZ/10) /* in 1 s */ +#define SIXP_SLOTTIME 10 /* 100 ms */ #define SIXP_INIT_RESYNC_TIMEOUT (3*HZ/2) /* in 1 s */ #define SIXP_RESYNC_TIMEOUT 5*HZ /* in 1 s */ @@ -311,7 +311,6 @@ static void sp_setup(struct net_device *dev) { /* Finish setting up the DEVICE info. */ dev->netdev_ops = &sp_netdev_ops; - dev->needs_free_netdev = true; dev->mtu = SIXP_MTU; dev->hard_header_len = AX25_MAX_HEADER_LEN; dev->header_ops = &ax25_header_ops; @@ -674,14 +673,16 @@ static void sixpack_close(struct tty_struct *tty) */ netif_stop_queue(sp->dev); + unregister_netdev(sp->dev); + del_timer_sync(&sp->tx_t); del_timer_sync(&sp->resync_t); - /* Free all 6pack frame buffers. */ + /* Free all 6pack frame buffers after unreg. */ kfree(sp->rbuff); kfree(sp->xbuff); - unregister_netdev(sp->dev); + free_netdev(sp->dev); } /* Perform I/O control on an active 6pack channel. */ diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index 352f9e75954ce..43aab2835f8ff 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -31,6 +31,8 @@ #define AX_MTU 236 +/* some arch define END as assembly function ending, just undef it */ +#undef END /* SLIP/KISS protocol characters. */ #define END 0300 /* indicates end of frame */ #define ESC 0333 /* indicates byte stuffing */ @@ -793,13 +795,14 @@ static void mkiss_close(struct tty_struct *tty) */ netif_stop_queue(ax->dev); - /* Free all AX25 frame buffers. */ + unregister_netdev(ax->dev); + + /* Free all AX25 frame buffers after unreg. */ kfree(ax->rbuff); kfree(ax->xbuff); ax->tty = NULL; - unregister_netdev(ax->dev); free_netdev(ax->dev); } diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c index 5ab53e9942f30..5d30b3e1806ab 100644 --- a/drivers/net/hamradio/yam.c +++ b/drivers/net/hamradio/yam.c @@ -951,9 +951,7 @@ static int yam_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) sizeof(struct yamdrv_ioctl_mcs)); if (IS_ERR(ym)) return PTR_ERR(ym); - if (ym->cmd != SIOCYAMSMCS) - return -EINVAL; - if (ym->bitrate > YAM_MAXBITRATE) { + if (ym->cmd != SIOCYAMSMCS || ym->bitrate > YAM_MAXBITRATE) { kfree(ym); return -EINVAL; } diff --git a/drivers/net/hippi/rrunner.c b/drivers/net/hippi/rrunner.c index a4b3fce69ecd9..6016e182f0089 100644 --- a/drivers/net/hippi/rrunner.c +++ b/drivers/net/hippi/rrunner.c @@ -1346,7 +1346,9 @@ static int rr_close(struct net_device *dev) rrpriv->fw_running = 0; + spin_unlock_irqrestore(&rrpriv->lock, flags); del_timer_sync(&rrpriv->timer); + spin_lock_irqsave(&rrpriv->lock, flags); writel(0, ®s->TxPi); writel(0, ®s->IpRxPi); diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 362b7ca6f3b2a..57e92c5bfcc92 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1445,6 +1445,9 @@ static void netvsc_get_ethtool_stats(struct net_device *dev, pcpu_sum = kvmalloc_array(num_possible_cpus(), sizeof(struct netvsc_ethtool_pcpu_stats), GFP_KERNEL); + if (!pcpu_sum) + return; + netvsc_get_pcpu_stats(dev, pcpu_sum); for_each_present_cpu(cpu) { struct netvsc_ethtool_pcpu_stats *this_sum = &pcpu_sum[cpu]; diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index 7d67f41387f55..4f5ef8a9a9a87 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -100,6 +100,7 @@ struct at86rf230_local { unsigned long cal_timeout; bool is_tx; bool is_tx_from_off; + bool was_tx; u8 tx_retry; struct sk_buff *tx_skb; struct at86rf230_state_change tx; @@ -343,7 +344,11 @@ at86rf230_async_error_recover_complete(void *context) if (ctx->free) kfree(ctx); - ieee802154_wake_queue(lp->hw); + if (lp->was_tx) { + lp->was_tx = 0; + dev_kfree_skb_any(lp->tx_skb); + ieee802154_wake_queue(lp->hw); + } } static void @@ -352,7 +357,11 @@ at86rf230_async_error_recover(void *context) struct at86rf230_state_change *ctx = context; struct at86rf230_local *lp = ctx->lp; - lp->is_tx = 0; + if (lp->is_tx) { + lp->was_tx = 1; + lp->is_tx = 0; + } + at86rf230_async_state_change(lp, ctx, STATE_RX_AACK_ON, at86rf230_async_error_recover_complete); } diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c index 23ee0b14cbfa1..2f5e7b31032aa 100644 --- a/drivers/net/ieee802154/atusb.c +++ b/drivers/net/ieee802154/atusb.c @@ -93,7 +93,9 @@ static int atusb_control_msg(struct atusb *atusb, unsigned int pipe, ret = usb_control_msg(usb_dev, pipe, request, requesttype, value, index, data, size, timeout); - if (ret < 0) { + if (ret < size) { + ret = ret < 0 ? ret : -ENODATA; + atusb->err = ret; dev_err(&usb_dev->dev, "%s: req 0x%02x val 0x%x idx 0x%x, error %d\n", @@ -861,9 +863,9 @@ static int atusb_get_and_show_build(struct atusb *atusb) if (!build) return -ENOMEM; - ret = atusb_control_msg(atusb, usb_rcvctrlpipe(usb_dev, 0), - ATUSB_BUILD, ATUSB_REQ_FROM_DEV, 0, 0, - build, ATUSB_BUILD_SIZE, 1000); + /* We cannot call atusb_control_msg() here, since this request may read various length data */ + ret = usb_control_msg(atusb->usb_dev, usb_rcvctrlpipe(usb_dev, 0), ATUSB_BUILD, + ATUSB_REQ_FROM_DEV, 0, 0, build, ATUSB_BUILD_SIZE, 1000); if (ret >= 0) { build[ret] = 0; dev_info(&usb_dev->dev, "Firmware: build %s\n", build); diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index 25dbea302fb6d..47959aadbc503 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -1770,6 +1770,7 @@ static int ca8210_async_xmit_complete( status ); if (status != MAC_TRANSACTION_OVERFLOW) { + dev_kfree_skb_any(priv->tx_skb); ieee802154_wake_queue(priv->hw); return 0; } @@ -2975,8 +2976,8 @@ static void ca8210_hw_setup(struct ieee802154_hw *ca8210_hw) ca8210_hw->phy->cca.opt = NL802154_CCA_OPT_ENERGY_CARRIER_AND; ca8210_hw->phy->cca_ed_level = -9800; ca8210_hw->phy->symbol_duration = 16; - ca8210_hw->phy->lifs_period = 40; - ca8210_hw->phy->sifs_period = 12; + ca8210_hw->phy->lifs_period = 40 * ca8210_hw->phy->symbol_duration; + ca8210_hw->phy->sifs_period = 12 * ca8210_hw->phy->symbol_duration; ca8210_hw->flags = IEEE802154_HW_AFILT | IEEE802154_HW_OMIT_CKSUM | diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c index 2a78084aeaedb..1d181eff0c299 100644 --- a/drivers/net/ieee802154/mac802154_hwsim.c +++ b/drivers/net/ieee802154/mac802154_hwsim.c @@ -786,6 +786,7 @@ static int hwsim_add_one(struct genl_info *info, struct device *dev, goto err_pib; } + pib->channel = 13; rcu_assign_pointer(phy->pib, pib); phy->idx = idx; INIT_LIST_HEAD(&phy->edges); diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c index 8dc04e2590b18..383231b854642 100644 --- a/drivers/net/ieee802154/mcr20a.c +++ b/drivers/net/ieee802154/mcr20a.c @@ -976,8 +976,8 @@ static void mcr20a_hw_setup(struct mcr20a_local *lp) dev_dbg(printdev(lp), "%s\n", __func__); phy->symbol_duration = 16; - phy->lifs_period = 40; - phy->sifs_period = 12; + phy->lifs_period = 40 * phy->symbol_duration; + phy->sifs_period = 12 * phy->symbol_duration; hw->flags = IEEE802154_HW_TX_OMIT_CKSUM | IEEE802154_HW_AFILT | diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index f233a6933a976..f729f55f6a174 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3247,6 +3247,15 @@ static int macsec_newlink(struct net *net, struct net_device *dev, macsec->real_dev = real_dev; + /* send_sci must be set to true when transmit sci explicitly is set */ + if ((data && data[IFLA_MACSEC_SCI]) && + (data && data[IFLA_MACSEC_INC_SCI])) { + u8 send_sci = !!nla_get_u8(data[IFLA_MACSEC_INC_SCI]); + + if (!send_sci) + return -EINVAL; + } + if (data && data[IFLA_MACSEC_ICV_LEN]) icv_len = nla_get_u8(data[IFLA_MACSEC_ICV_LEN]); mtu = real_dev->mtu - icv_len - macsec_extra_len(true); diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 694e2f5dbbe59..39801c31e5071 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -133,11 +133,17 @@ static void macvtap_setup(struct net_device *dev) dev->tx_queue_len = TUN_READQ_SIZE; } +static struct net *macvtap_link_net(const struct net_device *dev) +{ + return dev_net(macvlan_dev_real_dev(dev)); +} + static struct rtnl_link_ops macvtap_link_ops __read_mostly = { .kind = "macvtap", .setup = macvtap_setup, .newlink = macvtap_newlink, .dellink = macvtap_dellink, + .get_link_net = macvtap_link_net, .priv_size = sizeof(struct macvtap_dev), }; diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index 2b74425822ab1..e0a4acc6144bf 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -510,6 +510,7 @@ nsim_bpf_map_alloc(struct netdevsim *ns, struct bpf_offloaded_map *offmap) goto err_free; key = nmap->entry[i].key; *key = i; + memset(nmap->entry[i].value, 0, offmap->map.value_size); } } diff --git a/drivers/net/phy/aquantia_main.c b/drivers/net/phy/aquantia_main.c index 975789d9349dc..75d8351ee2506 100644 --- a/drivers/net/phy/aquantia_main.c +++ b/drivers/net/phy/aquantia_main.c @@ -34,6 +34,8 @@ #define MDIO_AN_VEND_PROV 0xc400 #define MDIO_AN_VEND_PROV_1000BASET_FULL BIT(15) #define MDIO_AN_VEND_PROV_1000BASET_HALF BIT(14) +#define MDIO_AN_VEND_PROV_5000BASET_FULL BIT(11) +#define MDIO_AN_VEND_PROV_2500BASET_FULL BIT(10) #define MDIO_AN_VEND_PROV_DOWNSHIFT_EN BIT(4) #define MDIO_AN_VEND_PROV_DOWNSHIFT_MASK GENMASK(3, 0) #define MDIO_AN_VEND_PROV_DOWNSHIFT_DFLT 4 @@ -230,9 +232,20 @@ static int aqr_config_aneg(struct phy_device *phydev) phydev->advertising)) reg |= MDIO_AN_VEND_PROV_1000BASET_HALF; + /* Handle the case when the 2.5G and 5G speeds are not advertised */ + if (linkmode_test_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, + phydev->advertising)) + reg |= MDIO_AN_VEND_PROV_2500BASET_FULL; + + if (linkmode_test_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, + phydev->advertising)) + reg |= MDIO_AN_VEND_PROV_5000BASET_FULL; + ret = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, MDIO_AN_VEND_PROV, MDIO_AN_VEND_PROV_1000BASET_HALF | - MDIO_AN_VEND_PROV_1000BASET_FULL, reg); + MDIO_AN_VEND_PROV_1000BASET_FULL | + MDIO_AN_VEND_PROV_2500BASET_FULL | + MDIO_AN_VEND_PROV_5000BASET_FULL, reg); if (ret < 0) return ret; if (ret > 0) diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index af8eabe7a6d44..d372626c603d4 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -26,7 +26,12 @@ #define MII_BCM7XXX_SHD_2_ADDR_CTRL 0xe #define MII_BCM7XXX_SHD_2_CTRL_STAT 0xf #define MII_BCM7XXX_SHD_2_BIAS_TRIM 0x1a +#define MII_BCM7XXX_SHD_3_PCS_CTRL 0x0 +#define MII_BCM7XXX_SHD_3_PCS_STATUS 0x1 +#define MII_BCM7XXX_SHD_3_EEE_CAP 0x2 #define MII_BCM7XXX_SHD_3_AN_EEE_ADV 0x3 +#define MII_BCM7XXX_SHD_3_EEE_LP 0x4 +#define MII_BCM7XXX_SHD_3_EEE_WK_ERR 0x5 #define MII_BCM7XXX_SHD_3_PCS_CTRL_2 0x6 #define MII_BCM7XXX_PCS_CTRL_2_DEF 0x4400 #define MII_BCM7XXX_SHD_3_AN_STAT 0xb @@ -210,25 +215,37 @@ static int bcm7xxx_28nm_resume(struct phy_device *phydev) return genphy_config_aneg(phydev); } -static int phy_set_clr_bits(struct phy_device *dev, int location, - int set_mask, int clr_mask) +static int __phy_set_clr_bits(struct phy_device *dev, int location, + int set_mask, int clr_mask) { int v, ret; - v = phy_read(dev, location); + v = __phy_read(dev, location); if (v < 0) return v; v &= ~clr_mask; v |= set_mask; - ret = phy_write(dev, location, v); + ret = __phy_write(dev, location, v); if (ret < 0) return ret; return v; } +static int phy_set_clr_bits(struct phy_device *dev, int location, + int set_mask, int clr_mask) +{ + int ret; + + mutex_lock(&dev->mdio.bus->mdio_lock); + ret = __phy_set_clr_bits(dev, location, set_mask, clr_mask); + mutex_unlock(&dev->mdio.bus->mdio_lock); + + return ret; +} + static int bcm7xxx_28nm_ephy_01_afe_config_init(struct phy_device *phydev) { int ret; @@ -392,6 +409,93 @@ static int bcm7xxx_28nm_ephy_config_init(struct phy_device *phydev) return bcm7xxx_28nm_ephy_apd_enable(phydev); } +#define MII_BCM7XXX_REG_INVALID 0xff + +static u8 bcm7xxx_28nm_ephy_regnum_to_shd(u16 regnum) +{ + switch (regnum) { + case MDIO_CTRL1: + return MII_BCM7XXX_SHD_3_PCS_CTRL; + case MDIO_STAT1: + return MII_BCM7XXX_SHD_3_PCS_STATUS; + case MDIO_PCS_EEE_ABLE: + return MII_BCM7XXX_SHD_3_EEE_CAP; + case MDIO_AN_EEE_ADV: + return MII_BCM7XXX_SHD_3_AN_EEE_ADV; + case MDIO_AN_EEE_LPABLE: + return MII_BCM7XXX_SHD_3_EEE_LP; + case MDIO_PCS_EEE_WK_ERR: + return MII_BCM7XXX_SHD_3_EEE_WK_ERR; + default: + return MII_BCM7XXX_REG_INVALID; + } +} + +static bool bcm7xxx_28nm_ephy_dev_valid(int devnum) +{ + return devnum == MDIO_MMD_AN || devnum == MDIO_MMD_PCS; +} + +static int bcm7xxx_28nm_ephy_read_mmd(struct phy_device *phydev, + int devnum, u16 regnum) +{ + u8 shd = bcm7xxx_28nm_ephy_regnum_to_shd(regnum); + int ret; + + if (!bcm7xxx_28nm_ephy_dev_valid(devnum) || + shd == MII_BCM7XXX_REG_INVALID) + return -EOPNOTSUPP; + + /* set shadow mode 2 */ + ret = __phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, + MII_BCM7XXX_SHD_MODE_2, 0); + if (ret < 0) + return ret; + + /* Access the desired shadow register address */ + ret = __phy_write(phydev, MII_BCM7XXX_SHD_2_ADDR_CTRL, shd); + if (ret < 0) + goto reset_shadow_mode; + + ret = __phy_read(phydev, MII_BCM7XXX_SHD_2_CTRL_STAT); + +reset_shadow_mode: + /* reset shadow mode 2 */ + __phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, 0, + MII_BCM7XXX_SHD_MODE_2); + return ret; +} + +static int bcm7xxx_28nm_ephy_write_mmd(struct phy_device *phydev, + int devnum, u16 regnum, u16 val) +{ + u8 shd = bcm7xxx_28nm_ephy_regnum_to_shd(regnum); + int ret; + + if (!bcm7xxx_28nm_ephy_dev_valid(devnum) || + shd == MII_BCM7XXX_REG_INVALID) + return -EOPNOTSUPP; + + /* set shadow mode 2 */ + ret = __phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, + MII_BCM7XXX_SHD_MODE_2, 0); + if (ret < 0) + return ret; + + /* Access the desired shadow register address */ + ret = __phy_write(phydev, MII_BCM7XXX_SHD_2_ADDR_CTRL, shd); + if (ret < 0) + goto reset_shadow_mode; + + /* Write the desired value in the shadow register */ + __phy_write(phydev, MII_BCM7XXX_SHD_2_CTRL_STAT, val); + +reset_shadow_mode: + /* reset shadow mode 2 */ + return __phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, 0, + MII_BCM7XXX_SHD_MODE_2); +} + static int bcm7xxx_28nm_ephy_resume(struct phy_device *phydev) { int ret; @@ -563,6 +667,8 @@ static int bcm7xxx_28nm_probe(struct phy_device *phydev) .get_strings = bcm_phy_get_strings, \ .get_stats = bcm7xxx_28nm_get_phy_stats, \ .probe = bcm7xxx_28nm_probe, \ + .read_mmd = bcm7xxx_28nm_ephy_read_mmd, \ + .write_mmd = bcm7xxx_28nm_ephy_write_mmd, \ } #define BCM7XXX_40NM_EPHY(_oui, _name) \ diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 5e956089bf525..7be75a611e9e8 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -11,6 +11,7 @@ */ #include "bcm-phy-lib.h" +#include #include #include #include @@ -479,6 +480,26 @@ static int brcm_fet_config_init(struct phy_device *phydev) if (err < 0) return err; + /* The datasheet indicates the PHY needs up to 1us to complete a reset, + * build some slack here. + */ + usleep_range(1000, 2000); + + /* The PHY requires 65 MDC clock cycles to complete a write operation + * and turnaround the line properly. + * + * We ignore -EIO here as the MDIO controller (e.g.: mdio-bcm-unimac) + * may flag the lack of turn-around as a read failure. This is + * particularly true with this combination since the MDIO controller + * only used 64 MDC cycles. This is not a critical failure in this + * specific case and it has no functional impact otherwise, so we let + * that one go through. If there is a genuine bus error, the next read + * of MII_BRCM_FET_INTREG will error out. + */ + err = phy_read(phydev, MII_BMCR); + if (err < 0 && err != -EIO) + return err; + reg = phy_read(phydev, MII_BRCM_FET_INTREG); if (reg < 0) return reg; @@ -646,6 +667,7 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM54616S", /* PHY_GBIT_FEATURES */ + .soft_reset = genphy_soft_reset, .config_init = bcm54xx_config_init, .config_aneg = bcm54616s_config_aneg, .ack_interrupt = bcm_phy_ack_intr, diff --git a/drivers/net/phy/dp83640_reg.h b/drivers/net/phy/dp83640_reg.h index 21aa24c741b96..daae7fa58fb82 100644 --- a/drivers/net/phy/dp83640_reg.h +++ b/drivers/net/phy/dp83640_reg.h @@ -5,7 +5,7 @@ #ifndef HAVE_DP83640_REGISTERS #define HAVE_DP83640_REGISTERS -#define PAGE0 0x0000 +/* #define PAGE0 0x0000 */ #define PHYCR2 0x001c /* PHY Control Register 2 */ #define PAGE4 0x0004 diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index 8a4b1d167ce2f..ae17d2f9d5347 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -238,7 +238,7 @@ static int dp83822_config_intr(struct phy_device *phydev) if (err < 0) return err; - err = phy_write(phydev, MII_DP83822_MISR1, 0); + err = phy_write(phydev, MII_DP83822_MISR2, 0); if (err < 0) return err; diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 9dbe625ad4477..49801c2eb6271 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -444,9 +444,9 @@ static int m88e1121_config_aneg_rgmii_delays(struct phy_device *phydev) else mscr = 0; - return phy_modify_paged(phydev, MII_MARVELL_MSCR_PAGE, - MII_88E1121_PHY_MSCR_REG, - MII_88E1121_PHY_MSCR_DELAY_MASK, mscr); + return phy_modify_paged_changed(phydev, MII_MARVELL_MSCR_PAGE, + MII_88E1121_PHY_MSCR_REG, + MII_88E1121_PHY_MSCR_DELAY_MASK, mscr); } static int m88e1121_config_aneg(struct phy_device *phydev) @@ -460,11 +460,13 @@ static int m88e1121_config_aneg(struct phy_device *phydev) return err; } + changed = err; + err = marvell_set_polarity(phydev, phydev->mdix_ctrl); if (err < 0) return err; - changed = err; + changed |= err; err = genphy_config_aneg(phydev); if (err < 0) @@ -886,16 +888,15 @@ static int m88e1118_config_aneg(struct phy_device *phydev) { int err; - err = genphy_soft_reset(phydev); + err = marvell_set_polarity(phydev, phydev->mdix_ctrl); if (err < 0) return err; - err = marvell_set_polarity(phydev, phydev->mdix_ctrl); + err = genphy_config_aneg(phydev); if (err < 0) return err; - err = genphy_config_aneg(phydev); - return 0; + return genphy_soft_reset(phydev); } static int m88e1118_config_init(struct phy_device *phydev) @@ -917,6 +918,12 @@ static int m88e1118_config_init(struct phy_device *phydev) if (err < 0) return err; + if (phy_interface_is_rgmii(phydev)) { + err = m88e1121_config_aneg_rgmii_delays(phydev); + if (err < 0) + return err; + } + /* Adjust LED Control */ if (phydev->dev_flags & MARVELL_PHY_M1118_DNS323_LEDS) err = phy_write(phydev, 0x10, 0x1100); @@ -1401,8 +1408,8 @@ static int marvell_suspend(struct phy_device *phydev) int err; /* Suspend the fiber mode first */ - if (!linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, - phydev->supported)) { + if (linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, + phydev->supported)) { err = marvell_set_page(phydev, MII_MARVELL_FIBER_PAGE); if (err < 0) goto error; @@ -1436,8 +1443,8 @@ static int marvell_resume(struct phy_device *phydev) int err; /* Resume the fiber mode first */ - if (!linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, - phydev->supported)) { + if (linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, + phydev->supported)) { err = marvell_set_page(phydev, MII_MARVELL_FIBER_PAGE); if (err < 0) goto error; diff --git a/drivers/net/phy/mdio-aspeed.c b/drivers/net/phy/mdio-aspeed.c index cad820568f751..e2273588c75b6 100644 --- a/drivers/net/phy/mdio-aspeed.c +++ b/drivers/net/phy/mdio-aspeed.c @@ -61,6 +61,13 @@ static int aspeed_mdio_read(struct mii_bus *bus, int addr, int regnum) iowrite32(ctrl, ctx->base + ASPEED_MDIO_CTRL); + rc = readl_poll_timeout(ctx->base + ASPEED_MDIO_CTRL, ctrl, + !(ctrl & ASPEED_MDIO_CTRL_FIRE), + ASPEED_MDIO_INTERVAL_US, + ASPEED_MDIO_TIMEOUT_US); + if (rc < 0) + return rc; + rc = readl_poll_timeout(ctx->base + ASPEED_MDIO_DATA, data, data & ASPEED_MDIO_DATA_IDLE, ASPEED_MDIO_INTERVAL_US, @@ -141,6 +148,7 @@ static const struct of_device_id aspeed_mdio_of_match[] = { { .compatible = "aspeed,ast2600-mdio", }, { }, }; +MODULE_DEVICE_TABLE(of, aspeed_mdio_of_match); static struct platform_driver aspeed_mdio_driver = { .driver = { diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 5bf06eac04ba3..05c24db507a2c 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -385,6 +385,13 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) bus->dev.groups = NULL; dev_set_name(&bus->dev, "%s", bus->id); + /* We need to set state to MDIOBUS_UNREGISTERED to correctly release + * the device in mdiobus_free() + * + * State will be updated later in this function in case of success + */ + bus->state = MDIOBUS_UNREGISTERED; + err = device_register(&bus->dev); if (err) { pr_err("mii_bus %s failed to register\n", bus->id); @@ -426,7 +433,7 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) mdiobus_setup_mdiodev_from_board_info(bus, mdiobus_create_device); bus->state = MDIOBUS_REGISTERED; - pr_info("%s: probed\n", bus->name); + dev_dbg(&bus->dev, "probed\n"); return 0; error: @@ -746,7 +753,6 @@ int __init mdio_bus_init(void) return ret; } -EXPORT_SYMBOL_GPL(mdio_bus_init); #if IS_ENABLED(CONFIG_PHYLIB) void mdio_bus_exit(void) diff --git a/drivers/net/phy/mdio_device.c b/drivers/net/phy/mdio_device.c index c1d345c3cab35..b2dd293fc87eb 100644 --- a/drivers/net/phy/mdio_device.c +++ b/drivers/net/phy/mdio_device.c @@ -180,6 +180,16 @@ static int mdio_remove(struct device *dev) return 0; } +static void mdio_shutdown(struct device *dev) +{ + struct mdio_device *mdiodev = to_mdio_device(dev); + struct device_driver *drv = mdiodev->dev.driver; + struct mdio_driver *mdiodrv = to_mdio_driver(drv); + + if (mdiodrv->shutdown) + mdiodrv->shutdown(mdiodev); +} + /** * mdio_driver_register - register an mdio_driver with the MDIO layer * @new_driver: new mdio_driver to register @@ -194,6 +204,7 @@ int mdio_driver_register(struct mdio_driver *drv) mdiodrv->driver.bus = &mdio_bus_type; mdiodrv->driver.probe = mdio_probe; mdiodrv->driver.remove = mdio_remove; + mdiodrv->driver.shutdown = mdio_shutdown; retval = driver_register(&mdiodrv->driver); if (retval) { diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index f95bd1b0fb965..721153dcfd15a 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -282,7 +282,7 @@ static int kszphy_config_reset(struct phy_device *phydev) } } - if (priv->led_mode >= 0) + if (priv->type && priv->led_mode >= 0) kszphy_setup_led(phydev, priv->type->led_mode_reg, priv->led_mode); return 0; @@ -298,10 +298,10 @@ static int kszphy_config_init(struct phy_device *phydev) type = priv->type; - if (type->has_broadcast_disable) + if (type && type->has_broadcast_disable) kszphy_broadcast_disable(phydev); - if (type->has_nand_tree_disable) + if (type && type->has_nand_tree_disable) kszphy_nand_tree_disable(phydev); return kszphy_config_reset(phydev); @@ -939,7 +939,7 @@ static int kszphy_probe(struct phy_device *phydev) priv->type = type; - if (type->led_mode_reg) { + if (type && type->led_mode_reg) { ret = of_property_read_u32(np, "micrel,led-mode", &priv->led_mode); if (ret) @@ -960,7 +960,8 @@ static int kszphy_probe(struct phy_device *phydev) unsigned long rate = clk_get_rate(clk); bool rmii_ref_clk_sel_25_mhz; - priv->rmii_ref_clk_sel = type->has_rmii_ref_clk_sel; + if (type) + priv->rmii_ref_clk_sel = type->has_rmii_ref_clk_sel; rmii_ref_clk_sel_25_mhz = of_property_read_bool(np, "micrel,rmii-reference-clock-select-25-mhz"); @@ -1040,8 +1041,9 @@ static struct phy_driver ksphy_driver[] = { .get_sset_count = kszphy_get_sset_count, .get_strings = kszphy_get_strings, .get_stats = kszphy_get_stats, - .suspend = genphy_suspend, - .resume = genphy_resume, + /* No suspend/resume callbacks because of errata DS80000700A, + * receiver error following software power down. + */ }, { .phy_id = PHY_ID_KSZ8041RNLI, .phy_id_mask = MICREL_PHY_ID_MASK, @@ -1095,6 +1097,7 @@ static struct phy_driver ksphy_driver[] = { .probe = kszphy_probe, .config_init = ksz8081_config_init, .ack_interrupt = kszphy_ack_interrupt, + .soft_reset = genphy_soft_reset, .config_intr = kszphy_config_intr, .get_sset_count = kszphy_get_sset_count, .get_strings = kszphy_get_strings, diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index a1caeee122361..bceb0dcdecbd6 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -239,9 +239,10 @@ int genphy_c45_read_link(struct phy_device *phydev) /* The link state is latched low so that momentary link * drops can be detected. Do not double-read the status - * in polling mode to detect such short link drops. + * in polling mode to detect such short link drops except + * the link was already down. */ - if (!phy_polling_mode(phydev)) { + if (!phy_polling_mode(phydev) || !phydev->link) { val = phy_read_mmd(phydev, devad, MDIO_STAT1); if (val < 0) return val; diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index 9412669b579c7..84064120918f0 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -128,11 +128,11 @@ static const struct phy_setting settings[] = { PHY_SETTING( 2500, FULL, 2500baseT_Full ), PHY_SETTING( 2500, FULL, 2500baseX_Full ), /* 1G */ - PHY_SETTING( 1000, FULL, 1000baseKX_Full ), PHY_SETTING( 1000, FULL, 1000baseT_Full ), PHY_SETTING( 1000, HALF, 1000baseT_Half ), PHY_SETTING( 1000, FULL, 1000baseT1_Full ), PHY_SETTING( 1000, FULL, 1000baseX_Full ), + PHY_SETTING( 1000, FULL, 1000baseKX_Full ), /* 100M */ PHY_SETTING( 100, FULL, 100baseT_Full ), PHY_SETTING( 100, FULL, 100baseT1_Full ), diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index b0b8a3ce82b68..9e8cf64f04420 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -116,10 +116,15 @@ EXPORT_SYMBOL(phy_print_status); */ static int phy_clear_interrupt(struct phy_device *phydev) { - if (phydev->drv->ack_interrupt) - return phydev->drv->ack_interrupt(phydev); + int ret = 0; - return 0; + if (phydev->drv->ack_interrupt) { + mutex_lock(&phydev->lock); + ret = phydev->drv->ack_interrupt(phydev); + mutex_unlock(&phydev->lock); + } + + return ret; } /** @@ -367,6 +372,7 @@ EXPORT_SYMBOL(phy_ethtool_ksettings_set); void phy_ethtool_ksettings_get(struct phy_device *phydev, struct ethtool_link_ksettings *cmd) { + mutex_lock(&phydev->lock); linkmode_copy(cmd->link_modes.supported, phydev->supported); linkmode_copy(cmd->link_modes.advertising, phydev->advertising); linkmode_copy(cmd->link_modes.lp_advertising, phydev->lp_advertising); @@ -383,6 +389,7 @@ void phy_ethtool_ksettings_get(struct phy_device *phydev, cmd->base.autoneg = phydev->autoneg; cmd->base.eth_tp_mdix_ctrl = phydev->mdix_ctrl; cmd->base.eth_tp_mdix = phydev->mdix; + mutex_unlock(&phydev->lock); } EXPORT_SYMBOL(phy_ethtool_ksettings_get); @@ -553,7 +560,7 @@ static int phy_check_link_status(struct phy_device *phydev) } /** - * phy_start_aneg - start auto-negotiation for this PHY device + * _phy_start_aneg - start auto-negotiation for this PHY device * @phydev: the phy_device struct * * Description: Sanitizes the settings (if we're not autonegotiating @@ -561,25 +568,43 @@ static int phy_check_link_status(struct phy_device *phydev) * If the PHYCONTROL Layer is operating, we change the state to * reflect the beginning of Auto-negotiation or forcing. */ -int phy_start_aneg(struct phy_device *phydev) +static int _phy_start_aneg(struct phy_device *phydev) { int err; + lockdep_assert_held(&phydev->lock); + if (!phydev->drv) return -EIO; - mutex_lock(&phydev->lock); - if (AUTONEG_DISABLE == phydev->autoneg) phy_sanitize_settings(phydev); err = phy_config_aneg(phydev); if (err < 0) - goto out_unlock; + return err; if (phy_is_started(phydev)) err = phy_check_link_status(phydev); -out_unlock: + + return err; +} + +/** + * phy_start_aneg - start auto-negotiation for this PHY device + * @phydev: the phy_device struct + * + * Description: Sanitizes the settings (if we're not autonegotiating + * them), and then calls the driver's config_aneg function. + * If the PHYCONTROL Layer is operating, we change the state to + * reflect the beginning of Auto-negotiation or forcing. + */ +int phy_start_aneg(struct phy_device *phydev) +{ + int err; + + mutex_lock(&phydev->lock); + err = _phy_start_aneg(phydev); mutex_unlock(&phydev->lock); return err; @@ -740,6 +765,36 @@ static int phy_disable_interrupts(struct phy_device *phydev) return phy_clear_interrupt(phydev); } +/** + * phy_did_interrupt - Checks if the PHY generated an interrupt + * @phydev: target phy_device struct + */ +static int phy_did_interrupt(struct phy_device *phydev) +{ + int ret; + + mutex_lock(&phydev->lock); + ret = phydev->drv->did_interrupt(phydev); + mutex_unlock(&phydev->lock); + + return ret; +} + +/** + * phy_handle_interrupt - PHY specific interrupt handler + * @phydev: target phy_device struct + */ +static int phy_handle_interrupt(struct phy_device *phydev) +{ + int ret; + + mutex_lock(&phydev->lock); + ret = phydev->drv->handle_interrupt(phydev); + mutex_unlock(&phydev->lock); + + return ret; +} + /** * phy_interrupt - PHY interrupt handler * @irq: interrupt line @@ -751,11 +806,11 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat) { struct phy_device *phydev = phy_dat; - if (phydev->drv->did_interrupt && !phydev->drv->did_interrupt(phydev)) + if (phydev->drv->did_interrupt && !phy_did_interrupt(phydev)) return IRQ_NONE; if (phydev->drv->handle_interrupt) { - if (phydev->drv->handle_interrupt(phydev)) + if (phy_handle_interrupt(phydev)) goto phy_err; } else { /* reschedule state queue work to run as soon as possible */ diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 9d0a306f05623..78b918dcd5472 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1433,6 +1433,9 @@ void phy_detach(struct phy_device *phydev) phy_driver_is_genphy_10g(phydev)) device_release_driver(&phydev->mdio.dev); + /* Assert the reset signal */ + phy_device_reset(phydev, 1); + /* * The phydev might go away on the put_device() below, so avoid * a use-after-free bug by reading the underlying bus first. @@ -1444,9 +1447,6 @@ void phy_detach(struct phy_device *phydev) ndev_owner = dev->dev.parent->driver->owner; if (ndev_owner != bus->owner) module_put(bus->owner); - - /* Assert the reset signal */ - phy_device_reset(phydev, 1); } EXPORT_SYMBOL(phy_detach); @@ -1766,9 +1766,10 @@ int genphy_update_link(struct phy_device *phydev) /* The link state is latched low so that momentary link * drops can be detected. Do not double-read the status - * in polling mode to detect such short link drops. + * in polling mode to detect such short link drops except + * the link was already down. */ - if (!phy_polling_mode(phydev)) { + if (!phy_polling_mode(phydev) || !phydev->link) { status = phy_read(phydev, MII_BMSR); if (status < 0) return status; diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index bf5bbb565cf5e..5b2bf75269033 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -582,6 +582,11 @@ static int phylink_register_sfp(struct phylink *pl, return ret; } + if (!fwnode_device_is_available(ref.fwnode)) { + fwnode_handle_put(ref.fwnode); + return 0; + } + pl->sfp_bus = sfp_register_upstream(ref.fwnode, pl, &sfp_phylink_ops); if (!pl->sfp_bus) return -ENOMEM; @@ -1331,7 +1336,7 @@ int phylink_ethtool_set_pauseparam(struct phylink *pl, return -EOPNOTSUPP; if (!phylink_test(pl->supported, Asym_Pause) && - !pause->autoneg && pause->rx_pause != pause->tx_pause) + pause->rx_pause != pause->tx_pause) return -EINVAL; config->pause &= ~(MLO_PAUSE_AN | MLO_PAUSE_TXRX_MASK); diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index 816e59fe68f57..19ff5f25c8d15 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -479,6 +479,26 @@ int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee, } EXPORT_SYMBOL_GPL(sfp_get_module_eeprom); +/** + * sfp_get_module_eeprom_by_page() - Read a page from the SFP module EEPROM + * @bus: a pointer to the &struct sfp_bus structure for the sfp module + * @page: a &struct ethtool_module_eeprom + * @extack: extack for reporting problems + * + * Read an EEPROM page as specified by the supplied @page. See the + * documentation for &struct ethtool_module_eeprom for the page to be read. + * + * Returns 0 on success or a negative errno number. More error + * information might be provided via extack + */ +int sfp_get_module_eeprom_by_page(struct sfp_bus *bus, + const struct ethtool_module_eeprom *page, + struct netlink_ext_ack *extack) +{ + return bus->socket_ops->module_eeprom_by_page(bus->sfp, page, extack); +} +EXPORT_SYMBOL_GPL(sfp_get_module_eeprom_by_page); + /** * sfp_upstream_start() - Inform the SFP that the network device is up * @bus: a pointer to the &struct sfp_bus structure for the sfp module diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 27b67f12ec455..051efc7397234 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -115,7 +115,7 @@ static const char * const sm_state_strings[] = { [SFP_S_LINK_UP] = "link_up", [SFP_S_TX_FAULT] = "tx_fault", [SFP_S_REINIT] = "reinit", - [SFP_S_TX_DISABLE] = "rx_disable", + [SFP_S_TX_DISABLE] = "tx_disable", }; static const char *sm_state_to_str(unsigned short sm_state) @@ -1763,6 +1763,30 @@ static int sfp_module_eeprom(struct sfp *sfp, struct ethtool_eeprom *ee, return 0; } +static int sfp_module_eeprom_by_page(struct sfp *sfp, + const struct ethtool_module_eeprom *page, + struct netlink_ext_ack *extack) +{ + if (page->bank) { + NL_SET_ERR_MSG(extack, "Banks not supported"); + return -EOPNOTSUPP; + } + + if (page->page) { + NL_SET_ERR_MSG(extack, "Only page 0 supported"); + return -EOPNOTSUPP; + } + + if (page->i2c_address != 0x50 && + page->i2c_address != 0x51) { + NL_SET_ERR_MSG(extack, "Only address 0x50 and 0x51 supported"); + return -EOPNOTSUPP; + } + + return sfp_read(sfp, page->i2c_address == 0x51, page->offset, + page->data, page->length); +}; + static const struct sfp_socket_ops sfp_module_ops = { .attach = sfp_attach, .detach = sfp_detach, @@ -1770,6 +1794,7 @@ static const struct sfp_socket_ops sfp_module_ops = { .stop = sfp_stop, .module_info = sfp_module_info, .module_eeprom = sfp_module_eeprom, + .module_eeprom_by_page = sfp_module_eeprom_by_page, }; static void sfp_timeout(struct work_struct *work) @@ -1878,7 +1903,7 @@ static int sfp_probe(struct platform_device *pdev) platform_set_drvdata(pdev, sfp); - err = devm_add_action(sfp->dev, sfp_cleanup, sfp); + err = devm_add_action_or_reset(sfp->dev, sfp_cleanup, sfp); if (err < 0) return err; diff --git a/drivers/net/phy/sfp.h b/drivers/net/phy/sfp.h index 64f54b0bbd8c4..3afa512bc2b0a 100644 --- a/drivers/net/phy/sfp.h +++ b/drivers/net/phy/sfp.h @@ -14,6 +14,9 @@ struct sfp_socket_ops { int (*module_info)(struct sfp *sfp, struct ethtool_modinfo *modinfo); int (*module_eeprom)(struct sfp *sfp, struct ethtool_eeprom *ee, u8 *data); + int (*module_eeprom_by_page)(struct sfp *sfp, + const struct ethtool_module_eeprom *page, + struct netlink_ext_ack *extack); }; int sfp_add_phy(struct sfp_bus *bus, struct phy_device *phydev); diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index c6c41a7836c93..a085213dc2eaa 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -69,6 +69,8 @@ #define MPHDRLEN 6 /* multilink protocol header length */ #define MPHDRLEN_SSN 4 /* ditto with short sequence numbers */ +#define PPP_PROTO_LEN 2 + /* * An instance of /dev/ppp can be associated with either a ppp * interface unit or a ppp channel. In both cases, file->private_data @@ -498,6 +500,9 @@ static ssize_t ppp_write(struct file *file, const char __user *buf, if (!pf) return -ENXIO; + /* All PPP packets should start with the 2-byte protocol */ + if (count < PPP_PROTO_LEN) + return -EINVAL; ret = -ENOMEM; skb = alloc_skb(count + pf->hdrlen, GFP_KERNEL); if (!skb) @@ -1544,7 +1549,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) } ++ppp->stats64.tx_packets; - ppp->stats64.tx_bytes += skb->len - 2; + ppp->stats64.tx_bytes += skb->len - PPP_PROTO_LEN; switch (proto) { case PPP_IP: diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index 8e56a41dd7585..096617982998f 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -471,7 +471,7 @@ static void sl_tx_timeout(struct net_device *dev) spin_lock(&sl->lock); if (netif_queue_stopped(dev)) { - if (!netif_running(dev)) + if (!netif_running(dev) || !sl->tty) goto out; /* May be we must check transmitter timeout here ? diff --git a/drivers/net/sungem_phy.c b/drivers/net/sungem_phy.c index 291fa449993fb..45f295403cb55 100644 --- a/drivers/net/sungem_phy.c +++ b/drivers/net/sungem_phy.c @@ -454,6 +454,7 @@ static int bcm5421_init(struct mii_phy* phy) int can_low_power = 1; if (np == NULL || of_get_property(np, "no-autolowpower", NULL)) can_low_power = 0; + of_node_put(np); if (can_low_power) { /* Enable automatic low-power */ sungem_phy_write(phy, 0x1c, 0x9002); diff --git a/drivers/net/tap.c b/drivers/net/tap.c index f285422a80717..f870d08bb1f86 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -1214,7 +1214,8 @@ static int tap_sendmsg(struct socket *sock, struct msghdr *m, struct xdp_buff *xdp; int i; - if (ctl && (ctl->type == TUN_MSG_PTR)) { + if (m->msg_controllen == sizeof(struct tun_msg_ctl) && + ctl && ctl->type == TUN_MSG_PTR) { for (i = 0; i < ctl->num; i++) { xdp = &((struct xdp_buff *)ctl->ptr)[i]; tap_get_user_xdp(q, xdp); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 7c40ae058e6d1..dd02fcc972774 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -327,6 +327,12 @@ static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile, } } +static void tun_napi_enable(struct tun_file *tfile) +{ + if (tfile->napi_enabled) + napi_enable(&tfile->napi); +} + static void tun_napi_disable(struct tun_file *tfile) { if (tfile->napi_enabled) @@ -690,7 +696,8 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun = rtnl_dereference(tfile->tun); if (tun && clean) { - tun_napi_disable(tfile); + if (!tfile->detached) + tun_napi_disable(tfile); tun_napi_del(tfile); } @@ -709,8 +716,10 @@ static void __tun_detach(struct tun_file *tfile, bool clean) if (clean) { RCU_INIT_POINTER(tfile->tun, NULL); sock_put(&tfile->sk); - } else + } else { tun_disable_queue(tun, tfile); + tun_napi_disable(tfile); + } synchronize_net(); tun_flow_delete_by_queue(tun, tun->numqueues + 1); @@ -783,6 +792,7 @@ static void tun_detach_all(struct net_device *dev) sock_put(&tfile->sk); } list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { + tun_napi_del(tfile); tun_enable_queue(tfile); tun_queue_purge(tfile); xdp_rxq_info_unreg(&tfile->xdp_rxq); @@ -863,6 +873,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file, if (tfile->detached) { tun_enable_queue(tfile); + tun_napi_enable(tfile); } else { sock_hold(&tfile->sk); tun_napi_init(tun, tfile, napi, napi_frags); @@ -1071,6 +1082,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); int txq = skb->queue_mapping; + struct netdev_queue *queue; struct tun_file *tfile; int len = skb->len; @@ -1117,6 +1129,10 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) if (ptr_ring_produce(&tfile->tx_ring, skb)) goto drop; + /* NETIF_F_LLTX requires to do our own update of trans_start */ + queue = netdev_get_tx_queue(dev, txq); + queue->trans_start = jiffies; + /* Notify and wake up reader process */ if (tfile->flags & TUN_FASYNC) kill_fasync(&tfile->fasync, SIGIO, POLL_IN); @@ -2556,7 +2572,8 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) if (!tun) return -EBADFD; - if (ctl && (ctl->type == TUN_MSG_PTR)) { + if (m->msg_controllen == sizeof(struct tun_msg_ctl) && + ctl && ctl->type == TUN_MSG_PTR) { struct tun_page tpage; int n = ctl->num; int flush = 0; diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index 05bdcc5917f6b..d7005cc76ce91 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -99,6 +99,10 @@ config USB_RTL8150 config USB_RTL8152 tristate "Realtek RTL8152/RTL8153 Based USB Ethernet Adapters" select MII + select CRC32 + select CRYPTO + select CRYPTO_HASH + select CRYPTO_SHA256 help This option adds support for Realtek RTL8152 based USB 2.0 10/100 Ethernet adapters and RTL8153 based USB 3.0 10/100/1000 @@ -113,6 +117,7 @@ config USB_LAN78XX select PHYLIB select MICROCHIP_PHY select FIXED_PHY + select CRC32 help This option adds support for Microchip LAN78XX based USB 2 & USB 3 10/100/1000 Ethernet adapters. diff --git a/drivers/net/usb/aqc111.c b/drivers/net/usb/aqc111.c index 7e44110746dd0..68912e266826b 100644 --- a/drivers/net/usb/aqc111.c +++ b/drivers/net/usb/aqc111.c @@ -1102,10 +1102,15 @@ static int aqc111_rx_fixup(struct usbnet *dev, struct sk_buff *skb) if (start_of_descs != desc_offset) goto err; - /* self check desc_offset from header*/ - if (desc_offset >= skb_len) + /* self check desc_offset from header and make sure that the + * bounds of the metadata array are inside the SKB + */ + if (pkt_count * 2 + desc_offset >= skb_len) goto err; + /* Packets must not overlap the metadata array */ + skb_trim(skb, desc_offset); + if (pkt_count == 0) goto err; diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index ec4b148b1e6c3..5ee3e457a79c7 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1361,58 +1361,119 @@ static int ax88179_rx_fixup(struct usbnet *dev, struct sk_buff *skb) u16 hdr_off; u32 *pkt_hdr; - /* This check is no longer done by usbnet */ - if (skb->len < dev->net->hard_header_len) + /* At the end of the SKB, there's a header telling us how many packets + * are bundled into this buffer and where we can find an array of + * per-packet metadata (which contains elements encoded into u16). + */ + + /* SKB contents for current firmware: + * + * ... + * + * + * ... + * + * + * + * where: + * contains pkt_len bytes: + * 2 bytes of IP alignment pseudo header + * packet received + * contains 4 bytes: + * pkt_len and fields AX_RXHDR_* + * 0-7 bytes to terminate at + * 8 bytes boundary (64-bit). + * 4 bytes to make rx_hdr terminate at + * 8 bytes boundary (64-bit) + * contains 4 bytes: + * pkt_len=0 and AX_RXHDR_DROP_ERR + * contains 4 bytes: + * pkt_cnt and hdr_off (offset of + * ) + * + * pkt_cnt is number of entrys in the per-packet metadata. + * In current firmware there is 2 entrys per packet. + * The first points to the packet and the + * second is a dummy header. + * This was done probably to align fields in 64-bit and + * maintain compatibility with old firmware. + * This code assumes that and are + * optional. + */ + + if (skb->len < 4) return 0; - skb_trim(skb, skb->len - 4); rx_hdr = get_unaligned_le32(skb_tail_pointer(skb)); - pkt_cnt = (u16)rx_hdr; hdr_off = (u16)(rx_hdr >> 16); + + if (pkt_cnt == 0) + return 0; + + /* Make sure that the bounds of the metadata array are inside the SKB + * (and in front of the counter at the end). + */ + if (pkt_cnt * 4 + hdr_off > skb->len) + return 0; pkt_hdr = (u32 *)(skb->data + hdr_off); - while (pkt_cnt--) { + /* Packets must not overlap the metadata array */ + skb_trim(skb, hdr_off); + + for (; pkt_cnt > 0; pkt_cnt--, pkt_hdr++) { + u16 pkt_len_plus_padd; u16 pkt_len; le32_to_cpus(pkt_hdr); pkt_len = (*pkt_hdr >> 16) & 0x1fff; + pkt_len_plus_padd = (pkt_len + 7) & 0xfff8; + + /* Skip dummy header used for alignment + */ + if (pkt_len == 0) + continue; + + if (pkt_len_plus_padd > skb->len) + return 0; /* Check CRC or runt packet */ - if ((*pkt_hdr & AX_RXHDR_CRC_ERR) || - (*pkt_hdr & AX_RXHDR_DROP_ERR)) { - skb_pull(skb, (pkt_len + 7) & 0xFFF8); - pkt_hdr++; + if ((*pkt_hdr & (AX_RXHDR_CRC_ERR | AX_RXHDR_DROP_ERR)) || + pkt_len < 2 + ETH_HLEN) { + dev->net->stats.rx_errors++; + skb_pull(skb, pkt_len_plus_padd); continue; } - if (pkt_cnt == 0) { - skb->len = pkt_len; + /* last packet */ + if (pkt_len_plus_padd == skb->len) { + skb_trim(skb, pkt_len); + /* Skip IP alignment pseudo header */ skb_pull(skb, 2); - skb_set_tail_pointer(skb, skb->len); - skb->truesize = pkt_len + sizeof(struct sk_buff); + + skb->truesize = SKB_TRUESIZE(pkt_len_plus_padd); ax88179_rx_checksum(skb, pkt_hdr); return 1; } ax_skb = skb_clone(skb, GFP_ATOMIC); - if (ax_skb) { - ax_skb->len = pkt_len; - /* Skip IP alignment pseudo header */ - skb_pull(ax_skb, 2); - skb_set_tail_pointer(ax_skb, ax_skb->len); - ax_skb->truesize = pkt_len + sizeof(struct sk_buff); - ax88179_rx_checksum(ax_skb, pkt_hdr); - usbnet_skb_return(dev, ax_skb); - } else { + if (!ax_skb) return 0; - } + skb_trim(ax_skb, pkt_len); + + /* Skip IP alignment pseudo header */ + skb_pull(ax_skb, 2); + + skb->truesize = pkt_len_plus_padd + + SKB_DATA_ALIGN(sizeof(struct sk_buff)); + ax88179_rx_checksum(ax_skb, pkt_hdr); + usbnet_skb_return(dev, ax_skb); - skb_pull(skb, (pkt_len + 7) & 0xFFF8); - pkt_hdr++; + skb_pull(skb, pkt_len_plus_padd); } - return 1; + + return 0; } static struct sk_buff * @@ -1629,7 +1690,7 @@ static const struct driver_info ax88179_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1642,7 +1703,7 @@ static const struct driver_info ax88178a_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1655,7 +1716,7 @@ static const struct driver_info cypress_GX3_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1668,7 +1729,7 @@ static const struct driver_info dlink_dub1312_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1681,7 +1742,7 @@ static const struct driver_info sitecom_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1694,7 +1755,7 @@ static const struct driver_info samsung_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1707,7 +1768,7 @@ static const struct driver_info lenovo_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1720,7 +1781,7 @@ static const struct driver_info belkin_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 8325f6d65dccc..eee402a59f6da 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -571,6 +571,11 @@ static const struct usb_device_id products[] = { .bInterfaceSubClass = USB_CDC_SUBCLASS_ETHERNET, \ .bInterfaceProtocol = USB_CDC_PROTO_NONE +#define ZAURUS_FAKE_INTERFACE \ + .bInterfaceClass = USB_CLASS_COMM, \ + .bInterfaceSubClass = USB_CDC_SUBCLASS_MDLM, \ + .bInterfaceProtocol = USB_CDC_PROTO_NONE + /* SA-1100 based Sharp Zaurus ("collie"), or compatible; * wire-incompatible with true CDC Ethernet implementations. * (And, it seems, needlessly so...) @@ -624,6 +629,13 @@ static const struct usb_device_id products[] = { .idProduct = 0x9032, /* SL-6000 */ ZAURUS_MASTER_INTERFACE, .driver_info = 0, +}, { + .match_flags = USB_DEVICE_ID_MATCH_INT_INFO + | USB_DEVICE_ID_MATCH_DEVICE, + .idVendor = 0x04DD, + .idProduct = 0x9032, /* SL-6000 */ + ZAURUS_FAKE_INTERFACE, + .driver_info = 0, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index eb100eb33de3d..414341c9cf5ae 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -653,6 +653,16 @@ static const struct usb_device_id mbim_devs[] = { .driver_info = (unsigned long)&cdc_mbim_info_avoid_altsetting_toggle, }, + /* Telit LN920 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x1bc7, 0x1061, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&cdc_mbim_info_avoid_altsetting_toggle, + }, + + /* Telit FN990 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x1bc7, 0x1071, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&cdc_mbim_info_avoid_altsetting_toggle, + }, + /* default entry */ { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&cdc_mbim_info_zlp, diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 4cff0a6b1098a..1c9a1b94f6e28 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -177,6 +177,8 @@ static u32 cdc_ncm_check_tx_max(struct usbnet *dev, u32 new_tx) /* clamp new_tx to sane values */ min = ctx->max_datagram_size + ctx->max_ndp_size + sizeof(struct usb_cdc_ncm_nth16); max = min_t(u32, CDC_NCM_NTB_MAX_SIZE_TX, le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize)); + if (max == 0) + max = CDC_NCM_NTB_MAX_SIZE_TX; /* dwNtbOutMaxSize not set */ /* some devices set dwNtbOutMaxSize too low for the above default */ min = min(min, max); diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index 22450c4a92251..31200275a0640 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -2497,7 +2497,7 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface, hso_net_init); if (!net) { dev_err(&interface->dev, "Unable to create ethernet device\n"); - goto exit; + goto err_hso_dev; } hso_net = netdev_priv(net); @@ -2510,13 +2510,13 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface, USB_DIR_IN); if (!hso_net->in_endp) { dev_err(&interface->dev, "Can't find BULK IN endpoint\n"); - goto exit; + goto err_net; } hso_net->out_endp = hso_get_ep(interface, USB_ENDPOINT_XFER_BULK, USB_DIR_OUT); if (!hso_net->out_endp) { dev_err(&interface->dev, "Can't find BULK OUT endpoint\n"); - goto exit; + goto err_net; } SET_NETDEV_DEV(net, &interface->dev); SET_NETDEV_DEVTYPE(net, &hso_type); @@ -2525,18 +2525,18 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface, for (i = 0; i < MUX_BULK_RX_BUF_COUNT; i++) { hso_net->mux_bulk_rx_urb_pool[i] = usb_alloc_urb(0, GFP_KERNEL); if (!hso_net->mux_bulk_rx_urb_pool[i]) - goto exit; + goto err_mux_bulk_rx; hso_net->mux_bulk_rx_buf_pool[i] = kzalloc(MUX_BULK_RX_BUF_SIZE, GFP_KERNEL); if (!hso_net->mux_bulk_rx_buf_pool[i]) - goto exit; + goto err_mux_bulk_rx; } hso_net->mux_bulk_tx_urb = usb_alloc_urb(0, GFP_KERNEL); if (!hso_net->mux_bulk_tx_urb) - goto exit; + goto err_mux_bulk_rx; hso_net->mux_bulk_tx_buf = kzalloc(MUX_BULK_TX_BUF_SIZE, GFP_KERNEL); if (!hso_net->mux_bulk_tx_buf) - goto exit; + goto err_free_tx_urb; add_net_device(hso_dev); @@ -2544,7 +2544,7 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface, result = register_netdev(net); if (result) { dev_err(&interface->dev, "Failed to register device\n"); - goto exit; + goto err_free_tx_buf; } hso_log_port(hso_dev); @@ -2552,8 +2552,21 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface, hso_create_rfkill(hso_dev, interface); return hso_dev; -exit: - hso_free_net_device(hso_dev); + +err_free_tx_buf: + remove_net_device(hso_dev); + kfree(hso_net->mux_bulk_tx_buf); +err_free_tx_urb: + usb_free_urb(hso_net->mux_bulk_tx_urb); +err_mux_bulk_rx: + for (i = 0; i < MUX_BULK_RX_BUF_COUNT; i++) { + usb_free_urb(hso_net->mux_bulk_rx_urb_pool[i]); + kfree(hso_net->mux_bulk_rx_buf_pool[i]); + } +err_net: + free_netdev(net); +err_hso_dev: + kfree(hso_dev); return NULL; } @@ -2704,14 +2717,14 @@ struct hso_device *hso_create_mux_serial_device(struct usb_interface *interface, serial = kzalloc(sizeof(*serial), GFP_KERNEL); if (!serial) - goto exit; + goto err_free_dev; hso_dev->port_data.dev_serial = serial; serial->parent = hso_dev; if (hso_serial_common_create (serial, 1, CTRL_URB_RX_SIZE, CTRL_URB_TX_SIZE)) - goto exit; + goto err_free_serial; serial->tx_data_length--; serial->write_data = hso_mux_serial_write_data; @@ -2727,11 +2740,9 @@ struct hso_device *hso_create_mux_serial_device(struct usb_interface *interface, /* done, return it */ return hso_dev; -exit: - if (serial) { - tty_unregister_device(tty_drv, serial->minor); - kfree(serial); - } +err_free_serial: + kfree(serial); +err_free_dev: kfree(hso_dev); return NULL; diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index 345576f1a7470..73ad78f47763c 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -121,7 +121,7 @@ static int ipheth_alloc_urbs(struct ipheth_device *iphone) if (tx_buf == NULL) goto free_rx_urb; - rx_buf = usb_alloc_coherent(iphone->udev, IPHETH_BUF_SIZE, + rx_buf = usb_alloc_coherent(iphone->udev, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN, GFP_KERNEL, &rx_urb->transfer_dma); if (rx_buf == NULL) goto free_tx_buf; @@ -146,7 +146,7 @@ static int ipheth_alloc_urbs(struct ipheth_device *iphone) static void ipheth_free_urbs(struct ipheth_device *iphone) { - usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE, iphone->rx_buf, + usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN, iphone->rx_buf, iphone->rx_urb->transfer_dma); usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE, iphone->tx_buf, iphone->tx_urb->transfer_dma); @@ -317,7 +317,7 @@ static int ipheth_rx_submit(struct ipheth_device *dev, gfp_t mem_flags) usb_fill_bulk_urb(dev->rx_urb, udev, usb_rcvbulkpipe(udev, dev->bulk_in), - dev->rx_buf, IPHETH_BUF_SIZE, + dev->rx_buf, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN, ipheth_rcvbulk_callback, dev); dev->rx_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 92d9d3407b79b..ce3c8f476d75c 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -64,6 +64,8 @@ #define LAN7801_USB_PRODUCT_ID (0x7801) #define LAN78XX_EEPROM_MAGIC (0x78A5) #define LAN78XX_OTP_MAGIC (0x78F3) +#define AT29M2AF_USB_VENDOR_ID (0x07C9) +#define AT29M2AF_USB_PRODUCT_ID (0x0012) #define MII_READ 1 #define MII_WRITE 0 @@ -2136,7 +2138,7 @@ static int lan78xx_phy_init(struct lan78xx_net *dev) if (dev->domain_data.phyirq > 0) phydev->irq = dev->domain_data.phyirq; else - phydev->irq = 0; + phydev->irq = PHY_POLL; netdev_dbg(dev->net, "phydev->irq = %d\n", phydev->irq); /* set to AUTOMDIX */ @@ -3753,6 +3755,12 @@ static int lan78xx_probe(struct usb_interface *intf, dev->maxpacket = usb_maxpacket(dev->udev, dev->pipe_out, 1); + /* Reject broken descriptors. */ + if (dev->maxpacket == 0) { + ret = -ENODEV; + goto out4; + } + /* driver requires remote-wakeup capability during autosuspend. */ intf->needs_remote_wakeup = 1; @@ -4147,6 +4155,10 @@ static const struct usb_device_id products[] = { /* LAN7801 USB Gigabit Ethernet Device */ USB_DEVICE(LAN78XX_USB_VENDOR_ID, LAN7801_USB_PRODUCT_ID), }, + { + /* ATM2-AF USB Gigabit Ethernet Device */ + USB_DEVICE(AT29M2AF_USB_VENDOR_ID, AT29M2AF_USB_PRODUCT_ID), + }, {}, }; MODULE_DEVICE_TABLE(usb, products); diff --git a/drivers/net/usb/mcs7830.c b/drivers/net/usb/mcs7830.c index 09bfa6a4dfbc1..7e40e2e2f3723 100644 --- a/drivers/net/usb/mcs7830.c +++ b/drivers/net/usb/mcs7830.c @@ -108,8 +108,16 @@ static const char driver_name[] = "MOSCHIP usb-ethernet driver"; static int mcs7830_get_reg(struct usbnet *dev, u16 index, u16 size, void *data) { - return usbnet_read_cmd(dev, MCS7830_RD_BREQ, MCS7830_RD_BMREQ, - 0x0000, index, data, size); + int ret; + + ret = usbnet_read_cmd(dev, MCS7830_RD_BREQ, MCS7830_RD_BMREQ, + 0x0000, index, data, size); + if (ret < 0) + return ret; + else if (ret < size) + return -ENODATA; + + return ret; } static int mcs7830_set_reg(struct usbnet *dev, u16 index, u16 size, const void *data) diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index b744c09346a7c..dda051c94fb4d 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -495,11 +495,11 @@ static void read_bulk_callback(struct urb *urb) goto goon; rx_status = buf[count - 2]; - if (rx_status & 0x1e) { + if (rx_status & 0x1c) { netif_dbg(pegasus, rx_err, net, "RX packet error %x\n", rx_status); net->stats.rx_errors++; - if (rx_status & 0x06) /* long or runt */ + if (rx_status & 0x04) /* runt */ net->stats.rx_length_errors++; if (rx_status & 0x08) net->stats.rx_crc_errors++; diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 6508d70056b3a..8ef0a013874c5 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1315,6 +1315,8 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x1bc7, 0x1031, 3)}, /* Telit LE910C1-EUX */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)}, /* Telit LE922A */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1050, 2)}, /* Telit FN980 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1060, 2)}, /* Telit LN920 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1070, 2)}, /* Telit FN990 */ {QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */ {QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ @@ -1355,6 +1357,8 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x413c, 0x81d7, 0)}, /* Dell Wireless 5821e */ {QMI_FIXED_INTF(0x413c, 0x81d7, 1)}, /* Dell Wireless 5821e preproduction config */ {QMI_FIXED_INTF(0x413c, 0x81e0, 0)}, /* Dell Wireless 5821e with eSIM support*/ + {QMI_FIXED_INTF(0x413c, 0x81e4, 0)}, /* Dell Wireless 5829e with eSIM support*/ + {QMI_FIXED_INTF(0x413c, 0x81e6, 0)}, /* Dell Wireless 5829e */ {QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */ {QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)}, /* HP lt4120 Snapdragon X5 LTE */ {QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */ diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index f9b359d4e2939..1505fe3f87ed3 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -608,6 +608,11 @@ static const struct usb_device_id products [] = { USB_DEVICE_AND_INTERFACE_INFO(0x1630, 0x0042, USB_CLASS_COMM, 2 /* ACM */, 0x0ff), .driver_info = (unsigned long) &rndis_poll_status_info, +}, { + /* Hytera Communications DMR radios' "Radio to PC Network" */ + USB_VENDOR_AND_INTERFACE_INFO(0x238b, + USB_CLASS_COMM, 2 /* ACM */, 0x0ff), + .driver_info = (unsigned long)&rndis_info, }, { /* RNDIS is MSFT's un-official variant of CDC ACM */ USB_INTERFACE_INFO(USB_CLASS_COMM, 2 /* ACM */, 0x0ff), diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c index e04c8054c2cf3..fce6713e970ba 100644 --- a/drivers/net/usb/sr9700.c +++ b/drivers/net/usb/sr9700.c @@ -410,7 +410,7 @@ static int sr9700_rx_fixup(struct usbnet *dev, struct sk_buff *skb) /* ignore the CRC length */ len = (skb->data[1] | (skb->data[2] << 8)) - 4; - if (len > ETH_FRAME_LEN) + if (len > ETH_FRAME_LEN || len > skb->len) return 0; /* the last packet of current skb */ diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index dde05e2fdc3e6..6598a4cba158a 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1773,6 +1773,11 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) if (!dev->rx_urb_size) dev->rx_urb_size = dev->hard_mtu; dev->maxpacket = usb_maxpacket (dev->udev, dev->out, 1); + if (dev->maxpacket == 0) { + /* that is a broken device */ + status = -ENODEV; + goto out4; + } /* let userspace know we have a random address */ if (ether_addr_equal(net->dev_addr, node_id)) @@ -1982,7 +1987,7 @@ static int __usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype, cmd, reqtype, value, index, size); if (size) { - buf = kmalloc(size, GFP_KERNEL); + buf = kmalloc(size, GFP_NOIO); if (!buf) goto out; } @@ -2014,7 +2019,7 @@ static int __usbnet_write_cmd(struct usbnet *dev, u8 cmd, u8 reqtype, cmd, reqtype, value, index, size); if (data) { - buf = kmemdup(data, size, GFP_KERNEL); + buf = kmemdup(data, size, GFP_NOIO); if (!buf) goto out; } else { @@ -2115,7 +2120,7 @@ static void usbnet_async_cmd_cb(struct urb *urb) int usbnet_write_cmd_async(struct usbnet *dev, u8 cmd, u8 reqtype, u16 value, u16 index, const void *data, u16 size) { - struct usb_ctrlrequest *req = NULL; + struct usb_ctrlrequest *req; struct urb *urb; int err = -ENOMEM; void *buf = NULL; @@ -2133,7 +2138,7 @@ int usbnet_write_cmd_async(struct usbnet *dev, u8 cmd, u8 reqtype, if (!buf) { netdev_err(dev->net, "Error allocating buffer" " in %s!\n", __func__); - goto fail_free; + goto fail_free_urb; } } @@ -2157,14 +2162,21 @@ int usbnet_write_cmd_async(struct usbnet *dev, u8 cmd, u8 reqtype, if (err < 0) { netdev_err(dev->net, "Error submitting the control" " message: status=%d\n", err); - goto fail_free; + goto fail_free_all; } return 0; +fail_free_all: + kfree(req); fail_free_buf: kfree(buf); -fail_free: - kfree(req); + /* + * avoid a double free + * needed because the flag can be set only + * after filling the URB + */ + urb->transfer_flags = 0; +fail_free_urb: usb_free_urb(urb); fail: return err; diff --git a/drivers/net/usb/zaurus.c b/drivers/net/usb/zaurus.c index 8e717a0b559b3..7984f2157d222 100644 --- a/drivers/net/usb/zaurus.c +++ b/drivers/net/usb/zaurus.c @@ -256,6 +256,11 @@ static const struct usb_device_id products [] = { .bInterfaceSubClass = USB_CDC_SUBCLASS_ETHERNET, \ .bInterfaceProtocol = USB_CDC_PROTO_NONE +#define ZAURUS_FAKE_INTERFACE \ + .bInterfaceClass = USB_CLASS_COMM, \ + .bInterfaceSubClass = USB_CDC_SUBCLASS_MDLM, \ + .bInterfaceProtocol = USB_CDC_PROTO_NONE + /* SA-1100 based Sharp Zaurus ("collie"), or compatible. */ { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO @@ -313,6 +318,13 @@ static const struct usb_device_id products [] = { .idProduct = 0x9032, /* SL-6000 */ ZAURUS_MASTER_INTERFACE, .driver_info = ZAURUS_PXA_INFO, +}, { + .match_flags = USB_DEVICE_ID_MATCH_INT_INFO + | USB_DEVICE_ID_MATCH_DEVICE, + .idVendor = 0x04DD, + .idProduct = 0x9032, /* SL-6000 */ + ZAURUS_FAKE_INTERFACE, + .driver_info = (unsigned long)&bogus_mdlm_info, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 24bd6bda4a920..57f6d287f521a 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -209,9 +209,10 @@ static void __veth_xdp_flush(struct veth_rq *rq) { /* Write ptr_ring before reading rx_notify_masked */ smp_mb(); - if (!rq->rx_notify_masked) { - rq->rx_notify_masked = true; - napi_schedule(&rq->xdp_napi); + if (!READ_ONCE(rq->rx_notify_masked) && + napi_schedule_prep(&rq->xdp_napi)) { + WRITE_ONCE(rq->rx_notify_masked, true); + __napi_schedule(&rq->xdp_napi); } } @@ -244,7 +245,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) rcu_read_lock(); rcv = rcu_dereference(priv->peer); - if (unlikely(!rcv)) { + if (unlikely(!rcv) || !pskb_may_pull(skb, ETH_HLEN)) { kfree_skb(skb); goto drop; } @@ -254,7 +255,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) if (rxq < rcv->real_num_rx_queues) { rq = &rcv_priv->rq[rxq]; rcv_xdp = rcu_access_pointer(rq->xdp_prog); - skb_record_rx_queue(skb, rxq); } skb_tx_timestamp(skb); @@ -789,8 +789,10 @@ static int veth_poll(struct napi_struct *napi, int budget) /* Write rx_notify_masked before reading ptr_ring */ smp_store_mb(rq->rx_notify_masked, false); if (unlikely(!__ptr_ring_empty(&rq->xdp_ring))) { - rq->rx_notify_masked = true; - napi_schedule(&rq->xdp_napi); + if (napi_schedule_prep(&rq->xdp_napi)) { + WRITE_ONCE(rq->rx_notify_masked, true); + __napi_schedule(&rq->xdp_napi); + } } } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 37c2cecd1e503..579df7c5411d3 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -213,9 +213,15 @@ struct virtnet_info { /* Packet virtio header size */ u8 hdr_len; - /* Work struct for refilling if we run low on memory. */ + /* Work struct for delayed refilling if we run low on memory. */ struct delayed_work refill; + /* Is delayed refill enabled? */ + bool refill_enabled; + + /* The lock to synchronize the access to refill_enabled */ + spinlock_t refill_lock; + /* Work struct for config space updates */ struct work_struct config_work; @@ -319,6 +325,20 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask) return p; } +static void enable_delayed_refill(struct virtnet_info *vi) +{ + spin_lock_bh(&vi->refill_lock); + vi->refill_enabled = true; + spin_unlock_bh(&vi->refill_lock); +} + +static void disable_delayed_refill(struct virtnet_info *vi) +{ + spin_lock_bh(&vi->refill_lock); + vi->refill_enabled = false; + spin_unlock_bh(&vi->refill_lock); +} + static void virtqueue_napi_schedule(struct napi_struct *napi, struct virtqueue *vq) { @@ -1388,8 +1408,12 @@ static int virtnet_receive(struct receive_queue *rq, int budget, } if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { - if (!try_fill_recv(vi, rq, GFP_ATOMIC)) - schedule_delayed_work(&vi->refill, 0); + if (!try_fill_recv(vi, rq, GFP_ATOMIC)) { + spin_lock(&vi->refill_lock); + if (vi->refill_enabled) + schedule_delayed_work(&vi->refill, 0); + spin_unlock(&vi->refill_lock); + } } u64_stats_update_begin(&rq->stats.syncp); @@ -1508,6 +1532,8 @@ static int virtnet_open(struct net_device *dev) struct virtnet_info *vi = netdev_priv(dev); int i, err; + enable_delayed_refill(vi); + for (i = 0; i < vi->max_queue_pairs; i++) { if (i < vi->curr_queue_pairs) /* Make sure we have some buffers: if oom use wq. */ @@ -1878,6 +1904,8 @@ static int virtnet_close(struct net_device *dev) struct virtnet_info *vi = netdev_priv(dev); int i; + /* Make sure NAPI doesn't schedule refill work */ + disable_delayed_refill(vi); /* Make sure refill_work doesn't re-enable napi! */ cancel_delayed_work_sync(&vi->refill); @@ -2393,7 +2421,6 @@ static const struct ethtool_ops virtnet_ethtool_ops = { static void virtnet_freeze_down(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; - int i; /* Make sure no work handler is accessing the device */ flush_work(&vi->config_work); @@ -2401,14 +2428,8 @@ static void virtnet_freeze_down(struct virtio_device *vdev) netif_tx_lock_bh(vi->dev); netif_device_detach(vi->dev); netif_tx_unlock_bh(vi->dev); - cancel_delayed_work_sync(&vi->refill); - - if (netif_running(vi->dev)) { - for (i = 0; i < vi->max_queue_pairs; i++) { - napi_disable(&vi->rq[i].napi); - virtnet_napi_tx_disable(&vi->sq[i].napi); - } - } + if (netif_running(vi->dev)) + virtnet_close(vi->dev); } static int init_vqs(struct virtnet_info *vi); @@ -2416,7 +2437,7 @@ static int init_vqs(struct virtnet_info *vi); static int virtnet_restore_up(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; - int err, i; + int err; err = init_vqs(vi); if (err) @@ -2424,16 +2445,12 @@ static int virtnet_restore_up(struct virtio_device *vdev) virtio_device_ready(vdev); - if (netif_running(vi->dev)) { - for (i = 0; i < vi->curr_queue_pairs; i++) - if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL)) - schedule_delayed_work(&vi->refill, 0); + enable_delayed_refill(vi); - for (i = 0; i < vi->max_queue_pairs; i++) { - virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); - virtnet_napi_tx_enable(vi, vi->sq[i].vq, - &vi->sq[i].napi); - } + if (netif_running(vi->dev)) { + err = virtnet_open(vi->dev); + if (err) + return err; } netif_tx_lock_bh(vi->dev); @@ -3153,6 +3170,7 @@ static int virtnet_probe(struct virtio_device *vdev) vdev->priv = vi; INIT_WORK(&vi->config_work, virtnet_config_changed_work); + spin_lock_init(&vi->refill_lock); /* If we can receive ANY GSO packets, we must allocate large ones. */ if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || @@ -3232,14 +3250,20 @@ static int virtnet_probe(struct virtio_device *vdev) } } - err = register_netdev(dev); + /* serialize netdev register + virtio_device_ready() with ndo_open() */ + rtnl_lock(); + + err = register_netdevice(dev); if (err) { pr_debug("virtio_net: registering device failed\n"); + rtnl_unlock(); goto free_failover; } virtio_device_ready(vdev); + rtnl_unlock(); + err = virtnet_cpu_notif_add(vi); if (err) { pr_debug("virtio_net: registering cpu notifier failed\n"); diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index a06e6ab453f50..609f65530b9b0 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -595,6 +595,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx, if (dma_mapping_error(&adapter->pdev->dev, rbi->dma_addr)) { dev_kfree_skb_any(rbi->skb); + rbi->skb = NULL; rq->stats.rx_buf_alloc_failure++; break; } @@ -619,6 +620,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx, if (dma_mapping_error(&adapter->pdev->dev, rbi->dma_addr)) { put_page(rbi->page); + rbi->page = NULL; rq->stats.rx_buf_alloc_failure++; break; } @@ -1584,6 +1586,10 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq, u32 i, ring_idx; struct Vmxnet3_RxDesc *rxd; + /* ring has already been cleaned up */ + if (!rq->rx_ring[0].base) + return; + for (ring_idx = 0; ring_idx < 2; ring_idx++) { for (i = 0; i < rq->rx_ring[ring_idx].size; i++) { #ifdef __BIG_ENDIAN_BITFIELD @@ -3634,7 +3640,6 @@ vmxnet3_suspend(struct device *device) vmxnet3_free_intr_resources(adapter); netif_device_detach(netdev); - netif_tx_stop_all_queues(netdev); /* Create wake-up filters. */ pmConf = adapter->pm_conf; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 9b626c169554f..b51a2367dbaa0 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -33,6 +33,7 @@ #include #include #include +#include #define DRV_NAME "vrf" #define DRV_VERSION "1.0" @@ -147,12 +148,26 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev, return NETDEV_TX_OK; } +static void vrf_nf_set_untracked(struct sk_buff *skb) +{ + if (skb_get_nfct(skb) == 0) + nf_ct_set(skb, NULL, IP_CT_UNTRACKED); +} + +static void vrf_nf_reset_ct(struct sk_buff *skb) +{ + if (skb_get_nfct(skb) == IP_CT_UNTRACKED) + nf_reset_ct(skb); +} + #if IS_ENABLED(CONFIG_IPV6) static int vrf_ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) { int err; + vrf_nf_reset_ct(skb); + err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); @@ -206,6 +221,7 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, /* strip the ethernet header added for pass through VRF device */ __skb_pull(skb, skb_network_offset(skb)); + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); ret = vrf_ip6_local_out(net, skb->sk, skb); if (unlikely(net_xmit_eval(ret))) dev->stats.tx_errors++; @@ -232,6 +248,8 @@ static int vrf_ip_local_out(struct net *net, struct sock *sk, { int err; + vrf_nf_reset_ct(skb); + err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); if (likely(err == 1)) @@ -287,6 +305,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, RT_SCOPE_LINK); } + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); ret = vrf_ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); if (unlikely(net_xmit_eval(ret))) vrf_dev->stats.tx_errors++; @@ -351,8 +370,7 @@ static void vrf_finish_direct(struct sk_buff *skb) skb_pull(skb, ETH_HLEN); } - /* reset skb device */ - nf_reset_ct(skb); + vrf_nf_reset_ct(skb); } #if IS_ENABLED(CONFIG_IPV6) @@ -366,7 +384,7 @@ static int vrf_finish_output6(struct net *net, struct sock *sk, struct neighbour *neigh; int ret; - nf_reset_ct(skb); + vrf_nf_reset_ct(skb); skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; @@ -497,6 +515,8 @@ static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, if (rt6_need_strict(&ipv6_hdr(skb)->daddr)) return skb; + vrf_nf_set_untracked(skb); + if (qdisc_tx_is_default(vrf_dev) || IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) return vrf_ip6_out_direct(vrf_dev, sk, skb); @@ -584,7 +604,7 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s bool is_v6gw = false; int ret = -EINVAL; - nf_reset_ct(skb); + vrf_nf_reset_ct(skb); /* Be paranoid, rather than too clever. */ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { @@ -733,6 +753,8 @@ static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev, ipv4_is_lbcast(ip_hdr(skb)->daddr)) return skb; + vrf_nf_set_untracked(skb); + if (qdisc_tx_is_default(vrf_dev) || IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) return vrf_ip_out_direct(vrf_dev, sk, skb); @@ -1036,8 +1058,6 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, bool need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr); bool is_ndisc = ipv6_ndisc_frame(skb); - nf_reset_ct(skb); - /* loopback, multicast & non-ND link-local traffic; do not push through * packet taps again. Reset pkt_type for upper layers to process skb. * For strict packets with a source LLA, determine the dst using the @@ -1094,8 +1114,6 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev, skb->skb_iif = vrf_dev->ifindex; IPCB(skb)->flags |= IPSKB_L3SLAVE; - nf_reset_ct(skb); - if (ipv4_is_multicast(ip_hdr(skb)->daddr)) goto out; diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index c5991e31c557e..f4869b1836f30 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -679,11 +679,11 @@ static int vxlan_fdb_append(struct vxlan_fdb *f, rd = kmalloc(sizeof(*rd), GFP_ATOMIC); if (rd == NULL) - return -ENOBUFS; + return -ENOMEM; if (dst_cache_init(&rd->dst_cache, GFP_ATOMIC)) { kfree(rd); - return -ENOBUFS; + return -ENOMEM; } rd->remote_ip = *ip; diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c index 4c57e79e5779a..58e189ec672f9 100644 --- a/drivers/net/wireless/ath/ar5523/ar5523.c +++ b/drivers/net/wireless/ath/ar5523/ar5523.c @@ -153,6 +153,10 @@ static void ar5523_cmd_rx_cb(struct urb *urb) ar5523_err(ar, "Invalid reply to WDCMSG_TARGET_START"); return; } + if (!cmd->odata) { + ar5523_err(ar, "Unexpected WDCMSG_TARGET_START reply"); + return; + } memcpy(cmd->odata, hdr + 1, sizeof(u32)); cmd->olen = sizeof(u32); cmd->res = 0; diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index 760d24a28f392..420d5fa360927 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -1780,7 +1780,6 @@ static bool ath10k_htt_rx_h_frag_pn_check(struct ath10k *ar, struct ath10k_peer *peer; union htt_rx_pn_t *last_pn, new_pn = {0}; struct ieee80211_hdr *hdr; - bool more_frags; u8 tid, frag_number; u32 seq; @@ -1798,7 +1797,6 @@ static bool ath10k_htt_rx_h_frag_pn_check(struct ath10k *ar, last_pn = &peer->frag_tids_last_pn[tid]; new_pn.pn48 = ath10k_htt_rx_h_get_pn(ar, skb, offset, enctype); - more_frags = ieee80211_has_morefrags(hdr->frame_control); frag_number = le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_FRAG; seq = (__le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_SEQ) >> 4; diff --git a/drivers/net/wireless/ath/ath10k/htt_tx.c b/drivers/net/wireless/ath/ath10k/htt_tx.c index c38e1963ebc05..f73ed1044390c 100644 --- a/drivers/net/wireless/ath/ath10k/htt_tx.c +++ b/drivers/net/wireless/ath/ath10k/htt_tx.c @@ -147,6 +147,9 @@ void ath10k_htt_tx_dec_pending(struct ath10k_htt *htt) htt->num_pending_tx--; if (htt->num_pending_tx == htt->max_num_pending_tx - 1) ath10k_mac_tx_unlock(htt->ar, ATH10K_TX_PAUSE_Q_FULL); + + if (htt->num_pending_tx == 0) + wake_up(&htt->empty_tx_wq); } int ath10k_htt_tx_inc_pending(struct ath10k_htt *htt) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 20e248fd43642..3026eb54a7f23 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -985,8 +985,12 @@ static void ath10k_mac_vif_beacon_cleanup(struct ath10k_vif *arvif) ath10k_mac_vif_beacon_free(arvif); if (arvif->beacon_buf) { - dma_free_coherent(ar->dev, IEEE80211_MAX_FRAME_LEN, - arvif->beacon_buf, arvif->beacon_paddr); + if (ar->bus_param.dev_type == ATH10K_DEV_TYPE_HL) + kfree(arvif->beacon_buf); + else + dma_free_coherent(ar->dev, IEEE80211_MAX_FRAME_LEN, + arvif->beacon_buf, + arvif->beacon_paddr); arvif->beacon_buf = NULL; } } @@ -1040,7 +1044,7 @@ static int ath10k_monitor_vdev_start(struct ath10k *ar, int vdev_id) arg.channel.min_power = 0; arg.channel.max_power = channel->max_power * 2; arg.channel.max_reg_power = channel->max_reg_power * 2; - arg.channel.max_antenna_gain = channel->max_antenna_gain * 2; + arg.channel.max_antenna_gain = channel->max_antenna_gain; reinit_completion(&ar->vdev_setup_done); reinit_completion(&ar->vdev_delete_done); @@ -1486,7 +1490,7 @@ static int ath10k_vdev_start_restart(struct ath10k_vif *arvif, arg.channel.min_power = 0; arg.channel.max_power = chandef->chan->max_power * 2; arg.channel.max_reg_power = chandef->chan->max_reg_power * 2; - arg.channel.max_antenna_gain = chandef->chan->max_antenna_gain * 2; + arg.channel.max_antenna_gain = chandef->chan->max_antenna_gain; if (arvif->vdev_type == WMI_VDEV_TYPE_AP) { arg.ssid = arvif->u.ap.ssid; @@ -3145,7 +3149,7 @@ static int ath10k_update_channel_list(struct ath10k *ar) ch->min_power = 0; ch->max_power = channel->max_power * 2; ch->max_reg_power = channel->max_reg_power * 2; - ch->max_antenna_gain = channel->max_antenna_gain * 2; + ch->max_antenna_gain = channel->max_antenna_gain; ch->reg_class_id = 0; /* FIXME */ /* FIXME: why use only legacy modes, why not any @@ -5251,10 +5255,25 @@ static int ath10k_add_interface(struct ieee80211_hw *hw, if (vif->type == NL80211_IFTYPE_ADHOC || vif->type == NL80211_IFTYPE_MESH_POINT || vif->type == NL80211_IFTYPE_AP) { - arvif->beacon_buf = dma_alloc_coherent(ar->dev, - IEEE80211_MAX_FRAME_LEN, - &arvif->beacon_paddr, - GFP_ATOMIC); + if (ar->bus_param.dev_type == ATH10K_DEV_TYPE_HL) { + arvif->beacon_buf = kmalloc(IEEE80211_MAX_FRAME_LEN, + GFP_KERNEL); + + /* Using a kernel pointer in place of a dma_addr_t + * token can lead to undefined behavior if that + * makes it into cache management functions. Use a + * known-invalid address token instead, which + * avoids the warning and makes it easier to catch + * bugs if it does end up getting used. + */ + arvif->beacon_paddr = DMA_MAPPING_ERROR; + } else { + arvif->beacon_buf = + dma_alloc_coherent(ar->dev, + IEEE80211_MAX_FRAME_LEN, + &arvif->beacon_paddr, + GFP_ATOMIC); + } if (!arvif->beacon_buf) { ret = -ENOMEM; ath10k_warn(ar, "failed to allocate beacon buffer: %d\n", @@ -5469,8 +5488,12 @@ static int ath10k_add_interface(struct ieee80211_hw *hw, err: if (arvif->beacon_buf) { - dma_free_coherent(ar->dev, IEEE80211_MAX_FRAME_LEN, - arvif->beacon_buf, arvif->beacon_paddr); + if (ar->bus_param.dev_type == ATH10K_DEV_TYPE_HL) + kfree(arvif->beacon_buf); + else + dma_free_coherent(ar->dev, IEEE80211_MAX_FRAME_LEN, + arvif->beacon_buf, + arvif->beacon_paddr); arvif->beacon_buf = NULL; } diff --git a/drivers/net/wireless/ath/ath10k/txrx.c b/drivers/net/wireless/ath/ath10k/txrx.c index f46b9083bbf10..2c254f43790d2 100644 --- a/drivers/net/wireless/ath/ath10k/txrx.c +++ b/drivers/net/wireless/ath/ath10k/txrx.c @@ -80,8 +80,6 @@ int ath10k_txrx_tx_unref(struct ath10k_htt *htt, ath10k_htt_tx_free_msdu_id(htt, tx_done->msdu_id); ath10k_htt_tx_dec_pending(htt); - if (htt->num_pending_tx == 0) - wake_up(&htt->empty_tx_wq); spin_unlock_bh(&htt->tx_lock); rcu_read_lock(); diff --git a/drivers/net/wireless/ath/ath10k/usb.c b/drivers/net/wireless/ath/ath10k/usb.c index 05c0d5e92475e..326e1dc4c6734 100644 --- a/drivers/net/wireless/ath/ath10k/usb.c +++ b/drivers/net/wireless/ath/ath10k/usb.c @@ -525,7 +525,7 @@ static int ath10k_usb_submit_ctrl_in(struct ath10k *ar, req, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, buf, - size, 2 * HZ); + size, 2000); if (ret < 0) { ath10k_warn(ar, "Failed to read usb control message: %d\n", @@ -865,6 +865,11 @@ static int ath10k_usb_setup_pipe_resources(struct ath10k *ar, le16_to_cpu(endpoint->wMaxPacketSize), endpoint->bInterval); } + + /* Ignore broken descriptors. */ + if (usb_endpoint_maxp(endpoint) == 0) + continue; + urbcount = 0; pipe_num = diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 91604a14a8f46..796bd93c599b1 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -2541,6 +2541,10 @@ int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb) if (ieee80211_is_beacon(hdr->frame_control)) ath10k_mac_handle_beacon(ar, skb); + if (ieee80211_is_beacon(hdr->frame_control) || + ieee80211_is_probe_resp(hdr->frame_control)) + status->boottime_ns = ktime_get_boottime_ns(); + ath10k_dbg(ar, ATH10K_DBG_MGMT, "event mgmt rx skb %pK len %d ftype %02x stype %02x\n", skb, skb->len, diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h index 761bc4a7064df..de22396d085ce 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.h +++ b/drivers/net/wireless/ath/ath10k/wmi.h @@ -2045,7 +2045,9 @@ struct wmi_channel { union { __le32 reginfo1; struct { + /* note: power unit is 1 dBm */ u8 antenna_max; + /* note: power unit is 0.5 dBm */ u8 max_tx_power; } __packed; } __packed; @@ -2065,6 +2067,7 @@ struct wmi_channel_arg { u32 min_power; u32 max_power; u32 max_reg_power; + /* note: power unit is 1 dBm */ u32 max_antenna_gain; u32 reg_class_id; enum wmi_phy_mode mode; diff --git a/drivers/net/wireless/ath/ath10k/wow.c b/drivers/net/wireless/ath/ath10k/wow.c index 8c26adddd034e..b4f54ca127561 100644 --- a/drivers/net/wireless/ath/ath10k/wow.c +++ b/drivers/net/wireless/ath/ath10k/wow.c @@ -337,14 +337,15 @@ static int ath10k_vif_wow_set_wakeups(struct ath10k_vif *arvif, if (patterns[i].mask[j / 8] & BIT(j % 8)) bitmask[j] = 0xff; old_pattern.mask = bitmask; - new_pattern = old_pattern; if (ar->wmi.rx_decap_mode == ATH10K_HW_TXRX_NATIVE_WIFI) { - if (patterns[i].pkt_offset < ETH_HLEN) + if (patterns[i].pkt_offset < ETH_HLEN) { ath10k_wow_convert_8023_to_80211(&new_pattern, &old_pattern); - else + } else { + new_pattern = old_pattern; new_pattern.pkt_offset += WOW_HDR_LEN - ETH_HLEN; + } } if (WARN_ON(new_pattern.pattern_len > WOW_MAX_PATTERN_SIZE)) diff --git a/drivers/net/wireless/ath/ath5k/eeprom.c b/drivers/net/wireless/ath/ath5k/eeprom.c index 94d34ee02265d..01163b3339451 100644 --- a/drivers/net/wireless/ath/ath5k/eeprom.c +++ b/drivers/net/wireless/ath/ath5k/eeprom.c @@ -746,6 +746,9 @@ ath5k_eeprom_convert_pcal_info_5111(struct ath5k_hw *ah, int mode, } } + if (idx == AR5K_EEPROM_N_PD_CURVES) + goto err_out; + ee->ee_pd_gains[mode] = 1; pd = &chinfo[pier].pd_curves[idx]; diff --git a/drivers/net/wireless/ath/ath6kl/usb.c b/drivers/net/wireless/ath/ath6kl/usb.c index 53b66e9434c99..8b24964304b1f 100644 --- a/drivers/net/wireless/ath/ath6kl/usb.c +++ b/drivers/net/wireless/ath/ath6kl/usb.c @@ -340,6 +340,11 @@ static int ath6kl_usb_setup_pipe_resources(struct ath6kl_usb *ar_usb) le16_to_cpu(endpoint->wMaxPacketSize), endpoint->bInterval); } + + /* Ignore broken descriptors. */ + if (usb_endpoint_maxp(endpoint) == 0) + continue; + urbcount = 0; pipe_num = @@ -907,7 +912,7 @@ static int ath6kl_usb_submit_ctrl_in(struct ath6kl_usb *ar_usb, req, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, buf, - size, 2 * HZ); + size, 2000); if (ret < 0) { ath6kl_warn("Failed to read usb control message: %d\n", ret); diff --git a/drivers/net/wireless/ath/ath6kl/wmi.c b/drivers/net/wireless/ath/ath6kl/wmi.c index c610fe21c85c0..31ffec3a59727 100644 --- a/drivers/net/wireless/ath/ath6kl/wmi.c +++ b/drivers/net/wireless/ath/ath6kl/wmi.c @@ -2510,8 +2510,10 @@ static int ath6kl_wmi_sync_point(struct wmi *wmi, u8 if_idx) goto free_data_skb; for (index = 0; index < num_pri_streams; index++) { - if (WARN_ON(!data_sync_bufs[index].skb)) + if (WARN_ON(!data_sync_bufs[index].skb)) { + ret = -ENOMEM; goto free_data_skb; + } ep_id = ath6kl_ac2_endpoint_id(wmi->parent_dev, data_sync_bufs[index]. diff --git a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c index b4885a700296e..abed1effd95ca 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c @@ -3351,7 +3351,8 @@ static int ar9300_eeprom_restore_internal(struct ath_hw *ah, "Found block at %x: code=%d ref=%d length=%d major=%d minor=%d\n", cptr, code, reference, length, major, minor); if ((!AR_SREV_9485(ah) && length >= 1024) || - (AR_SREV_9485(ah) && length > EEPROM_DATA_LEN_9485)) { + (AR_SREV_9485(ah) && length > EEPROM_DATA_LEN_9485) || + (length > cptr)) { ath_dbg(common, EEPROM, "Skipping bad header\n"); cptr -= COMP_HDR_LEN; continue; @@ -5614,7 +5615,7 @@ unsigned int ar9003_get_paprd_scale_factor(struct ath_hw *ah, static u8 ar9003_get_eepmisc(struct ath_hw *ah) { - return ah->eeprom.map4k.baseEepHeader.eepMisc; + return ah->eeprom.ar9300_eep.baseEepHeader.opCapFlags.eepMisc; } const struct eeprom_ops eep_ar9300_ops = { diff --git a/drivers/net/wireless/ath/ath9k/ar9003_phy.h b/drivers/net/wireless/ath/ath9k/ar9003_phy.h index a171dbb29fbb6..ad949eb02f3d2 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_phy.h +++ b/drivers/net/wireless/ath/ath9k/ar9003_phy.h @@ -720,7 +720,7 @@ #define AR_CH0_TOP2 (AR_SREV_9300(ah) ? 0x1628c : \ (AR_SREV_9462(ah) ? 0x16290 : 0x16284)) #define AR_CH0_TOP2_XPABIASLVL (AR_SREV_9561(ah) ? 0x1e00 : 0xf000) -#define AR_CH0_TOP2_XPABIASLVL_S 12 +#define AR_CH0_TOP2_XPABIASLVL_S (AR_SREV_9561(ah) ? 9 : 12) #define AR_CH0_XTAL (AR_SREV_9300(ah) ? 0x16294 : \ ((AR_SREV_9462(ah) || AR_SREV_9565(ah)) ? 0x16298 : \ diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c index 2ed98aaed6fb5..c8c7afe0e343e 100644 --- a/drivers/net/wireless/ath/ath9k/hif_usb.c +++ b/drivers/net/wireless/ath/ath9k/hif_usb.c @@ -590,6 +590,13 @@ static void ath9k_hif_usb_rx_stream(struct hif_device_usb *hif_dev, return; } + if (pkt_len > 2 * MAX_RX_BUF_SIZE) { + dev_err(&hif_dev->udev->dev, + "ath9k_htc: invalid pkt_len (%x)\n", pkt_len); + RX_STAT_INC(skb_dropped); + return; + } + pad_len = 4 - (pkt_len & 0x3); if (pad_len == 4) pad_len = 0; diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c index 628f45c8c06f2..eeaf63de71bfd 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c @@ -1005,6 +1005,14 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv, goto rx_next; } + if (rxstatus->rs_keyix >= ATH_KEYMAX && + rxstatus->rs_keyix != ATH9K_RXKEYIX_INVALID) { + ath_dbg(common, ANY, + "Invalid keyix, dropping (keyix: %d)\n", + rxstatus->rs_keyix); + goto rx_next; + } + /* Get the RX status information */ memset(rx_status, 0, sizeof(struct ieee80211_rx_status)); diff --git a/drivers/net/wireless/ath/ath9k/htc_hst.c b/drivers/net/wireless/ath/ath9k/htc_hst.c index 510e61e97dbcb..994ec48b2f669 100644 --- a/drivers/net/wireless/ath/ath9k/htc_hst.c +++ b/drivers/net/wireless/ath/ath9k/htc_hst.c @@ -30,6 +30,7 @@ static int htc_issue_send(struct htc_target *target, struct sk_buff* skb, hdr->endpoint_id = epid; hdr->flags = flags; hdr->payload_len = cpu_to_be16(len); + memset(hdr->control, 0, sizeof(hdr->control)); status = target->hif->send(target->hif_dev, endpoint->ul_pipeid, skb); @@ -272,6 +273,10 @@ int htc_connect_service(struct htc_target *target, conn_msg->dl_pipeid = endpoint->dl_pipeid; conn_msg->ul_pipeid = endpoint->ul_pipeid; + /* To prevent infoleak */ + conn_msg->svc_meta_len = 0; + conn_msg->pad = 0; + ret = htc_issue_send(target, skb, skb->len, 0, ENDPOINT0); if (ret) goto err; diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index 9fd8e64288ffa..7e2e22b6bbbc5 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -1622,7 +1622,6 @@ static void ath9k_hw_apply_gpio_override(struct ath_hw *ah) ath9k_hw_gpio_request_out(ah, i, NULL, AR_GPIO_OUTPUT_MUX_AS_OUTPUT); ath9k_hw_set_gpio(ah, i, !!(ah->gpio_val & BIT(i))); - ath9k_hw_gpio_free(ah, i); } } @@ -2730,14 +2729,17 @@ static void ath9k_hw_gpio_cfg_output_mux(struct ath_hw *ah, u32 gpio, u32 type) static void ath9k_hw_gpio_cfg_soc(struct ath_hw *ah, u32 gpio, bool out, const char *label) { + int err; + if (ah->caps.gpio_requested & BIT(gpio)) return; - /* may be requested by BSP, free anyway */ - gpio_free(gpio); - - if (gpio_request_one(gpio, out ? GPIOF_OUT_INIT_LOW : GPIOF_IN, label)) + err = gpio_request_one(gpio, out ? GPIOF_OUT_INIT_LOW : GPIOF_IN, label); + if (err) { + ath_err(ath9k_hw_common(ah), "request GPIO%d failed:%d\n", + gpio, err); return; + } ah->caps.gpio_requested |= BIT(gpio); } diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 28ccdcb197de2..eb5751a45f266 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -530,8 +530,10 @@ irqreturn_t ath_isr(int irq, void *dev) ath9k_debug_sync_cause(sc, sync_cause); status &= ah->imask; /* discard unasked-for bits */ - if (test_bit(ATH_OP_HW_RESET, &common->op_flags)) + if (test_bit(ATH_OP_HW_RESET, &common->op_flags)) { + ath9k_hw_kill_interrupts(sc->sc_ah); return IRQ_HANDLED; + } /* * If there are no status bits set, then this interrupt was not @@ -834,7 +836,7 @@ static bool ath9k_txq_list_has_key(struct list_head *txq_list, u32 keyix) continue; txinfo = IEEE80211_SKB_CB(bf->bf_mpdu); - fi = (struct ath_frame_info *)&txinfo->rate_driver_data[0]; + fi = (struct ath_frame_info *)&txinfo->status.status_driver_data[0]; if (fi->keyix == keyix) return true; } diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index 14e6871a14054..fdb2152345eba 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -141,8 +141,8 @@ static struct ath_frame_info *get_frame_info(struct sk_buff *skb) { struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb); BUILD_BUG_ON(sizeof(struct ath_frame_info) > - sizeof(tx_info->rate_driver_data)); - return (struct ath_frame_info *) &tx_info->rate_driver_data[0]; + sizeof(tx_info->status.status_driver_data)); + return (struct ath_frame_info *) &tx_info->status.status_driver_data[0]; } static void ath_send_bar(struct ath_atx_tid *tid, u16 seqno) @@ -2498,6 +2498,16 @@ static void ath_tx_complete_buf(struct ath_softc *sc, struct ath_buf *bf, spin_unlock_irqrestore(&sc->tx.txbuflock, flags); } +static void ath_clear_tx_status(struct ieee80211_tx_info *tx_info) +{ + void *ptr = &tx_info->status; + + memset(ptr + sizeof(tx_info->status.rates), 0, + sizeof(tx_info->status) - + sizeof(tx_info->status.rates) - + sizeof(tx_info->status.status_driver_data)); +} + static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf, struct ath_tx_status *ts, int nframes, int nbad, int txok) @@ -2509,6 +2519,8 @@ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf, struct ath_hw *ah = sc->sc_ah; u8 i, tx_rateindex; + ath_clear_tx_status(tx_info); + if (txok) tx_info->status.ack_signal = ts->ts_rssi; @@ -2523,6 +2535,13 @@ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf, tx_info->status.ampdu_len = nframes; tx_info->status.ampdu_ack_len = nframes - nbad; + tx_info->status.rates[tx_rateindex].count = ts->ts_longretry + 1; + + for (i = tx_rateindex + 1; i < hw->max_rates; i++) { + tx_info->status.rates[i].count = 0; + tx_info->status.rates[i].idx = -1; + } + if ((ts->ts_status & ATH9K_TXERR_FILT) == 0 && (tx_info->flags & IEEE80211_TX_CTL_NO_ACK) == 0) { /* @@ -2544,16 +2563,6 @@ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf, tx_info->status.rates[tx_rateindex].count = hw->max_rate_tries; } - - for (i = tx_rateindex + 1; i < hw->max_rates; i++) { - tx_info->status.rates[i].count = 0; - tx_info->status.rates[i].idx = -1; - } - - tx_info->status.rates[tx_rateindex].count = ts->ts_longretry + 1; - - /* we report airtime in ath_tx_count_airtime(), don't report twice */ - tx_info->status.tx_time = 0; } static void ath_tx_processq(struct ath_softc *sc, struct ath_txq *txq) diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c index 21ca62b06214a..8d708e7588bcb 100644 --- a/drivers/net/wireless/ath/carl9170/main.c +++ b/drivers/net/wireless/ath/carl9170/main.c @@ -1917,7 +1917,7 @@ static int carl9170_parse_eeprom(struct ar9170 *ar) WARN_ON(!(tx_streams >= 1 && tx_streams <= IEEE80211_HT_MCS_TX_MAX_STREAMS)); - tx_params = (tx_streams - 1) << + tx_params |= (tx_streams - 1) << IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT; carl9170_band_2GHz.ht_cap.mcs.tx_params |= tx_params; diff --git a/drivers/net/wireless/ath/carl9170/tx.c b/drivers/net/wireless/ath/carl9170/tx.c index 2407931440edb..dfab6be1080cb 100644 --- a/drivers/net/wireless/ath/carl9170/tx.c +++ b/drivers/net/wireless/ath/carl9170/tx.c @@ -1557,6 +1557,9 @@ static struct carl9170_vif_info *carl9170_pick_beaconing_vif(struct ar9170 *ar) goto out; } } while (ar->beacon_enabled && i--); + + /* no entry found in list */ + return NULL; } out: diff --git a/drivers/net/wireless/ath/dfs_pattern_detector.c b/drivers/net/wireless/ath/dfs_pattern_detector.c index a274eb0d19688..a0ad6e48a35b4 100644 --- a/drivers/net/wireless/ath/dfs_pattern_detector.c +++ b/drivers/net/wireless/ath/dfs_pattern_detector.c @@ -182,10 +182,12 @@ static void channel_detector_exit(struct dfs_pattern_detector *dpd, if (cd == NULL) return; list_del(&cd->head); - for (i = 0; i < dpd->num_radar_types; i++) { - struct pri_detector *de = cd->detectors[i]; - if (de != NULL) - de->exit(de); + if (cd->detectors) { + for (i = 0; i < dpd->num_radar_types; i++) { + struct pri_detector *de = cd->detectors[i]; + if (de != NULL) + de->exit(de); + } } kfree(cd->detectors); kfree(cd); diff --git a/drivers/net/wireless/ath/wcn36xx/dxe.c b/drivers/net/wireless/ath/wcn36xx/dxe.c index bab30f7a443ce..c400261352bc8 100644 --- a/drivers/net/wireless/ath/wcn36xx/dxe.c +++ b/drivers/net/wireless/ath/wcn36xx/dxe.c @@ -563,6 +563,10 @@ static int wcn36xx_rx_handle_packets(struct wcn36xx *wcn, dxe = ctl->desc; while (!(READ_ONCE(dxe->ctrl) & WCN36xx_DXE_CTRL_VLD)) { + /* do not read until we own DMA descriptor */ + dma_rmb(); + + /* read/modify DMA descriptor */ skb = ctl->skb; dma_addr = dxe->dst_addr_l; ret = wcn36xx_dxe_fill_skb(wcn->dev, ctl, GFP_ATOMIC); @@ -573,9 +577,15 @@ static int wcn36xx_rx_handle_packets(struct wcn36xx *wcn, dma_unmap_single(wcn->dev, dma_addr, WCN36XX_PKT_SIZE, DMA_FROM_DEVICE); wcn36xx_rx_skb(wcn, skb); - } /* else keep old skb not submitted and use it for rx DMA */ + } + /* else keep old skb not submitted and reuse it for rx DMA + * (dropping the packet that it contained) + */ + /* flush descriptor changes before re-marking as valid */ + dma_wmb(); dxe->ctrl = ctrl; + ctl = ctl->next; dxe = ctl->desc; } @@ -942,4 +952,9 @@ void wcn36xx_dxe_deinit(struct wcn36xx *wcn) wcn36xx_dxe_ch_free_skbs(wcn, &wcn->dxe_rx_l_ch); wcn36xx_dxe_ch_free_skbs(wcn, &wcn->dxe_rx_h_ch); + + wcn36xx_dxe_deinit_descs(wcn->dev, &wcn->dxe_tx_l_ch); + wcn36xx_dxe_deinit_descs(wcn->dev, &wcn->dxe_tx_h_ch); + wcn36xx_dxe_deinit_descs(wcn->dev, &wcn->dxe_rx_l_ch); + wcn36xx_dxe_deinit_descs(wcn->dev, &wcn->dxe_rx_h_ch); } diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c index 556ba3c6c5d8e..eb5d08bf25032 100644 --- a/drivers/net/wireless/ath/wcn36xx/main.c +++ b/drivers/net/wireless/ath/wcn36xx/main.c @@ -134,7 +134,9 @@ static struct ieee80211_supported_band wcn_band_2ghz = { .cap = IEEE80211_HT_CAP_GRN_FLD | IEEE80211_HT_CAP_SGI_20 | IEEE80211_HT_CAP_DSSSCCK40 | - IEEE80211_HT_CAP_LSIG_TXOP_PROT, + IEEE80211_HT_CAP_LSIG_TXOP_PROT | + IEEE80211_HT_CAP_SGI_40 | + IEEE80211_HT_CAP_SUP_WIDTH_20_40, .ht_supported = true, .ampdu_factor = IEEE80211_HT_MAX_AMPDU_64K, .ampdu_density = IEEE80211_HT_MPDU_DENSITY_16, diff --git a/drivers/net/wireless/ath/wcn36xx/smd.c b/drivers/net/wireless/ath/wcn36xx/smd.c index 523550f94a3f0..74cf173c186ff 100644 --- a/drivers/net/wireless/ath/wcn36xx/smd.c +++ b/drivers/net/wireless/ath/wcn36xx/smd.c @@ -2311,7 +2311,7 @@ static int wcn36xx_smd_missed_beacon_ind(struct wcn36xx *wcn, wcn36xx_dbg(WCN36XX_DBG_HAL, "beacon missed bss_index %d\n", tmp->bss_index); vif = wcn36xx_priv_to_vif(tmp); - ieee80211_connection_loss(vif); + ieee80211_beacon_loss(vif); } return 0; } @@ -2326,7 +2326,7 @@ static int wcn36xx_smd_missed_beacon_ind(struct wcn36xx *wcn, wcn36xx_dbg(WCN36XX_DBG_HAL, "beacon missed bss_index %d\n", rsp->bss_index); vif = wcn36xx_priv_to_vif(tmp); - ieee80211_connection_loss(vif); + ieee80211_beacon_loss(vif); return 0; } } @@ -2340,30 +2340,52 @@ static int wcn36xx_smd_delete_sta_context_ind(struct wcn36xx *wcn, size_t len) { struct wcn36xx_hal_delete_sta_context_ind_msg *rsp = buf; - struct wcn36xx_vif *tmp; + struct wcn36xx_vif *vif_priv; + struct ieee80211_vif *vif; + struct ieee80211_bss_conf *bss_conf; struct ieee80211_sta *sta; + bool found = false; if (len != sizeof(*rsp)) { wcn36xx_warn("Corrupted delete sta indication\n"); return -EIO; } - wcn36xx_dbg(WCN36XX_DBG_HAL, "delete station indication %pM index %d\n", - rsp->addr2, rsp->sta_id); + wcn36xx_dbg(WCN36XX_DBG_HAL, + "delete station indication %pM index %d reason %d\n", + rsp->addr2, rsp->sta_id, rsp->reason_code); - list_for_each_entry(tmp, &wcn->vif_list, list) { + list_for_each_entry(vif_priv, &wcn->vif_list, list) { rcu_read_lock(); - sta = ieee80211_find_sta(wcn36xx_priv_to_vif(tmp), rsp->addr2); - if (sta) - ieee80211_report_low_ack(sta, 0); + vif = wcn36xx_priv_to_vif(vif_priv); + + if (vif->type == NL80211_IFTYPE_STATION) { + /* We could call ieee80211_find_sta too, but checking + * bss_conf is clearer. + */ + bss_conf = &vif->bss_conf; + if (vif_priv->sta_assoc && + !memcmp(bss_conf->bssid, rsp->addr2, ETH_ALEN)) { + found = true; + wcn36xx_dbg(WCN36XX_DBG_HAL, + "connection loss bss_index %d\n", + vif_priv->bss_index); + ieee80211_connection_loss(vif); + } + } else { + sta = ieee80211_find_sta(vif, rsp->addr2); + if (sta) { + found = true; + ieee80211_report_low_ack(sta, 0); + } + } + rcu_read_unlock(); - if (sta) + if (found) return 0; } - wcn36xx_warn("STA with addr %pM and index %d not found\n", - rsp->addr2, - rsp->sta_id); + wcn36xx_warn("BSS or STA with addr %pM not found\n", rsp->addr2); return -ENOENT; } diff --git a/drivers/net/wireless/ath/wil6210/txrx.c b/drivers/net/wireless/ath/wil6210/txrx.c index 598c1fba9dac4..579bfa2670416 100644 --- a/drivers/net/wireless/ath/wil6210/txrx.c +++ b/drivers/net/wireless/ath/wil6210/txrx.c @@ -907,7 +907,6 @@ static void wil_rx_handle_eapol(struct wil6210_vif *vif, struct sk_buff *skb) void wil_netif_rx(struct sk_buff *skb, struct net_device *ndev, int cid, struct wil_net_stats *stats, bool gro) { - gro_result_t rc = GRO_NORMAL; struct wil6210_vif *vif = ndev_to_vif(ndev); struct wil6210_priv *wil = ndev_to_wil(ndev); struct wireless_dev *wdev = vif_to_wdev(vif); @@ -918,22 +917,16 @@ void wil_netif_rx(struct sk_buff *skb, struct net_device *ndev, int cid, */ int mcast = is_multicast_ether_addr(da); struct sk_buff *xmit_skb = NULL; - static const char * const gro_res_str[] = { - [GRO_MERGED] = "GRO_MERGED", - [GRO_MERGED_FREE] = "GRO_MERGED_FREE", - [GRO_HELD] = "GRO_HELD", - [GRO_NORMAL] = "GRO_NORMAL", - [GRO_DROP] = "GRO_DROP", - [GRO_CONSUMED] = "GRO_CONSUMED", - }; if (wdev->iftype == NL80211_IFTYPE_STATION) { sa = wil_skb_get_sa(skb); if (mcast && ether_addr_equal(sa, ndev->dev_addr)) { /* mcast packet looped back to us */ - rc = GRO_DROP; dev_kfree_skb(skb); - goto stats; + ndev->stats.rx_dropped++; + stats->rx_dropped++; + wil_dbg_txrx(wil, "Rx drop %d bytes\n", len); + return; } } else if (wdev->iftype == NL80211_IFTYPE_AP && !vif->ap_isolate) { if (mcast) { @@ -977,26 +970,16 @@ void wil_netif_rx(struct sk_buff *skb, struct net_device *ndev, int cid, wil_rx_handle_eapol(vif, skb); if (gro) - rc = napi_gro_receive(&wil->napi_rx, skb); + napi_gro_receive(&wil->napi_rx, skb); else netif_rx_ni(skb); - wil_dbg_txrx(wil, "Rx complete %d bytes => %s\n", - len, gro_res_str[rc]); - } -stats: - /* statistics. rc set to GRO_NORMAL for AP bridging */ - if (unlikely(rc == GRO_DROP)) { - ndev->stats.rx_dropped++; - stats->rx_dropped++; - wil_dbg_txrx(wil, "Rx drop %d bytes\n", len); - } else { - ndev->stats.rx_packets++; - stats->rx_packets++; - ndev->stats.rx_bytes += len; - stats->rx_bytes += len; - if (mcast) - ndev->stats.multicast++; } + ndev->stats.rx_packets++; + stats->rx_packets++; + ndev->stats.rx_bytes += len; + stats->rx_bytes += len; + if (mcast) + ndev->stats.multicast++; } void wil_netif_rx_any(struct sk_buff *skb, struct net_device *ndev) diff --git a/drivers/net/wireless/broadcom/b43/phy_g.c b/drivers/net/wireless/broadcom/b43/phy_g.c index 1e022ec733a37..8b8fdb4965bde 100644 --- a/drivers/net/wireless/broadcom/b43/phy_g.c +++ b/drivers/net/wireless/broadcom/b43/phy_g.c @@ -2297,7 +2297,7 @@ static u8 b43_gphy_aci_scan(struct b43_wldev *dev) b43_phy_mask(dev, B43_PHY_G_CRS, 0x7FFF); b43_set_all_gains(dev, 3, 8, 1); - start = (channel - 5 > 0) ? channel - 5 : 1; + start = (channel > 5) ? channel - 5 : 1; end = (channel + 5 < 14) ? channel + 5 : 13; for (i = start; i <= end; i++) { diff --git a/drivers/net/wireless/broadcom/b43/phy_n.c b/drivers/net/wireless/broadcom/b43/phy_n.c index 32ce1b42ce08b..0ef62ef77af64 100644 --- a/drivers/net/wireless/broadcom/b43/phy_n.c +++ b/drivers/net/wireless/broadcom/b43/phy_n.c @@ -582,7 +582,7 @@ static void b43_nphy_adjust_lna_gain_table(struct b43_wldev *dev) u16 data[4]; s16 gain[2]; u16 minmax[2]; - static const u16 lna_gain[4] = { -2, 10, 19, 25 }; + static const s16 lna_gain[4] = { -2, 10, 19, 25 }; if (nphy->hang_avoid) b43_nphy_stay_in_carrier_search(dev, 1); diff --git a/drivers/net/wireless/broadcom/b43legacy/phy.c b/drivers/net/wireless/broadcom/b43legacy/phy.c index a659259bc51aa..6e76055e136d2 100644 --- a/drivers/net/wireless/broadcom/b43legacy/phy.c +++ b/drivers/net/wireless/broadcom/b43legacy/phy.c @@ -1123,7 +1123,7 @@ void b43legacy_phy_lo_b_measure(struct b43legacy_wldev *dev) struct b43legacy_phy *phy = &dev->phy; u16 regstack[12] = { 0 }; u16 mls; - u16 fval; + s16 fval; int i; int j; diff --git a/drivers/net/wireless/broadcom/b43legacy/radio.c b/drivers/net/wireless/broadcom/b43legacy/radio.c index da40d1ca8723d..e4dce76a4481d 100644 --- a/drivers/net/wireless/broadcom/b43legacy/radio.c +++ b/drivers/net/wireless/broadcom/b43legacy/radio.c @@ -283,7 +283,7 @@ u8 b43legacy_radio_aci_scan(struct b43legacy_wldev *dev) & 0x7FFF); b43legacy_set_all_gains(dev, 3, 8, 1); - start = (channel - 5 > 0) ? channel - 5 : 1; + start = (channel > 5) ? channel - 5 : 1; end = (channel + 5 < 14) ? channel + 5 : 13; for (i = start; i <= end; i++) { diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c index 6d5188b78f2de..0af452dca7664 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c @@ -75,6 +75,16 @@ static const struct dmi_system_id dmi_platform_data[] = { }, .driver_data = (void *)&acepc_t8_data, }, + { + /* Cyberbook T116 rugged tablet */ + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Default string"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"), + DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "20170531"), + }, + /* The factory image nvram file is identical to the ACEPC T8 one */ + .driver_data = (void *)&acepc_t8_data, + }, { /* Match for the GPDwin which unfortunately uses somewhat * generic dmi strings, which is why we test for 4 strings. diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c index 3aed4c4b887aa..544ad80629a99 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c @@ -207,6 +207,8 @@ static int brcmf_init_nvram_parser(struct nvram_parser *nvp, size = BRCMF_FW_MAX_NVRAM_SIZE; else size = data_len; + /* Add space for properties we may add */ + size += strlen(BRCMF_FW_DEFAULT_BOARDREV) + 1; /* Alloc for extra 0 byte + roundup by 4 + length field */ size += 1 + 3 + sizeof(u32); nvp->nvram = kzalloc(size, GFP_KERNEL); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c index bda042138e967..b5d2e5b9f67cc 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -444,47 +445,6 @@ brcmf_pcie_write_ram32(struct brcmf_pciedev_info *devinfo, u32 mem_offset, } -static void -brcmf_pcie_copy_mem_todev(struct brcmf_pciedev_info *devinfo, u32 mem_offset, - void *srcaddr, u32 len) -{ - void __iomem *address = devinfo->tcm + mem_offset; - __le32 *src32; - __le16 *src16; - u8 *src8; - - if (((ulong)address & 4) || ((ulong)srcaddr & 4) || (len & 4)) { - if (((ulong)address & 2) || ((ulong)srcaddr & 2) || (len & 2)) { - src8 = (u8 *)srcaddr; - while (len) { - iowrite8(*src8, address); - address++; - src8++; - len--; - } - } else { - len = len / 2; - src16 = (__le16 *)srcaddr; - while (len) { - iowrite16(le16_to_cpu(*src16), address); - address += 2; - src16++; - len--; - } - } - } else { - len = len / 4; - src32 = (__le32 *)srcaddr; - while (len) { - iowrite32(le32_to_cpu(*src32), address); - address += 4; - src32++; - len--; - } - } -} - - static void brcmf_pcie_copy_dev_tomem(struct brcmf_pciedev_info *devinfo, u32 mem_offset, void *dstaddr, u32 len) @@ -1346,6 +1306,18 @@ static void brcmf_pcie_down(struct device *dev) { } +static int brcmf_pcie_preinit(struct device *dev) +{ + struct brcmf_bus *bus_if = dev_get_drvdata(dev); + struct brcmf_pciedev *buspub = bus_if->bus_priv.pcie; + + brcmf_dbg(PCIE, "Enter\n"); + + brcmf_pcie_intr_enable(buspub->devinfo); + brcmf_pcie_hostready(buspub->devinfo); + + return 0; +} static int brcmf_pcie_tx(struct device *dev, struct sk_buff *skb) { @@ -1454,6 +1426,7 @@ static int brcmf_pcie_reset(struct device *dev) } static const struct brcmf_bus_ops brcmf_pcie_bus_ops = { + .preinit = brcmf_pcie_preinit, .txdata = brcmf_pcie_tx, .stop = brcmf_pcie_down, .txctl = brcmf_pcie_tx_ctlpkt, @@ -1561,8 +1534,8 @@ static int brcmf_pcie_download_fw_nvram(struct brcmf_pciedev_info *devinfo, return err; brcmf_dbg(PCIE, "Download FW %s\n", devinfo->fw_name); - brcmf_pcie_copy_mem_todev(devinfo, devinfo->ci->rambase, - (void *)fw->data, fw->size); + memcpy_toio(devinfo->tcm + devinfo->ci->rambase, + (void *)fw->data, fw->size); resetintr = get_unaligned_le32(fw->data); release_firmware(fw); @@ -1576,7 +1549,7 @@ static int brcmf_pcie_download_fw_nvram(struct brcmf_pciedev_info *devinfo, brcmf_dbg(PCIE, "Download NVRAM %s\n", devinfo->nvram_name); address = devinfo->ci->rambase + devinfo->ci->ramsize - nvram_len; - brcmf_pcie_copy_mem_todev(devinfo, address, nvram, nvram_len); + memcpy_toio(devinfo->tcm + address, nvram, nvram_len); brcmf_fw_nvram_free(nvram); } else { brcmf_dbg(PCIE, "No matching NVRAM file found %s\n", @@ -1775,6 +1748,8 @@ static void brcmf_pcie_setup(struct device *dev, int ret, ret = brcmf_chip_get_raminfo(devinfo->ci); if (ret) { brcmf_err(bus, "Failed to get RAM info\n"); + release_firmware(fw); + brcmf_fw_nvram_free(nvram); goto fail; } @@ -1824,9 +1799,6 @@ static void brcmf_pcie_setup(struct device *dev, int ret, init_waitqueue_head(&devinfo->mbdata_resp_wait); - brcmf_pcie_intr_enable(devinfo); - brcmf_pcie_hostready(devinfo); - ret = brcmf_attach(&devinfo->pdev->dev); if (ret) goto fail; @@ -2073,7 +2045,7 @@ static int brcmf_pcie_pm_leave_D3(struct device *dev) err = brcmf_pcie_probe(pdev, NULL); if (err) - brcmf_err(bus, "probe after resume failed, err=%d\n", err); + __brcmf_err(NULL, __func__, "probe after resume failed, err=%d\n", err); return err; } diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index ef5521b9b3577..ddc999670484f 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -550,7 +550,7 @@ enum brcmf_sdio_frmtype { BRCMF_SDIO_FT_SUB, }; -#define SDIOD_DRVSTR_KEY(chip, pmu) (((chip) << 16) | (pmu)) +#define SDIOD_DRVSTR_KEY(chip, pmu) (((unsigned int)(chip) << 16) | (pmu)) /* SDIO Pad drive strength to select value mappings */ struct sdiod_drive_str { diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_tx.c b/drivers/net/wireless/intel/ipw2x00/libipw_tx.c index d9baa2fa603b2..e4c60caa6543c 100644 --- a/drivers/net/wireless/intel/ipw2x00/libipw_tx.c +++ b/drivers/net/wireless/intel/ipw2x00/libipw_tx.c @@ -383,7 +383,7 @@ netdev_tx_t libipw_xmit(struct sk_buff *skb, struct net_device *dev) /* Each fragment may need to have room for encryption * pre/postfix */ - if (host_encrypt) + if (host_encrypt && crypt && crypt->ops) bytes_per_frag -= crypt->ops->extra_mpdu_prefix_len + crypt->ops->extra_mpdu_postfix_len; diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c index 6512d25e35630..f3bb1f91b5878 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c @@ -303,7 +303,7 @@ static int iwlagn_mac_start(struct ieee80211_hw *hw) priv->is_open = 1; IWL_DEBUG_MAC80211(priv, "leave\n"); - return 0; + return ret; } static void iwlagn_mac_stop(struct ieee80211_hw *hw) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c index ff0519ea00a5f..1848b957dc5cd 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c @@ -183,6 +183,9 @@ static void iwl_dealloc_ucode(struct iwl_drv *drv) for (i = 0; i < IWL_UCODE_TYPE_MAX; i++) iwl_free_fw_img(drv, drv->fw.img + i); + + /* clear the data for the aborted load case */ + memset(&drv->fw, 0, sizeof(drv->fw)); } static int iwl_alloc_fw_desc(struct iwl_drv *drv, struct fw_desc *desc, @@ -1276,23 +1279,31 @@ _iwl_op_mode_start(struct iwl_drv *drv, struct iwlwifi_opmode_table *op) const struct iwl_op_mode_ops *ops = op->ops; struct dentry *dbgfs_dir = NULL; struct iwl_op_mode *op_mode = NULL; + int retry, max_retry = !!iwlwifi_mod_params.fw_restart * IWL_MAX_INIT_RETRY; + + for (retry = 0; retry <= max_retry; retry++) { #ifdef CONFIG_IWLWIFI_DEBUGFS - drv->dbgfs_op_mode = debugfs_create_dir(op->name, - drv->dbgfs_drv); - dbgfs_dir = drv->dbgfs_op_mode; + drv->dbgfs_op_mode = debugfs_create_dir(op->name, + drv->dbgfs_drv); + dbgfs_dir = drv->dbgfs_op_mode; #endif - op_mode = ops->start(drv->trans, drv->trans->cfg, &drv->fw, dbgfs_dir); + op_mode = ops->start(drv->trans, drv->trans->cfg, + &drv->fw, dbgfs_dir); + + if (op_mode) + return op_mode; + + IWL_ERR(drv, "retry init count %d\n", retry); #ifdef CONFIG_IWLWIFI_DEBUGFS - if (!op_mode) { debugfs_remove_recursive(drv->dbgfs_op_mode); drv->dbgfs_op_mode = NULL; - } #endif + } - return op_mode; + return NULL; } static void _iwl_op_mode_stop(struct iwl_drv *drv) @@ -1330,6 +1341,7 @@ static void iwl_req_fw_callback(const struct firmware *ucode_raw, void *context) int i; bool load_module = false; bool usniffer_images = false; + bool failure = true; fw->ucode_capa.max_probe_length = IWL_DEFAULT_MAX_PROBE_LENGTH; fw->ucode_capa.standard_phy_calibration_size = @@ -1587,15 +1599,9 @@ static void iwl_req_fw_callback(const struct firmware *ucode_raw, void *context) * else from proceeding if the module fails to load * or hangs loading. */ - if (load_module) { + if (load_module) request_module("%s", op->name); -#ifdef CONFIG_IWLWIFI_OPMODE_MODULAR - if (err) - IWL_ERR(drv, - "failed to load module %s (error %d), is dynamic loading enabled?\n", - op->name, err); -#endif - } + failure = false; goto free; try_again: @@ -1610,7 +1616,12 @@ static void iwl_req_fw_callback(const struct firmware *ucode_raw, void *context) out_unbind: complete(&drv->request_firmware_complete); device_release_driver(drv->trans->dev); + /* drv has just been freed by the release */ + failure = false; free: + if (failure) + iwl_dealloc_ucode(drv); + if (pieces) { for (i = 0; i < ARRAY_SIZE(pieces->img); i++) kfree(pieces->img[i].sec); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.h b/drivers/net/wireless/intel/iwlwifi/iwl-drv.h index 2be30af7bdc30..9663db12b6f32 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.h @@ -145,4 +145,7 @@ void iwl_drv_stop(struct iwl_drv *drv); #define IWL_EXPORT_SYMBOL(sym) #endif +/* max retry for init flow */ +#define IWL_MAX_INIT_RETRY 2 + #endif /* __iwl_drv_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index 022f2faccab41..4a433e34ee7a1 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -519,8 +519,7 @@ static struct ieee80211_sband_iftype_data iwl_he_capa[] = { .has_he = true, .he_cap_elem = { .mac_cap_info[0] = - IEEE80211_HE_MAC_CAP0_HTC_HE | - IEEE80211_HE_MAC_CAP0_TWT_REQ, + IEEE80211_HE_MAC_CAP0_HTC_HE, .mac_cap_info[1] = IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_16US | IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_8, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 7272d8522a9e9..c5b08a68f6fa1 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -1417,8 +1417,10 @@ int iwl_mvm_up(struct iwl_mvm *mvm) while (!sband && i < NUM_NL80211_BANDS) sband = mvm->hw->wiphy->bands[i++]; - if (WARN_ON_ONCE(!sband)) + if (WARN_ON_ONCE(!sband)) { + ret = -ENODEV; goto error; + } chan = &sband->channels[0]; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index 9c417dd062913..7736621dca653 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -1043,8 +1043,10 @@ int iwl_mvm_mac_ctxt_beacon_changed(struct iwl_mvm *mvm, return -ENOMEM; #ifdef CONFIG_IWLWIFI_DEBUGFS - if (mvm->beacon_inject_active) + if (mvm->beacon_inject_active) { + dev_kfree_skb(beacon); return -EBUSY; + } #endif ret = iwl_mvm_mac_ctxt_send_beacon(mvm, vif, beacon); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 09b1a6beee77c..3c523774ef0e6 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -71,6 +71,7 @@ #include #include +#include "iwl-drv.h" #include "iwl-op-mode.h" #include "iwl-io.h" #include "mvm.h" @@ -342,7 +343,6 @@ const static u8 he_if_types_ext_capa_sta[] = { [0] = WLAN_EXT_CAPA1_EXT_CHANNEL_SWITCHING, [2] = WLAN_EXT_CAPA3_MULTI_BSSID_SUPPORT, [7] = WLAN_EXT_CAPA8_OPMODE_NOTIF, - [9] = WLAN_EXT_CAPA10_TWT_REQUESTER_SUPPORT, }; const static struct wiphy_iftype_ext_capab he_iftypes_ext_capa[] = { @@ -1129,9 +1129,30 @@ static int iwl_mvm_mac_start(struct ieee80211_hw *hw) { struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); int ret; + int retry, max_retry = 0; mutex_lock(&mvm->mutex); - ret = __iwl_mvm_mac_start(mvm); + + /* we are starting the mac not in error flow, and restart is enabled */ + if (!test_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED, &mvm->status) && + iwlwifi_mod_params.fw_restart) { + max_retry = IWL_MAX_INIT_RETRY; + /* + * This will prevent mac80211 recovery flows to trigger during + * init failures + */ + set_bit(IWL_MVM_STATUS_STARTING, &mvm->status); + } + + for (retry = 0; retry <= max_retry; retry++) { + ret = __iwl_mvm_mac_start(mvm); + if (!ret) + break; + + IWL_ERR(mvm, "mac start retry %d\n", retry); + } + clear_bit(IWL_MVM_STATUS_STARTING, &mvm->status); + mutex_unlock(&mvm->mutex); return ret; @@ -1674,6 +1695,7 @@ static void iwl_mvm_recalc_multicast(struct iwl_mvm *mvm) struct iwl_mvm_mc_iter_data iter_data = { .mvm = mvm, }; + int ret; lockdep_assert_held(&mvm->mutex); @@ -1683,6 +1705,22 @@ static void iwl_mvm_recalc_multicast(struct iwl_mvm *mvm) ieee80211_iterate_active_interfaces_atomic( mvm->hw, IEEE80211_IFACE_ITER_NORMAL, iwl_mvm_mc_iface_iterator, &iter_data); + + /* + * Send a (synchronous) ech command so that we wait for the + * multiple asynchronous MCAST_FILTER_CMD commands sent by + * the interface iterator. Otherwise, we might get here over + * and over again (by userspace just sending a lot of these) + * and the CPU can send them faster than the firmware can + * process them. + * Note that the CPU is still faster - but with this we'll + * actually send fewer commands overall because the CPU will + * not schedule the work in mac80211 as frequently if it's + * still running when rescheduled (possibly multiple times). + */ + ret = iwl_mvm_send_cmd_pdu(mvm, ECHO_CMD, 0, 0, NULL); + if (ret) + IWL_ERR(mvm, "Failed to synchronize multicast groups update\n"); } static u64 iwl_mvm_prepare_multicast(struct ieee80211_hw *hw, @@ -2970,16 +3008,20 @@ static void iwl_mvm_check_he_obss_narrow_bw_ru_iter(struct wiphy *wiphy, void *_data) { struct iwl_mvm_he_obss_narrow_bw_ru_data *data = _data; + const struct cfg80211_bss_ies *ies; const struct element *elem; - elem = cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY, bss->ies->data, - bss->ies->len); + rcu_read_lock(); + ies = rcu_dereference(bss->ies); + elem = cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY, ies->data, + ies->len); if (!elem || elem->datalen < 10 || !(elem->data[10] & WLAN_EXT_CAPA10_OBSS_NARROW_BW_RU_TOLERANCE_SUPPORT)) { data->tolerated = false; } + rcu_read_unlock(); } static void iwl_mvm_check_he_obss_narrow_bw_ru(struct ieee80211_hw *hw, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 5f1ecbb6fb71d..b06a9da753ff7 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1167,6 +1167,8 @@ struct iwl_mvm { * @IWL_MVM_STATUS_ROC_AUX_RUNNING: AUX remain-on-channel is running * @IWL_MVM_STATUS_FIRMWARE_RUNNING: firmware is running * @IWL_MVM_STATUS_NEED_FLUSH_P2P: need to flush P2P bcast STA + * @IWL_MVM_STATUS_STARTING: starting mac, + * used to disable restart flow while in STARTING state */ enum iwl_mvm_status { IWL_MVM_STATUS_HW_RFKILL, @@ -1177,6 +1179,7 @@ enum iwl_mvm_status { IWL_MVM_STATUS_ROC_AUX_RUNNING, IWL_MVM_STATUS_FIRMWARE_RUNNING, IWL_MVM_STATUS_NEED_FLUSH_P2P, + IWL_MVM_STATUS_STARTING, }; /* Keep track of completed init configuration */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 8b0576cde797e..5973eecbc0378 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -687,10 +687,26 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, mvm->fw_restart = iwlwifi_mod_params.fw_restart ? -1 : 0; - mvm->aux_queue = IWL_MVM_DQA_AUX_QUEUE; - mvm->snif_queue = IWL_MVM_DQA_INJECT_MONITOR_QUEUE; - mvm->probe_queue = IWL_MVM_DQA_AP_PROBE_RESP_QUEUE; - mvm->p2p_dev_queue = IWL_MVM_DQA_P2P_DEVICE_QUEUE; + if (iwl_mvm_has_new_tx_api(mvm)) { + /* + * If we have the new TX/queue allocation API initialize them + * all to invalid numbers. We'll rewrite the ones that we need + * later, but that doesn't happen for all of them all of the + * time (e.g. P2P Device is optional), and if a dynamic queue + * ends up getting number 2 (IWL_MVM_DQA_P2P_DEVICE_QUEUE) then + * iwl_mvm_is_static_queue() erroneously returns true, and we + * might have things getting stuck. + */ + mvm->aux_queue = IWL_MVM_INVALID_QUEUE; + mvm->snif_queue = IWL_MVM_INVALID_QUEUE; + mvm->probe_queue = IWL_MVM_INVALID_QUEUE; + mvm->p2p_dev_queue = IWL_MVM_INVALID_QUEUE; + } else { + mvm->aux_queue = IWL_MVM_DQA_AUX_QUEUE; + mvm->snif_queue = IWL_MVM_DQA_INJECT_MONITOR_QUEUE; + mvm->probe_queue = IWL_MVM_DQA_AP_PROBE_RESP_QUEUE; + mvm->p2p_dev_queue = IWL_MVM_DQA_P2P_DEVICE_QUEUE; + } mvm->sf_state = SF_UNINIT; if (iwl_mvm_has_unified_ucode(mvm)) @@ -1272,6 +1288,9 @@ void iwl_mvm_nic_restart(struct iwl_mvm *mvm, bool fw_error) */ if (!mvm->fw_restart && fw_error) { iwl_fw_error_collect(&mvm->fwrt); + } else if (test_bit(IWL_MVM_STATUS_STARTING, + &mvm->status)) { + IWL_ERR(mvm, "Starting mac, retry will be triggered anyway\n"); } else if (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) { struct iwl_mvm_reprobe *reprobe; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/power.c b/drivers/net/wireless/intel/iwlwifi/mvm/power.c index 22136e4832ea6..b2a6e9b7d0a10 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/power.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/power.c @@ -626,6 +626,9 @@ static void iwl_mvm_power_get_vifs_iterator(void *_data, u8 *mac, struct iwl_power_vifs *power_iterator = _data; bool active = mvmvif->phy_ctxt && mvmvif->phy_ctxt->id < NUM_PHY_CTX; + if (!mvmvif->uploaded) + return; + switch (ieee80211_vif_type_p2p(vif)) { case NL80211_IFTYPE_P2P_DEVICE: break; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index a6e2a30eb3109..52c6edc621ced 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -177,12 +177,39 @@ static int iwl_mvm_create_skb(struct iwl_mvm *mvm, struct sk_buff *skb, struct iwl_rx_mpdu_desc *desc = (void *)pkt->data; unsigned int headlen, fraglen, pad_len = 0; unsigned int hdrlen = ieee80211_hdrlen(hdr->frame_control); + u8 mic_crc_len = u8_get_bits(desc->mac_flags1, + IWL_RX_MPDU_MFLG1_MIC_CRC_LEN_MASK) << 1; if (desc->mac_flags2 & IWL_RX_MPDU_MFLG2_PAD) { len -= 2; pad_len = 2; } + /* + * For non monitor interface strip the bytes the RADA might not have + * removed. As monitor interface cannot exist with other interfaces + * this removal is safe. + */ + if (mic_crc_len && !ieee80211_hw_check(mvm->hw, RX_INCLUDES_FCS)) { + u32 pkt_flags = le32_to_cpu(pkt->len_n_flags); + + /* + * If RADA was not enabled then decryption was not performed so + * the MIC cannot be removed. + */ + if (!(pkt_flags & FH_RSCSR_RADA_EN)) { + if (WARN_ON(crypt_len > mic_crc_len)) + return -EINVAL; + + mic_crc_len -= crypt_len; + } + + if (WARN_ON(mic_crc_len > len)) + return -EINVAL; + + len -= mic_crc_len; + } + /* If frame is small enough to fit in skb->head, pull it completely. * If not, only pull ieee80211_hdr (including crypto if present, and * an additional 8 bytes for SNAP/ethertype, see below) so that diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index 8aa567d7912c2..3a58267d3d710 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -1700,7 +1700,7 @@ static int iwl_mvm_check_running_scans(struct iwl_mvm *mvm, int type) return -EIO; } -#define SCAN_TIMEOUT 20000 +#define SCAN_TIMEOUT 30000 void iwl_mvm_scan_timeout_wk(struct work_struct *work) { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 40cafcf40ccf0..5df4bbb6c6de3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -346,8 +346,9 @@ static int iwl_mvm_invalidate_sta_queue(struct iwl_mvm *mvm, int queue, } static int iwl_mvm_disable_txq(struct iwl_mvm *mvm, struct ieee80211_sta *sta, - int queue, u8 tid, u8 flags) + u16 *queueptr, u8 tid, u8 flags) { + int queue = *queueptr; struct iwl_scd_txq_cfg_cmd cmd = { .scd_queue = queue, .action = SCD_CFG_DISABLE_QUEUE, @@ -356,6 +357,7 @@ static int iwl_mvm_disable_txq(struct iwl_mvm *mvm, struct ieee80211_sta *sta, if (iwl_mvm_has_new_tx_api(mvm)) { iwl_trans_txq_free(mvm->trans, queue); + *queueptr = IWL_MVM_INVALID_QUEUE; return 0; } @@ -517,6 +519,7 @@ static int iwl_mvm_free_inactive_queue(struct iwl_mvm *mvm, int queue, u8 sta_id, tid; unsigned long disable_agg_tids = 0; bool same_sta; + u16 queue_tmp = queue; int ret; lockdep_assert_held(&mvm->mutex); @@ -539,7 +542,7 @@ static int iwl_mvm_free_inactive_queue(struct iwl_mvm *mvm, int queue, iwl_mvm_invalidate_sta_queue(mvm, queue, disable_agg_tids, false); - ret = iwl_mvm_disable_txq(mvm, old_sta, queue, tid, 0); + ret = iwl_mvm_disable_txq(mvm, old_sta, &queue_tmp, tid, 0); if (ret) { IWL_ERR(mvm, "Failed to free inactive queue %d (ret=%d)\n", @@ -1209,6 +1212,7 @@ static int iwl_mvm_sta_alloc_queue(struct iwl_mvm *mvm, unsigned int wdg_timeout = iwl_mvm_get_wd_timeout(mvm, mvmsta->vif, false, false); int queue = -1; + u16 queue_tmp; unsigned long disable_agg_tids = 0; enum iwl_mvm_agg_state queue_state; bool shared_queue = false, inc_ssn; @@ -1357,7 +1361,8 @@ static int iwl_mvm_sta_alloc_queue(struct iwl_mvm *mvm, return 0; out_err: - iwl_mvm_disable_txq(mvm, sta, queue, tid, 0); + queue_tmp = queue; + iwl_mvm_disable_txq(mvm, sta, &queue_tmp, tid, 0); return ret; } @@ -1795,7 +1800,7 @@ static void iwl_mvm_disable_sta_queues(struct iwl_mvm *mvm, if (mvm_sta->tid_data[i].txq_id == IWL_MVM_INVALID_QUEUE) continue; - iwl_mvm_disable_txq(mvm, sta, mvm_sta->tid_data[i].txq_id, i, + iwl_mvm_disable_txq(mvm, sta, &mvm_sta->tid_data[i].txq_id, i, 0); mvm_sta->tid_data[i].txq_id = IWL_MVM_INVALID_QUEUE; } @@ -2005,7 +2010,7 @@ static int iwl_mvm_add_int_sta_with_queue(struct iwl_mvm *mvm, int macidx, ret = iwl_mvm_add_int_sta_common(mvm, sta, NULL, macidx, maccolor); if (ret) { if (!iwl_mvm_has_new_tx_api(mvm)) - iwl_mvm_disable_txq(mvm, NULL, *queue, + iwl_mvm_disable_txq(mvm, NULL, queue, IWL_MAX_TID_COUNT, 0); return ret; } @@ -2073,7 +2078,7 @@ int iwl_mvm_rm_snif_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) if (WARN_ON_ONCE(mvm->snif_sta.sta_id == IWL_MVM_INVALID_STA)) return -EINVAL; - iwl_mvm_disable_txq(mvm, NULL, mvm->snif_queue, IWL_MAX_TID_COUNT, 0); + iwl_mvm_disable_txq(mvm, NULL, &mvm->snif_queue, IWL_MAX_TID_COUNT, 0); ret = iwl_mvm_rm_sta_common(mvm, mvm->snif_sta.sta_id); if (ret) IWL_WARN(mvm, "Failed sending remove station\n"); @@ -2090,7 +2095,7 @@ int iwl_mvm_rm_aux_sta(struct iwl_mvm *mvm) if (WARN_ON_ONCE(mvm->aux_sta.sta_id == IWL_MVM_INVALID_STA)) return -EINVAL; - iwl_mvm_disable_txq(mvm, NULL, mvm->aux_queue, IWL_MAX_TID_COUNT, 0); + iwl_mvm_disable_txq(mvm, NULL, &mvm->aux_queue, IWL_MAX_TID_COUNT, 0); ret = iwl_mvm_rm_sta_common(mvm, mvm->aux_sta.sta_id); if (ret) IWL_WARN(mvm, "Failed sending remove station\n"); @@ -2186,7 +2191,7 @@ static void iwl_mvm_free_bcast_sta_queues(struct iwl_mvm *mvm, struct ieee80211_vif *vif) { struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - int queue; + u16 *queueptr, queue; lockdep_assert_held(&mvm->mutex); @@ -2195,10 +2200,10 @@ static void iwl_mvm_free_bcast_sta_queues(struct iwl_mvm *mvm, switch (vif->type) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_ADHOC: - queue = mvm->probe_queue; + queueptr = &mvm->probe_queue; break; case NL80211_IFTYPE_P2P_DEVICE: - queue = mvm->p2p_dev_queue; + queueptr = &mvm->p2p_dev_queue; break; default: WARN(1, "Can't free bcast queue on vif type %d\n", @@ -2206,7 +2211,8 @@ static void iwl_mvm_free_bcast_sta_queues(struct iwl_mvm *mvm, return; } - iwl_mvm_disable_txq(mvm, NULL, queue, IWL_MAX_TID_COUNT, 0); + queue = *queueptr; + iwl_mvm_disable_txq(mvm, NULL, queueptr, IWL_MAX_TID_COUNT, 0); if (iwl_mvm_has_new_tx_api(mvm)) return; @@ -2441,7 +2447,7 @@ int iwl_mvm_rm_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) iwl_mvm_flush_sta(mvm, &mvmvif->mcast_sta, true, 0); - iwl_mvm_disable_txq(mvm, NULL, mvmvif->cab_queue, 0, 0); + iwl_mvm_disable_txq(mvm, NULL, &mvmvif->cab_queue, 0, 0); ret = iwl_mvm_rm_sta_common(mvm, mvmvif->mcast_sta.sta_id); if (ret) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index 8686107da1168..a637d7fb4b261 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -758,6 +758,9 @@ bool iwl_mvm_rx_diversity_allowed(struct iwl_mvm *mvm) lockdep_assert_held(&mvm->mutex); + if (iwlmvm_mod_params.power_scheme != IWL_POWER_SCHEME_CAM) + return false; + if (num_of_ant(iwl_mvm_get_valid_rx_ant(mvm)) == 1) return false; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c index ee45e475405a1..fba6fff13349e 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c @@ -292,8 +292,7 @@ int iwl_trans_pcie_gen2_start_fw(struct iwl_trans *trans, /* This may fail if AMT took ownership of the device */ if (iwl_pcie_prepare_card_hw(trans)) { IWL_WARN(trans, "Exit HW not ready\n"); - ret = -EIO; - goto out; + return -EIO; } iwl_enable_rfkill_int(trans); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 0581f082301e0..8915030030c4c 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -1335,8 +1335,7 @@ static int iwl_trans_pcie_start_fw(struct iwl_trans *trans, /* This may fail if AMT took ownership of the device */ if (iwl_pcie_prepare_card_hw(trans)) { IWL_WARN(trans, "Exit HW not ready\n"); - ret = -EIO; - goto out; + return -EIO; } iwl_enable_rfkill_int(trans); diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 1033513d3d9de..ffe27104f654b 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1603,8 +1603,8 @@ mac80211_hwsim_beacon(struct hrtimer *timer) bcn_int -= data->bcn_delta; data->bcn_delta = 0; } - hrtimer_forward(&data->beacon_timer, hrtimer_get_expires(timer), - ns_to_ktime(bcn_int * NSEC_PER_USEC)); + hrtimer_forward_now(&data->beacon_timer, + ns_to_ktime(bcn_int * NSEC_PER_USEC)); return HRTIMER_RESTART; } @@ -2062,9 +2062,21 @@ static void hw_scan_work(struct work_struct *work) if (req->ie_len) skb_put_data(probe, req->ie, req->ie_len); + rcu_read_lock(); + if (!ieee80211_tx_prepare_skb(hwsim->hw, + hwsim->hw_scan_vif, + probe, + hwsim->tmp_chan->band, + NULL)) { + rcu_read_unlock(); + kfree_skb(probe); + continue; + } + local_bh_disable(); mac80211_hwsim_tx_frame(hwsim->hw, probe, hwsim->tmp_chan); + rcu_read_unlock(); local_bh_enable(); } } @@ -3316,6 +3328,10 @@ static int hwsim_tx_info_frame_received_nl(struct sk_buff *skb_2, } txi->flags |= IEEE80211_TX_STAT_ACK; } + + if (hwsim_flags & HWSIM_TX_CTL_NO_ACK) + txi->flags |= IEEE80211_TX_STAT_NOACK_TRANSMITTED; + ieee80211_tx_status_irqsafe(data2->hw, skb); return 0; out: diff --git a/drivers/net/wireless/marvell/libertas/if_usb.c b/drivers/net/wireless/marvell/libertas/if_usb.c index 20436a289d5cd..5d6dc1dd050d4 100644 --- a/drivers/net/wireless/marvell/libertas/if_usb.c +++ b/drivers/net/wireless/marvell/libertas/if_usb.c @@ -292,6 +292,7 @@ static int if_usb_probe(struct usb_interface *intf, if_usb_reset_device(cardp); dealloc: if_usb_free(cardp); + kfree(cardp); error: return r; @@ -316,6 +317,7 @@ static void if_usb_disconnect(struct usb_interface *intf) /* Unlink and free urb */ if_usb_free(cardp); + kfree(cardp); usb_set_intfdata(intf, NULL); usb_put_dev(interface_to_usbdev(intf)); diff --git a/drivers/net/wireless/marvell/libertas_tf/if_usb.c b/drivers/net/wireless/marvell/libertas_tf/if_usb.c index bedc092150884..b30bcb28503ae 100644 --- a/drivers/net/wireless/marvell/libertas_tf/if_usb.c +++ b/drivers/net/wireless/marvell/libertas_tf/if_usb.c @@ -230,6 +230,7 @@ static int if_usb_probe(struct usb_interface *intf, dealloc: if_usb_free(cardp); + kfree(cardp); error: lbtf_deb_leave(LBTF_DEB_MAIN); return -ENOMEM; @@ -254,6 +255,7 @@ static void if_usb_disconnect(struct usb_interface *intf) /* Unlink and free urb */ if_usb_free(cardp); + kfree(cardp); usb_set_intfdata(intf, NULL); usb_put_dev(interface_to_usbdev(intf)); diff --git a/drivers/net/wireless/marvell/mwifiex/11h.c b/drivers/net/wireless/marvell/mwifiex/11h.c index 238accfe4f41d..c4176e357b22c 100644 --- a/drivers/net/wireless/marvell/mwifiex/11h.c +++ b/drivers/net/wireless/marvell/mwifiex/11h.c @@ -303,5 +303,7 @@ void mwifiex_dfs_chan_sw_work_queue(struct work_struct *work) mwifiex_dbg(priv->adapter, MSG, "indicating channel switch completion to kernel\n"); + mutex_lock(&priv->wdev.mtx); cfg80211_ch_switch_notify(priv->netdev, &priv->dfs_chandef); + mutex_unlock(&priv->wdev.mtx); } diff --git a/drivers/net/wireless/marvell/mwifiex/11n.c b/drivers/net/wireless/marvell/mwifiex/11n.c index e435f801bc912..acbef9f1a83b6 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n.c +++ b/drivers/net/wireless/marvell/mwifiex/11n.c @@ -657,14 +657,15 @@ int mwifiex_send_delba(struct mwifiex_private *priv, int tid, u8 *peer_mac, uint16_t del_ba_param_set; memset(&delba, 0, sizeof(delba)); - delba.del_ba_param_set = cpu_to_le16(tid << DELBA_TID_POS); - del_ba_param_set = le16_to_cpu(delba.del_ba_param_set); + del_ba_param_set = tid << DELBA_TID_POS; + if (initiator) del_ba_param_set |= IEEE80211_DELBA_PARAM_INITIATOR_MASK; else del_ba_param_set &= ~IEEE80211_DELBA_PARAM_INITIATOR_MASK; + delba.del_ba_param_set = cpu_to_le16(del_ba_param_set); memcpy(&delba.peer_mac_addr, peer_mac, ETH_ALEN); /* We don't wait for the response of this command */ diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index 9e6dc289ec3e8..1599ae74b066b 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -912,16 +912,20 @@ mwifiex_init_new_priv_params(struct mwifiex_private *priv, switch (type) { case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_ADHOC: - priv->bss_role = MWIFIEX_BSS_ROLE_STA; + priv->bss_role = MWIFIEX_BSS_ROLE_STA; + priv->bss_type = MWIFIEX_BSS_TYPE_STA; break; case NL80211_IFTYPE_P2P_CLIENT: - priv->bss_role = MWIFIEX_BSS_ROLE_STA; + priv->bss_role = MWIFIEX_BSS_ROLE_STA; + priv->bss_type = MWIFIEX_BSS_TYPE_P2P; break; case NL80211_IFTYPE_P2P_GO: - priv->bss_role = MWIFIEX_BSS_ROLE_UAP; + priv->bss_role = MWIFIEX_BSS_ROLE_UAP; + priv->bss_type = MWIFIEX_BSS_TYPE_P2P; break; case NL80211_IFTYPE_AP: priv->bss_role = MWIFIEX_BSS_ROLE_UAP; + priv->bss_type = MWIFIEX_BSS_TYPE_UAP; break; default: mwifiex_dbg(adapter, ERROR, @@ -1233,29 +1237,15 @@ mwifiex_cfg80211_change_virtual_intf(struct wiphy *wiphy, break; case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_P2P_GO: + if (mwifiex_cfg80211_deinit_p2p(priv)) + return -EFAULT; + switch (type) { - case NL80211_IFTYPE_STATION: - if (mwifiex_cfg80211_deinit_p2p(priv)) - return -EFAULT; - priv->adapter->curr_iface_comb.p2p_intf--; - priv->adapter->curr_iface_comb.sta_intf++; - dev->ieee80211_ptr->iftype = type; - if (mwifiex_deinit_priv_params(priv)) - return -1; - if (mwifiex_init_new_priv_params(priv, dev, type)) - return -1; - if (mwifiex_sta_init_cmd(priv, false, false)) - return -1; - break; case NL80211_IFTYPE_ADHOC: - if (mwifiex_cfg80211_deinit_p2p(priv)) - return -EFAULT; + case NL80211_IFTYPE_STATION: return mwifiex_change_vif_to_sta_adhoc(dev, curr_iftype, type, params); - break; case NL80211_IFTYPE_AP: - if (mwifiex_cfg80211_deinit_p2p(priv)) - return -EFAULT; return mwifiex_change_vif_to_ap(dev, curr_iftype, type, params); case NL80211_IFTYPE_UNSPECIFIED: diff --git a/drivers/net/wireless/marvell/mwifiex/cmdevt.c b/drivers/net/wireless/marvell/mwifiex/cmdevt.c index e8788c35a453d..ec04515bd9dfa 100644 --- a/drivers/net/wireless/marvell/mwifiex/cmdevt.c +++ b/drivers/net/wireless/marvell/mwifiex/cmdevt.c @@ -322,9 +322,9 @@ static int mwifiex_dnld_sleep_confirm_cmd(struct mwifiex_adapter *adapter) adapter->seq_num++; sleep_cfm_buf->seq_num = - cpu_to_le16((HostCmd_SET_SEQ_NO_BSS_INFO + cpu_to_le16(HostCmd_SET_SEQ_NO_BSS_INFO (adapter->seq_num, priv->bss_num, - priv->bss_type))); + priv->bss_type)); mwifiex_dbg(adapter, CMD, "cmd: DNLD_CMD: %#x, act %#x, len %d, seqno %#x\n", diff --git a/drivers/net/wireless/marvell/mwifiex/fw.h b/drivers/net/wireless/marvell/mwifiex/fw.h index 8b9d0809daf62..076ea1c4b921d 100644 --- a/drivers/net/wireless/marvell/mwifiex/fw.h +++ b/drivers/net/wireless/marvell/mwifiex/fw.h @@ -512,10 +512,10 @@ enum mwifiex_channel_flags { #define RF_ANTENNA_AUTO 0xFFFF -#define HostCmd_SET_SEQ_NO_BSS_INFO(seq, num, type) { \ - (((seq) & 0x00ff) | \ - (((num) & 0x000f) << 8)) | \ - (((type) & 0x000f) << 12); } +#define HostCmd_SET_SEQ_NO_BSS_INFO(seq, num, type) \ + ((((seq) & 0x00ff) | \ + (((num) & 0x000f) << 8)) | \ + (((type) & 0x000f) << 12)) #define HostCmd_GET_SEQ_NO(seq) \ ((seq) & HostCmd_SEQ_NUM_MASK) diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c index bc46a0aa06eb7..b316e34917958 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.c +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c @@ -1326,6 +1326,14 @@ mwifiex_pcie_send_data(struct mwifiex_adapter *adapter, struct sk_buff *skb, ret = -1; goto done_unmap; } + + /* The firmware (latest version 15.68.19.p21) of the 88W8897 PCIe+USB card + * seems to crash randomly after setting the TX ring write pointer when + * ASPM powersaving is enabled. A workaround seems to be keeping the bus + * busy by reading a random register afterwards. + */ + mwifiex_read_reg(adapter, PCI_VENDOR_ID, &rx_val); + if ((mwifiex_pcie_txbd_not_full(card)) && tx_param->next_pkt_len) { /* have more packets and TxBD still can hold more */ diff --git a/drivers/net/wireless/marvell/mwifiex/sta_event.c b/drivers/net/wireless/marvell/mwifiex/sta_event.c index 5fdffb114913d..fd12093863801 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_event.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_event.c @@ -364,10 +364,12 @@ static void mwifiex_process_uap_tx_pause(struct mwifiex_private *priv, sta_ptr = mwifiex_get_sta_entry(priv, tp->peermac); if (sta_ptr && sta_ptr->tx_pause != tp->tx_pause) { sta_ptr->tx_pause = tp->tx_pause; + spin_unlock_bh(&priv->sta_list_spinlock); mwifiex_update_ralist_tx_pause(priv, tp->peermac, tp->tx_pause); + } else { + spin_unlock_bh(&priv->sta_list_spinlock); } - spin_unlock_bh(&priv->sta_list_spinlock); } } @@ -399,11 +401,13 @@ static void mwifiex_process_sta_tx_pause(struct mwifiex_private *priv, sta_ptr = mwifiex_get_sta_entry(priv, tp->peermac); if (sta_ptr && sta_ptr->tx_pause != tp->tx_pause) { sta_ptr->tx_pause = tp->tx_pause; + spin_unlock_bh(&priv->sta_list_spinlock); mwifiex_update_ralist_tx_pause(priv, tp->peermac, tp->tx_pause); + } else { + spin_unlock_bh(&priv->sta_list_spinlock); } - spin_unlock_bh(&priv->sta_list_spinlock); } } } diff --git a/drivers/net/wireless/marvell/mwifiex/usb.c b/drivers/net/wireless/marvell/mwifiex/usb.c index 528107d70c1cb..39cf713d5054c 100644 --- a/drivers/net/wireless/marvell/mwifiex/usb.c +++ b/drivers/net/wireless/marvell/mwifiex/usb.c @@ -130,7 +130,8 @@ static int mwifiex_usb_recv(struct mwifiex_adapter *adapter, default: mwifiex_dbg(adapter, ERROR, "unknown recv_type %#x\n", recv_type); - return -1; + ret = -1; + goto exit_restore_skb; } break; case MWIFIEX_USB_EP_DATA: @@ -505,6 +506,22 @@ static int mwifiex_usb_probe(struct usb_interface *intf, } } + switch (card->usb_boot_state) { + case USB8XXX_FW_DNLD: + /* Reject broken descriptors. */ + if (!card->rx_cmd_ep || !card->tx_cmd_ep) + return -ENODEV; + if (card->bulk_out_maxpktsize == 0) + return -ENODEV; + break; + case USB8XXX_FW_READY: + /* Assume the driver can handle missing endpoints for now. */ + break; + default: + WARN_ON(1); + return -ENODEV; + } + usb_set_intfdata(intf, card); ret = mwifiex_add_card(card, &card->fw_done, &usb_ops, diff --git a/drivers/net/wireless/marvell/mwl8k.c b/drivers/net/wireless/marvell/mwl8k.c index 1b76b24191866..14ac2384218df 100644 --- a/drivers/net/wireless/marvell/mwl8k.c +++ b/drivers/net/wireless/marvell/mwl8k.c @@ -5796,8 +5796,8 @@ static void mwl8k_fw_state_machine(const struct firmware *fw, void *context) fail: priv->fw_state = FW_STATE_ERROR; complete(&priv->firmware_loading_complete); - device_release_driver(&priv->pdev->dev); mwl8k_release_firmware(priv); + device_release_driver(&priv->pdev->dev); } #define MAX_RESTART_ATTEMPTS 1 diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c index 0a5695c3d9241..625492284389c 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c @@ -605,6 +605,9 @@ mt7603_sta_rate_tbl_update(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta_rates *sta_rates = rcu_dereference(sta->rates); int i; + if (!sta_rates) + return; + spin_lock_bh(&dev->mt76.lock); for (i = 0; i < ARRAY_SIZE(msta->rates); i++) { msta->rates[i].idx = sta_rates->rate[i].idx; diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c index 38183aef0eb92..5ad8b0afa41f0 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c @@ -385,6 +385,9 @@ static void mt7615_sta_rate_tbl_update(struct ieee80211_hw *hw, struct ieee80211_sta_rates *sta_rates = rcu_dereference(sta->rates); int i; + if (!sta_rates) + return; + spin_lock_bh(&dev->mt76.lock); for (i = 0; i < ARRAY_SIZE(msta->rates); i++) { msta->rates[i].idx = sta_rates->rate[i].idx; diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c index abacb4ea7179d..5c12cd7fce940 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c @@ -154,7 +154,7 @@ void mt76x02_mac_wcid_set_drop(struct mt76x02_dev *dev, u8 idx, bool drop) mt76_wr(dev, MT_WCID_DROP(idx), (val & ~bit) | (bit * drop)); } -static __le16 +static u16 mt76x02_mac_tx_rate_val(struct mt76x02_dev *dev, const struct ieee80211_tx_rate *rate, u8 *nss_val) { @@ -200,14 +200,14 @@ mt76x02_mac_tx_rate_val(struct mt76x02_dev *dev, rateval |= MT_RXWI_RATE_SGI; *nss_val = nss; - return cpu_to_le16(rateval); + return rateval; } void mt76x02_mac_wcid_set_rate(struct mt76x02_dev *dev, struct mt76_wcid *wcid, const struct ieee80211_tx_rate *rate) { s8 max_txpwr_adj = mt76x02_tx_get_max_txpwr_adj(dev, rate); - __le16 rateval; + u16 rateval; u32 tx_info; s8 nss; @@ -320,7 +320,7 @@ void mt76x02_mac_write_txwi(struct mt76x02_dev *dev, struct mt76x02_txwi *txwi, struct ieee80211_key_conf *key = info->control.hw_key; u32 wcid_tx_info; u16 rate_ht_mask = FIELD_PREP(MT_RXWI_RATE_PHY, BIT(1) | BIT(2)); - u16 txwi_flags = 0; + u16 txwi_flags = 0, rateval; u8 nss; s8 txpwr_adj, max_txpwr_adj; u8 ccmp_pn[8], nstreams = dev->mt76.chainmask & 0xf; @@ -356,14 +356,15 @@ void mt76x02_mac_write_txwi(struct mt76x02_dev *dev, struct mt76x02_txwi *txwi, if (wcid && (rate->idx < 0 || !rate->count)) { wcid_tx_info = wcid->tx_info; - txwi->rate = FIELD_GET(MT_WCID_TX_INFO_RATE, wcid_tx_info); + rateval = FIELD_GET(MT_WCID_TX_INFO_RATE, wcid_tx_info); max_txpwr_adj = FIELD_GET(MT_WCID_TX_INFO_TXPWR_ADJ, wcid_tx_info); nss = FIELD_GET(MT_WCID_TX_INFO_NSS, wcid_tx_info); } else { - txwi->rate = mt76x02_mac_tx_rate_val(dev, rate, &nss); + rateval = mt76x02_mac_tx_rate_val(dev, rate, &nss); max_txpwr_adj = mt76x02_tx_get_max_txpwr_adj(dev, rate); } + txwi->rate = cpu_to_le16(rateval); txpwr_adj = mt76x02_tx_get_txpwr_adj(dev, dev->mt76.txpower_conf, max_txpwr_adj); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c index cf611d1b817c0..e6d7646a0d9ca 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c @@ -76,7 +76,7 @@ mt76pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) mt76_rmw_field(dev, 0x15a10, 0x1f << 16, 0x9); /* RG_SSUSB_G1_CDR_BIC_LTR = 0xf */ - mt76_rmw_field(dev, 0x15a0c, 0xf << 28, 0xf); + mt76_rmw_field(dev, 0x15a0c, 0xfU << 28, 0xf); /* RG_SSUSB_CDR_BR_PE1D = 0x3 */ mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3); diff --git a/drivers/net/wireless/mediatek/mt7601u/usb.c b/drivers/net/wireless/mediatek/mt7601u/usb.c index 6bcc4a13ae6c7..cc772045d526f 100644 --- a/drivers/net/wireless/mediatek/mt7601u/usb.c +++ b/drivers/net/wireless/mediatek/mt7601u/usb.c @@ -26,6 +26,7 @@ static const struct usb_device_id mt7601u_device_table[] = { { USB_DEVICE(0x2717, 0x4106) }, { USB_DEVICE(0x2955, 0x0001) }, { USB_DEVICE(0x2955, 0x1001) }, + { USB_DEVICE(0x2955, 0x1003) }, { USB_DEVICE(0x2a5f, 0x1000) }, { USB_DEVICE(0x7392, 0x7710) }, { 0, } diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c b/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c index bc2dfef0de22b..24af321f625f2 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c @@ -25,6 +25,9 @@ static bool rt2x00usb_check_usb_error(struct rt2x00_dev *rt2x00dev, int status) if (status == -ENODEV || status == -ENOENT) return true; + if (!test_bit(DEVICE_STATE_STARTED, &rt2x00dev->flags)) + return false; + if (status == -EPROTO || status == -ETIMEDOUT) rt2x00dev->num_proto_errs++; else diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index cf372684b6812..3836d6ac53049 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -382,6 +382,8 @@ static int ray_config(struct pcmcia_device *link) goto failed; local->sram = ioremap(link->resource[2]->start, resource_size(link->resource[2])); + if (!local->sram) + goto failed; /*** Set up 16k window for shared memory (receive buffer) ***************/ link->resource[3]->flags |= @@ -396,6 +398,8 @@ static int ray_config(struct pcmcia_device *link) goto failed; local->rmem = ioremap(link->resource[3]->start, resource_size(link->resource[3])); + if (!local->rmem) + goto failed; /*** Set up window for attribute memory ***********************************/ link->resource[4]->flags |= @@ -410,6 +414,8 @@ static int ray_config(struct pcmcia_device *link) goto failed; local->amem = ioremap(link->resource[4]->start, resource_size(link->resource[4])); + if (!local->amem) + goto failed; dev_dbg(&link->dev, "ray_config sram=%p\n", local->sram); dev_dbg(&link->dev, "ray_config rmem=%p\n", local->rmem); diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c b/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c index d5f65372356bf..0b305badae989 100644 --- a/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c +++ b/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c @@ -460,8 +460,10 @@ static void rtl8180_tx(struct ieee80211_hw *dev, struct rtl8180_priv *priv = dev->priv; struct rtl8180_tx_ring *ring; struct rtl8180_tx_desc *entry; + unsigned int prio = 0; unsigned long flags; - unsigned int idx, prio, hw_prio; + unsigned int idx, hw_prio; + dma_addr_t mapping; u32 tx_flags; u8 rc_flags; @@ -470,7 +472,9 @@ static void rtl8180_tx(struct ieee80211_hw *dev, /* do arithmetic and then convert to le16 */ u16 frame_duration = 0; - prio = skb_get_queue_mapping(skb); + /* rtl8180/rtl8185 only has one useable tx queue */ + if (dev->queues > IEEE80211_AC_BK) + prio = skb_get_queue_mapping(skb); ring = &priv->tx_ring[prio]; mapping = pci_map_single(priv->pdev, skb->data, diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8187/rtl8225.c b/drivers/net/wireless/realtek/rtl818x/rtl8187/rtl8225.c index b2616d61b66d8..843e3f41a2bc3 100644 --- a/drivers/net/wireless/realtek/rtl818x/rtl8187/rtl8225.c +++ b/drivers/net/wireless/realtek/rtl818x/rtl8187/rtl8225.c @@ -28,7 +28,7 @@ u8 rtl818x_ioread8_idx(struct rtl8187_priv *priv, usb_control_msg(priv->udev, usb_rcvctrlpipe(priv->udev, 0), RTL8187_REQ_GET_REG, RTL8187_REQT_READ, (unsigned long)addr, idx & 0x03, - &priv->io_dmabuf->bits8, sizeof(val), HZ / 2); + &priv->io_dmabuf->bits8, sizeof(val), 500); val = priv->io_dmabuf->bits8; mutex_unlock(&priv->io_mutex); @@ -45,7 +45,7 @@ u16 rtl818x_ioread16_idx(struct rtl8187_priv *priv, usb_control_msg(priv->udev, usb_rcvctrlpipe(priv->udev, 0), RTL8187_REQ_GET_REG, RTL8187_REQT_READ, (unsigned long)addr, idx & 0x03, - &priv->io_dmabuf->bits16, sizeof(val), HZ / 2); + &priv->io_dmabuf->bits16, sizeof(val), 500); val = priv->io_dmabuf->bits16; mutex_unlock(&priv->io_mutex); @@ -62,7 +62,7 @@ u32 rtl818x_ioread32_idx(struct rtl8187_priv *priv, usb_control_msg(priv->udev, usb_rcvctrlpipe(priv->udev, 0), RTL8187_REQ_GET_REG, RTL8187_REQT_READ, (unsigned long)addr, idx & 0x03, - &priv->io_dmabuf->bits32, sizeof(val), HZ / 2); + &priv->io_dmabuf->bits32, sizeof(val), 500); val = priv->io_dmabuf->bits32; mutex_unlock(&priv->io_mutex); @@ -79,7 +79,7 @@ void rtl818x_iowrite8_idx(struct rtl8187_priv *priv, usb_control_msg(priv->udev, usb_sndctrlpipe(priv->udev, 0), RTL8187_REQ_SET_REG, RTL8187_REQT_WRITE, (unsigned long)addr, idx & 0x03, - &priv->io_dmabuf->bits8, sizeof(val), HZ / 2); + &priv->io_dmabuf->bits8, sizeof(val), 500); mutex_unlock(&priv->io_mutex); } @@ -93,7 +93,7 @@ void rtl818x_iowrite16_idx(struct rtl8187_priv *priv, usb_control_msg(priv->udev, usb_sndctrlpipe(priv->udev, 0), RTL8187_REQ_SET_REG, RTL8187_REQT_WRITE, (unsigned long)addr, idx & 0x03, - &priv->io_dmabuf->bits16, sizeof(val), HZ / 2); + &priv->io_dmabuf->bits16, sizeof(val), 500); mutex_unlock(&priv->io_mutex); } @@ -107,7 +107,7 @@ void rtl818x_iowrite32_idx(struct rtl8187_priv *priv, usb_control_msg(priv->udev, usb_sndctrlpipe(priv->udev, 0), RTL8187_REQ_SET_REG, RTL8187_REQT_WRITE, (unsigned long)addr, idx & 0x03, - &priv->io_dmabuf->bits32, sizeof(val), HZ / 2); + &priv->io_dmabuf->bits32, sizeof(val), 500); mutex_unlock(&priv->io_mutex); } @@ -183,7 +183,7 @@ static void rtl8225_write_8051(struct ieee80211_hw *dev, u8 addr, __le16 data) usb_control_msg(priv->udev, usb_sndctrlpipe(priv->udev, 0), RTL8187_REQ_SET_REG, RTL8187_REQT_WRITE, addr, 0x8225, &priv->io_dmabuf->bits16, sizeof(data), - HZ / 2); + 500); mutex_unlock(&priv->io_mutex); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c index f070f25bb735a..df7a14320fd29 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c @@ -1000,6 +1000,7 @@ int rtl92cu_hw_init(struct ieee80211_hw *hw) _initpabias(hw); rtl92c_dm_init(hw); exit: + local_irq_disable(); local_irq_restore(flags); return err; } diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.c b/drivers/net/wireless/realtek/rtlwifi/usb.c index bad06939a247c..9bcb187d37dcd 100644 --- a/drivers/net/wireless/realtek/rtlwifi/usb.c +++ b/drivers/net/wireless/realtek/rtlwifi/usb.c @@ -1013,7 +1013,7 @@ int rtl_usb_probe(struct usb_interface *intf, hw = ieee80211_alloc_hw(sizeof(struct rtl_priv) + sizeof(struct rtl_usb_priv), &rtl_ops); if (!hw) { - WARN_ONCE(true, "rtl_usb: ieee80211 alloc failed\n"); + pr_warn("rtl_usb: ieee80211 alloc failed\n"); return -ENOMEM; } rtlpriv = hw->priv; diff --git a/drivers/net/wireless/rsi/rsi_91x_core.c b/drivers/net/wireless/rsi/rsi_91x_core.c index 3644d7d994638..c6c29034b2ead 100644 --- a/drivers/net/wireless/rsi/rsi_91x_core.c +++ b/drivers/net/wireless/rsi/rsi_91x_core.c @@ -400,6 +400,8 @@ void rsi_core_xmit(struct rsi_common *common, struct sk_buff *skb) info = IEEE80211_SKB_CB(skb); tx_params = (struct skb_info *)info->driver_data; + /* info->driver_data and info->control part of union so make copy */ + tx_params->have_key = !!info->control.hw_key; wh = (struct ieee80211_hdr *)&skb->data[0]; tx_params->sta_id = 0; diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c index 03791f3fe480c..7d0b44fd56901 100644 --- a/drivers/net/wireless/rsi/rsi_91x_hal.c +++ b/drivers/net/wireless/rsi/rsi_91x_hal.c @@ -203,7 +203,7 @@ int rsi_prepare_data_desc(struct rsi_common *common, struct sk_buff *skb) wh->frame_control |= cpu_to_le16(RSI_SET_PS_ENABLE); if ((!(info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT)) && - info->control.hw_key) { + tx_params->have_key) { if (rsi_is_cipher_wep(common)) ieee80211_size += 4; else @@ -214,15 +214,17 @@ int rsi_prepare_data_desc(struct rsi_common *common, struct sk_buff *skb) RSI_WIFI_DATA_Q); data_desc->header_len = ieee80211_size; - if (common->min_rate != RSI_RATE_AUTO) { + if (common->rate_config[common->band].fixed_enabled) { /* Send fixed rate */ + u16 fixed_rate = common->rate_config[common->band].fixed_hw_rate; + data_desc->frame_info = cpu_to_le16(RATE_INFO_ENABLE); - data_desc->rate_info = cpu_to_le16(common->min_rate); + data_desc->rate_info = cpu_to_le16(fixed_rate); if (conf_is_ht40(&common->priv->hw->conf)) data_desc->bbp_info = cpu_to_le16(FULL40M_ENABLE); - if ((common->vif_info[0].sgi) && (common->min_rate & 0x100)) { + if (common->vif_info[0].sgi && (fixed_rate & 0x100)) { /* Only MCS rates */ data_desc->rate_info |= cpu_to_le16(ENABLE_SHORTGI_RATE); @@ -1038,8 +1040,10 @@ static int rsi_load_9116_firmware(struct rsi_hw *adapter) } ta_firmware = kmemdup(fw_entry->data, fw_entry->size, GFP_KERNEL); - if (!ta_firmware) + if (!ta_firmware) { + status = -ENOMEM; goto fail_release_fw; + } fw_p = ta_firmware; instructions_sz = fw_entry->size; rsi_dbg(INFO_ZONE, "FW Length = %d bytes\n", instructions_sz); diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index ca1e609f637e4..1279339a4b59b 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -510,7 +510,6 @@ static int rsi_mac80211_add_interface(struct ieee80211_hw *hw, if ((vif->type == NL80211_IFTYPE_AP) || (vif->type == NL80211_IFTYPE_P2P_GO)) { rsi_send_rx_filter_frame(common, DISALLOW_BEACONS); - common->min_rate = RSI_RATE_AUTO; for (i = 0; i < common->max_stations; i++) common->stations[i].sta = NULL; } @@ -1211,20 +1210,32 @@ static int rsi_mac80211_set_rate_mask(struct ieee80211_hw *hw, struct ieee80211_vif *vif, const struct cfg80211_bitrate_mask *mask) { + const unsigned int mcs_offset = ARRAY_SIZE(rsi_rates); struct rsi_hw *adapter = hw->priv; struct rsi_common *common = adapter->priv; - enum nl80211_band band = hw->conf.chandef.chan->band; + int i; mutex_lock(&common->mutex); - common->fixedrate_mask[band] = 0; - if (mask->control[band].legacy == 0xfff) { - common->fixedrate_mask[band] = - (mask->control[band].ht_mcs[0] << 12); - } else { - common->fixedrate_mask[band] = - mask->control[band].legacy; + for (i = 0; i < ARRAY_SIZE(common->rate_config); i++) { + struct rsi_rate_config *cfg = &common->rate_config[i]; + u32 bm; + + bm = mask->control[i].legacy | (mask->control[i].ht_mcs[0] << mcs_offset); + if (hweight32(bm) == 1) { /* single rate */ + int rate_index = ffs(bm) - 1; + + if (rate_index < mcs_offset) + cfg->fixed_hw_rate = rsi_rates[rate_index].hw_value; + else + cfg->fixed_hw_rate = rsi_mcsrates[rate_index - mcs_offset]; + cfg->fixed_enabled = true; + } else { + cfg->configured_mask = bm; + cfg->fixed_enabled = false; + } } + mutex_unlock(&common->mutex); return 0; @@ -1360,46 +1371,6 @@ void rsi_indicate_pkt_to_os(struct rsi_common *common, ieee80211_rx_irqsafe(hw, skb); } -static void rsi_set_min_rate(struct ieee80211_hw *hw, - struct ieee80211_sta *sta, - struct rsi_common *common) -{ - u8 band = hw->conf.chandef.chan->band; - u8 ii; - u32 rate_bitmap; - bool matched = false; - - common->bitrate_mask[band] = sta->supp_rates[band]; - - rate_bitmap = (common->fixedrate_mask[band] & sta->supp_rates[band]); - - if (rate_bitmap & 0xfff) { - /* Find out the min rate */ - for (ii = 0; ii < ARRAY_SIZE(rsi_rates); ii++) { - if (rate_bitmap & BIT(ii)) { - common->min_rate = rsi_rates[ii].hw_value; - matched = true; - break; - } - } - } - - common->vif_info[0].is_ht = sta->ht_cap.ht_supported; - - if ((common->vif_info[0].is_ht) && (rate_bitmap >> 12)) { - for (ii = 0; ii < ARRAY_SIZE(rsi_mcsrates); ii++) { - if ((rate_bitmap >> 12) & BIT(ii)) { - common->min_rate = rsi_mcsrates[ii]; - matched = true; - break; - } - } - } - - if (!matched) - common->min_rate = 0xffff; -} - /** * rsi_mac80211_sta_add() - This function notifies driver about a peer getting * connected. @@ -1498,9 +1469,9 @@ static int rsi_mac80211_sta_add(struct ieee80211_hw *hw, if ((vif->type == NL80211_IFTYPE_STATION) || (vif->type == NL80211_IFTYPE_P2P_CLIENT)) { - rsi_set_min_rate(hw, sta, common); + common->bitrate_mask[common->band] = sta->supp_rates[common->band]; + common->vif_info[0].is_ht = sta->ht_cap.ht_supported; if (sta->ht_cap.ht_supported) { - common->vif_info[0].is_ht = true; common->bitrate_mask[NL80211_BAND_2GHZ] = sta->supp_rates[NL80211_BAND_2GHZ]; if ((sta->ht_cap.cap & IEEE80211_HT_CAP_SGI_20) || @@ -1574,7 +1545,6 @@ static int rsi_mac80211_sta_remove(struct ieee80211_hw *hw, bss->qos = sta->wme; common->bitrate_mask[NL80211_BAND_2GHZ] = 0; common->bitrate_mask[NL80211_BAND_5GHZ] = 0; - common->min_rate = 0xffff; common->vif_info[0].is_ht = false; common->vif_info[0].sgi = false; common->vif_info[0].seq_start = 0; diff --git a/drivers/net/wireless/rsi/rsi_91x_main.c b/drivers/net/wireless/rsi/rsi_91x_main.c index 29d83049c5f56..d92337169ee3a 100644 --- a/drivers/net/wireless/rsi/rsi_91x_main.c +++ b/drivers/net/wireless/rsi/rsi_91x_main.c @@ -23,6 +23,7 @@ #include "rsi_common.h" #include "rsi_coex.h" #include "rsi_hal.h" +#include "rsi_usb.h" u32 rsi_zone_enabled = /* INFO_ZONE | INIT_ZONE | @@ -167,6 +168,9 @@ int rsi_read_pkt(struct rsi_common *common, u8 *rx_pkt, s32 rcv_pkt_len) frame_desc = &rx_pkt[index]; actual_length = *(u16 *)&frame_desc[0]; offset = *(u16 *)&frame_desc[2]; + if (!rcv_pkt_len && offset > + RSI_MAX_RX_USB_PKT_SIZE - FRAME_DESC_SZ) + goto fail; queueno = rsi_get_queueno(frame_desc, offset); length = rsi_get_length(frame_desc, offset); @@ -210,9 +214,10 @@ int rsi_read_pkt(struct rsi_common *common, u8 *rx_pkt, s32 rcv_pkt_len) bt_pkt_type = frame_desc[offset + BT_RX_PKT_TYPE_OFST]; if (bt_pkt_type == BT_CARD_READY_IND) { rsi_dbg(INFO_ZONE, "BT Card ready recvd\n"); - if (rsi_bt_ops.attach(common, &g_proto_ops)) - rsi_dbg(ERR_ZONE, - "Failed to attach BT module\n"); + if (common->fsm_state == FSM_MAC_INIT_DONE) + rsi_attach_bt(common); + else + common->bt_defer_attach = true; } else { if (common->bt_adapter) rsi_bt_ops.recv_pkt(common->bt_adapter, @@ -277,6 +282,15 @@ void rsi_set_bt_context(void *priv, void *bt_context) } #endif +void rsi_attach_bt(struct rsi_common *common) +{ +#ifdef CONFIG_RSI_COEX + if (rsi_bt_ops.attach(common, &g_proto_ops)) + rsi_dbg(ERR_ZONE, + "Failed to attach BT module\n"); +#endif +} + /** * rsi_91x_init() - This function initializes os interface operations. * @void: Void. @@ -358,6 +372,7 @@ struct rsi_hw *rsi_91x_init(u16 oper_mode) if (common->coex_mode > 1) { if (rsi_coex_attach(common)) { rsi_dbg(ERR_ZONE, "Failed to init coex module\n"); + rsi_kill_thread(&common->tx_thread); goto err; } } diff --git a/drivers/net/wireless/rsi/rsi_91x_mgmt.c b/drivers/net/wireless/rsi/rsi_91x_mgmt.c index ed67f65986775..0c0e4c08be8aa 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mgmt.c +++ b/drivers/net/wireless/rsi/rsi_91x_mgmt.c @@ -276,7 +276,7 @@ static void rsi_set_default_parameters(struct rsi_common *common) common->channel_width = BW_20MHZ; common->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD; common->channel = 1; - common->min_rate = 0xffff; + memset(&common->rate_config, 0, sizeof(common->rate_config)); common->fsm_state = FSM_CARD_NOT_READY; common->iface_down = true; common->endpoint = EP_2GHZ_20MHZ; @@ -1304,7 +1304,7 @@ static int rsi_send_auto_rate_request(struct rsi_common *common, u8 band = hw->conf.chandef.chan->band; u8 num_supported_rates = 0; u8 rate_table_offset, rate_offset = 0; - u32 rate_bitmap; + u32 rate_bitmap, configured_rates; u16 *selected_rates, min_rate; bool is_ht = false, is_sgi = false; u16 frame_len = sizeof(struct rsi_auto_rate); @@ -1354,6 +1354,10 @@ static int rsi_send_auto_rate_request(struct rsi_common *common, is_sgi = true; } + /* Limit to any rates administratively configured by cfg80211 */ + configured_rates = common->rate_config[band].configured_mask ?: 0xffffffff; + rate_bitmap &= configured_rates; + if (band == NL80211_BAND_2GHZ) { if ((rate_bitmap == 0) && (is_ht)) min_rate = RSI_RATE_MCS0; @@ -1379,10 +1383,13 @@ static int rsi_send_auto_rate_request(struct rsi_common *common, num_supported_rates = jj; if (is_ht) { - for (ii = 0; ii < ARRAY_SIZE(mcs); ii++) - selected_rates[jj++] = mcs[ii]; - num_supported_rates += ARRAY_SIZE(mcs); - rate_offset += ARRAY_SIZE(mcs); + for (ii = 0; ii < ARRAY_SIZE(mcs); ii++) { + if (configured_rates & BIT(ii + ARRAY_SIZE(rsi_rates))) { + selected_rates[jj++] = mcs[ii]; + num_supported_rates++; + rate_offset++; + } + } } sort(selected_rates, jj, sizeof(u16), &rsi_compare, NULL); @@ -1467,7 +1474,7 @@ void rsi_inform_bss_status(struct rsi_common *common, qos_enable, aid, sta_id, vif); - if (common->min_rate == 0xffff) + if (!common->rate_config[common->band].fixed_enabled) rsi_send_auto_rate_request(common, sta, sta_id, vif); if (opmode == RSI_OPMODE_STA && !(assoc_cap & WLAN_CAPABILITY_PRIVACY) && @@ -2056,6 +2063,9 @@ static int rsi_handle_ta_confirm_type(struct rsi_common *common, if (common->reinit_hw) { complete(&common->wlan_init_completion); } else { + if (common->bt_defer_attach) + rsi_attach_bt(common); + return rsi_mac80211_attach(common); } } diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c index d51ec7104fb7c..4fe837090cdae 100644 --- a/drivers/net/wireless/rsi/rsi_91x_sdio.c +++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c @@ -24,10 +24,7 @@ /* Default operating mode is wlan STA + BT */ static u16 dev_oper_mode = DEV_OPMODE_STA_BT_DUAL; module_param(dev_oper_mode, ushort, 0444); -MODULE_PARM_DESC(dev_oper_mode, - "1[Wi-Fi], 4[BT], 8[BT LE], 5[Wi-Fi STA + BT classic]\n" - "9[Wi-Fi STA + BT LE], 13[Wi-Fi STA + BT classic + BT LE]\n" - "6[AP + BT classic], 14[AP + BT classic + BT LE]"); +MODULE_PARM_DESC(dev_oper_mode, DEV_OPMODE_PARAM_DESC); /** * rsi_sdio_set_cmd52_arg() - This function prepares cmd 52 read/write arg. diff --git a/drivers/net/wireless/rsi/rsi_91x_usb.c b/drivers/net/wireless/rsi/rsi_91x_usb.c index a296f4e0d324a..94bf2a7ca635d 100644 --- a/drivers/net/wireless/rsi/rsi_91x_usb.c +++ b/drivers/net/wireless/rsi/rsi_91x_usb.c @@ -25,10 +25,7 @@ /* Default operating mode is wlan STA + BT */ static u16 dev_oper_mode = DEV_OPMODE_STA_BT_DUAL; module_param(dev_oper_mode, ushort, 0444); -MODULE_PARM_DESC(dev_oper_mode, - "1[Wi-Fi], 4[BT], 8[BT LE], 5[Wi-Fi STA + BT classic]\n" - "9[Wi-Fi STA + BT LE], 13[Wi-Fi STA + BT classic + BT LE]\n" - "6[AP + BT classic], 14[AP + BT classic + BT LE]"); +MODULE_PARM_DESC(dev_oper_mode, DEV_OPMODE_PARAM_DESC); static int rsi_rx_urb_submit(struct rsi_hw *adapter, u8 ep_num, gfp_t flags); @@ -61,7 +58,7 @@ static int rsi_usb_card_write(struct rsi_hw *adapter, (void *)seg, (int)len, &transfer, - HZ * 5); + USB_CTRL_SET_TIMEOUT); if (status < 0) { rsi_dbg(ERR_ZONE, @@ -264,8 +261,12 @@ static void rsi_rx_done_handler(struct urb *urb) struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)rx_cb->data; int status = -EINVAL; + if (!rx_cb->rx_skb) + return; + if (urb->status) { dev_kfree_skb(rx_cb->rx_skb); + rx_cb->rx_skb = NULL; return; } @@ -289,8 +290,10 @@ static void rsi_rx_done_handler(struct urb *urb) if (rsi_rx_urb_submit(dev->priv, rx_cb->ep_num, GFP_ATOMIC)) rsi_dbg(ERR_ZONE, "%s: Failed in urb submission", __func__); - if (status) + if (status) { dev_kfree_skb(rx_cb->rx_skb); + rx_cb->rx_skb = NULL; + } } static void rsi_rx_urb_kill(struct rsi_hw *adapter, u8 ep_num) @@ -317,7 +320,6 @@ static int rsi_rx_urb_submit(struct rsi_hw *adapter, u8 ep_num, gfp_t mem_flags) struct sk_buff *skb; u8 dword_align_bytes = 0; -#define RSI_MAX_RX_USB_PKT_SIZE 3000 skb = dev_alloc_skb(RSI_MAX_RX_USB_PKT_SIZE); if (!skb) return -ENOMEM; @@ -806,6 +808,7 @@ static int rsi_probe(struct usb_interface *pfunction, } else { rsi_dbg(ERR_ZONE, "%s: Unsupported RSI device id 0x%x\n", __func__, id->idProduct); + status = -ENODEV; goto err1; } diff --git a/drivers/net/wireless/rsi/rsi_hal.h b/drivers/net/wireless/rsi/rsi_hal.h index 46e36df9e8e3c..a2fbec1cec4c3 100644 --- a/drivers/net/wireless/rsi/rsi_hal.h +++ b/drivers/net/wireless/rsi/rsi_hal.h @@ -28,6 +28,17 @@ #define DEV_OPMODE_AP_BT 6 #define DEV_OPMODE_AP_BT_DUAL 14 +#define DEV_OPMODE_PARAM_DESC \ + __stringify(DEV_OPMODE_WIFI_ALONE) "[Wi-Fi alone], " \ + __stringify(DEV_OPMODE_BT_ALONE) "[BT classic alone], " \ + __stringify(DEV_OPMODE_BT_LE_ALONE) "[BT LE alone], " \ + __stringify(DEV_OPMODE_BT_DUAL) "[BT classic + BT LE alone], " \ + __stringify(DEV_OPMODE_STA_BT) "[Wi-Fi STA + BT classic], " \ + __stringify(DEV_OPMODE_STA_BT_LE) "[Wi-Fi STA + BT LE], " \ + __stringify(DEV_OPMODE_STA_BT_DUAL) "[Wi-Fi STA + BT classic + BT LE], " \ + __stringify(DEV_OPMODE_AP_BT) "[Wi-Fi AP + BT classic], " \ + __stringify(DEV_OPMODE_AP_BT_DUAL) "[Wi-Fi AP + BT classic + BT LE]" + #define FLASH_WRITE_CHUNK_SIZE (4 * 1024) #define FLASH_SECTOR_SIZE (4 * 1024) diff --git a/drivers/net/wireless/rsi/rsi_main.h b/drivers/net/wireless/rsi/rsi_main.h index b3e25bc28682c..de595025c0197 100644 --- a/drivers/net/wireless/rsi/rsi_main.h +++ b/drivers/net/wireless/rsi/rsi_main.h @@ -61,6 +61,7 @@ enum RSI_FSM_STATES { extern u32 rsi_zone_enabled; extern __printf(2, 3) void rsi_dbg(u32 zone, const char *fmt, ...); +#define RSI_MAX_BANDS 2 #define RSI_MAX_VIFS 3 #define NUM_EDCA_QUEUES 4 #define IEEE80211_ADDR_LEN 6 @@ -139,6 +140,7 @@ struct skb_info { u8 internal_hdr_size; struct ieee80211_vif *vif; u8 vap_id; + bool have_key; }; enum edca_queue { @@ -229,6 +231,12 @@ struct rsi_9116_features { u32 ps_options; }; +struct rsi_rate_config { + u32 configured_mask; /* configured by mac80211 bits 0-11=legacy 12+ mcs */ + u16 fixed_hw_rate; + bool fixed_enabled; +}; + struct rsi_common { struct rsi_hw *priv; struct vif_priv vif_info[RSI_MAX_VIFS]; @@ -254,8 +262,8 @@ struct rsi_common { u8 channel_width; u16 rts_threshold; - u16 bitrate_mask[2]; - u32 fixedrate_mask[2]; + u32 bitrate_mask[RSI_MAX_BANDS]; + struct rsi_rate_config rate_config[RSI_MAX_BANDS]; u8 rf_reset; struct transmit_q_stats tx_stats; @@ -276,7 +284,6 @@ struct rsi_common { u8 mac_id; u8 radio_id; u16 rate_pwr[20]; - u16 min_rate; /* WMM algo related */ u8 selected_qnum; @@ -320,6 +327,7 @@ struct rsi_common { struct ieee80211_vif *roc_vif; bool eapol4_confirm; + bool bt_defer_attach; void *bt_adapter; struct cfg80211_scan_request *hwscan; @@ -401,5 +409,6 @@ struct rsi_host_intf_ops { enum rsi_host_intf rsi_get_host_intf(void *priv); void rsi_set_bt_context(void *priv, void *bt_context); +void rsi_attach_bt(struct rsi_common *common); #endif diff --git a/drivers/net/wireless/rsi/rsi_usb.h b/drivers/net/wireless/rsi/rsi_usb.h index 8702f434b5699..ad88f8c70a351 100644 --- a/drivers/net/wireless/rsi/rsi_usb.h +++ b/drivers/net/wireless/rsi/rsi_usb.h @@ -44,6 +44,8 @@ #define RSI_USB_BUF_SIZE 4096 #define RSI_USB_CTRL_BUF_SIZE 0x04 +#define RSI_MAX_RX_USB_PKT_SIZE 3000 + struct rx_usb_ctrl_block { u8 *data; struct urb *rx_urb; diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 32fe131ba366d..f7e746f1c9fb3 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -203,6 +203,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */ unsigned int rx_queue_max; unsigned int rx_queue_len; unsigned long last_rx_time; + unsigned int rx_slots_needed; bool stalled; struct xenvif_copy_state rx_copy; diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index c213f2b812691..995566a2785fa 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -492,7 +492,7 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, * the header's copy failed, and they are * sharing a slot, send an error */ - if (i == 0 && sharedslot) + if (i == 0 && !first_shinfo && sharedslot) xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR); else diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index 48e2006f96ce6..85a5a622ec184 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -33,28 +33,36 @@ #include #include -static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) +/* + * Update the needed ring page slots for the first SKB queued. + * Note that any call sequence outside the RX thread calling this function + * needs to wake up the RX thread via a call of xenvif_kick_thread() + * afterwards in order to avoid a race with putting the thread to sleep. + */ +static void xenvif_update_needed_slots(struct xenvif_queue *queue, + const struct sk_buff *skb) { - RING_IDX prod, cons; - struct sk_buff *skb; - int needed; - unsigned long flags; - - spin_lock_irqsave(&queue->rx_queue.lock, flags); + unsigned int needed = 0; - skb = skb_peek(&queue->rx_queue); - if (!skb) { - spin_unlock_irqrestore(&queue->rx_queue.lock, flags); - return false; + if (skb) { + needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE); + if (skb_is_gso(skb)) + needed++; + if (skb->sw_hash) + needed++; } - needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE); - if (skb_is_gso(skb)) - needed++; - if (skb->sw_hash) - needed++; + WRITE_ONCE(queue->rx_slots_needed, needed); +} - spin_unlock_irqrestore(&queue->rx_queue.lock, flags); +static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) +{ + RING_IDX prod, cons; + unsigned int needed; + + needed = READ_ONCE(queue->rx_slots_needed); + if (!needed) + return false; do { prod = queue->rx.sring->req_prod; @@ -80,13 +88,19 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) spin_lock_irqsave(&queue->rx_queue.lock, flags); - __skb_queue_tail(&queue->rx_queue, skb); - - queue->rx_queue_len += skb->len; - if (queue->rx_queue_len > queue->rx_queue_max) { + if (queue->rx_queue_len >= queue->rx_queue_max) { struct net_device *dev = queue->vif->dev; netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id)); + kfree_skb(skb); + queue->vif->dev->stats.rx_dropped++; + } else { + if (skb_queue_empty(&queue->rx_queue)) + xenvif_update_needed_slots(queue, skb); + + __skb_queue_tail(&queue->rx_queue, skb); + + queue->rx_queue_len += skb->len; } spin_unlock_irqrestore(&queue->rx_queue.lock, flags); @@ -100,6 +114,8 @@ static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue) skb = __skb_dequeue(&queue->rx_queue); if (skb) { + xenvif_update_needed_slots(queue, skb_peek(&queue->rx_queue)); + queue->rx_queue_len -= skb->len; if (queue->rx_queue_len < queue->rx_queue_max) { struct netdev_queue *txq; @@ -134,6 +150,7 @@ static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue) break; xenvif_rx_dequeue(queue); kfree_skb(skb); + queue->vif->dev->stats.rx_dropped++; } } @@ -465,6 +482,7 @@ void xenvif_rx_action(struct xenvif_queue *queue) queue->rx_copy.completed = &completed_skbs; while (xenvif_rx_ring_slots_available(queue) && + !skb_queue_empty(&queue->rx_queue) && work_done < RX_BATCH_SIZE) { xenvif_rx_skb(queue); work_done++; @@ -474,27 +492,31 @@ void xenvif_rx_action(struct xenvif_queue *queue) xenvif_rx_copy_flush(queue); } -static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue) +static RING_IDX xenvif_rx_queue_slots(const struct xenvif_queue *queue) { RING_IDX prod, cons; prod = queue->rx.sring->req_prod; cons = queue->rx.req_cons; + return prod - cons; +} + +static bool xenvif_rx_queue_stalled(const struct xenvif_queue *queue) +{ + unsigned int needed = READ_ONCE(queue->rx_slots_needed); + return !queue->stalled && - prod - cons < 1 && + xenvif_rx_queue_slots(queue) < needed && time_after(jiffies, queue->last_rx_time + queue->vif->stall_timeout); } static bool xenvif_rx_queue_ready(struct xenvif_queue *queue) { - RING_IDX prod, cons; - - prod = queue->rx.sring->req_prod; - cons = queue->rx.req_cons; + unsigned int needed = READ_ONCE(queue->rx_slots_needed); - return queue->stalled && prod - cons >= 1; + return queue->stalled && xenvif_rx_queue_slots(queue) >= needed; } bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread) diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 416305e6d0932..44e353dd2ba19 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -435,6 +435,7 @@ static void backend_disconnect(struct backend_info *be) unsigned int queue_index; xen_unregister_watchers(vif); + xenbus_rm(XBT_NIL, be->dev->nodename, "hotplug-status"); #ifdef CONFIG_DEBUG_FS xenvif_debugfs_delif(vif); #endif /* CONFIG_DEBUG_FS */ @@ -979,15 +980,11 @@ static void connect(struct backend_info *be) xenvif_carrier_on(be->vif); unregister_hotplug_status_watch(be); - if (xenbus_exists(XBT_NIL, dev->nodename, "hotplug-status")) { - err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch, - NULL, hotplug_status_changed, - "%s/%s", dev->nodename, - "hotplug-status"); - if (err) - goto err; + err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch, NULL, + hotplug_status_changed, + "%s/%s", dev->nodename, "hotplug-status"); + if (!err) be->have_hotplug_status_watch = 1; - } netif_tx_wake_all_queues(be->vif->dev); diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 88280057e0321..810fa9968be7d 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -63,6 +63,10 @@ module_param_named(max_queues, xennet_max_queues, uint, 0644); MODULE_PARM_DESC(max_queues, "Maximum number of queues per virtual interface"); +static bool __read_mostly xennet_trusted = true; +module_param_named(trusted, xennet_trusted, bool, 0644); +MODULE_PARM_DESC(trusted, "Is the backend trusted"); + #define XENNET_TIMEOUT (5 * HZ) static const struct ethtool_ops xennet_ethtool_ops; @@ -121,21 +125,17 @@ struct netfront_queue { /* * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries - * are linked from tx_skb_freelist through skb_entry.link. - * - * NB. Freelist index entries are always going to be less than - * PAGE_OFFSET, whereas pointers to skbs will always be equal or - * greater than PAGE_OFFSET: we use this property to distinguish - * them. + * are linked from tx_skb_freelist through tx_link. */ - union skb_entry { - struct sk_buff *skb; - unsigned long link; - } tx_skbs[NET_TX_RING_SIZE]; + struct sk_buff *tx_skbs[NET_TX_RING_SIZE]; + unsigned short tx_link[NET_TX_RING_SIZE]; +#define TX_LINK_NONE 0xffff +#define TX_PENDING 0xfffe grant_ref_t gref_tx_head; grant_ref_t grant_tx_ref[NET_TX_RING_SIZE]; struct page *grant_tx_page[NET_TX_RING_SIZE]; unsigned tx_skb_freelist; + unsigned int tx_pend_queue; spinlock_t rx_lock ____cacheline_aligned_in_smp; struct xen_netif_rx_front_ring rx; @@ -146,6 +146,9 @@ struct netfront_queue { struct sk_buff *rx_skbs[NET_RX_RING_SIZE]; grant_ref_t gref_rx_head; grant_ref_t grant_rx_ref[NET_RX_RING_SIZE]; + + unsigned int rx_rsp_unconsumed; + spinlock_t rx_cons_lock; }; struct netfront_info { @@ -161,6 +164,12 @@ struct netfront_info { struct netfront_stats __percpu *rx_stats; struct netfront_stats __percpu *tx_stats; + /* Is device behaving sane? */ + bool broken; + + /* Should skbs be bounced into a zeroed buffer? */ + bool bounce; + atomic_t rx_gso_checksum_fixup; }; @@ -169,33 +178,25 @@ struct netfront_rx_info { struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; }; -static void skb_entry_set_link(union skb_entry *list, unsigned short id) -{ - list->link = id; -} - -static int skb_entry_is_link(const union skb_entry *list) -{ - BUILD_BUG_ON(sizeof(list->skb) != sizeof(list->link)); - return (unsigned long)list->skb < PAGE_OFFSET; -} - /* * Access macros for acquiring freeing slots in tx_skbs[]. */ -static void add_id_to_freelist(unsigned *head, union skb_entry *list, - unsigned short id) +static void add_id_to_list(unsigned *head, unsigned short *list, + unsigned short id) { - skb_entry_set_link(&list[id], *head); + list[id] = *head; *head = id; } -static unsigned short get_id_from_freelist(unsigned *head, - union skb_entry *list) +static unsigned short get_id_from_list(unsigned *head, unsigned short *list) { unsigned int id = *head; - *head = list[id].link; + + if (id != TX_LINK_NONE) { + *head = list[id]; + list[id] = TX_LINK_NONE; + } return id; } @@ -267,7 +268,7 @@ static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue) if (unlikely(!skb)) return NULL; - page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); + page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | __GFP_ZERO); if (!page) { kfree_skb(skb); return NULL; @@ -351,7 +352,7 @@ static int xennet_open(struct net_device *dev) unsigned int i = 0; struct netfront_queue *queue = NULL; - if (!np->queues) + if (!np->queues || np->broken) return -ENODEV; for (i = 0; i < num_queues; ++i) { @@ -373,41 +374,62 @@ static int xennet_open(struct net_device *dev) return 0; } -static void xennet_tx_buf_gc(struct netfront_queue *queue) +static bool xennet_tx_buf_gc(struct netfront_queue *queue) { RING_IDX cons, prod; unsigned short id; struct sk_buff *skb; bool more_to_do; + bool work_done = false; + const struct device *dev = &queue->info->netdev->dev; BUG_ON(!netif_carrier_ok(queue->info->netdev)); do { prod = queue->tx.sring->rsp_prod; + if (RING_RESPONSE_PROD_OVERFLOW(&queue->tx, prod)) { + dev_alert(dev, "Illegal number of responses %u\n", + prod - queue->tx.rsp_cons); + goto err; + } rmb(); /* Ensure we see responses up to 'rp'. */ for (cons = queue->tx.rsp_cons; cons != prod; cons++) { - struct xen_netif_tx_response *txrsp; + struct xen_netif_tx_response txrsp; - txrsp = RING_GET_RESPONSE(&queue->tx, cons); - if (txrsp->status == XEN_NETIF_RSP_NULL) + work_done = true; + + RING_COPY_RESPONSE(&queue->tx, cons, &txrsp); + if (txrsp.status == XEN_NETIF_RSP_NULL) continue; - id = txrsp->id; - skb = queue->tx_skbs[id].skb; - if (unlikely(gnttab_query_foreign_access( - queue->grant_tx_ref[id]) != 0)) { - pr_alert("%s: warning -- grant still in use by backend domain\n", - __func__); - BUG(); + id = txrsp.id; + if (id >= RING_SIZE(&queue->tx)) { + dev_alert(dev, + "Response has incorrect id (%u)\n", + id); + goto err; + } + if (queue->tx_link[id] != TX_PENDING) { + dev_alert(dev, + "Response for inactive request\n"); + goto err; + } + + queue->tx_link[id] = TX_LINK_NONE; + skb = queue->tx_skbs[id]; + queue->tx_skbs[id] = NULL; + if (unlikely(!gnttab_end_foreign_access_ref( + queue->grant_tx_ref[id], GNTMAP_readonly))) { + dev_alert(dev, + "Grant still in use by backend domain\n"); + goto err; } - gnttab_end_foreign_access_ref( - queue->grant_tx_ref[id], GNTMAP_readonly); gnttab_release_grant_reference( &queue->gref_tx_head, queue->grant_tx_ref[id]); queue->grant_tx_ref[id] = GRANT_INVALID_REF; queue->grant_tx_page[id] = NULL; - add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, id); + add_id_to_list(&queue->tx_skb_freelist, queue->tx_link, id); dev_kfree_skb_irq(skb); } @@ -417,13 +439,22 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue) } while (more_to_do); xennet_maybe_wake_tx(queue); + + return work_done; + + err: + queue->info->broken = true; + dev_alert(dev, "Disabled for further use\n"); + + return work_done; } struct xennet_gnttab_make_txreq { struct netfront_queue *queue; struct sk_buff *skb; struct page *page; - struct xen_netif_tx_request *tx; /* Last request */ + struct xen_netif_tx_request *tx; /* Last request on ring page */ + struct xen_netif_tx_request tx_local; /* Last request local copy*/ unsigned int size; }; @@ -439,7 +470,7 @@ static void xennet_tx_setup_grant(unsigned long gfn, unsigned int offset, struct netfront_queue *queue = info->queue; struct sk_buff *skb = info->skb; - id = get_id_from_freelist(&queue->tx_skb_freelist, queue->tx_skbs); + id = get_id_from_list(&queue->tx_skb_freelist, queue->tx_link); tx = RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++); ref = gnttab_claim_grant_reference(&queue->gref_tx_head); WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)(int)ref)); @@ -447,34 +478,37 @@ static void xennet_tx_setup_grant(unsigned long gfn, unsigned int offset, gnttab_grant_foreign_access_ref(ref, queue->info->xbdev->otherend_id, gfn, GNTMAP_readonly); - queue->tx_skbs[id].skb = skb; + queue->tx_skbs[id] = skb; queue->grant_tx_page[id] = page; queue->grant_tx_ref[id] = ref; - tx->id = id; - tx->gref = ref; - tx->offset = offset; - tx->size = len; - tx->flags = 0; + info->tx_local.id = id; + info->tx_local.gref = ref; + info->tx_local.offset = offset; + info->tx_local.size = len; + info->tx_local.flags = 0; + + *tx = info->tx_local; + + /* + * Put the request in the pending queue, it will be set to be pending + * when the producer index is about to be raised. + */ + add_id_to_list(&queue->tx_pend_queue, queue->tx_link, id); info->tx = tx; - info->size += tx->size; + info->size += info->tx_local.size; } static struct xen_netif_tx_request *xennet_make_first_txreq( - struct netfront_queue *queue, struct sk_buff *skb, - struct page *page, unsigned int offset, unsigned int len) + struct xennet_gnttab_make_txreq *info, + unsigned int offset, unsigned int len) { - struct xennet_gnttab_make_txreq info = { - .queue = queue, - .skb = skb, - .page = page, - .size = 0, - }; + info->size = 0; - gnttab_for_one_grant(page, offset, len, xennet_tx_setup_grant, &info); + gnttab_for_one_grant(info->page, offset, len, xennet_tx_setup_grant, info); - return info.tx; + return info->tx; } static void xennet_make_one_txreq(unsigned long gfn, unsigned int offset, @@ -487,35 +521,27 @@ static void xennet_make_one_txreq(unsigned long gfn, unsigned int offset, xennet_tx_setup_grant(gfn, offset, len, data); } -static struct xen_netif_tx_request *xennet_make_txreqs( - struct netfront_queue *queue, struct xen_netif_tx_request *tx, - struct sk_buff *skb, struct page *page, +static void xennet_make_txreqs( + struct xennet_gnttab_make_txreq *info, + struct page *page, unsigned int offset, unsigned int len) { - struct xennet_gnttab_make_txreq info = { - .queue = queue, - .skb = skb, - .tx = tx, - }; - /* Skip unused frames from start of page */ page += offset >> PAGE_SHIFT; offset &= ~PAGE_MASK; while (len) { - info.page = page; - info.size = 0; + info->page = page; + info->size = 0; gnttab_foreach_grant_in_range(page, offset, len, xennet_make_one_txreq, - &info); + info); page++; offset = 0; - len -= info.size; + len -= info->size; } - - return info.tx; } /* @@ -562,13 +588,50 @@ static u16 xennet_select_queue(struct net_device *dev, struct sk_buff *skb, return queue_idx; } +static void xennet_mark_tx_pending(struct netfront_queue *queue) +{ + unsigned int i; + + while ((i = get_id_from_list(&queue->tx_pend_queue, queue->tx_link)) != + TX_LINK_NONE) + queue->tx_link[i] = TX_PENDING; +} + +struct sk_buff *bounce_skb(const struct sk_buff *skb) +{ + unsigned int headerlen = skb_headroom(skb); + /* Align size to allocate full pages and avoid contiguous data leaks */ + unsigned int size = ALIGN(skb_end_offset(skb) + skb->data_len, + XEN_PAGE_SIZE); + struct sk_buff *n = alloc_skb(size, GFP_ATOMIC | __GFP_ZERO); + + if (!n) + return NULL; + + if (!IS_ALIGNED((uintptr_t)n->head, XEN_PAGE_SIZE)) { + WARN_ONCE(1, "misaligned skb allocated\n"); + kfree_skb(n); + return NULL; + } + + /* Set the data pointer */ + skb_reserve(n, headerlen); + /* Set the tail pointer and length */ + skb_put(n, skb->len); + + BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len)); + + skb_copy_header(n, skb); + return n; +} + #define MAX_XEN_SKB_FRAGS (65536 / XEN_PAGE_SIZE + 1) static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct netfront_info *np = netdev_priv(dev); struct netfront_stats *tx_stats = this_cpu_ptr(np->tx_stats); - struct xen_netif_tx_request *tx, *first_tx; + struct xen_netif_tx_request *first_tx; unsigned int i; int notify; int slots; @@ -577,6 +640,7 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev unsigned int len; unsigned long flags; struct netfront_queue *queue = NULL; + struct xennet_gnttab_make_txreq info = { }; unsigned int num_queues = dev->real_num_tx_queues; u16 queue_index; struct sk_buff *nskb; @@ -584,6 +648,8 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev /* Drop the packet if no queues are set up */ if (num_queues < 1) goto drop; + if (unlikely(np->broken)) + goto drop; /* Determine which queue to transmit this SKB on */ queue_index = skb_get_queue_mapping(skb); queue = &np->queues[queue_index]; @@ -611,9 +677,13 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev /* The first req should be at least ETH_HLEN size or the packet will be * dropped by netback. + * + * If the backend is not trusted bounce all data to zeroed pages to + * avoid exposing contiguous data on the granted page not belonging to + * the skb. */ - if (unlikely(PAGE_SIZE - offset < ETH_HLEN)) { - nskb = skb_copy(skb, GFP_ATOMIC); + if (np->bounce || unlikely(PAGE_SIZE - offset < ETH_HLEN)) { + nskb = bounce_skb(skb); if (!nskb) goto drop; dev_consume_skb_any(skb); @@ -634,21 +704,24 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev } /* First request for the linear area. */ - first_tx = tx = xennet_make_first_txreq(queue, skb, - page, offset, len); - offset += tx->size; + info.queue = queue; + info.skb = skb; + info.page = page; + first_tx = xennet_make_first_txreq(&info, offset, len); + offset += info.tx_local.size; if (offset == PAGE_SIZE) { page++; offset = 0; } - len -= tx->size; + len -= info.tx_local.size; if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */ - tx->flags |= XEN_NETTXF_csum_blank | XEN_NETTXF_data_validated; + first_tx->flags |= XEN_NETTXF_csum_blank | + XEN_NETTXF_data_validated; else if (skb->ip_summed == CHECKSUM_UNNECESSARY) /* remote but checksummed. */ - tx->flags |= XEN_NETTXF_data_validated; + first_tx->flags |= XEN_NETTXF_data_validated; /* Optional extra info after the first request. */ if (skb_shinfo(skb)->gso_size) { @@ -657,7 +730,7 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev gso = (struct xen_netif_extra_info *) RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++); - tx->flags |= XEN_NETTXF_extra_info; + first_tx->flags |= XEN_NETTXF_extra_info; gso->u.gso.size = skb_shinfo(skb)->gso_size; gso->u.gso.type = (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) ? @@ -671,12 +744,12 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev } /* Requests for the rest of the linear area. */ - tx = xennet_make_txreqs(queue, tx, skb, page, offset, len); + xennet_make_txreqs(&info, page, offset, len); /* Requests for all the frags. */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - tx = xennet_make_txreqs(queue, tx, skb, skb_frag_page(frag), + xennet_make_txreqs(&info, skb_frag_page(frag), skb_frag_off(frag), skb_frag_size(frag)); } @@ -684,6 +757,8 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev /* First request has the packet length. */ first_tx->size = skb->len; + xennet_mark_tx_pending(queue); + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->tx, notify); if (notify) notify_remote_via_irq(queue->tx_irq); @@ -723,6 +798,38 @@ static int xennet_close(struct net_device *dev) return 0; } +static void xennet_destroy_queues(struct netfront_info *info) +{ + unsigned int i; + + for (i = 0; i < info->netdev->real_num_tx_queues; i++) { + struct netfront_queue *queue = &info->queues[i]; + + if (netif_running(info->netdev)) + napi_disable(&queue->napi); + netif_napi_del(&queue->napi); + } + + kfree(info->queues); + info->queues = NULL; +} + +static void xennet_uninit(struct net_device *dev) +{ + struct netfront_info *np = netdev_priv(dev); + xennet_destroy_queues(np); +} + +static void xennet_set_rx_rsp_cons(struct netfront_queue *queue, RING_IDX val) +{ + unsigned long flags; + + spin_lock_irqsave(&queue->rx_cons_lock, flags); + queue->rx.rsp_cons = val; + queue->rx_rsp_unconsumed = RING_HAS_UNCONSUMED_RESPONSES(&queue->rx); + spin_unlock_irqrestore(&queue->rx_cons_lock, flags); +} + static void xennet_move_rx_slot(struct netfront_queue *queue, struct sk_buff *skb, grant_ref_t ref) { @@ -741,7 +848,7 @@ static int xennet_get_extras(struct netfront_queue *queue, RING_IDX rp) { - struct xen_netif_extra_info *extra; + struct xen_netif_extra_info extra; struct device *dev = &queue->info->netdev->dev; RING_IDX cons = queue->rx.rsp_cons; int err = 0; @@ -757,26 +864,24 @@ static int xennet_get_extras(struct netfront_queue *queue, break; } - extra = (struct xen_netif_extra_info *) - RING_GET_RESPONSE(&queue->rx, ++cons); + RING_COPY_RESPONSE(&queue->rx, ++cons, &extra); - if (unlikely(!extra->type || - extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) { + if (unlikely(!extra.type || + extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) { if (net_ratelimit()) dev_warn(dev, "Invalid extra type: %d\n", - extra->type); + extra.type); err = -EINVAL; } else { - memcpy(&extras[extra->type - 1], extra, - sizeof(*extra)); + extras[extra.type - 1] = extra; } skb = xennet_get_rx_skb(queue, cons); ref = xennet_get_rx_ref(queue, cons); xennet_move_rx_slot(queue, skb, ref); - } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE); + } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE); - queue->rx.rsp_cons = cons; + xennet_set_rx_rsp_cons(queue, cons); return err; } @@ -784,7 +889,7 @@ static int xennet_get_responses(struct netfront_queue *queue, struct netfront_rx_info *rinfo, RING_IDX rp, struct sk_buff_head *list) { - struct xen_netif_rx_response *rx = &rinfo->rx; + struct xen_netif_rx_response *rx = &rinfo->rx, rx_local; struct xen_netif_extra_info *extras = rinfo->extras; struct device *dev = &queue->info->netdev->dev; RING_IDX cons = queue->rx.rsp_cons; @@ -793,7 +898,6 @@ static int xennet_get_responses(struct netfront_queue *queue, int max = XEN_NETIF_NR_SLOTS_MIN + (rx->status <= RX_COPY_THRESHOLD); int slots = 1; int err = 0; - unsigned long ret; if (rx->flags & XEN_NETRXF_extra_info) { err = xennet_get_extras(queue, extras, rp); @@ -824,8 +928,13 @@ static int xennet_get_responses(struct netfront_queue *queue, goto next; } - ret = gnttab_end_foreign_access_ref(ref, 0); - BUG_ON(!ret); + if (!gnttab_end_foreign_access_ref(ref, 0)) { + dev_alert(dev, + "Grant still in use by backend domain\n"); + queue->info->broken = true; + dev_alert(dev, "Disabled for further use\n"); + return -EINVAL; + } gnttab_release_grant_reference(&queue->gref_rx_head, ref); @@ -842,7 +951,8 @@ static int xennet_get_responses(struct netfront_queue *queue, break; } - rx = RING_GET_RESPONSE(&queue->rx, cons + slots); + RING_COPY_RESPONSE(&queue->rx, cons + slots, &rx_local); + rx = &rx_local; skb = xennet_get_rx_skb(queue, cons + slots); ref = xennet_get_rx_ref(queue, cons + slots); slots++; @@ -855,7 +965,7 @@ static int xennet_get_responses(struct netfront_queue *queue, } if (unlikely(err)) - queue->rx.rsp_cons = cons + slots; + xennet_set_rx_rsp_cons(queue, cons + slots); return err; } @@ -897,10 +1007,11 @@ static int xennet_fill_frags(struct netfront_queue *queue, struct sk_buff *nskb; while ((nskb = __skb_dequeue(list))) { - struct xen_netif_rx_response *rx = - RING_GET_RESPONSE(&queue->rx, ++cons); + struct xen_netif_rx_response rx; skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0]; + RING_COPY_RESPONSE(&queue->rx, ++cons, &rx); + if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS) { unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to; @@ -908,20 +1019,21 @@ static int xennet_fill_frags(struct netfront_queue *queue, __pskb_pull_tail(skb, pull_to - skb_headlen(skb)); } if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) { - queue->rx.rsp_cons = ++cons + skb_queue_len(list); + xennet_set_rx_rsp_cons(queue, + ++cons + skb_queue_len(list)); kfree_skb(nskb); return -ENOENT; } skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, skb_frag_page(nfrag), - rx->offset, rx->status, PAGE_SIZE); + rx.offset, rx.status, PAGE_SIZE); skb_shinfo(nskb)->nr_frags = 0; kfree_skb(nskb); } - queue->rx.rsp_cons = cons; + xennet_set_rx_rsp_cons(queue, cons); return 0; } @@ -1008,17 +1120,28 @@ static int xennet_poll(struct napi_struct *napi, int budget) skb_queue_head_init(&tmpq); rp = queue->rx.sring->rsp_prod; + if (RING_RESPONSE_PROD_OVERFLOW(&queue->rx, rp)) { + dev_alert(&dev->dev, "Illegal number of responses %u\n", + rp - queue->rx.rsp_cons); + queue->info->broken = true; + spin_unlock(&queue->rx_lock); + return 0; + } rmb(); /* Ensure we see queued responses up to 'rp'. */ i = queue->rx.rsp_cons; work_done = 0; while ((i != rp) && (work_done < budget)) { - memcpy(rx, RING_GET_RESPONSE(&queue->rx, i), sizeof(*rx)); + RING_COPY_RESPONSE(&queue->rx, i, rx); memset(extras, 0, sizeof(rinfo.extras)); err = xennet_get_responses(queue, &rinfo, rp, &tmpq); if (unlikely(err)) { + if (queue->info->broken) { + spin_unlock(&queue->rx_lock); + return 0; + } err: while ((skb = __skb_dequeue(&tmpq))) __skb_queue_tail(&errq, skb); @@ -1035,7 +1158,9 @@ static int xennet_poll(struct napi_struct *napi, int budget) if (unlikely(xennet_set_skb_gso(skb, gso))) { __skb_queue_head(&tmpq, skb); - queue->rx.rsp_cons += skb_queue_len(&tmpq); + xennet_set_rx_rsp_cons(queue, + queue->rx.rsp_cons + + skb_queue_len(&tmpq)); goto err; } } @@ -1059,7 +1184,8 @@ static int xennet_poll(struct napi_struct *napi, int budget) __skb_queue_tail(&rxq, skb); - i = ++queue->rx.rsp_cons; + i = queue->rx.rsp_cons + 1; + xennet_set_rx_rsp_cons(queue, i); work_done++; } @@ -1135,17 +1261,18 @@ static void xennet_release_tx_bufs(struct netfront_queue *queue) for (i = 0; i < NET_TX_RING_SIZE; i++) { /* Skip over entries which are actually freelist references */ - if (skb_entry_is_link(&queue->tx_skbs[i])) + if (!queue->tx_skbs[i]) continue; - skb = queue->tx_skbs[i].skb; + skb = queue->tx_skbs[i]; + queue->tx_skbs[i] = NULL; get_page(queue->grant_tx_page[i]); gnttab_end_foreign_access(queue->grant_tx_ref[i], GNTMAP_readonly, (unsigned long)page_address(queue->grant_tx_page[i])); queue->grant_tx_page[i] = NULL; queue->grant_tx_ref[i] = GRANT_INVALID_REF; - add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, i); + add_id_to_list(&queue->tx_skb_freelist, queue->tx_link, i); dev_kfree_skb_irq(skb); } } @@ -1220,34 +1347,79 @@ static int xennet_set_features(struct net_device *dev, return 0; } -static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id) +static bool xennet_handle_tx(struct netfront_queue *queue, unsigned int *eoi) { - struct netfront_queue *queue = dev_id; unsigned long flags; + if (unlikely(queue->info->broken)) + return false; + spin_lock_irqsave(&queue->tx_lock, flags); - xennet_tx_buf_gc(queue); + if (xennet_tx_buf_gc(queue)) + *eoi = 0; spin_unlock_irqrestore(&queue->tx_lock, flags); + return true; +} + +static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id) +{ + unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS; + + if (likely(xennet_handle_tx(dev_id, &eoiflag))) + xen_irq_lateeoi(irq, eoiflag); + return IRQ_HANDLED; } -static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id) +static bool xennet_handle_rx(struct netfront_queue *queue, unsigned int *eoi) { - struct netfront_queue *queue = dev_id; - struct net_device *dev = queue->info->netdev; + unsigned int work_queued; + unsigned long flags; + + if (unlikely(queue->info->broken)) + return false; + + spin_lock_irqsave(&queue->rx_cons_lock, flags); + work_queued = RING_HAS_UNCONSUMED_RESPONSES(&queue->rx); + if (work_queued > queue->rx_rsp_unconsumed) { + queue->rx_rsp_unconsumed = work_queued; + *eoi = 0; + } else if (unlikely(work_queued < queue->rx_rsp_unconsumed)) { + const struct device *dev = &queue->info->netdev->dev; - if (likely(netif_carrier_ok(dev) && - RING_HAS_UNCONSUMED_RESPONSES(&queue->rx))) + spin_unlock_irqrestore(&queue->rx_cons_lock, flags); + dev_alert(dev, "RX producer index going backwards\n"); + dev_alert(dev, "Disabled for further use\n"); + queue->info->broken = true; + return false; + } + spin_unlock_irqrestore(&queue->rx_cons_lock, flags); + + if (likely(netif_carrier_ok(queue->info->netdev) && work_queued)) napi_schedule(&queue->napi); + return true; +} + +static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id) +{ + unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS; + + if (likely(xennet_handle_rx(dev_id, &eoiflag))) + xen_irq_lateeoi(irq, eoiflag); + return IRQ_HANDLED; } static irqreturn_t xennet_interrupt(int irq, void *dev_id) { - xennet_tx_interrupt(irq, dev_id); - xennet_rx_interrupt(irq, dev_id); + unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS; + + if (xennet_handle_tx(dev_id, &eoiflag) && + xennet_handle_rx(dev_id, &eoiflag)) + xen_irq_lateeoi(irq, eoiflag); + return IRQ_HANDLED; } @@ -1258,12 +1430,17 @@ static void xennet_poll_controller(struct net_device *dev) struct netfront_info *info = netdev_priv(dev); unsigned int num_queues = dev->real_num_tx_queues; unsigned int i; + + if (info->broken) + return; + for (i = 0; i < num_queues; ++i) xennet_interrupt(0, &info->queues[i]); } #endif static const struct net_device_ops xennet_netdev_ops = { + .ndo_uninit = xennet_uninit, .ndo_open = xennet_open, .ndo_stop = xennet_close, .ndo_start_xmit = xennet_start_xmit, @@ -1439,6 +1616,10 @@ static int netfront_resume(struct xenbus_device *dev) dev_dbg(&dev->dev, "%s\n", dev->nodename); + netif_tx_lock_bh(info->netdev); + netif_device_detach(info->netdev); + netif_tx_unlock_bh(info->netdev); + xennet_disconnect_backend(info); return 0; } @@ -1473,9 +1654,10 @@ static int setup_netfront_single(struct netfront_queue *queue) if (err < 0) goto fail; - err = bind_evtchn_to_irqhandler(queue->tx_evtchn, - xennet_interrupt, - 0, queue->info->netdev->name, queue); + err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn, + xennet_interrupt, 0, + queue->info->netdev->name, + queue); if (err < 0) goto bind_fail; queue->rx_evtchn = queue->tx_evtchn; @@ -1503,18 +1685,18 @@ static int setup_netfront_split(struct netfront_queue *queue) snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name), "%s-tx", queue->name); - err = bind_evtchn_to_irqhandler(queue->tx_evtchn, - xennet_tx_interrupt, - 0, queue->tx_irq_name, queue); + err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn, + xennet_tx_interrupt, 0, + queue->tx_irq_name, queue); if (err < 0) goto bind_tx_fail; queue->tx_irq = err; snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name), "%s-rx", queue->name); - err = bind_evtchn_to_irqhandler(queue->rx_evtchn, - xennet_rx_interrupt, - 0, queue->rx_irq_name, queue); + err = bind_evtchn_to_irqhandler_lateeoi(queue->rx_evtchn, + xennet_rx_interrupt, 0, + queue->rx_irq_name, queue); if (err < 0) goto bind_rx_fail; queue->rx_irq = err; @@ -1538,7 +1720,7 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_queue *queue, unsigned int feature_split_evtchn) { struct xen_netif_tx_sring *txs; - struct xen_netif_rx_sring *rxs; + struct xen_netif_rx_sring *rxs = NULL; grant_ref_t gref; int err; @@ -1558,21 +1740,21 @@ static int setup_netfront(struct xenbus_device *dev, err = xenbus_grant_ring(dev, txs, 1, &gref); if (err < 0) - goto grant_tx_ring_fail; + goto fail; queue->tx_ring_ref = gref; rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH); if (!rxs) { err = -ENOMEM; xenbus_dev_fatal(dev, err, "allocating rx ring page"); - goto alloc_rx_ring_fail; + goto fail; } SHARED_RING_INIT(rxs); FRONT_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE); err = xenbus_grant_ring(dev, rxs, 1, &gref); if (err < 0) - goto grant_rx_ring_fail; + goto fail; queue->rx_ring_ref = gref; if (feature_split_evtchn) @@ -1585,22 +1767,28 @@ static int setup_netfront(struct xenbus_device *dev, err = setup_netfront_single(queue); if (err) - goto alloc_evtchn_fail; + goto fail; return 0; /* If we fail to setup netfront, it is safe to just revoke access to * granted pages because backend is not accessing it at this point. */ -alloc_evtchn_fail: - gnttab_end_foreign_access_ref(queue->rx_ring_ref, 0); -grant_rx_ring_fail: - free_page((unsigned long)rxs); -alloc_rx_ring_fail: - gnttab_end_foreign_access_ref(queue->tx_ring_ref, 0); -grant_tx_ring_fail: - free_page((unsigned long)txs); -fail: + fail: + if (queue->rx_ring_ref != GRANT_INVALID_REF) { + gnttab_end_foreign_access(queue->rx_ring_ref, 0, + (unsigned long)rxs); + queue->rx_ring_ref = GRANT_INVALID_REF; + } else { + free_page((unsigned long)rxs); + } + if (queue->tx_ring_ref != GRANT_INVALID_REF) { + gnttab_end_foreign_access(queue->tx_ring_ref, 0, + (unsigned long)txs); + queue->tx_ring_ref = GRANT_INVALID_REF; + } else { + free_page((unsigned long)txs); + } return err; } @@ -1616,6 +1804,7 @@ static int xennet_init_queue(struct netfront_queue *queue) spin_lock_init(&queue->tx_lock); spin_lock_init(&queue->rx_lock); + spin_lock_init(&queue->rx_cons_lock); timer_setup(&queue->rx_refill_timer, rx_refill_timeout, 0); @@ -1623,13 +1812,15 @@ static int xennet_init_queue(struct netfront_queue *queue) snprintf(queue->name, sizeof(queue->name), "vif%s-q%u", devid, queue->id); - /* Initialise tx_skbs as a free chain containing every entry. */ + /* Initialise tx_skb_freelist as a free chain containing every entry. */ queue->tx_skb_freelist = 0; + queue->tx_pend_queue = TX_LINK_NONE; for (i = 0; i < NET_TX_RING_SIZE; i++) { - skb_entry_set_link(&queue->tx_skbs[i], i+1); + queue->tx_link[i] = i + 1; queue->grant_tx_ref[i] = GRANT_INVALID_REF; queue->grant_tx_page[i] = NULL; } + queue->tx_link[NET_TX_RING_SIZE - 1] = TX_LINK_NONE; /* Clear out rx_skbs */ for (i = 0; i < NET_RX_RING_SIZE; i++) { @@ -1743,22 +1934,6 @@ static int write_queue_xenstore_keys(struct netfront_queue *queue, return err; } -static void xennet_destroy_queues(struct netfront_info *info) -{ - unsigned int i; - - for (i = 0; i < info->netdev->real_num_tx_queues; i++) { - struct netfront_queue *queue = &info->queues[i]; - - if (netif_running(info->netdev)) - napi_disable(&queue->napi); - netif_napi_del(&queue->napi); - } - - kfree(info->queues); - info->queues = NULL; -} - static int xennet_create_queues(struct netfront_info *info, unsigned int *num_queues) { @@ -1814,6 +1989,10 @@ static int talk_to_netback(struct xenbus_device *dev, info->netdev->irq = 0; + /* Check if backend is trusted. */ + info->bounce = !xennet_trusted || + !xenbus_read_unsigned(dev->nodename, "trusted", 1); + /* Check if backend supports multiple queues */ max_queues = xenbus_read_unsigned(info->xbdev->otherend, "multi-queue-max-queues", 1); @@ -1834,6 +2013,9 @@ static int talk_to_netback(struct xenbus_device *dev, if (info->queues) xennet_destroy_queues(info); + /* For the case of a reconnect reset the "broken" indicator. */ + info->broken = false; + err = xennet_create_queues(info, &num_queues); if (err < 0) { xenbus_dev_fatal(dev, err, "creating queues"); @@ -1964,6 +2146,9 @@ static int xennet_connect(struct net_device *dev) err = talk_to_netback(np->xbdev, np); if (err) return err; + if (np->bounce) + dev_info(&np->xbdev->dev, + "bouncing transmitted data to zeroed pages\n"); /* talk_to_netback() sets the correct number of queues */ num_queues = dev->real_num_tx_queues; @@ -1987,6 +2172,10 @@ static int xennet_connect(struct net_device *dev) * domain a kick because we've probably just requeued some * packets. */ + netif_tx_lock_bh(np->netdev); + netif_device_attach(np->netdev); + netif_tx_unlock_bh(np->netdev); + netif_carrier_on(np->netdev); for (j = 0; j < num_queues; ++j) { queue = &np->queues[j]; diff --git a/drivers/nfc/nfcmrvl/i2c.c b/drivers/nfc/nfcmrvl/i2c.c index 0f22379887ca8..919b4d2f5d8b5 100644 --- a/drivers/nfc/nfcmrvl/i2c.c +++ b/drivers/nfc/nfcmrvl/i2c.c @@ -186,9 +186,9 @@ static int nfcmrvl_i2c_parse_dt(struct device_node *node, pdata->irq_polarity = IRQF_TRIGGER_RISING; ret = irq_of_parse_and_map(node, 0); - if (ret < 0) { - pr_err("Unable to get irq, error: %d\n", ret); - return ret; + if (!ret) { + pr_err("Unable to get irq\n"); + return -EINVAL; } pdata->irq = ret; diff --git a/drivers/nfc/nfcmrvl/main.c b/drivers/nfc/nfcmrvl/main.c index 529be35ac1782..54d228acc0f5d 100644 --- a/drivers/nfc/nfcmrvl/main.c +++ b/drivers/nfc/nfcmrvl/main.c @@ -194,6 +194,7 @@ void nfcmrvl_nci_unregister_dev(struct nfcmrvl_private *priv) { struct nci_dev *ndev = priv->ndev; + nci_unregister_device(ndev); if (priv->ndev->nfc_dev->fw_download_in_progress) nfcmrvl_fw_dnld_abort(priv); @@ -202,7 +203,6 @@ void nfcmrvl_nci_unregister_dev(struct nfcmrvl_private *priv) if (gpio_is_valid(priv->config.reset_n_io)) gpio_free(priv->config.reset_n_io); - nci_unregister_device(ndev); nci_free_device(ndev); kfree(priv); } diff --git a/drivers/nfc/nfcmrvl/spi.c b/drivers/nfc/nfcmrvl/spi.c index 8e0ddb4347704..1f4120e3314b2 100644 --- a/drivers/nfc/nfcmrvl/spi.c +++ b/drivers/nfc/nfcmrvl/spi.c @@ -129,9 +129,9 @@ static int nfcmrvl_spi_parse_dt(struct device_node *node, } ret = irq_of_parse_and_map(node, 0); - if (ret < 0) { - pr_err("Unable to get irq, error: %d\n", ret); - return ret; + if (!ret) { + pr_err("Unable to get irq\n"); + return -EINVAL; } pdata->irq = ret; diff --git a/drivers/nfc/nfcmrvl/usb.c b/drivers/nfc/nfcmrvl/usb.c index 888e298f610b8..f26986eb53f19 100644 --- a/drivers/nfc/nfcmrvl/usb.c +++ b/drivers/nfc/nfcmrvl/usb.c @@ -401,13 +401,25 @@ static void nfcmrvl_play_deferred(struct nfcmrvl_usb_drv_data *drv_data) int err; while ((urb = usb_get_from_anchor(&drv_data->deferred))) { + usb_anchor_urb(urb, &drv_data->tx_anchor); + err = usb_submit_urb(urb, GFP_ATOMIC); - if (err) + if (err) { + kfree(urb->setup_packet); + usb_unanchor_urb(urb); + usb_free_urb(urb); break; + } drv_data->tx_in_flight++; + usb_free_urb(urb); + } + + /* Cleanup the rest deferred urbs. */ + while ((urb = usb_get_from_anchor(&drv_data->deferred))) { + kfree(urb->setup_packet); + usb_free_urb(urb); } - usb_scuttle_anchored_urbs(&drv_data->deferred); } static int nfcmrvl_resume(struct usb_interface *intf) diff --git a/drivers/nfc/nxp-nci/i2c.c b/drivers/nfc/nxp-nci/i2c.c index 9f60e4dc5a908..f426dcdfcdd6a 100644 --- a/drivers/nfc/nxp-nci/i2c.c +++ b/drivers/nfc/nxp-nci/i2c.c @@ -122,7 +122,9 @@ static int nxp_nci_i2c_fw_read(struct nxp_nci_i2c_phy *phy, skb_put_data(*skb, &header, NXP_NCI_FW_HDR_LEN); r = i2c_master_recv(client, skb_put(*skb, frame_len), frame_len); - if (r != frame_len) { + if (r < 0) { + goto fw_read_exit_free_skb; + } else if (r != frame_len) { nfc_err(&client->dev, "Invalid frame length: %u (expected %zu)\n", r, frame_len); @@ -162,8 +164,13 @@ static int nxp_nci_i2c_nci_read(struct nxp_nci_i2c_phy *phy, skb_put_data(*skb, (void *)&header, NCI_CTRL_HDR_SIZE); + if (!header.plen) + return 0; + r = i2c_master_recv(client, skb_put(*skb, header.plen), header.plen); - if (r != header.plen) { + if (r < 0) { + goto nci_read_exit_free_skb; + } else if (r != header.plen) { nfc_err(&client->dev, "Invalid frame payload length: %u (expected %u)\n", r, header.plen); diff --git a/drivers/nfc/pn533/pn533.c b/drivers/nfc/pn533/pn533.c index 3ea38ce86cc9f..807b7b37d9dce 100644 --- a/drivers/nfc/pn533/pn533.c +++ b/drivers/nfc/pn533/pn533.c @@ -2072,7 +2072,7 @@ static int pn533_fill_fragment_skbs(struct pn533 *dev, struct sk_buff *skb) frag = pn533_alloc_skb(dev, frag_size); if (!frag) { skb_queue_purge(&dev->fragment_skb); - break; + return -ENOMEM; } if (!dev->tgt_mode) { @@ -2143,7 +2143,7 @@ static int pn533_transceive(struct nfc_dev *nfc_dev, /* jumbo frame ? */ if (skb->len > PN533_CMD_DATAEXCH_DATA_MAXLEN) { rc = pn533_fill_fragment_skbs(dev, skb); - if (rc <= 0) + if (rc < 0) goto error; skb = skb_dequeue(&dev->fragment_skb); @@ -2215,7 +2215,7 @@ static int pn533_tm_send(struct nfc_dev *nfc_dev, struct sk_buff *skb) /* let's split in multiple chunks if size's too big */ if (skb->len > PN533_CMD_DATAEXCH_DATA_MAXLEN) { rc = pn533_fill_fragment_skbs(dev, skb); - if (rc <= 0) + if (rc < 0) goto error; /* get the first skb */ diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c index 8e4d355dc3aec..2ae1474faede9 100644 --- a/drivers/nfc/port100.c +++ b/drivers/nfc/port100.c @@ -1003,11 +1003,11 @@ static u64 port100_get_command_type_mask(struct port100 *dev) skb = port100_alloc_skb(dev, 0); if (!skb) - return -ENOMEM; + return 0; resp = port100_send_cmd_sync(dev, PORT100_CMD_GET_COMMAND_TYPE, skb); if (IS_ERR(resp)) - return PTR_ERR(resp); + return 0; if (resp->len < 8) mask = 0; @@ -1609,7 +1609,9 @@ static int port100_probe(struct usb_interface *interface, nfc_digital_free_device(dev->nfc_digital_dev); error: + usb_kill_urb(dev->in_urb); usb_free_urb(dev->in_urb); + usb_kill_urb(dev->out_urb); usb_free_urb(dev->out_urb); usb_put_dev(dev->udev); diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c index 23ed11f91213d..6ea59426ab0bf 100644 --- a/drivers/nfc/st21nfca/i2c.c +++ b/drivers/nfc/st21nfca/i2c.c @@ -533,7 +533,8 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client, phy->gpiod_ena = devm_gpiod_get(dev, "enable", GPIOD_OUT_LOW); if (IS_ERR(phy->gpiod_ena)) { nfc_err(dev, "Unable to get ENABLE GPIO\n"); - return PTR_ERR(phy->gpiod_ena); + r = PTR_ERR(phy->gpiod_ena); + goto out_free; } phy->se_status.is_ese_present = @@ -544,7 +545,7 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client, r = st21nfca_hci_platform_init(phy); if (r < 0) { nfc_err(&client->dev, "Unable to reboot st21nfca\n"); - return r; + goto out_free; } r = devm_request_threaded_irq(&client->dev, client->irq, NULL, @@ -553,15 +554,23 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client, ST21NFCA_HCI_DRIVER_NAME, phy); if (r < 0) { nfc_err(&client->dev, "Unable to register IRQ handler\n"); - return r; + goto out_free; } - return st21nfca_hci_probe(phy, &i2c_phy_ops, LLC_SHDLC_NAME, - ST21NFCA_FRAME_HEADROOM, - ST21NFCA_FRAME_TAILROOM, - ST21NFCA_HCI_LLC_MAX_PAYLOAD, - &phy->hdev, - &phy->se_status); + r = st21nfca_hci_probe(phy, &i2c_phy_ops, LLC_SHDLC_NAME, + ST21NFCA_FRAME_HEADROOM, + ST21NFCA_FRAME_TAILROOM, + ST21NFCA_HCI_LLC_MAX_PAYLOAD, + &phy->hdev, + &phy->se_status); + if (r) + goto out_free; + + return 0; + +out_free: + kfree_skb(phy->pending_skb); + return r; } static int st21nfca_hci_i2c_remove(struct i2c_client *client) @@ -574,6 +583,8 @@ static int st21nfca_hci_i2c_remove(struct i2c_client *client) if (phy->powered) st21nfca_hci_i2c_disable(phy); + if (phy->pending_skb) + kfree_skb(phy->pending_skb); return 0; } diff --git a/drivers/nfc/st21nfca/se.c b/drivers/nfc/st21nfca/se.c index 6586378cacb05..d416365042462 100644 --- a/drivers/nfc/st21nfca/se.c +++ b/drivers/nfc/st21nfca/se.c @@ -241,7 +241,7 @@ int st21nfca_hci_se_io(struct nfc_hci_dev *hdev, u32 se_idx, } EXPORT_SYMBOL(st21nfca_hci_se_io); -static void st21nfca_se_wt_timeout(struct timer_list *t) +static void st21nfca_se_wt_work(struct work_struct *work) { /* * No answer from the secure element @@ -254,8 +254,9 @@ static void st21nfca_se_wt_timeout(struct timer_list *t) */ /* hardware reset managed through VCC_UICC_OUT power supply */ u8 param = 0x01; - struct st21nfca_hci_info *info = from_timer(info, t, - se_info.bwi_timer); + struct st21nfca_hci_info *info = container_of(work, + struct st21nfca_hci_info, + se_info.timeout_work); pr_debug("\n"); @@ -273,6 +274,13 @@ static void st21nfca_se_wt_timeout(struct timer_list *t) info->se_info.cb(info->se_info.cb_context, NULL, 0, -ETIME); } +static void st21nfca_se_wt_timeout(struct timer_list *t) +{ + struct st21nfca_hci_info *info = from_timer(info, t, se_info.bwi_timer); + + schedule_work(&info->se_info.timeout_work); +} + static void st21nfca_se_activation_timeout(struct timer_list *t) { struct st21nfca_hci_info *info = from_timer(info, t, @@ -296,6 +304,8 @@ int st21nfca_connectivity_event_received(struct nfc_hci_dev *hdev, u8 host, int r = 0; struct device *dev = &hdev->ndev->dev; struct nfc_evt_transaction *transaction; + u32 aid_len; + u8 params_len; pr_debug("connectivity gate event: %x\n", event); @@ -304,34 +314,48 @@ int st21nfca_connectivity_event_received(struct nfc_hci_dev *hdev, u8 host, r = nfc_se_connectivity(hdev->ndev, host); break; case ST21NFCA_EVT_TRANSACTION: - /* - * According to specification etsi 102 622 + /* According to specification etsi 102 622 * 11.2.2.4 EVT_TRANSACTION Table 52 * Description Tag Length * AID 81 5 to 16 * PARAMETERS 82 0 to 255 + * + * The key differences are aid storage length is variably sized + * in the packet, but fixed in nfc_evt_transaction, and that the aid_len + * is u8 in the packet, but u32 in the structure, and the tags in + * the packet are not included in nfc_evt_transaction. + * + * size in bytes: 1 1 5-16 1 1 0-255 + * offset: 0 1 2 aid_len + 2 aid_len + 3 aid_len + 4 + * member name: aid_tag(M) aid_len aid params_tag(M) params_len params + * example: 0x81 5-16 X 0x82 0-255 X */ - if (skb->len < NFC_MIN_AID_LENGTH + 2 && - skb->data[0] != NFC_EVT_TRANSACTION_AID_TAG) + if (skb->len < 2 || skb->data[0] != NFC_EVT_TRANSACTION_AID_TAG) return -EPROTO; - transaction = (struct nfc_evt_transaction *)devm_kzalloc(dev, - skb->len - 2, GFP_KERNEL); - if (!transaction) - return -ENOMEM; + aid_len = skb->data[1]; + + if (skb->len < aid_len + 4 || aid_len > sizeof(transaction->aid)) + return -EPROTO; - transaction->aid_len = skb->data[1]; - memcpy(transaction->aid, &skb->data[2], - transaction->aid_len); + params_len = skb->data[aid_len + 3]; - /* Check next byte is PARAMETERS tag (82) */ - if (skb->data[transaction->aid_len + 2] != - NFC_EVT_TRANSACTION_PARAMS_TAG) + /* Verify PARAMETERS tag is (82), and final check that there is enough + * space in the packet to read everything. + */ + if ((skb->data[aid_len + 2] != NFC_EVT_TRANSACTION_PARAMS_TAG) || + (skb->len < aid_len + 4 + params_len)) return -EPROTO; - transaction->params_len = skb->data[transaction->aid_len + 3]; - memcpy(transaction->params, skb->data + - transaction->aid_len + 4, transaction->params_len); + transaction = devm_kzalloc(dev, sizeof(*transaction) + params_len, GFP_KERNEL); + if (!transaction) + return -ENOMEM; + + transaction->aid_len = aid_len; + transaction->params_len = params_len; + + memcpy(transaction->aid, &skb->data[2], aid_len); + memcpy(transaction->params, &skb->data[aid_len + 4], params_len); r = nfc_se_transaction(hdev->ndev, host, transaction); break; @@ -355,6 +379,7 @@ int st21nfca_apdu_reader_event_received(struct nfc_hci_dev *hdev, switch (event) { case ST21NFCA_EVT_TRANSMIT_DATA: del_timer_sync(&info->se_info.bwi_timer); + cancel_work_sync(&info->se_info.timeout_work); info->se_info.bwi_active = false; r = nfc_hci_send_event(hdev, ST21NFCA_DEVICE_MGNT_GATE, ST21NFCA_EVT_SE_END_OF_APDU_TRANSFER, NULL, 0); @@ -384,6 +409,7 @@ void st21nfca_se_init(struct nfc_hci_dev *hdev) struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev); init_completion(&info->se_info.req_completion); + INIT_WORK(&info->se_info.timeout_work, st21nfca_se_wt_work); /* initialize timers */ timer_setup(&info->se_info.bwi_timer, st21nfca_se_wt_timeout, 0); info->se_info.bwi_active = false; @@ -411,6 +437,7 @@ void st21nfca_se_deinit(struct nfc_hci_dev *hdev) if (info->se_info.se_active) del_timer_sync(&info->se_info.se_active_timer); + cancel_work_sync(&info->se_info.timeout_work); info->se_info.bwi_active = false; info->se_info.se_active = false; } diff --git a/drivers/nfc/st21nfca/st21nfca.h b/drivers/nfc/st21nfca/st21nfca.h index 5e0de0fef1d4e..0e4a93d11efb7 100644 --- a/drivers/nfc/st21nfca/st21nfca.h +++ b/drivers/nfc/st21nfca/st21nfca.h @@ -141,6 +141,7 @@ struct st21nfca_se_info { se_io_cb_t cb; void *cb_context; + struct work_struct timeout_work; }; struct st21nfca_hci_info { diff --git a/drivers/ntb/test/ntb_msi_test.c b/drivers/ntb/test/ntb_msi_test.c index 99d826ed9c341..662067dc9ce2c 100644 --- a/drivers/ntb/test/ntb_msi_test.c +++ b/drivers/ntb/test/ntb_msi_test.c @@ -372,8 +372,10 @@ static int ntb_msit_probe(struct ntb_client *client, struct ntb_dev *ntb) if (ret) goto remove_dbgfs; - if (!nm->isr_ctx) + if (!nm->isr_ctx) { + ret = -ENOMEM; goto remove_dbgfs; + } ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index 5ce4766a6c9eb..251fe75798c13 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -597,6 +597,7 @@ static int perf_setup_inbuf(struct perf_peer *peer) return -ENOMEM; } if (!IS_ALIGNED(peer->inbuf_xlat, xlat_align)) { + ret = -EINVAL; dev_err(&perf->ntb->dev, "Unaligned inbuf allocated\n"); goto err_free_inbuf; } diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 5e5c6aafc070b..56f189c3129af 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -187,8 +187,8 @@ static int nvdimm_clear_badblocks_region(struct device *dev, void *data) ndr_end = nd_region->ndr_start + nd_region->ndr_size - 1; /* make sure we are in the region */ - if (ctx->phys < nd_region->ndr_start - || (ctx->phys + ctx->cleared) > ndr_end) + if (ctx->phys < nd_region->ndr_start || + (ctx->phys + ctx->cleared - 1) > ndr_end) return 0; sector = (ctx->phys - nd_region->ndr_start) / 512; diff --git a/drivers/nvdimm/security.c b/drivers/nvdimm/security.c index 35d265014e1ec..0e23d8c277925 100644 --- a/drivers/nvdimm/security.c +++ b/drivers/nvdimm/security.c @@ -379,11 +379,6 @@ static int security_overwrite(struct nvdimm *nvdimm, unsigned int keyid) || !nvdimm->sec.flags) return -EOPNOTSUPP; - if (dev->driver == NULL) { - dev_dbg(dev, "Unable to overwrite while DIMM active.\n"); - return -EINVAL; - } - rc = check_security_state(nvdimm); if (rc) return rc; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a5b5a2305791d..15513eb5da1e0 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3237,10 +3237,6 @@ static ssize_t nvme_sysfs_delete(struct device *dev, { struct nvme_ctrl *ctrl = dev_get_drvdata(dev); - /* Can't delete non-created controllers */ - if (!ctrl->created) - return -EBUSY; - if (device_remove_file_self(dev, attr)) nvme_delete_ctrl_sync(ctrl); return count; @@ -3896,7 +3892,14 @@ static void nvme_async_event_work(struct work_struct *work) container_of(work, struct nvme_ctrl, async_event_work); nvme_aen_uevent(ctrl); - ctrl->ops->submit_async_event(ctrl); + + /* + * The transport drivers must guarantee AER submission here is safe by + * flushing ctrl async_event_work after changing the controller state + * from LIVE and before freeing the admin queue. + */ + if (ctrl->state == NVME_CTRL_LIVE) + ctrl->ops->submit_async_event(ctrl); } static bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl) @@ -4027,6 +4030,8 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl) nvme_stop_keep_alive(ctrl); flush_work(&ctrl->async_event_work); cancel_work_sync(&ctrl->fw_act_work); + if (ctrl->ops->stop_ctrl) + ctrl->ops->stop_ctrl(ctrl); } EXPORT_SYMBOL_GPL(nvme_stop_ctrl); @@ -4040,8 +4045,8 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl) if (ctrl->queue_count > 1) { nvme_queue_scan(ctrl); nvme_start_queues(ctrl); + nvme_mpath_update(ctrl); } - ctrl->created = true; } EXPORT_SYMBOL_GPL(nvme_start_ctrl); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 590b040e90a34..811f7b96b5517 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -156,13 +156,12 @@ void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; - mutex_lock(&ctrl->scan_lock); down_read(&ctrl->namespaces_rwsem); - list_for_each_entry(ns, &ctrl->namespaces, list) - if (nvme_mpath_clear_current_path(ns)) - kblockd_schedule_work(&ns->head->requeue_work); + list_for_each_entry(ns, &ctrl->namespaces, list) { + nvme_mpath_clear_current_path(ns); + kblockd_schedule_work(&ns->head->requeue_work); + } up_read(&ctrl->namespaces_rwsem); - mutex_unlock(&ctrl->scan_lock); } static bool nvme_path_is_disabled(struct nvme_ns *ns) @@ -459,8 +458,14 @@ static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data, for (i = 0; i < le16_to_cpu(ctrl->ana_log_buf->ngrps); i++) { struct nvme_ana_group_desc *desc = base + offset; - u32 nr_nsids = le32_to_cpu(desc->nnsids); - size_t nsid_buf_size = nr_nsids * sizeof(__le32); + u32 nr_nsids; + size_t nsid_buf_size; + + if (WARN_ON_ONCE(offset > ctrl->ana_log_size - sizeof(*desc))) + return -EINVAL; + + nr_nsids = le32_to_cpu(desc->nnsids); + nsid_buf_size = nr_nsids * sizeof(__le32); if (WARN_ON_ONCE(desc->grpid == 0)) return -EINVAL; @@ -480,8 +485,6 @@ static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data, return error; offset += nsid_buf_size; - if (WARN_ON_ONCE(offset > ctrl->ana_log_size - sizeof(*desc))) - return -EINVAL; } return 0; @@ -498,8 +501,17 @@ static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc, ns->ana_grpid = le32_to_cpu(desc->grpid); ns->ana_state = desc->state; clear_bit(NVME_NS_ANA_PENDING, &ns->flags); - - if (nvme_state_is_live(ns->ana_state)) + /* + * nvme_mpath_set_live() will trigger I/O to the multipath path device + * and in turn to this path device. However we cannot accept this I/O + * if the controller is not live. This may deadlock if called from + * nvme_mpath_init_identify() and the ctrl will never complete + * initialization, preventing I/O from completing. For this case we + * will reprocess the ANA log page in nvme_mpath_update() once the + * controller is ready. + */ + if (nvme_state_is_live(ns->ana_state) && + ns->ctrl->state == NVME_CTRL_LIVE) nvme_mpath_set_live(ns); } @@ -522,14 +534,17 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl, down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) { - unsigned nsid = le32_to_cpu(desc->nsids[n]); - + unsigned nsid; +again: + nsid = le32_to_cpu(desc->nsids[n]); if (ns->head->ns_id < nsid) continue; if (ns->head->ns_id == nsid) nvme_update_ns_ana_state(desc, ns); if (++n == nr_nsids) break; + if (ns->head->ns_id > nsid) + goto again; } up_read(&ctrl->namespaces_rwsem); return 0; @@ -580,6 +595,18 @@ static void nvme_ana_work(struct work_struct *work) nvme_read_ana_log(ctrl); } +void nvme_mpath_update(struct nvme_ctrl *ctrl) +{ + u32 nr_change_groups = 0; + + if (!ctrl->ana_log_buf) + return; + + mutex_lock(&ctrl->ana_lock); + nvme_parse_ana_log(ctrl, &nr_change_groups, nvme_update_ana_state); + mutex_unlock(&ctrl->ana_lock); +} + static void nvme_anatt_timeout(struct timer_list *t) { struct nvme_ctrl *ctrl = from_timer(ctrl, t, anatt_timer); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 2df90d4355b9a..42f549d2664c5 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -253,7 +253,6 @@ struct nvme_ctrl { struct nvme_command ka_cmd; struct work_struct fw_act_work; unsigned long events; - bool created; #ifdef CONFIG_NVME_MULTIPATH /* asymmetric namespace access: */ @@ -402,6 +401,7 @@ struct nvme_ctrl_ops { void (*free_ctrl)(struct nvme_ctrl *ctrl); void (*submit_async_event)(struct nvme_ctrl *ctrl); void (*delete_ctrl)(struct nvme_ctrl *ctrl); + void (*stop_ctrl)(struct nvme_ctrl *ctrl); int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size); }; @@ -551,6 +551,7 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id); void nvme_mpath_remove_disk(struct nvme_ns_head *head); int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id); void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl); +void nvme_mpath_update(struct nvme_ctrl *ctrl); void nvme_mpath_uninit(struct nvme_ctrl *ctrl); void nvme_mpath_stop(struct nvme_ctrl *ctrl); bool nvme_mpath_clear_current_path(struct nvme_ns *ns); @@ -648,6 +649,9 @@ static inline int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, "Please enable CONFIG_NVME_MULTIPATH for full support of multi-port devices.\n"); return 0; } +static inline void nvme_mpath_update(struct nvme_ctrl *ctrl) +{ +} static inline void nvme_mpath_uninit(struct nvme_ctrl *ctrl) { } diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index cb9aa78919e7c..accbaba74ff5b 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1674,6 +1674,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset); if (IS_ERR(dev->ctrl.admin_q)) { blk_mq_free_tag_set(&dev->admin_tagset); + dev->ctrl.admin_q = NULL; return -ENOMEM; } if (!blk_get_queue(dev->ctrl.admin_q)) { diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index b8c0f75bfb7ba..d5d7b2f98edc9 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -665,13 +665,13 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl) if (ret) return ret; - ctrl->ctrl.queue_count = nr_io_queues + 1; - if (ctrl->ctrl.queue_count < 2) { + if (nr_io_queues == 0) { dev_err(ctrl->ctrl.device, "unable to set any I/O queues\n"); return -ENOMEM; } + ctrl->ctrl.queue_count = nr_io_queues + 1; dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", nr_io_queues); @@ -973,6 +973,14 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, } } +static void nvme_rdma_stop_ctrl(struct nvme_ctrl *nctrl) +{ + struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); + + cancel_work_sync(&ctrl->err_work); + cancel_delayed_work_sync(&ctrl->reconnect_work); +} + static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl) { struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); @@ -1019,11 +1027,13 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new) return ret; if (ctrl->ctrl.icdoff) { + ret = -EOPNOTSUPP; dev_err(ctrl->ctrl.device, "icdoff is not supported!\n"); goto destroy_admin; } if (!(ctrl->ctrl.sgls & (1 << 2))) { + ret = -EOPNOTSUPP; dev_err(ctrl->ctrl.device, "Mandatory keyed sgls are not supported!\n"); goto destroy_admin; @@ -1108,6 +1118,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) struct nvme_rdma_ctrl, err_work); nvme_stop_keep_alive(&ctrl->ctrl); + flush_work(&ctrl->ctrl.async_event_work); nvme_rdma_teardown_io_queues(ctrl, false); nvme_start_queues(&ctrl->ctrl); nvme_rdma_teardown_admin_queue(ctrl, false); @@ -1944,9 +1955,6 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = { static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) { - cancel_work_sync(&ctrl->err_work); - cancel_delayed_work_sync(&ctrl->reconnect_work); - nvme_rdma_teardown_io_queues(ctrl, shutdown); blk_mq_quiesce_queue(ctrl->ctrl.admin_q); if (shutdown) @@ -1996,6 +2004,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { .submit_async_event = nvme_rdma_submit_async_event, .delete_ctrl = nvme_rdma_delete_ctrl, .get_address = nvmf_get_address, + .stop_ctrl = nvme_rdma_stop_ctrl, }; /* diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 718152adc6254..2a27ac9aedbaa 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -642,17 +642,9 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb, unsigned int *offset, size_t *len) { struct nvme_tcp_data_pdu *pdu = (void *)queue->pdu; - struct nvme_tcp_request *req; - struct request *rq; - - rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id); - if (!rq) { - dev_err(queue->ctrl->ctrl.device, - "queue %d tag %#x not found\n", - nvme_tcp_queue_id(queue), pdu->command_id); - return -ENOENT; - } - req = blk_mq_rq_to_pdu(rq); + struct request *rq = + blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id); + struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); while (true) { int recv_len, ret; @@ -848,7 +840,15 @@ static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue) static void nvme_tcp_fail_request(struct nvme_tcp_request *req) { - nvme_tcp_end_request(blk_mq_rq_from_pdu(req), NVME_SC_HOST_PATH_ERROR); + if (nvme_tcp_async_req(req)) { + union nvme_result res = {}; + + nvme_complete_async_event(&req->queue->ctrl->ctrl, + cpu_to_le16(NVME_SC_HOST_PATH_ERROR), &res); + } else { + nvme_tcp_end_request(blk_mq_rq_from_pdu(req), + NVME_SC_HOST_PATH_ERROR); + } } static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) @@ -970,7 +970,7 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req) int ret; struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_EOR }; struct kvec iov = { - .iov_base = &req->ddgst + req->offset, + .iov_base = (u8 *)&req->ddgst + req->offset, .iov_len = NVME_TCP_DIGEST_LENGTH - req->offset }; @@ -1649,13 +1649,13 @@ static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) if (ret) return ret; - ctrl->queue_count = nr_io_queues + 1; - if (ctrl->queue_count < 2) { + if (nr_io_queues == 0) { dev_err(ctrl->device, "unable to set any I/O queues\n"); return -ENOMEM; } + ctrl->queue_count = nr_io_queues + 1; dev_info(ctrl->device, "creating %d I/O queues.\n", nr_io_queues); @@ -1955,6 +1955,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work) struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl; nvme_stop_keep_alive(ctrl); + flush_work(&ctrl->async_event_work); nvme_tcp_teardown_io_queues(ctrl, false); /* unquiesce to fail fast pending requests */ nvme_start_queues(ctrl); @@ -1972,9 +1973,6 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work) static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown) { - cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work); - cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work); - nvme_tcp_teardown_io_queues(ctrl, shutdown); blk_mq_quiesce_queue(ctrl->admin_q); if (shutdown) @@ -2013,6 +2011,12 @@ static void nvme_reset_ctrl_work(struct work_struct *work) nvme_tcp_reconnect_or_remove(ctrl); } +static void nvme_tcp_stop_ctrl(struct nvme_ctrl *ctrl) +{ + cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work); + cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work); +} + static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl) { struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); @@ -2321,6 +2325,7 @@ static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = { .submit_async_event = nvme_tcp_submit_async_event, .delete_ctrl = nvme_tcp_delete_ctrl, .get_address = nvmf_get_address, + .stop_ctrl = nvme_tcp_stop_ctrl, }; static bool diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index cd2c3a79f3b52..40eeb9231ca8b 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -382,7 +382,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) /* no enforcement soft-limit for maxcmd - pick arbitrary high value */ id->maxcmd = cpu_to_le16(NVMET_MAX_CMD); - id->nn = cpu_to_le32(ctrl->subsys->max_nsid); + id->nn = cpu_to_le32(NVMET_MAX_NAMESPACES); id->mnan = cpu_to_le32(NVMET_MAX_NAMESPACES); id->oncs = cpu_to_le16(NVME_CTRL_ONCS_DSM | NVME_CTRL_ONCS_WRITE_ZEROES); diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index feef15c38ec91..880bcfd5b0f22 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -120,6 +120,7 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req) if (!sqsize) { pr_warn("queue size zero!\n"); req->error_loc = offsetof(struct nvmf_connect_command, sqsize); + req->cqe->result.u32 = IPO_IATTR_CONNECT_SQE(sqsize); ret = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; goto err; } @@ -260,11 +261,11 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) } status = nvmet_install_queue(ctrl, req); - if (status) { - /* pass back cntlid that had the issue of installing queue */ - req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid); + if (status) goto out_ctrl_put; - } + + /* pass back cntlid for successful completion */ + req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid); pr_debug("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid); diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index d67789b13057e..220c2534a21e4 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "nvmet.h" #define NVMET_MAX_MPOOL_BVEC 16 @@ -257,7 +258,8 @@ static void nvmet_file_execute_rw(struct nvmet_req *req) if (req->ns->buffered_io) { if (likely(!req->f.mpool_alloc) && - nvmet_file_execute_io(req, IOCB_NOWAIT)) + (req->ns->file->f_mode & FMODE_NOWAIT) && + nvmet_file_execute_io(req, IOCB_NOWAIT)) return; nvmet_file_submit_buffered_io(req); } else diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 7cafc2f41ce2c..b06a67a02506b 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -631,10 +631,11 @@ static int nvmet_try_send_r2t(struct nvmet_tcp_cmd *cmd, bool last_in_batch) static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd) { struct nvmet_tcp_queue *queue = cmd->queue; + int left = NVME_TCP_DIGEST_LENGTH - cmd->offset; struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; struct kvec iov = { - .iov_base = &cmd->exp_ddgst + cmd->offset, - .iov_len = NVME_TCP_DIGEST_LENGTH - cmd->offset + .iov_base = (u8 *)&cmd->exp_ddgst + cmd->offset, + .iov_len = left }; int ret; @@ -643,6 +644,10 @@ static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd) return ret; cmd->offset += ret; + left -= ret; + + if (left) + return -EAGAIN; if (queue->nvme_sq.sqhd_disabled) { cmd->queue->snd_cmd = NULL; @@ -1018,7 +1023,7 @@ static int nvmet_tcp_try_recv_pdu(struct nvmet_tcp_queue *queue) } if (queue->hdr_digest && - nvmet_tcp_verify_hdgst(queue, &queue->pdu, queue->offset)) { + nvmet_tcp_verify_hdgst(queue, &queue->pdu, hdr->hlen)) { nvmet_tcp_fatal_error(queue); /* fatal */ return -EPROTO; } @@ -1341,6 +1346,7 @@ static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue) static void nvmet_tcp_release_queue_work(struct work_struct *w) { + struct page *page; struct nvmet_tcp_queue *queue = container_of(w, struct nvmet_tcp_queue, release_work); @@ -1360,6 +1366,8 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w) nvmet_tcp_free_crypto(queue); ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx); + page = virt_to_head_page(queue->pf_cache.va); + __page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias); kfree(queue); } diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index 6da270e8c6746..c0f4324d8f7c8 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -954,7 +954,8 @@ static void nvmem_shift_read_buffer_in_place(struct nvmem_cell *cell, void *buf) *p-- = 0; /* clear msb bits if any leftover in the last byte */ - *p &= GENMASK((cell->nbits%BITS_PER_BYTE) - 1, 0); + if (cell->nbits % BITS_PER_BYTE) + *p &= GENMASK((cell->nbits % BITS_PER_BYTE) - 1, 0); } static int __nvmem_cell_read(struct nvmem_device *nvmem, diff --git a/drivers/of/base.c b/drivers/of/base.c index 1d667eb730e19..b5c84607a74bf 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1366,9 +1366,14 @@ int of_phandle_iterator_next(struct of_phandle_iterator *it) * property data length */ if (it->cur + count > it->list_end) { - pr_err("%pOF: %s = %d found %d\n", - it->parent, it->cells_name, - count, it->cell_count); + if (it->cells_name) + pr_err("%pOF: %s = %d found %td\n", + it->parent, it->cells_name, + count, it->list_end - it->cur); + else + pr_err("%pOF: phandle %s needs %d, found %td\n", + it->parent, of_node_full_name(it->node), + count, it->list_end - it->cur); goto err; } } diff --git a/drivers/of/kobj.c b/drivers/of/kobj.c index a32e60b024b8d..6675b5e56960c 100644 --- a/drivers/of/kobj.c +++ b/drivers/of/kobj.c @@ -119,7 +119,7 @@ int __of_attach_node_sysfs(struct device_node *np) struct property *pp; int rc; - if (!of_kset) + if (!IS_ENABLED(CONFIG_SYSFS) || !of_kset) return 0; np->kobj.kset = of_kset; diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c index 1688f576ee8ac..8420ef42d89ea 100644 --- a/drivers/of/overlay.c +++ b/drivers/of/overlay.c @@ -170,9 +170,7 @@ static int overlay_notify(struct overlay_changeset *ovcs, ret = blocking_notifier_call_chain(&overlay_notify_chain, action, &nd); - if (ret == NOTIFY_OK || ret == NOTIFY_STOP) - return 0; - if (ret) { + if (notifier_to_errno(ret)) { ret = notifier_to_errno(ret); pr_err("overlay changeset %s notifier error %d, target: %pOF\n", of_overlay_action_name[action], ret, nd.target); diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 088c93dc0085c..e98b295b20484 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1177,13 +1177,19 @@ void dev_pm_opp_remove(struct device *dev, unsigned long freq) } EXPORT_SYMBOL_GPL(dev_pm_opp_remove); -void _opp_remove_all_static(struct opp_table *opp_table) +bool _opp_remove_all_static(struct opp_table *opp_table) { struct dev_pm_opp *opp, *tmp; + bool ret = true; mutex_lock(&opp_table->lock); - if (!opp_table->parsed_static_opps || --opp_table->parsed_static_opps) + if (!opp_table->parsed_static_opps) { + ret = false; + goto unlock; + } + + if (--opp_table->parsed_static_opps) goto unlock; list_for_each_entry_safe(opp, tmp, &opp_table->opp_list, node) { @@ -1193,6 +1199,8 @@ void _opp_remove_all_static(struct opp_table *opp_table) unlock: mutex_unlock(&opp_table->lock); + + return ret; } /** @@ -2206,13 +2214,15 @@ void _dev_pm_opp_find_and_remove_table(struct device *dev) return; } - _opp_remove_all_static(opp_table); + /* + * Drop the extra reference only if the OPP table was successfully added + * with dev_pm_opp_of_add_table() earlier. + **/ + if (_opp_remove_all_static(opp_table)) + dev_pm_opp_put_opp_table(opp_table); /* Drop reference taken by _find_opp_table() */ dev_pm_opp_put_opp_table(opp_table); - - /* Drop reference taken while the OPP table was added */ - dev_pm_opp_put_opp_table(opp_table); } /** diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 249738e1e0b7a..ba30694508153 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -95,15 +95,7 @@ static struct dev_pm_opp *_find_opp_of_np(struct opp_table *opp_table, static struct device_node *of_parse_required_opp(struct device_node *np, int index) { - struct device_node *required_np; - - required_np = of_parse_phandle(np, "required-opps", index); - if (unlikely(!required_np)) { - pr_err("%s: Unable to parse required-opps: %pOF, index: %d\n", - __func__, np, index); - } - - return required_np; + return of_parse_phandle(np, "required-opps", index); } /* The caller must call dev_pm_opp_put_opp_table() after the table is used */ @@ -647,7 +639,7 @@ static struct dev_pm_opp *_opp_add_static_v2(struct opp_table *opp_table, free_opp: _opp_free(new_opp); - return ERR_PTR(ret); + return ret ? ERR_PTR(ret) : NULL; } /* Initializes OPP tables based on new bindings */ @@ -682,8 +674,9 @@ static int _of_add_opp_table_v2(struct device *dev, struct opp_table *opp_table) } } - /* There should be one of more OPP defined */ - if (WARN_ON(!count)) { + /* There should be one or more OPPs defined */ + if (!count) { + dev_err(dev, "%s: no supported OPPs", __func__); ret = -ENOENT; goto remove_static_opp; } @@ -995,7 +988,7 @@ int of_get_required_opp_performance_state(struct device_node *np, int index) required_np = of_parse_required_opp(np, index); if (!required_np) - return -EINVAL; + return -ENODEV; opp_table = _find_table_of_opp_np(required_np); if (IS_ERR(opp_table)) { diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h index d14e27102730c..469af6954686f 100644 --- a/drivers/opp/opp.h +++ b/drivers/opp/opp.h @@ -203,7 +203,7 @@ struct opp_table { /* Routines internal to opp core */ void dev_pm_opp_get(struct dev_pm_opp *opp); -void _opp_remove_all_static(struct opp_table *opp_table); +bool _opp_remove_all_static(struct opp_table *opp_table); void _get_opp_table_kref(struct opp_table *opp_table); int _get_opp_count(struct opp_table *opp_table); struct opp_table *_find_opp_table(struct device *dev); diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index ad290f79983b9..5013568c571e5 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -1003,7 +1003,7 @@ ccio_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, ioc->usg_calls++; #endif - while(sg_dma_len(sglist) && nents--) { + while (nents && sg_dma_len(sglist)) { #ifdef CCIO_COLLECT_STATS ioc->usg_pages += sg_dma_len(sglist) >> PAGE_SHIFT; @@ -1011,6 +1011,7 @@ ccio_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, ccio_unmap_page(dev, sg_dma_address(sglist), sg_dma_len(sglist), direction, 0); ++sglist; + nents--; } DBG_RUN_SG("%s() DONE (nents %d)\n", __func__, nents); diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c index 2f1cac89ddf5c..f4de8be05c614 100644 --- a/drivers/parisc/dino.c +++ b/drivers/parisc/dino.c @@ -142,9 +142,8 @@ struct dino_device { struct pci_hba_data hba; /* 'C' inheritance - must be first */ spinlock_t dinosaur_pen; - unsigned long txn_addr; /* EIR addr to generate interrupt */ - u32 txn_data; /* EIR data assign to each dino */ u32 imr; /* IRQ's which are enabled */ + struct gsc_irq gsc_irq; int global_irq[DINO_LOCAL_IRQS]; /* map IMR bit to global irq */ #ifdef DINO_DEBUG unsigned int dino_irr0; /* save most recent IRQ line stat */ @@ -156,15 +155,6 @@ static inline struct dino_device *DINO_DEV(struct pci_hba_data *hba) return container_of(hba, struct dino_device, hba); } -/* Check if PCI device is behind a Card-mode Dino. */ -static int pci_dev_is_behind_card_dino(struct pci_dev *dev) -{ - struct dino_device *dino_dev; - - dino_dev = DINO_DEV(parisc_walk_tree(dev->bus->bridge)); - return is_card_dino(&dino_dev->hba.dev->id); -} - /* * Dino Configuration Space Accessor Functions */ @@ -348,14 +338,43 @@ static void dino_unmask_irq(struct irq_data *d) if (tmp & DINO_MASK_IRQ(local_irq)) { DBG(KERN_WARNING "%s(): IRQ asserted! (ILR 0x%x)\n", __func__, tmp); - gsc_writel(dino_dev->txn_data, dino_dev->txn_addr); + gsc_writel(dino_dev->gsc_irq.txn_data, dino_dev->gsc_irq.txn_addr); } } +#ifdef CONFIG_SMP +static int dino_set_affinity_irq(struct irq_data *d, const struct cpumask *dest, + bool force) +{ + struct dino_device *dino_dev = irq_data_get_irq_chip_data(d); + struct cpumask tmask; + int cpu_irq; + u32 eim; + + if (!cpumask_and(&tmask, dest, cpu_online_mask)) + return -EINVAL; + + cpu_irq = cpu_check_affinity(d, &tmask); + if (cpu_irq < 0) + return cpu_irq; + + dino_dev->gsc_irq.txn_addr = txn_affinity_addr(d->irq, cpu_irq); + eim = ((u32) dino_dev->gsc_irq.txn_addr) | dino_dev->gsc_irq.txn_data; + __raw_writel(eim, dino_dev->hba.base_addr+DINO_IAR0); + + irq_data_update_effective_affinity(d, &tmask); + + return IRQ_SET_MASK_OK; +} +#endif + static struct irq_chip dino_interrupt_type = { .name = "GSC-PCI", .irq_unmask = dino_unmask_irq, .irq_mask = dino_mask_irq, +#ifdef CONFIG_SMP + .irq_set_affinity = dino_set_affinity_irq, +#endif }; @@ -447,6 +466,15 @@ static void quirk_cirrus_cardbus(struct pci_dev *dev) DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_CIRRUS, PCI_DEVICE_ID_CIRRUS_6832, quirk_cirrus_cardbus ); #ifdef CONFIG_TULIP +/* Check if PCI device is behind a Card-mode Dino. */ +static int pci_dev_is_behind_card_dino(struct pci_dev *dev) +{ + struct dino_device *dino_dev; + + dino_dev = DINO_DEV(parisc_walk_tree(dev->bus->bridge)); + return is_card_dino(&dino_dev->hba.dev->id); +} + static void pci_fixup_tulip(struct pci_dev *dev) { if (!pci_dev_is_behind_card_dino(dev)) @@ -806,7 +834,6 @@ static int __init dino_common_init(struct parisc_device *dev, { int status; u32 eim; - struct gsc_irq gsc_irq; struct resource *res; pcibios_register_hba(&dino_dev->hba); @@ -821,10 +848,8 @@ static int __init dino_common_init(struct parisc_device *dev, ** still only has 11 IRQ input lines - just map some of them ** to a different processor. */ - dev->irq = gsc_alloc_irq(&gsc_irq); - dino_dev->txn_addr = gsc_irq.txn_addr; - dino_dev->txn_data = gsc_irq.txn_data; - eim = ((u32) gsc_irq.txn_addr) | gsc_irq.txn_data; + dev->irq = gsc_alloc_irq(&dino_dev->gsc_irq); + eim = ((u32) dino_dev->gsc_irq.txn_addr) | dino_dev->gsc_irq.txn_data; /* ** Dino needs a PA "IRQ" to get a processor's attention. diff --git a/drivers/parisc/gsc.c b/drivers/parisc/gsc.c index ed9371acf37eb..ec175ae998733 100644 --- a/drivers/parisc/gsc.c +++ b/drivers/parisc/gsc.c @@ -135,10 +135,41 @@ static void gsc_asic_unmask_irq(struct irq_data *d) */ } +#ifdef CONFIG_SMP +static int gsc_set_affinity_irq(struct irq_data *d, const struct cpumask *dest, + bool force) +{ + struct gsc_asic *gsc_dev = irq_data_get_irq_chip_data(d); + struct cpumask tmask; + int cpu_irq; + + if (!cpumask_and(&tmask, dest, cpu_online_mask)) + return -EINVAL; + + cpu_irq = cpu_check_affinity(d, &tmask); + if (cpu_irq < 0) + return cpu_irq; + + gsc_dev->gsc_irq.txn_addr = txn_affinity_addr(d->irq, cpu_irq); + gsc_dev->eim = ((u32) gsc_dev->gsc_irq.txn_addr) | gsc_dev->gsc_irq.txn_data; + + /* switch IRQ's for devices below LASI/WAX to other CPU */ + gsc_writel(gsc_dev->eim, gsc_dev->hpa + OFFSET_IAR); + + irq_data_update_effective_affinity(d, &tmask); + + return IRQ_SET_MASK_OK; +} +#endif + + static struct irq_chip gsc_asic_interrupt_type = { .name = "GSC-ASIC", .irq_unmask = gsc_asic_unmask_irq, .irq_mask = gsc_asic_mask_irq, +#ifdef CONFIG_SMP + .irq_set_affinity = gsc_set_affinity_irq, +#endif }; int gsc_assign_irq(struct irq_chip *type, void *data) diff --git a/drivers/parisc/gsc.h b/drivers/parisc/gsc.h index 86abad3fa2150..73cbd0bb1975a 100644 --- a/drivers/parisc/gsc.h +++ b/drivers/parisc/gsc.h @@ -31,6 +31,7 @@ struct gsc_asic { int version; int type; int eim; + struct gsc_irq gsc_irq; int global_irq[32]; }; diff --git a/drivers/parisc/lasi.c b/drivers/parisc/lasi.c index 4e4fd12c2112e..6ef621adb63a8 100644 --- a/drivers/parisc/lasi.c +++ b/drivers/parisc/lasi.c @@ -163,7 +163,6 @@ static int __init lasi_init_chip(struct parisc_device *dev) { extern void (*chassis_power_off)(void); struct gsc_asic *lasi; - struct gsc_irq gsc_irq; int ret; lasi = kzalloc(sizeof(*lasi), GFP_KERNEL); @@ -185,7 +184,7 @@ static int __init lasi_init_chip(struct parisc_device *dev) lasi_init_irq(lasi); /* the IRQ lasi should use */ - dev->irq = gsc_alloc_irq(&gsc_irq); + dev->irq = gsc_alloc_irq(&lasi->gsc_irq); if (dev->irq < 0) { printk(KERN_ERR "%s(): cannot get GSC irq\n", __func__); @@ -193,9 +192,9 @@ static int __init lasi_init_chip(struct parisc_device *dev) return -EBUSY; } - lasi->eim = ((u32) gsc_irq.txn_addr) | gsc_irq.txn_data; + lasi->eim = ((u32) lasi->gsc_irq.txn_addr) | lasi->gsc_irq.txn_data; - ret = request_irq(gsc_irq.irq, gsc_asic_intr, 0, "lasi", lasi); + ret = request_irq(lasi->gsc_irq.irq, gsc_asic_intr, 0, "lasi", lasi); if (ret < 0) { kfree(lasi); return ret; diff --git a/drivers/parisc/pdc_stable.c b/drivers/parisc/pdc_stable.c index e090978518f1a..4760f82def6ec 100644 --- a/drivers/parisc/pdc_stable.c +++ b/drivers/parisc/pdc_stable.c @@ -979,8 +979,10 @@ pdcs_register_pathentries(void) entry->kobj.kset = paths_kset; err = kobject_init_and_add(&entry->kobj, &ktype_pdcspath, NULL, "%s", entry->name); - if (err) + if (err) { + kobject_put(&entry->kobj); return err; + } /* kobject is now registered */ write_lock(&entry->rw_lock); diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index e410033b6df0c..822e5d19949e4 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -1047,7 +1047,7 @@ sba_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, spin_unlock_irqrestore(&ioc->res_lock, flags); #endif - while (sg_dma_len(sglist) && nents--) { + while (nents && sg_dma_len(sglist)) { sba_unmap_page(dev, sg_dma_address(sglist), sg_dma_len(sglist), direction, 0); @@ -1056,6 +1056,7 @@ sba_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, ioc->usingle_calls--; /* kluge since call is unmap_sg() */ #endif ++sglist; + nents--; } DBG_RUN_SG("%s() DONE (nents %d)\n", __func__, nents); diff --git a/drivers/parisc/wax.c b/drivers/parisc/wax.c index 5b6df15162354..73a2b01f8d9ca 100644 --- a/drivers/parisc/wax.c +++ b/drivers/parisc/wax.c @@ -68,7 +68,6 @@ static int __init wax_init_chip(struct parisc_device *dev) { struct gsc_asic *wax; struct parisc_device *parent; - struct gsc_irq gsc_irq; int ret; wax = kzalloc(sizeof(*wax), GFP_KERNEL); @@ -85,7 +84,7 @@ static int __init wax_init_chip(struct parisc_device *dev) wax_init_irq(wax); /* the IRQ wax should use */ - dev->irq = gsc_claim_irq(&gsc_irq, WAX_GSC_IRQ); + dev->irq = gsc_claim_irq(&wax->gsc_irq, WAX_GSC_IRQ); if (dev->irq < 0) { printk(KERN_ERR "%s(): cannot get GSC irq\n", __func__); @@ -93,9 +92,9 @@ static int __init wax_init_chip(struct parisc_device *dev) return -EBUSY; } - wax->eim = ((u32) gsc_irq.txn_addr) | gsc_irq.txn_data; + wax->eim = ((u32) wax->gsc_irq.txn_addr) | wax->gsc_irq.txn_data; - ret = request_irq(gsc_irq.irq, gsc_asic_intr, 0, "wax", wax); + ret = request_irq(wax->gsc_irq.irq, gsc_asic_intr, 0, "wax", wax); if (ret < 0) { kfree(wax); return ret; diff --git a/drivers/parport/ieee1284_ops.c b/drivers/parport/ieee1284_ops.c index 5d41dda6da4e7..75daa16f38b7f 100644 --- a/drivers/parport/ieee1284_ops.c +++ b/drivers/parport/ieee1284_ops.c @@ -535,7 +535,7 @@ size_t parport_ieee1284_ecp_read_data (struct parport *port, goto out; /* Yield the port for a while. */ - if (count && dev->port->irq != PARPORT_IRQ_NONE) { + if (dev->port->irq != PARPORT_IRQ_NONE) { parport_release (dev); schedule_timeout_interruptible(msecs_to_jiffies(40)); parport_claim_or_block (dev); diff --git a/drivers/pci/access.c b/drivers/pci/access.c index 9793f17fa1843..1647b532a71e6 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -160,9 +160,12 @@ int pci_generic_config_write32(struct pci_bus *bus, unsigned int devfn, * write happen to have any RW1C (write-one-to-clear) bits set, we * just inadvertently cleared something we shouldn't have. */ - dev_warn_ratelimited(&bus->dev, "%d-byte config write to %04x:%02x:%02x.%d offset %#x may corrupt adjacent RW1C bits\n", - size, pci_domain_nr(bus), bus->number, - PCI_SLOT(devfn), PCI_FUNC(devfn), where); + if (!bus->unsafe_warn) { + dev_warn(&bus->dev, "%d-byte config write to %04x:%02x:%02x.%d offset %#x may corrupt adjacent RW1C bits\n", + size, pci_domain_nr(bus), bus->number, + PCI_SLOT(devfn), PCI_FUNC(devfn), where); + bus->unsafe_warn = 1; + } mask = ~(((1 << (size * 8)) - 1) << ((where & 0x3) * 8)); tmp = readl(addr) & mask; diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c index acfbd34032a86..b34b52b364d5f 100644 --- a/drivers/pci/controller/dwc/pci-imx6.c +++ b/drivers/pci/controller/dwc/pci-imx6.c @@ -413,6 +413,11 @@ static void imx6_pcie_assert_core_reset(struct imx6_pcie *imx6_pcie) dev_err(dev, "failed to disable vpcie regulator: %d\n", ret); } + + /* Some boards don't have PCIe reset GPIO. */ + if (gpio_is_valid(imx6_pcie->reset_gpio)) + gpio_set_value_cansleep(imx6_pcie->reset_gpio, + imx6_pcie->gpio_active_high); } static unsigned int imx6_pcie_grp_offset(const struct imx6_pcie *imx6_pcie) @@ -535,15 +540,6 @@ static void imx6_pcie_deassert_core_reset(struct imx6_pcie *imx6_pcie) /* allow the clocks to stabilize */ usleep_range(200, 500); - /* Some boards don't have PCIe reset GPIO. */ - if (gpio_is_valid(imx6_pcie->reset_gpio)) { - gpio_set_value_cansleep(imx6_pcie->reset_gpio, - imx6_pcie->gpio_active_high); - msleep(100); - gpio_set_value_cansleep(imx6_pcie->reset_gpio, - !imx6_pcie->gpio_active_high); - } - switch (imx6_pcie->drvdata->variant) { case IMX8MQ: reset_control_deassert(imx6_pcie->pciephy_reset); @@ -586,6 +582,15 @@ static void imx6_pcie_deassert_core_reset(struct imx6_pcie *imx6_pcie) break; } + /* Some boards don't have PCIe reset GPIO. */ + if (gpio_is_valid(imx6_pcie->reset_gpio)) { + msleep(100); + gpio_set_value_cansleep(imx6_pcie->reset_gpio, + !imx6_pcie->gpio_active_high); + /* Wait for 100ms after PERST# deassertion (PCIe r5.0, 6.6.1) */ + msleep(100); + } + return; err_ref_clk: diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index a8eab4e67af10..17f411772f0ca 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -1343,22 +1343,21 @@ static int qcom_pcie_probe(struct platform_device *pdev) } ret = phy_init(pcie->phy); - if (ret) { - pm_runtime_disable(&pdev->dev); + if (ret) goto err_pm_runtime_put; - } platform_set_drvdata(pdev, pcie); ret = dw_pcie_host_init(pp); if (ret) { dev_err(dev, "cannot initialize host\n"); - pm_runtime_disable(&pdev->dev); - goto err_pm_runtime_put; + goto err_phy_exit; } return 0; +err_phy_exit: + phy_exit(pcie->phy); err_pm_runtime_put: pm_runtime_put(dev); pm_runtime_disable(dev); diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c index 0a2902569f140..7219ca39aa909 100644 --- a/drivers/pci/controller/pci-aardvark.c +++ b/drivers/pci/controller/pci-aardvark.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include @@ -17,6 +18,7 @@ #include #include #include +#include #include #include "../pci.h" @@ -25,21 +27,8 @@ /* PCIe core registers */ #define PCIE_CORE_DEV_ID_REG 0x0 #define PCIE_CORE_CMD_STATUS_REG 0x4 -#define PCIE_CORE_CMD_IO_ACCESS_EN BIT(0) -#define PCIE_CORE_CMD_MEM_ACCESS_EN BIT(1) -#define PCIE_CORE_CMD_MEM_IO_REQ_EN BIT(2) #define PCIE_CORE_DEV_REV_REG 0x8 #define PCIE_CORE_PCIEXP_CAP 0xc0 -#define PCIE_CORE_DEV_CTRL_STATS_REG 0xc8 -#define PCIE_CORE_DEV_CTRL_STATS_RELAX_ORDER_DISABLE (0 << 4) -#define PCIE_CORE_DEV_CTRL_STATS_MAX_PAYLOAD_SZ_SHIFT 5 -#define PCIE_CORE_DEV_CTRL_STATS_SNOOP_DISABLE (0 << 11) -#define PCIE_CORE_DEV_CTRL_STATS_MAX_RD_REQ_SIZE_SHIFT 12 -#define PCIE_CORE_DEV_CTRL_STATS_MAX_RD_REQ_SZ 0x2 -#define PCIE_CORE_LINK_CTRL_STAT_REG 0xd0 -#define PCIE_CORE_LINK_L0S_ENTRY BIT(0) -#define PCIE_CORE_LINK_TRAINING BIT(5) -#define PCIE_CORE_LINK_WIDTH_SHIFT 20 #define PCIE_CORE_ERR_CAPCTL_REG 0x118 #define PCIE_CORE_ERR_CAPCTL_ECRC_CHK_TX BIT(5) #define PCIE_CORE_ERR_CAPCTL_ECRC_CHK_TX_EN BIT(6) @@ -62,6 +51,7 @@ #define PIO_COMPLETION_STATUS_CRS 2 #define PIO_COMPLETION_STATUS_CA 4 #define PIO_NON_POSTED_REQ BIT(10) +#define PIO_ERR_STATUS BIT(11) #define PIO_ADDR_LS (PIO_BASE_ADDR + 0x8) #define PIO_ADDR_MS (PIO_BASE_ADDR + 0xc) #define PIO_WR_DATA (PIO_BASE_ADDR + 0x10) @@ -107,26 +97,110 @@ #define PCIE_ISR0_MSI_INT_PENDING BIT(24) #define PCIE_ISR0_INTX_ASSERT(val) BIT(16 + (val)) #define PCIE_ISR0_INTX_DEASSERT(val) BIT(20 + (val)) -#define PCIE_ISR0_ALL_MASK GENMASK(26, 0) +#define PCIE_ISR0_ALL_MASK GENMASK(31, 0) #define PCIE_ISR1_REG (CONTROL_BASE_ADDR + 0x48) #define PCIE_ISR1_MASK_REG (CONTROL_BASE_ADDR + 0x4C) #define PCIE_ISR1_POWER_STATE_CHANGE BIT(4) #define PCIE_ISR1_FLUSH BIT(5) #define PCIE_ISR1_INTX_ASSERT(val) BIT(8 + (val)) -#define PCIE_ISR1_ALL_MASK GENMASK(11, 4) +#define PCIE_ISR1_ALL_MASK GENMASK(31, 0) #define PCIE_MSI_ADDR_LOW_REG (CONTROL_BASE_ADDR + 0x50) #define PCIE_MSI_ADDR_HIGH_REG (CONTROL_BASE_ADDR + 0x54) #define PCIE_MSI_STATUS_REG (CONTROL_BASE_ADDR + 0x58) #define PCIE_MSI_MASK_REG (CONTROL_BASE_ADDR + 0x5C) +#define PCIE_MSI_ALL_MASK GENMASK(31, 0) #define PCIE_MSI_PAYLOAD_REG (CONTROL_BASE_ADDR + 0x9C) +#define PCIE_MSI_DATA_MASK GENMASK(15, 0) + +/* PCIe window configuration */ +#define OB_WIN_BASE_ADDR 0x4c00 +#define OB_WIN_BLOCK_SIZE 0x20 +#define OB_WIN_COUNT 8 +#define OB_WIN_REG_ADDR(win, offset) (OB_WIN_BASE_ADDR + \ + OB_WIN_BLOCK_SIZE * (win) + \ + (offset)) +#define OB_WIN_MATCH_LS(win) OB_WIN_REG_ADDR(win, 0x00) +#define OB_WIN_ENABLE BIT(0) +#define OB_WIN_MATCH_MS(win) OB_WIN_REG_ADDR(win, 0x04) +#define OB_WIN_REMAP_LS(win) OB_WIN_REG_ADDR(win, 0x08) +#define OB_WIN_REMAP_MS(win) OB_WIN_REG_ADDR(win, 0x0c) +#define OB_WIN_MASK_LS(win) OB_WIN_REG_ADDR(win, 0x10) +#define OB_WIN_MASK_MS(win) OB_WIN_REG_ADDR(win, 0x14) +#define OB_WIN_ACTIONS(win) OB_WIN_REG_ADDR(win, 0x18) +#define OB_WIN_DEFAULT_ACTIONS (OB_WIN_ACTIONS(OB_WIN_COUNT-1) + 0x4) +#define OB_WIN_FUNC_NUM_MASK GENMASK(31, 24) +#define OB_WIN_FUNC_NUM_SHIFT 24 +#define OB_WIN_FUNC_NUM_ENABLE BIT(23) +#define OB_WIN_BUS_NUM_BITS_MASK GENMASK(22, 20) +#define OB_WIN_BUS_NUM_BITS_SHIFT 20 +#define OB_WIN_MSG_CODE_ENABLE BIT(22) +#define OB_WIN_MSG_CODE_MASK GENMASK(21, 14) +#define OB_WIN_MSG_CODE_SHIFT 14 +#define OB_WIN_MSG_PAYLOAD_LEN BIT(12) +#define OB_WIN_ATTR_ENABLE BIT(11) +#define OB_WIN_ATTR_TC_MASK GENMASK(10, 8) +#define OB_WIN_ATTR_TC_SHIFT 8 +#define OB_WIN_ATTR_RELAXED BIT(7) +#define OB_WIN_ATTR_NOSNOOP BIT(6) +#define OB_WIN_ATTR_POISON BIT(5) +#define OB_WIN_ATTR_IDO BIT(4) +#define OB_WIN_TYPE_MASK GENMASK(3, 0) +#define OB_WIN_TYPE_SHIFT 0 +#define OB_WIN_TYPE_MEM 0x0 +#define OB_WIN_TYPE_IO 0x4 +#define OB_WIN_TYPE_CONFIG_TYPE0 0x8 +#define OB_WIN_TYPE_CONFIG_TYPE1 0x9 +#define OB_WIN_TYPE_MSG 0xc /* LMI registers base address and register offsets */ #define LMI_BASE_ADDR 0x6000 #define CFG_REG (LMI_BASE_ADDR + 0x0) #define LTSSM_SHIFT 24 #define LTSSM_MASK 0x3f -#define LTSSM_L0 0x10 #define RC_BAR_CONFIG 0x300 + +/* LTSSM values in CFG_REG */ +enum { + LTSSM_DETECT_QUIET = 0x0, + LTSSM_DETECT_ACTIVE = 0x1, + LTSSM_POLLING_ACTIVE = 0x2, + LTSSM_POLLING_COMPLIANCE = 0x3, + LTSSM_POLLING_CONFIGURATION = 0x4, + LTSSM_CONFIG_LINKWIDTH_START = 0x5, + LTSSM_CONFIG_LINKWIDTH_ACCEPT = 0x6, + LTSSM_CONFIG_LANENUM_ACCEPT = 0x7, + LTSSM_CONFIG_LANENUM_WAIT = 0x8, + LTSSM_CONFIG_COMPLETE = 0x9, + LTSSM_CONFIG_IDLE = 0xa, + LTSSM_RECOVERY_RCVR_LOCK = 0xb, + LTSSM_RECOVERY_SPEED = 0xc, + LTSSM_RECOVERY_RCVR_CFG = 0xd, + LTSSM_RECOVERY_IDLE = 0xe, + LTSSM_L0 = 0x10, + LTSSM_RX_L0S_ENTRY = 0x11, + LTSSM_RX_L0S_IDLE = 0x12, + LTSSM_RX_L0S_FTS = 0x13, + LTSSM_TX_L0S_ENTRY = 0x14, + LTSSM_TX_L0S_IDLE = 0x15, + LTSSM_TX_L0S_FTS = 0x16, + LTSSM_L1_ENTRY = 0x17, + LTSSM_L1_IDLE = 0x18, + LTSSM_L2_IDLE = 0x19, + LTSSM_L2_TRANSMIT_WAKE = 0x1a, + LTSSM_DISABLED = 0x20, + LTSSM_LOOPBACK_ENTRY_MASTER = 0x21, + LTSSM_LOOPBACK_ACTIVE_MASTER = 0x22, + LTSSM_LOOPBACK_EXIT_MASTER = 0x23, + LTSSM_LOOPBACK_ENTRY_SLAVE = 0x24, + LTSSM_LOOPBACK_ACTIVE_SLAVE = 0x25, + LTSSM_LOOPBACK_EXIT_SLAVE = 0x26, + LTSSM_HOT_RESET = 0x27, + LTSSM_RECOVERY_EQUALIZATION_PHASE0 = 0x28, + LTSSM_RECOVERY_EQUALIZATION_PHASE1 = 0x29, + LTSSM_RECOVERY_EQUALIZATION_PHASE2 = 0x2a, + LTSSM_RECOVERY_EQUALIZATION_PHASE3 = 0x2b, +}; + #define VENDOR_ID_REG (LMI_BASE_ADDR + 0x44) /* PCIe core controller registers */ @@ -159,7 +233,7 @@ #define PCIE_IRQ_MSI_INT2_DET BIT(21) #define PCIE_IRQ_RC_DBELL_DET BIT(22) #define PCIE_IRQ_EP_STATUS BIT(23) -#define PCIE_IRQ_ALL_MASK 0xfff0fb +#define PCIE_IRQ_ALL_MASK GENMASK(31, 0) #define PCIE_IRQ_ENABLE_INTS_MASK PCIE_IRQ_CORE_INT /* Transaction types */ @@ -176,7 +250,7 @@ (PCIE_CONF_BUS(bus) | PCIE_CONF_DEV(PCI_SLOT(devfn)) | \ PCIE_CONF_FUNC(PCI_FUNC(devfn)) | PCIE_CONF_REG(where)) -#define PIO_RETRY_CNT 500 +#define PIO_RETRY_CNT 750000 /* 1.5 s */ #define PIO_RETRY_DELAY 2 /* 2 us*/ #define LINK_WAIT_MAX_RETRIES 10 @@ -187,12 +261,22 @@ #define MSI_IRQ_NUM 32 +#define CFG_RD_CRS_VAL 0xffff0001 + struct advk_pcie { struct platform_device *pdev; void __iomem *base; struct list_head resources; + struct { + phys_addr_t match; + phys_addr_t remap; + phys_addr_t mask; + u32 actions; + } wins[OB_WIN_COUNT]; + u8 wins_count; struct irq_domain *irq_domain; struct irq_chip irq_chip; + raw_spinlock_t irq_lock; struct irq_domain *msi_domain; struct irq_domain *msi_inner_domain; struct irq_chip msi_bottom_irq_chip; @@ -202,7 +286,9 @@ struct advk_pcie { struct mutex msi_used_lock; u16 msi_msg; int root_bus_nr; + int link_gen; struct pci_bridge_emul bridge; + struct gpio_desc *reset_gpio; }; static inline void advk_writel(struct advk_pcie *pcie, u32 val, u64 reg) @@ -215,31 +301,63 @@ static inline u32 advk_readl(struct advk_pcie *pcie, u64 reg) return readl(pcie->base + reg); } -static int advk_pcie_link_up(struct advk_pcie *pcie) +static u8 advk_pcie_ltssm_state(struct advk_pcie *pcie) { - u32 val, ltssm_state; + u32 val; + u8 ltssm_state; val = advk_readl(pcie, CFG_REG); ltssm_state = (val >> LTSSM_SHIFT) & LTSSM_MASK; - return ltssm_state >= LTSSM_L0; + return ltssm_state; +} + +static inline bool advk_pcie_link_up(struct advk_pcie *pcie) +{ + /* check if LTSSM is in normal operation - some L* state */ + u8 ltssm_state = advk_pcie_ltssm_state(pcie); + return ltssm_state >= LTSSM_L0 && ltssm_state < LTSSM_DISABLED; +} + +static inline bool advk_pcie_link_active(struct advk_pcie *pcie) +{ + /* + * According to PCIe Base specification 3.0, Table 4-14: Link + * Status Mapped to the LTSSM, and 4.2.6.3.6 Configuration.Idle + * is Link Up mapped to LTSSM Configuration.Idle, Recovery, L0, + * L0s, L1 and L2 states. And according to 3.2.1. Data Link + * Control and Management State Machine Rules is DL Up status + * reported in DL Active state. + */ + u8 ltssm_state = advk_pcie_ltssm_state(pcie); + return ltssm_state >= LTSSM_CONFIG_IDLE && ltssm_state < LTSSM_DISABLED; +} + +static inline bool advk_pcie_link_training(struct advk_pcie *pcie) +{ + /* + * According to PCIe Base specification 3.0, Table 4-14: Link + * Status Mapped to the LTSSM is Link Training mapped to LTSSM + * Configuration and Recovery states. + */ + u8 ltssm_state = advk_pcie_ltssm_state(pcie); + return ((ltssm_state >= LTSSM_CONFIG_LINKWIDTH_START && + ltssm_state < LTSSM_L0) || + (ltssm_state >= LTSSM_RECOVERY_EQUALIZATION_PHASE0 && + ltssm_state <= LTSSM_RECOVERY_EQUALIZATION_PHASE3)); } static int advk_pcie_wait_for_link(struct advk_pcie *pcie) { - struct device *dev = &pcie->pdev->dev; int retries; /* check if the link is up or not */ for (retries = 0; retries < LINK_WAIT_MAX_RETRIES; retries++) { - if (advk_pcie_link_up(pcie)) { - dev_info(dev, "link up\n"); + if (advk_pcie_link_up(pcie)) return 0; - } usleep_range(LINK_WAIT_USLEEP_MIN, LINK_WAIT_USLEEP_MAX); } - dev_err(dev, "link never came up\n"); return -ETIMEDOUT; } @@ -248,15 +366,121 @@ static void advk_pcie_wait_for_retrain(struct advk_pcie *pcie) size_t retries; for (retries = 0; retries < RETRAIN_WAIT_MAX_RETRIES; ++retries) { - if (!advk_pcie_link_up(pcie)) + if (advk_pcie_link_training(pcie)) break; udelay(RETRAIN_WAIT_USLEEP_US); } } +static void advk_pcie_issue_perst(struct advk_pcie *pcie) +{ + if (!pcie->reset_gpio) + return; + + /* 10ms delay is needed for some cards */ + dev_info(&pcie->pdev->dev, "issuing PERST via reset GPIO for 10ms\n"); + gpiod_set_value_cansleep(pcie->reset_gpio, 1); + usleep_range(10000, 11000); + gpiod_set_value_cansleep(pcie->reset_gpio, 0); +} + +static void advk_pcie_train_link(struct advk_pcie *pcie) +{ + struct device *dev = &pcie->pdev->dev; + u32 reg; + int ret; + + /* + * Setup PCIe rev / gen compliance based on device tree property + * 'max-link-speed' which also forces maximal link speed. + */ + reg = advk_readl(pcie, PCIE_CORE_CTRL0_REG); + reg &= ~PCIE_GEN_SEL_MSK; + if (pcie->link_gen == 3) + reg |= SPEED_GEN_3; + else if (pcie->link_gen == 2) + reg |= SPEED_GEN_2; + else + reg |= SPEED_GEN_1; + advk_writel(pcie, reg, PCIE_CORE_CTRL0_REG); + + /* + * Set maximal link speed value also into PCIe Link Control 2 register. + * Armada 3700 Functional Specification says that default value is based + * on SPEED_GEN but tests showed that default value is always 8.0 GT/s. + */ + reg = advk_readl(pcie, PCIE_CORE_PCIEXP_CAP + PCI_EXP_LNKCTL2); + reg &= ~PCI_EXP_LNKCTL2_TLS; + if (pcie->link_gen == 3) + reg |= PCI_EXP_LNKCTL2_TLS_8_0GT; + else if (pcie->link_gen == 2) + reg |= PCI_EXP_LNKCTL2_TLS_5_0GT; + else + reg |= PCI_EXP_LNKCTL2_TLS_2_5GT; + advk_writel(pcie, reg, PCIE_CORE_PCIEXP_CAP + PCI_EXP_LNKCTL2); + + /* Enable link training after selecting PCIe generation */ + reg = advk_readl(pcie, PCIE_CORE_CTRL0_REG); + reg |= LINK_TRAINING_EN; + advk_writel(pcie, reg, PCIE_CORE_CTRL0_REG); + + /* + * Reset PCIe card via PERST# signal. Some cards are not detected + * during link training when they are in some non-initial state. + */ + advk_pcie_issue_perst(pcie); + + /* + * PERST# signal could have been asserted by pinctrl subsystem before + * probe() callback has been called or issued explicitly by reset gpio + * function advk_pcie_issue_perst(), making the endpoint going into + * fundamental reset. As required by PCI Express spec (PCI Express + * Base Specification, REV. 4.0 PCI Express, February 19 2014, 6.6.1 + * Conventional Reset) a delay for at least 100ms after such a reset + * before sending a Configuration Request to the device is needed. + * So wait until PCIe link is up. Function advk_pcie_wait_for_link() + * waits for link at least 900ms. + */ + ret = advk_pcie_wait_for_link(pcie); + if (ret < 0) + dev_err(dev, "link never came up\n"); + else + dev_info(dev, "link up\n"); +} + +/* + * Set PCIe address window register which could be used for memory + * mapping. + */ +static void advk_pcie_set_ob_win(struct advk_pcie *pcie, u8 win_num, + phys_addr_t match, phys_addr_t remap, + phys_addr_t mask, u32 actions) +{ + advk_writel(pcie, OB_WIN_ENABLE | + lower_32_bits(match), OB_WIN_MATCH_LS(win_num)); + advk_writel(pcie, upper_32_bits(match), OB_WIN_MATCH_MS(win_num)); + advk_writel(pcie, lower_32_bits(remap), OB_WIN_REMAP_LS(win_num)); + advk_writel(pcie, upper_32_bits(remap), OB_WIN_REMAP_MS(win_num)); + advk_writel(pcie, lower_32_bits(mask), OB_WIN_MASK_LS(win_num)); + advk_writel(pcie, upper_32_bits(mask), OB_WIN_MASK_MS(win_num)); + advk_writel(pcie, actions, OB_WIN_ACTIONS(win_num)); +} + +static void advk_pcie_disable_ob_win(struct advk_pcie *pcie, u8 win_num) +{ + advk_writel(pcie, 0, OB_WIN_MATCH_LS(win_num)); + advk_writel(pcie, 0, OB_WIN_MATCH_MS(win_num)); + advk_writel(pcie, 0, OB_WIN_REMAP_LS(win_num)); + advk_writel(pcie, 0, OB_WIN_REMAP_MS(win_num)); + advk_writel(pcie, 0, OB_WIN_MASK_LS(win_num)); + advk_writel(pcie, 0, OB_WIN_MASK_MS(win_num)); + advk_writel(pcie, 0, OB_WIN_ACTIONS(win_num)); +} + static void advk_pcie_setup_hw(struct advk_pcie *pcie) { u32 reg; + int i; /* Set to Direct mode */ reg = advk_readl(pcie, CTRL_CONFIG_REG); @@ -279,6 +503,31 @@ static void advk_pcie_setup_hw(struct advk_pcie *pcie) reg = (PCI_VENDOR_ID_MARVELL << 16) | PCI_VENDOR_ID_MARVELL; advk_writel(pcie, reg, VENDOR_ID_REG); + /* + * Change Class Code of PCI Bridge device to PCI Bridge (0x600400), + * because the default value is Mass storage controller (0x010400). + * + * Note that this Aardvark PCI Bridge does not have compliant Type 1 + * Configuration Space and it even cannot be accessed via Aardvark's + * PCI config space access method. Something like config space is + * available in internal Aardvark registers starting at offset 0x0 + * and is reported as Type 0. In range 0x10 - 0x34 it has totally + * different registers. + * + * Therefore driver uses emulation of PCI Bridge which emulates + * access to configuration space via internal Aardvark registers or + * emulated configuration buffer. + */ + reg = advk_readl(pcie, PCIE_CORE_DEV_REV_REG); + reg &= ~0xffffff00; + reg |= (PCI_CLASS_BRIDGE_PCI << 8) << 8; + advk_writel(pcie, reg, PCIE_CORE_DEV_REV_REG); + + /* Disable Root Bridge I/O space, memory space and bus mastering */ + reg = advk_readl(pcie, PCIE_CORE_CMD_STATUS_REG); + reg &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); + advk_writel(pcie, reg, PCIE_CORE_CMD_STATUS_REG); + /* Set Advanced Error Capabilities and Control PF0 register */ reg = PCIE_CORE_ERR_CAPCTL_ECRC_CHK_TX | PCIE_CORE_ERR_CAPCTL_ECRC_CHK_TX_EN | @@ -286,42 +535,34 @@ static void advk_pcie_setup_hw(struct advk_pcie *pcie) PCIE_CORE_ERR_CAPCTL_ECRC_CHCK_RCV; advk_writel(pcie, reg, PCIE_CORE_ERR_CAPCTL_REG); - /* Set PCIe Device Control and Status 1 PF0 register */ - reg = PCIE_CORE_DEV_CTRL_STATS_RELAX_ORDER_DISABLE | - (7 << PCIE_CORE_DEV_CTRL_STATS_MAX_PAYLOAD_SZ_SHIFT) | - PCIE_CORE_DEV_CTRL_STATS_SNOOP_DISABLE | - (PCIE_CORE_DEV_CTRL_STATS_MAX_RD_REQ_SZ << - PCIE_CORE_DEV_CTRL_STATS_MAX_RD_REQ_SIZE_SHIFT); - advk_writel(pcie, reg, PCIE_CORE_DEV_CTRL_STATS_REG); + /* Set PCIe Device Control register */ + reg = advk_readl(pcie, PCIE_CORE_PCIEXP_CAP + PCI_EXP_DEVCTL); + reg &= ~PCI_EXP_DEVCTL_RELAX_EN; + reg &= ~PCI_EXP_DEVCTL_NOSNOOP_EN; + reg &= ~PCI_EXP_DEVCTL_PAYLOAD; + reg &= ~PCI_EXP_DEVCTL_READRQ; + reg |= PCI_EXP_DEVCTL_PAYLOAD_512B; + reg |= PCI_EXP_DEVCTL_READRQ_512B; + advk_writel(pcie, reg, PCIE_CORE_PCIEXP_CAP + PCI_EXP_DEVCTL); /* Program PCIe Control 2 to disable strict ordering */ reg = PCIE_CORE_CTRL2_RESERVED | PCIE_CORE_CTRL2_TD_ENABLE; advk_writel(pcie, reg, PCIE_CORE_CTRL2_REG); - /* Set GEN2 */ - reg = advk_readl(pcie, PCIE_CORE_CTRL0_REG); - reg &= ~PCIE_GEN_SEL_MSK; - reg |= SPEED_GEN_2; - advk_writel(pcie, reg, PCIE_CORE_CTRL0_REG); - /* Set lane X1 */ reg = advk_readl(pcie, PCIE_CORE_CTRL0_REG); reg &= ~LANE_CNT_MSK; reg |= LANE_COUNT_1; advk_writel(pcie, reg, PCIE_CORE_CTRL0_REG); - /* Enable link training */ - reg = advk_readl(pcie, PCIE_CORE_CTRL0_REG); - reg |= LINK_TRAINING_EN; - advk_writel(pcie, reg, PCIE_CORE_CTRL0_REG); - /* Enable MSI */ reg = advk_readl(pcie, PCIE_CORE_CTRL2_REG); reg |= PCIE_CORE_CTRL2_MSI_ENABLE; advk_writel(pcie, reg, PCIE_CORE_CTRL2_REG); /* Clear all interrupts */ + advk_writel(pcie, PCIE_MSI_ALL_MASK, PCIE_MSI_STATUS_REG); advk_writel(pcie, PCIE_ISR0_ALL_MASK, PCIE_ISR0_REG); advk_writel(pcie, PCIE_ISR1_ALL_MASK, PCIE_ISR1_REG); advk_writel(pcie, PCIE_IRQ_ALL_MASK, HOST_CTRL_INT_STATUS_REG); @@ -334,71 +575,165 @@ static void advk_pcie_setup_hw(struct advk_pcie *pcie) advk_writel(pcie, PCIE_ISR1_ALL_MASK, PCIE_ISR1_MASK_REG); /* Unmask all MSIs */ - advk_writel(pcie, 0, PCIE_MSI_MASK_REG); + advk_writel(pcie, ~(u32)PCIE_MSI_ALL_MASK, PCIE_MSI_MASK_REG); /* Enable summary interrupt for GIC SPI source */ reg = PCIE_IRQ_ALL_MASK & (~PCIE_IRQ_ENABLE_INTS_MASK); advk_writel(pcie, reg, HOST_CTRL_INT_MASK_REG); + /* + * Enable AXI address window location generation: + * When it is enabled, the default outbound window + * configurations (Default User Field: 0xD0074CFC) + * are used to transparent address translation for + * the outbound transactions. Thus, PCIe address + * windows are not required for transparent memory + * access when default outbound window configuration + * is set for memory access. + */ reg = advk_readl(pcie, PCIE_CORE_CTRL2_REG); reg |= PCIE_CORE_CTRL2_OB_WIN_ENABLE; advk_writel(pcie, reg, PCIE_CORE_CTRL2_REG); - /* Bypass the address window mapping for PIO */ + /* + * Set memory access in Default User Field so it + * is not required to configure PCIe address for + * transparent memory access. + */ + advk_writel(pcie, OB_WIN_TYPE_MEM, OB_WIN_DEFAULT_ACTIONS); + + /* + * Bypass the address window mapping for PIO: + * Since PIO access already contains all required + * info over AXI interface by PIO registers, the + * address window is not required. + */ reg = advk_readl(pcie, PIO_CTRL); reg |= PIO_CTRL_ADDR_WIN_DISABLE; advk_writel(pcie, reg, PIO_CTRL); - /* Start link training */ - reg = advk_readl(pcie, PCIE_CORE_LINK_CTRL_STAT_REG); - reg |= PCIE_CORE_LINK_TRAINING; - advk_writel(pcie, reg, PCIE_CORE_LINK_CTRL_STAT_REG); + /* + * Configure PCIe address windows for non-memory or + * non-transparent access as by default PCIe uses + * transparent memory access. + */ + for (i = 0; i < pcie->wins_count; i++) + advk_pcie_set_ob_win(pcie, i, + pcie->wins[i].match, pcie->wins[i].remap, + pcie->wins[i].mask, pcie->wins[i].actions); - advk_pcie_wait_for_link(pcie); + /* Disable remaining PCIe outbound windows */ + for (i = pcie->wins_count; i < OB_WIN_COUNT; i++) + advk_pcie_disable_ob_win(pcie, i); - reg = advk_readl(pcie, PCIE_CORE_CMD_STATUS_REG); - reg |= PCIE_CORE_CMD_MEM_ACCESS_EN | - PCIE_CORE_CMD_IO_ACCESS_EN | - PCIE_CORE_CMD_MEM_IO_REQ_EN; - advk_writel(pcie, reg, PCIE_CORE_CMD_STATUS_REG); + advk_pcie_train_link(pcie); } -static void advk_pcie_check_pio_status(struct advk_pcie *pcie) +static int advk_pcie_check_pio_status(struct advk_pcie *pcie, bool allow_crs, u32 *val) { struct device *dev = &pcie->pdev->dev; u32 reg; unsigned int status; char *strcomp_status, *str_posted; + int ret; reg = advk_readl(pcie, PIO_STAT); status = (reg & PIO_COMPLETION_STATUS_MASK) >> PIO_COMPLETION_STATUS_SHIFT; - if (!status) - return; - + /* + * According to HW spec, the PIO status check sequence as below: + * 1) even if COMPLETION_STATUS(bit9:7) indicates successful, + * it still needs to check Error Status(bit11), only when this bit + * indicates no error happen, the operation is successful. + * 2) value Unsupported Request(1) of COMPLETION_STATUS(bit9:7) only + * means a PIO write error, and for PIO read it is successful with + * a read value of 0xFFFFFFFF. + * 3) value Completion Retry Status(CRS) of COMPLETION_STATUS(bit9:7) + * only means a PIO write error, and for PIO read it is successful + * with a read value of 0xFFFF0001. + * 4) value Completer Abort (CA) of COMPLETION_STATUS(bit9:7) means + * error for both PIO read and PIO write operation. + * 5) other errors are indicated as 'unknown'. + */ switch (status) { + case PIO_COMPLETION_STATUS_OK: + if (reg & PIO_ERR_STATUS) { + strcomp_status = "COMP_ERR"; + ret = -EFAULT; + break; + } + /* Get the read result */ + if (val) + *val = advk_readl(pcie, PIO_RD_DATA); + /* No error */ + strcomp_status = NULL; + ret = 0; + break; case PIO_COMPLETION_STATUS_UR: strcomp_status = "UR"; + ret = -EOPNOTSUPP; break; case PIO_COMPLETION_STATUS_CRS: + if (allow_crs && val) { + /* PCIe r4.0, sec 2.3.2, says: + * If CRS Software Visibility is enabled: + * For a Configuration Read Request that includes both + * bytes of the Vendor ID field of a device Function's + * Configuration Space Header, the Root Complex must + * complete the Request to the host by returning a + * read-data value of 0001h for the Vendor ID field and + * all '1's for any additional bytes included in the + * request. + * + * So CRS in this case is not an error status. + */ + *val = CFG_RD_CRS_VAL; + strcomp_status = NULL; + ret = 0; + break; + } + /* PCIe r4.0, sec 2.3.2, says: + * If CRS Software Visibility is not enabled, the Root Complex + * must re-issue the Configuration Request as a new Request. + * If CRS Software Visibility is enabled: For a Configuration + * Write Request or for any other Configuration Read Request, + * the Root Complex must re-issue the Configuration Request as + * a new Request. + * A Root Complex implementation may choose to limit the number + * of Configuration Request/CRS Completion Status loops before + * determining that something is wrong with the target of the + * Request and taking appropriate action, e.g., complete the + * Request to the host as a failed transaction. + * + * So return -EAGAIN and caller (pci-aardvark.c driver) will + * re-issue request again up to the PIO_RETRY_CNT retries. + */ strcomp_status = "CRS"; + ret = -EAGAIN; break; case PIO_COMPLETION_STATUS_CA: strcomp_status = "CA"; + ret = -ECANCELED; break; default: strcomp_status = "Unknown"; + ret = -EINVAL; break; } + if (!strcomp_status) + return ret; + if (reg & PIO_NON_POSTED_REQ) str_posted = "Non-posted"; else str_posted = "Posted"; - dev_err(dev, "%s PIO Response Status: %s, %#x @ %#x\n", + dev_dbg(dev, "%s PIO Response Status: %s, %#x @ %#x\n", str_posted, strcomp_status, reg, advk_readl(pcie, PIO_ADDR_LS)); + + return ret; } static int advk_pcie_wait_pio(struct advk_pcie *pcie) @@ -406,13 +741,13 @@ static int advk_pcie_wait_pio(struct advk_pcie *pcie) struct device *dev = &pcie->pdev->dev; int i; - for (i = 0; i < PIO_RETRY_CNT; i++) { + for (i = 1; i <= PIO_RETRY_CNT; i++) { u32 start, isr; start = advk_readl(pcie, PIO_START); isr = advk_readl(pcie, PIO_ISR); if (!start && isr) - return 0; + return i; udelay(PIO_RETRY_DELAY); } @@ -420,6 +755,64 @@ static int advk_pcie_wait_pio(struct advk_pcie *pcie) return -ETIMEDOUT; } +static pci_bridge_emul_read_status_t +advk_pci_bridge_emul_base_conf_read(struct pci_bridge_emul *bridge, + int reg, u32 *value) +{ + struct advk_pcie *pcie = bridge->data; + + switch (reg) { + case PCI_COMMAND: + *value = advk_readl(pcie, PCIE_CORE_CMD_STATUS_REG); + return PCI_BRIDGE_EMUL_HANDLED; + + case PCI_INTERRUPT_LINE: { + /* + * From the whole 32bit register we support reading from HW only + * one bit: PCI_BRIDGE_CTL_BUS_RESET. + * Other bits are retrieved only from emulated config buffer. + */ + __le32 *cfgspace = (__le32 *)&bridge->conf; + u32 val = le32_to_cpu(cfgspace[PCI_INTERRUPT_LINE / 4]); + if (advk_readl(pcie, PCIE_CORE_CTRL1_REG) & HOT_RESET_GEN) + val |= PCI_BRIDGE_CTL_BUS_RESET << 16; + else + val &= ~(PCI_BRIDGE_CTL_BUS_RESET << 16); + *value = val; + return PCI_BRIDGE_EMUL_HANDLED; + } + + default: + return PCI_BRIDGE_EMUL_NOT_HANDLED; + } +} + +static void +advk_pci_bridge_emul_base_conf_write(struct pci_bridge_emul *bridge, + int reg, u32 old, u32 new, u32 mask) +{ + struct advk_pcie *pcie = bridge->data; + + switch (reg) { + case PCI_COMMAND: + advk_writel(pcie, new, PCIE_CORE_CMD_STATUS_REG); + break; + + case PCI_INTERRUPT_LINE: + if (mask & (PCI_BRIDGE_CTL_BUS_RESET << 16)) { + u32 val = advk_readl(pcie, PCIE_CORE_CTRL1_REG); + if (new & (PCI_BRIDGE_CTL_BUS_RESET << 16)) + val |= HOT_RESET_GEN; + else + val &= ~HOT_RESET_GEN; + advk_writel(pcie, val, PCIE_CORE_CTRL1_REG); + } + break; + + default: + break; + } +} static pci_bridge_emul_read_status_t advk_pci_bridge_emul_pcie_conf_read(struct pci_bridge_emul *bridge, @@ -436,13 +829,29 @@ advk_pci_bridge_emul_pcie_conf_read(struct pci_bridge_emul *bridge, case PCI_EXP_RTCTL: { u32 val = advk_readl(pcie, PCIE_ISR0_MASK_REG); *value = (val & PCIE_MSG_PM_PME_MASK) ? 0 : PCI_EXP_RTCTL_PMEIE; + *value |= le16_to_cpu(bridge->pcie_conf.rootctl) & PCI_EXP_RTCTL_CRSSVE; + *value |= PCI_EXP_RTCAP_CRSVIS << 16; return PCI_BRIDGE_EMUL_HANDLED; } case PCI_EXP_RTSTA: { u32 isr0 = advk_readl(pcie, PCIE_ISR0_REG); u32 msglog = advk_readl(pcie, PCIE_MSG_LOG_REG); - *value = (isr0 & PCIE_MSG_PM_PME_MASK) << 16 | (msglog >> 16); + *value = msglog >> 16; + if (isr0 & PCIE_MSG_PM_PME_MASK) + *value |= PCI_EXP_RTSTA_PME; + return PCI_BRIDGE_EMUL_HANDLED; + } + + case PCI_EXP_LNKCAP: { + u32 val = advk_readl(pcie, PCIE_CORE_PCIEXP_CAP + reg); + /* + * PCI_EXP_LNKCAP_DLLLARC bit is hardwired in aardvark HW to 0. + * But support for PCI_EXP_LNKSTA_DLLLA is emulated via ltssm + * state so explicitly enable PCI_EXP_LNKCAP_DLLLARC flag. + */ + val |= PCI_EXP_LNKCAP_DLLLARC; + *value = val; return PCI_BRIDGE_EMUL_HANDLED; } @@ -450,16 +859,16 @@ advk_pci_bridge_emul_pcie_conf_read(struct pci_bridge_emul *bridge, /* u32 contains both PCI_EXP_LNKCTL and PCI_EXP_LNKSTA */ u32 val = advk_readl(pcie, PCIE_CORE_PCIEXP_CAP + reg) & ~(PCI_EXP_LNKSTA_LT << 16); - if (!advk_pcie_link_up(pcie)) + if (advk_pcie_link_training(pcie)) val |= (PCI_EXP_LNKSTA_LT << 16); + if (advk_pcie_link_active(pcie)) + val |= (PCI_EXP_LNKSTA_DLLLA << 16); *value = val; return PCI_BRIDGE_EMUL_HANDLED; } - case PCI_CAP_LIST_ID: case PCI_EXP_DEVCAP: case PCI_EXP_DEVCTL: - case PCI_EXP_LNKCAP: *value = advk_readl(pcie, PCIE_CORE_PCIEXP_CAP + reg); return PCI_BRIDGE_EMUL_HANDLED; default: @@ -506,6 +915,8 @@ advk_pci_bridge_emul_pcie_conf_write(struct pci_bridge_emul *bridge, } static struct pci_bridge_emul_ops advk_pci_bridge_emul_ops = { + .read_base = advk_pci_bridge_emul_base_conf_read, + .write_base = advk_pci_bridge_emul_base_conf_write, .read_pcie = advk_pci_bridge_emul_pcie_conf_read, .write_pcie = advk_pci_bridge_emul_pcie_conf_write, }; @@ -518,22 +929,30 @@ static int advk_sw_pci_bridge_init(struct advk_pcie *pcie) { struct pci_bridge_emul *bridge = &pcie->bridge; - bridge->conf.vendor = advk_readl(pcie, PCIE_CORE_DEV_ID_REG) & 0xffff; - bridge->conf.device = advk_readl(pcie, PCIE_CORE_DEV_ID_REG) >> 16; + bridge->conf.vendor = + cpu_to_le16(advk_readl(pcie, PCIE_CORE_DEV_ID_REG) & 0xffff); + bridge->conf.device = + cpu_to_le16(advk_readl(pcie, PCIE_CORE_DEV_ID_REG) >> 16); bridge->conf.class_revision = - advk_readl(pcie, PCIE_CORE_DEV_REV_REG) & 0xff; + cpu_to_le32(advk_readl(pcie, PCIE_CORE_DEV_REV_REG) & 0xff); /* Support 32 bits I/O addressing */ bridge->conf.iobase = PCI_IO_RANGE_TYPE_32; bridge->conf.iolimit = PCI_IO_RANGE_TYPE_32; /* Support 64 bits memory pref */ - bridge->conf.pref_mem_base = PCI_PREF_RANGE_TYPE_64; - bridge->conf.pref_mem_limit = PCI_PREF_RANGE_TYPE_64; + bridge->conf.pref_mem_base = cpu_to_le16(PCI_PREF_RANGE_TYPE_64); + bridge->conf.pref_mem_limit = cpu_to_le16(PCI_PREF_RANGE_TYPE_64); /* Support interrupt A for MSI feature */ bridge->conf.intpin = PCIE_CORE_INT_A_ASSERT_ENABLE; + /* Aardvark HW provides PCIe Capability structure in version 2 */ + bridge->pcie_conf.cap = cpu_to_le16(2); + + /* Indicates supports for Completion Retry Status */ + bridge->pcie_conf.rootcap = cpu_to_le16(PCI_EXP_RTCAP_CRSVIS); + bridge->has_pcie = true; bridge->data = pcie; bridge->ops = &advk_pci_bridge_emul_ops; @@ -547,6 +966,13 @@ static bool advk_pcie_valid_device(struct advk_pcie *pcie, struct pci_bus *bus, if ((bus->number == pcie->root_bus_nr) && PCI_SLOT(devfn) != 0) return false; + /* + * If the link goes down after we check for link-up, nothing bad + * happens but the config access times out. + */ + if (bus->number != pcie->root_bus_nr && !advk_pcie_link_up(pcie)) + return false; + return true; } @@ -583,6 +1009,8 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn, int where, int size, u32 *val) { struct advk_pcie *pcie = bus->sysdata; + int retry_count; + bool allow_crs; u32 reg; int ret; @@ -595,10 +1023,17 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn, return pci_bridge_emul_conf_read(&pcie->bridge, where, size, val); - if (advk_pcie_pio_is_running(pcie)) { - *val = 0xffffffff; - return PCIBIOS_SET_FAILED; - } + /* + * Completion Retry Status is possible to return only when reading all + * 4 bytes from PCI_VENDOR_ID and PCI_DEVICE_ID registers at once and + * CRSSVE flag on Root Bridge is enabled. + */ + allow_crs = (where == PCI_VENDOR_ID) && (size == 4) && + (le16_to_cpu(pcie->bridge.pcie_conf.rootctl) & + PCI_EXP_RTCTL_CRSSVE); + + if (advk_pcie_pio_is_running(pcie)) + goto try_crs; /* Program the control register */ reg = advk_readl(pcie, PIO_CTRL); @@ -617,24 +1052,45 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn, /* Program the data strobe */ advk_writel(pcie, 0xf, PIO_WR_DATA_STRB); - /* Clear PIO DONE ISR and start the transfer */ - advk_writel(pcie, 1, PIO_ISR); - advk_writel(pcie, 1, PIO_START); + retry_count = 0; + do { + /* Clear PIO DONE ISR and start the transfer */ + advk_writel(pcie, 1, PIO_ISR); + advk_writel(pcie, 1, PIO_START); - ret = advk_pcie_wait_pio(pcie); - if (ret < 0) - return PCIBIOS_SET_FAILED; + ret = advk_pcie_wait_pio(pcie); + if (ret < 0) + goto try_crs; + + retry_count += ret; - advk_pcie_check_pio_status(pcie); + /* Check PIO status and get the read result */ + ret = advk_pcie_check_pio_status(pcie, allow_crs, val); + } while (ret == -EAGAIN && retry_count < PIO_RETRY_CNT); + + if (ret < 0) + goto fail; - /* Get the read result */ - *val = advk_readl(pcie, PIO_RD_DATA); if (size == 1) *val = (*val >> (8 * (where & 3))) & 0xff; else if (size == 2) *val = (*val >> (8 * (where & 3))) & 0xffff; return PCIBIOS_SUCCESSFUL; + +try_crs: + /* + * If it is possible, return Completion Retry Status so that caller + * tries to issue the request again instead of failing. + */ + if (allow_crs) { + *val = CFG_RD_CRS_VAL; + return PCIBIOS_SUCCESSFUL; + } + +fail: + *val = 0xffffffff; + return PCIBIOS_SET_FAILED; } static int advk_pcie_wr_conf(struct pci_bus *bus, u32 devfn, @@ -643,6 +1099,7 @@ static int advk_pcie_wr_conf(struct pci_bus *bus, u32 devfn, struct advk_pcie *pcie = bus->sysdata; u32 reg; u32 data_strobe = 0x0; + int retry_count; int offset; int ret; @@ -684,17 +1141,22 @@ static int advk_pcie_wr_conf(struct pci_bus *bus, u32 devfn, /* Program the data strobe */ advk_writel(pcie, data_strobe, PIO_WR_DATA_STRB); - /* Clear PIO DONE ISR and start the transfer */ - advk_writel(pcie, 1, PIO_ISR); - advk_writel(pcie, 1, PIO_START); + retry_count = 0; + do { + /* Clear PIO DONE ISR and start the transfer */ + advk_writel(pcie, 1, PIO_ISR); + advk_writel(pcie, 1, PIO_START); - ret = advk_pcie_wait_pio(pcie); - if (ret < 0) - return PCIBIOS_SET_FAILED; + ret = advk_pcie_wait_pio(pcie); + if (ret < 0) + return PCIBIOS_SET_FAILED; - advk_pcie_check_pio_status(pcie); + retry_count += ret; - return PCIBIOS_SUCCESSFUL; + ret = advk_pcie_check_pio_status(pcie, false, NULL); + } while (ret == -EAGAIN && retry_count < PIO_RETRY_CNT); + + return ret < 0 ? PCIBIOS_SET_FAILED : PCIBIOS_SUCCESSFUL; } static struct pci_ops advk_pcie_ops = { @@ -710,7 +1172,7 @@ static void advk_msi_irq_compose_msi_msg(struct irq_data *data, msg->address_lo = lower_32_bits(msi_msg); msg->address_hi = upper_32_bits(msi_msg); - msg->data = data->irq; + msg->data = data->hwirq; } static int advk_msi_set_affinity(struct irq_data *irq_data, @@ -727,15 +1189,11 @@ static int advk_msi_irq_domain_alloc(struct irq_domain *domain, int hwirq, i; mutex_lock(&pcie->msi_used_lock); - hwirq = bitmap_find_next_zero_area(pcie->msi_used, MSI_IRQ_NUM, - 0, nr_irqs, 0); - if (hwirq >= MSI_IRQ_NUM) { - mutex_unlock(&pcie->msi_used_lock); - return -ENOSPC; - } - - bitmap_set(pcie->msi_used, hwirq, nr_irqs); + hwirq = bitmap_find_free_region(pcie->msi_used, MSI_IRQ_NUM, + order_base_2(nr_irqs)); mutex_unlock(&pcie->msi_used_lock); + if (hwirq < 0) + return -ENOSPC; for (i = 0; i < nr_irqs; i++) irq_domain_set_info(domain, virq + i, hwirq + i, @@ -743,7 +1201,7 @@ static int advk_msi_irq_domain_alloc(struct irq_domain *domain, domain->host_data, handle_simple_irq, NULL, NULL); - return hwirq; + return 0; } static void advk_msi_irq_domain_free(struct irq_domain *domain, @@ -753,7 +1211,7 @@ static void advk_msi_irq_domain_free(struct irq_domain *domain, struct advk_pcie *pcie = domain->host_data; mutex_lock(&pcie->msi_used_lock); - bitmap_clear(pcie->msi_used, d->hwirq, nr_irqs); + bitmap_release_region(pcie->msi_used, d->hwirq, order_base_2(nr_irqs)); mutex_unlock(&pcie->msi_used_lock); } @@ -766,22 +1224,28 @@ static void advk_pcie_irq_mask(struct irq_data *d) { struct advk_pcie *pcie = d->domain->host_data; irq_hw_number_t hwirq = irqd_to_hwirq(d); + unsigned long flags; u32 mask; + raw_spin_lock_irqsave(&pcie->irq_lock, flags); mask = advk_readl(pcie, PCIE_ISR1_MASK_REG); mask |= PCIE_ISR1_INTX_ASSERT(hwirq); advk_writel(pcie, mask, PCIE_ISR1_MASK_REG); + raw_spin_unlock_irqrestore(&pcie->irq_lock, flags); } static void advk_pcie_irq_unmask(struct irq_data *d) { struct advk_pcie *pcie = d->domain->host_data; irq_hw_number_t hwirq = irqd_to_hwirq(d); + unsigned long flags; u32 mask; + raw_spin_lock_irqsave(&pcie->irq_lock, flags); mask = advk_readl(pcie, PCIE_ISR1_MASK_REG); mask &= ~PCIE_ISR1_INTX_ASSERT(hwirq); advk_writel(pcie, mask, PCIE_ISR1_MASK_REG); + raw_spin_unlock_irqrestore(&pcie->irq_lock, flags); } static int advk_pcie_irq_map(struct irq_domain *h, @@ -865,6 +1329,8 @@ static int advk_pcie_init_irq_domain(struct advk_pcie *pcie) struct irq_chip *irq_chip; int ret = 0; + raw_spin_lock_init(&pcie->irq_lock); + pcie_intc_node = of_get_next_child(node, NULL); if (!pcie_intc_node) { dev_err(dev, "No PCIe Intc node found\n"); @@ -906,19 +1372,19 @@ static void advk_pcie_remove_irq_domain(struct advk_pcie *pcie) static void advk_pcie_handle_msi(struct advk_pcie *pcie) { u32 msi_val, msi_mask, msi_status, msi_idx; - u16 msi_data; + int virq; msi_mask = advk_readl(pcie, PCIE_MSI_MASK_REG); msi_val = advk_readl(pcie, PCIE_MSI_STATUS_REG); - msi_status = msi_val & ~msi_mask; + msi_status = msi_val & ((~msi_mask) & PCIE_MSI_ALL_MASK); for (msi_idx = 0; msi_idx < MSI_IRQ_NUM; msi_idx++) { if (!(BIT(msi_idx) & msi_status)) continue; advk_writel(pcie, BIT(msi_idx), PCIE_MSI_STATUS_REG); - msi_data = advk_readl(pcie, PCIE_MSI_PAYLOAD_REG) & 0xFF; - generic_handle_irq(msi_data); + virq = irq_find_mapping(pcie->msi_inner_domain, msi_idx); + generic_handle_irq(virq); } advk_writel(pcie, PCIE_ISR0_MSI_INT_PENDING, @@ -939,12 +1405,6 @@ static void advk_pcie_handle_int(struct advk_pcie *pcie) isr1_mask = advk_readl(pcie, PCIE_ISR1_MASK_REG); isr1_status = isr1_val & ((~isr1_mask) & PCIE_ISR1_ALL_MASK); - if (!isr0_status && !isr1_status) { - advk_writel(pcie, isr0_val, PCIE_ISR0_REG); - advk_writel(pcie, isr1_val, PCIE_ISR1_REG); - return; - } - /* Process MSI interrupts */ if (isr0_status & PCIE_ISR0_MSI_INT_PENDING) advk_pcie_handle_msi(pcie); @@ -1037,6 +1497,7 @@ static int advk_pcie_probe(struct platform_device *pdev) struct advk_pcie *pcie; struct resource *res; struct pci_host_bridge *bridge; + struct resource_entry *entry; int ret, irq; bridge = devm_pci_alloc_host_bridge(dev, sizeof(struct advk_pcie)); @@ -1066,6 +1527,102 @@ static int advk_pcie_probe(struct platform_device *pdev) return ret; } + resource_list_for_each_entry(entry, &pcie->resources) { + resource_size_t start = entry->res->start; + resource_size_t size = resource_size(entry->res); + unsigned long type = resource_type(entry->res); + u64 win_size; + + /* + * Aardvark hardware allows to configure also PCIe window + * for config type 0 and type 1 mapping, but driver uses + * only PIO for issuing configuration transfers which does + * not use PCIe window configuration. + */ + if (type != IORESOURCE_MEM && type != IORESOURCE_MEM_64 && + type != IORESOURCE_IO) + continue; + + /* + * Skip transparent memory resources. Default outbound access + * configuration is set to transparent memory access so it + * does not need window configuration. + */ + if ((type == IORESOURCE_MEM || type == IORESOURCE_MEM_64) && + entry->offset == 0) + continue; + + /* + * The n-th PCIe window is configured by tuple (match, remap, mask) + * and an access to address A uses this window if A matches the + * match with given mask. + * So every PCIe window size must be a power of two and every start + * address must be aligned to window size. Minimal size is 64 KiB + * because lower 16 bits of mask must be zero. Remapped address + * may have set only bits from the mask. + */ + while (pcie->wins_count < OB_WIN_COUNT && size > 0) { + /* Calculate the largest aligned window size */ + win_size = (1ULL << (fls64(size)-1)) | + (start ? (1ULL << __ffs64(start)) : 0); + win_size = 1ULL << __ffs64(win_size); + if (win_size < 0x10000) + break; + + dev_dbg(dev, + "Configuring PCIe window %d: [0x%llx-0x%llx] as %lu\n", + pcie->wins_count, (unsigned long long)start, + (unsigned long long)start + win_size, type); + + if (type == IORESOURCE_IO) { + pcie->wins[pcie->wins_count].actions = OB_WIN_TYPE_IO; + pcie->wins[pcie->wins_count].match = pci_pio_to_address(start); + } else { + pcie->wins[pcie->wins_count].actions = OB_WIN_TYPE_MEM; + pcie->wins[pcie->wins_count].match = start; + } + pcie->wins[pcie->wins_count].remap = start - entry->offset; + pcie->wins[pcie->wins_count].mask = ~(win_size - 1); + + if (pcie->wins[pcie->wins_count].remap & (win_size - 1)) + break; + + start += win_size; + size -= win_size; + pcie->wins_count++; + } + + if (size > 0) { + dev_err(&pcie->pdev->dev, + "Invalid PCIe region [0x%llx-0x%llx]\n", + (unsigned long long)entry->res->start, + (unsigned long long)entry->res->end + 1); + return -EINVAL; + } + } + + pcie->reset_gpio = devm_gpiod_get_from_of_node(dev, dev->of_node, + "reset-gpios", 0, + GPIOD_OUT_LOW, + "pcie1-reset"); + ret = PTR_ERR_OR_ZERO(pcie->reset_gpio); + if (ret) { + if (ret == -ENOENT) { + pcie->reset_gpio = NULL; + } else { + if (ret != -EPROBE_DEFER) + dev_err(dev, "Failed to get reset-gpio: %i\n", + ret); + return ret; + } + } + + ret = of_pci_get_max_link_speed(dev->of_node); + if (ret <= 0 || ret > 3) + pcie->link_gen = 3; + else + pcie->link_gen = ret; + advk_pcie_setup_hw(pcie); ret = advk_sw_pci_bridge_init(pcie); diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 8c45d6c32c30e..3d48fa685aaae 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -1110,6 +1110,10 @@ static void hv_int_desc_free(struct hv_pci_dev *hpdev, u8 buffer[sizeof(struct pci_delete_interrupt)]; } ctxt; + if (!int_desc->vector_count) { + kfree(int_desc); + return; + } memset(&ctxt, 0, sizeof(ctxt)); int_pkt = (struct pci_delete_interrupt *)&ctxt.pkt.message; int_pkt->message_type.type = @@ -1172,6 +1176,28 @@ static void hv_irq_mask(struct irq_data *data) pci_msi_mask_irq(data); } +static unsigned int hv_msi_get_int_vector(struct irq_data *data) +{ + struct irq_cfg *cfg = irqd_cfg(data); + + return cfg->vector; +} + +static int hv_msi_prepare(struct irq_domain *domain, struct device *dev, + int nvec, msi_alloc_info_t *info) +{ + int ret = pci_msi_prepare(domain, dev, nvec, info); + + /* + * By using the interrupt remapper in the hypervisor IOMMU, contiguous + * CPU vectors is not needed for multi-MSI + */ + if (info->type == X86_IRQ_ALLOC_TYPE_MSI) + info->flags &= ~X86_IRQ_ALLOC_CONTIGUOUS_VECTORS; + + return ret; +} + /** * hv_irq_unmask() - "Unmask" the IRQ by setting its current * affinity. @@ -1187,6 +1213,7 @@ static void hv_irq_unmask(struct irq_data *data) struct msi_desc *msi_desc = irq_data_get_msi_desc(data); struct irq_cfg *cfg = irqd_cfg(data); struct retarget_msi_interrupt *params; + struct tran_int_desc *int_desc; struct hv_pcibus_device *hbus; struct cpumask *dest; cpumask_var_t tmp; @@ -1201,6 +1228,7 @@ static void hv_irq_unmask(struct irq_data *data) pdev = msi_desc_to_pci_dev(msi_desc); pbus = pdev->bus; hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata); + int_desc = data->chip_data; spin_lock_irqsave(&hbus->retarget_msi_interrupt_lock, flags); @@ -1208,8 +1236,8 @@ static void hv_irq_unmask(struct irq_data *data) memset(params, 0, sizeof(*params)); params->partition_id = HV_PARTITION_ID_SELF; params->int_entry.source = 1; /* MSI(-X) */ - params->int_entry.address = msi_desc->msg.address_lo; - params->int_entry.data = msi_desc->msg.data; + params->int_entry.address = int_desc->address & 0xffffffff; + params->int_entry.data = int_desc->data; params->device_id = (hbus->hdev->dev_instance.b[5] << 24) | (hbus->hdev->dev_instance.b[4] << 16) | (hbus->hdev->dev_instance.b[7] << 8) | @@ -1296,12 +1324,12 @@ static void hv_pci_compose_compl(void *context, struct pci_response *resp, static u32 hv_compose_msi_req_v1( struct pci_create_interrupt *int_pkt, struct cpumask *affinity, - u32 slot, u8 vector) + u32 slot, u8 vector, u8 vector_count) { int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE; int_pkt->wslot.slot = slot; int_pkt->int_desc.vector = vector; - int_pkt->int_desc.vector_count = 1; + int_pkt->int_desc.vector_count = vector_count; int_pkt->int_desc.delivery_mode = dest_Fixed; /* @@ -1315,14 +1343,14 @@ static u32 hv_compose_msi_req_v1( static u32 hv_compose_msi_req_v2( struct pci_create_interrupt2 *int_pkt, struct cpumask *affinity, - u32 slot, u8 vector) + u32 slot, u8 vector, u8 vector_count) { int cpu; int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE2; int_pkt->wslot.slot = slot; int_pkt->int_desc.vector = vector; - int_pkt->int_desc.vector_count = 1; + int_pkt->int_desc.vector_count = vector_count; int_pkt->int_desc.delivery_mode = dest_Fixed; /* @@ -1350,7 +1378,6 @@ static u32 hv_compose_msi_req_v2( */ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) { - struct irq_cfg *cfg = irqd_cfg(data); struct hv_pcibus_device *hbus; struct hv_pci_dev *hpdev; struct pci_bus *pbus; @@ -1359,6 +1386,8 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) unsigned long flags; struct compose_comp_ctxt comp; struct tran_int_desc *int_desc; + struct msi_desc *msi_desc; + u8 vector, vector_count; struct { struct pci_packet pci_pkt; union { @@ -1370,7 +1399,17 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) u32 size; int ret; - pdev = msi_desc_to_pci_dev(irq_data_get_msi_desc(data)); + /* Reuse the previous allocation */ + if (data->chip_data) { + int_desc = data->chip_data; + msg->address_hi = int_desc->address >> 32; + msg->address_lo = int_desc->address & 0xffffffff; + msg->data = int_desc->data; + return; + } + + msi_desc = irq_data_get_msi_desc(data); + pdev = msi_desc_to_pci_dev(msi_desc); dest = irq_data_get_effective_affinity_mask(data); pbus = pdev->bus; hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata); @@ -1378,17 +1417,40 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) if (!hpdev) goto return_null_message; - /* Free any previous message that might have already been composed. */ - if (data->chip_data) { - int_desc = data->chip_data; - data->chip_data = NULL; - hv_int_desc_free(hpdev, int_desc); - } - int_desc = kzalloc(sizeof(*int_desc), GFP_ATOMIC); if (!int_desc) goto drop_reference; + if (!msi_desc->msi_attrib.is_msix && msi_desc->nvec_used > 1) { + /* + * If this is not the first MSI of Multi MSI, we already have + * a mapping. Can exit early. + */ + if (msi_desc->irq != data->irq) { + data->chip_data = int_desc; + int_desc->address = msi_desc->msg.address_lo | + (u64)msi_desc->msg.address_hi << 32; + int_desc->data = msi_desc->msg.data + + (data->irq - msi_desc->irq); + msg->address_hi = msi_desc->msg.address_hi; + msg->address_lo = msi_desc->msg.address_lo; + msg->data = int_desc->data; + put_pcichild(hpdev); + return; + } + /* + * The vector we select here is a dummy value. The correct + * value gets sent to the hypervisor in unmask(). This needs + * to be aligned with the count, and also not zero. Multi-msi + * is powers of 2 up to 32, so 32 will always work here. + */ + vector = 32; + vector_count = msi_desc->nvec_used; + } else { + vector = hv_msi_get_int_vector(data); + vector_count = 1; + } + memset(&ctxt, 0, sizeof(ctxt)); init_completion(&comp.comp_pkt.host_event); ctxt.pci_pkt.completion_func = hv_pci_compose_compl; @@ -1399,14 +1461,16 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) size = hv_compose_msi_req_v1(&ctxt.int_pkts.v1, dest, hpdev->desc.win_slot.slot, - cfg->vector); + vector, + vector_count); break; case PCI_PROTOCOL_VERSION_1_2: size = hv_compose_msi_req_v2(&ctxt.int_pkts.v2, dest, hpdev->desc.win_slot.slot, - cfg->vector); + vector, + vector_count); break; default: @@ -1518,7 +1582,7 @@ static irq_hw_number_t hv_msi_domain_ops_get_hwirq(struct msi_domain_info *info, static struct msi_domain_ops hv_msi_ops = { .get_hwirq = hv_msi_domain_ops_get_hwirq, - .msi_prepare = pci_msi_prepare, + .msi_prepare = hv_msi_prepare, .set_desc = pci_msi_set_desc, .msi_free = hv_msi_free, }; diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c index 5a2483e125a3f..09af97083ac09 100644 --- a/drivers/pci/controller/pci-mvebu.c +++ b/drivers/pci/controller/pci-mvebu.c @@ -576,6 +576,8 @@ struct pci_bridge_emul_ops mvebu_pci_bridge_emul_ops = { static void mvebu_pci_bridge_emul_init(struct mvebu_pcie_port *port) { struct pci_bridge_emul *bridge = &port->bridge; + u32 pcie_cap = mvebu_readl(port, PCIE_CAP_PCIEXP); + u8 pcie_cap_ver = ((pcie_cap >> 16) & PCI_EXP_FLAGS_VERS); bridge->conf.vendor = PCI_VENDOR_ID_MARVELL; bridge->conf.device = mvebu_readl(port, PCIE_DEV_ID_OFF) >> 16; @@ -588,6 +590,12 @@ static void mvebu_pci_bridge_emul_init(struct mvebu_pcie_port *port) bridge->conf.iolimit = PCI_IO_RANGE_TYPE_32; } + /* + * Older mvebu hardware provides PCIe Capability structure only in + * version 1. New hardware provides it in version 2. + */ + bridge->pcie_conf.cap = cpu_to_le16(pcie_cap_ver); + bridge->has_pcie = true; bridge->data = port; bridge->ops = &mvebu_pci_bridge_emul_ops; diff --git a/drivers/pci/controller/pcie-cadence-ep.c b/drivers/pci/controller/pcie-cadence-ep.c index def7820cb8247..5e23d575e200a 100644 --- a/drivers/pci/controller/pcie-cadence-ep.c +++ b/drivers/pci/controller/pcie-cadence-ep.c @@ -178,8 +178,7 @@ static int cdns_pcie_ep_map_addr(struct pci_epc *epc, u8 fn, phys_addr_t addr, struct cdns_pcie *pcie = &ep->pcie; u32 r; - r = find_first_zero_bit(&ep->ob_region_map, - sizeof(ep->ob_region_map) * BITS_PER_LONG); + r = find_first_zero_bit(&ep->ob_region_map, BITS_PER_LONG); if (r >= ep->max_regions - 1) { dev_err(&epc->dev, "no free outbound region\n"); return -EINVAL; diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index d743b0a489886..b82edefffd15f 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -263,8 +263,7 @@ static int rockchip_pcie_ep_map_addr(struct pci_epc *epc, u8 fn, struct rockchip_pcie *pcie = &ep->rockchip; u32 r; - r = find_first_zero_bit(&ep->ob_region_map, - sizeof(ep->ob_region_map) * BITS_PER_LONG); + r = find_first_zero_bit(&ep->ob_region_map, BITS_PER_LONG); /* * Region 0 is reserved for configuration space and shouldn't * be used elsewhere per TRM, so leave it out. diff --git a/drivers/pci/controller/pcie-xilinx-nwl.c b/drivers/pci/controller/pcie-xilinx-nwl.c index 45c0f344ccd16..11b046b20b92a 100644 --- a/drivers/pci/controller/pcie-xilinx-nwl.c +++ b/drivers/pci/controller/pcie-xilinx-nwl.c @@ -6,6 +6,7 @@ * (C) Copyright 2014 - 2015, Xilinx, Inc. */ +#include #include #include #include @@ -169,6 +170,7 @@ struct nwl_pcie { u8 root_busno; struct nwl_msi msi; struct irq_domain *legacy_irq_domain; + struct clk *clk; raw_spinlock_t leg_mask_lock; }; @@ -839,6 +841,16 @@ static int nwl_pcie_probe(struct platform_device *pdev) return err; } + pcie->clk = devm_clk_get(dev, NULL); + if (IS_ERR(pcie->clk)) + return PTR_ERR(pcie->clk); + + err = clk_prepare_enable(pcie->clk); + if (err) { + dev_err(dev, "can't enable PCIe ref clock\n"); + return err; + } + err = nwl_pcie_bridge_init(pcie); if (err) { dev_err(dev, "HW Initialization failed\n"); diff --git a/drivers/pci/ecam.c b/drivers/pci/ecam.c index fcad9af6df04e..9765d2eb79d21 100644 --- a/drivers/pci/ecam.c +++ b/drivers/pci/ecam.c @@ -165,6 +165,7 @@ struct pci_ecam_ops pci_32b_ops = { } }; +/* ECAM ops for 32-bit read only (non-compliant) */ struct pci_ecam_ops pci_32b_read_ops = { .bus_shift = 20, .pci_ops = { diff --git a/drivers/pci/hotplug/TODO b/drivers/pci/hotplug/TODO index a32070be5adf9..cc6194aa24c15 100644 --- a/drivers/pci/hotplug/TODO +++ b/drivers/pci/hotplug/TODO @@ -40,9 +40,6 @@ ibmphp: * The return value of pci_hp_register() is not checked. -* iounmap(io_mem) is called in the error path of ebda_rsrc_controller() - and once more in the error path of its caller ibmphp_access_ebda(). - * The various slot data structures are difficult to follow and need to be simplified. A lot of functions are too large and too complex, they need to be broken up into smaller, manageable pieces. Negative examples are diff --git a/drivers/pci/hotplug/ibmphp_ebda.c b/drivers/pci/hotplug/ibmphp_ebda.c index 11a2661dc0627..7fb75401ad8a7 100644 --- a/drivers/pci/hotplug/ibmphp_ebda.c +++ b/drivers/pci/hotplug/ibmphp_ebda.c @@ -714,8 +714,7 @@ static int __init ebda_rsrc_controller(void) /* init hpc structure */ hpc_ptr = alloc_ebda_hpc(slot_num, bus_num); if (!hpc_ptr) { - rc = -ENOMEM; - goto error_no_hpc; + return -ENOMEM; } hpc_ptr->ctlr_id = ctlr_id; hpc_ptr->ctlr_relative_id = ctlr; @@ -910,8 +909,6 @@ static int __init ebda_rsrc_controller(void) kfree(tmp_slot); error_no_slot: free_ebda_hpc(hpc_ptr); -error_no_hpc: - iounmap(io_mem); return rc; } diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index aa61d4c219d7b..ae44f46d1bf3b 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -84,6 +84,7 @@ struct controller { struct pcie_device *pcie; u32 slot_cap; /* capabilities and quirks */ + unsigned int inband_presence_disabled:1; u16 slot_ctrl; /* control register access */ struct mutex ctrl_lock; diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c index 6503d15effbbd..2f5f4bb42dcc6 100644 --- a/drivers/pci/hotplug/pciehp_ctrl.c +++ b/drivers/pci/hotplug/pciehp_ctrl.c @@ -73,10 +73,8 @@ static int board_added(struct controller *ctrl) /* Check link training status */ retval = pciehp_check_link_status(ctrl); - if (retval) { - ctrl_err(ctrl, "Failed to check link status\n"); + if (retval) goto err_exit; - } /* Check for a power fault */ if (ctrl->power_fault_detected || pciehp_query_power_fault(ctrl)) { diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 88b996764ff95..2c65330628e6f 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -14,6 +14,7 @@ #define dev_fmt(fmt) "pciehp: " fmt +#include #include #include #include @@ -26,6 +27,24 @@ #include "../pci.h" #include "pciehp.h" +static const struct dmi_system_id inband_presence_disabled_dmi_table[] = { + /* + * Match all Dell systems, as some Dell systems have inband + * presence disabled on NVMe slots (but don't support the bit to + * report it). Setting inband presence disabled should have no + * negative effect, except on broken hotplug slots that never + * assert presence detect--and those will still work, they will + * just have a bit of extra delay before being probed. + */ + { + .ident = "Dell System", + .matches = { + DMI_MATCH(DMI_OEM_STRING, "Dell System"), + }, + }, + {} +}; + static inline struct pci_dev *ctrl_dev(struct controller *ctrl) { return ctrl->pcie->port; @@ -79,6 +98,8 @@ static int pcie_poll_cmd(struct controller *ctrl, int timeout) if (slot_status & PCI_EXP_SLTSTA_CC) { pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_CC); + ctrl->cmd_busy = 0; + smp_mb(); return 1; } if (timeout < 0) @@ -254,14 +275,33 @@ static bool pci_bus_check_dev(struct pci_bus *bus, int devfn) return found; } +static void pcie_wait_for_presence(struct pci_dev *pdev) +{ + int timeout = 1250; + u16 slot_status; + + do { + pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &slot_status); + if (slot_status & PCI_EXP_SLTSTA_PDS) + return; + msleep(10); + timeout -= 10; + } while (timeout > 0); +} + int pciehp_check_link_status(struct controller *ctrl) { struct pci_dev *pdev = ctrl_dev(ctrl); bool found; u16 lnk_status; - if (!pcie_wait_for_link(pdev, true)) + if (!pcie_wait_for_link(pdev, true)) { + ctrl_info(ctrl, "Slot(%s): No link\n", slot_name(ctrl)); return -1; + } + + if (ctrl->inband_presence_disabled) + pcie_wait_for_presence(pdev); found = pci_bus_check_dev(ctrl->pcie->port->subordinate, PCI_DEVFN(0, 0)); @@ -275,15 +315,18 @@ int pciehp_check_link_status(struct controller *ctrl) ctrl_dbg(ctrl, "%s: lnk_status = %x\n", __func__, lnk_status); if ((lnk_status & PCI_EXP_LNKSTA_LT) || !(lnk_status & PCI_EXP_LNKSTA_NLW)) { - ctrl_err(ctrl, "link training error: status %#06x\n", - lnk_status); + ctrl_info(ctrl, "Slot(%s): Cannot train link: status %#06x\n", + slot_name(ctrl), lnk_status); return -1; } pcie_update_link_speed(ctrl->pcie->port->subordinate, lnk_status); - if (!found) + if (!found) { + ctrl_info(ctrl, "Slot(%s): No device found\n", + slot_name(ctrl)); return -1; + } return 0; } @@ -577,6 +620,8 @@ static irqreturn_t pciehp_isr(int irq, void *dev_id) */ if (ctrl->power_fault_detected) status &= ~PCI_EXP_SLTSTA_PFD; + else if (status & PCI_EXP_SLTSTA_PFD) + ctrl->power_fault_detected = true; events |= status; if (!events) { @@ -586,7 +631,7 @@ static irqreturn_t pciehp_isr(int irq, void *dev_id) } if (status) { - pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, events); + pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, status); /* * In MSI mode, all event bits must be zero before the port @@ -660,8 +705,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) } /* Check Power Fault Detected */ - if ((events & PCI_EXP_SLTSTA_PFD) && !ctrl->power_fault_detected) { - ctrl->power_fault_detected = 1; + if (events & PCI_EXP_SLTSTA_PFD) { ctrl_err(ctrl, "Slot(%s): Power fault\n", slot_name(ctrl)); pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF, PCI_EXP_SLTCTL_ATTN_IND_ON); @@ -864,7 +908,7 @@ static inline void dbg_ctrl(struct controller *ctrl) struct controller *pcie_init(struct pcie_device *dev) { struct controller *ctrl; - u32 slot_cap, link_cap; + u32 slot_cap, slot_cap2, link_cap; u8 poweron; struct pci_dev *pdev = dev->port; struct pci_bus *subordinate = pdev->subordinate; @@ -899,6 +943,16 @@ struct controller *pcie_init(struct pcie_device *dev) ctrl->state = list_empty(&subordinate->devices) ? OFF_STATE : ON_STATE; up_read(&pci_bus_sem); + pcie_capability_read_dword(pdev, PCI_EXP_SLTCAP2, &slot_cap2); + if (slot_cap2 & PCI_EXP_SLTCAP2_IBPD) { + pcie_write_cmd_nowait(ctrl, PCI_EXP_SLTCTL_IBPD_DISABLE, + PCI_EXP_SLTCTL_IBPD_DISABLE); + ctrl->inband_presence_disabled = 1; + } + + if (dmi_first_match(inband_presence_disabled_dmi_table)) + ctrl->inband_presence_disabled = 1; + /* Check if Data Link Layer Link Active Reporting is implemented */ pcie_capability_read_dword(pdev, PCI_EXP_LNKCAP, &link_cap); @@ -908,7 +962,7 @@ struct controller *pcie_init(struct pcie_device *dev) PCI_EXP_SLTSTA_MRLSC | PCI_EXP_SLTSTA_CC | PCI_EXP_SLTSTA_DLLSC | PCI_EXP_SLTSTA_PDC); - ctrl_info(ctrl, "Slot #%d AttnBtn%c PwrCtrl%c MRL%c AttnInd%c PwrInd%c HotPlug%c Surprise%c Interlock%c NoCompl%c LLActRep%c%s\n", + ctrl_info(ctrl, "Slot #%d AttnBtn%c PwrCtrl%c MRL%c AttnInd%c PwrInd%c HotPlug%c Surprise%c Interlock%c NoCompl%c IbPresDis%c LLActRep%c%s\n", (slot_cap & PCI_EXP_SLTCAP_PSN) >> 19, FLAG(slot_cap, PCI_EXP_SLTCAP_ABP), FLAG(slot_cap, PCI_EXP_SLTCAP_PCP), @@ -919,6 +973,7 @@ struct controller *pcie_init(struct pcie_device *dev) FLAG(slot_cap, PCI_EXP_SLTCAP_HPS), FLAG(slot_cap, PCI_EXP_SLTCAP_EIP), FLAG(slot_cap, PCI_EXP_SLTCAP_NCCS), + FLAG(slot_cap2, PCI_EXP_SLTCAP2_IBPD), FLAG(link_cap, PCI_EXP_LNKCAP_DLLLARC), pdev->broken_cmd_compl ? " (with Cmd Compl erratum)" : ""); @@ -956,6 +1011,8 @@ static void quirk_cmd_compl(struct pci_dev *pdev) } DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_QCOM, 0x0110, + PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl); DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_QCOM, 0x0400, PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl); DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_QCOM, 0x0401, diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 2163ae3380d5e..715c85d4e688d 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -395,18 +395,6 @@ static void free_msi_irqs(struct pci_dev *dev) for (i = 0; i < entry->nvec_used; i++) BUG_ON(irq_has_action(entry->irq + i)); - pci_msi_teardown_msi_irqs(dev); - - list_for_each_entry_safe(entry, tmp, msi_list, list) { - if (entry->msi_attrib.is_msix) { - if (list_is_last(&entry->list, msi_list)) - iounmap(entry->mask_base); - } - - list_del(&entry->list); - free_msi_entry(entry); - } - if (dev->msi_irq_groups) { sysfs_remove_groups(&dev->dev.kobj, dev->msi_irq_groups); msi_attrs = dev->msi_irq_groups[0]->attrs; @@ -422,6 +410,18 @@ static void free_msi_irqs(struct pci_dev *dev) kfree(dev->msi_irq_groups); dev->msi_irq_groups = NULL; } + + pci_msi_teardown_msi_irqs(dev); + + list_for_each_entry_safe(entry, tmp, msi_list, list) { + if (entry->msi_attrib.is_msix) { + if (list_is_last(&entry->list, msi_list)) + iounmap(entry->mask_base); + } + + list_del(&entry->list); + free_msi_entry(entry); + } } static void pci_intx_for_msi(struct pci_dev *dev, int enable) @@ -591,6 +591,9 @@ msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity *affd) goto out; pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control); + /* Lies, damned lies, and MSIs */ + if (dev->dev_flags & PCI_DEV_FLAGS_HAS_MSI_MASKING) + control |= PCI_MSI_FLAGS_MASKBIT; entry->msi_attrib.is_msix = 0; entry->msi_attrib.is_64 = !!(control & PCI_MSI_FLAGS_64BIT); @@ -782,6 +785,9 @@ static void msix_mask_all(void __iomem *base, int tsize) u32 ctrl = PCI_MSIX_ENTRY_CTRL_MASKBIT; int i; + if (pci_msi_ignore_mask) + return; + for (i = 0; i < tsize; i++, base += PCI_MSIX_ENTRY_SIZE) writel(ctrl, base + PCI_MSIX_ENTRY_VECTOR_CTRL); } @@ -821,9 +827,6 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, goto out_disable; } - /* Ensure that all table entries are masked. */ - msix_mask_all(base, tsize); - ret = msix_setup_entries(dev, base, entries, nvec, affd); if (ret) goto out_disable; @@ -846,6 +849,16 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, /* Set MSI-X enabled bits and unmask the function */ pci_intx_for_msi(dev, 0); dev->msix_enabled = 1; + + /* + * Ensure that all table entries are masked to prevent + * stale entries from firing in a crash kernel. + * + * Done late to deal with a broken Marvell NVME device + * which takes the MSI-X mask bits into account even + * when MSI-X is disabled, which prevents MSI delivery. + */ + msix_mask_all(base, tsize); pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0); pcibios_free_irq(dev); @@ -1281,19 +1294,24 @@ EXPORT_SYMBOL(pci_free_irq_vectors); /** * pci_irq_vector - return Linux IRQ number of a device vector - * @dev: PCI device to operate on - * @nr: device-relative interrupt vector index (0-based). + * @dev: PCI device to operate on + * @nr: Interrupt vector index (0-based) + * + * @nr has the following meanings depending on the interrupt mode: + * MSI-X: The index in the MSI-X vector table + * MSI: The index of the enabled MSI vectors + * INTx: Must be 0 + * + * Return: The Linux interrupt number or -EINVAl if @nr is out of range. */ int pci_irq_vector(struct pci_dev *dev, unsigned int nr) { if (dev->msix_enabled) { struct msi_desc *entry; - int i = 0; for_each_pci_msi_entry(entry, dev) { - if (i == nr) + if (entry->msi_attrib.entry_nr == nr) return entry->irq; - i++; } WARN_ON_ONCE(1); return -EINVAL; @@ -1317,17 +1335,22 @@ EXPORT_SYMBOL(pci_irq_vector); * pci_irq_get_affinity - return the affinity of a particular MSI vector * @dev: PCI device to operate on * @nr: device-relative interrupt vector index (0-based). + * + * @nr has the following meanings depending on the interrupt mode: + * MSI-X: The index in the MSI-X vector table + * MSI: The index of the enabled MSI vectors + * INTx: Must be 0 + * + * Return: A cpumask pointer or NULL if @nr is out of range */ const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr) { if (dev->msix_enabled) { struct msi_desc *entry; - int i = 0; for_each_pci_msi_entry(entry, dev) { - if (i == nr) + if (entry->msi_attrib.entry_nr == nr) return &entry->affinity->mask; - i++; } WARN_ON_ONCE(1); return NULL; diff --git a/drivers/pci/pci-bridge-emul.c b/drivers/pci/pci-bridge-emul.c index d3b6b9a056185..0ca3b5eef53a7 100644 --- a/drivers/pci/pci-bridge-emul.c +++ b/drivers/pci/pci-bridge-emul.c @@ -21,8 +21,9 @@ #include "pci-bridge-emul.h" #define PCI_BRIDGE_CONF_END PCI_STD_HEADER_SIZEOF +#define PCI_CAP_PCIE_SIZEOF (PCI_EXP_SLTSTA2 + 2) #define PCI_CAP_PCIE_START PCI_BRIDGE_CONF_END -#define PCI_CAP_PCIE_END (PCI_CAP_PCIE_START + PCI_EXP_SLTSTA2 + 2) +#define PCI_CAP_PCIE_END (PCI_CAP_PCIE_START + PCI_CAP_PCIE_SIZEOF) struct pci_bridge_reg_behavior { /* Read-only bits */ @@ -38,7 +39,8 @@ struct pci_bridge_reg_behavior { u32 rsvd; }; -static const struct pci_bridge_reg_behavior pci_regs_behavior[] = { +static const +struct pci_bridge_reg_behavior pci_regs_behavior[PCI_STD_HEADER_SIZEOF / 4] = { [PCI_VENDOR_ID / 4] = { .ro = ~0 }, [PCI_COMMAND / 4] = { .rw = (PCI_COMMAND_IO | PCI_COMMAND_MEMORY | @@ -173,7 +175,8 @@ static const struct pci_bridge_reg_behavior pci_regs_behavior[] = { }, }; -static const struct pci_bridge_reg_behavior pcie_cap_regs_behavior[] = { +static const +struct pci_bridge_reg_behavior pcie_cap_regs_behavior[PCI_CAP_PCIE_SIZEOF / 4] = { [PCI_CAP_LIST_ID / 4] = { /* * Capability ID, Next Capability Pointer and @@ -270,10 +273,12 @@ static const struct pci_bridge_reg_behavior pcie_cap_regs_behavior[] = { int pci_bridge_emul_init(struct pci_bridge_emul *bridge, unsigned int flags) { - bridge->conf.class_revision |= PCI_CLASS_BRIDGE_PCI << 16; + BUILD_BUG_ON(sizeof(bridge->conf) != PCI_BRIDGE_CONF_END); + + bridge->conf.class_revision |= cpu_to_le32(PCI_CLASS_BRIDGE_PCI << 16); bridge->conf.header_type = PCI_HEADER_TYPE_BRIDGE; bridge->conf.cache_line_size = 0x10; - bridge->conf.status = PCI_STATUS_CAP_LIST; + bridge->conf.status = cpu_to_le16(PCI_STATUS_CAP_LIST); bridge->pci_regs_behavior = kmemdup(pci_regs_behavior, sizeof(pci_regs_behavior), GFP_KERNEL); @@ -282,10 +287,9 @@ int pci_bridge_emul_init(struct pci_bridge_emul *bridge, if (bridge->has_pcie) { bridge->conf.capabilities_pointer = PCI_CAP_PCIE_START; + bridge->conf.status |= cpu_to_le16(PCI_STATUS_CAP_LIST); bridge->pcie_conf.cap_id = PCI_CAP_ID_EXP; - /* Set PCIe v2, root port, slot support */ - bridge->pcie_conf.cap = PCI_EXP_TYPE_ROOT_PORT << 4 | 2 | - PCI_EXP_FLAGS_SLOT; + bridge->pcie_conf.cap |= cpu_to_le16(PCI_EXP_TYPE_ROOT_PORT << 4); bridge->pcie_cap_regs_behavior = kmemdup(pcie_cap_regs_behavior, sizeof(pcie_cap_regs_behavior), @@ -294,6 +298,27 @@ int pci_bridge_emul_init(struct pci_bridge_emul *bridge, kfree(bridge->pci_regs_behavior); return -ENOMEM; } + /* These bits are applicable only for PCI and reserved on PCIe */ + bridge->pci_regs_behavior[PCI_CACHE_LINE_SIZE / 4].ro &= + ~GENMASK(15, 8); + bridge->pci_regs_behavior[PCI_COMMAND / 4].ro &= + ~((PCI_COMMAND_SPECIAL | PCI_COMMAND_INVALIDATE | + PCI_COMMAND_VGA_PALETTE | PCI_COMMAND_WAIT | + PCI_COMMAND_FAST_BACK) | + (PCI_STATUS_66MHZ | PCI_STATUS_FAST_BACK | + PCI_STATUS_DEVSEL_MASK) << 16); + bridge->pci_regs_behavior[PCI_PRIMARY_BUS / 4].ro &= + ~GENMASK(31, 24); + bridge->pci_regs_behavior[PCI_IO_BASE / 4].ro &= + ~((PCI_STATUS_66MHZ | PCI_STATUS_FAST_BACK | + PCI_STATUS_DEVSEL_MASK) << 16); + bridge->pci_regs_behavior[PCI_INTERRUPT_LINE / 4].rw &= + ~((PCI_BRIDGE_CTL_MASTER_ABORT | + BIT(8) | BIT(9) | BIT(11)) << 16); + bridge->pci_regs_behavior[PCI_INTERRUPT_LINE / 4].ro &= + ~((PCI_BRIDGE_CTL_FAST_BACK) << 16); + bridge->pci_regs_behavior[PCI_INTERRUPT_LINE / 4].w1c &= + ~(BIT(10) << 16); } if (flags & PCI_BRIDGE_EMUL_NO_PREFETCHABLE_BAR) { @@ -327,7 +352,7 @@ int pci_bridge_emul_conf_read(struct pci_bridge_emul *bridge, int where, int reg = where & ~3; pci_bridge_emul_read_status_t (*read_op)(struct pci_bridge_emul *bridge, int reg, u32 *value); - u32 *cfgspace; + __le32 *cfgspace; const struct pci_bridge_reg_behavior *behavior; if (bridge->has_pcie && reg >= PCI_CAP_PCIE_END) { @@ -343,11 +368,11 @@ int pci_bridge_emul_conf_read(struct pci_bridge_emul *bridge, int where, if (bridge->has_pcie && reg >= PCI_CAP_PCIE_START) { reg -= PCI_CAP_PCIE_START; read_op = bridge->ops->read_pcie; - cfgspace = (u32 *) &bridge->pcie_conf; + cfgspace = (__le32 *) &bridge->pcie_conf; behavior = bridge->pcie_cap_regs_behavior; } else { read_op = bridge->ops->read_base; - cfgspace = (u32 *) &bridge->conf; + cfgspace = (__le32 *) &bridge->conf; behavior = bridge->pci_regs_behavior; } @@ -357,7 +382,7 @@ int pci_bridge_emul_conf_read(struct pci_bridge_emul *bridge, int where, ret = PCI_BRIDGE_EMUL_NOT_HANDLED; if (ret == PCI_BRIDGE_EMUL_NOT_HANDLED) - *value = cfgspace[reg / 4]; + *value = le32_to_cpu(cfgspace[reg / 4]); /* * Make sure we never return any reserved bit with a value @@ -387,7 +412,7 @@ int pci_bridge_emul_conf_write(struct pci_bridge_emul *bridge, int where, int mask, ret, old, new, shift; void (*write_op)(struct pci_bridge_emul *bridge, int reg, u32 old, u32 new, u32 mask); - u32 *cfgspace; + __le32 *cfgspace; const struct pci_bridge_reg_behavior *behavior; if (bridge->has_pcie && reg >= PCI_CAP_PCIE_END) @@ -414,11 +439,11 @@ int pci_bridge_emul_conf_write(struct pci_bridge_emul *bridge, int where, if (bridge->has_pcie && reg >= PCI_CAP_PCIE_START) { reg -= PCI_CAP_PCIE_START; write_op = bridge->ops->write_pcie; - cfgspace = (u32 *) &bridge->pcie_conf; + cfgspace = (__le32 *) &bridge->pcie_conf; behavior = bridge->pcie_cap_regs_behavior; } else { write_op = bridge->ops->write_base; - cfgspace = (u32 *) &bridge->conf; + cfgspace = (__le32 *) &bridge->conf; behavior = bridge->pci_regs_behavior; } @@ -431,7 +456,20 @@ int pci_bridge_emul_conf_write(struct pci_bridge_emul *bridge, int where, /* Clear the W1C bits */ new &= ~((value << shift) & (behavior[reg / 4].w1c & mask)); - cfgspace[reg / 4] = new; + /* Save the new value with the cleared W1C bits into the cfgspace */ + cfgspace[reg / 4] = cpu_to_le32(new); + + /* + * Clear the W1C bits not specified by the write mask, so that the + * write_op() does not clear them. + */ + new &= ~(behavior[reg / 4].w1c & ~mask); + + /* + * Set the W1C bits specified by the write mask, so that write_op() + * knows about that they are to be cleared. + */ + new |= (value << shift) & (behavior[reg / 4].w1c & mask); if (write_op) write_op(bridge, reg, old, new, mask); diff --git a/drivers/pci/pci-bridge-emul.h b/drivers/pci/pci-bridge-emul.h index e65b1b79899d0..49bbd37ee318a 100644 --- a/drivers/pci/pci-bridge-emul.h +++ b/drivers/pci/pci-bridge-emul.h @@ -6,65 +6,65 @@ /* PCI configuration space of a PCI-to-PCI bridge. */ struct pci_bridge_emul_conf { - u16 vendor; - u16 device; - u16 command; - u16 status; - u32 class_revision; + __le16 vendor; + __le16 device; + __le16 command; + __le16 status; + __le32 class_revision; u8 cache_line_size; u8 latency_timer; u8 header_type; u8 bist; - u32 bar[2]; + __le32 bar[2]; u8 primary_bus; u8 secondary_bus; u8 subordinate_bus; u8 secondary_latency_timer; u8 iobase; u8 iolimit; - u16 secondary_status; - u16 membase; - u16 memlimit; - u16 pref_mem_base; - u16 pref_mem_limit; - u32 prefbaseupper; - u32 preflimitupper; - u16 iobaseupper; - u16 iolimitupper; + __le16 secondary_status; + __le16 membase; + __le16 memlimit; + __le16 pref_mem_base; + __le16 pref_mem_limit; + __le32 prefbaseupper; + __le32 preflimitupper; + __le16 iobaseupper; + __le16 iolimitupper; u8 capabilities_pointer; u8 reserve[3]; - u32 romaddr; + __le32 romaddr; u8 intline; u8 intpin; - u16 bridgectrl; + __le16 bridgectrl; }; /* PCI configuration space of the PCIe capabilities */ struct pci_bridge_emul_pcie_conf { u8 cap_id; u8 next; - u16 cap; - u32 devcap; - u16 devctl; - u16 devsta; - u32 lnkcap; - u16 lnkctl; - u16 lnksta; - u32 slotcap; - u16 slotctl; - u16 slotsta; - u16 rootctl; - u16 rsvd; - u32 rootsta; - u32 devcap2; - u16 devctl2; - u16 devsta2; - u32 lnkcap2; - u16 lnkctl2; - u16 lnksta2; - u32 slotcap2; - u16 slotctl2; - u16 slotsta2; + __le16 cap; + __le32 devcap; + __le16 devctl; + __le16 devsta; + __le32 lnkcap; + __le16 lnkctl; + __le16 lnksta; + __le32 slotcap; + __le16 slotctl; + __le16 slotsta; + __le16 rootctl; + __le16 rootcap; + __le32 rootsta; + __le32 devcap2; + __le16 devctl2; + __le16 devsta2; + __le32 lnkcap2; + __le16 lnkctl2; + __le16 lnksta2; + __le32 slotcap2; + __le16 slotctl2; + __le16 slotsta2; }; struct pci_bridge_emul; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 1455147158922..918ff0725f1f2 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -224,7 +224,7 @@ static int pci_dev_str_match_path(struct pci_dev *dev, const char *path, *endptr = strchrnul(path, ';'); - wpath = kmemdup_nul(path, *endptr - path, GFP_KERNEL); + wpath = kmemdup_nul(path, *endptr - path, GFP_ATOMIC); if (!wpath) return -ENOMEM; @@ -1674,11 +1674,7 @@ static int pci_enable_device_flags(struct pci_dev *dev, unsigned long flags) * so that things like MSI message writing will behave as expected * (e.g. if the device really is in D0 at enable time). */ - if (dev->pm_cap) { - u16 pmcsr; - pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr); - dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK); - } + pci_update_current_state(dev, dev->current_state); if (atomic_inc_return(&dev->enable_cnt) > 1) return 0; /* already enabled */ @@ -1980,6 +1976,14 @@ int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) } EXPORT_SYMBOL_GPL(pci_set_pcie_reset_state); +void pcie_clear_device_status(struct pci_dev *dev) +{ + u16 sta; + + pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &sta); + pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta); +} + /** * pcie_clear_root_pme_status - Clear root port PME interrupt status. * @dev: PCIe root port or event collector. @@ -2255,7 +2259,14 @@ static int __pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable if (enable) { int error; - if (pci_pme_capable(dev, state)) + /* + * Enable PME signaling if the device can signal PME from + * D3cold regardless of whether or not it can signal PME from + * the current target state, because that will allow it to + * signal PME when the hierarchy above it goes into D3cold and + * the device itself ends up in D3cold as a result of that. + */ + if (pci_pme_capable(dev, state) || pci_pme_capable(dev, PCI_D3cold)) pci_pme_active(dev, true); else ret = 1; @@ -2359,16 +2370,20 @@ static pci_power_t pci_target_state(struct pci_dev *dev, bool wakeup) if (dev->current_state == PCI_D3cold) target_state = PCI_D3cold; - if (wakeup) { + if (wakeup && dev->pme_support) { + pci_power_t state = target_state; + /* * Find the deepest state from which the device can generate * PME#. */ - if (dev->pme_support) { - while (target_state - && !(dev->pme_support & (1 << target_state))) - target_state--; - } + while (state && !(dev->pme_support & (1 << state))) + state--; + + if (state) + return state; + else if (dev->pme_support & 1) + return PCI_D0; } return target_state; @@ -2609,6 +2624,18 @@ static const struct dmi_system_id bridge_d3_blacklist[] = { DMI_MATCH(DMI_BOARD_NAME, "X299 DESIGNARE EX-CF"), }, }, + { + /* + * Downstream device is not accessible after putting a root port + * into D3cold and back into D0 on Elo i2. + */ + .ident = "Elo i2", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Elo Touch Solutions"), + DMI_MATCH(DMI_PRODUCT_NAME, "Elo i2"), + DMI_MATCH(DMI_PRODUCT_VERSION, "RevB"), + }, + }, #endif { } }; @@ -4666,9 +4693,7 @@ static bool pcie_wait_for_link_delay(struct pci_dev *pdev, bool active, } if (active && ret) msleep(delay); - else if (ret != active) - pci_info(pdev, "Data Link Layer Link Active not %s in 1000 msec\n", - active ? "set" : "cleared"); + return ret == active; } @@ -4793,6 +4818,7 @@ void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev) delay); if (!pcie_wait_for_link_delay(dev, true, delay)) { /* Did not train, no need to wait any further */ + pci_info(dev, "Data Link Layer Link Active not set in 1000 msec\n"); return; } } @@ -4900,18 +4926,18 @@ static int pci_dev_reset_slot_function(struct pci_dev *dev, int probe) static void pci_dev_lock(struct pci_dev *dev) { - pci_cfg_access_lock(dev); /* block PM suspend, driver probe, etc. */ device_lock(&dev->dev); + pci_cfg_access_lock(dev); } /* Return 1 on successful lock, 0 on contention */ static int pci_dev_trylock(struct pci_dev *dev) { - if (pci_cfg_access_trylock(dev)) { - if (device_trylock(&dev->dev)) + if (device_trylock(&dev->dev)) { + if (pci_cfg_access_trylock(dev)) return 1; - pci_cfg_access_unlock(dev); + device_unlock(&dev->dev); } return 0; @@ -4919,8 +4945,8 @@ static int pci_dev_trylock(struct pci_dev *dev) static void pci_dev_unlock(struct pci_dev *dev) { - device_unlock(&dev->dev); pci_cfg_access_unlock(dev); + device_unlock(&dev->dev); } static void pci_dev_save_and_disable(struct pci_dev *dev) diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 0e390e1c4d2d3..6f8321feea003 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -92,6 +92,7 @@ void pci_refresh_power_state(struct pci_dev *dev); void pci_power_up(struct pci_dev *dev); void pci_disable_enabled_device(struct pci_dev *dev); int pci_finish_runtime_suspend(struct pci_dev *dev); +void pcie_clear_device_status(struct pci_dev *dev); void pcie_clear_root_pme_status(struct pci_dev *dev); bool pci_check_pme_status(struct pci_dev *dev); void pci_pme_wakeup_bus(struct pci_bus *bus); @@ -444,6 +445,15 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info); void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); #endif /* CONFIG_PCIEAER */ +#ifdef CONFIG_PCIEPORTBUS +/* Cached RCEC Endpoint Association */ +struct rcec_ea { + u8 nextbusn; + u8 lastbusn; + u32 bitmap; +}; +#endif + #ifdef CONFIG_PCIE_DPC void pci_save_dpc_state(struct pci_dev *dev); void pci_restore_dpc_state(struct pci_dev *dev); @@ -456,6 +466,22 @@ static inline void pci_restore_dpc_state(struct pci_dev *dev) {} static inline void pci_dpc_init(struct pci_dev *pdev) {} #endif +#ifdef CONFIG_PCIEPORTBUS +void pci_rcec_init(struct pci_dev *dev); +void pci_rcec_exit(struct pci_dev *dev); +void pcie_link_rcec(struct pci_dev *rcec); +void pcie_walk_rcec(struct pci_dev *rcec, + int (*cb)(struct pci_dev *, void *), + void *userdata); +#else +static inline void pci_rcec_init(struct pci_dev *dev) {} +static inline void pci_rcec_exit(struct pci_dev *dev) {} +static inline void pcie_link_rcec(struct pci_dev *rcec) {} +static inline void pcie_walk_rcec(struct pci_dev *rcec, + int (*cb)(struct pci_dev *, void *), + void *userdata) {} +#endif + #ifdef CONFIG_PCI_ATS /* Address Translation Service */ void pci_ats_init(struct pci_dev *dev); @@ -536,8 +562,8 @@ static inline int pci_dev_specific_disable_acs_redir(struct pci_dev *dev) /* PCI error reporting and recovery */ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, - enum pci_channel_state state, - pci_ers_result_t (*reset_link)(struct pci_dev *pdev)); + pci_channel_state_t state, + pci_ers_result_t (*reset_subordinates)(struct pci_dev *pdev)); bool pcie_wait_for_link(struct pci_dev *pdev, bool active); #ifdef CONFIG_PCIEASPM @@ -666,14 +692,12 @@ void pci_aer_init(struct pci_dev *dev); void pci_aer_exit(struct pci_dev *dev); extern const struct attribute_group aer_stats_attr_group; void pci_aer_clear_fatal_status(struct pci_dev *dev); -void pci_aer_clear_device_status(struct pci_dev *dev); int pci_aer_raw_clear_status(struct pci_dev *dev); #else static inline void pci_no_aer(void) { } static inline void pci_aer_init(struct pci_dev *d) { } static inline void pci_aer_exit(struct pci_dev *d) { } static inline void pci_aer_clear_fatal_status(struct pci_dev *dev) { } -static inline void pci_aer_clear_device_status(struct pci_dev *dev) { } static inline int pci_aer_raw_clear_status(struct pci_dev *dev) { return -EINVAL; } #endif diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile index 68da9280ff117..d9697892fa3e9 100644 --- a/drivers/pci/pcie/Makefile +++ b/drivers/pci/pcie/Makefile @@ -2,7 +2,7 @@ # # Makefile for PCI Express features and port driver -pcieportdrv-y := portdrv_core.o portdrv_pci.o err.o +pcieportdrv-y := portdrv_core.o portdrv_pci.o err.o rcec.o obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index bd9f122165e07..cd3e8891f70bf 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -136,22 +136,18 @@ static const char * const ecrc_policy_str[] = { */ static int enable_ecrc_checking(struct pci_dev *dev) { - int pos; + int aer = dev->aer_cap; u32 reg32; - if (!pci_is_pcie(dev)) + if (!aer) return -ENODEV; - pos = dev->aer_cap; - if (!pos) - return -ENODEV; - - pci_read_config_dword(dev, pos + PCI_ERR_CAP, ®32); + pci_read_config_dword(dev, aer + PCI_ERR_CAP, ®32); if (reg32 & PCI_ERR_CAP_ECRC_GENC) reg32 |= PCI_ERR_CAP_ECRC_GENE; if (reg32 & PCI_ERR_CAP_ECRC_CHKC) reg32 |= PCI_ERR_CAP_ECRC_CHKE; - pci_write_config_dword(dev, pos + PCI_ERR_CAP, reg32); + pci_write_config_dword(dev, aer + PCI_ERR_CAP, reg32); return 0; } @@ -164,19 +160,15 @@ static int enable_ecrc_checking(struct pci_dev *dev) */ static int disable_ecrc_checking(struct pci_dev *dev) { - int pos; + int aer = dev->aer_cap; u32 reg32; - if (!pci_is_pcie(dev)) - return -ENODEV; - - pos = dev->aer_cap; - if (!pos) + if (!aer) return -ENODEV; - pci_read_config_dword(dev, pos + PCI_ERR_CAP, ®32); + pci_read_config_dword(dev, aer + PCI_ERR_CAP, ®32); reg32 &= ~(PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE); - pci_write_config_dword(dev, pos + PCI_ERR_CAP, reg32); + pci_write_config_dword(dev, aer + PCI_ERR_CAP, reg32); return 0; } @@ -217,142 +209,22 @@ void pcie_ecrc_get_policy(char *str) } #endif /* CONFIG_PCIE_ECRC */ -#ifdef CONFIG_ACPI_APEI -static inline int hest_match_pci(struct acpi_hest_aer_common *p, - struct pci_dev *pci) -{ - return ACPI_HEST_SEGMENT(p->bus) == pci_domain_nr(pci->bus) && - ACPI_HEST_BUS(p->bus) == pci->bus->number && - p->device == PCI_SLOT(pci->devfn) && - p->function == PCI_FUNC(pci->devfn); -} - -static inline bool hest_match_type(struct acpi_hest_header *hest_hdr, - struct pci_dev *dev) -{ - u16 hest_type = hest_hdr->type; - u8 pcie_type = pci_pcie_type(dev); - - if ((hest_type == ACPI_HEST_TYPE_AER_ROOT_PORT && - pcie_type == PCI_EXP_TYPE_ROOT_PORT) || - (hest_type == ACPI_HEST_TYPE_AER_ENDPOINT && - pcie_type == PCI_EXP_TYPE_ENDPOINT) || - (hest_type == ACPI_HEST_TYPE_AER_BRIDGE && - (dev->class >> 16) == PCI_BASE_CLASS_BRIDGE)) - return true; - return false; -} - -struct aer_hest_parse_info { - struct pci_dev *pci_dev; - int firmware_first; -}; - -static int hest_source_is_pcie_aer(struct acpi_hest_header *hest_hdr) -{ - if (hest_hdr->type == ACPI_HEST_TYPE_AER_ROOT_PORT || - hest_hdr->type == ACPI_HEST_TYPE_AER_ENDPOINT || - hest_hdr->type == ACPI_HEST_TYPE_AER_BRIDGE) - return 1; - return 0; -} - -static int aer_hest_parse(struct acpi_hest_header *hest_hdr, void *data) -{ - struct aer_hest_parse_info *info = data; - struct acpi_hest_aer_common *p; - int ff; - - if (!hest_source_is_pcie_aer(hest_hdr)) - return 0; - - p = (struct acpi_hest_aer_common *)(hest_hdr + 1); - ff = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST); - - /* - * If no specific device is supplied, determine whether - * FIRMWARE_FIRST is set for *any* PCIe device. - */ - if (!info->pci_dev) { - info->firmware_first |= ff; - return 0; - } - - /* Otherwise, check the specific device */ - if (p->flags & ACPI_HEST_GLOBAL) { - if (hest_match_type(hest_hdr, info->pci_dev)) - info->firmware_first = ff; - } else - if (hest_match_pci(p, info->pci_dev)) - info->firmware_first = ff; - - return 0; -} - -static void aer_set_firmware_first(struct pci_dev *pci_dev) -{ - int rc; - struct aer_hest_parse_info info = { - .pci_dev = pci_dev, - .firmware_first = 0, - }; - - rc = apei_hest_parse(aer_hest_parse, &info); - - if (rc) - pci_dev->__aer_firmware_first = 0; - else - pci_dev->__aer_firmware_first = info.firmware_first; - pci_dev->__aer_firmware_first_valid = 1; -} +#define PCI_EXP_AER_FLAGS (PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \ + PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE) -int pcie_aer_get_firmware_first(struct pci_dev *dev) +int pcie_aer_is_native(struct pci_dev *dev) { - if (!pci_is_pcie(dev)) - return 0; + struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); - if (pcie_ports_native) + if (!dev->aer_cap) return 0; - if (!dev->__aer_firmware_first_valid) - aer_set_firmware_first(dev); - return dev->__aer_firmware_first; -} - -static bool aer_firmware_first; - -/** - * aer_acpi_firmware_first - Check if APEI should control AER. - */ -bool aer_acpi_firmware_first(void) -{ - static bool parsed = false; - struct aer_hest_parse_info info = { - .pci_dev = NULL, /* Check all PCIe devices */ - .firmware_first = 0, - }; - - if (pcie_ports_native) - return false; - - if (!parsed) { - apei_hest_parse(aer_hest_parse, &info); - aer_firmware_first = info.firmware_first; - parsed = true; - } - return aer_firmware_first; + return pcie_ports_native || host->native_aer; } -#endif - -#define PCI_EXP_AER_FLAGS (PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \ - PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE) int pci_enable_pcie_error_reporting(struct pci_dev *dev) { - if (pcie_aer_get_firmware_first(dev)) - return -EIO; - - if (!dev->aer_cap) + if (!pcie_aer_is_native(dev)) return -EIO; return pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS); @@ -361,7 +233,7 @@ EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting); int pci_disable_pcie_error_reporting(struct pci_dev *dev) { - if (pcie_aer_get_firmware_first(dev)) + if (!pcie_aer_is_native(dev)) return -EIO; return pcie_capability_clear_word(dev, PCI_EXP_DEVCTL, @@ -369,32 +241,20 @@ int pci_disable_pcie_error_reporting(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting); -void pci_aer_clear_device_status(struct pci_dev *dev) -{ - u16 sta; - - pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &sta); - pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta); -} - int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) { - int pos; + int aer = dev->aer_cap; u32 status, sev; - pos = dev->aer_cap; - if (!pos) - return -EIO; - - if (pcie_aer_get_firmware_first(dev)) + if (!pcie_aer_is_native(dev)) return -EIO; /* Clear status bits for ERR_NONFATAL errors only */ - pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); - pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev); + pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status); + pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, &sev); status &= ~sev; if (status) - pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); + pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, status); return 0; } @@ -402,22 +262,18 @@ EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status); void pci_aer_clear_fatal_status(struct pci_dev *dev) { - int pos; + int aer = dev->aer_cap; u32 status, sev; - pos = dev->aer_cap; - if (!pos) - return; - - if (pcie_aer_get_firmware_first(dev)) + if (!pcie_aer_is_native(dev)) return; /* Clear status bits for ERR_FATAL errors only */ - pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); - pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev); + pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status); + pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, &sev); status &= sev; if (status) - pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); + pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, status); } /** @@ -431,35 +287,32 @@ void pci_aer_clear_fatal_status(struct pci_dev *dev) */ int pci_aer_raw_clear_status(struct pci_dev *dev) { - int pos; + int aer = dev->aer_cap; u32 status; int port_type; - if (!pci_is_pcie(dev)) - return -ENODEV; - - pos = dev->aer_cap; - if (!pos) + if (!aer) return -EIO; port_type = pci_pcie_type(dev); - if (port_type == PCI_EXP_TYPE_ROOT_PORT) { - pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &status); - pci_write_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, status); + if (port_type == PCI_EXP_TYPE_ROOT_PORT || + port_type == PCI_EXP_TYPE_RC_EC) { + pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &status); + pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, status); } - pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status); - pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, status); + pci_read_config_dword(dev, aer + PCI_ERR_COR_STATUS, &status); + pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS, status); - pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); - pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); + pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status); + pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, status); return 0; } int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) { - if (pcie_aer_get_firmware_first(dev)) + if (!pcie_aer_is_native(dev)) return -EIO; return pci_aer_raw_clear_status(dev); @@ -467,12 +320,11 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) void pci_save_aer_state(struct pci_dev *dev) { + int aer = dev->aer_cap; struct pci_cap_saved_state *save_state; u32 *cap; - int pos; - pos = dev->aer_cap; - if (!pos) + if (!aer) return; save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_ERR); @@ -480,22 +332,21 @@ void pci_save_aer_state(struct pci_dev *dev) return; cap = &save_state->cap.data[0]; - pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, cap++); - pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, cap++); - pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, cap++); - pci_read_config_dword(dev, pos + PCI_ERR_CAP, cap++); + pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, cap++); + pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, cap++); + pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, cap++); + pci_read_config_dword(dev, aer + PCI_ERR_CAP, cap++); if (pcie_cap_has_rtctl(dev)) - pci_read_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, cap++); + pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, cap++); } void pci_restore_aer_state(struct pci_dev *dev) { + int aer = dev->aer_cap; struct pci_cap_saved_state *save_state; u32 *cap; - int pos; - pos = dev->aer_cap; - if (!pos) + if (!aer) return; save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_ERR); @@ -503,12 +354,12 @@ void pci_restore_aer_state(struct pci_dev *dev) return; cap = &save_state->cap.data[0]; - pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, *cap++); - pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, *cap++); - pci_write_config_dword(dev, pos + PCI_ERR_COR_MASK, *cap++); - pci_write_config_dword(dev, pos + PCI_ERR_CAP, *cap++); + pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, *cap++); + pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, *cap++); + pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, *cap++); + pci_write_config_dword(dev, aer + PCI_ERR_CAP, *cap++); if (pcie_cap_has_rtctl(dev)) - pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, *cap++); + pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, *cap++); } void pci_aer_init(struct pci_dev *dev) @@ -719,7 +570,8 @@ static umode_t aer_stats_attrs_are_visible(struct kobject *kobj, if ((a == &dev_attr_aer_rootport_total_err_cor.attr || a == &dev_attr_aer_rootport_total_err_fatal.attr || a == &dev_attr_aer_rootport_total_err_nonfatal.attr) && - pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT) + ((pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT) && + (pci_pcie_type(pdev) != PCI_EXP_TYPE_RC_EC))) return 0; return a->mode; @@ -939,7 +791,7 @@ static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev) */ static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info) { - int pos; + int aer = dev->aer_cap; u32 status, mask; u16 reg16; @@ -974,17 +826,16 @@ static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info) if (!(reg16 & PCI_EXP_AER_FLAGS)) return false; - pos = dev->aer_cap; - if (!pos) + if (!aer) return false; /* Check if error is recorded */ if (e_info->severity == AER_CORRECTABLE) { - pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status); - pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, &mask); + pci_read_config_dword(dev, aer + PCI_ERR_COR_STATUS, &status); + pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, &mask); } else { - pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); - pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &mask); + pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status); + pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &mask); } if (status & ~mask) return true; @@ -1037,7 +888,10 @@ static bool find_source_device(struct pci_dev *parent, if (result) return true; - pci_walk_bus(parent->subordinate, find_device_iter, e_info); + if (pci_pcie_type(parent) == PCI_EXP_TYPE_RC_EC) + pcie_walk_rcec(parent, find_device_iter, e_info); + else + pci_walk_bus(parent->subordinate, find_device_iter, e_info); if (!e_info->error_dev_num) { pci_info(parent, "can't find device of ID%04x\n", e_info->id); @@ -1055,18 +909,18 @@ static bool find_source_device(struct pci_dev *parent, */ static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info) { - int pos; + int aer = dev->aer_cap; if (info->severity == AER_CORRECTABLE) { /* * Correctable error does not need software intervention. * No need to go through error recovery process. */ - pos = dev->aer_cap; - if (pos) - pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, + if (aer) + pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS, info->status); - pci_aer_clear_device_status(dev); + if (pcie_aer_is_native(dev)) + pcie_clear_device_status(dev); } else if (info->severity == AER_NONFATAL) pcie_do_recovery(dev, pci_channel_io_normal, aer_root_reset); else if (info->severity == AER_FATAL) @@ -1155,51 +1009,52 @@ EXPORT_SYMBOL_GPL(aer_recover_queue); */ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) { - int pos, temp; + int type = pci_pcie_type(dev); + int aer = dev->aer_cap; + int temp; /* Must reset in this function */ info->status = 0; info->tlp_header_valid = 0; - pos = dev->aer_cap; - /* The device might not support AER */ - if (!pos) + if (!aer) return 0; if (info->severity == AER_CORRECTABLE) { - pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, + pci_read_config_dword(dev, aer + PCI_ERR_COR_STATUS, &info->status); - pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, + pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, &info->mask); if (!(info->status & ~info->mask)) return 0; - } else if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT || - pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM || + } else if (type == PCI_EXP_TYPE_ROOT_PORT || + type == PCI_EXP_TYPE_RC_EC || + type == PCI_EXP_TYPE_DOWNSTREAM || info->severity == AER_NONFATAL) { /* Link is still healthy for IO reads */ - pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, + pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &info->status); - pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, + pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &info->mask); if (!(info->status & ~info->mask)) return 0; /* Get First Error Pointer */ - pci_read_config_dword(dev, pos + PCI_ERR_CAP, &temp); + pci_read_config_dword(dev, aer + PCI_ERR_CAP, &temp); info->first_error = PCI_ERR_CAP_FEP(temp); if (info->status & AER_LOG_TLP_MASKS) { info->tlp_header_valid = 1; pci_read_config_dword(dev, - pos + PCI_ERR_HEADER_LOG, &info->tlp.dw0); + aer + PCI_ERR_HEADER_LOG, &info->tlp.dw0); pci_read_config_dword(dev, - pos + PCI_ERR_HEADER_LOG + 4, &info->tlp.dw1); + aer + PCI_ERR_HEADER_LOG + 4, &info->tlp.dw1); pci_read_config_dword(dev, - pos + PCI_ERR_HEADER_LOG + 8, &info->tlp.dw2); + aer + PCI_ERR_HEADER_LOG + 8, &info->tlp.dw2); pci_read_config_dword(dev, - pos + PCI_ERR_HEADER_LOG + 12, &info->tlp.dw3); + aer + PCI_ERR_HEADER_LOG + 12, &info->tlp.dw3); } } @@ -1305,15 +1160,15 @@ static irqreturn_t aer_irq(int irq, void *context) struct pcie_device *pdev = (struct pcie_device *)context; struct aer_rpc *rpc = get_service_data(pdev); struct pci_dev *rp = rpc->rpd; + int aer = rp->aer_cap; struct aer_err_source e_src = {}; - int pos = rp->aer_cap; - pci_read_config_dword(rp, pos + PCI_ERR_ROOT_STATUS, &e_src.status); + pci_read_config_dword(rp, aer + PCI_ERR_ROOT_STATUS, &e_src.status); if (!(e_src.status & (PCI_ERR_ROOT_UNCOR_RCV|PCI_ERR_ROOT_COR_RCV))) return IRQ_NONE; - pci_read_config_dword(rp, pos + PCI_ERR_ROOT_ERR_SRC, &e_src.id); - pci_write_config_dword(rp, pos + PCI_ERR_ROOT_STATUS, e_src.status); + pci_read_config_dword(rp, aer + PCI_ERR_ROOT_ERR_SRC, &e_src.id); + pci_write_config_dword(rp, aer + PCI_ERR_ROOT_STATUS, e_src.status); if (!kfifo_put(&rpc->aer_fifo, e_src)) return IRQ_HANDLED; @@ -1327,6 +1182,7 @@ static int set_device_error_reporting(struct pci_dev *dev, void *data) int type = pci_pcie_type(dev); if ((type == PCI_EXP_TYPE_ROOT_PORT) || + (type == PCI_EXP_TYPE_RC_EC) || (type == PCI_EXP_TYPE_UPSTREAM) || (type == PCI_EXP_TYPE_DOWNSTREAM)) { if (enable) @@ -1351,9 +1207,12 @@ static void set_downstream_devices_error_reporting(struct pci_dev *dev, { set_device_error_reporting(dev, &enable); - if (!dev->subordinate) - return; - pci_walk_bus(dev->subordinate, set_device_error_reporting, &enable); + if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC) + pcie_walk_rcec(dev, set_device_error_reporting, &enable); + else if (dev->subordinate) + pci_walk_bus(dev->subordinate, set_device_error_reporting, + &enable); + } /** @@ -1365,7 +1224,7 @@ static void set_downstream_devices_error_reporting(struct pci_dev *dev, static void aer_enable_rootport(struct aer_rpc *rpc) { struct pci_dev *pdev = rpc->rpd; - int aer_pos; + int aer = pdev->aer_cap; u16 reg16; u32 reg32; @@ -1377,14 +1236,13 @@ static void aer_enable_rootport(struct aer_rpc *rpc) pcie_capability_clear_word(pdev, PCI_EXP_RTCTL, SYSTEM_ERROR_INTR_ON_MESG_MASK); - aer_pos = pdev->aer_cap; /* Clear error status */ - pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, ®32); - pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, reg32); - pci_read_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, ®32); - pci_write_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, reg32); - pci_read_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, ®32); - pci_write_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, reg32); + pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, ®32); + pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, reg32); + pci_read_config_dword(pdev, aer + PCI_ERR_COR_STATUS, ®32); + pci_write_config_dword(pdev, aer + PCI_ERR_COR_STATUS, reg32); + pci_read_config_dword(pdev, aer + PCI_ERR_UNCOR_STATUS, ®32); + pci_write_config_dword(pdev, aer + PCI_ERR_UNCOR_STATUS, reg32); /* * Enable error reporting for the root port device and downstream port @@ -1393,9 +1251,9 @@ static void aer_enable_rootport(struct aer_rpc *rpc) set_downstream_devices_error_reporting(pdev, true); /* Enable Root Port's interrupt in response to error messages */ - pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_COMMAND, ®32); + pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, ®32); reg32 |= ROOT_PORT_INTR_ON_MESG_MASK; - pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_COMMAND, reg32); + pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, reg32); } /** @@ -1407,8 +1265,8 @@ static void aer_enable_rootport(struct aer_rpc *rpc) static void aer_disable_rootport(struct aer_rpc *rpc) { struct pci_dev *pdev = rpc->rpd; + int aer = pdev->aer_cap; u32 reg32; - int pos; /* * Disable error reporting for the root port device and downstream port @@ -1416,15 +1274,14 @@ static void aer_disable_rootport(struct aer_rpc *rpc) */ set_downstream_devices_error_reporting(pdev, false); - pos = pdev->aer_cap; /* Disable Root's interrupt in response to error messages */ - pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, ®32); + pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, ®32); reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK; - pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, reg32); + pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, reg32); /* Clear Root's error status reg */ - pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, ®32); - pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, reg32); + pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, ®32); + pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, reg32); } /** @@ -1453,6 +1310,11 @@ static int aer_probe(struct pcie_device *dev) struct device *device = &dev->device; struct pci_dev *port = dev->port; + /* Limit to Root Ports or Root Complex Event Collectors */ + if ((pci_pcie_type(port) != PCI_EXP_TYPE_RC_EC) && + (pci_pcie_type(port) != PCI_EXP_TYPE_ROOT_PORT)) + return -ENODEV; + rpc = devm_kzalloc(device, sizeof(struct aer_rpc), GFP_KERNEL); if (!rpc) return -ENOMEM; @@ -1474,42 +1336,74 @@ static int aer_probe(struct pcie_device *dev) } /** - * aer_root_reset - reset link on Root Port - * @dev: pointer to Root Port's pci_dev data structure + * aer_root_reset - reset Root Port hierarchy, RCEC, or RCiEP + * @dev: pointer to Root Port, RCEC, or RCiEP * - * Invoked by Port Bus driver when performing link reset at Root Port. + * Invoked by Port Bus driver when performing reset. */ static pci_ers_result_t aer_root_reset(struct pci_dev *dev) { + int type = pci_pcie_type(dev); + struct pci_dev *root; + int aer; + struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); u32 reg32; - int pos; int rc; - pos = dev->aer_cap; + /* + * Only Root Ports and RCECs have AER Root Command and Root Status + * registers. If "dev" is an RCiEP, the relevant registers are in + * the RCEC. + */ + if (type == PCI_EXP_TYPE_RC_END) + root = dev->rcec; + else + root = dev; - /* Disable Root's interrupt in response to error messages */ - pci_read_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, ®32); - reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK; - pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32); + /* + * If the platform retained control of AER, an RCiEP may not have + * an RCEC visible to us, so dev->rcec ("root") may be NULL. In + * that case, firmware is responsible for these registers. + */ + aer = root ? root->aer_cap : 0; - rc = pci_bus_error_reset(dev); - pci_info(dev, "Root Port link has been reset\n"); + if ((host->native_aer || pcie_ports_native) && aer) { + /* Disable Root's interrupt in response to error messages */ + pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, ®32); + reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK; + pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32); + } - /* Clear Root Error Status */ - pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, ®32); - pci_write_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, reg32); + if (type == PCI_EXP_TYPE_RC_EC || type == PCI_EXP_TYPE_RC_END) { + if (pcie_has_flr(dev)) { + rc = pcie_flr(dev); + pci_info(dev, "has been reset (%d)\n", rc); + } else { + pci_info(dev, "not reset (no FLR support)\n"); + rc = -ENOTTY; + } + } else { + rc = pci_bus_error_reset(dev); + pci_info(dev, "Root Port link has been reset (%d)\n", rc); + } - /* Enable Root Port's interrupt in response to error messages */ - pci_read_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, ®32); - reg32 |= ROOT_PORT_INTR_ON_MESG_MASK; - pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32); + if ((host->native_aer || pcie_ports_native) && aer) { + /* Clear Root Error Status */ + pci_read_config_dword(root, aer + PCI_ERR_ROOT_STATUS, ®32); + pci_write_config_dword(root, aer + PCI_ERR_ROOT_STATUS, reg32); + + /* Enable Root Port's interrupt in response to error messages */ + pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, ®32); + reg32 |= ROOT_PORT_INTR_ON_MESG_MASK; + pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32); + } return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; } static struct pcie_port_service_driver aerdriver = { .name = "aer", - .port_type = PCI_EXP_TYPE_ROOT_PORT, + .port_type = PCIE_ANY_PORT, .service = PCIE_PORT_SERVICE_AER, .probe = aer_probe, @@ -1523,7 +1417,7 @@ static struct pcie_port_service_driver aerdriver = { */ int __init pcie_aer_init(void) { - if (!pci_aer_available() || aer_acpi_firmware_first()) + if (!pci_aer_available()) return -ENXIO; return pcie_port_service_register(&aerdriver); } diff --git a/drivers/pci/pcie/aer_inject.c b/drivers/pci/pcie/aer_inject.c index 21cc3d3387f79..f7e452728d1d9 100644 --- a/drivers/pci/pcie/aer_inject.c +++ b/drivers/pci/pcie/aer_inject.c @@ -333,8 +333,11 @@ static int aer_inject(struct aer_error_inj *einj) if (!dev) return -ENODEV; rpdev = pcie_find_root_port(dev); + /* If Root Port not found, try to find an RCEC */ + if (!rpdev) + rpdev = dev->rcec; if (!rpdev) { - pci_err(dev, "Root port not found\n"); + pci_err(dev, "Neither Root Port nor RCEC found\n"); ret = -ENODEV; goto out_put; } diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index 72946b4c41ae2..bddc9b47347fb 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -103,7 +103,8 @@ pci_ers_result_t dpc_reset_link(struct pci_dev *pdev) * Wait until the Link is inactive, then clear DPC Trigger Status * to allow the Port to leave DPC. */ - pcie_wait_for_link(pdev, false); + if (!pcie_wait_for_link(pdev, false)) + pci_info(pdev, "Data Link Layer Link Active not cleared in 1000 msec\n"); if (pdev->dpc_rp_extensions && dpc_wait_rp_inactive(pdev)) return PCI_ERS_RESULT_DISCONNECT; @@ -111,8 +112,10 @@ pci_ers_result_t dpc_reset_link(struct pci_dev *pdev) pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS, PCI_EXP_DPC_STATUS_TRIGGER); - if (!pcie_wait_for_link(pdev, true)) + if (!pcie_wait_for_link(pdev, true)) { + pci_info(pdev, "Data Link Layer Link Active not set in 1000 msec\n"); return PCI_ERS_RESULT_DISCONNECT; + } return PCI_ERS_RESULT_RECOVERED; } @@ -284,7 +287,7 @@ static int dpc_probe(struct pcie_device *dev) int status; u16 ctl, cap; - if (pcie_aer_get_firmware_first(pdev) && !pcie_ports_dpc_native) + if (!pcie_aer_is_native(pdev) && !pcie_ports_dpc_native) return -ENOTSUPP; diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 0c40488da651a..c0b9c3311b881 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -46,7 +46,7 @@ static pci_ers_result_t merge_result(enum pci_ers_result orig, } static int report_error_detected(struct pci_dev *dev, - enum pci_channel_state state, + pci_channel_state_t state, enum pci_ers_result *result) { pci_ers_result_t vote; @@ -146,38 +146,71 @@ static int report_resume(struct pci_dev *dev, void *data) return 0; } +/** + * pci_walk_bridge - walk bridges potentially AER affected + * @bridge: bridge which may be a Port, an RCEC, or an RCiEP + * @cb: callback to be called for each device found + * @userdata: arbitrary pointer to be passed to callback + * + * If the device provided is a bridge, walk the subordinate bus, including + * any bridged devices on buses under this bus. Call the provided callback + * on each device found. + * + * If the device provided has no subordinate bus, e.g., an RCEC or RCiEP, + * call the callback on the device itself. + */ +static void pci_walk_bridge(struct pci_dev *bridge, + int (*cb)(struct pci_dev *, void *), + void *userdata) +{ + if (bridge->subordinate) + pci_walk_bus(bridge->subordinate, cb, userdata); + else + cb(bridge, userdata); +} + pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, - enum pci_channel_state state, - pci_ers_result_t (*reset_link)(struct pci_dev *pdev)) + pci_channel_state_t state, + pci_ers_result_t (*reset_subordinates)(struct pci_dev *pdev)) { + int type = pci_pcie_type(dev); + struct pci_dev *bridge; pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER; - struct pci_bus *bus; + struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); /* - * Error recovery runs on all subordinates of the first downstream port. - * If the downstream port detected the error, it is cleared at the end. + * If the error was detected by a Root Port, Downstream Port, RCEC, + * or RCiEP, recovery runs on the device itself. For Ports, that + * also includes any subordinate devices. + * + * If it was detected by another device (Endpoint, etc), recovery + * runs on the device and anything else under the same Port, i.e., + * everything under "bridge". */ - if (!(pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT || - pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM)) - dev = dev->bus->self; - bus = dev->subordinate; - - pci_dbg(dev, "broadcast error_detected message\n"); + if (type == PCI_EXP_TYPE_ROOT_PORT || + type == PCI_EXP_TYPE_DOWNSTREAM || + type == PCI_EXP_TYPE_RC_EC || + type == PCI_EXP_TYPE_RC_END) + bridge = dev; + else + bridge = pci_upstream_bridge(dev); + + pci_dbg(bridge, "broadcast error_detected message\n"); if (state == pci_channel_io_frozen) { - pci_walk_bus(bus, report_frozen_detected, &status); - status = reset_link(dev); + pci_walk_bridge(bridge, report_frozen_detected, &status); + status = reset_subordinates(bridge); if (status != PCI_ERS_RESULT_RECOVERED) { - pci_warn(dev, "link reset failed\n"); + pci_warn(bridge, "subordinate device reset failed\n"); goto failed; } } else { - pci_walk_bus(bus, report_normal_detected, &status); + pci_walk_bridge(bridge, report_normal_detected, &status); } if (status == PCI_ERS_RESULT_CAN_RECOVER) { status = PCI_ERS_RESULT_RECOVERED; - pci_dbg(dev, "broadcast mmio_enabled message\n"); - pci_walk_bus(bus, report_mmio_enabled, &status); + pci_dbg(bridge, "broadcast mmio_enabled message\n"); + pci_walk_bridge(bridge, report_mmio_enabled, &status); } if (status == PCI_ERS_RESULT_NEED_RESET) { @@ -187,26 +220,35 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, * drivers' slot_reset callbacks? */ status = PCI_ERS_RESULT_RECOVERED; - pci_dbg(dev, "broadcast slot_reset message\n"); - pci_walk_bus(bus, report_slot_reset, &status); + pci_dbg(bridge, "broadcast slot_reset message\n"); + pci_walk_bridge(bridge, report_slot_reset, &status); } if (status != PCI_ERS_RESULT_RECOVERED) goto failed; - pci_dbg(dev, "broadcast resume message\n"); - pci_walk_bus(bus, report_resume, &status); + pci_dbg(bridge, "broadcast resume message\n"); + pci_walk_bridge(bridge, report_resume, &status); - pci_aer_clear_device_status(dev); - pci_cleanup_aer_uncorrect_error_status(dev); - pci_info(dev, "device recovery successful\n"); + /* + * If we have native control of AER, clear error status in the Root + * Port or Downstream Port that signaled the error. If the + * platform retained control of AER, it is responsible for clearing + * this status. In that case, the signaling device may not even be + * visible to the OS. + */ + if (host->native_aer || pcie_ports_native) { + pcie_clear_device_status(bridge); + pci_cleanup_aer_uncorrect_error_status(bridge); + } + pci_info(bridge, "device recovery successful\n"); return status; failed: - pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); + pci_uevent_ers(bridge, PCI_ERS_RESULT_DISCONNECT); /* TODO: Should kernel panic here? */ - pci_info(dev, "device recovery failed\n"); + pci_info(bridge, "device recovery failed\n"); return status; } diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c index f38e6c19dd501..2d25c047e1c4f 100644 --- a/drivers/pci/pcie/pme.c +++ b/drivers/pci/pcie/pme.c @@ -310,7 +310,10 @@ static int pcie_pme_can_wakeup(struct pci_dev *dev, void *ign) static void pcie_pme_mark_devices(struct pci_dev *port) { pcie_pme_can_wakeup(port, NULL); - if (port->subordinate) + + if (pci_pcie_type(port) == PCI_EXP_TYPE_RC_EC) + pcie_walk_rcec(port, pcie_pme_can_wakeup, NULL); + else if (port->subordinate) pci_walk_bus(port->subordinate, pcie_pme_can_wakeup, NULL); } @@ -320,10 +323,16 @@ static void pcie_pme_mark_devices(struct pci_dev *port) */ static int pcie_pme_probe(struct pcie_device *srv) { - struct pci_dev *port; + struct pci_dev *port = srv->port; struct pcie_pme_service_data *data; + int type = pci_pcie_type(port); int ret; + /* Limit to Root Ports or Root Complex Event Collectors */ + if (type != PCI_EXP_TYPE_RC_EC && + type != PCI_EXP_TYPE_ROOT_PORT) + return -ENODEV; + data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; @@ -333,7 +342,6 @@ static int pcie_pme_probe(struct pcie_device *srv) data->srv = srv; set_service_data(srv, data); - port = srv->port; pcie_pme_interrupt_enable(port, false); pcie_clear_root_pme_status(port); @@ -445,7 +453,7 @@ static void pcie_pme_remove(struct pcie_device *srv) static struct pcie_port_service_driver pcie_pme_driver = { .name = "pcie_pme", - .port_type = PCI_EXP_TYPE_ROOT_PORT, + .port_type = PCIE_ANY_PORT, .service = PCIE_PORT_SERVICE_PME, .probe = pcie_pme_probe, diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 64b5e081cdb29..af7cf237432ac 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -29,8 +29,10 @@ extern bool pcie_ports_dpc_native; #ifdef CONFIG_PCIEAER int pcie_aer_init(void); +int pcie_aer_is_native(struct pci_dev *dev); #else static inline int pcie_aer_init(void) { return 0; } +static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; } #endif #ifdef CONFIG_HOTPLUG_PCI_PCIE @@ -147,16 +149,5 @@ static inline bool pcie_pme_no_msi(void) { return false; } static inline void pcie_pme_interrupt_enable(struct pci_dev *dev, bool en) {} #endif /* !CONFIG_PCIE_PME */ -#ifdef CONFIG_ACPI_APEI -int pcie_aer_get_firmware_first(struct pci_dev *pci_dev); -#else -static inline int pcie_aer_get_firmware_first(struct pci_dev *pci_dev) -{ - if (pci_dev->__aer_firmware_first_valid) - return pci_dev->__aer_firmware_first; - return 0; -} -#endif - struct device *pcie_port_find_device(struct pci_dev *dev, u32 service); #endif /* _PORTDRV_H_ */ diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 50a9522ab07df..3ee63968deaa5 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -233,12 +233,9 @@ static int get_port_device_capability(struct pci_dev *dev) } #endif - /* - * Root ports are capable of generating PME too. Root Complex - * Event Collectors can also generate PMEs, but we don't handle - * those yet. - */ - if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT && + /* Root Ports and Root Complex Event Collectors may generate PMEs */ + if ((pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT || + pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC) && (pcie_ports_native || host->native_pme)) { services |= PCIE_PORT_SERVICE_PME; @@ -260,8 +257,13 @@ static int get_port_device_capability(struct pci_dev *dev) services |= PCIE_PORT_SERVICE_DPC; if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM || - pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) - services |= PCIE_PORT_SERVICE_BWNOTIF; + pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) { + u32 linkcap; + + pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &linkcap); + if (linkcap & PCI_EXP_LNKCAP_LBNC) + services |= PCIE_PORT_SERVICE_BWNOTIF; + } return services; } diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index 160d67c593105..1825a5e1ac1e6 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -101,14 +101,19 @@ static const struct dev_pm_ops pcie_portdrv_pm_ops = { static int pcie_portdrv_probe(struct pci_dev *dev, const struct pci_device_id *id) { + int type = pci_pcie_type(dev); int status; if (!pci_is_pcie(dev) || - ((pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT) && - (pci_pcie_type(dev) != PCI_EXP_TYPE_UPSTREAM) && - (pci_pcie_type(dev) != PCI_EXP_TYPE_DOWNSTREAM))) + ((type != PCI_EXP_TYPE_ROOT_PORT) && + (type != PCI_EXP_TYPE_UPSTREAM) && + (type != PCI_EXP_TYPE_DOWNSTREAM) && + (type != PCI_EXP_TYPE_RC_EC))) return -ENODEV; + if (type == PCI_EXP_TYPE_RC_EC) + pcie_link_rcec(dev); + status = pcie_port_device_register(dev); if (status) return status; @@ -146,7 +151,7 @@ static void pcie_portdrv_remove(struct pci_dev *dev) } static pci_ers_result_t pcie_portdrv_error_detected(struct pci_dev *dev, - enum pci_channel_state error) + pci_channel_state_t error) { /* Root Port has no impact. Always recovers. */ return PCI_ERS_RESULT_CAN_RECOVER; @@ -195,6 +200,8 @@ static const struct pci_device_id port_pci_ids[] = { { PCI_DEVICE_CLASS(((PCI_CLASS_BRIDGE_PCI << 8) | 0x00), ~0) }, /* subtractive decode PCI-to-PCI bridge, class type is 060401h */ { PCI_DEVICE_CLASS(((PCI_CLASS_BRIDGE_PCI << 8) | 0x01), ~0) }, + /* handle any Root Complex Event Collector */ + { PCI_DEVICE_CLASS(((PCI_CLASS_SYSTEM_RCEC << 8) | 0x00), ~0) }, { }, }; diff --git a/drivers/pci/pcie/rcec.c b/drivers/pci/pcie/rcec.c new file mode 100644 index 0000000000000..d0bcd141ac9c6 --- /dev/null +++ b/drivers/pci/pcie/rcec.c @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Root Complex Event Collector Support + * + * Authors: + * Sean V Kelley + * Qiuxu Zhuo + * + * Copyright (C) 2020 Intel Corp. + */ + +#include +#include +#include + +#include "../pci.h" + +struct walk_rcec_data { + struct pci_dev *rcec; + int (*user_callback)(struct pci_dev *dev, void *data); + void *user_data; +}; + +static bool rcec_assoc_rciep(struct pci_dev *rcec, struct pci_dev *rciep) +{ + unsigned long bitmap = rcec->rcec_ea->bitmap; + unsigned int devn; + + /* An RCiEP found on a different bus in range */ + if (rcec->bus->number != rciep->bus->number) + return true; + + /* Same bus, so check bitmap */ + for_each_set_bit(devn, &bitmap, 32) + if (devn == PCI_SLOT(rciep->devfn)) + return true; + + return false; +} + +static int link_rcec_helper(struct pci_dev *dev, void *data) +{ + struct walk_rcec_data *rcec_data = data; + struct pci_dev *rcec = rcec_data->rcec; + + if ((pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END) && + rcec_assoc_rciep(rcec, dev)) { + dev->rcec = rcec; + pci_dbg(dev, "PME & error events signaled via %s\n", + pci_name(rcec)); + } + + return 0; +} + +static int walk_rcec_helper(struct pci_dev *dev, void *data) +{ + struct walk_rcec_data *rcec_data = data; + struct pci_dev *rcec = rcec_data->rcec; + + if ((pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END) && + rcec_assoc_rciep(rcec, dev)) + rcec_data->user_callback(dev, rcec_data->user_data); + + return 0; +} + +static void walk_rcec(int (*cb)(struct pci_dev *dev, void *data), + void *userdata) +{ + struct walk_rcec_data *rcec_data = userdata; + struct pci_dev *rcec = rcec_data->rcec; + u8 nextbusn, lastbusn; + struct pci_bus *bus; + unsigned int bnr; + + if (!rcec->rcec_ea) + return; + + /* Walk own bus for bitmap based association */ + pci_walk_bus(rcec->bus, cb, rcec_data); + + nextbusn = rcec->rcec_ea->nextbusn; + lastbusn = rcec->rcec_ea->lastbusn; + + /* All RCiEP devices are on the same bus as the RCEC */ + if (nextbusn == 0xff && lastbusn == 0x00) + return; + + for (bnr = nextbusn; bnr <= lastbusn; bnr++) { + /* No association indicated (PCIe 5.0-1, 7.9.10.3) */ + if (bnr == rcec->bus->number) + continue; + + bus = pci_find_bus(pci_domain_nr(rcec->bus), bnr); + if (!bus) + continue; + + /* Find RCiEP devices on the given bus ranges */ + pci_walk_bus(bus, cb, rcec_data); + } +} + +/** + * pcie_link_rcec - Link RCiEP devices associated with RCEC. + * @rcec: RCEC whose RCiEP devices should be linked. + * + * Link the given RCEC to each RCiEP device found. + */ +void pcie_link_rcec(struct pci_dev *rcec) +{ + struct walk_rcec_data rcec_data; + + if (!rcec->rcec_ea) + return; + + rcec_data.rcec = rcec; + rcec_data.user_callback = NULL; + rcec_data.user_data = NULL; + + walk_rcec(link_rcec_helper, &rcec_data); +} + +/** + * pcie_walk_rcec - Walk RCiEP devices associating with RCEC and call callback. + * @rcec: RCEC whose RCiEP devices should be walked + * @cb: Callback to be called for each RCiEP device found + * @userdata: Arbitrary pointer to be passed to callback + * + * Walk the given RCEC. Call the callback on each RCiEP found. + * + * If @cb returns anything other than 0, break out. + */ +void pcie_walk_rcec(struct pci_dev *rcec, int (*cb)(struct pci_dev *, void *), + void *userdata) +{ + struct walk_rcec_data rcec_data; + + if (!rcec->rcec_ea) + return; + + rcec_data.rcec = rcec; + rcec_data.user_callback = cb; + rcec_data.user_data = userdata; + + walk_rcec(walk_rcec_helper, &rcec_data); +} + +void pci_rcec_init(struct pci_dev *dev) +{ + struct rcec_ea *rcec_ea; + u32 rcec, hdr, busn; + u8 ver; + + /* Only for Root Complex Event Collectors */ + if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC) + return; + + rcec = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_RCEC); + if (!rcec) + return; + + rcec_ea = kzalloc(sizeof(*rcec_ea), GFP_KERNEL); + if (!rcec_ea) + return; + + pci_read_config_dword(dev, rcec + PCI_RCEC_RCIEP_BITMAP, + &rcec_ea->bitmap); + + /* Check whether RCEC BUSN register is present */ + pci_read_config_dword(dev, rcec, &hdr); + ver = PCI_EXT_CAP_VER(hdr); + if (ver >= PCI_RCEC_BUSN_REG_VER) { + pci_read_config_dword(dev, rcec + PCI_RCEC_BUSN, &busn); + rcec_ea->nextbusn = PCI_RCEC_BUSN_NEXT(busn); + rcec_ea->lastbusn = PCI_RCEC_BUSN_LAST(busn); + } else { + /* Avoid later ver check by setting nextbusn */ + rcec_ea->nextbusn = 0xff; + rcec_ea->lastbusn = 0x00; + } + + dev->rcec_ea = rcec_ea; +} + +void pci_rcec_exit(struct pci_dev *dev) +{ + kfree(dev->rcec_ea); + dev->rcec_ea = NULL; +} diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index b9dabbc0a4e08..db6719c5d169c 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -2151,6 +2151,7 @@ static void pci_configure_device(struct pci_dev *dev) static void pci_release_capabilities(struct pci_dev *dev) { pci_aer_exit(dev); + pci_rcec_exit(dev); pci_vpd_release(dev); pci_iov_release(dev); pci_free_cap_save_buffers(dev); @@ -2349,6 +2350,7 @@ static void pci_init_capabilities(struct pci_dev *dev) pci_ptm_init(dev); /* Precision Time Measurement */ pci_aer_init(dev); /* Advanced Error Reporting */ pci_dpc_init(dev); /* Downstream Port Containment */ + pci_rcec_init(dev); /* Root Complex Event Collector */ pcie_report_downtraining(dev); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 0241f0dcc093f..2a4bc8df85639 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3246,12 +3246,13 @@ static void fixup_mpss_256(struct pci_dev *dev) { dev->pcie_mpss = 1; /* 256 bytes */ } -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SOLARFLARE, - PCI_DEVICE_ID_SOLARFLARE_SFC4000A_0, fixup_mpss_256); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SOLARFLARE, - PCI_DEVICE_ID_SOLARFLARE_SFC4000A_1, fixup_mpss_256); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SOLARFLARE, - PCI_DEVICE_ID_SOLARFLARE_SFC4000B, fixup_mpss_256); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SOLARFLARE, + PCI_DEVICE_ID_SOLARFLARE_SFC4000A_0, fixup_mpss_256); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SOLARFLARE, + PCI_DEVICE_ID_SOLARFLARE_SFC4000A_1, fixup_mpss_256); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SOLARFLARE, + PCI_DEVICE_ID_SOLARFLARE_SFC4000B, fixup_mpss_256); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ASMEDIA, 0x0612, fixup_mpss_256); /* * Intel 5000 and 5100 Memory controllers have an erratum with read completion @@ -3583,6 +3584,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x0032, quirk_no_bus_reset); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x003c, quirk_no_bus_reset); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x0033, quirk_no_bus_reset); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x0034, quirk_no_bus_reset); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x003e, quirk_no_bus_reset); /* * Root port on some Cavium CN8xxx chips do not successfully complete a bus @@ -4132,6 +4134,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9120, quirk_dma_func1_alias); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9123, quirk_dma_func1_alias); +/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c136 */ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9125, + quirk_dma_func1_alias); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9128, quirk_dma_func1_alias); /* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c14 */ @@ -4683,6 +4688,18 @@ static int pci_quirk_qcom_rp_acs(struct pci_dev *dev, u16 acs_flags) PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF); } +/* + * Each of these NXP Root Ports is in a Root Complex with a unique segment + * number and does provide isolation features to disable peer transactions + * and validate bus numbers in requests, but does not provide an ACS + * capability. + */ +static int pci_quirk_nxp_rp_acs(struct pci_dev *dev, u16 acs_flags) +{ + return pci_acs_ctrl_enabled(acs_flags, + PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF); +} + static int pci_quirk_al_acs(struct pci_dev *dev, u16 acs_flags) { if (pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT) @@ -4909,6 +4926,10 @@ static const struct pci_dev_acs_enabled { { 0x10df, 0x720, pci_quirk_mf_endpoint_acs }, /* Emulex Skyhawk-R */ /* Cavium ThunderX */ { PCI_VENDOR_ID_CAVIUM, PCI_ANY_ID, pci_quirk_cavium_acs }, + /* Cavium multi-function devices */ + { PCI_VENDOR_ID_CAVIUM, 0xA026, pci_quirk_mf_endpoint_acs }, + { PCI_VENDOR_ID_CAVIUM, 0xA059, pci_quirk_mf_endpoint_acs }, + { PCI_VENDOR_ID_CAVIUM, 0xA060, pci_quirk_mf_endpoint_acs }, /* APM X-Gene */ { PCI_VENDOR_ID_AMCC, 0xE004, pci_quirk_xgene_acs }, /* Ampere Computing */ @@ -4929,6 +4950,39 @@ static const struct pci_dev_acs_enabled { { PCI_VENDOR_ID_ZHAOXIN, 0x3038, pci_quirk_mf_endpoint_acs }, { PCI_VENDOR_ID_ZHAOXIN, 0x3104, pci_quirk_mf_endpoint_acs }, { PCI_VENDOR_ID_ZHAOXIN, 0x9083, pci_quirk_mf_endpoint_acs }, + /* NXP root ports, xx=16, 12, or 08 cores */ + /* LX2xx0A : without security features + CAN-FD */ + { PCI_VENDOR_ID_NXP, 0x8d81, pci_quirk_nxp_rp_acs }, + { PCI_VENDOR_ID_NXP, 0x8da1, pci_quirk_nxp_rp_acs }, + { PCI_VENDOR_ID_NXP, 0x8d83, pci_quirk_nxp_rp_acs }, + /* LX2xx0C : security features + CAN-FD */ + { PCI_VENDOR_ID_NXP, 0x8d80, pci_quirk_nxp_rp_acs }, + { PCI_VENDOR_ID_NXP, 0x8da0, pci_quirk_nxp_rp_acs }, + { PCI_VENDOR_ID_NXP, 0x8d82, pci_quirk_nxp_rp_acs }, + /* LX2xx0E : security features + CAN */ + { PCI_VENDOR_ID_NXP, 0x8d90, pci_quirk_nxp_rp_acs }, + { PCI_VENDOR_ID_NXP, 0x8db0, pci_quirk_nxp_rp_acs }, + { PCI_VENDOR_ID_NXP, 0x8d92, pci_quirk_nxp_rp_acs }, + /* LX2xx0N : without security features + CAN */ + { PCI_VENDOR_ID_NXP, 0x8d91, pci_quirk_nxp_rp_acs }, + { PCI_VENDOR_ID_NXP, 0x8db1, pci_quirk_nxp_rp_acs }, + { PCI_VENDOR_ID_NXP, 0x8d93, pci_quirk_nxp_rp_acs }, + /* LX2xx2A : without security features + CAN-FD */ + { PCI_VENDOR_ID_NXP, 0x8d89, pci_quirk_nxp_rp_acs }, + { PCI_VENDOR_ID_NXP, 0x8da9, pci_quirk_nxp_rp_acs }, + { PCI_VENDOR_ID_NXP, 0x8d8b, pci_quirk_nxp_rp_acs }, + /* LX2xx2C : security features + CAN-FD */ + { PCI_VENDOR_ID_NXP, 0x8d88, pci_quirk_nxp_rp_acs }, + { PCI_VENDOR_ID_NXP, 0x8da8, pci_quirk_nxp_rp_acs }, + { PCI_VENDOR_ID_NXP, 0x8d8a, pci_quirk_nxp_rp_acs }, + /* LX2xx2E : security features + CAN */ + { PCI_VENDOR_ID_NXP, 0x8d98, pci_quirk_nxp_rp_acs }, + { PCI_VENDOR_ID_NXP, 0x8db8, pci_quirk_nxp_rp_acs }, + { PCI_VENDOR_ID_NXP, 0x8d9a, pci_quirk_nxp_rp_acs }, + /* LX2xx2N : without security features + CAN */ + { PCI_VENDOR_ID_NXP, 0x8d99, pci_quirk_nxp_rp_acs }, + { PCI_VENDOR_ID_NXP, 0x8db9, pci_quirk_nxp_rp_acs }, + { PCI_VENDOR_ID_NXP, 0x8d9b, pci_quirk_nxp_rp_acs }, /* Zhaoxin Root/Downstream Ports */ { PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs }, { 0 } @@ -5393,7 +5447,7 @@ DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, PCI_CLASS_MULTIMEDIA_HD_AUDIO, 8, quirk_gpu_hda); /* - * Create device link for NVIDIA GPU with integrated USB xHCI Host + * Create device link for GPUs with integrated USB xHCI Host * controller to VGA. */ static void quirk_gpu_usb(struct pci_dev *usb) @@ -5402,9 +5456,11 @@ static void quirk_gpu_usb(struct pci_dev *usb) } DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, PCI_CLASS_SERIAL_USB, 8, quirk_gpu_usb); +DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_ATI, PCI_ANY_ID, + PCI_CLASS_SERIAL_USB, 8, quirk_gpu_usb); /* - * Create device link for NVIDIA GPU with integrated Type-C UCSI controller + * Create device link for GPUs with integrated Type-C UCSI controller * to VGA. Currently there is no class code defined for UCSI device over PCI * so using UNKNOWN class for now and it will be updated when UCSI * over PCI gets a class code. @@ -5417,6 +5473,9 @@ static void quirk_gpu_usb_typec_ucsi(struct pci_dev *ucsi) DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, PCI_CLASS_SERIAL_UNKNOWN, 8, quirk_gpu_usb_typec_ucsi); +DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_ATI, PCI_ANY_ID, + PCI_CLASS_SERIAL_UNKNOWN, 8, + quirk_gpu_usb_typec_ucsi); /* * Enable the NVIDIA GPU integrated HDA controller if the BIOS left it @@ -5721,3 +5780,9 @@ static void apex_pci_fixup_class(struct pci_dev *pdev) } DECLARE_PCI_FIXUP_CLASS_HEADER(0x1ac1, 0x089a, PCI_CLASS_NOT_DEFINED, 8, apex_pci_fixup_class); + +static void nvidia_ion_ahci_fixup(struct pci_dev *pdev) +{ + pdev->dev_flags |= PCI_DEV_FLAGS_HAS_MSI_MASKING; +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0ab8, nvidia_ion_ahci_fixup); diff --git a/drivers/pci/syscall.c b/drivers/pci/syscall.c index 8b003c890b87b..c9f03418e71e0 100644 --- a/drivers/pci/syscall.c +++ b/drivers/pci/syscall.c @@ -22,8 +22,10 @@ SYSCALL_DEFINE5(pciconfig_read, unsigned long, bus, unsigned long, dfn, long err; int cfg_ret; + err = -EPERM; + dev = NULL; if (!capable(CAP_SYS_ADMIN)) - return -EPERM; + goto error; err = -ENODEV; dev = pci_get_domain_bus_and_slot(0, bus, dfn); diff --git a/drivers/pcmcia/Kconfig b/drivers/pcmcia/Kconfig index e004d8da03dcb..73df71a142536 100644 --- a/drivers/pcmcia/Kconfig +++ b/drivers/pcmcia/Kconfig @@ -151,7 +151,7 @@ config TCIC config PCMCIA_ALCHEMY_DEVBOARD tristate "Alchemy Db/Pb1xxx PCMCIA socket services" - depends on MIPS_ALCHEMY && PCMCIA + depends on MIPS_DB1XXX && PCMCIA help Enable this driver of you want PCMCIA support on your Alchemy Db1000, Db/Pb1100, Db/Pb1500, Db/Pb1550, Db/Pb1200, DB1300 diff --git a/drivers/pcmcia/cs.c b/drivers/pcmcia/cs.c index e211e2619680c..f70197154a362 100644 --- a/drivers/pcmcia/cs.c +++ b/drivers/pcmcia/cs.c @@ -666,18 +666,16 @@ static int pccardd(void *__skt) if (events || sysfs_events) continue; + set_current_state(TASK_INTERRUPTIBLE); if (kthread_should_stop()) break; - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - /* make sure we are running */ - __set_current_state(TASK_RUNNING); - try_to_freeze(); } + /* make sure we are running before we exit */ + __set_current_state(TASK_RUNNING); /* shut down socket, if a device is still present */ if (skt->state & SOCKET_PRESENT) { diff --git a/drivers/pcmcia/rsrc_nonstatic.c b/drivers/pcmcia/rsrc_nonstatic.c index 9e6922c08ef62..3a512513cb32f 100644 --- a/drivers/pcmcia/rsrc_nonstatic.c +++ b/drivers/pcmcia/rsrc_nonstatic.c @@ -690,6 +690,9 @@ static struct resource *__nonstatic_find_io_region(struct pcmcia_socket *s, unsigned long min = base; int ret; + if (!res) + return NULL; + data.mask = align - 1; data.offset = base & data.mask; data.map = &s_data->io_db; @@ -809,6 +812,9 @@ static struct resource *nonstatic_find_mem_region(u_long base, u_long num, unsigned long min, max; int ret, i, j; + if (!res) + return NULL; + low = low || !(s->features & SS_CAP_PAGE_REGS); data.mask = align - 1; diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index df352b334ea77..7fd11ef5cb8a2 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -26,8 +26,84 @@ #include +static int armpmu_count_irq_users(const int irq); + +struct pmu_irq_ops { + void (*enable_pmuirq)(unsigned int irq); + void (*disable_pmuirq)(unsigned int irq); + void (*free_pmuirq)(unsigned int irq, int cpu, void __percpu *devid); +}; + +static void armpmu_free_pmuirq(unsigned int irq, int cpu, void __percpu *devid) +{ + free_irq(irq, per_cpu_ptr(devid, cpu)); +} + +static const struct pmu_irq_ops pmuirq_ops = { + .enable_pmuirq = enable_irq, + .disable_pmuirq = disable_irq_nosync, + .free_pmuirq = armpmu_free_pmuirq +}; + +static void armpmu_free_pmunmi(unsigned int irq, int cpu, void __percpu *devid) +{ + free_nmi(irq, per_cpu_ptr(devid, cpu)); +} + +static const struct pmu_irq_ops pmunmi_ops = { + .enable_pmuirq = enable_nmi, + .disable_pmuirq = disable_nmi_nosync, + .free_pmuirq = armpmu_free_pmunmi +}; + +static void armpmu_enable_percpu_pmuirq(unsigned int irq) +{ + enable_percpu_irq(irq, IRQ_TYPE_NONE); +} + +static void armpmu_free_percpu_pmuirq(unsigned int irq, int cpu, + void __percpu *devid) +{ + if (armpmu_count_irq_users(irq) == 1) + free_percpu_irq(irq, devid); +} + +static const struct pmu_irq_ops percpu_pmuirq_ops = { + .enable_pmuirq = armpmu_enable_percpu_pmuirq, + .disable_pmuirq = disable_percpu_irq, + .free_pmuirq = armpmu_free_percpu_pmuirq +}; + +static void armpmu_enable_percpu_pmunmi(unsigned int irq) +{ + if (!prepare_percpu_nmi(irq)) + enable_percpu_nmi(irq, IRQ_TYPE_NONE); +} + +static void armpmu_disable_percpu_pmunmi(unsigned int irq) +{ + disable_percpu_nmi(irq); + teardown_percpu_nmi(irq); +} + +static void armpmu_free_percpu_pmunmi(unsigned int irq, int cpu, + void __percpu *devid) +{ + if (armpmu_count_irq_users(irq) == 1) + free_percpu_nmi(irq, devid); +} + +static const struct pmu_irq_ops percpu_pmunmi_ops = { + .enable_pmuirq = armpmu_enable_percpu_pmunmi, + .disable_pmuirq = armpmu_disable_percpu_pmunmi, + .free_pmuirq = armpmu_free_percpu_pmunmi +}; + static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu); static DEFINE_PER_CPU(int, cpu_irq); +static DEFINE_PER_CPU(const struct pmu_irq_ops *, cpu_irq_ops); + +static bool has_nmi; static inline u64 arm_pmu_event_max_period(struct perf_event *event) { @@ -322,6 +398,9 @@ validate_group(struct perf_event *event) if (!validate_event(event->pmu, &fake_pmu, leader)) return -EINVAL; + if (event == leader) + return 0; + for_each_sibling_event(sibling, leader) { if (!validate_event(event->pmu, &fake_pmu, sibling)) return -EINVAL; @@ -411,12 +490,7 @@ __hw_perf_event_init(struct perf_event *event) local64_set(&hwc->period_left, hwc->sample_period); } - if (event->group_leader != event) { - if (validate_group(event) != 0) - return -EINVAL; - } - - return 0; + return validate_group(event); } static int armpmu_event_init(struct perf_event *event) @@ -544,6 +618,23 @@ static int armpmu_count_irq_users(const int irq) return count; } +static const struct pmu_irq_ops *armpmu_find_irq_ops(int irq) +{ + const struct pmu_irq_ops *ops = NULL; + int cpu; + + for_each_possible_cpu(cpu) { + if (per_cpu(cpu_irq, cpu) != irq) + continue; + + ops = per_cpu(cpu_irq_ops, cpu); + if (ops) + break; + } + + return ops; +} + void armpmu_free_irq(int irq, int cpu) { if (per_cpu(cpu_irq, cpu) == 0) @@ -551,18 +642,18 @@ void armpmu_free_irq(int irq, int cpu) if (WARN_ON(irq != per_cpu(cpu_irq, cpu))) return; - if (!irq_is_percpu_devid(irq)) - free_irq(irq, per_cpu_ptr(&cpu_armpmu, cpu)); - else if (armpmu_count_irq_users(irq) == 1) - free_percpu_irq(irq, &cpu_armpmu); + per_cpu(cpu_irq_ops, cpu)->free_pmuirq(irq, cpu, &cpu_armpmu); per_cpu(cpu_irq, cpu) = 0; + per_cpu(cpu_irq_ops, cpu) = NULL; } int armpmu_request_irq(int irq, int cpu) { int err = 0; const irq_handler_t handler = armpmu_dispatch_irq; + const struct pmu_irq_ops *irq_ops; + if (!irq) return 0; @@ -582,17 +673,44 @@ int armpmu_request_irq(int irq, int cpu) IRQF_NO_THREAD; irq_set_status_flags(irq, IRQ_NOAUTOEN); - err = request_irq(irq, handler, irq_flags, "arm-pmu", + + err = request_nmi(irq, handler, irq_flags, "arm-pmu", per_cpu_ptr(&cpu_armpmu, cpu)); + + /* If cannot get an NMI, get a normal interrupt */ + if (err) { + err = request_irq(irq, handler, irq_flags, "arm-pmu", + per_cpu_ptr(&cpu_armpmu, cpu)); + irq_ops = &pmuirq_ops; + } else { + has_nmi = true; + irq_ops = &pmunmi_ops; + } } else if (armpmu_count_irq_users(irq) == 0) { - err = request_percpu_irq(irq, handler, "arm-pmu", - &cpu_armpmu); + err = request_percpu_nmi(irq, handler, "arm-pmu", &cpu_armpmu); + + /* If cannot get an NMI, get a normal interrupt */ + if (err) { + err = request_percpu_irq(irq, handler, "arm-pmu", + &cpu_armpmu); + irq_ops = &percpu_pmuirq_ops; + } else { + has_nmi= true; + irq_ops = &percpu_pmunmi_ops; + } + } else { + /* Per cpudevid irq was already requested by another CPU */ + irq_ops = armpmu_find_irq_ops(irq); + + if (WARN_ON(!irq_ops)) + err = -EINVAL; } if (err) goto err_out; per_cpu(cpu_irq, cpu) = irq; + per_cpu(cpu_irq_ops, cpu) = irq_ops; return 0; err_out: @@ -625,12 +743,8 @@ static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node) per_cpu(cpu_armpmu, cpu) = pmu; irq = armpmu_get_cpu_irq(pmu, cpu); - if (irq) { - if (irq_is_percpu_devid(irq)) - enable_percpu_irq(irq, IRQ_TYPE_NONE); - else - enable_irq(irq); - } + if (irq) + per_cpu(cpu_irq_ops, cpu)->enable_pmuirq(irq); return 0; } @@ -644,12 +758,8 @@ static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node) return 0; irq = armpmu_get_cpu_irq(pmu, cpu); - if (irq) { - if (irq_is_percpu_devid(irq)) - disable_percpu_irq(irq); - else - disable_irq_nosync(irq); - } + if (irq) + per_cpu(cpu_irq_ops, cpu)->disable_pmuirq(irq); per_cpu(cpu_armpmu, cpu) = NULL; @@ -870,8 +980,9 @@ int armpmu_register(struct arm_pmu *pmu) if (!__oprofile_cpu_pmu) __oprofile_cpu_pmu = pmu; - pr_info("enabled with %s PMU driver, %d counters available\n", - pmu->name, pmu->num_events); + pr_info("enabled with %s PMU driver, %d counters available%s\n", + pmu->name, pmu->num_events, + has_nmi ? ", using NMIs" : ""); return 0; diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index 726ed8f59868c..912a220a9db92 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -29,7 +29,7 @@ #define CNTL_OVER_MASK 0xFFFFFFFE #define CNTL_CSV_SHIFT 24 -#define CNTL_CSV_MASK (0xFF << CNTL_CSV_SHIFT) +#define CNTL_CSV_MASK (0xFFU << CNTL_CSV_SHIFT) #define EVENT_CYCLES_ID 0 #define EVENT_CYCLES_COUNTER 0 diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c index 4da37f650f983..7018eb21d1892 100644 --- a/drivers/perf/qcom_l2_pmu.c +++ b/drivers/perf/qcom_l2_pmu.c @@ -781,7 +781,7 @@ static struct cluster_pmu *l2_cache_associate_cpu_with_cluster( { u64 mpidr; int cpu_cluster_id; - struct cluster_pmu *cluster = NULL; + struct cluster_pmu *cluster; /* * This assumes that the cluster_id is in MPIDR[aff1] for @@ -803,10 +803,10 @@ static struct cluster_pmu *l2_cache_associate_cpu_with_cluster( cluster->cluster_id); cpumask_set_cpu(cpu, &cluster->cluster_cpus); *per_cpu_ptr(l2cache_pmu->pmu_cluster, cpu) = cluster; - break; + return cluster; } - return cluster; + return NULL; } static int l2cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) diff --git a/drivers/phy/motorola/phy-mapphone-mdm6600.c b/drivers/phy/motorola/phy-mapphone-mdm6600.c index 94a34cf75eb39..39d13f7e4cf33 100644 --- a/drivers/phy/motorola/phy-mapphone-mdm6600.c +++ b/drivers/phy/motorola/phy-mapphone-mdm6600.c @@ -628,7 +628,8 @@ static int phy_mdm6600_probe(struct platform_device *pdev) cleanup: if (error < 0) phy_mdm6600_device_power_off(ddata); - + pm_runtime_disable(ddata->dev); + pm_runtime_dont_use_autosuspend(ddata->dev); return error; } diff --git a/drivers/phy/phy-core-mipi-dphy.c b/drivers/phy/phy-core-mipi-dphy.c index 14e0551cd3190..0aa740b73d0db 100644 --- a/drivers/phy/phy-core-mipi-dphy.c +++ b/drivers/phy/phy-core-mipi-dphy.c @@ -66,10 +66,10 @@ int phy_mipi_dphy_get_default_config(unsigned long pixel_clock, cfg->hs_trail = max(4 * 8 * ui, 60000 + 4 * 4 * ui); cfg->init = 100; - cfg->lpx = 60000; + cfg->lpx = 50000; cfg->ta_get = 5 * cfg->lpx; cfg->ta_go = 4 * cfg->lpx; - cfg->ta_sure = 2 * cfg->lpx; + cfg->ta_sure = cfg->lpx; cfg->wakeup = 1000; cfg->hs_clk_rate = hs_clk_rate; diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.c b/drivers/phy/qualcomm/phy-qcom-qmp.c index 5ddbf9a1f328b..21d40c6658545 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp.c @@ -1517,7 +1517,7 @@ static int qcom_qmp_phy_enable(struct phy *phy) qcom_qmp_phy_configure(pcs, cfg->regs, cfg->pcs_tbl, cfg->pcs_tbl_num); ret = reset_control_deassert(qmp->ufs_reset); if (ret) - goto err_lane_rst; + goto err_pcs_ready; /* * Pull out PHY from POWER DOWN state. @@ -1860,6 +1860,11 @@ static const struct phy_ops qcom_qmp_ufs_ops = { .owner = THIS_MODULE, }; +static void qcom_qmp_reset_control_put(void *data) +{ + reset_control_put(data); +} + static int qcom_qmp_phy_create(struct device *dev, struct device_node *np, int id) { @@ -1929,7 +1934,7 @@ int qcom_qmp_phy_create(struct device *dev, struct device_node *np, int id) * all phys that don't need this. */ snprintf(prop_name, sizeof(prop_name), "pipe%d", id); - qphy->pipe_clk = of_clk_get_by_name(np, prop_name); + qphy->pipe_clk = devm_get_clk_from_child(dev, np, prop_name); if (IS_ERR(qphy->pipe_clk)) { if (qmp->cfg->type == PHY_TYPE_PCIE || qmp->cfg->type == PHY_TYPE_USB3) { @@ -1951,6 +1956,10 @@ int qcom_qmp_phy_create(struct device *dev, struct device_node *np, int id) dev_err(dev, "failed to get lane%d reset\n", id); return PTR_ERR(qphy->lane_rst); } + ret = devm_add_action_or_reset(dev, qcom_qmp_reset_control_put, + qphy->lane_rst); + if (ret) + return ret; } if (qmp->cfg->type == PHY_TYPE_UFS) diff --git a/drivers/phy/qualcomm/phy-qcom-qusb2.c b/drivers/phy/qualcomm/phy-qcom-qusb2.c index bf94a52d30871..946e9b05f0ae6 100644 --- a/drivers/phy/qualcomm/phy-qcom-qusb2.c +++ b/drivers/phy/qualcomm/phy-qcom-qusb2.c @@ -432,7 +432,7 @@ static void qusb2_phy_set_tune2_param(struct qusb2_phy *qphy) { struct device *dev = &qphy->phy->dev; const struct qusb2_phy_cfg *cfg = qphy->cfg; - u8 *val; + u8 *val, hstx_trim; /* efuse register is optional */ if (!qphy->cell) @@ -446,7 +446,13 @@ static void qusb2_phy_set_tune2_param(struct qusb2_phy *qphy) * set while configuring the phy. */ val = nvmem_cell_read(qphy->cell, NULL); - if (IS_ERR(val) || !val[0]) { + if (IS_ERR(val)) { + dev_dbg(dev, "failed to read a valid hs-tx trim value\n"); + return; + } + hstx_trim = val[0]; + kfree(val); + if (!hstx_trim) { dev_dbg(dev, "failed to read a valid hs-tx trim value\n"); return; } @@ -454,12 +460,10 @@ static void qusb2_phy_set_tune2_param(struct qusb2_phy *qphy) /* Fused TUNE1/2 value is the higher nibble only */ if (cfg->update_tune1_with_efuse) qusb2_write_mask(qphy->base, cfg->regs[QUSB2PHY_PORT_TUNE1], - val[0] << HSTX_TRIM_SHIFT, - HSTX_TRIM_MASK); + hstx_trim << HSTX_TRIM_SHIFT, HSTX_TRIM_MASK); else qusb2_write_mask(qphy->base, cfg->regs[QUSB2PHY_PORT_TUNE2], - val[0] << HSTX_TRIM_SHIFT, - HSTX_TRIM_MASK); + hstx_trim << HSTX_TRIM_SHIFT, HSTX_TRIM_MASK); } static int qusb2_phy_set_mode(struct phy *phy, diff --git a/drivers/phy/samsung/phy-exynos5250-sata.c b/drivers/phy/samsung/phy-exynos5250-sata.c index 4dd7324d91b26..ea46576404b8e 100644 --- a/drivers/phy/samsung/phy-exynos5250-sata.c +++ b/drivers/phy/samsung/phy-exynos5250-sata.c @@ -190,6 +190,7 @@ static int exynos_sata_phy_probe(struct platform_device *pdev) return -EINVAL; sata_phy->client = of_find_i2c_device_by_node(node); + of_node_put(node); if (!sata_phy->client) return -EPROBE_DEFER; @@ -198,20 +199,21 @@ static int exynos_sata_phy_probe(struct platform_device *pdev) sata_phy->phyclk = devm_clk_get(dev, "sata_phyctrl"); if (IS_ERR(sata_phy->phyclk)) { dev_err(dev, "failed to get clk for PHY\n"); - return PTR_ERR(sata_phy->phyclk); + ret = PTR_ERR(sata_phy->phyclk); + goto put_dev; } ret = clk_prepare_enable(sata_phy->phyclk); if (ret < 0) { dev_err(dev, "failed to enable source clk\n"); - return ret; + goto put_dev; } sata_phy->phy = devm_phy_create(dev, NULL, &exynos_sata_phy_ops); if (IS_ERR(sata_phy->phy)) { - clk_disable_unprepare(sata_phy->phyclk); dev_err(dev, "failed to create PHY\n"); - return PTR_ERR(sata_phy->phy); + ret = PTR_ERR(sata_phy->phy); + goto clk_disable; } phy_set_drvdata(sata_phy->phy, sata_phy); @@ -219,11 +221,18 @@ static int exynos_sata_phy_probe(struct platform_device *pdev) phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); if (IS_ERR(phy_provider)) { - clk_disable_unprepare(sata_phy->phyclk); - return PTR_ERR(phy_provider); + ret = PTR_ERR(phy_provider); + goto clk_disable; } return 0; + +clk_disable: + clk_disable_unprepare(sata_phy->phyclk); +put_dev: + put_device(&sata_phy->client->dev); + + return ret; } static const struct of_device_id exynos_sata_phy_of_match[] = { diff --git a/drivers/phy/socionext/phy-uniphier-usb3ss.c b/drivers/phy/socionext/phy-uniphier-usb3ss.c index a7577e316baf5..e63648b5c7547 100644 --- a/drivers/phy/socionext/phy-uniphier-usb3ss.c +++ b/drivers/phy/socionext/phy-uniphier-usb3ss.c @@ -22,11 +22,13 @@ #include #define SSPHY_TESTI 0x0 -#define SSPHY_TESTO 0x4 #define TESTI_DAT_MASK GENMASK(13, 6) #define TESTI_ADR_MASK GENMASK(5, 1) #define TESTI_WR_EN BIT(0) +#define SSPHY_TESTO 0x4 +#define TESTO_DAT_MASK GENMASK(7, 0) + #define PHY_F(regno, msb, lsb) { (regno), (msb), (lsb) } #define CDR_CPD_TRIM PHY_F(7, 3, 0) /* RxPLL charge pump current */ @@ -84,12 +86,12 @@ static void uniphier_u3ssphy_set_param(struct uniphier_u3ssphy_priv *priv, val = FIELD_PREP(TESTI_DAT_MASK, 1); val |= FIELD_PREP(TESTI_ADR_MASK, p->field.reg_no); uniphier_u3ssphy_testio_write(priv, val); - val = readl(priv->base + SSPHY_TESTO); + val = readl(priv->base + SSPHY_TESTO) & TESTO_DAT_MASK; /* update value */ - val &= ~FIELD_PREP(TESTI_DAT_MASK, field_mask); + val &= ~field_mask; data = field_mask & (p->value << p->field.lsb); - val = FIELD_PREP(TESTI_DAT_MASK, data); + val = FIELD_PREP(TESTI_DAT_MASK, data | val); val |= FIELD_PREP(TESTI_ADR_MASK, p->field.reg_no); uniphier_u3ssphy_testio_write(priv, val); uniphier_u3ssphy_testio_write(priv, val | TESTI_WR_EN); diff --git a/drivers/phy/ti/phy-am654-serdes.c b/drivers/phy/ti/phy-am654-serdes.c index 6ef12017ff4e8..96e3426f9293b 100644 --- a/drivers/phy/ti/phy-am654-serdes.c +++ b/drivers/phy/ti/phy-am654-serdes.c @@ -641,7 +641,7 @@ static int serdes_am654_probe(struct platform_device *pdev) clk_err: of_clk_del_provider(node); - + pm_runtime_disable(dev); return ret; } diff --git a/drivers/phy/ti/phy-omap-usb2.c b/drivers/phy/ti/phy-omap-usb2.c index 3d74629d7423c..471fe2e80f4ea 100644 --- a/drivers/phy/ti/phy-omap-usb2.c +++ b/drivers/phy/ti/phy-omap-usb2.c @@ -157,7 +157,7 @@ static int omap_usb2_enable_clocks(struct omap_usb *phy) return 0; err1: - clk_disable(phy->wkupclk); + clk_disable_unprepare(phy->wkupclk); err0: return ret; diff --git a/drivers/pinctrl/bcm/pinctrl-bcm2835.c b/drivers/pinctrl/bcm/pinctrl-bcm2835.c index 0de1a3a96984c..e4da3217e9398 100644 --- a/drivers/pinctrl/bcm/pinctrl-bcm2835.c +++ b/drivers/pinctrl/bcm/pinctrl-bcm2835.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -37,12 +38,10 @@ #define MODULE_NAME "pinctrl-bcm2835" #define BCM2835_NUM_GPIOS 54 +#define BCM2711_NUM_GPIOS 58 #define BCM2835_NUM_BANKS 2 #define BCM2835_NUM_IRQS 3 -#define BCM2835_PIN_BITMAP_SZ \ - DIV_ROUND_UP(BCM2835_NUM_GPIOS, sizeof(unsigned long) * 8) - /* GPIO register offsets */ #define GPFSEL0 0x0 /* Function Select */ #define GPSET0 0x1c /* Pin Output Set */ @@ -78,13 +77,15 @@ struct bcm2835_pinctrl { struct device *dev; void __iomem *base; + int *wake_irq; /* note: locking assumes each bank will have its own unsigned long */ unsigned long enabled_irq_map[BCM2835_NUM_BANKS]; - unsigned int irq_type[BCM2835_NUM_GPIOS]; + unsigned int irq_type[BCM2711_NUM_GPIOS]; struct pinctrl_dev *pctl_dev; struct gpio_chip gpio_chip; + struct pinctrl_desc pctl_desc; struct pinctrl_gpio_range gpio_range; raw_spinlock_t irq_lock[BCM2835_NUM_BANKS]; @@ -147,6 +148,10 @@ static struct pinctrl_pin_desc bcm2835_gpio_pins[] = { BCM2835_GPIO_PIN(51), BCM2835_GPIO_PIN(52), BCM2835_GPIO_PIN(53), + BCM2835_GPIO_PIN(54), + BCM2835_GPIO_PIN(55), + BCM2835_GPIO_PIN(56), + BCM2835_GPIO_PIN(57), }; /* one pin per group */ @@ -205,6 +210,10 @@ static const char * const bcm2835_gpio_groups[] = { "gpio51", "gpio52", "gpio53", + "gpio54", + "gpio55", + "gpio56", + "gpio57", }; enum bcm2835_fsel { @@ -355,6 +364,22 @@ static const struct gpio_chip bcm2835_gpio_chip = { .can_sleep = false, }; +static const struct gpio_chip bcm2711_gpio_chip = { + .label = "pinctrl-bcm2711", + .owner = THIS_MODULE, + .request = gpiochip_generic_request, + .free = gpiochip_generic_free, + .direction_input = bcm2835_gpio_direction_input, + .direction_output = bcm2835_gpio_direction_output, + .get_direction = bcm2835_gpio_get_direction, + .get = bcm2835_gpio_get, + .set = bcm2835_gpio_set, + .set_config = gpiochip_generic_config, + .base = -1, + .ngpio = BCM2711_NUM_GPIOS, + .can_sleep = false, +}; + static void bcm2835_gpio_irq_handle_bank(struct bcm2835_pinctrl *pc, unsigned int bank, u32 mask) { @@ -401,7 +426,7 @@ static void bcm2835_gpio_irq_handler(struct irq_desc *desc) bcm2835_gpio_irq_handle_bank(pc, 0, 0xf0000000); bcm2835_gpio_irq_handle_bank(pc, 1, 0x00003fff); break; - case 2: /* IRQ2 covers GPIOs 46-53 */ + case 2: /* IRQ2 covers GPIOs 46-57 */ bcm2835_gpio_irq_handle_bank(pc, 1, 0x003fc000); break; } @@ -409,6 +434,11 @@ static void bcm2835_gpio_irq_handler(struct irq_desc *desc) chained_irq_exit(host_chip, desc); } +static irqreturn_t bcm2835_gpio_wake_irq_handler(int irq, void *dev_id) +{ + return IRQ_HANDLED; +} + static inline void __bcm2835_gpio_irq_config(struct bcm2835_pinctrl *pc, unsigned reg, unsigned offset, bool enable) { @@ -608,6 +638,34 @@ static void bcm2835_gpio_irq_ack(struct irq_data *data) bcm2835_gpio_set_bit(pc, GPEDS0, gpio); } +static int bcm2835_gpio_irq_set_wake(struct irq_data *data, unsigned int on) +{ + struct gpio_chip *chip = irq_data_get_irq_chip_data(data); + struct bcm2835_pinctrl *pc = gpiochip_get_data(chip); + unsigned gpio = irqd_to_hwirq(data); + unsigned int irqgroup; + int ret = -EINVAL; + + if (!pc->wake_irq) + return ret; + + if (gpio <= 27) + irqgroup = 0; + else if (gpio >= 28 && gpio <= 45) + irqgroup = 1; + else if (gpio >= 46 && gpio <= 57) + irqgroup = 2; + else + return ret; + + if (on) + ret = enable_irq_wake(pc->wake_irq[irqgroup]); + else + ret = disable_irq_wake(pc->wake_irq[irqgroup]); + + return ret; +} + static struct irq_chip bcm2835_gpio_irq_chip = { .name = MODULE_NAME, .irq_enable = bcm2835_gpio_irq_enable, @@ -616,11 +674,13 @@ static struct irq_chip bcm2835_gpio_irq_chip = { .irq_ack = bcm2835_gpio_irq_ack, .irq_mask = bcm2835_gpio_irq_disable, .irq_unmask = bcm2835_gpio_irq_enable, + .irq_set_wake = bcm2835_gpio_irq_set_wake, + .flags = IRQCHIP_MASK_ON_SUSPEND, }; static int bcm2835_pctl_get_groups_count(struct pinctrl_dev *pctldev) { - return ARRAY_SIZE(bcm2835_gpio_groups); + return BCM2835_NUM_GPIOS; } static const char *bcm2835_pctl_get_group_name(struct pinctrl_dev *pctldev, @@ -778,7 +838,7 @@ static int bcm2835_pctl_dt_node_to_map(struct pinctrl_dev *pctldev, err = of_property_read_u32_index(np, "brcm,pins", i, &pin); if (err) goto out; - if (pin >= ARRAY_SIZE(bcm2835_gpio_pins)) { + if (pin >= pc->pctl_desc.npins) { dev_err(pc->dev, "%pOF: invalid brcm,pins value %d\n", np, pin); err = -EINVAL; @@ -854,7 +914,7 @@ static int bcm2835_pmx_get_function_groups(struct pinctrl_dev *pctldev, { /* every pin can do every function */ *groups = bcm2835_gpio_groups; - *num_groups = ARRAY_SIZE(bcm2835_gpio_groups); + *num_groups = BCM2835_NUM_GPIOS; return 0; } @@ -1054,29 +1114,66 @@ static const struct pinconf_ops bcm2711_pinconf_ops = { .pin_config_set = bcm2711_pinconf_set, }; -static struct pinctrl_desc bcm2835_pinctrl_desc = { +static const struct pinctrl_desc bcm2835_pinctrl_desc = { .name = MODULE_NAME, .pins = bcm2835_gpio_pins, - .npins = ARRAY_SIZE(bcm2835_gpio_pins), + .npins = BCM2835_NUM_GPIOS, .pctlops = &bcm2835_pctl_ops, .pmxops = &bcm2835_pmx_ops, .confops = &bcm2835_pinconf_ops, .owner = THIS_MODULE, }; -static struct pinctrl_gpio_range bcm2835_pinctrl_gpio_range = { +static const struct pinctrl_desc bcm2711_pinctrl_desc = { + .name = "pinctrl-bcm2711", + .pins = bcm2835_gpio_pins, + .npins = BCM2711_NUM_GPIOS, + .pctlops = &bcm2835_pctl_ops, + .pmxops = &bcm2835_pmx_ops, + .confops = &bcm2711_pinconf_ops, + .owner = THIS_MODULE, +}; + +static const struct pinctrl_gpio_range bcm2835_pinctrl_gpio_range = { .name = MODULE_NAME, .npins = BCM2835_NUM_GPIOS, }; +static const struct pinctrl_gpio_range bcm2711_pinctrl_gpio_range = { + .name = "pinctrl-bcm2711", + .npins = BCM2711_NUM_GPIOS, +}; + +struct bcm_plat_data { + const struct gpio_chip *gpio_chip; + const struct pinctrl_desc *pctl_desc; + const struct pinctrl_gpio_range *gpio_range; +}; + +static const struct bcm_plat_data bcm2835_plat_data = { + .gpio_chip = &bcm2835_gpio_chip, + .pctl_desc = &bcm2835_pinctrl_desc, + .gpio_range = &bcm2835_pinctrl_gpio_range, +}; + +static const struct bcm_plat_data bcm2711_plat_data = { + .gpio_chip = &bcm2711_gpio_chip, + .pctl_desc = &bcm2711_pinctrl_desc, + .gpio_range = &bcm2711_pinctrl_gpio_range, +}; + static const struct of_device_id bcm2835_pinctrl_match[] = { { .compatible = "brcm,bcm2835-gpio", - .data = &bcm2835_pinconf_ops, + .data = &bcm2835_plat_data, }, { .compatible = "brcm,bcm2711-gpio", - .data = &bcm2711_pinconf_ops, + .data = &bcm2711_plat_data, + }, + { + .compatible = "brcm,bcm7211-gpio", + .data = &bcm2711_plat_data, }, {} }; @@ -1085,14 +1182,16 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct device_node *np = dev->of_node; + const struct bcm_plat_data *pdata; struct bcm2835_pinctrl *pc; struct gpio_irq_chip *girq; struct resource iomem; int err, i; const struct of_device_id *match; + int is_7211 = 0; - BUILD_BUG_ON(ARRAY_SIZE(bcm2835_gpio_pins) != BCM2835_NUM_GPIOS); - BUILD_BUG_ON(ARRAY_SIZE(bcm2835_gpio_groups) != BCM2835_NUM_GPIOS); + BUILD_BUG_ON(ARRAY_SIZE(bcm2835_gpio_pins) != BCM2711_NUM_GPIOS); + BUILD_BUG_ON(ARRAY_SIZE(bcm2835_gpio_groups) != BCM2711_NUM_GPIOS); pc = devm_kzalloc(dev, sizeof(*pc), GFP_KERNEL); if (!pc) @@ -1111,7 +1210,14 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev) if (IS_ERR(pc->base)) return PTR_ERR(pc->base); - pc->gpio_chip = bcm2835_gpio_chip; + match = of_match_node(bcm2835_pinctrl_match, pdev->dev.of_node); + if (!match) + return -EINVAL; + + pdata = match->data; + is_7211 = of_device_is_compatible(np, "brcm,bcm7211-gpio"); + + pc->gpio_chip = *pdata->gpio_chip; pc->gpio_chip.parent = dev; pc->gpio_chip.of_node = np; @@ -1135,6 +1241,18 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev) raw_spin_lock_init(&pc->irq_lock[i]); } + pc->pctl_desc = *pdata->pctl_desc; + pc->pctl_dev = devm_pinctrl_register(dev, &pc->pctl_desc, pc); + if (IS_ERR(pc->pctl_dev)) { + gpiochip_remove(&pc->gpio_chip); + return PTR_ERR(pc->pctl_dev); + } + + pc->gpio_range = *pdata->gpio_range; + pc->gpio_range.base = pc->gpio_chip.base; + pc->gpio_range.gc = &pc->gpio_chip; + pinctrl_add_gpio_range(pc->pctl_dev, &pc->gpio_range); + girq = &pc->gpio_chip.irq; girq->chip = &bcm2835_gpio_irq_chip; girq->parent_handler = bcm2835_gpio_irq_handler; @@ -1142,8 +1260,21 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev) girq->parents = devm_kcalloc(dev, BCM2835_NUM_IRQS, sizeof(*girq->parents), GFP_KERNEL); - if (!girq->parents) - return -ENOMEM; + if (!girq->parents) { + err = -ENOMEM; + goto out_remove; + } + + if (is_7211) { + pc->wake_irq = devm_kcalloc(dev, BCM2835_NUM_IRQS, + sizeof(*pc->wake_irq), + GFP_KERNEL); + if (!pc->wake_irq) { + err = -ENOMEM; + goto out_remove; + } + } + /* * Use the same handler for all groups: this is necessary * since we use one gpiochip to cover all lines - the @@ -1151,35 +1282,50 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev) * bank that was firing the IRQ and look up the per-group * and bank data. */ - for (i = 0; i < BCM2835_NUM_IRQS; i++) + for (i = 0; i < BCM2835_NUM_IRQS; i++) { + int len; + char *name; + girq->parents[i] = irq_of_parse_and_map(np, i); + if (!is_7211) + continue; + + /* Skip over the all banks interrupts */ + pc->wake_irq[i] = irq_of_parse_and_map(np, i + + BCM2835_NUM_IRQS + 1); + + len = strlen(dev_name(pc->dev)) + 16; + name = devm_kzalloc(pc->dev, len, GFP_KERNEL); + if (!name) { + err = -ENOMEM; + goto out_remove; + } + + snprintf(name, len, "%s:bank%d", dev_name(pc->dev), i); + + /* These are optional interrupts */ + err = devm_request_irq(dev, pc->wake_irq[i], + bcm2835_gpio_wake_irq_handler, + IRQF_SHARED, name, pc); + if (err) + dev_warn(dev, "unable to request wake IRQ %d\n", + pc->wake_irq[i]); + } + girq->default_type = IRQ_TYPE_NONE; girq->handler = handle_level_irq; err = gpiochip_add_data(&pc->gpio_chip, pc); if (err) { dev_err(dev, "could not add GPIO chip\n"); - return err; + goto out_remove; } - match = of_match_node(bcm2835_pinctrl_match, pdev->dev.of_node); - if (match) { - bcm2835_pinctrl_desc.confops = - (const struct pinconf_ops *)match->data; - } - - pc->pctl_dev = devm_pinctrl_register(dev, &bcm2835_pinctrl_desc, pc); - if (IS_ERR(pc->pctl_dev)) { - gpiochip_remove(&pc->gpio_chip); - return PTR_ERR(pc->pctl_dev); - } - - pc->gpio_range = bcm2835_pinctrl_gpio_range; - pc->gpio_range.base = pc->gpio_chip.base; - pc->gpio_range.gc = &pc->gpio_chip; - pinctrl_add_gpio_range(pc->pctl_dev, &pc->gpio_range); - return 0; + +out_remove: + pinctrl_remove_gpio_range(pc->pctl_dev, &pc->gpio_range); + return err; } static struct platform_driver bcm2835_pinctrl_driver = { diff --git a/drivers/pinctrl/bcm/pinctrl-ns.c b/drivers/pinctrl/bcm/pinctrl-ns.c index e79690bd8b85f..d7f8175d2c1c8 100644 --- a/drivers/pinctrl/bcm/pinctrl-ns.c +++ b/drivers/pinctrl/bcm/pinctrl-ns.c @@ -5,7 +5,6 @@ #include #include -#include #include #include #include @@ -13,7 +12,6 @@ #include #include #include -#include #include #define FLAG_BCM4708 BIT(1) @@ -24,8 +22,7 @@ struct ns_pinctrl { struct device *dev; unsigned int chipset_flag; struct pinctrl_dev *pctldev; - struct regmap *regmap; - u32 offset; + void __iomem *base; struct pinctrl_desc pctldesc; struct ns_pinctrl_group *groups; @@ -232,9 +229,9 @@ static int ns_pinctrl_set_mux(struct pinctrl_dev *pctrl_dev, unset |= BIT(pin_number); } - regmap_read(ns_pinctrl->regmap, ns_pinctrl->offset, &tmp); + tmp = readl(ns_pinctrl->base); tmp &= ~unset; - regmap_write(ns_pinctrl->regmap, ns_pinctrl->offset, tmp); + writel(tmp, ns_pinctrl->base); return 0; } @@ -266,13 +263,13 @@ static const struct of_device_id ns_pinctrl_of_match_table[] = { static int ns_pinctrl_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct device_node *np = dev->of_node; const struct of_device_id *of_id; struct ns_pinctrl *ns_pinctrl; struct pinctrl_desc *pctldesc; struct pinctrl_pin_desc *pin; struct ns_pinctrl_group *group; struct ns_pinctrl_function *function; + struct resource *res; int i; ns_pinctrl = devm_kzalloc(dev, sizeof(*ns_pinctrl), GFP_KERNEL); @@ -290,18 +287,12 @@ static int ns_pinctrl_probe(struct platform_device *pdev) return -EINVAL; ns_pinctrl->chipset_flag = (uintptr_t)of_id->data; - ns_pinctrl->regmap = syscon_node_to_regmap(of_get_parent(np)); - if (IS_ERR(ns_pinctrl->regmap)) { - int err = PTR_ERR(ns_pinctrl->regmap); - - dev_err(dev, "Failed to map pinctrl regs: %d\n", err); - - return err; - } - - if (of_property_read_u32(np, "offset", &ns_pinctrl->offset)) { - dev_err(dev, "Failed to get register offset\n"); - return -ENOENT; + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, + "cru_gpio_control"); + ns_pinctrl->base = devm_ioremap_resource(dev, res); + if (IS_ERR(ns_pinctrl->base)) { + dev_err(dev, "Failed to map pinctrl regs\n"); + return PTR_ERR(ns_pinctrl->base); } memcpy(pctldesc, &ns_pinctrl_desc, sizeof(*pctldesc)); diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index 6381745e3bb18..9ebeef2ac7b18 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -2055,6 +2055,8 @@ int pinctrl_enable(struct pinctrl_dev *pctldev) if (error) { dev_err(pctldev->dev, "could not claim hogs: %i\n", error); + pinctrl_free_pindescs(pctldev, pctldev->desc->pins, + pctldev->desc->npins); mutex_destroy(&pctldev->mutex); kfree(pctldev); diff --git a/drivers/pinctrl/intel/Kconfig b/drivers/pinctrl/intel/Kconfig index 452a14f787070..aac7fb6620d45 100644 --- a/drivers/pinctrl/intel/Kconfig +++ b/drivers/pinctrl/intel/Kconfig @@ -82,6 +82,14 @@ config PINCTRL_DENVERTON This pinctrl driver provides an interface that allows configuring of Intel Denverton SoC pins and using them as GPIOs. +config PINCTRL_EMMITSBURG + tristate "Intel Emmitsburg pinctrl and GPIO driver" + depends on ACPI + select PINCTRL_INTEL + help + This pinctrl driver provides an interface that allows configuring + of Intel Emmitsburg pins and using them as GPIOs. + config PINCTRL_GEMINILAKE tristate "Intel Gemini Lake SoC pinctrl and GPIO driver" depends on ACPI diff --git a/drivers/pinctrl/intel/Makefile b/drivers/pinctrl/intel/Makefile index cb491e6557495..6d82f196dfe46 100644 --- a/drivers/pinctrl/intel/Makefile +++ b/drivers/pinctrl/intel/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_PINCTRL_BROXTON) += pinctrl-broxton.o obj-$(CONFIG_PINCTRL_CANNONLAKE) += pinctrl-cannonlake.o obj-$(CONFIG_PINCTRL_CEDARFORK) += pinctrl-cedarfork.o obj-$(CONFIG_PINCTRL_DENVERTON) += pinctrl-denverton.o +obj-$(CONFIG_PINCTRL_EMMITSBURG) += pinctrl-emmitsburg.o obj-$(CONFIG_PINCTRL_GEMINILAKE) += pinctrl-geminilake.o obj-$(CONFIG_PINCTRL_ICELAKE) += pinctrl-icelake.o obj-$(CONFIG_PINCTRL_LEWISBURG) += pinctrl-lewisburg.o diff --git a/drivers/pinctrl/intel/pinctrl-emmitsburg.c b/drivers/pinctrl/intel/pinctrl-emmitsburg.c new file mode 100644 index 0000000000000..f6114dbf75200 --- /dev/null +++ b/drivers/pinctrl/intel/pinctrl-emmitsburg.c @@ -0,0 +1,387 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Emmitsburg PCH pinctrl/GPIO driver + * + * Copyright (C) 2020, Intel Corporation + * Author: Andy Shevchenko + */ + +#include +#include +#include + +#include + +#include "pinctrl-intel.h" + +#define EBG_PAD_OWN 0x0a0 +#define EBG_PADCFGLOCK 0x100 +#define EBG_HOSTSW_OWN 0x130 +#define EBG_GPI_IS 0x200 +#define EBG_GPI_IE 0x210 + +#define EBG_GPP(r, s, e) \ + { \ + .reg_num = (r), \ + .base = (s), \ + .size = ((e) - (s) + 1), \ + } + +#define EBG_COMMUNITY(b, s, e, g) \ + { \ + .barno = (b), \ + .padown_offset = EBG_PAD_OWN, \ + .padcfglock_offset = EBG_PADCFGLOCK, \ + .hostown_offset = EBG_HOSTSW_OWN, \ + .is_offset = EBG_GPI_IS, \ + .ie_offset = EBG_GPI_IE, \ + .pin_base = (s), \ + .npins = ((e) - (s) + 1), \ + .gpps = (g), \ + .ngpps = ARRAY_SIZE(g), \ + } + +/* Emmitsburg */ +static const struct pinctrl_pin_desc ebg_pins[] = { + /* GPP_A */ + PINCTRL_PIN(0, "ESPI_ALERT0B"), + PINCTRL_PIN(1, "ESPI_ALERT1B"), + PINCTRL_PIN(2, "ESPI_IO_0"), + PINCTRL_PIN(3, "ESPI_IO_1"), + PINCTRL_PIN(4, "ESPI_IO_2"), + PINCTRL_PIN(5, "ESPI_IO_3"), + PINCTRL_PIN(6, "ESPI_CS0B"), + PINCTRL_PIN(7, "ESPI_CS1B"), + PINCTRL_PIN(8, "ESPI_RESETB"), + PINCTRL_PIN(9, "ESPI_CLK"), + PINCTRL_PIN(10, "SRCCLKREQB_0"), + PINCTRL_PIN(11, "SRCCLKREQB_1"), + PINCTRL_PIN(12, "SRCCLKREQB_2"), + PINCTRL_PIN(13, "SRCCLKREQB_3"), + PINCTRL_PIN(14, "SRCCLKREQB_4"), + PINCTRL_PIN(15, "SRCCLKREQB_5"), + PINCTRL_PIN(16, "SRCCLKREQB_6"), + PINCTRL_PIN(17, "SRCCLKREQB_7"), + PINCTRL_PIN(18, "SRCCLKREQB_8"), + PINCTRL_PIN(19, "SRCCLKREQB_9"), + PINCTRL_PIN(20, "ESPI_CLK_LOOPBK"), + /* GPP_B */ + PINCTRL_PIN(21, "GSXDOUT"), + PINCTRL_PIN(22, "GSXSLOAD"), + PINCTRL_PIN(23, "GSXDIN"), + PINCTRL_PIN(24, "GSXSRESETB"), + PINCTRL_PIN(25, "GSXCLK"), + PINCTRL_PIN(26, "USB2_OCB_0"), + PINCTRL_PIN(27, "USB2_OCB_1"), + PINCTRL_PIN(28, "USB2_OCB_2"), + PINCTRL_PIN(29, "USB2_OCB_3"), + PINCTRL_PIN(30, "USB2_OCB_4"), + PINCTRL_PIN(31, "USB2_OCB_5"), + PINCTRL_PIN(32, "USB2_OCB_6"), + PINCTRL_PIN(33, "HS_UART0_RXD"), + PINCTRL_PIN(34, "HS_UART0_TXD"), + PINCTRL_PIN(35, "HS_UART0_RTSB"), + PINCTRL_PIN(36, "HS_UART0_CTSB"), + PINCTRL_PIN(37, "HS_UART1_RXD"), + PINCTRL_PIN(38, "HS_UART1_TXD"), + PINCTRL_PIN(39, "HS_UART1_RTSB"), + PINCTRL_PIN(40, "HS_UART1_CTSB"), + PINCTRL_PIN(41, "GPPC_B_20"), + PINCTRL_PIN(42, "GPPC_B_21"), + PINCTRL_PIN(43, "GPPC_B_22"), + PINCTRL_PIN(44, "PS_ONB"), + /* SPI */ + PINCTRL_PIN(45, "SPI0_IO_2"), + PINCTRL_PIN(46, "SPI0_IO_3"), + PINCTRL_PIN(47, "SPI0_MOSI_IO_0"), + PINCTRL_PIN(48, "SPI0_MISO_IO_1"), + PINCTRL_PIN(49, "SPI0_TPM_CSB"), + PINCTRL_PIN(50, "SPI0_FLASH_0_CSB"), + PINCTRL_PIN(51, "SPI0_FLASH_1_CSB"), + PINCTRL_PIN(52, "SPI0_CLK"), + PINCTRL_PIN(53, "TIME_SYNC_0"), + PINCTRL_PIN(54, "SPKR"), + PINCTRL_PIN(55, "CPU_GP_0"), + PINCTRL_PIN(56, "CPU_GP_1"), + PINCTRL_PIN(57, "CPU_GP_2"), + PINCTRL_PIN(58, "CPU_GP_3"), + PINCTRL_PIN(59, "SUSWARNB_SUSPWRDNACK"), + PINCTRL_PIN(60, "SUSACKB"), + PINCTRL_PIN(61, "NMIB"), + PINCTRL_PIN(62, "SMIB"), + PINCTRL_PIN(63, "GPPC_S_10"), + PINCTRL_PIN(64, "GPPC_S_11"), + PINCTRL_PIN(65, "SPI_CLK_LOOPBK"), + /* GPP_C */ + PINCTRL_PIN(66, "ME_SML0CLK"), + PINCTRL_PIN(67, "ME_SML0DATA"), + PINCTRL_PIN(68, "ME_SML0ALERTB"), + PINCTRL_PIN(69, "ME_SML0BDATA"), + PINCTRL_PIN(70, "ME_SML0BCLK"), + PINCTRL_PIN(71, "ME_SML0BALERTB"), + PINCTRL_PIN(72, "ME_SML1CLK"), + PINCTRL_PIN(73, "ME_SML1DATA"), + PINCTRL_PIN(74, "ME_SML1ALERTB"), + PINCTRL_PIN(75, "ME_SML2CLK"), + PINCTRL_PIN(76, "ME_SML2DATA"), + PINCTRL_PIN(77, "ME_SML2ALERTB"), + PINCTRL_PIN(78, "ME_SML3CLK"), + PINCTRL_PIN(79, "ME_SML3DATA"), + PINCTRL_PIN(80, "ME_SML3ALERTB"), + PINCTRL_PIN(81, "ME_SML4CLK"), + PINCTRL_PIN(82, "ME_SML4DATA"), + PINCTRL_PIN(83, "ME_SML4ALERTB"), + PINCTRL_PIN(84, "GPPC_C_18"), + PINCTRL_PIN(85, "MC_SMBCLK"), + PINCTRL_PIN(86, "MC_SMBDATA"), + PINCTRL_PIN(87, "MC_SMBALERTB"), + /* GPP_D */ + PINCTRL_PIN(88, "HS_SMBCLK"), + PINCTRL_PIN(89, "HS_SMBDATA"), + PINCTRL_PIN(90, "HS_SMBALERTB"), + PINCTRL_PIN(91, "GBE_SMB_ALRT_N"), + PINCTRL_PIN(92, "GBE_SMB_CLK"), + PINCTRL_PIN(93, "GBE_SMB_DATA"), + PINCTRL_PIN(94, "GBE_GPIO10"), + PINCTRL_PIN(95, "GBE_GPIO11"), + PINCTRL_PIN(96, "CRASHLOG_TRIG_N"), + PINCTRL_PIN(97, "PMEB"), + PINCTRL_PIN(98, "BM_BUSYB"), + PINCTRL_PIN(99, "PLTRSTB"), + PINCTRL_PIN(100, "PCHHOTB"), + PINCTRL_PIN(101, "ADR_COMPLETE"), + PINCTRL_PIN(102, "ADR_TRIGGER_N"), + PINCTRL_PIN(103, "VRALERTB"), + PINCTRL_PIN(104, "ADR_ACK"), + PINCTRL_PIN(105, "THERMTRIP_N"), + PINCTRL_PIN(106, "MEMTRIP_N"), + PINCTRL_PIN(107, "MSMI_N"), + PINCTRL_PIN(108, "CATERR_N"), + PINCTRL_PIN(109, "GLB_RST_WARN_B"), + PINCTRL_PIN(110, "USB2_OCB_7"), + PINCTRL_PIN(111, "GPP_D_23"), + /* GPP_E */ + PINCTRL_PIN(112, "SATA1_XPCIE_0"), + PINCTRL_PIN(113, "SATA1_XPCIE_1"), + PINCTRL_PIN(114, "SATA1_XPCIE_2"), + PINCTRL_PIN(115, "SATA1_XPCIE_3"), + PINCTRL_PIN(116, "SATA0_XPCIE_2"), + PINCTRL_PIN(117, "SATA0_XPCIE_3"), + PINCTRL_PIN(118, "SATA0_USB3_XPCIE_0"), + PINCTRL_PIN(119, "SATA0_USB3_XPCIE_1"), + PINCTRL_PIN(120, "SATA0_SCLOCK"), + PINCTRL_PIN(121, "SATA0_SLOAD"), + PINCTRL_PIN(122, "SATA0_SDATAOUT"), + PINCTRL_PIN(123, "SATA1_SCLOCK"), + PINCTRL_PIN(124, "SATA1_SLOAD"), + PINCTRL_PIN(125, "SATA1_SDATAOUT"), + PINCTRL_PIN(126, "SATA2_SCLOCK"), + PINCTRL_PIN(127, "SATA2_SLOAD"), + PINCTRL_PIN(128, "SATA2_SDATAOUT"), + PINCTRL_PIN(129, "ERR0_N"), + PINCTRL_PIN(130, "ERR1_N"), + PINCTRL_PIN(131, "ERR2_N"), + PINCTRL_PIN(132, "GBE_UART_RXD"), + PINCTRL_PIN(133, "GBE_UART_TXD"), + PINCTRL_PIN(134, "GBE_UART_RTSB"), + PINCTRL_PIN(135, "GBE_UART_CTSB"), + /* JTAG */ + PINCTRL_PIN(136, "JTAG_TDO"), + PINCTRL_PIN(137, "JTAG_TDI"), + PINCTRL_PIN(138, "JTAG_TCK"), + PINCTRL_PIN(139, "JTAG_TMS"), + PINCTRL_PIN(140, "JTAGX"), + PINCTRL_PIN(141, "PRDYB"), + PINCTRL_PIN(142, "PREQB"), + PINCTRL_PIN(143, "GLB_PC_DISABLE"), + PINCTRL_PIN(144, "DBG_PMODE"), + PINCTRL_PIN(145, "GLB_EXT_ACC_DISABLE"), + /* GPP_H */ + PINCTRL_PIN(146, "GBE_GPIO12"), + PINCTRL_PIN(147, "GBE_GPIO13"), + PINCTRL_PIN(148, "GBE_SDP_TIMESYNC0_S2N"), + PINCTRL_PIN(149, "GBE_SDP_TIMESYNC1_S2N"), + PINCTRL_PIN(150, "GBE_SDP_TIMESYNC2_S2N"), + PINCTRL_PIN(151, "GBE_SDP_TIMESYNC3_S2N"), + PINCTRL_PIN(152, "GPPC_H_6"), + PINCTRL_PIN(153, "GPPC_H_7"), + PINCTRL_PIN(154, "NCSI_CLK_IN"), + PINCTRL_PIN(155, "NCSI_CRS_DV"), + PINCTRL_PIN(156, "NCSI_RXD0"), + PINCTRL_PIN(157, "NCSI_RXD1"), + PINCTRL_PIN(158, "NCSI_TX_EN"), + PINCTRL_PIN(159, "NCSI_TXD0"), + PINCTRL_PIN(160, "NCSI_TXD1"), + PINCTRL_PIN(161, "NAC_NCSI_CLK_OUT_0"), + PINCTRL_PIN(162, "NAC_NCSI_CLK_OUT_1"), + PINCTRL_PIN(163, "NAC_NCSI_CLK_OUT_2"), + PINCTRL_PIN(164, "PMCALERTB"), + PINCTRL_PIN(165, "GPPC_H_19"), + /* GPP_J */ + PINCTRL_PIN(166, "CPUPWRGD"), + PINCTRL_PIN(167, "CPU_THRMTRIP_N"), + PINCTRL_PIN(168, "PLTRST_CPUB"), + PINCTRL_PIN(169, "TRIGGER0_N"), + PINCTRL_PIN(170, "TRIGGER1_N"), + PINCTRL_PIN(171, "CPU_PWR_DEBUG_N"), + PINCTRL_PIN(172, "CPU_MEMTRIP_N"), + PINCTRL_PIN(173, "CPU_MSMI_N"), + PINCTRL_PIN(174, "ME_PECI"), + PINCTRL_PIN(175, "NAC_SPARE0"), + PINCTRL_PIN(176, "NAC_SPARE1"), + PINCTRL_PIN(177, "NAC_SPARE2"), + PINCTRL_PIN(178, "CPU_ERR0_N"), + PINCTRL_PIN(179, "CPU_CATERR_N"), + PINCTRL_PIN(180, "CPU_ERR1_N"), + PINCTRL_PIN(181, "CPU_ERR2_N"), + PINCTRL_PIN(182, "GPP_J_16"), + PINCTRL_PIN(183, "GPP_J_17"), + /* GPP_I */ + PINCTRL_PIN(184, "GBE_GPIO4"), + PINCTRL_PIN(185, "GBE_GPIO5"), + PINCTRL_PIN(186, "GBE_GPIO6"), + PINCTRL_PIN(187, "GBE_GPIO7"), + PINCTRL_PIN(188, "GBE1_LED1"), + PINCTRL_PIN(189, "GBE1_LED2"), + PINCTRL_PIN(190, "GBE2_LED0"), + PINCTRL_PIN(191, "GBE2_LED1"), + PINCTRL_PIN(192, "GBE2_LED2"), + PINCTRL_PIN(193, "GBE3_LED0"), + PINCTRL_PIN(194, "GBE3_LED1"), + PINCTRL_PIN(195, "GBE3_LED2"), + PINCTRL_PIN(196, "GBE0_I2C_CLK"), + PINCTRL_PIN(197, "GBE0_I2C_DATA"), + PINCTRL_PIN(198, "GBE1_I2C_CLK"), + PINCTRL_PIN(199, "GBE1_I2C_DATA"), + PINCTRL_PIN(200, "GBE2_I2C_CLK"), + PINCTRL_PIN(201, "GBE2_I2C_DATA"), + PINCTRL_PIN(202, "GBE3_I2C_CLK"), + PINCTRL_PIN(203, "GBE3_I2C_DATA"), + PINCTRL_PIN(204, "GBE4_I2C_CLK"), + PINCTRL_PIN(205, "GBE4_I2C_DATA"), + PINCTRL_PIN(206, "GBE_GPIO8"), + PINCTRL_PIN(207, "GBE_GPIO9"), + /* GPP_L */ + PINCTRL_PIN(208, "PM_SYNC_0"), + PINCTRL_PIN(209, "PM_DOWN_0"), + PINCTRL_PIN(210, "PM_SYNC_CLK_0"), + PINCTRL_PIN(211, "GPP_L_3"), + PINCTRL_PIN(212, "GPP_L_4"), + PINCTRL_PIN(213, "GPP_L_5"), + PINCTRL_PIN(214, "GPP_L_6"), + PINCTRL_PIN(215, "GPP_L_7"), + PINCTRL_PIN(216, "GPP_L_8"), + PINCTRL_PIN(217, "NAC_GBE_GPIO0_S2N"), + PINCTRL_PIN(218, "NAC_GBE_GPIO1_S2N"), + PINCTRL_PIN(219, "NAC_GBE_GPIO2_S2N"), + PINCTRL_PIN(220, "NAC_GBE_GPIO3_S2N"), + PINCTRL_PIN(221, "NAC_GBE_SMB_DATA_IN"), + PINCTRL_PIN(222, "NAC_GBE_SMB_DATA_OUT"), + PINCTRL_PIN(223, "NAC_GBE_SMB_ALRT_N"), + PINCTRL_PIN(224, "NAC_GBE_SMB_CLK_IN"), + PINCTRL_PIN(225, "NAC_GBE_SMB_CLK_OUT"), + /* GPP_M */ + PINCTRL_PIN(226, "GPP_M_0"), + PINCTRL_PIN(227, "GPP_M_1"), + PINCTRL_PIN(228, "GPP_M_2"), + PINCTRL_PIN(229, "GPP_M_3"), + PINCTRL_PIN(230, "NAC_WAKE_N"), + PINCTRL_PIN(231, "GPP_M_5"), + PINCTRL_PIN(232, "GPP_M_6"), + PINCTRL_PIN(233, "GPP_M_7"), + PINCTRL_PIN(234, "GPP_M_8"), + PINCTRL_PIN(235, "NAC_SBLINK_S2N"), + PINCTRL_PIN(236, "NAC_SBLINK_N2S"), + PINCTRL_PIN(237, "NAC_SBLINK_CLK_N2S"), + PINCTRL_PIN(238, "NAC_SBLINK_CLK_S2N"), + PINCTRL_PIN(239, "NAC_XTAL_VALID"), + PINCTRL_PIN(240, "NAC_RESET_NAC_N"), + PINCTRL_PIN(241, "GPP_M_15"), + PINCTRL_PIN(242, "GPP_M_16"), + PINCTRL_PIN(243, "GPP_M_17"), + /* GPP_N */ + PINCTRL_PIN(244, "GPP_N_0"), + PINCTRL_PIN(245, "NAC_NCSI_TXD0"), + PINCTRL_PIN(246, "GPP_N_2"), + PINCTRL_PIN(247, "GPP_N_3"), + PINCTRL_PIN(248, "NAC_NCSI_REFCLK_IN"), + PINCTRL_PIN(249, "GPP_N_5"), + PINCTRL_PIN(250, "GPP_N_6"), + PINCTRL_PIN(251, "GPP_N_7"), + PINCTRL_PIN(252, "NAC_NCSI_RXD0"), + PINCTRL_PIN(253, "NAC_NCSI_RXD1"), + PINCTRL_PIN(254, "NAC_NCSI_CRS_DV"), + PINCTRL_PIN(255, "NAC_NCSI_CLK_IN"), + PINCTRL_PIN(256, "NAC_NCSI_REFCLK_OUT"), + PINCTRL_PIN(257, "NAC_NCSI_TX_EN"), + PINCTRL_PIN(258, "NAC_NCSI_TXD1"), + PINCTRL_PIN(259, "NAC_NCSI_OE_N"), + PINCTRL_PIN(260, "NAC_GR_N"), + PINCTRL_PIN(261, "NAC_INIT_SX_WAKE_N"), +}; + +static const struct intel_padgroup ebg_community0_gpps[] = { + EBG_GPP(0, 0, 20), /* GPP_A */ + EBG_GPP(1, 21, 44), /* GPP_B */ + EBG_GPP(2, 45, 65), /* SPI */ +}; + +static const struct intel_padgroup ebg_community1_gpps[] = { + EBG_GPP(0, 66, 87), /* GPP_C */ + EBG_GPP(1, 88, 111), /* GPP_D */ +}; + +static const struct intel_padgroup ebg_community3_gpps[] = { + EBG_GPP(0, 112, 135), /* GPP_E */ + EBG_GPP(1, 136, 145), /* JTAG */ +}; + +static const struct intel_padgroup ebg_community4_gpps[] = { + EBG_GPP(0, 146, 165), /* GPP_H */ + EBG_GPP(1, 166, 183), /* GPP_J */ +}; + +static const struct intel_padgroup ebg_community5_gpps[] = { + EBG_GPP(0, 184, 207), /* GPP_I */ + EBG_GPP(1, 208, 225), /* GPP_L */ + EBG_GPP(2, 226, 243), /* GPP_M */ + EBG_GPP(3, 244, 261), /* GPP_N */ +}; + +static const struct intel_community ebg_communities[] = { + EBG_COMMUNITY(0, 0, 65, ebg_community0_gpps), + EBG_COMMUNITY(1, 66, 111, ebg_community1_gpps), + EBG_COMMUNITY(2, 112, 145, ebg_community3_gpps), + EBG_COMMUNITY(3, 146, 183, ebg_community4_gpps), + EBG_COMMUNITY(4, 184, 261, ebg_community5_gpps), +}; + +static const struct intel_pinctrl_soc_data ebg_soc_data = { + .pins = ebg_pins, + .npins = ARRAY_SIZE(ebg_pins), + .communities = ebg_communities, + .ncommunities = ARRAY_SIZE(ebg_communities), +}; + +static const struct acpi_device_id ebg_pinctrl_acpi_match[] = { + { "INTC1071", (kernel_ulong_t)&ebg_soc_data }, + { } +}; +MODULE_DEVICE_TABLE(acpi, ebg_pinctrl_acpi_match); + +static INTEL_PINCTRL_PM_OPS(ebg_pinctrl_pm_ops); + +static struct platform_driver ebg_pinctrl_driver = { + .probe = intel_pinctrl_probe_by_hid, + .driver = { + .name = "emmitsburg-pinctrl", + .acpi_match_table = ebg_pinctrl_acpi_match, + .pm = &ebg_pinctrl_pm_ops, + }, +}; + +module_platform_driver(ebg_pinctrl_driver); + +MODULE_AUTHOR("Andy Shevchenko "); +MODULE_DESCRIPTION("Intel Emmitsburg PCH pinctrl/GPIO driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c index 20e1c890e73b3..c3e6f3c1b4743 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c @@ -236,8 +236,12 @@ static int mtk_xt_get_gpio_n(void *data, unsigned long eint_n, desc = (const struct mtk_pin_desc *)hw->soc->pins; *gpio_chip = &hw->chip; - /* Be greedy to guess first gpio_n is equal to eint_n */ - if (desc[eint_n].eint.eint_n == eint_n) + /* + * Be greedy to guess first gpio_n is equal to eint_n. + * Only eint virtual eint number is greater than gpio number. + */ + if (hw->soc->npins > eint_n && + desc[eint_n].eint.eint_n == eint_n) *gpio_n = eint_n; else *gpio_n = mtk_xt_find_eint_num(hw, eint_n); diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c index 53f52b9a0acdc..c2df70712ca22 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c @@ -1038,6 +1038,7 @@ int mtk_pctrl_init(struct platform_device *pdev, node = of_parse_phandle(np, "mediatek,pctl-regmap", 0); if (node) { pctl->regmap1 = syscon_node_to_regmap(node); + of_node_put(node); if (IS_ERR(pctl->regmap1)) return PTR_ERR(pctl->regmap1); } else if (regmap) { @@ -1051,6 +1052,7 @@ int mtk_pctrl_init(struct platform_device *pdev, node = of_parse_phandle(np, "mediatek,pctl-regmap", 1); if (node) { pctl->regmap2 = syscon_node_to_regmap(node); + of_node_put(node); if (IS_ERR(pctl->regmap2)) return PTR_ERR(pctl->regmap2); } diff --git a/drivers/pinctrl/mediatek/pinctrl-paris.c b/drivers/pinctrl/mediatek/pinctrl-paris.c index 923264d0e9ef2..31449514a8c0c 100644 --- a/drivers/pinctrl/mediatek/pinctrl-paris.c +++ b/drivers/pinctrl/mediatek/pinctrl-paris.c @@ -198,8 +198,7 @@ static int mtk_pinconf_get(struct pinctrl_dev *pctldev, } static int mtk_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin, - enum pin_config_param param, - enum pin_config_param arg) + enum pin_config_param param, u32 arg) { struct mtk_pinctrl *hw = pinctrl_dev_get_drvdata(pctldev); const struct mtk_pin_desc *desc; @@ -647,10 +646,10 @@ static int mtk_pconf_group_get(struct pinctrl_dev *pctldev, unsigned group, unsigned long *config) { struct mtk_pinctrl *hw = pinctrl_dev_get_drvdata(pctldev); + struct mtk_pinctrl_group *grp = &hw->groups[group]; - *config = hw->groups[group].config; - - return 0; + /* One pin per group only */ + return mtk_pinconf_get(pctldev, grp->pin, config); } static int mtk_pconf_group_set(struct pinctrl_dev *pctldev, unsigned group, @@ -666,8 +665,6 @@ static int mtk_pconf_group_set(struct pinctrl_dev *pctldev, unsigned group, pinconf_to_config_argument(configs[i])); if (ret < 0) return ret; - - grp->config = configs[i]; } return 0; diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c index 83e585c5a6132..359b2ecfcbdb3 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c @@ -166,10 +166,14 @@ static struct armada_37xx_pin_group armada_37xx_nb_groups[] = { PIN_GRP_GPIO("jtag", 20, 5, BIT(0), "jtag"), PIN_GRP_GPIO("sdio0", 8, 3, BIT(1), "sdio"), PIN_GRP_GPIO("emmc_nb", 27, 9, BIT(2), "emmc"), - PIN_GRP_GPIO("pwm0", 11, 1, BIT(3), "pwm"), - PIN_GRP_GPIO("pwm1", 12, 1, BIT(4), "pwm"), - PIN_GRP_GPIO("pwm2", 13, 1, BIT(5), "pwm"), - PIN_GRP_GPIO("pwm3", 14, 1, BIT(6), "pwm"), + PIN_GRP_GPIO_3("pwm0", 11, 1, BIT(3) | BIT(20), 0, BIT(20), BIT(3), + "pwm", "led"), + PIN_GRP_GPIO_3("pwm1", 12, 1, BIT(4) | BIT(21), 0, BIT(21), BIT(4), + "pwm", "led"), + PIN_GRP_GPIO_3("pwm2", 13, 1, BIT(5) | BIT(22), 0, BIT(22), BIT(5), + "pwm", "led"), + PIN_GRP_GPIO_3("pwm3", 14, 1, BIT(6) | BIT(23), 0, BIT(23), BIT(6), + "pwm", "led"), PIN_GRP_GPIO("pmic1", 7, 1, BIT(7), "pmic"), PIN_GRP_GPIO("pmic0", 6, 1, BIT(8), "pmic"), PIN_GRP_GPIO("i2c2", 2, 2, BIT(9), "i2c"), @@ -183,11 +187,6 @@ static struct armada_37xx_pin_group armada_37xx_nb_groups[] = { PIN_GRP_EXTRA("uart2", 9, 2, BIT(1) | BIT(13) | BIT(14) | BIT(19), BIT(1) | BIT(13) | BIT(14), BIT(1) | BIT(19), 18, 2, "gpio", "uart"), - PIN_GRP_GPIO_2("led0_od", 11, 1, BIT(20), BIT(20), 0, "led"), - PIN_GRP_GPIO_2("led1_od", 12, 1, BIT(21), BIT(21), 0, "led"), - PIN_GRP_GPIO_2("led2_od", 13, 1, BIT(22), BIT(22), 0, "led"), - PIN_GRP_GPIO_2("led3_od", 14, 1, BIT(23), BIT(23), 0, "led"), - }; static struct armada_37xx_pin_group armada_37xx_sb_groups[] = { @@ -774,7 +773,7 @@ static int armada_37xx_irqchip_register(struct platform_device *pdev, for (i = 0; i < nr_irq_parent; i++) { int irq = irq_of_parse_and_map(np, i); - if (irq < 0) + if (!irq) continue; gpiochip_set_chained_irqchip(gc, irqchip, irq, diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik.c b/drivers/pinctrl/nomadik/pinctrl-nomadik.c index 2a8190b11d104..9f00adfefba8e 100644 --- a/drivers/pinctrl/nomadik/pinctrl-nomadik.c +++ b/drivers/pinctrl/nomadik/pinctrl-nomadik.c @@ -1923,8 +1923,10 @@ static int nmk_pinctrl_probe(struct platform_device *pdev) } prcm_np = of_parse_phandle(np, "prcm", 0); - if (prcm_np) + if (prcm_np) { npct->prcm_base = of_iomap(prcm_np, 0); + of_node_put(prcm_np); + } if (!npct->prcm_base) { if (version == PINCTRL_NMK_STN8815) { dev_info(&pdev->dev, diff --git a/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c b/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c index 17f909d8b63a9..2dccd24c65557 100644 --- a/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c +++ b/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c @@ -78,7 +78,6 @@ struct npcm7xx_gpio { struct gpio_chip gc; int irqbase; int irq; - void *priv; struct irq_chip irq_chip; u32 pinctrl_id; int (*direction_input)(struct gpio_chip *chip, unsigned offset); @@ -226,7 +225,7 @@ static void npcmgpio_irq_handler(struct irq_desc *desc) chained_irq_enter(chip, desc); sts = ioread32(bank->base + NPCM7XX_GP_N_EVST); en = ioread32(bank->base + NPCM7XX_GP_N_EVEN); - dev_dbg(chip->parent_device, "==> got irq sts %.8x %.8x\n", sts, + dev_dbg(bank->gc.parent, "==> got irq sts %.8x %.8x\n", sts, en); sts &= en; @@ -241,33 +240,33 @@ static int npcmgpio_set_irq_type(struct irq_data *d, unsigned int type) gpiochip_get_data(irq_data_get_irq_chip_data(d)); unsigned int gpio = BIT(d->hwirq); - dev_dbg(d->chip->parent_device, "setirqtype: %u.%u = %u\n", gpio, + dev_dbg(bank->gc.parent, "setirqtype: %u.%u = %u\n", gpio, d->irq, type); switch (type) { case IRQ_TYPE_EDGE_RISING: - dev_dbg(d->chip->parent_device, "edge.rising\n"); + dev_dbg(bank->gc.parent, "edge.rising\n"); npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_EVBE, gpio); npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio); break; case IRQ_TYPE_EDGE_FALLING: - dev_dbg(d->chip->parent_device, "edge.falling\n"); + dev_dbg(bank->gc.parent, "edge.falling\n"); npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_EVBE, gpio); npcm_gpio_set(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio); break; case IRQ_TYPE_EDGE_BOTH: - dev_dbg(d->chip->parent_device, "edge.both\n"); + dev_dbg(bank->gc.parent, "edge.both\n"); npcm_gpio_set(&bank->gc, bank->base + NPCM7XX_GP_N_EVBE, gpio); break; case IRQ_TYPE_LEVEL_LOW: - dev_dbg(d->chip->parent_device, "level.low\n"); + dev_dbg(bank->gc.parent, "level.low\n"); npcm_gpio_set(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio); break; case IRQ_TYPE_LEVEL_HIGH: - dev_dbg(d->chip->parent_device, "level.high\n"); + dev_dbg(bank->gc.parent, "level.high\n"); npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio); break; default: - dev_dbg(d->chip->parent_device, "invalid irq type\n"); + dev_dbg(bank->gc.parent, "invalid irq type\n"); return -EINVAL; } @@ -289,7 +288,7 @@ static void npcmgpio_irq_ack(struct irq_data *d) gpiochip_get_data(irq_data_get_irq_chip_data(d)); unsigned int gpio = d->hwirq; - dev_dbg(d->chip->parent_device, "irq_ack: %u.%u\n", gpio, d->irq); + dev_dbg(bank->gc.parent, "irq_ack: %u.%u\n", gpio, d->irq); iowrite32(BIT(gpio), bank->base + NPCM7XX_GP_N_EVST); } @@ -301,7 +300,7 @@ static void npcmgpio_irq_mask(struct irq_data *d) unsigned int gpio = d->hwirq; /* Clear events */ - dev_dbg(d->chip->parent_device, "irq_mask: %u.%u\n", gpio, d->irq); + dev_dbg(bank->gc.parent, "irq_mask: %u.%u\n", gpio, d->irq); iowrite32(BIT(gpio), bank->base + NPCM7XX_GP_N_EVENC); } @@ -313,7 +312,7 @@ static void npcmgpio_irq_unmask(struct irq_data *d) unsigned int gpio = d->hwirq; /* Enable events */ - dev_dbg(d->chip->parent_device, "irq_unmask: %u.%u\n", gpio, d->irq); + dev_dbg(bank->gc.parent, "irq_unmask: %u.%u\n", gpio, d->irq); iowrite32(BIT(gpio), bank->base + NPCM7XX_GP_N_EVENS); } @@ -323,7 +322,7 @@ static unsigned int npcmgpio_irq_startup(struct irq_data *d) unsigned int gpio = d->hwirq; /* active-high, input, clear interrupt, enable interrupt */ - dev_dbg(d->chip->parent_device, "startup: %u.%u\n", gpio, d->irq); + dev_dbg(gc->parent, "startup: %u.%u\n", gpio, d->irq); npcmgpio_direction_input(gc, gpio); npcmgpio_irq_ack(d); npcmgpio_irq_unmask(d); @@ -905,7 +904,7 @@ static struct npcm7xx_func npcm7xx_funcs[] = { #define DRIVE_STRENGTH_HI_SHIFT 12 #define DRIVE_STRENGTH_MASK 0x0000FF00 -#define DS(lo, hi) (((lo) << DRIVE_STRENGTH_LO_SHIFT) | \ +#define DSTR(lo, hi) (((lo) << DRIVE_STRENGTH_LO_SHIFT) | \ ((hi) << DRIVE_STRENGTH_HI_SHIFT)) #define DSLO(x) (((x) >> DRIVE_STRENGTH_LO_SHIFT) & 0xF) #define DSHI(x) (((x) >> DRIVE_STRENGTH_HI_SHIFT) & 0xF) @@ -925,31 +924,31 @@ struct npcm7xx_pincfg { static const struct npcm7xx_pincfg pincfg[] = { /* PIN FUNCTION 1 FUNCTION 2 FUNCTION 3 FLAGS */ NPCM7XX_PINCFG(0, iox1, MFSEL1, 30, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(1, iox1, MFSEL1, 30, none, NONE, 0, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(2, iox1, MFSEL1, 30, none, NONE, 0, none, NONE, 0, DS(8, 12)), + NPCM7XX_PINCFG(1, iox1, MFSEL1, 30, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(2, iox1, MFSEL1, 30, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), NPCM7XX_PINCFG(3, iox1, MFSEL1, 30, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(4, iox2, MFSEL3, 14, smb1d, I2CSEGSEL, 7, none, NONE, 0, SLEW), NPCM7XX_PINCFG(5, iox2, MFSEL3, 14, smb1d, I2CSEGSEL, 7, none, NONE, 0, SLEW), NPCM7XX_PINCFG(6, iox2, MFSEL3, 14, smb2d, I2CSEGSEL, 10, none, NONE, 0, SLEW), NPCM7XX_PINCFG(7, iox2, MFSEL3, 14, smb2d, I2CSEGSEL, 10, none, NONE, 0, SLEW), - NPCM7XX_PINCFG(8, lkgpo1, FLOCKR1, 4, none, NONE, 0, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(9, lkgpo2, FLOCKR1, 8, none, NONE, 0, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(10, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(11, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DS(8, 12)), + NPCM7XX_PINCFG(8, lkgpo1, FLOCKR1, 4, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(9, lkgpo2, FLOCKR1, 8, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(10, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(11, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), NPCM7XX_PINCFG(12, gspi, MFSEL1, 24, smb5b, I2CSEGSEL, 19, none, NONE, 0, SLEW), NPCM7XX_PINCFG(13, gspi, MFSEL1, 24, smb5b, I2CSEGSEL, 19, none, NONE, 0, SLEW), NPCM7XX_PINCFG(14, gspi, MFSEL1, 24, smb5c, I2CSEGSEL, 20, none, NONE, 0, SLEW), NPCM7XX_PINCFG(15, gspi, MFSEL1, 24, smb5c, I2CSEGSEL, 20, none, NONE, 0, SLEW), - NPCM7XX_PINCFG(16, lkgpo0, FLOCKR1, 0, none, NONE, 0, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(17, pspi2, MFSEL3, 13, smb4den, I2CSEGSEL, 23, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(18, pspi2, MFSEL3, 13, smb4b, I2CSEGSEL, 14, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(19, pspi2, MFSEL3, 13, smb4b, I2CSEGSEL, 14, none, NONE, 0, DS(8, 12)), + NPCM7XX_PINCFG(16, lkgpo0, FLOCKR1, 0, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(17, pspi2, MFSEL3, 13, smb4den, I2CSEGSEL, 23, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(18, pspi2, MFSEL3, 13, smb4b, I2CSEGSEL, 14, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(19, pspi2, MFSEL3, 13, smb4b, I2CSEGSEL, 14, none, NONE, 0, DSTR(8, 12)), NPCM7XX_PINCFG(20, smb4c, I2CSEGSEL, 15, smb15, MFSEL3, 8, none, NONE, 0, 0), NPCM7XX_PINCFG(21, smb4c, I2CSEGSEL, 15, smb15, MFSEL3, 8, none, NONE, 0, 0), NPCM7XX_PINCFG(22, smb4d, I2CSEGSEL, 16, smb14, MFSEL3, 7, none, NONE, 0, 0), NPCM7XX_PINCFG(23, smb4d, I2CSEGSEL, 16, smb14, MFSEL3, 7, none, NONE, 0, 0), - NPCM7XX_PINCFG(24, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(25, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DS(8, 12)), + NPCM7XX_PINCFG(24, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(25, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), NPCM7XX_PINCFG(26, smb5, MFSEL1, 2, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(27, smb5, MFSEL1, 2, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(28, smb4, MFSEL1, 1, none, NONE, 0, none, NONE, 0, 0), @@ -965,12 +964,12 @@ static const struct npcm7xx_pincfg pincfg[] = { NPCM7XX_PINCFG(39, smb3b, I2CSEGSEL, 11, none, NONE, 0, none, NONE, 0, SLEW), NPCM7XX_PINCFG(40, smb3b, I2CSEGSEL, 11, none, NONE, 0, none, NONE, 0, SLEW), NPCM7XX_PINCFG(41, bmcuart0a, MFSEL1, 9, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(42, bmcuart0a, MFSEL1, 9, none, NONE, 0, none, NONE, 0, DS(2, 4) | GPO), + NPCM7XX_PINCFG(42, bmcuart0a, MFSEL1, 9, none, NONE, 0, none, NONE, 0, DSTR(2, 4) | GPO), NPCM7XX_PINCFG(43, uart1, MFSEL1, 10, jtag2, MFSEL4, 0, bmcuart1, MFSEL3, 24, 0), NPCM7XX_PINCFG(44, uart1, MFSEL1, 10, jtag2, MFSEL4, 0, bmcuart1, MFSEL3, 24, 0), NPCM7XX_PINCFG(45, uart1, MFSEL1, 10, jtag2, MFSEL4, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(46, uart1, MFSEL1, 10, jtag2, MFSEL4, 0, none, NONE, 0, DS(2, 8)), - NPCM7XX_PINCFG(47, uart1, MFSEL1, 10, jtag2, MFSEL4, 0, none, NONE, 0, DS(2, 8)), + NPCM7XX_PINCFG(46, uart1, MFSEL1, 10, jtag2, MFSEL4, 0, none, NONE, 0, DSTR(2, 8)), + NPCM7XX_PINCFG(47, uart1, MFSEL1, 10, jtag2, MFSEL4, 0, none, NONE, 0, DSTR(2, 8)), NPCM7XX_PINCFG(48, uart2, MFSEL1, 11, bmcuart0b, MFSEL4, 1, none, NONE, 0, GPO), NPCM7XX_PINCFG(49, uart2, MFSEL1, 11, bmcuart0b, MFSEL4, 1, none, NONE, 0, 0), NPCM7XX_PINCFG(50, uart2, MFSEL1, 11, none, NONE, 0, none, NONE, 0, 0), @@ -980,8 +979,8 @@ static const struct npcm7xx_pincfg pincfg[] = { NPCM7XX_PINCFG(54, uart2, MFSEL1, 11, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(55, uart2, MFSEL1, 11, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(56, r1err, MFSEL1, 12, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(57, r1md, MFSEL1, 13, none, NONE, 0, none, NONE, 0, DS(2, 4)), - NPCM7XX_PINCFG(58, r1md, MFSEL1, 13, none, NONE, 0, none, NONE, 0, DS(2, 4)), + NPCM7XX_PINCFG(57, r1md, MFSEL1, 13, none, NONE, 0, none, NONE, 0, DSTR(2, 4)), + NPCM7XX_PINCFG(58, r1md, MFSEL1, 13, none, NONE, 0, none, NONE, 0, DSTR(2, 4)), NPCM7XX_PINCFG(59, smb3d, I2CSEGSEL, 13, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(60, smb3d, I2CSEGSEL, 13, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(61, uart1, MFSEL1, 10, none, NONE, 0, none, NONE, 0, GPO), @@ -1004,19 +1003,19 @@ static const struct npcm7xx_pincfg pincfg[] = { NPCM7XX_PINCFG(77, fanin13, MFSEL2, 13, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(78, fanin14, MFSEL2, 14, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(79, fanin15, MFSEL2, 15, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(80, pwm0, MFSEL2, 16, none, NONE, 0, none, NONE, 0, DS(4, 8)), - NPCM7XX_PINCFG(81, pwm1, MFSEL2, 17, none, NONE, 0, none, NONE, 0, DS(4, 8)), - NPCM7XX_PINCFG(82, pwm2, MFSEL2, 18, none, NONE, 0, none, NONE, 0, DS(4, 8)), - NPCM7XX_PINCFG(83, pwm3, MFSEL2, 19, none, NONE, 0, none, NONE, 0, DS(4, 8)), - NPCM7XX_PINCFG(84, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(85, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(86, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), + NPCM7XX_PINCFG(80, pwm0, MFSEL2, 16, none, NONE, 0, none, NONE, 0, DSTR(4, 8)), + NPCM7XX_PINCFG(81, pwm1, MFSEL2, 17, none, NONE, 0, none, NONE, 0, DSTR(4, 8)), + NPCM7XX_PINCFG(82, pwm2, MFSEL2, 18, none, NONE, 0, none, NONE, 0, DSTR(4, 8)), + NPCM7XX_PINCFG(83, pwm3, MFSEL2, 19, none, NONE, 0, none, NONE, 0, DSTR(4, 8)), + NPCM7XX_PINCFG(84, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(85, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(86, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), NPCM7XX_PINCFG(87, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(88, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(89, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(90, r2err, MFSEL1, 15, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(91, r2md, MFSEL1, 16, none, NONE, 0, none, NONE, 0, DS(2, 4)), - NPCM7XX_PINCFG(92, r2md, MFSEL1, 16, none, NONE, 0, none, NONE, 0, DS(2, 4)), + NPCM7XX_PINCFG(91, r2md, MFSEL1, 16, none, NONE, 0, none, NONE, 0, DSTR(2, 4)), + NPCM7XX_PINCFG(92, r2md, MFSEL1, 16, none, NONE, 0, none, NONE, 0, DSTR(2, 4)), NPCM7XX_PINCFG(93, ga20kbc, MFSEL1, 17, smb5d, I2CSEGSEL, 21, none, NONE, 0, 0), NPCM7XX_PINCFG(94, ga20kbc, MFSEL1, 17, smb5d, I2CSEGSEL, 21, none, NONE, 0, 0), NPCM7XX_PINCFG(95, lpc, NONE, 0, espi, MFSEL4, 8, gpio, MFSEL1, 26, 0), @@ -1062,34 +1061,34 @@ static const struct npcm7xx_pincfg pincfg[] = { NPCM7XX_PINCFG(133, smb10, MFSEL4, 13, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(134, smb11, MFSEL4, 14, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(135, smb11, MFSEL4, 14, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(136, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(137, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(138, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(139, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(140, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), + NPCM7XX_PINCFG(136, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(137, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(138, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(139, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(140, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), NPCM7XX_PINCFG(141, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(142, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), + NPCM7XX_PINCFG(142, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), NPCM7XX_PINCFG(143, sd1, MFSEL3, 12, sd1pwr, MFSEL4, 5, none, NONE, 0, 0), - NPCM7XX_PINCFG(144, pwm4, MFSEL2, 20, none, NONE, 0, none, NONE, 0, DS(4, 8)), - NPCM7XX_PINCFG(145, pwm5, MFSEL2, 21, none, NONE, 0, none, NONE, 0, DS(4, 8)), - NPCM7XX_PINCFG(146, pwm6, MFSEL2, 22, none, NONE, 0, none, NONE, 0, DS(4, 8)), - NPCM7XX_PINCFG(147, pwm7, MFSEL2, 23, none, NONE, 0, none, NONE, 0, DS(4, 8)), - NPCM7XX_PINCFG(148, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(149, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(150, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(151, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(152, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), + NPCM7XX_PINCFG(144, pwm4, MFSEL2, 20, none, NONE, 0, none, NONE, 0, DSTR(4, 8)), + NPCM7XX_PINCFG(145, pwm5, MFSEL2, 21, none, NONE, 0, none, NONE, 0, DSTR(4, 8)), + NPCM7XX_PINCFG(146, pwm6, MFSEL2, 22, none, NONE, 0, none, NONE, 0, DSTR(4, 8)), + NPCM7XX_PINCFG(147, pwm7, MFSEL2, 23, none, NONE, 0, none, NONE, 0, DSTR(4, 8)), + NPCM7XX_PINCFG(148, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(149, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(150, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(151, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(152, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), NPCM7XX_PINCFG(153, mmcwp, FLOCKR1, 24, none, NONE, 0, none, NONE, 0, 0), /* Z1/A1 */ - NPCM7XX_PINCFG(154, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), + NPCM7XX_PINCFG(154, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), NPCM7XX_PINCFG(155, mmccd, MFSEL3, 25, mmcrst, MFSEL4, 6, none, NONE, 0, 0), /* Z1/A1 */ - NPCM7XX_PINCFG(156, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(157, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(158, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(159, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - - NPCM7XX_PINCFG(160, clkout, MFSEL1, 21, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(161, lpc, NONE, 0, espi, MFSEL4, 8, gpio, MFSEL1, 26, DS(8, 12)), - NPCM7XX_PINCFG(162, serirq, NONE, 0, gpio, MFSEL1, 31, none, NONE, 0, DS(8, 12)), + NPCM7XX_PINCFG(156, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(157, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(158, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(159, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + + NPCM7XX_PINCFG(160, clkout, MFSEL1, 21, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(161, lpc, NONE, 0, espi, MFSEL4, 8, gpio, MFSEL1, 26, DSTR(8, 12)), + NPCM7XX_PINCFG(162, serirq, NONE, 0, gpio, MFSEL1, 31, none, NONE, 0, DSTR(8, 12)), NPCM7XX_PINCFG(163, lpc, NONE, 0, espi, MFSEL4, 8, gpio, MFSEL1, 26, 0), NPCM7XX_PINCFG(164, lpc, NONE, 0, espi, MFSEL4, 8, gpio, MFSEL1, 26, SLEWLPC), NPCM7XX_PINCFG(165, lpc, NONE, 0, espi, MFSEL4, 8, gpio, MFSEL1, 26, SLEWLPC), @@ -1102,25 +1101,25 @@ static const struct npcm7xx_pincfg pincfg[] = { NPCM7XX_PINCFG(172, smb6, MFSEL3, 1, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(173, smb7, MFSEL3, 2, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(174, smb7, MFSEL3, 2, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(175, pspi1, MFSEL3, 4, faninx, MFSEL3, 3, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(176, pspi1, MFSEL3, 4, faninx, MFSEL3, 3, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(177, pspi1, MFSEL3, 4, faninx, MFSEL3, 3, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(178, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(179, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(180, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), + NPCM7XX_PINCFG(175, pspi1, MFSEL3, 4, faninx, MFSEL3, 3, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(176, pspi1, MFSEL3, 4, faninx, MFSEL3, 3, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(177, pspi1, MFSEL3, 4, faninx, MFSEL3, 3, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(178, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(179, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(180, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), NPCM7XX_PINCFG(181, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(182, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(183, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(184, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW | GPO), - NPCM7XX_PINCFG(185, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW | GPO), - NPCM7XX_PINCFG(186, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(187, spi3cs1, MFSEL4, 17, none, NONE, 0, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(188, spi3quad, MFSEL4, 20, spi3cs2, MFSEL4, 18, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(189, spi3quad, MFSEL4, 20, spi3cs3, MFSEL4, 19, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(190, gpio, FLOCKR1, 20, nprd_smi, NONE, 0, none, NONE, 0, DS(2, 4)), - NPCM7XX_PINCFG(191, none, NONE, 0, none, NONE, 0, none, NONE, 0, DS(8, 12)), /* XX */ - - NPCM7XX_PINCFG(192, none, NONE, 0, none, NONE, 0, none, NONE, 0, DS(8, 12)), /* XX */ + NPCM7XX_PINCFG(183, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(184, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW | GPO), + NPCM7XX_PINCFG(185, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW | GPO), + NPCM7XX_PINCFG(186, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(187, spi3cs1, MFSEL4, 17, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(188, spi3quad, MFSEL4, 20, spi3cs2, MFSEL4, 18, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(189, spi3quad, MFSEL4, 20, spi3cs3, MFSEL4, 19, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(190, gpio, FLOCKR1, 20, nprd_smi, NONE, 0, none, NONE, 0, DSTR(2, 4)), + NPCM7XX_PINCFG(191, none, NONE, 0, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), /* XX */ + + NPCM7XX_PINCFG(192, none, NONE, 0, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), /* XX */ NPCM7XX_PINCFG(193, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(194, smb0b, I2CSEGSEL, 0, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(195, smb0b, I2CSEGSEL, 0, none, NONE, 0, none, NONE, 0, 0), @@ -1131,11 +1130,11 @@ static const struct npcm7xx_pincfg pincfg[] = { NPCM7XX_PINCFG(200, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(201, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(202, smb0c, I2CSEGSEL, 1, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(203, faninx, MFSEL3, 3, none, NONE, 0, none, NONE, 0, DS(8, 12)), + NPCM7XX_PINCFG(203, faninx, MFSEL3, 3, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), NPCM7XX_PINCFG(204, ddc, NONE, 0, gpio, MFSEL3, 22, none, NONE, 0, SLEW), NPCM7XX_PINCFG(205, ddc, NONE, 0, gpio, MFSEL3, 22, none, NONE, 0, SLEW), - NPCM7XX_PINCFG(206, ddc, NONE, 0, gpio, MFSEL3, 22, none, NONE, 0, DS(4, 8)), - NPCM7XX_PINCFG(207, ddc, NONE, 0, gpio, MFSEL3, 22, none, NONE, 0, DS(4, 8)), + NPCM7XX_PINCFG(206, ddc, NONE, 0, gpio, MFSEL3, 22, none, NONE, 0, DSTR(4, 8)), + NPCM7XX_PINCFG(207, ddc, NONE, 0, gpio, MFSEL3, 22, none, NONE, 0, DSTR(4, 8)), NPCM7XX_PINCFG(208, rg2, MFSEL4, 24, ddr, MFSEL3, 26, none, NONE, 0, 0), NPCM7XX_PINCFG(209, rg2, MFSEL4, 24, ddr, MFSEL3, 26, none, NONE, 0, 0), NPCM7XX_PINCFG(210, rg2, MFSEL4, 24, ddr, MFSEL3, 26, none, NONE, 0, 0), @@ -1147,20 +1146,20 @@ static const struct npcm7xx_pincfg pincfg[] = { NPCM7XX_PINCFG(216, rg2mdio, MFSEL4, 23, ddr, MFSEL3, 26, none, NONE, 0, 0), NPCM7XX_PINCFG(217, rg2mdio, MFSEL4, 23, ddr, MFSEL3, 26, none, NONE, 0, 0), NPCM7XX_PINCFG(218, wdog1, MFSEL3, 19, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(219, wdog2, MFSEL3, 20, none, NONE, 0, none, NONE, 0, DS(4, 8)), + NPCM7XX_PINCFG(219, wdog2, MFSEL3, 20, none, NONE, 0, none, NONE, 0, DSTR(4, 8)), NPCM7XX_PINCFG(220, smb12, MFSEL3, 5, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(221, smb12, MFSEL3, 5, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(222, smb13, MFSEL3, 6, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(223, smb13, MFSEL3, 6, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(224, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, SLEW), - NPCM7XX_PINCFG(225, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW | GPO), - NPCM7XX_PINCFG(226, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW | GPO), - NPCM7XX_PINCFG(227, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(228, spixcs1, MFSEL4, 28, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(229, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(230, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(231, clkreq, MFSEL4, 9, none, NONE, 0, none, NONE, 0, DS(8, 12)), + NPCM7XX_PINCFG(225, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW | GPO), + NPCM7XX_PINCFG(226, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW | GPO), + NPCM7XX_PINCFG(227, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(228, spixcs1, MFSEL4, 28, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(229, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(230, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(231, clkreq, MFSEL4, 9, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), NPCM7XX_PINCFG(253, none, NONE, 0, none, NONE, 0, none, NONE, 0, GPI), /* SDHC1 power */ NPCM7XX_PINCFG(254, none, NONE, 0, none, NONE, 0, none, NONE, 0, GPI), /* SDHC2 power */ NPCM7XX_PINCFG(255, none, NONE, 0, none, NONE, 0, none, NONE, 0, GPI), /* DACOSEL */ @@ -1561,7 +1560,7 @@ static int npcm7xx_get_groups_count(struct pinctrl_dev *pctldev) { struct npcm7xx_pinctrl *npcm = pinctrl_dev_get_drvdata(pctldev); - dev_dbg(npcm->dev, "group size: %d\n", ARRAY_SIZE(npcm7xx_groups)); + dev_dbg(npcm->dev, "group size: %zu\n", ARRAY_SIZE(npcm7xx_groups)); return ARRAY_SIZE(npcm7xx_groups); } diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c index 9eb86309c70bf..355bc4c748e26 100644 --- a/drivers/pinctrl/pinconf-generic.c +++ b/drivers/pinctrl/pinconf-generic.c @@ -30,10 +30,10 @@ static const struct pin_config_item conf_items[] = { PCONFDUMP(PIN_CONFIG_BIAS_BUS_HOLD, "input bias bus hold", NULL, false), PCONFDUMP(PIN_CONFIG_BIAS_DISABLE, "input bias disabled", NULL, false), PCONFDUMP(PIN_CONFIG_BIAS_HIGH_IMPEDANCE, "input bias high impedance", NULL, false), - PCONFDUMP(PIN_CONFIG_BIAS_PULL_DOWN, "input bias pull down", NULL, false), + PCONFDUMP(PIN_CONFIG_BIAS_PULL_DOWN, "input bias pull down", "ohms", true), PCONFDUMP(PIN_CONFIG_BIAS_PULL_PIN_DEFAULT, - "input bias pull to pin specific state", NULL, false), - PCONFDUMP(PIN_CONFIG_BIAS_PULL_UP, "input bias pull up", NULL, false), + "input bias pull to pin specific state", "ohms", true), + PCONFDUMP(PIN_CONFIG_BIAS_PULL_UP, "input bias pull up", "ohms", true), PCONFDUMP(PIN_CONFIG_DRIVE_OPEN_DRAIN, "output drive open drain", NULL, false), PCONFDUMP(PIN_CONFIG_DRIVE_OPEN_SOURCE, "output drive open source", NULL, false), PCONFDUMP(PIN_CONFIG_DRIVE_PUSH_PULL, "output drive push pull", NULL, false), diff --git a/drivers/pinctrl/pinctrl-ingenic.c b/drivers/pinctrl/pinctrl-ingenic.c index 91596eee0bda1..ba078a7098468 100644 --- a/drivers/pinctrl/pinctrl-ingenic.c +++ b/drivers/pinctrl/pinctrl-ingenic.c @@ -348,7 +348,7 @@ static const struct ingenic_chip_info jz4725b_chip_info = { }; static const u32 jz4760_pull_ups[6] = { - 0xffffffff, 0xfffcf3ff, 0xffffffff, 0xffffcfff, 0xfffffb7c, 0xfffff00f, + 0xffffffff, 0xfffcf3ff, 0xffffffff, 0xffffcfff, 0xfffffb7c, 0x0000000f, }; static const u32 jz4760_pull_downs[6] = { @@ -611,11 +611,11 @@ static const struct ingenic_chip_info jz4760b_chip_info = { }; static const u32 jz4770_pull_ups[6] = { - 0x3fffffff, 0xfff0030c, 0xffffffff, 0xffff4fff, 0xfffffb7c, 0xffa7f00f, + 0x3fffffff, 0xfff0f3fc, 0xffffffff, 0xffff4fff, 0xfffffb7c, 0x0024f00f, }; static const u32 jz4770_pull_downs[6] = { - 0x00000000, 0x000f0c03, 0x00000000, 0x0000b000, 0x00000483, 0x00580ff0, + 0x00000000, 0x000f0c03, 0x00000000, 0x0000b000, 0x00000483, 0x005b0ff0, }; static int jz4770_uart0_data_pins[] = { 0xa0, 0xa3, }; diff --git a/drivers/pinctrl/pinctrl-pistachio.c b/drivers/pinctrl/pinctrl-pistachio.c index 379e9a6a6d894..5f381e4c75779 100644 --- a/drivers/pinctrl/pinctrl-pistachio.c +++ b/drivers/pinctrl/pinctrl-pistachio.c @@ -1370,10 +1370,10 @@ static int pistachio_gpio_register(struct pistachio_pinctrl *pctl) } irq = irq_of_parse_and_map(child, 0); - if (irq < 0) { - dev_err(pctl->dev, "No IRQ for bank %u: %d\n", i, irq); + if (!irq) { + dev_err(pctl->dev, "No IRQ for bank %u\n", i); of_node_put(child); - ret = irq; + ret = -EINVAL; goto err; } diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c index 59fe3204e965d..4b972be3487f9 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c @@ -3433,6 +3433,7 @@ static int rockchip_pinctrl_probe(struct platform_device *pdev) node = of_parse_phandle(np, "rockchip,grf", 0); if (node) { info->regmap_base = syscon_node_to_regmap(node); + of_node_put(node); if (IS_ERR(info->regmap_base)) return PTR_ERR(info->regmap_base); } else { @@ -3469,6 +3470,7 @@ static int rockchip_pinctrl_probe(struct platform_device *pdev) node = of_parse_phandle(np, "rockchip,pmu", 0); if (node) { info->regmap_pmu = syscon_node_to_regmap(node); + of_node_put(node); if (IS_ERR(info->regmap_pmu)) return PTR_ERR(info->regmap_pmu); } diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index a9d511982780c..fb1c8965cb991 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -1201,6 +1201,7 @@ static int pcs_parse_bits_in_pinctrl_entry(struct pcs_device *pcs, if (PCS_HAS_PINCONF) { dev_err(pcs->dev, "pinconf not supported\n"); + res = -ENOTSUPP; goto free_pingroups; } diff --git a/drivers/pinctrl/pinctrl-stmfx.c b/drivers/pinctrl/pinctrl-stmfx.c index ccdf0bb214149..835c14bb315bc 100644 --- a/drivers/pinctrl/pinctrl-stmfx.c +++ b/drivers/pinctrl/pinctrl-stmfx.c @@ -540,7 +540,7 @@ static irqreturn_t stmfx_pinctrl_irq_thread_fn(int irq, void *dev_id) u8 pending[NR_GPIO_REGS]; u8 src[NR_GPIO_REGS] = {0, 0, 0}; unsigned long n, status; - int ret; + int i, ret; ret = regmap_bulk_read(pctl->stmfx->map, STMFX_REG_IRQ_GPI_PENDING, &pending, NR_GPIO_REGS); @@ -550,7 +550,9 @@ static irqreturn_t stmfx_pinctrl_irq_thread_fn(int irq, void *dev_id) regmap_bulk_write(pctl->stmfx->map, STMFX_REG_IRQ_GPI_SRC, src, NR_GPIO_REGS); - status = *(unsigned long *)pending; + BUILD_BUG_ON(NR_GPIO_REGS > sizeof(status)); + for (i = 0, status = 0; i < NR_GPIO_REGS; i++) + status |= (unsigned long)pending[i] << (i * 8); for_each_set_bit(n, &status, gc->ngpio) { handle_nested_irq(irq_find_mapping(gc->irq.domain, n)); stmfx_pinctrl_irq_toggle_trigger(pctl, n); diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.c b/drivers/pinctrl/samsung/pinctrl-samsung.c index f26574ef234ab..131fa7958f754 100644 --- a/drivers/pinctrl/samsung/pinctrl-samsung.c +++ b/drivers/pinctrl/samsung/pinctrl-samsung.c @@ -918,7 +918,7 @@ static int samsung_pinctrl_register(struct platform_device *pdev, pin_bank->grange.pin_base = drvdata->pin_base + pin_bank->pin_base; pin_bank->grange.base = pin_bank->grange.pin_base; - pin_bank->grange.npins = pin_bank->gpio_chip.ngpio; + pin_bank->grange.npins = pin_bank->nr_pins; pin_bank->grange.gc = &pin_bank->gpio_chip; pinctrl_add_gpio_range(drvdata->pctl_dev, &pin_bank->grange); } @@ -1002,6 +1002,16 @@ samsung_pinctrl_get_soc_data_for_of_alias(struct platform_device *pdev) return &(of_data->ctrl[id]); } +static void samsung_banks_of_node_put(struct samsung_pinctrl_drv_data *d) +{ + struct samsung_pin_bank *bank; + unsigned int i; + + bank = d->pin_banks; + for (i = 0; i < d->nr_banks; ++i, ++bank) + of_node_put(bank->of_node); +} + /* retrieve the soc specific data */ static const struct samsung_pin_ctrl * samsung_pinctrl_get_soc_data(struct samsung_pinctrl_drv_data *d, @@ -1116,19 +1126,19 @@ static int samsung_pinctrl_probe(struct platform_device *pdev) if (ctrl->retention_data) { drvdata->retention_ctrl = ctrl->retention_data->init(drvdata, ctrl->retention_data); - if (IS_ERR(drvdata->retention_ctrl)) - return PTR_ERR(drvdata->retention_ctrl); + if (IS_ERR(drvdata->retention_ctrl)) { + ret = PTR_ERR(drvdata->retention_ctrl); + goto err_put_banks; + } } ret = samsung_pinctrl_register(pdev, drvdata); if (ret) - return ret; + goto err_put_banks; ret = samsung_gpiolib_register(pdev, drvdata); - if (ret) { - samsung_pinctrl_unregister(pdev, drvdata); - return ret; - } + if (ret) + goto err_unregister; if (ctrl->eint_gpio_init) ctrl->eint_gpio_init(drvdata); @@ -1138,6 +1148,12 @@ static int samsung_pinctrl_probe(struct platform_device *pdev) platform_set_drvdata(pdev, drvdata); return 0; + +err_unregister: + samsung_pinctrl_unregister(pdev, drvdata); +err_put_banks: + samsung_banks_of_node_put(drvdata); + return ret; } /** diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a77470.c b/drivers/pinctrl/sh-pfc/pfc-r8a77470.c index b3b116da1bb0d..14005725a726b 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a77470.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a77470.c @@ -2121,7 +2121,7 @@ static const unsigned int vin0_clk_mux[] = { VI0_CLK_MARK, }; /* - VIN1 ------------------------------------------------------------------- */ -static const union vin_data vin1_data_pins = { +static const union vin_data12 vin1_data_pins = { .data12 = { RCAR_GP_PIN(3, 1), RCAR_GP_PIN(3, 2), RCAR_GP_PIN(3, 3), RCAR_GP_PIN(3, 4), @@ -2131,7 +2131,7 @@ static const union vin_data vin1_data_pins = { RCAR_GP_PIN(3, 15), RCAR_GP_PIN(3, 16), }, }; -static const union vin_data vin1_data_mux = { +static const union vin_data12 vin1_data_mux = { .data12 = { VI1_DATA0_MARK, VI1_DATA1_MARK, VI1_DATA2_MARK, VI1_DATA3_MARK, diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index bac1d040bacab..e8149ff1d401c 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -1186,10 +1186,10 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl, bank_nr = args.args[1] / STM32_GPIO_PINS_PER_BANK; bank->gpio_chip.base = args.args[1]; - npins = args.args[2]; - while (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, - ++i, &args)) - npins += args.args[2]; + /* get the last defined gpio line (offset + nb of pins) */ + npins = args.args[0] + args.args[2]; + while (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, ++i, &args)) + npins = max(npins, (int)(args.args[0] + args.args[2])); } else { bank_nr = pctl->nbanks; bank->gpio_chip.base = bank_nr * STM32_GPIO_PINS_PER_BANK; @@ -1215,15 +1215,17 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl, bank->bank_ioport_nr = bank_ioport_nr; spin_lock_init(&bank->lock); - /* create irq hierarchical domain */ - bank->fwnode = of_node_to_fwnode(np); + if (pctl->domain) { + /* create irq hierarchical domain */ + bank->fwnode = of_node_to_fwnode(np); - bank->domain = irq_domain_create_hierarchy(pctl->domain, 0, - STM32_GPIO_IRQ_LINE, bank->fwnode, - &stm32_gpio_domain_ops, bank); + bank->domain = irq_domain_create_hierarchy(pctl->domain, 0, STM32_GPIO_IRQ_LINE, + bank->fwnode, &stm32_gpio_domain_ops, + bank); - if (!bank->domain) - return -ENODEV; + if (!bank->domain) + return -ENODEV; + } err = gpiochip_add_data(&bank->gpio_chip, bank); if (err) { @@ -1393,6 +1395,8 @@ int stm32_pctl_probe(struct platform_device *pdev) pctl->domain = stm32_pctrl_get_irq_domain(np); if (IS_ERR(pctl->domain)) return PTR_ERR(pctl->domain); + if (!pctl->domain) + dev_warn(dev, "pinctrl without interrupt support\n"); /* hwspinlock is optional */ hwlock_id = of_hwspin_lock_get_id(pdev->dev.of_node, 0); @@ -1554,8 +1558,8 @@ int __maybe_unused stm32_pinctrl_resume(struct device *dev) struct stm32_pinctrl_group *g = pctl->groups; int i; - for (i = g->pin; i < g->pin + pctl->ngroups; i++) - stm32_pinctrl_restore_gpio_regs(pctl, i); + for (i = 0; i < pctl->ngroups; i++, g++) + stm32_pinctrl_restore_gpio_regs(pctl, g->pin); return 0; } diff --git a/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c b/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c index 4ada80317a3bd..b5c1a8f363f32 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c +++ b/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c @@ -158,26 +158,26 @@ static const struct sunxi_desc_pin sun8i_a83t_pins[] = { SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 14), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), - SUNXI_FUNCTION(0x2, "nand"), /* DQ6 */ + SUNXI_FUNCTION(0x2, "nand0"), /* DQ6 */ SUNXI_FUNCTION(0x3, "mmc2")), /* D6 */ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 15), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), - SUNXI_FUNCTION(0x2, "nand"), /* DQ7 */ + SUNXI_FUNCTION(0x2, "nand0"), /* DQ7 */ SUNXI_FUNCTION(0x3, "mmc2")), /* D7 */ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 16), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), - SUNXI_FUNCTION(0x2, "nand"), /* DQS */ + SUNXI_FUNCTION(0x2, "nand0"), /* DQS */ SUNXI_FUNCTION(0x3, "mmc2")), /* RST */ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 17), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), - SUNXI_FUNCTION(0x2, "nand")), /* CE2 */ + SUNXI_FUNCTION(0x2, "nand0")), /* CE2 */ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 18), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), - SUNXI_FUNCTION(0x2, "nand")), /* CE3 */ + SUNXI_FUNCTION(0x2, "nand0")), /* CE3 */ /* Hole */ SUNXI_PIN(SUNXI_PINCTRL_PIN(D, 2), SUNXI_FUNCTION(0x0, "gpio_in"), diff --git a/drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c b/drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c index 2801ca7062732..68a5b627fb9b2 100644 --- a/drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c +++ b/drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c @@ -204,7 +204,7 @@ static const struct sunxi_desc_pin suniv_f1c100s_pins[] = { SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), SUNXI_FUNCTION(0x2, "lcd"), /* D20 */ - SUNXI_FUNCTION(0x3, "lvds1"), /* RX */ + SUNXI_FUNCTION(0x3, "uart2"), /* RX */ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 14)), SUNXI_PIN(SUNXI_PINCTRL_PIN(D, 15), SUNXI_FUNCTION(0x0, "gpio_in"), diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index 77783582080c7..c4052eab6bfcc 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c @@ -536,6 +536,8 @@ static int sunxi_pconf_set(struct pinctrl_dev *pctldev, unsigned pin, struct sunxi_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev); int i; + pin -= pctl->desc->pin_base; + for (i = 0; i < num_configs; i++) { enum pin_config_param param; unsigned long flags; diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c index 6ae484989d1f5..c4b57e1df192a 100644 --- a/drivers/platform/chrome/cros_ec_debugfs.c +++ b/drivers/platform/chrome/cros_ec_debugfs.c @@ -26,6 +26,9 @@ #define CIRC_ADD(idx, size, value) (((idx) + (value)) & ((size) - 1)) +/* waitqueue for log readers */ +static DECLARE_WAIT_QUEUE_HEAD(cros_ec_debugfs_log_wq); + /** * struct cros_ec_debugfs - EC debugging information. * @@ -34,7 +37,6 @@ * @log_buffer: circular buffer for console log information * @read_msg: preallocated EC command and buffer to read console log * @log_mutex: mutex to protect circular buffer - * @log_wq: waitqueue for log readers * @log_poll_work: recurring task to poll EC for new console log data * @panicinfo_blob: panicinfo debugfs blob */ @@ -45,7 +47,6 @@ struct cros_ec_debugfs { struct circ_buf log_buffer; struct cros_ec_command *read_msg; struct mutex log_mutex; - wait_queue_head_t log_wq; struct delayed_work log_poll_work; /* EC panicinfo */ struct debugfs_blob_wrapper panicinfo_blob; @@ -108,7 +109,7 @@ static void cros_ec_console_log_work(struct work_struct *__work) buf_space--; } - wake_up(&debug_info->log_wq); + wake_up(&cros_ec_debugfs_log_wq); } mutex_unlock(&debug_info->log_mutex); @@ -142,7 +143,7 @@ static ssize_t cros_ec_console_log_read(struct file *file, char __user *buf, mutex_unlock(&debug_info->log_mutex); - ret = wait_event_interruptible(debug_info->log_wq, + ret = wait_event_interruptible(cros_ec_debugfs_log_wq, CIRC_CNT(cb->head, cb->tail, LOG_SIZE)); if (ret < 0) return ret; @@ -174,7 +175,7 @@ static __poll_t cros_ec_console_log_poll(struct file *file, struct cros_ec_debugfs *debug_info = file->private_data; __poll_t mask = 0; - poll_wait(file, &debug_info->log_wq, wait); + poll_wait(file, &cros_ec_debugfs_log_wq, wait); mutex_lock(&debug_info->log_mutex); if (CIRC_CNT(debug_info->log_buffer.head, @@ -359,7 +360,6 @@ static int cros_ec_create_console_log(struct cros_ec_debugfs *debug_info) debug_info->log_buffer.tail = 0; mutex_init(&debug_info->log_mutex); - init_waitqueue_head(&debug_info->log_wq); debugfs_create_file("console_log", S_IFREG | 0444, debug_info->dir, debug_info, &cros_ec_console_log_fops); diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c index f659f96bda128..9b575e9dd71c5 100644 --- a/drivers/platform/chrome/cros_ec_proto.c +++ b/drivers/platform/chrome/cros_ec_proto.c @@ -213,6 +213,15 @@ static int cros_ec_host_command_proto_query(struct cros_ec_device *ec_dev, msg->insize = sizeof(struct ec_response_get_protocol_info); ret = send_command(ec_dev, msg); + /* + * Send command once again when timeout occurred. + * Fingerprint MCU (FPMCU) is restarted during system boot which + * introduces small window in which FPMCU won't respond for any + * messages sent by kernel. There is no need to wait before next + * attempt because we waited at least EC_MSG_DEADLINE_MS. + */ + if (ret == -ETIMEDOUT) + ret = send_command(ec_dev, msg); if (ret < 0) { dev_dbg(ec_dev->dev, diff --git a/drivers/platform/mellanox/mlxreg-io.c b/drivers/platform/mellanox/mlxreg-io.c index acfaf64ffde68..1c3760c13f832 100644 --- a/drivers/platform/mellanox/mlxreg-io.c +++ b/drivers/platform/mellanox/mlxreg-io.c @@ -123,7 +123,7 @@ mlxreg_io_attr_store(struct device *dev, struct device_attribute *attr, return -EINVAL; /* Convert buffer to input value. */ - ret = kstrtou32(buf, len, &input_val); + ret = kstrtou32(buf, 0, &input_val); if (ret) return ret; diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c index 7e3083deb1c5d..1b86005c0f5f4 100644 --- a/drivers/platform/x86/apple-gmux.c +++ b/drivers/platform/x86/apple-gmux.c @@ -625,7 +625,7 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id) } gmux_data->iostart = res->start; - gmux_data->iolen = res->end - res->start; + gmux_data->iolen = resource_size(res); if (gmux_data->iolen < GMUX_MIN_IO_LEN) { pr_err("gmux I/O region too small (%lu < %u)\n", diff --git a/drivers/platform/x86/dell-smbios-wmi.c b/drivers/platform/x86/dell-smbios-wmi.c index c97bd4a452422..5821e9d9a4ce4 100644 --- a/drivers/platform/x86/dell-smbios-wmi.c +++ b/drivers/platform/x86/dell-smbios-wmi.c @@ -69,6 +69,7 @@ static int run_smbios_call(struct wmi_device *wdev) if (obj->type == ACPI_TYPE_INTEGER) dev_dbg(&wdev->dev, "SMBIOS call failed: %llu\n", obj->integer.value); + kfree(output.pointer); return -EIO; } memcpy(&priv->buf->std, obj->buffer.pointer, obj->buffer.length); diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index 63a530a3d9feb..c3fdb0ecad96e 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -62,6 +62,7 @@ enum hp_wmi_event_ids { HPWMI_BACKLIT_KB_BRIGHTNESS = 0x0D, HPWMI_PEAKSHIFT_PERIOD = 0x0F, HPWMI_BATTERY_CHARGE_PERIOD = 0x10, + HPWMI_SANITIZATION_MODE = 0x17, }; struct bios_args { @@ -629,6 +630,8 @@ static void hp_wmi_notify(u32 value, void *context) break; case HPWMI_BATTERY_CHARGE_PERIOD: break; + case HPWMI_SANITIZATION_MODE: + break; default: pr_info("Unknown event_id - %d - 0x%x\n", event_id, event_data); break; diff --git a/drivers/platform/x86/hp_accel.c b/drivers/platform/x86/hp_accel.c index 8c0867bda8280..0dfaa1a43b674 100644 --- a/drivers/platform/x86/hp_accel.c +++ b/drivers/platform/x86/hp_accel.c @@ -372,9 +372,11 @@ static int lis3lv02d_add(struct acpi_device *device) INIT_WORK(&hpled_led.work, delayed_set_status_worker); ret = led_classdev_register(NULL, &hpled_led.led_classdev); if (ret) { + i8042_remove_filter(hp_accel_i8042_filter); lis3lv02d_joystick_disable(&lis3_dev); lis3lv02d_poweroff(&lis3_dev); flush_work(&hpled_led.work); + lis3lv02d_remove_fs(&lis3_dev); return ret; } diff --git a/drivers/platform/x86/intel_punit_ipc.c b/drivers/platform/x86/intel_punit_ipc.c index ccb44f2eb2407..af3a28623e869 100644 --- a/drivers/platform/x86/intel_punit_ipc.c +++ b/drivers/platform/x86/intel_punit_ipc.c @@ -8,7 +8,6 @@ * which provide mailbox interface for power management usage. */ -#include #include #include #include @@ -335,7 +334,7 @@ static struct platform_driver intel_punit_ipc_driver = { .remove = intel_punit_ipc_remove, .driver = { .name = "intel_punit_ipc", - .acpi_match_table = ACPI_PTR(punit_ipc_acpi_ids), + .acpi_match_table = punit_ipc_acpi_ids, }, }; diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c index e330ec73c465d..bcb516892d01d 100644 --- a/drivers/platform/x86/intel_scu_ipc.c +++ b/drivers/platform/x86/intel_scu_ipc.c @@ -181,7 +181,7 @@ static inline int busy_loop(struct intel_scu_ipc_dev *scu) return 0; } -/* Wait till ipc ioc interrupt is received or timeout in 3 HZ */ +/* Wait till ipc ioc interrupt is received or timeout in 10 HZ */ static inline int ipc_wait_for_interrupt(struct intel_scu_ipc_dev *scu) { int status; diff --git a/drivers/platform/x86/intel_speed_select_if/isst_if_common.c b/drivers/platform/x86/intel_speed_select_if/isst_if_common.c index 0c2aa22c7a12e..407afafc7e83f 100644 --- a/drivers/platform/x86/intel_speed_select_if/isst_if_common.c +++ b/drivers/platform/x86/intel_speed_select_if/isst_if_common.c @@ -532,7 +532,10 @@ static long isst_if_def_ioctl(struct file *file, unsigned int cmd, return ret; } -static DEFINE_MUTEX(punit_misc_dev_lock); +/* Lock to prevent module registration when already opened by user space */ +static DEFINE_MUTEX(punit_misc_dev_open_lock); +/* Lock to allow one share misc device for all ISST interace */ +static DEFINE_MUTEX(punit_misc_dev_reg_lock); static int misc_usage_count; static int misc_device_ret; static int misc_device_open; @@ -542,7 +545,7 @@ static int isst_if_open(struct inode *inode, struct file *file) int i, ret = 0; /* Fail open, if a module is going away */ - mutex_lock(&punit_misc_dev_lock); + mutex_lock(&punit_misc_dev_open_lock); for (i = 0; i < ISST_IF_DEV_MAX; ++i) { struct isst_if_cmd_cb *cb = &punit_callbacks[i]; @@ -564,7 +567,7 @@ static int isst_if_open(struct inode *inode, struct file *file) } else { misc_device_open++; } - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock); return ret; } @@ -573,7 +576,7 @@ static int isst_if_relase(struct inode *inode, struct file *f) { int i; - mutex_lock(&punit_misc_dev_lock); + mutex_lock(&punit_misc_dev_open_lock); misc_device_open--; for (i = 0; i < ISST_IF_DEV_MAX; ++i) { struct isst_if_cmd_cb *cb = &punit_callbacks[i]; @@ -581,7 +584,7 @@ static int isst_if_relase(struct inode *inode, struct file *f) if (cb->registered) module_put(cb->owner); } - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock); return 0; } @@ -598,6 +601,43 @@ static struct miscdevice isst_if_char_driver = { .fops = &isst_if_char_driver_ops, }; +static int isst_misc_reg(void) +{ + mutex_lock(&punit_misc_dev_reg_lock); + if (misc_device_ret) + goto unlock_exit; + + if (!misc_usage_count) { + misc_device_ret = isst_if_cpu_info_init(); + if (misc_device_ret) + goto unlock_exit; + + misc_device_ret = misc_register(&isst_if_char_driver); + if (misc_device_ret) { + isst_if_cpu_info_exit(); + goto unlock_exit; + } + } + misc_usage_count++; + +unlock_exit: + mutex_unlock(&punit_misc_dev_reg_lock); + + return misc_device_ret; +} + +static void isst_misc_unreg(void) +{ + mutex_lock(&punit_misc_dev_reg_lock); + if (misc_usage_count) + misc_usage_count--; + if (!misc_usage_count && !misc_device_ret) { + misc_deregister(&isst_if_char_driver); + isst_if_cpu_info_exit(); + } + mutex_unlock(&punit_misc_dev_reg_lock); +} + /** * isst_if_cdev_register() - Register callback for IOCTL * @device_type: The device type this callback handling. @@ -615,38 +655,31 @@ static struct miscdevice isst_if_char_driver = { */ int isst_if_cdev_register(int device_type, struct isst_if_cmd_cb *cb) { - if (misc_device_ret) - return misc_device_ret; + int ret; if (device_type >= ISST_IF_DEV_MAX) return -EINVAL; - mutex_lock(&punit_misc_dev_lock); + mutex_lock(&punit_misc_dev_open_lock); + /* Device is already open, we don't want to add new callbacks */ if (misc_device_open) { - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock); return -EAGAIN; } - if (!misc_usage_count) { - int ret; - - misc_device_ret = misc_register(&isst_if_char_driver); - if (misc_device_ret) - goto unlock_exit; - - ret = isst_if_cpu_info_init(); - if (ret) { - misc_deregister(&isst_if_char_driver); - misc_device_ret = ret; - goto unlock_exit; - } - } memcpy(&punit_callbacks[device_type], cb, sizeof(*cb)); punit_callbacks[device_type].registered = 1; - misc_usage_count++; -unlock_exit: - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock); - return misc_device_ret; + ret = isst_misc_reg(); + if (ret) { + /* + * No need of mutex as the misc device register failed + * as no one can open device yet. Hence no contention. + */ + punit_callbacks[device_type].registered = 0; + return ret; + } + return 0; } EXPORT_SYMBOL_GPL(isst_if_cdev_register); @@ -661,16 +694,12 @@ EXPORT_SYMBOL_GPL(isst_if_cdev_register); */ void isst_if_cdev_unregister(int device_type) { - mutex_lock(&punit_misc_dev_lock); - misc_usage_count--; + isst_misc_unreg(); + mutex_lock(&punit_misc_dev_open_lock); punit_callbacks[device_type].registered = 0; if (device_type == ISST_IF_DEV_MBOX) isst_delete_hash(); - if (!misc_usage_count && !misc_device_ret) { - misc_deregister(&isst_if_char_driver); - isst_if_cpu_info_exit(); - } - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock); } EXPORT_SYMBOL_GPL(isst_if_cdev_unregister); diff --git a/drivers/platform/x86/samsung-laptop.c b/drivers/platform/x86/samsung-laptop.c index 9b6a93ff41ffb..91e468fcaf7cc 100644 --- a/drivers/platform/x86/samsung-laptop.c +++ b/drivers/platform/x86/samsung-laptop.c @@ -1121,8 +1121,6 @@ static void kbd_led_set(struct led_classdev *led_cdev, if (value > samsung->kbd_led.max_brightness) value = samsung->kbd_led.max_brightness; - else if (value < 0) - value = 0; samsung->kbd_led_wk = value; queue_work(samsung->led_workqueue, &samsung->kbd_led_work); diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index f027609fdab6d..5d114088c88fb 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -1188,15 +1188,6 @@ static int tpacpi_rfk_update_swstate(const struct tpacpi_rfk *tp_rfk) return status; } -/* Query FW and update rfkill sw state for all rfkill switches */ -static void tpacpi_rfk_update_swstate_all(void) -{ - unsigned int i; - - for (i = 0; i < TPACPI_RFK_SW_MAX; i++) - tpacpi_rfk_update_swstate(tpacpi_rfkill_switches[i]); -} - /* * Sync the HW-blocking state of all rfkill switches, * do notice it causes the rfkill core to schedule uevents @@ -3135,9 +3126,6 @@ static void tpacpi_send_radiosw_update(void) if (wlsw == TPACPI_RFK_RADIO_OFF) tpacpi_rfk_update_hwblock_state(true); - /* Sync sw blocking state */ - tpacpi_rfk_update_swstate_all(); - /* Sync hw blocking state last if it is hw-unblocked */ if (wlsw == TPACPI_RFK_RADIO_ON) tpacpi_rfk_update_hwblock_state(false); @@ -9086,7 +9074,7 @@ static int fan_write_cmd_level(const char *cmd, int *rc) if (strlencmp(cmd, "level auto") == 0) level = TP_EC_FAN_AUTO; - else if ((strlencmp(cmd, "level disengaged") == 0) | + else if ((strlencmp(cmd, "level disengaged") == 0) || (strlencmp(cmd, "level full-speed") == 0)) level = TP_EC_FAN_FULLSPEED; else if (sscanf(cmd, "level %d", &level) != 1) diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index 59e9aa0f96436..cb029126a68c6 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -353,7 +353,14 @@ static acpi_status __query_block(struct wmi_block *wblock, u8 instance, * the WQxx method failed - we should disable collection anyway. */ if ((block->flags & ACPI_WMI_EXPENSIVE) && ACPI_SUCCESS(wc_status)) { - status = acpi_execute_simple_method(handle, wc_method, 0); + /* + * Ignore whether this WCxx call succeeds or not since + * the previously executed WQxx method call might have + * succeeded, and returning the failing status code + * of this call would throw away the result of the WQxx + * call, potentially leaking memory. + */ + acpi_execute_simple_method(handle, wc_method, 0); } return status; diff --git a/drivers/power/reset/arm-versatile-reboot.c b/drivers/power/reset/arm-versatile-reboot.c index 08d0a07b58ef2..c7624d7611a7e 100644 --- a/drivers/power/reset/arm-versatile-reboot.c +++ b/drivers/power/reset/arm-versatile-reboot.c @@ -146,6 +146,7 @@ static int __init versatile_reboot_probe(void) versatile_reboot_type = (enum versatile_reboot)reboot_id->data; syscon_regmap = syscon_node_to_regmap(np); + of_node_put(np); if (IS_ERR(syscon_regmap)) return PTR_ERR(syscon_regmap); diff --git a/drivers/power/reset/gemini-poweroff.c b/drivers/power/reset/gemini-poweroff.c index 90e35c07240ae..b7f7a8225f22e 100644 --- a/drivers/power/reset/gemini-poweroff.c +++ b/drivers/power/reset/gemini-poweroff.c @@ -107,8 +107,8 @@ static int gemini_poweroff_probe(struct platform_device *pdev) return PTR_ERR(gpw->base); irq = platform_get_irq(pdev, 0); - if (!irq) - return -EINVAL; + if (irq < 0) + return irq; gpw->dev = dev; diff --git a/drivers/power/reset/ltc2952-poweroff.c b/drivers/power/reset/ltc2952-poweroff.c index e4a0cc45b3d11..ec613bcc0a302 100644 --- a/drivers/power/reset/ltc2952-poweroff.c +++ b/drivers/power/reset/ltc2952-poweroff.c @@ -160,8 +160,8 @@ static void ltc2952_poweroff_kill(void) static void ltc2952_poweroff_default(struct ltc2952_poweroff *data) { - data->wde_interval = 300L * 1E6L; - data->trigger_delay = ktime_set(2, 500L*1E6L); + data->wde_interval = 300L * NSEC_PER_MSEC; + data->trigger_delay = ktime_set(2, 500L * NSEC_PER_MSEC); hrtimer_init(&data->timer_trigger, CLOCK_MONOTONIC, HRTIMER_MODE_REL); data->timer_trigger.function = ltc2952_poweroff_timer_trigger; diff --git a/drivers/power/supply/ab8500_fg.c b/drivers/power/supply/ab8500_fg.c index 69452fc085b99..4c229e6fb750a 100644 --- a/drivers/power/supply/ab8500_fg.c +++ b/drivers/power/supply/ab8500_fg.c @@ -2541,8 +2541,10 @@ static int ab8500_fg_sysfs_init(struct ab8500_fg *di) ret = kobject_init_and_add(&di->fg_kobject, &ab8500_fg_ktype, NULL, "battery"); - if (ret < 0) + if (ret < 0) { + kobject_put(&di->fg_kobject); dev_err(di->dev, "failed to create sysfs entry\n"); + } return ret; } diff --git a/drivers/power/supply/axp20x_battery.c b/drivers/power/supply/axp20x_battery.c index e84b6e4da14a8..9fda98b950bab 100644 --- a/drivers/power/supply/axp20x_battery.c +++ b/drivers/power/supply/axp20x_battery.c @@ -185,7 +185,6 @@ static int axp20x_battery_get_prop(struct power_supply *psy, union power_supply_propval *val) { struct axp20x_batt_ps *axp20x_batt = power_supply_get_drvdata(psy); - struct iio_channel *chan; int ret = 0, reg, val1; switch (psp) { @@ -265,12 +264,12 @@ static int axp20x_battery_get_prop(struct power_supply *psy, if (ret) return ret; - if (reg & AXP20X_PWR_STATUS_BAT_CHARGING) - chan = axp20x_batt->batt_chrg_i; - else - chan = axp20x_batt->batt_dischrg_i; - - ret = iio_read_channel_processed(chan, &val->intval); + if (reg & AXP20X_PWR_STATUS_BAT_CHARGING) { + ret = iio_read_channel_processed(axp20x_batt->batt_chrg_i, &val->intval); + } else { + ret = iio_read_channel_processed(axp20x_batt->batt_dischrg_i, &val1); + val->intval = -val1; + } if (ret) return ret; diff --git a/drivers/power/supply/axp288_charger.c b/drivers/power/supply/axp288_charger.c index 7d09e49f04d3b..2ee6bb7c08045 100644 --- a/drivers/power/supply/axp288_charger.c +++ b/drivers/power/supply/axp288_charger.c @@ -41,11 +41,11 @@ #define VBUS_ISPOUT_CUR_LIM_1500MA 0x1 /* 1500mA */ #define VBUS_ISPOUT_CUR_LIM_2000MA 0x2 /* 2000mA */ #define VBUS_ISPOUT_CUR_NO_LIM 0x3 /* 2500mA */ -#define VBUS_ISPOUT_VHOLD_SET_MASK 0x31 +#define VBUS_ISPOUT_VHOLD_SET_MASK 0x38 #define VBUS_ISPOUT_VHOLD_SET_BIT_POS 0x3 #define VBUS_ISPOUT_VHOLD_SET_OFFSET 4000 /* 4000mV */ #define VBUS_ISPOUT_VHOLD_SET_LSB_RES 100 /* 100mV */ -#define VBUS_ISPOUT_VHOLD_SET_4300MV 0x3 /* 4300mV */ +#define VBUS_ISPOUT_VHOLD_SET_4400MV 0x4 /* 4400mV */ #define VBUS_ISPOUT_VBUS_PATH_DIS BIT(7) #define CHRG_CCCV_CC_MASK 0xf /* 4 bits */ @@ -744,6 +744,16 @@ static int charger_init_hw_regs(struct axp288_chrg_info *info) ret = axp288_charger_vbus_path_select(info, true); if (ret < 0) return ret; + } else { + /* Set Vhold to the factory default / recommended 4.4V */ + val = VBUS_ISPOUT_VHOLD_SET_4400MV << VBUS_ISPOUT_VHOLD_SET_BIT_POS; + ret = regmap_update_bits(info->regmap, AXP20X_VBUS_IPSOUT_MGMT, + VBUS_ISPOUT_VHOLD_SET_MASK, val); + if (ret < 0) { + dev_err(&info->pdev->dev, "register(%x) write error(%d)\n", + AXP20X_VBUS_IPSOUT_MGMT, ret); + return ret; + } } /* Read current charge voltage and current limit */ diff --git a/drivers/power/supply/axp288_fuel_gauge.c b/drivers/power/supply/axp288_fuel_gauge.c index f40fa0e63b6e5..993e4a4a34b38 100644 --- a/drivers/power/supply/axp288_fuel_gauge.c +++ b/drivers/power/supply/axp288_fuel_gauge.c @@ -149,7 +149,7 @@ static int fuel_gauge_reg_readb(struct axp288_fg_info *info, int reg) } if (ret < 0) { - dev_err(&info->pdev->dev, "axp288 reg read err:%d\n", ret); + dev_err(&info->pdev->dev, "Error reading reg 0x%02x err: %d\n", reg, ret); return ret; } @@ -163,7 +163,7 @@ static int fuel_gauge_reg_writeb(struct axp288_fg_info *info, int reg, u8 val) ret = regmap_write(info->regmap, reg, (unsigned int)val); if (ret < 0) - dev_err(&info->pdev->dev, "axp288 reg write err:%d\n", ret); + dev_err(&info->pdev->dev, "Error writing reg 0x%02x err: %d\n", reg, ret); return ret; } diff --git a/drivers/power/supply/bq24190_charger.c b/drivers/power/supply/bq24190_charger.c index 1ae5d6d42c9e3..64d87dccea82c 100644 --- a/drivers/power/supply/bq24190_charger.c +++ b/drivers/power/supply/bq24190_charger.c @@ -41,6 +41,7 @@ #define BQ24190_REG_POC_CHG_CONFIG_DISABLE 0x0 #define BQ24190_REG_POC_CHG_CONFIG_CHARGE 0x1 #define BQ24190_REG_POC_CHG_CONFIG_OTG 0x2 +#define BQ24190_REG_POC_CHG_CONFIG_OTG_ALT 0x3 #define BQ24190_REG_POC_SYS_MIN_MASK (BIT(3) | BIT(2) | BIT(1)) #define BQ24190_REG_POC_SYS_MIN_SHIFT 1 #define BQ24190_REG_POC_SYS_MIN_MIN 3000 @@ -550,7 +551,11 @@ static int bq24190_vbus_is_enabled(struct regulator_dev *dev) pm_runtime_mark_last_busy(bdi->dev); pm_runtime_put_autosuspend(bdi->dev); - return ret ? ret : val == BQ24190_REG_POC_CHG_CONFIG_OTG; + if (ret) + return ret; + + return (val == BQ24190_REG_POC_CHG_CONFIG_OTG || + val == BQ24190_REG_POC_CHG_CONFIG_OTG_ALT); } static const struct regulator_ops bq24190_vbus_ops = { diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c index 9d1ec8d677de6..5afe55119fe65 100644 --- a/drivers/power/supply/bq25890_charger.c +++ b/drivers/power/supply/bq25890_charger.c @@ -531,12 +531,12 @@ static void bq25890_handle_state_change(struct bq25890_device *bq, if (!new_state->online) { /* power removed */ /* disable ADC */ - ret = bq25890_field_write(bq, F_CONV_START, 0); + ret = bq25890_field_write(bq, F_CONV_RATE, 0); if (ret < 0) goto error; } else if (!old_state.online) { /* power inserted */ /* enable ADC, to have control of charge current/voltage */ - ret = bq25890_field_write(bq, F_CONV_START, 1); + ret = bq25890_field_write(bq, F_CONV_RATE, 1); if (ret < 0) goto error; } diff --git a/drivers/power/supply/bq27xxx_battery_i2c.c b/drivers/power/supply/bq27xxx_battery_i2c.c index 2677c38a8a424..34229c1f43e31 100644 --- a/drivers/power/supply/bq27xxx_battery_i2c.c +++ b/drivers/power/supply/bq27xxx_battery_i2c.c @@ -195,7 +195,8 @@ static int bq27xxx_battery_i2c_probe(struct i2c_client *client, dev_err(&client->dev, "Unable to register IRQ %d error %d\n", client->irq, ret); - return ret; + bq27xxx_battery_teardown(di); + goto err_failed; } } diff --git a/drivers/power/supply/max17042_battery.c b/drivers/power/supply/max17042_battery.c index fa862f0380c41..170639a1e734a 100644 --- a/drivers/power/supply/max17042_battery.c +++ b/drivers/power/supply/max17042_battery.c @@ -312,7 +312,10 @@ static int max17042_get_property(struct power_supply *psy, val->intval = data * 625 / 8; break; case POWER_SUPPLY_PROP_CAPACITY: - ret = regmap_read(map, MAX17042_RepSOC, &data); + if (chip->pdata->enable_current_sense) + ret = regmap_read(map, MAX17042_RepSOC, &data); + else + ret = regmap_read(map, MAX17042_VFSOC, &data); if (ret < 0) return ret; @@ -726,7 +729,7 @@ static inline void max17042_override_por_values(struct max17042_chip *chip) struct max17042_config_data *config = chip->pdata->config_data; max17042_override_por(map, MAX17042_TGAIN, config->tgain); - max17042_override_por(map, MAx17042_TOFF, config->toff); + max17042_override_por(map, MAX17042_TOFF, config->toff); max17042_override_por(map, MAX17042_CGAIN, config->cgain); max17042_override_por(map, MAX17042_COFF, config->coff); @@ -834,7 +837,8 @@ static void max17042_set_soc_threshold(struct max17042_chip *chip, u16 off) regmap_read(map, MAX17042_RepSOC, &soc); soc >>= 8; soc_tr = (soc + off) << 8; - soc_tr |= (soc - off); + if (off < soc) + soc_tr |= soc - off; regmap_write(map, MAX17042_SALRT_Th, soc_tr); } @@ -842,8 +846,12 @@ static irqreturn_t max17042_thread_handler(int id, void *dev) { struct max17042_chip *chip = dev; u32 val; + int ret; + + ret = regmap_read(chip->regmap, MAX17042_STATUS, &val); + if (ret) + return IRQ_HANDLED; - regmap_read(chip->regmap, MAX17042_STATUS, &val); if ((val & STATUS_INTR_SOCMIN_BIT) || (val & STATUS_INTR_SOCMAX_BIT)) { dev_info(&chip->client->dev, "SOC threshold INTR\n"); diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index 5c36c430ce8b6..a2f56a68c50d6 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -742,6 +742,10 @@ power_supply_find_ocv2cap_table(struct power_supply_battery_info *info, return NULL; for (i = 0; i < POWER_SUPPLY_OCV_TEMP_MAX; i++) { + /* Out of capacity tables */ + if (!info->ocv_table[i]) + break; + temp_diff = abs(info->ocv_temp[i] - temp); if (temp_diff < best_temp_diff) { diff --git a/drivers/power/supply/rt5033_battery.c b/drivers/power/supply/rt5033_battery.c index 6609f8cb8ca01..ef53891b88bbc 100644 --- a/drivers/power/supply/rt5033_battery.c +++ b/drivers/power/supply/rt5033_battery.c @@ -60,7 +60,7 @@ static int rt5033_battery_get_watt_prop(struct i2c_client *client, regmap_read(battery->regmap, regh, &msb); regmap_read(battery->regmap, regl, &lsb); - ret = ((msb << 4) + (lsb >> 4)) * 1250 / 1000; + ret = ((msb << 4) + (lsb >> 4)) * 1250; return ret; } diff --git a/drivers/power/supply/wm8350_power.c b/drivers/power/supply/wm8350_power.c index 26923af574f43..4fe9ea9721b39 100644 --- a/drivers/power/supply/wm8350_power.c +++ b/drivers/power/supply/wm8350_power.c @@ -408,44 +408,112 @@ static const struct power_supply_desc wm8350_usb_desc = { * Initialisation *********************************************************************/ -static void wm8350_init_charger(struct wm8350 *wm8350) +static int wm8350_init_charger(struct wm8350 *wm8350) { + int ret; + /* register our interest in charger events */ - wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_HOT, + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_HOT, wm8350_charger_handler, 0, "Battery hot", wm8350); - wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_COLD, + if (ret) + goto err; + + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_COLD, wm8350_charger_handler, 0, "Battery cold", wm8350); - wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_FAIL, + if (ret) + goto free_chg_bat_hot; + + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_FAIL, wm8350_charger_handler, 0, "Battery fail", wm8350); - wm8350_register_irq(wm8350, WM8350_IRQ_CHG_TO, + if (ret) + goto free_chg_bat_cold; + + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_TO, wm8350_charger_handler, 0, "Charger timeout", wm8350); - wm8350_register_irq(wm8350, WM8350_IRQ_CHG_END, + if (ret) + goto free_chg_bat_fail; + + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_END, wm8350_charger_handler, 0, "Charge end", wm8350); - wm8350_register_irq(wm8350, WM8350_IRQ_CHG_START, + if (ret) + goto free_chg_to; + + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_START, wm8350_charger_handler, 0, "Charge start", wm8350); - wm8350_register_irq(wm8350, WM8350_IRQ_CHG_FAST_RDY, + if (ret) + goto free_chg_end; + + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_FAST_RDY, wm8350_charger_handler, 0, "Fast charge ready", wm8350); - wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P9, + if (ret) + goto free_chg_start; + + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P9, wm8350_charger_handler, 0, "Battery <3.9V", wm8350); - wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P1, + if (ret) + goto free_chg_fast_rdy; + + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P1, wm8350_charger_handler, 0, "Battery <3.1V", wm8350); - wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_2P85, + if (ret) + goto free_chg_vbatt_lt_3p9; + + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_2P85, wm8350_charger_handler, 0, "Battery <2.85V", wm8350); + if (ret) + goto free_chg_vbatt_lt_3p1; /* and supply change events */ - wm8350_register_irq(wm8350, WM8350_IRQ_EXT_USB_FB, + ret = wm8350_register_irq(wm8350, WM8350_IRQ_EXT_USB_FB, wm8350_charger_handler, 0, "USB", wm8350); - wm8350_register_irq(wm8350, WM8350_IRQ_EXT_WALL_FB, + if (ret) + goto free_chg_vbatt_lt_2p85; + + ret = wm8350_register_irq(wm8350, WM8350_IRQ_EXT_WALL_FB, wm8350_charger_handler, 0, "Wall", wm8350); - wm8350_register_irq(wm8350, WM8350_IRQ_EXT_BAT_FB, + if (ret) + goto free_ext_usb_fb; + + ret = wm8350_register_irq(wm8350, WM8350_IRQ_EXT_BAT_FB, wm8350_charger_handler, 0, "Battery", wm8350); + if (ret) + goto free_ext_wall_fb; + + return 0; + +free_ext_wall_fb: + wm8350_free_irq(wm8350, WM8350_IRQ_EXT_WALL_FB, wm8350); +free_ext_usb_fb: + wm8350_free_irq(wm8350, WM8350_IRQ_EXT_USB_FB, wm8350); +free_chg_vbatt_lt_2p85: + wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_2P85, wm8350); +free_chg_vbatt_lt_3p1: + wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P1, wm8350); +free_chg_vbatt_lt_3p9: + wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P9, wm8350); +free_chg_fast_rdy: + wm8350_free_irq(wm8350, WM8350_IRQ_CHG_FAST_RDY, wm8350); +free_chg_start: + wm8350_free_irq(wm8350, WM8350_IRQ_CHG_START, wm8350); +free_chg_end: + wm8350_free_irq(wm8350, WM8350_IRQ_CHG_END, wm8350); +free_chg_to: + wm8350_free_irq(wm8350, WM8350_IRQ_CHG_TO, wm8350); +free_chg_bat_fail: + wm8350_free_irq(wm8350, WM8350_IRQ_CHG_BAT_FAIL, wm8350); +free_chg_bat_cold: + wm8350_free_irq(wm8350, WM8350_IRQ_CHG_BAT_COLD, wm8350); +free_chg_bat_hot: + wm8350_free_irq(wm8350, WM8350_IRQ_CHG_BAT_HOT, wm8350); +err: + return ret; } static void free_charger_irq(struct wm8350 *wm8350) @@ -456,6 +524,7 @@ static void free_charger_irq(struct wm8350 *wm8350) wm8350_free_irq(wm8350, WM8350_IRQ_CHG_TO, wm8350); wm8350_free_irq(wm8350, WM8350_IRQ_CHG_END, wm8350); wm8350_free_irq(wm8350, WM8350_IRQ_CHG_START, wm8350); + wm8350_free_irq(wm8350, WM8350_IRQ_CHG_FAST_RDY, wm8350); wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P9, wm8350); wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P1, wm8350); wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_2P85, wm8350); diff --git a/drivers/ptp/ptp_pch.c b/drivers/ptp/ptp_pch.c index dcd6e00c80467..a50656632df93 100644 --- a/drivers/ptp/ptp_pch.c +++ b/drivers/ptp/ptp_pch.c @@ -683,6 +683,7 @@ static const struct pci_device_id pch_ieee1588_pcidev_id[] = { }, {0} }; +MODULE_DEVICE_TABLE(pci, pch_ieee1588_pcidev_id); static struct pci_driver pch_driver = { .name = KBUILD_MODNAME, diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c index be076a91e20e6..8cd59e8481631 100644 --- a/drivers/ptp/ptp_sysfs.c +++ b/drivers/ptp/ptp_sysfs.c @@ -13,7 +13,7 @@ static ssize_t clock_name_show(struct device *dev, struct device_attribute *attr, char *page) { struct ptp_clock *ptp = dev_get_drvdata(dev); - return snprintf(page, PAGE_SIZE-1, "%s\n", ptp->info->name); + return sysfs_emit(page, "%s\n", ptp->info->name); } static DEVICE_ATTR_RO(clock_name); @@ -227,7 +227,7 @@ static ssize_t ptp_pin_show(struct device *dev, struct device_attribute *attr, mutex_unlock(&ptp->pincfg_mux); - return snprintf(page, PAGE_SIZE, "%u %u\n", func, chan); + return sysfs_emit(page, "%u %u\n", func, chan); } static ssize_t ptp_pin_store(struct device *dev, struct device_attribute *attr, diff --git a/drivers/pwm/pwm-img.c b/drivers/pwm/pwm-img.c index 22c002e685b34..37f9b688661d4 100644 --- a/drivers/pwm/pwm-img.c +++ b/drivers/pwm/pwm-img.c @@ -329,23 +329,7 @@ static int img_pwm_probe(struct platform_device *pdev) static int img_pwm_remove(struct platform_device *pdev) { struct img_pwm_chip *pwm_chip = platform_get_drvdata(pdev); - u32 val; - unsigned int i; - int ret; - - ret = pm_runtime_get_sync(&pdev->dev); - if (ret < 0) { - pm_runtime_put(&pdev->dev); - return ret; - } - - for (i = 0; i < pwm_chip->chip.npwm; i++) { - val = img_pwm_readl(pwm_chip, PWM_CTRL_CFG); - val &= ~BIT(i); - img_pwm_writel(pwm_chip, PWM_CTRL_CFG, val); - } - pm_runtime_put(&pdev->dev); pm_runtime_disable(&pdev->dev); if (!pm_runtime_status_suspended(&pdev->dev)) img_pwm_runtime_suspend(&pdev->dev); diff --git a/drivers/pwm/pwm-lp3943.c b/drivers/pwm/pwm-lp3943.c index bf3f14fb5f244..05e4120fd7022 100644 --- a/drivers/pwm/pwm-lp3943.c +++ b/drivers/pwm/pwm-lp3943.c @@ -125,6 +125,7 @@ static int lp3943_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, if (err) return err; + duty_ns = min(duty_ns, period_ns); val = (u8)(duty_ns * LP3943_MAX_DUTY / period_ns); return lp3943_write_byte(lp3943, reg_duty, val); diff --git a/drivers/pwm/pwm-lpc18xx-sct.c b/drivers/pwm/pwm-lpc18xx-sct.c index 5ff11145c1a30..9b15b6a79082a 100644 --- a/drivers/pwm/pwm-lpc18xx-sct.c +++ b/drivers/pwm/pwm-lpc18xx-sct.c @@ -400,12 +400,6 @@ static int lpc18xx_pwm_probe(struct platform_device *pdev) lpc18xx_pwm_writel(lpc18xx_pwm, LPC18XX_PWM_LIMIT, BIT(lpc18xx_pwm->period_event)); - ret = pwmchip_add(&lpc18xx_pwm->chip); - if (ret < 0) { - dev_err(&pdev->dev, "pwmchip_add failed: %d\n", ret); - goto disable_pwmclk; - } - for (i = 0; i < lpc18xx_pwm->chip.npwm; i++) { struct lpc18xx_pwm_data *data; @@ -415,14 +409,12 @@ static int lpc18xx_pwm_probe(struct platform_device *pdev) GFP_KERNEL); if (!data) { ret = -ENOMEM; - goto remove_pwmchip; + goto disable_pwmclk; } pwm_set_chip_data(pwm, data); } - platform_set_drvdata(pdev, lpc18xx_pwm); - val = lpc18xx_pwm_readl(lpc18xx_pwm, LPC18XX_PWM_CTRL); val &= ~LPC18XX_PWM_BIDIR; val &= ~LPC18XX_PWM_CTRL_HALT; @@ -430,10 +422,16 @@ static int lpc18xx_pwm_probe(struct platform_device *pdev) val |= LPC18XX_PWM_PRE(0); lpc18xx_pwm_writel(lpc18xx_pwm, LPC18XX_PWM_CTRL, val); + ret = pwmchip_add(&lpc18xx_pwm->chip); + if (ret < 0) { + dev_err(&pdev->dev, "pwmchip_add failed: %d\n", ret); + goto disable_pwmclk; + } + + platform_set_drvdata(pdev, lpc18xx_pwm); + return 0; -remove_pwmchip: - pwmchip_remove(&lpc18xx_pwm->chip); disable_pwmclk: clk_disable_unprepare(lpc18xx_pwm->pwm_clk); return ret; diff --git a/drivers/pwm/pwm-lpc32xx.c b/drivers/pwm/pwm-lpc32xx.c index 710d9a207d2b0..522f862eca526 100644 --- a/drivers/pwm/pwm-lpc32xx.c +++ b/drivers/pwm/pwm-lpc32xx.c @@ -120,17 +120,17 @@ static int lpc32xx_pwm_probe(struct platform_device *pdev) lpc32xx->chip.npwm = 1; lpc32xx->chip.base = -1; + /* If PWM is disabled, configure the output to the default value */ + val = readl(lpc32xx->base + (lpc32xx->chip.pwms[0].hwpwm << 2)); + val &= ~PWM_PIN_LEVEL; + writel(val, lpc32xx->base + (lpc32xx->chip.pwms[0].hwpwm << 2)); + ret = pwmchip_add(&lpc32xx->chip); if (ret < 0) { dev_err(&pdev->dev, "failed to add PWM chip, error %d\n", ret); return ret; } - /* When PWM is disable, configure the output to the default value */ - val = readl(lpc32xx->base + (lpc32xx->chip.pwms[0].hwpwm << 2)); - val &= ~PWM_PIN_LEVEL; - writel(val, lpc32xx->base + (lpc32xx->chip.pwms[0].hwpwm << 2)); - platform_set_drvdata(pdev, lpc32xx); return 0; diff --git a/drivers/pwm/pwm-rockchip.c b/drivers/pwm/pwm-rockchip.c index 6ad6aad215cf1..8c0af705c5ae9 100644 --- a/drivers/pwm/pwm-rockchip.c +++ b/drivers/pwm/pwm-rockchip.c @@ -383,20 +383,6 @@ static int rockchip_pwm_remove(struct platform_device *pdev) { struct rockchip_pwm_chip *pc = platform_get_drvdata(pdev); - /* - * Disable the PWM clk before unpreparing it if the PWM device is still - * running. This should only happen when the last PWM user left it - * enabled, or when nobody requested a PWM that was previously enabled - * by the bootloader. - * - * FIXME: Maybe the core should disable all PWM devices in - * pwmchip_remove(). In this case we'd only have to call - * clk_unprepare() after pwmchip_remove(). - * - */ - if (pwm_is_enabled(pc->chip.pwms)) - clk_disable(pc->clk); - clk_unprepare(pc->pclk); clk_unprepare(pc->clk); diff --git a/drivers/pwm/pwm-stm32-lp.c b/drivers/pwm/pwm-stm32-lp.c index 67fca62524dc2..05bb1f95a7739 100644 --- a/drivers/pwm/pwm-stm32-lp.c +++ b/drivers/pwm/pwm-stm32-lp.c @@ -225,8 +225,6 @@ static int stm32_pwm_lp_remove(struct platform_device *pdev) { struct stm32_pwm_lp *priv = platform_get_drvdata(pdev); - pwm_disable(&priv->chip.pwms[0]); - return pwmchip_remove(&priv->chip); } diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c index 20191e785467a..5352b7f335e45 100644 --- a/drivers/ras/cec.c +++ b/drivers/ras/cec.c @@ -562,20 +562,20 @@ static struct notifier_block cec_nb = { .priority = MCE_PRIO_CEC, }; -static void __init cec_init(void) +static int __init cec_init(void) { if (ce_arr.disabled) - return; + return -ENODEV; ce_arr.array = (void *)get_zeroed_page(GFP_KERNEL); if (!ce_arr.array) { pr_err("Error allocating CE array page!\n"); - return; + return -ENOMEM; } if (create_debugfs_nodes()) { free_page((unsigned long)ce_arr.array); - return; + return -ENOMEM; } INIT_DELAYED_WORK(&cec_work, cec_work_fn); @@ -584,6 +584,7 @@ static void __init cec_init(void) mce_register_decode_chain(&cec_nb); pr_info("Correctable Errors collector initialized.\n"); + return 0; } late_initcall(cec_init); diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 7fd793d8536cd..ae2addadb36f2 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1988,10 +1988,13 @@ struct regulator *_regulator_get(struct device *dev, const char *id, rdev->exclusive = 1; ret = _regulator_is_enabled(rdev); - if (ret > 0) + if (ret > 0) { rdev->use_count = 1; - else + regulator->enable_count = 1; + } else { rdev->use_count = 0; + regulator->enable_count = 0; + } } device_link_add(dev, &rdev->dev, DL_FLAG_STATELESS); diff --git a/drivers/regulator/pfuze100-regulator.c b/drivers/regulator/pfuze100-regulator.c index 44b1da7cc3744..f873d97100e28 100644 --- a/drivers/regulator/pfuze100-regulator.c +++ b/drivers/regulator/pfuze100-regulator.c @@ -528,6 +528,7 @@ static int pfuze_parse_regulators_dt(struct pfuze_chip *chip) parent = of_get_child_by_name(np, "regulators"); if (!parent) { dev_err(dev, "regulators node not found\n"); + of_node_put(np); return -EINVAL; } @@ -557,6 +558,7 @@ static int pfuze_parse_regulators_dt(struct pfuze_chip *chip) } of_node_put(parent); + of_node_put(np); if (ret < 0) { dev_err(dev, "Error parsing regulator init data: %d\n", ret); diff --git a/drivers/regulator/qcom_smd-regulator.c b/drivers/regulator/qcom_smd-regulator.c index 3b0828c79e2b5..a4c82f5e2dae7 100644 --- a/drivers/regulator/qcom_smd-regulator.c +++ b/drivers/regulator/qcom_smd-regulator.c @@ -9,6 +9,7 @@ #include #include #include +#include #include struct qcom_rpm_reg { @@ -776,52 +777,93 @@ static const struct of_device_id rpm_of_match[] = { }; MODULE_DEVICE_TABLE(of, rpm_of_match); -static int rpm_reg_probe(struct platform_device *pdev) +/** + * rpm_regulator_init_vreg() - initialize all attributes of a qcom_smd-regulator + * @vreg: Pointer to the individual qcom_smd-regulator resource + * @dev: Pointer to the top level qcom_smd-regulator PMIC device + * @node: Pointer to the individual qcom_smd-regulator resource + * device node + * @rpm: Pointer to the rpm bus node + * @pmic_rpm_data: Pointer to a null-terminated array of qcom_smd-regulator + * resources defined for the top level PMIC device + * + * Return: 0 on success, errno on failure + */ +static int rpm_regulator_init_vreg(struct qcom_rpm_reg *vreg, struct device *dev, + struct device_node *node, struct qcom_smd_rpm *rpm, + const struct rpm_regulator_data *pmic_rpm_data) { - const struct rpm_regulator_data *reg; - const struct of_device_id *match; - struct regulator_config config = { }; + struct regulator_config config = {}; + const struct rpm_regulator_data *rpm_data; struct regulator_dev *rdev; + int ret; + + for (rpm_data = pmic_rpm_data; rpm_data->name; rpm_data++) + if (of_node_name_eq(node, rpm_data->name)) + break; + + if (!rpm_data->name) { + dev_err(dev, "Unknown regulator %pOFn\n", node); + return -EINVAL; + } + + vreg->dev = dev; + vreg->rpm = rpm; + vreg->type = rpm_data->type; + vreg->id = rpm_data->id; + + memcpy(&vreg->desc, rpm_data->desc, sizeof(vreg->desc)); + vreg->desc.name = rpm_data->name; + vreg->desc.supply_name = rpm_data->supply; + vreg->desc.owner = THIS_MODULE; + vreg->desc.type = REGULATOR_VOLTAGE; + vreg->desc.of_match = rpm_data->name; + + config.dev = dev; + config.of_node = node; + config.driver_data = vreg; + + rdev = devm_regulator_register(dev, &vreg->desc, &config); + if (IS_ERR(rdev)) { + ret = PTR_ERR(rdev); + dev_err(dev, "%pOFn: devm_regulator_register() failed, ret=%d\n", node, ret); + return ret; + } + + return 0; +} + +static int rpm_reg_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + const struct rpm_regulator_data *vreg_data; + struct device_node *node; struct qcom_rpm_reg *vreg; struct qcom_smd_rpm *rpm; + int ret; rpm = dev_get_drvdata(pdev->dev.parent); if (!rpm) { - dev_err(&pdev->dev, "unable to retrieve handle to rpm\n"); + dev_err(&pdev->dev, "Unable to retrieve handle to rpm\n"); return -ENODEV; } - match = of_match_device(rpm_of_match, &pdev->dev); - if (!match) { - dev_err(&pdev->dev, "failed to match device\n"); + vreg_data = of_device_get_match_data(dev); + if (!vreg_data) return -ENODEV; - } - for (reg = match->data; reg->name; reg++) { + for_each_available_child_of_node(dev->of_node, node) { vreg = devm_kzalloc(&pdev->dev, sizeof(*vreg), GFP_KERNEL); - if (!vreg) + if (!vreg) { + of_node_put(node); return -ENOMEM; + } + + ret = rpm_regulator_init_vreg(vreg, dev, node, rpm, vreg_data); - vreg->dev = &pdev->dev; - vreg->type = reg->type; - vreg->id = reg->id; - vreg->rpm = rpm; - - memcpy(&vreg->desc, reg->desc, sizeof(vreg->desc)); - - vreg->desc.id = -1; - vreg->desc.owner = THIS_MODULE; - vreg->desc.type = REGULATOR_VOLTAGE; - vreg->desc.name = reg->name; - vreg->desc.supply_name = reg->supply; - vreg->desc.of_match = reg->name; - - config.dev = &pdev->dev; - config.driver_data = vreg; - rdev = devm_regulator_register(&pdev->dev, &vreg->desc, &config); - if (IS_ERR(rdev)) { - dev_err(&pdev->dev, "failed to register %s\n", reg->name); - return PTR_ERR(rdev); + if (ret < 0) { + of_node_put(node); + return ret; } } diff --git a/drivers/regulator/s5m8767.c b/drivers/regulator/s5m8767.c index 5276f8442f3c6..1e9f03a2ea1cc 100644 --- a/drivers/regulator/s5m8767.c +++ b/drivers/regulator/s5m8767.c @@ -851,18 +851,15 @@ static int s5m8767_pmic_probe(struct platform_device *pdev) /* DS4 GPIO */ gpio_direction_output(pdata->buck_ds[2], 0x0); - if (pdata->buck2_gpiodvs || pdata->buck3_gpiodvs || - pdata->buck4_gpiodvs) { - regmap_update_bits(s5m8767->iodev->regmap_pmic, - S5M8767_REG_BUCK2CTRL, 1 << 1, - (pdata->buck2_gpiodvs) ? (1 << 1) : (0 << 1)); - regmap_update_bits(s5m8767->iodev->regmap_pmic, - S5M8767_REG_BUCK3CTRL, 1 << 1, - (pdata->buck3_gpiodvs) ? (1 << 1) : (0 << 1)); - regmap_update_bits(s5m8767->iodev->regmap_pmic, - S5M8767_REG_BUCK4CTRL, 1 << 1, - (pdata->buck4_gpiodvs) ? (1 << 1) : (0 << 1)); - } + regmap_update_bits(s5m8767->iodev->regmap_pmic, + S5M8767_REG_BUCK2CTRL, 1 << 1, + (pdata->buck2_gpiodvs) ? (1 << 1) : (0 << 1)); + regmap_update_bits(s5m8767->iodev->regmap_pmic, + S5M8767_REG_BUCK3CTRL, 1 << 1, + (pdata->buck3_gpiodvs) ? (1 << 1) : (0 << 1)); + regmap_update_bits(s5m8767->iodev->regmap_pmic, + S5M8767_REG_BUCK4CTRL, 1 << 1, + (pdata->buck4_gpiodvs) ? (1 << 1) : (0 << 1)); /* Initialize GPIO DVS registers */ for (i = 0; i < 8; i++) { diff --git a/drivers/regulator/vctrl-regulator.c b/drivers/regulator/vctrl-regulator.c index cbadb1c996790..d2a37978fc3a8 100644 --- a/drivers/regulator/vctrl-regulator.c +++ b/drivers/regulator/vctrl-regulator.c @@ -37,7 +37,6 @@ struct vctrl_voltage_table { struct vctrl_data { struct regulator_dev *rdev; struct regulator_desc desc; - struct regulator *ctrl_reg; bool enabled; unsigned int min_slew_down_rate; unsigned int ovp_threshold; @@ -82,7 +81,12 @@ static int vctrl_calc_output_voltage(struct vctrl_data *vctrl, int ctrl_uV) static int vctrl_get_voltage(struct regulator_dev *rdev) { struct vctrl_data *vctrl = rdev_get_drvdata(rdev); - int ctrl_uV = regulator_get_voltage_rdev(vctrl->ctrl_reg->rdev); + int ctrl_uV; + + if (!rdev->supply) + return -EPROBE_DEFER; + + ctrl_uV = regulator_get_voltage_rdev(rdev->supply->rdev); return vctrl_calc_output_voltage(vctrl, ctrl_uV); } @@ -92,14 +96,19 @@ static int vctrl_set_voltage(struct regulator_dev *rdev, unsigned int *selector) { struct vctrl_data *vctrl = rdev_get_drvdata(rdev); - struct regulator *ctrl_reg = vctrl->ctrl_reg; - int orig_ctrl_uV = regulator_get_voltage_rdev(ctrl_reg->rdev); - int uV = vctrl_calc_output_voltage(vctrl, orig_ctrl_uV); + int orig_ctrl_uV; + int uV; int ret; + if (!rdev->supply) + return -EPROBE_DEFER; + + orig_ctrl_uV = regulator_get_voltage_rdev(rdev->supply->rdev); + uV = vctrl_calc_output_voltage(vctrl, orig_ctrl_uV); + if (req_min_uV >= uV || !vctrl->ovp_threshold) /* voltage rising or no OVP */ - return regulator_set_voltage_rdev(ctrl_reg->rdev, + return regulator_set_voltage_rdev(rdev->supply->rdev, vctrl_calc_ctrl_voltage(vctrl, req_min_uV), vctrl_calc_ctrl_voltage(vctrl, req_max_uV), PM_SUSPEND_ON); @@ -117,7 +126,7 @@ static int vctrl_set_voltage(struct regulator_dev *rdev, next_uV = max_t(int, req_min_uV, uV - max_drop_uV); next_ctrl_uV = vctrl_calc_ctrl_voltage(vctrl, next_uV); - ret = regulator_set_voltage_rdev(ctrl_reg->rdev, + ret = regulator_set_voltage_rdev(rdev->supply->rdev, next_ctrl_uV, next_ctrl_uV, PM_SUSPEND_ON); @@ -134,7 +143,7 @@ static int vctrl_set_voltage(struct regulator_dev *rdev, err: /* Try to go back to original voltage */ - regulator_set_voltage_rdev(ctrl_reg->rdev, orig_ctrl_uV, orig_ctrl_uV, + regulator_set_voltage_rdev(rdev->supply->rdev, orig_ctrl_uV, orig_ctrl_uV, PM_SUSPEND_ON); return ret; @@ -151,16 +160,18 @@ static int vctrl_set_voltage_sel(struct regulator_dev *rdev, unsigned int selector) { struct vctrl_data *vctrl = rdev_get_drvdata(rdev); - struct regulator *ctrl_reg = vctrl->ctrl_reg; unsigned int orig_sel = vctrl->sel; int ret; + if (!rdev->supply) + return -EPROBE_DEFER; + if (selector >= rdev->desc->n_voltages) return -EINVAL; if (selector >= vctrl->sel || !vctrl->ovp_threshold) { /* voltage rising or no OVP */ - ret = regulator_set_voltage_rdev(ctrl_reg->rdev, + ret = regulator_set_voltage_rdev(rdev->supply->rdev, vctrl->vtable[selector].ctrl, vctrl->vtable[selector].ctrl, PM_SUSPEND_ON); @@ -179,7 +190,7 @@ static int vctrl_set_voltage_sel(struct regulator_dev *rdev, else next_sel = vctrl->vtable[vctrl->sel].ovp_min_sel; - ret = regulator_set_voltage_rdev(ctrl_reg->rdev, + ret = regulator_set_voltage_rdev(rdev->supply->rdev, vctrl->vtable[next_sel].ctrl, vctrl->vtable[next_sel].ctrl, PM_SUSPEND_ON); @@ -202,7 +213,7 @@ static int vctrl_set_voltage_sel(struct regulator_dev *rdev, err: if (vctrl->sel != orig_sel) { /* Try to go back to original voltage */ - if (!regulator_set_voltage_rdev(ctrl_reg->rdev, + if (!regulator_set_voltage_rdev(rdev->supply->rdev, vctrl->vtable[orig_sel].ctrl, vctrl->vtable[orig_sel].ctrl, PM_SUSPEND_ON)) @@ -234,10 +245,6 @@ static int vctrl_parse_dt(struct platform_device *pdev, u32 pval; u32 vrange_ctrl[2]; - vctrl->ctrl_reg = devm_regulator_get(&pdev->dev, "ctrl"); - if (IS_ERR(vctrl->ctrl_reg)) - return PTR_ERR(vctrl->ctrl_reg); - ret = of_property_read_u32(np, "ovp-threshold-percent", &pval); if (!ret) { vctrl->ovp_threshold = pval; @@ -315,11 +322,11 @@ static int vctrl_cmp_ctrl_uV(const void *a, const void *b) return at->ctrl - bt->ctrl; } -static int vctrl_init_vtable(struct platform_device *pdev) +static int vctrl_init_vtable(struct platform_device *pdev, + struct regulator *ctrl_reg) { struct vctrl_data *vctrl = platform_get_drvdata(pdev); struct regulator_desc *rdesc = &vctrl->desc; - struct regulator *ctrl_reg = vctrl->ctrl_reg; struct vctrl_voltage_range *vrange_ctrl = &vctrl->vrange.ctrl; int n_voltages; int ctrl_uV; @@ -395,23 +402,19 @@ static int vctrl_init_vtable(struct platform_device *pdev) static int vctrl_enable(struct regulator_dev *rdev) { struct vctrl_data *vctrl = rdev_get_drvdata(rdev); - int ret = regulator_enable(vctrl->ctrl_reg); - if (!ret) - vctrl->enabled = true; + vctrl->enabled = true; - return ret; + return 0; } static int vctrl_disable(struct regulator_dev *rdev) { struct vctrl_data *vctrl = rdev_get_drvdata(rdev); - int ret = regulator_disable(vctrl->ctrl_reg); - if (!ret) - vctrl->enabled = false; + vctrl->enabled = false; - return ret; + return 0; } static int vctrl_is_enabled(struct regulator_dev *rdev) @@ -447,6 +450,7 @@ static int vctrl_probe(struct platform_device *pdev) struct regulator_desc *rdesc; struct regulator_config cfg = { }; struct vctrl_voltage_range *vrange_ctrl; + struct regulator *ctrl_reg; int ctrl_uV; int ret; @@ -461,15 +465,20 @@ static int vctrl_probe(struct platform_device *pdev) if (ret) return ret; + ctrl_reg = devm_regulator_get(&pdev->dev, "ctrl"); + if (IS_ERR(ctrl_reg)) + return PTR_ERR(ctrl_reg); + vrange_ctrl = &vctrl->vrange.ctrl; rdesc = &vctrl->desc; rdesc->name = "vctrl"; rdesc->type = REGULATOR_VOLTAGE; rdesc->owner = THIS_MODULE; + rdesc->supply_name = "ctrl"; - if ((regulator_get_linear_step(vctrl->ctrl_reg) == 1) || - (regulator_count_voltages(vctrl->ctrl_reg) == -EINVAL)) { + if ((regulator_get_linear_step(ctrl_reg) == 1) || + (regulator_count_voltages(ctrl_reg) == -EINVAL)) { rdesc->continuous_voltage_range = true; rdesc->ops = &vctrl_ops_cont; } else { @@ -486,11 +495,12 @@ static int vctrl_probe(struct platform_device *pdev) cfg.init_data = init_data; if (!rdesc->continuous_voltage_range) { - ret = vctrl_init_vtable(pdev); + ret = vctrl_init_vtable(pdev, ctrl_reg); if (ret) return ret; - ctrl_uV = regulator_get_voltage_rdev(vctrl->ctrl_reg->rdev); + /* Use locked consumer API when not in regulator framework */ + ctrl_uV = regulator_get_voltage(ctrl_reg); if (ctrl_uV < 0) { dev_err(&pdev->dev, "failed to get control voltage\n"); return ctrl_uV; @@ -513,6 +523,9 @@ static int vctrl_probe(struct platform_device *pdev) } } + /* Drop ctrl-supply here in favor of regulator core managed supply */ + devm_regulator_put(ctrl_reg); + vctrl->rdev = devm_regulator_register(&pdev->dev, rdesc, &cfg); if (IS_ERR(vctrl->rdev)) { ret = PTR_ERR(vctrl->rdev); diff --git a/drivers/regulator/wm8994-regulator.c b/drivers/regulator/wm8994-regulator.c index cadea0344486f..40befdd9dfa92 100644 --- a/drivers/regulator/wm8994-regulator.c +++ b/drivers/regulator/wm8994-regulator.c @@ -71,6 +71,35 @@ static const struct regulator_ops wm8994_ldo2_ops = { }; static const struct regulator_desc wm8994_ldo_desc[] = { + { + .name = "LDO1", + .id = 1, + .type = REGULATOR_VOLTAGE, + .n_voltages = WM8994_LDO1_MAX_SELECTOR + 1, + .vsel_reg = WM8994_LDO_1, + .vsel_mask = WM8994_LDO1_VSEL_MASK, + .ops = &wm8994_ldo1_ops, + .min_uV = 2400000, + .uV_step = 100000, + .enable_time = 3000, + .off_on_delay = 36000, + .owner = THIS_MODULE, + }, + { + .name = "LDO2", + .id = 2, + .type = REGULATOR_VOLTAGE, + .n_voltages = WM8994_LDO2_MAX_SELECTOR + 1, + .vsel_reg = WM8994_LDO_2, + .vsel_mask = WM8994_LDO2_VSEL_MASK, + .ops = &wm8994_ldo2_ops, + .enable_time = 3000, + .off_on_delay = 36000, + .owner = THIS_MODULE, + }, +}; + +static const struct regulator_desc wm8958_ldo_desc[] = { { .name = "LDO1", .id = 1, @@ -172,9 +201,16 @@ static int wm8994_ldo_probe(struct platform_device *pdev) * regulator core and we need not worry about it on the * error path. */ - ldo->regulator = devm_regulator_register(&pdev->dev, - &wm8994_ldo_desc[id], - &config); + if (ldo->wm8994->type == WM8994) { + ldo->regulator = devm_regulator_register(&pdev->dev, + &wm8994_ldo_desc[id], + &config); + } else { + ldo->regulator = devm_regulator_register(&pdev->dev, + &wm8958_ldo_desc[id], + &config); + } + if (IS_ERR(ldo->regulator)) { ret = PTR_ERR(ldo->regulator); dev_err(wm8994->dev, "Failed to register LDO%d: %d\n", diff --git a/drivers/remoteproc/qcom_q6v5_adsp.c b/drivers/remoteproc/qcom_q6v5_adsp.c index 24e8b7e271773..6b131a490ebf8 100644 --- a/drivers/remoteproc/qcom_q6v5_adsp.c +++ b/drivers/remoteproc/qcom_q6v5_adsp.c @@ -389,6 +389,7 @@ static int adsp_alloc_memory_region(struct qcom_adsp *adsp) } ret = of_address_to_resource(node, 0, &r); + of_node_put(node); if (ret) return ret; diff --git a/drivers/remoteproc/qcom_wcnss.c b/drivers/remoteproc/qcom_wcnss.c index dc135754bb9c5..c72f1b3b60858 100644 --- a/drivers/remoteproc/qcom_wcnss.c +++ b/drivers/remoteproc/qcom_wcnss.c @@ -440,6 +440,7 @@ static int wcnss_alloc_memory_region(struct qcom_wcnss *wcnss) } ret = of_address_to_resource(node, 0, &r); + of_node_put(node); if (ret) return ret; diff --git a/drivers/reset/reset-socfpga.c b/drivers/reset/reset-socfpga.c index 96953992c2bb5..1c5236a69dc49 100644 --- a/drivers/reset/reset-socfpga.c +++ b/drivers/reset/reset-socfpga.c @@ -86,3 +86,29 @@ void __init socfpga_reset_init(void) for_each_matching_node(np, socfpga_early_reset_dt_ids) a10_reset_init(np); } + +/* + * The early driver is problematic, because it doesn't register + * itself as a driver. This causes certain device links to prevent + * consumer devices from probing. The hacky solution is to register + * an empty driver, whose only job is to attach itself to the reset + * manager and call probe. + */ +static const struct of_device_id socfpga_reset_dt_ids[] = { + { .compatible = "altr,rst-mgr", }, + { /* sentinel */ }, +}; + +static int reset_simple_probe(struct platform_device *pdev) +{ + return 0; +} + +static struct platform_driver reset_socfpga_driver = { + .probe = reset_simple_probe, + .driver = { + .name = "socfpga-reset", + .of_match_table = socfpga_reset_dt_ids, + }, +}; +builtin_platform_driver(reset_socfpga_driver); diff --git a/drivers/reset/reset-zynqmp.c b/drivers/reset/reset-zynqmp.c index 99e75d92dadab..8a7473b6ba585 100644 --- a/drivers/reset/reset-zynqmp.c +++ b/drivers/reset/reset-zynqmp.c @@ -46,7 +46,8 @@ static int zynqmp_reset_status(struct reset_controller_dev *rcdev, unsigned long id) { struct zynqmp_reset_data *priv = to_zynqmp_reset_data(rcdev); - int val, err; + int err; + u32 val; err = priv->eemi_ops->reset_get_status(ZYNQMP_RESET_ID + id, &val); if (err) diff --git a/drivers/reset/tegra/reset-bpmp.c b/drivers/reset/tegra/reset-bpmp.c index 24d3395964cc4..4c5bba52b1059 100644 --- a/drivers/reset/tegra/reset-bpmp.c +++ b/drivers/reset/tegra/reset-bpmp.c @@ -20,6 +20,7 @@ static int tegra_bpmp_reset_common(struct reset_controller_dev *rstc, struct tegra_bpmp *bpmp = to_tegra_bpmp(rstc); struct mrq_reset_request request; struct tegra_bpmp_message msg; + int err; memset(&request, 0, sizeof(request)); request.cmd = command; @@ -30,7 +31,13 @@ static int tegra_bpmp_reset_common(struct reset_controller_dev *rstc, msg.tx.data = &request; msg.tx.size = sizeof(request); - return tegra_bpmp_transfer(bpmp, &msg); + err = tegra_bpmp_transfer(bpmp, &msg); + if (err) + return err; + if (msg.rx.ret) + return -EINVAL; + + return 0; } static int tegra_bpmp_reset_module(struct reset_controller_dev *rstc, diff --git a/drivers/rpmsg/qcom_smd.c b/drivers/rpmsg/qcom_smd.c index 19903de6268db..a4db9f6100d2f 100644 --- a/drivers/rpmsg/qcom_smd.c +++ b/drivers/rpmsg/qcom_smd.c @@ -1388,9 +1388,9 @@ static int qcom_smd_parse_edge(struct device *dev, edge->name = node->name; irq = irq_of_parse_and_map(node, 0); - if (irq < 0) { + if (!irq) { dev_err(dev, "required smd interrupt missing\n"); - ret = irq; + ret = -EINVAL; goto put_node; } diff --git a/drivers/rpmsg/rpmsg_char.c b/drivers/rpmsg/rpmsg_char.c index c655074c07c2e..ac50ed7577651 100644 --- a/drivers/rpmsg/rpmsg_char.c +++ b/drivers/rpmsg/rpmsg_char.c @@ -92,7 +92,7 @@ static int rpmsg_eptdev_destroy(struct device *dev, void *data) /* wake up any blocked readers */ wake_up_interruptible(&eptdev->readq); - device_del(&eptdev->dev); + cdev_device_del(&eptdev->cdev, &eptdev->dev); put_device(&eptdev->dev); return 0; @@ -336,7 +336,6 @@ static void rpmsg_eptdev_release_device(struct device *dev) ida_simple_remove(&rpmsg_ept_ida, dev->id); ida_simple_remove(&rpmsg_minor_ida, MINOR(eptdev->dev.devt)); - cdev_del(&eptdev->cdev); kfree(eptdev); } @@ -381,19 +380,13 @@ static int rpmsg_eptdev_create(struct rpmsg_ctrldev *ctrldev, dev->id = ret; dev_set_name(dev, "rpmsg%d", ret); - ret = cdev_add(&eptdev->cdev, dev->devt, 1); + ret = cdev_device_add(&eptdev->cdev, &eptdev->dev); if (ret) goto free_ept_ida; /* We can now rely on the release function for cleanup */ dev->release = rpmsg_eptdev_release_device; - ret = device_add(dev); - if (ret) { - dev_err(dev, "device_add failed: %d\n", ret); - put_device(dev); - } - return ret; free_ept_ida: @@ -462,7 +455,6 @@ static void rpmsg_ctrldev_release_device(struct device *dev) ida_simple_remove(&rpmsg_ctrl_ida, dev->id); ida_simple_remove(&rpmsg_minor_ida, MINOR(dev->devt)); - cdev_del(&ctrldev->cdev); kfree(ctrldev); } @@ -497,19 +489,13 @@ static int rpmsg_chrdev_probe(struct rpmsg_device *rpdev) dev->id = ret; dev_set_name(&ctrldev->dev, "rpmsg_ctrl%d", ret); - ret = cdev_add(&ctrldev->cdev, dev->devt, 1); + ret = cdev_device_add(&ctrldev->cdev, &ctrldev->dev); if (ret) goto free_ctrl_ida; /* We can now rely on the release function for cleanup */ dev->release = rpmsg_ctrldev_release_device; - ret = device_add(dev); - if (ret) { - dev_err(&rpdev->dev, "device_add failed: %d\n", ret); - put_device(dev); - } - dev_set_drvdata(&rpdev->dev, ctrldev); return ret; @@ -535,7 +521,7 @@ static void rpmsg_chrdev_remove(struct rpmsg_device *rpdev) if (ret) dev_warn(&rpdev->dev, "failed to nuke endpoints: %d\n", ret); - device_del(&ctrldev->dev); + cdev_device_del(&ctrldev->cdev, &ctrldev->dev); put_device(&ctrldev->dev); } diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c index e330ec4dfc337..c1d4990beab02 100644 --- a/drivers/rpmsg/rpmsg_core.c +++ b/drivers/rpmsg/rpmsg_core.c @@ -473,13 +473,25 @@ static int rpmsg_dev_probe(struct device *dev) err = rpdrv->probe(rpdev); if (err) { dev_err(dev, "%s: failed: %d\n", __func__, err); - if (ept) - rpmsg_destroy_ept(ept); - goto out; + goto destroy_ept; } - if (ept && rpdev->ops->announce_create) + if (ept && rpdev->ops->announce_create) { err = rpdev->ops->announce_create(rpdev); + if (err) { + dev_err(dev, "failed to announce creation\n"); + goto remove_rpdev; + } + } + + return 0; + +remove_rpdev: + if (rpdrv->remove) + rpdrv->remove(rpdev); +destroy_ept: + if (ept) + rpmsg_destroy_ept(ept); out: return err; } diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 9ae7ce3f50696..0ad8d84aeb339 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -625,6 +625,7 @@ config RTC_DRV_FM3130 config RTC_DRV_RX8010 tristate "Epson RX8010SJ" + select REGMAP_I2C help If you say yes here you get support for the Epson RX8010SJ RTC chip. diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index 9458e6d6686ac..8b434213bc7ad 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -26,6 +26,15 @@ struct class *rtc_class; static void rtc_device_release(struct device *dev) { struct rtc_device *rtc = to_rtc_device(dev); + struct timerqueue_head *head = &rtc->timerqueue; + struct timerqueue_node *node; + + mutex_lock(&rtc->ops_lock); + while ((node = timerqueue_getnext(head))) + timerqueue_del(head, node); + mutex_unlock(&rtc->ops_lock); + + cancel_work_sync(&rtc->irqwork); ida_simple_remove(&rtc_ida, rtc->id); kfree(rtc); diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 5c1378d2fab3d..ba345a379e262 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -793,9 +793,13 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer) struct timerqueue_node *next = timerqueue_getnext(&rtc->timerqueue); struct rtc_time tm; ktime_t now; + int err; + + err = __rtc_read_time(rtc, &tm); + if (err) + return err; timer->enabled = 1; - __rtc_read_time(rtc, &tm); now = rtc_tm_to_ktime(tm); /* Skip over expired timers */ @@ -809,7 +813,6 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer) trace_rtc_timer_enqueue(timer); if (!next || ktime_before(timer->node.expires, next->expires)) { struct rtc_wkalrm alarm; - int err; alarm.time = rtc_ktime_to_tm(timer->node.expires); alarm.enabled = 1; diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index cb28bbdc9e17f..c0dc03ffa8173 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -463,7 +463,10 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t) min = t->time.tm_min; sec = t->time.tm_sec; + spin_lock_irq(&rtc_lock); rtc_control = CMOS_READ(RTC_CONTROL); + spin_unlock_irq(&rtc_lock); + if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { /* Writing 0xff means "don't care" or "match all". */ mon = (mon <= 12) ? bin2bcd(mon) : 0xff; diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c index 2ecd8752b088b..b036ff33fbe61 100644 --- a/drivers/rtc/rtc-mc146818-lib.c +++ b/drivers/rtc/rtc-mc146818-lib.c @@ -83,7 +83,7 @@ unsigned int mc146818_get_time(struct rtc_time *time) time->tm_year += real_year - 72; #endif - if (century > 20) + if (century > 19) time->tm_year += (century - 19) * 100; /* @@ -99,6 +99,17 @@ unsigned int mc146818_get_time(struct rtc_time *time) } EXPORT_SYMBOL_GPL(mc146818_get_time); +/* AMD systems don't allow access to AltCentury with DV1 */ +static bool apply_amd_register_a_behavior(void) +{ +#ifdef CONFIG_X86 + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) + return true; +#endif + return false; +} + /* Set the current date and time in the real time clock. */ int mc146818_set_time(struct rtc_time *time) { @@ -172,7 +183,10 @@ int mc146818_set_time(struct rtc_time *time) save_control = CMOS_READ(RTC_CONTROL); CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); save_freq_select = CMOS_READ(RTC_FREQ_SELECT); - CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); + if (apply_amd_register_a_behavior()) + CMOS_WRITE((save_freq_select & ~RTC_AMD_BANK_SELECT), RTC_FREQ_SELECT); + else + CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); #ifdef CONFIG_MACH_DECSTATION CMOS_WRITE(real_yrs, RTC_DEC_YEAR); diff --git a/drivers/rtc/rtc-mt6397.c b/drivers/rtc/rtc-mt6397.c index b216bdcba0da4..dd3901b0a4ed2 100644 --- a/drivers/rtc/rtc-mt6397.c +++ b/drivers/rtc/rtc-mt6397.c @@ -331,6 +331,8 @@ static int mtk_rtc_probe(struct platform_device *pdev) return -ENOMEM; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -EINVAL; rtc->addr_base = res->start; rtc->irq = platform_get_irq(pdev, 0); diff --git a/drivers/rtc/rtc-pxa.c b/drivers/rtc/rtc-pxa.c index d2f1d8f754bf3..cf8119b6d3204 100644 --- a/drivers/rtc/rtc-pxa.c +++ b/drivers/rtc/rtc-pxa.c @@ -330,6 +330,10 @@ static int __init pxa_rtc_probe(struct platform_device *pdev) if (sa1100_rtc->irq_alarm < 0) return -ENXIO; + sa1100_rtc->rtc = devm_rtc_allocate_device(&pdev->dev); + if (IS_ERR(sa1100_rtc->rtc)) + return PTR_ERR(sa1100_rtc->rtc); + pxa_rtc->base = devm_ioremap(dev, pxa_rtc->ress->start, resource_size(pxa_rtc->ress)); if (!pxa_rtc->base) { diff --git a/drivers/rtc/rtc-tps65910.c b/drivers/rtc/rtc-tps65910.c index 2c0467a9e7179..8d1b1fda62dd1 100644 --- a/drivers/rtc/rtc-tps65910.c +++ b/drivers/rtc/rtc-tps65910.c @@ -460,6 +460,6 @@ static struct platform_driver tps65910_rtc_driver = { }; module_platform_driver(tps65910_rtc_driver); -MODULE_ALIAS("platform:rtc-tps65910"); +MODULE_ALIAS("platform:tps65910-rtc"); MODULE_AUTHOR("Venu Byravarasu "); MODULE_LICENSE("GPL"); diff --git a/drivers/rtc/rtc-wm8350.c b/drivers/rtc/rtc-wm8350.c index 2018614f258f6..6eaa9321c0741 100644 --- a/drivers/rtc/rtc-wm8350.c +++ b/drivers/rtc/rtc-wm8350.c @@ -432,14 +432,21 @@ static int wm8350_rtc_probe(struct platform_device *pdev) return ret; } - wm8350_register_irq(wm8350, WM8350_IRQ_RTC_SEC, + ret = wm8350_register_irq(wm8350, WM8350_IRQ_RTC_SEC, wm8350_rtc_update_handler, 0, "RTC Seconds", wm8350); + if (ret) + return ret; + wm8350_mask_irq(wm8350, WM8350_IRQ_RTC_SEC); - wm8350_register_irq(wm8350, WM8350_IRQ_RTC_ALM, + ret = wm8350_register_irq(wm8350, WM8350_IRQ_RTC_ALM, wm8350_rtc_alarm_handler, 0, "RTC Alarm", wm8350); + if (ret) { + wm8350_free_irq(wm8350, WM8350_IRQ_RTC_SEC, wm8350); + return ret; + } return 0; } diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index b577c8f7e3462..e0570cd0e520c 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1462,6 +1462,13 @@ int dasd_start_IO(struct dasd_ccw_req *cqr) if (!cqr->lpm) cqr->lpm = dasd_path_get_opm(device); } + /* + * remember the amount of formatted tracks to prevent double format on + * ESE devices + */ + if (cqr->block) + cqr->trkcount = atomic_read(&cqr->block->trkcount); + if (cqr->cpmode == 1) { rc = ccw_device_tm_start(device->cdev, cqr->cpaddr, (long) cqr, cqr->lpm); @@ -1680,6 +1687,7 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, unsigned long now; int nrf_suppressed = 0; int fp_suppressed = 0; + struct request *req; u8 *sense = NULL; int expires; @@ -1780,7 +1788,12 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, } if (dasd_ese_needs_format(cqr->block, irb)) { - if (rq_data_dir((struct request *)cqr->callback_data) == READ) { + req = dasd_get_callback_data(cqr); + if (!req) { + cqr->status = DASD_CQR_ERROR; + return; + } + if (rq_data_dir(req) == READ) { device->discipline->ese_read(cqr, irb); cqr->status = DASD_CQR_SUCCESS; cqr->stopclk = now; @@ -2799,8 +2812,7 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr) * complete a request partially. */ if (proc_bytes) { - blk_update_request(req, BLK_STS_OK, - blk_rq_bytes(req) - proc_bytes); + blk_update_request(req, BLK_STS_OK, proc_bytes); blk_mq_requeue_request(req, true); } else { blk_mq_complete_request(req); diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index ad44d22e88591..7749deb614d75 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -3026,13 +3026,24 @@ static int dasd_eckd_format_device(struct dasd_device *base, } static bool test_and_set_format_track(struct dasd_format_entry *to_format, - struct dasd_block *block) + struct dasd_ccw_req *cqr) { + struct dasd_block *block = cqr->block; struct dasd_format_entry *format; unsigned long flags; bool rc = false; spin_lock_irqsave(&block->format_lock, flags); + if (cqr->trkcount != atomic_read(&block->trkcount)) { + /* + * The number of formatted tracks has changed after request + * start and we can not tell if the current track was involved. + * To avoid data corruption treat it as if the current track is + * involved + */ + rc = true; + goto out; + } list_for_each_entry(format, &block->format_list, list) { if (format->track == to_format->track) { rc = true; @@ -3052,6 +3063,7 @@ static void clear_format_track(struct dasd_format_entry *format, unsigned long flags; spin_lock_irqsave(&block->format_lock, flags); + atomic_inc(&block->trkcount); list_del_init(&format->list); spin_unlock_irqrestore(&block->format_lock, flags); } @@ -3088,7 +3100,7 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr, sector_t curr_trk; int rc; - req = cqr->callback_data; + req = dasd_get_callback_data(cqr); block = cqr->block; base = block->base; private = base->private; @@ -3113,8 +3125,11 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr, } format->track = curr_trk; /* test if track is already in formatting by another thread */ - if (test_and_set_format_track(format, block)) + if (test_and_set_format_track(format, cqr)) { + /* this is no real error so do not count down retries */ + cqr->retries++; return ERR_PTR(-EEXIST); + } fdata.start_unit = curr_trk; fdata.stop_unit = curr_trk; @@ -3213,12 +3228,11 @@ static int dasd_eckd_ese_read(struct dasd_ccw_req *cqr, struct irb *irb) cqr->proc_bytes = blk_count * blksize; return 0; } - if (dst && !skip_block) { - dst += off; + if (dst && !skip_block) memset(dst, 0, blksize); - } else { + else skip_block--; - } + dst += blksize; blk_count++; } } diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index fa552f9f16667..9d9685c25253d 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -188,6 +188,7 @@ struct dasd_ccw_req { void (*callback)(struct dasd_ccw_req *, void *data); void *callback_data; unsigned int proc_bytes; /* bytes for partial completion */ + unsigned int trkcount; /* count formatted tracks */ }; /* @@ -575,6 +576,7 @@ struct dasd_block { struct list_head format_list; spinlock_t format_lock; + atomic_t trkcount; }; struct dasd_attention_data { @@ -723,6 +725,18 @@ dasd_check_blocksize(int bsize) return 0; } +/* + * return the callback data of the original request in case there are + * ERP requests build on top of it + */ +static inline void *dasd_get_callback_data(struct dasd_ccw_req *cqr) +{ + while (cqr->refers) + cqr = cqr->refers; + + return cqr->callback_data; +} + /* externals in dasd.c */ #define DASD_PROFILE_OFF 0 #define DASD_PROFILE_ON 1 diff --git a/drivers/s390/char/keyboard.h b/drivers/s390/char/keyboard.h index c467589c7f452..c06d399b9b1f1 100644 --- a/drivers/s390/char/keyboard.h +++ b/drivers/s390/char/keyboard.h @@ -56,7 +56,7 @@ static inline void kbd_put_queue(struct tty_port *port, int ch) { tty_insert_flip_char(port, ch, 0); - tty_schedule_flip(port); + tty_flip_buffer_push(port); } static inline void @@ -64,5 +64,5 @@ kbd_puts_queue(struct tty_port *port, char *cp) { while (*cp) tty_insert_flip_char(port, *cp++, 0); - tty_schedule_flip(port); + tty_flip_buffer_push(port); } diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index cc5e84b80c699..faa3a4b8ed91d 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -40,13 +40,14 @@ static void __init sclp_early_facilities_detect(struct read_info_sccb *sccb) sclp.has_gisaf = !!(sccb->fac118 & 0x08); sclp.has_hvs = !!(sccb->fac119 & 0x80); sclp.has_kss = !!(sccb->fac98 & 0x01); - sclp.has_sipl = !!(sccb->cbl & 0x4000); if (sccb->fac85 & 0x02) S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP; if (sccb->fac91 & 0x40) S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_GUEST; if (sccb->cpuoff > 134) sclp.has_diag318 = !!(sccb->byte_134 & 0x80); + if (sccb->cpuoff > 137) + sclp.has_sipl = !!(sccb->cbl & 0x4000); sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2; sclp.rzm = sccb->rnsize ? sccb->rnsize : sccb->rnsize2; sclp.rzm <<= 20; diff --git a/drivers/s390/char/tape_std.c b/drivers/s390/char/tape_std.c index 1f5fab617b679..f7e75d9fedf61 100644 --- a/drivers/s390/char/tape_std.c +++ b/drivers/s390/char/tape_std.c @@ -53,7 +53,6 @@ int tape_std_assign(struct tape_device *device) { int rc; - struct timer_list timeout; struct tape_request *request; request = tape_alloc_request(2, 11); @@ -70,7 +69,7 @@ tape_std_assign(struct tape_device *device) * So we set up a timeout for this call. */ timer_setup(&request->timer, tape_std_assign_timeout, 0); - mod_timer(&timeout, jiffies + 2 * HZ); + mod_timer(&request->timer, jiffies + msecs_to_jiffies(2000)); rc = tape_do_io_interruptible(device, request); diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 5734a78dbb8e6..ad447437a27c5 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -426,9 +426,26 @@ static ssize_t pimpampom_show(struct device *dev, } static DEVICE_ATTR_RO(pimpampom); +static ssize_t dev_busid_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct subchannel *sch = to_subchannel(dev); + struct pmcw *pmcw = &sch->schib.pmcw; + + if ((pmcw->st == SUBCHANNEL_TYPE_IO && pmcw->dnv) || + (pmcw->st == SUBCHANNEL_TYPE_MSG && pmcw->w)) + return sysfs_emit(buf, "0.%x.%04x\n", sch->schid.ssid, + pmcw->dev); + else + return sysfs_emit(buf, "none\n"); +} +static DEVICE_ATTR_RO(dev_busid); + static struct attribute *io_subchannel_type_attrs[] = { &dev_attr_chpids.attr, &dev_attr_pimpampom.attr, + &dev_attr_dev_busid.attr, NULL, }; ATTRIBUTE_GROUPS(io_subchannel_type); diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index ccecf6b9504e8..5a411595347b5 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -717,13 +717,23 @@ EXPORT_SYMBOL_GPL(ccw_device_get_schid); */ void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size) { - return cio_gp_dma_zalloc(cdev->private->dma_pool, &cdev->dev, size); + void *addr; + + if (!get_device(&cdev->dev)) + return NULL; + addr = cio_gp_dma_zalloc(cdev->private->dma_pool, &cdev->dev, size); + if (IS_ERR_OR_NULL(addr)) + put_device(&cdev->dev); + return addr; } EXPORT_SYMBOL(ccw_device_dma_zalloc); void ccw_device_dma_free(struct ccw_device *cdev, void *cpu_addr, size_t size) { + if (!cpu_addr) + return; cio_gp_dma_free(cdev->private->dma_pool, cpu_addr, size); + put_device(&cdev->dev); } EXPORT_SYMBOL(ccw_device_dma_free); diff --git a/drivers/s390/net/ctcm_mpc.c b/drivers/s390/net/ctcm_mpc.c index ab316baa82843..d766002bc5bee 100644 --- a/drivers/s390/net/ctcm_mpc.c +++ b/drivers/s390/net/ctcm_mpc.c @@ -626,8 +626,6 @@ static void mpc_rcvd_sweep_resp(struct mpcg_info *mpcginfo) ctcm_clear_busy_do(dev); } - kfree(mpcginfo); - return; } @@ -1206,10 +1204,10 @@ static void ctcmpc_unpack_skb(struct channel *ch, struct sk_buff *pskb) CTCM_FUNTAIL, dev->name); priv->stats.rx_dropped++; /* mpcginfo only used for non-data transfers */ - kfree(mpcginfo); if (do_debug_data) ctcmpc_dump_skb(pskb, -8); } + kfree(mpcginfo); } done: @@ -1991,7 +1989,6 @@ static void mpc_action_rcvd_xid0(fsm_instance *fsm, int event, void *arg) } break; } - kfree(mpcginfo); CTCM_PR_DEBUG("ctcmpc:%s() %s xid2:%i xid7:%i xidt_p2:%i \n", __func__, ch->id, grp->outstanding_xid2, @@ -2052,7 +2049,6 @@ static void mpc_action_rcvd_xid7(fsm_instance *fsm, int event, void *arg) mpc_validate_xid(mpcginfo); break; } - kfree(mpcginfo); return; } diff --git a/drivers/s390/net/ctcm_sysfs.c b/drivers/s390/net/ctcm_sysfs.c index ded1930a00b2d..e3813a7aa5e68 100644 --- a/drivers/s390/net/ctcm_sysfs.c +++ b/drivers/s390/net/ctcm_sysfs.c @@ -39,11 +39,12 @@ static ssize_t ctcm_buffer_write(struct device *dev, struct ctcm_priv *priv = dev_get_drvdata(dev); int rc; - ndev = priv->channel[CTCM_READ]->netdev; - if (!(priv && priv->channel[CTCM_READ] && ndev)) { + if (!(priv && priv->channel[CTCM_READ] && + priv->channel[CTCM_READ]->netdev)) { CTCM_DBF_TEXT(SETUP, CTC_DBF_ERROR, "bfnondev"); return -ENODEV; } + ndev = priv->channel[CTCM_READ]->netdev; rc = kstrtouint(buf, 0, &bs1); if (rc) diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c index 8f08b0a2917c6..4eec7bfb5de93 100644 --- a/drivers/s390/net/lcs.c +++ b/drivers/s390/net/lcs.c @@ -1735,10 +1735,11 @@ lcs_get_control(struct lcs_card *card, struct lcs_cmd *cmd) lcs_schedule_recovery(card); break; case LCS_CMD_STOPLAN: - pr_warn("Stoplan for %s initiated by LGW\n", - card->dev->name); - if (card->dev) + if (card->dev) { + pr_warn("Stoplan for %s initiated by LGW\n", + card->dev->name); netif_carrier_off(card->dev); + } break; default: LCS_DBF_TEXT(5, trace, "noLGWcmd"); diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index b018b61bd168e..d4c2c44b863dd 100644 --- a/drivers/s390/scsi/zfcp_fc.c +++ b/drivers/s390/scsi/zfcp_fc.c @@ -521,6 +521,8 @@ static void zfcp_fc_adisc_handler(void *data) goto out; } + /* re-init to undo drop from zfcp_fc_adisc() */ + port->d_id = ntoh24(adisc_resp->adisc_port_id); /* port is good, unblock rport without going through erp */ zfcp_scsi_schedule_rport_register(port); out: @@ -534,6 +536,7 @@ static int zfcp_fc_adisc(struct zfcp_port *port) struct zfcp_fc_req *fc_req; struct zfcp_adapter *adapter = port->adapter; struct Scsi_Host *shost = adapter->scsi_host; + u32 d_id; int ret; fc_req = kmem_cache_zalloc(zfcp_fc_req_cache, GFP_ATOMIC); @@ -558,7 +561,15 @@ static int zfcp_fc_adisc(struct zfcp_port *port) fc_req->u.adisc.req.adisc_cmd = ELS_ADISC; hton24(fc_req->u.adisc.req.adisc_port_id, fc_host_port_id(shost)); - ret = zfcp_fsf_send_els(adapter, port->d_id, &fc_req->ct_els, + d_id = port->d_id; /* remember as destination for send els below */ + /* + * Force fresh GID_PN lookup on next port recovery. + * Must happen after request setup and before sending request, + * to prevent race with port->d_id re-init in zfcp_fc_adisc_handler(). + */ + port->d_id = 0; + + ret = zfcp_fsf_send_els(adapter, d_id, &fc_req->ct_els, ZFCP_FC_CTELS_TMO); if (ret) kmem_cache_free(zfcp_fc_req_cache, fc_req); diff --git a/drivers/scsi/BusLogic.c b/drivers/scsi/BusLogic.c index 6e988233fb81f..6a54556119dd6 100644 --- a/drivers/scsi/BusLogic.c +++ b/drivers/scsi/BusLogic.c @@ -3601,7 +3601,7 @@ static void blogic_msg(enum blogic_msglevel msglevel, char *fmt, if (buf[0] != '\n' || len > 1) printk("%sscsi%d: %s", blogic_msglevelmap[msglevel], adapter->host_no, buf); } else - printk("%s", buf); + pr_cont("%s", buf); } else { if (begin) { if (adapter != NULL && adapter->adapter_initd) @@ -3609,7 +3609,7 @@ static void blogic_msg(enum blogic_msglevel msglevel, char *fmt, else printk("%s%s", blogic_msglevelmap[msglevel], buf); } else - printk("%s", buf); + pr_cont("%s", buf); } begin = (buf[len - 1] == '\n'); } diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 1035f947f1bcf..44342cad6dae8 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -1995,7 +1995,7 @@ static void aac_flush_ios(struct aac_dev *aac) } static pci_ers_result_t aac_pci_error_detected(struct pci_dev *pdev, - enum pci_channel_state error) + pci_channel_state_t error) { struct Scsi_Host *shost = pci_get_drvdata(pdev); struct aac_dev *aac = shost_priv(shost); diff --git a/drivers/scsi/advansys.c b/drivers/scsi/advansys.c index a242a62caaa16..7b3e52ff5f516 100644 --- a/drivers/scsi/advansys.c +++ b/drivers/scsi/advansys.c @@ -3366,8 +3366,8 @@ static void asc_prt_adv_board_info(struct seq_file *m, struct Scsi_Host *shost) shost->host_no); seq_printf(m, - " iop_base 0x%lx, cable_detect: %X, err_code %u\n", - (unsigned long)v->iop_base, + " iop_base 0x%p, cable_detect: %X, err_code %u\n", + v->iop_base, AdvReadWordRegister(iop_base,IOPW_SCSI_CFG1) & CABLE_DETECT, v->err_code); diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c index eb466c2e1839e..fdd9f1a5100c7 100644 --- a/drivers/scsi/aha152x.c +++ b/drivers/scsi/aha152x.c @@ -3368,13 +3368,11 @@ static int __init aha152x_setup(char *str) setup[setup_count].synchronous = ints[0] >= 6 ? ints[6] : 1; setup[setup_count].delay = ints[0] >= 7 ? ints[7] : DELAY_DEFAULT; setup[setup_count].ext_trans = ints[0] >= 8 ? ints[8] : 0; - if (ints[0] > 8) { /*}*/ + if (ints[0] > 8) printk(KERN_NOTICE "aha152x: usage: aha152x=[,[," "[,[,[,[,[,]]]]]]]\n"); - } else { + else setup_count++; - return 0; - } return 1; } diff --git a/drivers/scsi/bfa/bfad_attr.c b/drivers/scsi/bfa/bfad_attr.c index fbfce02e5b935..c285f401ff7b5 100644 --- a/drivers/scsi/bfa/bfad_attr.c +++ b/drivers/scsi/bfa/bfad_attr.c @@ -711,7 +711,7 @@ bfad_im_serial_num_show(struct device *dev, struct device_attribute *attr, char serial_num[BFA_ADAPTER_SERIAL_NUM_LEN]; bfa_get_adapter_serial_num(&bfad->bfa, serial_num); - return snprintf(buf, PAGE_SIZE, "%s\n", serial_num); + return sysfs_emit(buf, "%s\n", serial_num); } static ssize_t @@ -725,7 +725,7 @@ bfad_im_model_show(struct device *dev, struct device_attribute *attr, char model[BFA_ADAPTER_MODEL_NAME_LEN]; bfa_get_adapter_model(&bfad->bfa, model); - return snprintf(buf, PAGE_SIZE, "%s\n", model); + return sysfs_emit(buf, "%s\n", model); } static ssize_t @@ -805,7 +805,7 @@ bfad_im_model_desc_show(struct device *dev, struct device_attribute *attr, snprintf(model_descr, BFA_ADAPTER_MODEL_DESCR_LEN, "Invalid Model"); - return snprintf(buf, PAGE_SIZE, "%s\n", model_descr); + return sysfs_emit(buf, "%s\n", model_descr); } static ssize_t @@ -819,7 +819,7 @@ bfad_im_node_name_show(struct device *dev, struct device_attribute *attr, u64 nwwn; nwwn = bfa_fcs_lport_get_nwwn(port->fcs_port); - return snprintf(buf, PAGE_SIZE, "0x%llx\n", cpu_to_be64(nwwn)); + return sysfs_emit(buf, "0x%llx\n", cpu_to_be64(nwwn)); } static ssize_t @@ -836,7 +836,7 @@ bfad_im_symbolic_name_show(struct device *dev, struct device_attribute *attr, bfa_fcs_lport_get_attr(&bfad->bfa_fcs.fabric.bport, &port_attr); strlcpy(symname, port_attr.port_cfg.sym_name.symname, BFA_SYMNAME_MAXLEN); - return snprintf(buf, PAGE_SIZE, "%s\n", symname); + return sysfs_emit(buf, "%s\n", symname); } static ssize_t @@ -850,14 +850,14 @@ bfad_im_hw_version_show(struct device *dev, struct device_attribute *attr, char hw_ver[BFA_VERSION_LEN]; bfa_get_pci_chip_rev(&bfad->bfa, hw_ver); - return snprintf(buf, PAGE_SIZE, "%s\n", hw_ver); + return sysfs_emit(buf, "%s\n", hw_ver); } static ssize_t bfad_im_drv_version_show(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%s\n", BFAD_DRIVER_VERSION); + return sysfs_emit(buf, "%s\n", BFAD_DRIVER_VERSION); } static ssize_t @@ -871,7 +871,7 @@ bfad_im_optionrom_version_show(struct device *dev, char optrom_ver[BFA_VERSION_LEN]; bfa_get_adapter_optrom_ver(&bfad->bfa, optrom_ver); - return snprintf(buf, PAGE_SIZE, "%s\n", optrom_ver); + return sysfs_emit(buf, "%s\n", optrom_ver); } static ssize_t @@ -885,7 +885,7 @@ bfad_im_fw_version_show(struct device *dev, struct device_attribute *attr, char fw_ver[BFA_VERSION_LEN]; bfa_get_adapter_fw_ver(&bfad->bfa, fw_ver); - return snprintf(buf, PAGE_SIZE, "%s\n", fw_ver); + return sysfs_emit(buf, "%s\n", fw_ver); } static ssize_t @@ -897,7 +897,7 @@ bfad_im_num_of_ports_show(struct device *dev, struct device_attribute *attr, (struct bfad_im_port_s *) shost->hostdata[0]; struct bfad_s *bfad = im_port->bfad; - return snprintf(buf, PAGE_SIZE, "%d\n", + return sysfs_emit(buf, "%d\n", bfa_get_nports(&bfad->bfa)); } @@ -905,7 +905,7 @@ static ssize_t bfad_im_drv_name_show(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%s\n", BFAD_DRIVER_NAME); + return sysfs_emit(buf, "%s\n", BFAD_DRIVER_NAME); } static ssize_t @@ -924,14 +924,14 @@ bfad_im_num_of_discovered_ports_show(struct device *dev, rports = kcalloc(nrports, sizeof(struct bfa_rport_qualifier_s), GFP_ATOMIC); if (rports == NULL) - return snprintf(buf, PAGE_SIZE, "Failed\n"); + return sysfs_emit(buf, "Failed\n"); spin_lock_irqsave(&bfad->bfad_lock, flags); bfa_fcs_lport_get_rport_quals(port->fcs_port, rports, &nrports); spin_unlock_irqrestore(&bfad->bfad_lock, flags); kfree(rports); - return snprintf(buf, PAGE_SIZE, "%d\n", nrports); + return sysfs_emit(buf, "%d\n", nrports); } static DEVICE_ATTR(serial_number, S_IRUGO, diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index b4bfab5edf8ff..9ed109fb6b67b 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -80,7 +80,7 @@ static int bnx2fc_bind_pcidev(struct bnx2fc_hba *hba); static void bnx2fc_unbind_pcidev(struct bnx2fc_hba *hba); static struct fc_lport *bnx2fc_if_create(struct bnx2fc_interface *interface, struct device *parent, int npiv); -static void bnx2fc_destroy_work(struct work_struct *work); +static void bnx2fc_port_destroy(struct fcoe_port *port); static struct bnx2fc_hba *bnx2fc_hba_lookup(struct net_device *phys_dev); static struct bnx2fc_interface *bnx2fc_interface_lookup(struct net_device @@ -506,7 +506,8 @@ static int bnx2fc_l2_rcv_thread(void *arg) static void bnx2fc_recv_frame(struct sk_buff *skb) { - u32 fr_len; + u64 crc_err; + u32 fr_len, fr_crc; struct fc_lport *lport; struct fcoe_rcv_info *fr; struct fc_stats *stats; @@ -540,6 +541,11 @@ static void bnx2fc_recv_frame(struct sk_buff *skb) skb_pull(skb, sizeof(struct fcoe_hdr)); fr_len = skb->len - sizeof(struct fcoe_crc_eof); + stats = per_cpu_ptr(lport->stats, get_cpu()); + stats->RxFrames++; + stats->RxWords += fr_len / FCOE_WORD_TO_BYTE; + put_cpu(); + fp = (struct fc_frame *)skb; fc_frame_init(fp); fr_dev(fp) = lport; @@ -622,16 +628,15 @@ static void bnx2fc_recv_frame(struct sk_buff *skb) return; } - stats = per_cpu_ptr(lport->stats, smp_processor_id()); - stats->RxFrames++; - stats->RxWords += fr_len / FCOE_WORD_TO_BYTE; + fr_crc = le32_to_cpu(fr_crc(fp)); - if (le32_to_cpu(fr_crc(fp)) != - ~crc32(~0, skb->data, fr_len)) { - if (stats->InvalidCRCCount < 5) + if (unlikely(fr_crc != ~crc32(~0, skb->data, fr_len))) { + stats = per_cpu_ptr(lport->stats, get_cpu()); + crc_err = (stats->InvalidCRCCount++); + put_cpu(); + if (crc_err < 5) printk(KERN_WARNING PFX "dropping frame with " "CRC error\n"); - stats->InvalidCRCCount++; kfree_skb(skb); return; } @@ -902,9 +907,6 @@ static void bnx2fc_indicate_netevent(void *context, unsigned long event, __bnx2fc_destroy(interface); } mutex_unlock(&bnx2fc_dev_lock); - - /* Ensure ALL destroy work has been completed before return */ - flush_workqueue(bnx2fc_wq); return; default: @@ -1211,8 +1213,8 @@ static int bnx2fc_vport_destroy(struct fc_vport *vport) mutex_unlock(&n_port->lp_mutex); bnx2fc_free_vport(interface->hba, port->lport); bnx2fc_port_shutdown(port->lport); + bnx2fc_port_destroy(port); bnx2fc_interface_put(interface); - queue_work(bnx2fc_wq, &port->destroy_work); return 0; } @@ -1521,7 +1523,6 @@ static struct fc_lport *bnx2fc_if_create(struct bnx2fc_interface *interface, port->lport = lport; port->priv = interface; port->get_netdev = bnx2fc_netdev; - INIT_WORK(&port->destroy_work, bnx2fc_destroy_work); /* Configure fcoe_port */ rc = bnx2fc_lport_config(lport); @@ -1649,8 +1650,8 @@ static void __bnx2fc_destroy(struct bnx2fc_interface *interface) bnx2fc_interface_cleanup(interface); bnx2fc_stop(interface); list_del(&interface->list); + bnx2fc_port_destroy(port); bnx2fc_interface_put(interface); - queue_work(bnx2fc_wq, &port->destroy_work); } /** @@ -1691,15 +1692,12 @@ static int bnx2fc_destroy(struct net_device *netdev) return rc; } -static void bnx2fc_destroy_work(struct work_struct *work) +static void bnx2fc_port_destroy(struct fcoe_port *port) { - struct fcoe_port *port; struct fc_lport *lport; - port = container_of(work, struct fcoe_port, destroy_work); lport = port->lport; - - BNX2FC_HBA_DBG(lport, "Entered bnx2fc_destroy_work\n"); + BNX2FC_HBA_DBG(lport, "Entered %s, destroying lport %p\n", __func__, lport); bnx2fc_if_destroy(lport); } @@ -2553,9 +2551,6 @@ static void bnx2fc_ulp_exit(struct cnic_dev *dev) __bnx2fc_destroy(interface); mutex_unlock(&bnx2fc_dev_lock); - /* Ensure ALL destroy work has been completed before return */ - flush_workqueue(bnx2fc_wq); - bnx2fc_ulp_stop(hba); /* unregister cnic device */ if (test_and_clear_bit(BNX2FC_CNIC_REGISTERED, &hba->reg_with_cnic)) diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c index a0e7764148899..06133d7c2a559 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_io.c +++ b/drivers/scsi/bnx2fc/bnx2fc_io.c @@ -1212,7 +1212,7 @@ int bnx2fc_eh_abort(struct scsi_cmnd *sc_cmd) * cleanup the command and return that I/O was successfully * aborted. */ - rc = bnx2fc_abts_cleanup(io_req); + bnx2fc_abts_cleanup(io_req); /* This only occurs when an task abort was requested while ABTS is in progress. Setting the IO_CLEANUP flag will skip the RRQ process in the case when the fw generated SCSI_CMD cmpl diff --git a/drivers/scsi/csiostor/csio_init.c b/drivers/scsi/csiostor/csio_init.c index a6dd704d7f2de..1b8ccadc7cf67 100644 --- a/drivers/scsi/csiostor/csio_init.c +++ b/drivers/scsi/csiostor/csio_init.c @@ -1257,3 +1257,4 @@ MODULE_DEVICE_TABLE(pci, csio_pci_tbl); MODULE_VERSION(CSIO_DRV_VERSION); MODULE_FIRMWARE(FW_FNAME_T5); MODULE_FIRMWARE(FW_FNAME_T6); +MODULE_SOFTDEP("pre: cxgb4"); diff --git a/drivers/scsi/csiostor/csio_lnode.c b/drivers/scsi/csiostor/csio_lnode.c index 23cbe4cda760e..c3bf590f5d685 100644 --- a/drivers/scsi/csiostor/csio_lnode.c +++ b/drivers/scsi/csiostor/csio_lnode.c @@ -619,7 +619,7 @@ csio_ln_vnp_read_cbfn(struct csio_hw *hw, struct csio_mb *mbp) struct fc_els_csp *csp; struct fc_els_cssp *clsp; enum fw_retval retval; - __be32 nport_id; + __be32 nport_id = 0; retval = FW_CMD_RETVAL_G(ntohl(rsp->alloc_to_len16)); if (retval != FW_SUCCESS) { diff --git a/drivers/scsi/dc395x.c b/drivers/scsi/dc395x.c index 13fbb2eab842e..c4a6609d8fae1 100644 --- a/drivers/scsi/dc395x.c +++ b/drivers/scsi/dc395x.c @@ -3664,10 +3664,19 @@ static struct DeviceCtlBlk *device_alloc(struct AdapterCtlBlk *acb, #endif if (dcb->target_lun != 0) { /* Copy settings */ - struct DeviceCtlBlk *p; - list_for_each_entry(p, &acb->dcb_list, list) - if (p->target_id == dcb->target_id) + struct DeviceCtlBlk *p = NULL, *iter; + + list_for_each_entry(iter, &acb->dcb_list, list) + if (iter->target_id == dcb->target_id) { + p = iter; break; + } + + if (!p) { + kfree(dcb); + return NULL; + } + dprintkdbg(DBG_1, "device_alloc: <%02i-%i> copy from <%02i-%i>\n", dcb->target_id, dcb->target_lun, @@ -4698,6 +4707,7 @@ static int dc395x_init_one(struct pci_dev *dev, const struct pci_device_id *id) /* initialise the adapter and everything we need */ if (adapter_init(acb, io_port_base, io_port_len, irq)) { dprintkl(KERN_INFO, "adapter init failed\n"); + acb = NULL; goto fail; } diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c index 07a0dadc75bf5..7ce2a0434e1e5 100644 --- a/drivers/scsi/fcoe/fcoe_ctlr.c +++ b/drivers/scsi/fcoe/fcoe_ctlr.c @@ -1966,7 +1966,7 @@ EXPORT_SYMBOL(fcoe_ctlr_recv_flogi); * * Returns: u64 fc world wide name */ -u64 fcoe_wwn_from_mac(unsigned char mac[MAX_ADDR_LEN], +u64 fcoe_wwn_from_mac(unsigned char mac[ETH_ALEN], unsigned int scheme, unsigned int port) { u64 wwn; diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 13f314fa757e8..a86aae52d94f4 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -504,7 +504,7 @@ MODULE_PARM_DESC(intr_conv, "interrupt converge enable (0-1)"); /* permit overriding the host protection capabilities mask (EEDP/T10 PI) */ static int prot_mask; -module_param(prot_mask, int, 0); +module_param(prot_mask, int, 0444); MODULE_PARM_DESC(prot_mask, " host protection capabilities mask, def=0x0 "); static bool auto_affine_msi_experimental; diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index a929fe76102b0..d5b2917aea44f 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -35,7 +35,7 @@ #define IBMVSCSIS_VERSION "v0.2" -#define INITIAL_SRP_LIMIT 800 +#define INITIAL_SRP_LIMIT 1024 #define DEFAULT_MAX_SECTORS 256 #define MAX_TXU 1024 * 1024 diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 7a57b61f0340e..a163fd9331b3c 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -9772,7 +9772,7 @@ static int ipr_alloc_mem(struct ipr_ioa_cfg *ioa_cfg) GFP_KERNEL); if (!ioa_cfg->hrrq[i].host_rrq) { - while (--i > 0) + while (--i >= 0) dma_free_coherent(&pdev->dev, sizeof(u32) * ioa_cfg->hrrq[i].size, ioa_cfg->hrrq[i].host_rrq, @@ -10045,7 +10045,7 @@ static int ipr_request_other_msi_irqs(struct ipr_ioa_cfg *ioa_cfg, ioa_cfg->vectors_info[i].desc, &ioa_cfg->hrrq[i]); if (rc) { - while (--i >= 0) + while (--i > 0) free_irq(pci_irq_vector(pdev, i), &ioa_cfg->hrrq[i]); return rc; diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c index e5b18e5d46dac..6e2a36eeb12a7 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c @@ -1697,6 +1697,7 @@ static void fc_exch_abts_resp(struct fc_exch *ep, struct fc_frame *fp) if (cancel_delayed_work_sync(&ep->timeout_work)) { FC_EXCH_DBG(ep, "Exchange timer canceled due to ABTS response\n"); fc_exch_release(ep); /* release from pending timer hold */ + return; } spin_lock_bh(&ep->ex_lock); diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index eeba6180711cd..f3cee64c6d12f 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -2948,6 +2948,8 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) { struct iscsi_conn *conn = cls_conn->dd_data; struct iscsi_session *session = conn->session; + char *tmp_persistent_address = conn->persistent_address; + char *tmp_local_ipaddr = conn->local_ipaddr; del_timer_sync(&conn->transport_timer); @@ -2969,8 +2971,6 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) spin_lock_bh(&session->frwd_lock); free_pages((unsigned long) conn->data, get_order(ISCSI_DEF_MAX_RECV_SEG_LEN)); - kfree(conn->persistent_address); - kfree(conn->local_ipaddr); /* regular RX path uses back_lock */ spin_lock_bh(&session->back_lock); kfifo_in(&session->cmdpool.queue, (void*)&conn->login_task, @@ -2982,6 +2982,8 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) mutex_unlock(&session->eh_mutex); iscsi_destroy_conn(cls_conn); + kfree(tmp_persistent_address); + kfree(tmp_local_ipaddr); } EXPORT_SYMBOL_GPL(iscsi_conn_teardown); diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 5d28bb7f2ca40..5c801705c4700 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -201,7 +201,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) task->total_xfer_len = qc->nbytes; task->num_scatter = qc->n_elem; task->data_dir = qc->dma_dir; - } else if (qc->tf.protocol == ATA_PROT_NODATA) { + } else if (!ata_is_data(qc->tf.protocol)) { task->data_dir = DMA_NONE; } else { for_each_sg(qc->sg, sg, qc->n_elem, si) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 8943d42fc406e..088b764aefa43 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -377,6 +377,7 @@ struct lpfc_vport { #define FC_VPORT_LOGO_RCVD 0x200 /* LOGO received on vport */ #define FC_RSCN_DISCOVERY 0x400 /* Auth all devices after RSCN */ #define FC_LOGO_RCVD_DID_CHNG 0x800 /* FDISC on phys port detect DID chng*/ +#define FC_PT2PT_NO_NVME 0x1000 /* Don't send NVME PRLI */ #define FC_SCSI_SCAN_TMO 0x4000 /* scsi scan timer running */ #define FC_ABORT_DISCOVERY 0x8000 /* we want to abort discovery */ #define FC_NDISC_ACTIVE 0x10000 /* NPort discovery active */ @@ -735,7 +736,6 @@ struct lpfc_hba { #define HBA_DEVLOSS_TMO 0x2000 /* HBA in devloss timeout */ #define HBA_RRQ_ACTIVE 0x4000 /* process the rrq active list */ #define HBA_IOQ_FLUSH 0x8000 /* FCP/NVME I/O queues being flushed */ -#define HBA_FW_DUMP_OP 0x10000 /* Skips fn reset before FW dump */ #define HBA_RECOVERABLE_UE 0x20000 /* Firmware supports recoverable UE */ #define HBA_FORCED_LINK_SPEED 0x40000 /* * Firmware supports Forced Link Speed @@ -744,6 +744,7 @@ struct lpfc_hba { #define HBA_FLOGI_ISSUED 0x100000 /* FLOGI was issued */ #define HBA_DEFER_FLOGI 0x800000 /* Defer FLOGI till read_sparm cmpl */ + struct completion *fw_dump_cmpl; /* cmpl event tracker for fw_dump */ uint32_t fcp_ring_in_use; /* When polling test if intr-hndlr active*/ struct lpfc_dmabuf slim2p; @@ -870,6 +871,16 @@ struct lpfc_hba { uint32_t cfg_hostmem_hgp; uint32_t cfg_log_verbose; uint32_t cfg_enable_fc4_type; +#define LPFC_ENABLE_FCP 1 +#define LPFC_ENABLE_NVME 2 +#define LPFC_ENABLE_BOTH 3 +#if (IS_ENABLED(CONFIG_NVME_FC)) +#define LPFC_MAX_ENBL_FC4_TYPE LPFC_ENABLE_BOTH +#define LPFC_DEF_ENBL_FC4_TYPE LPFC_ENABLE_BOTH +#else +#define LPFC_MAX_ENBL_FC4_TYPE LPFC_ENABLE_FCP +#define LPFC_DEF_ENBL_FC4_TYPE LPFC_ENABLE_FCP +#endif uint32_t cfg_aer_support; uint32_t cfg_sriov_nr_virtfn; uint32_t cfg_request_firmware_upgrade; @@ -892,9 +903,6 @@ struct lpfc_hba { uint32_t cfg_ras_fwlog_func; uint32_t cfg_enable_bbcr; /* Enable BB Credit Recovery */ uint32_t cfg_enable_dpp; /* Enable Direct Packet Push */ -#define LPFC_ENABLE_FCP 1 -#define LPFC_ENABLE_NVME 2 -#define LPFC_ENABLE_BOTH 3 uint32_t cfg_enable_pbde; struct nvmet_fc_target_port *targetport; lpfc_vpd_t vpd; /* vital product data */ diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 45db19e31b348..1e59c60a9dcc3 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -1145,6 +1145,9 @@ lpfc_issue_lip(struct Scsi_Host *shost) pmboxq->u.mb.mbxCommand = MBX_DOWN_LINK; pmboxq->u.mb.mbxOwner = OWN_HOST; + if ((vport->fc_flag & FC_PT2PT) && (vport->fc_flag & FC_PT2PT_NO_NVME)) + vport->fc_flag &= ~FC_PT2PT_NO_NVME; + mbxstatus = lpfc_sli_issue_mbox_wait(phba, pmboxq, LPFC_MBOX_TMO * 2); if ((mbxstatus == MBX_SUCCESS) && @@ -1537,25 +1540,25 @@ lpfc_sli4_pdev_reg_request(struct lpfc_hba *phba, uint32_t opcode) before_fc_flag = phba->pport->fc_flag; sriov_nr_virtfn = phba->cfg_sriov_nr_virtfn; - /* Disable SR-IOV virtual functions if enabled */ - if (phba->cfg_sriov_nr_virtfn) { - pci_disable_sriov(pdev); - phba->cfg_sriov_nr_virtfn = 0; - } + if (opcode == LPFC_FW_DUMP) { + init_completion(&online_compl); + phba->fw_dump_cmpl = &online_compl; + } else { + /* Disable SR-IOV virtual functions if enabled */ + if (phba->cfg_sriov_nr_virtfn) { + pci_disable_sriov(pdev); + phba->cfg_sriov_nr_virtfn = 0; + } - if (opcode == LPFC_FW_DUMP) - phba->hba_flag |= HBA_FW_DUMP_OP; + status = lpfc_do_offline(phba, LPFC_EVT_OFFLINE); - status = lpfc_do_offline(phba, LPFC_EVT_OFFLINE); + if (status != 0) + return status; - if (status != 0) { - phba->hba_flag &= ~HBA_FW_DUMP_OP; - return status; + /* wait for the device to be quiesced before firmware reset */ + msleep(100); } - /* wait for the device to be quiesced before firmware reset */ - msleep(100); - reg_val = readl(phba->sli4_hba.conf_regs_memmap_p + LPFC_CTL_PDEV_CTL_OFFSET); @@ -1584,24 +1587,42 @@ lpfc_sli4_pdev_reg_request(struct lpfc_hba *phba, uint32_t opcode) lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "3153 Fail to perform the requested " "access: x%x\n", reg_val); + if (phba->fw_dump_cmpl) + phba->fw_dump_cmpl = NULL; return rc; } /* keep the original port state */ - if (before_fc_flag & FC_OFFLINE_MODE) - goto out; - - init_completion(&online_compl); - job_posted = lpfc_workq_post_event(phba, &status, &online_compl, - LPFC_EVT_ONLINE); - if (!job_posted) + if (before_fc_flag & FC_OFFLINE_MODE) { + if (phba->fw_dump_cmpl) + phba->fw_dump_cmpl = NULL; goto out; + } - wait_for_completion(&online_compl); + /* Firmware dump will trigger an HA_ERATT event, and + * lpfc_handle_eratt_s4 routine already handles bringing the port back + * online. + */ + if (opcode == LPFC_FW_DUMP) { + wait_for_completion(phba->fw_dump_cmpl); + } else { + init_completion(&online_compl); + job_posted = lpfc_workq_post_event(phba, &status, &online_compl, + LPFC_EVT_ONLINE); + if (!job_posted) + goto out; + wait_for_completion(&online_compl); + } out: /* in any case, restore the virtual functions enabled as before */ if (sriov_nr_virtfn) { + /* If fw_dump was performed, first disable to clean up */ + if (opcode == LPFC_FW_DUMP) { + pci_disable_sriov(pdev); + phba->cfg_sriov_nr_virtfn = 0; + } + sriov_err = lpfc_sli_probe_sriov_nr_virtfn(phba, sriov_nr_virtfn); if (!sriov_err) @@ -3821,8 +3842,8 @@ LPFC_ATTR_R(nvmet_mrq_post, * 3 - register both FCP and NVME * Supported values are [1,3]. Default value is 3 */ -LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_BOTH, - LPFC_ENABLE_FCP, LPFC_ENABLE_BOTH, +LPFC_ATTR_R(enable_fc4_type, LPFC_DEF_ENBL_FC4_TYPE, + LPFC_ENABLE_FCP, LPFC_MAX_ENBL_FC4_TYPE, "Enable FC4 Protocol support - FCP / NVME"); /* @@ -5881,7 +5902,8 @@ lpfc_sg_seg_cnt_show(struct device *dev, struct device_attribute *attr, len = scnprintf(buf, PAGE_SIZE, "SGL sz: %d total SGEs: %d\n", phba->cfg_sg_dma_buf_size, phba->cfg_total_seg_cnt); - len += scnprintf(buf + len, PAGE_SIZE, "Cfg: %d SCSI: %d NVME: %d\n", + len += scnprintf(buf + len, PAGE_SIZE - len, + "Cfg: %d SCSI: %d NVME: %d\n", phba->cfg_sg_seg_cnt, phba->cfg_scsi_seg_cnt, phba->cfg_nvme_seg_cnt); return len; diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 3c9248d2435e1..e15bb3dfe9956 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -2757,8 +2757,8 @@ lpfc_debugfs_nvmeio_trc_write(struct file *file, const char __user *buf, char mybuf[64]; char *pbuf; - if (nbytes > 64) - nbytes = 64; + if (nbytes > 63) + nbytes = 63; memset(mybuf, 0, sizeof(mybuf)); diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 2040affa08874..9951d63a8b493 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -1066,7 +1066,8 @@ lpfc_cmpl_els_flogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, /* FLOGI failed, so there is no fabric */ spin_lock_irq(shost->host_lock); - vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP); + vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP | + FC_PT2PT_NO_NVME); spin_unlock_irq(shost->host_lock); /* If private loop, then allow max outstanding els to be @@ -3740,6 +3741,23 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, /* Added for Vendor specifc support * Just keep retrying for these Rsn / Exp codes */ + if ((vport->fc_flag & FC_PT2PT) && + cmd == ELS_CMD_NVMEPRLI) { + switch (stat.un.b.lsRjtRsnCode) { + case LSRJT_UNABLE_TPC: + case LSRJT_INVALID_CMD: + case LSRJT_LOGICAL_ERR: + case LSRJT_CMD_UNSUPPORTED: + lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS, + "0168 NVME PRLI LS_RJT " + "reason %x port doesn't " + "support NVME, disabling NVME\n", + stat.un.b.lsRjtRsnCode); + retry = 0; + vport->fc_flag |= FC_PT2PT_NO_NVME; + goto out_retry; + } + } switch (stat.un.b.lsRjtRsnCode) { case LSRJT_UNABLE_TPC: /* The driver has a VALID PLOGI but the rport has diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 0dc1d56ff4709..0abce779fbb13 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -628,10 +628,16 @@ lpfc_work_done(struct lpfc_hba *phba) if (phba->pci_dev_grp == LPFC_PCI_DEV_OC) lpfc_sli4_post_async_mbox(phba); - if (ha_copy & HA_ERATT) + if (ha_copy & HA_ERATT) { /* Handle the error attention event */ lpfc_handle_eratt(phba); + if (phba->fw_dump_cmpl) { + complete(phba->fw_dump_cmpl); + phba->fw_dump_cmpl = NULL; + } + } + if (ha_copy & HA_MBATT) lpfc_sli_handle_mb_event(phba); diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index b8a772f80d6c5..cbea3e0c1a7d8 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -4249,6 +4249,9 @@ struct wqe_common { #define wqe_sup_SHIFT 6 #define wqe_sup_MASK 0x00000001 #define wqe_sup_WORD word11 +#define wqe_ffrq_SHIFT 6 +#define wqe_ffrq_MASK 0x00000001 +#define wqe_ffrq_WORD word11 #define wqe_wqec_SHIFT 7 #define wqe_wqec_MASK 0x00000001 #define wqe_wqec_WORD word11 diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index fdd87508c804d..e6a94f550a572 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -842,7 +842,8 @@ lpfc_rcv_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, else lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL); if (ndlp->nlp_DID == Fabric_DID) { - if (vport->port_state <= LPFC_FDISC) + if (vport->port_state <= LPFC_FDISC || + vport->fc_flag & FC_PT2PT) goto out; lpfc_linkdown_port(vport); spin_lock_irq(shost->host_lock); @@ -1987,8 +1988,9 @@ lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_vport *vport, * is configured try it. */ ndlp->nlp_fc4_type |= NLP_FC4_FCP; - if ((vport->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || - (vport->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) { + if ((!(vport->fc_flag & FC_PT2PT_NO_NVME)) && + (vport->cfg_enable_fc4_type == LPFC_ENABLE_BOTH || + vport->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) { ndlp->nlp_fc4_type |= NLP_FC4_NVME; /* We need to update the localport also */ lpfc_nvme_update_localport(vport); diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 5a86a1ee0de3b..193c1a81cac01 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -1202,7 +1202,8 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport, { struct lpfc_hba *phba = vport->phba; struct nvmefc_fcp_req *nCmd = lpfc_ncmd->nvmeCmd; - struct lpfc_iocbq *pwqeq = &(lpfc_ncmd->cur_iocbq); + struct nvme_common_command *sqe; + struct lpfc_iocbq *pwqeq = &lpfc_ncmd->cur_iocbq; union lpfc_wqe128 *wqe = &pwqeq->wqe; uint32_t req_len; @@ -1258,8 +1259,14 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport, cstat->control_requests++; } - if (pnode->nlp_nvme_info & NLP_NVME_NSLER) + if (pnode->nlp_nvme_info & NLP_NVME_NSLER) { bf_set(wqe_erp, &wqe->generic.wqe_com, 1); + sqe = &((struct nvme_fc_cmd_iu *) + nCmd->cmdaddr)->sqe.common; + if (sqe->opcode == nvme_admin_async_event) + bf_set(wqe_ffrq, &wqe->generic.wqe_com, 1); + } + /* * Finish initializing those WQE fields that are independent * of the nvme_cmnd request_buffer diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 4a7ceaa34341c..bd908dd273078 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -4498,12 +4498,6 @@ lpfc_sli4_brdreset(struct lpfc_hba *phba) phba->fcf.fcf_flag = 0; spin_unlock_irq(&phba->hbalock); - /* SLI4 INTF 2: if FW dump is being taken skip INIT_PORT */ - if (phba->hba_flag & HBA_FW_DUMP_OP) { - phba->hba_flag &= ~HBA_FW_DUMP_OP; - return rc; - } - /* Now physically reset the device */ lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "0389 Performing PCI function reset!\n"); @@ -19692,6 +19686,7 @@ lpfc_drain_txq(struct lpfc_hba *phba) fail_msg, piocbq->iotag, piocbq->sli4_xritag); list_add_tail(&piocbq->list, &completions); + fail_msg = NULL; } spin_unlock_irqrestore(&pring->ring_lock, iflags); } diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c index ff6d4aa924213..8b1ba690039b7 100644 --- a/drivers/scsi/megaraid.c +++ b/drivers/scsi/megaraid.c @@ -4635,7 +4635,7 @@ static int __init megaraid_init(void) * major number allocation. */ major = register_chrdev(0, "megadev_legacy", &megadev_fops); - if (!major) { + if (major < 0) { printk(KERN_WARNING "megaraid: failed to register char device\n"); } diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index 3d43ac9772f7e..aa62cc8ffd0af 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -2551,6 +2551,9 @@ struct megasas_instance_template { #define MEGASAS_IS_LOGICAL(sdev) \ ((sdev->channel < MEGASAS_MAX_PD_CHANNELS) ? 0 : 1) +#define MEGASAS_IS_LUN_VALID(sdev) \ + (((sdev)->lun == 0) ? 1 : 0) + #define MEGASAS_DEV_INDEX(scp) \ (((scp->device->channel % 2) * MEGASAS_MAX_DEV_PER_CHANNEL) + \ scp->device->id) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 6700d43b12ff5..a261ce511e9ed 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -2102,6 +2102,9 @@ static int megasas_slave_alloc(struct scsi_device *sdev) goto scan_target; } return -ENXIO; + } else if (!MEGASAS_IS_LUN_VALID(sdev)) { + sdev_printk(KERN_INFO, sdev, "%s: invalid LUN\n", __func__); + return -ENXIO; } scan_target: @@ -2132,6 +2135,10 @@ static void megasas_slave_destroy(struct scsi_device *sdev) instance = megasas_lookup_instance(sdev->host->host_no); if (MEGASAS_IS_LOGICAL(sdev)) { + if (!MEGASAS_IS_LUN_VALID(sdev)) { + sdev_printk(KERN_INFO, sdev, "%s: invalid LUN\n", __func__); + return; + } ld_tgt_id = MEGASAS_TARGET_ID(sdev); instance->ld_tgtid_status[ld_tgt_id] = LD_TARGET_ID_DELETED; if (megasas_dbg_lvl & LD_PD_DEBUG) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 3654cfc4376fa..97c1f242ef0a3 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -3387,7 +3387,7 @@ _scsih_ublock_io_device(struct MPT3SAS_ADAPTER *ioc, u64 sas_address) shost_for_each_device(sdev, ioc->shost) { sas_device_priv_data = sdev->hostdata; - if (!sas_device_priv_data) + if (!sas_device_priv_data || !sas_device_priv_data->sas_target) continue; if (sas_device_priv_data->sas_target->sas_address != sas_address) diff --git a/drivers/scsi/mvsas/mv_init.c b/drivers/scsi/mvsas/mv_init.c index 52405ce58ade8..0c5e2c6105867 100644 --- a/drivers/scsi/mvsas/mv_init.c +++ b/drivers/scsi/mvsas/mv_init.c @@ -646,6 +646,7 @@ static struct pci_device_id mvs_pci_table[] = { { PCI_VDEVICE(ARECA, PCI_DEVICE_ID_ARECA_1300), chip_1300 }, { PCI_VDEVICE(ARECA, PCI_DEVICE_ID_ARECA_1320), chip_1320 }, { PCI_VDEVICE(ADAPTEC2, 0x0450), chip_6440 }, + { PCI_VDEVICE(TTI, 0x2640), chip_6440 }, { PCI_VDEVICE(TTI, 0x2710), chip_9480 }, { PCI_VDEVICE(TTI, 0x2720), chip_9480 }, { PCI_VDEVICE(TTI, 0x2721), chip_9480 }, @@ -697,7 +698,7 @@ static ssize_t mvs_show_driver_version(struct device *cdev, struct device_attribute *attr, char *buffer) { - return snprintf(buffer, PAGE_SIZE, "%s\n", DRV_VERSION); + return sysfs_emit(buffer, "%s\n", DRV_VERSION); } static DEVICE_ATTR(driver_version, @@ -749,7 +750,7 @@ mvs_store_interrupt_coalescing(struct device *cdev, static ssize_t mvs_show_interrupt_coalescing(struct device *cdev, struct device_attribute *attr, char *buffer) { - return snprintf(buffer, PAGE_SIZE, "%d\n", interrupt_coalescing); + return sysfs_emit(buffer, "%d\n", interrupt_coalescing); } static DEVICE_ATTR(interrupt_coalescing, diff --git a/drivers/scsi/myrb.c b/drivers/scsi/myrb.c index 539ac8ce4fcd7..35b32920a94a0 100644 --- a/drivers/scsi/myrb.c +++ b/drivers/scsi/myrb.c @@ -1241,7 +1241,8 @@ static void myrb_cleanup(struct myrb_hba *cb) myrb_unmap(cb); if (cb->mmio_base) { - cb->disable_intr(cb->io_base); + if (cb->disable_intr) + cb->disable_intr(cb->io_base); iounmap(cb->mmio_base); } if (cb->irq) @@ -3516,9 +3517,13 @@ static struct myrb_hba *myrb_detect(struct pci_dev *pdev, mutex_init(&cb->dcmd_mutex); mutex_init(&cb->dma_mutex); cb->pdev = pdev; + cb->host = shost; - if (pci_enable_device(pdev)) - goto failure; + if (pci_enable_device(pdev)) { + dev_err(&pdev->dev, "Failed to enable PCI device\n"); + scsi_host_put(shost); + return NULL; + } if (privdata->hw_init == DAC960_PD_hw_init || privdata->hw_init == DAC960_P_hw_init) { diff --git a/drivers/scsi/myrs.c b/drivers/scsi/myrs.c index cfc3f8b4174ab..2d3d14aa46b4b 100644 --- a/drivers/scsi/myrs.c +++ b/drivers/scsi/myrs.c @@ -2272,7 +2272,8 @@ static void myrs_cleanup(struct myrs_hba *cs) myrs_unmap(cs); if (cs->mmio_base) { - cs->disable_intr(cs); + if (cs->disable_intr) + cs->disable_intr(cs); iounmap(cs->mmio_base); cs->mmio_base = NULL; } diff --git a/drivers/scsi/pcmcia/fdomain_cs.c b/drivers/scsi/pcmcia/fdomain_cs.c index e42acf314d068..33df6a9ba9b5f 100644 --- a/drivers/scsi/pcmcia/fdomain_cs.c +++ b/drivers/scsi/pcmcia/fdomain_cs.c @@ -45,8 +45,10 @@ static int fdomain_probe(struct pcmcia_device *link) goto fail_disable; if (!request_region(link->resource[0]->start, FDOMAIN_REGION_SIZE, - "fdomain_cs")) + "fdomain_cs")) { + ret = -EBUSY; goto fail_disable; + } sh = fdomain_create(link->resource[0]->start, link->irq, 7, &link->dev); if (!sh) { diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c index 68a8217032d0f..fec653b54307a 100644 --- a/drivers/scsi/pm8001/pm8001_hwi.c +++ b/drivers/scsi/pm8001/pm8001_hwi.c @@ -1750,6 +1750,7 @@ static void pm8001_send_abort_all(struct pm8001_hba_info *pm8001_ha, ccb->device = pm8001_ha_dev; ccb->ccb_tag = ccb_tag; ccb->task = task; + ccb->n_elem = 0; circularQ = &pm8001_ha->inbnd_q_tbl[0]; @@ -1812,6 +1813,7 @@ static void pm8001_send_read_log(struct pm8001_hba_info *pm8001_ha, ccb->device = pm8001_ha_dev; ccb->ccb_tag = ccb_tag; ccb->task = task; + ccb->n_elem = 0; pm8001_ha_dev->id |= NCQ_READ_LOG_FLAG; pm8001_ha_dev->id |= NCQ_2ND_RLE_FLAG; @@ -1828,7 +1830,7 @@ static void pm8001_send_read_log(struct pm8001_hba_info *pm8001_ha, sata_cmd.tag = cpu_to_le32(ccb_tag); sata_cmd.device_id = cpu_to_le32(pm8001_ha_dev->device_id); - sata_cmd.ncqtag_atap_dir_m |= ((0x1 << 7) | (0x5 << 9)); + sata_cmd.ncqtag_atap_dir_m = cpu_to_le32((0x1 << 7) | (0x5 << 9)); memcpy(&sata_cmd.sata_fis, &fis, sizeof(struct host_to_dev_fis)); res = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &sata_cmd, 0); @@ -3773,12 +3775,11 @@ int pm8001_mpi_task_abort_resp(struct pm8001_hba_info *pm8001_ha, void *piomb) mb(); if (pm8001_dev->id & NCQ_ABORT_ALL_FLAG) { - pm8001_tag_free(pm8001_ha, tag); sas_free_task(t); - /* clear the flag */ - pm8001_dev->id &= 0xBFFFFFFF; - } else + pm8001_dev->id &= ~NCQ_ABORT_ALL_FLAG; + } else { t->task_done(t); + } return 0; } @@ -4727,7 +4728,7 @@ int pm8001_chip_ssp_tm_req(struct pm8001_hba_info *pm8001_ha, memcpy(sspTMCmd.lun, task->ssp_task.LUN, 8); sspTMCmd.tag = cpu_to_le32(ccb->ccb_tag); if (pm8001_ha->chip_id != chip_8001) - sspTMCmd.ds_ads_m = 0x08; + sspTMCmd.ds_ads_m = cpu_to_le32(0x08); circularQ = &pm8001_ha->inbnd_q_tbl[0]; ret = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &sspTMCmd, 0); return ret; diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index 161bf4760eac7..ce67965a504fa 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -881,9 +881,11 @@ pm80xx_set_thermal_config(struct pm8001_hba_info *pm8001_ha) else page_code = THERMAL_PAGE_CODE_8H; - payload.cfg_pg[0] = (THERMAL_LOG_ENABLE << 9) | - (THERMAL_ENABLE << 8) | page_code; - payload.cfg_pg[1] = (LTEMPHIL << 24) | (RTEMPHIL << 8); + payload.cfg_pg[0] = + cpu_to_le32((THERMAL_LOG_ENABLE << 9) | + (THERMAL_ENABLE << 8) | page_code); + payload.cfg_pg[1] = + cpu_to_le32((LTEMPHIL << 24) | (RTEMPHIL << 8)); rc = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &payload, 0); if (rc) @@ -1435,6 +1437,7 @@ static void pm80xx_send_abort_all(struct pm8001_hba_info *pm8001_ha, ccb->device = pm8001_ha_dev; ccb->ccb_tag = ccb_tag; ccb->task = task; + ccb->n_elem = 0; circularQ = &pm8001_ha->inbnd_q_tbl[0]; @@ -1516,7 +1519,7 @@ static void pm80xx_send_read_log(struct pm8001_hba_info *pm8001_ha, sata_cmd.tag = cpu_to_le32(ccb_tag); sata_cmd.device_id = cpu_to_le32(pm8001_ha_dev->device_id); - sata_cmd.ncqtag_atap_dir_m_dad |= ((0x1 << 7) | (0x5 << 9)); + sata_cmd.ncqtag_atap_dir_m_dad = cpu_to_le32(((0x1 << 7) | (0x5 << 9))); memcpy(&sata_cmd.sata_fis, &fis, sizeof(struct host_to_dev_fis)); res = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &sata_cmd, 0); diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index 398d2af60832c..f95a970db8fdb 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -4532,7 +4532,7 @@ pmcraid_register_interrupt_handler(struct pmcraid_instance *pinstance) return 0; out_unwind: - while (--i > 0) + while (--i >= 0) free_irq(pci_irq_vector(pdev, i), &pinstance->hrrq_vector[i]); pci_free_irq_vectors(pdev); return rc; diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c index 4e8a284e606c0..d02d1ef0d0116 100644 --- a/drivers/scsi/qedf/qedf_io.c +++ b/drivers/scsi/qedf/qedf_io.c @@ -2253,6 +2253,7 @@ int qedf_initiate_cleanup(struct qedf_ioreq *io_req, io_req->tm_flags == FCP_TMF_TGT_RESET) { clear_bit(QEDF_CMD_OUTSTANDING, &io_req->flags); io_req->sc_cmd = NULL; + kref_put(&io_req->refcount, qedf_release_cmd); complete(&io_req->tm_done); } diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index 7a6306f8483ec..c95e04cc64240 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -2894,7 +2894,7 @@ static int qedf_alloc_global_queues(struct qedf_ctx *qedf) { u32 *list; int i; - int status = 0, rc; + int status; u32 *pbl; dma_addr_t page; int num_pages; @@ -2906,7 +2906,7 @@ static int qedf_alloc_global_queues(struct qedf_ctx *qedf) */ if (!qedf->num_queues) { QEDF_ERR(&(qedf->dbg_ctx), "No MSI-X vectors available!\n"); - return 1; + return -ENOMEM; } /* @@ -2914,7 +2914,7 @@ static int qedf_alloc_global_queues(struct qedf_ctx *qedf) * addresses of our queues */ if (!qedf->p_cpuq) { - status = 1; + status = -EINVAL; QEDF_ERR(&qedf->dbg_ctx, "p_cpuq is NULL.\n"); goto mem_alloc_failure; } @@ -2930,8 +2930,8 @@ static int qedf_alloc_global_queues(struct qedf_ctx *qedf) "qedf->global_queues=%p.\n", qedf->global_queues); /* Allocate DMA coherent buffers for BDQ */ - rc = qedf_alloc_bdq(qedf); - if (rc) { + status = qedf_alloc_bdq(qedf); + if (status) { QEDF_ERR(&qedf->dbg_ctx, "Unable to allocate bdq.\n"); goto mem_alloc_failure; } diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c index 755f66b1ff9c7..f05fb4ddeaff0 100644 --- a/drivers/scsi/qedi/qedi_iscsi.c +++ b/drivers/scsi/qedi/qedi_iscsi.c @@ -797,6 +797,37 @@ static int qedi_task_xmit(struct iscsi_task *task) return qedi_iscsi_send_ioreq(task); } +static void qedi_offload_work(struct work_struct *work) +{ + struct qedi_endpoint *qedi_ep = + container_of(work, struct qedi_endpoint, offload_work); + struct qedi_ctx *qedi; + int wait_delay = 5 * HZ; + int ret; + + qedi = qedi_ep->qedi; + + ret = qedi_iscsi_offload_conn(qedi_ep); + if (ret) { + QEDI_ERR(&qedi->dbg_ctx, + "offload error: iscsi_cid=%u, qedi_ep=%p, ret=%d\n", + qedi_ep->iscsi_cid, qedi_ep, ret); + qedi_ep->state = EP_STATE_OFLDCONN_FAILED; + return; + } + + ret = wait_event_interruptible_timeout(qedi_ep->tcp_ofld_wait, + (qedi_ep->state == + EP_STATE_OFLDCONN_COMPL), + wait_delay); + if (ret <= 0 || qedi_ep->state != EP_STATE_OFLDCONN_COMPL) { + qedi_ep->state = EP_STATE_OFLDCONN_FAILED; + QEDI_ERR(&qedi->dbg_ctx, + "Offload conn TIMEOUT iscsi_cid=%u, qedi_ep=%p\n", + qedi_ep->iscsi_cid, qedi_ep); + } +} + static struct iscsi_endpoint * qedi_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr, int non_blocking) @@ -840,6 +871,7 @@ qedi_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr, } qedi_ep = ep->dd_data; memset(qedi_ep, 0, sizeof(struct qedi_endpoint)); + INIT_WORK(&qedi_ep->offload_work, qedi_offload_work); qedi_ep->state = EP_STATE_IDLE; qedi_ep->iscsi_cid = (u32)-1; qedi_ep->qedi = qedi; @@ -996,12 +1028,11 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep) qedi_ep = ep->dd_data; qedi = qedi_ep->qedi; + flush_work(&qedi_ep->offload_work); + if (qedi_ep->state == EP_STATE_OFLDCONN_START) goto ep_exit_recover; - if (qedi_ep->state != EP_STATE_OFLDCONN_NONE) - flush_work(&qedi_ep->offload_work); - if (qedi_ep->conn) { qedi_conn = qedi_ep->conn; conn = qedi_conn->cls_conn->dd_data; @@ -1161,37 +1192,6 @@ static int qedi_data_avail(struct qedi_ctx *qedi, u16 vlanid) return rc; } -static void qedi_offload_work(struct work_struct *work) -{ - struct qedi_endpoint *qedi_ep = - container_of(work, struct qedi_endpoint, offload_work); - struct qedi_ctx *qedi; - int wait_delay = 5 * HZ; - int ret; - - qedi = qedi_ep->qedi; - - ret = qedi_iscsi_offload_conn(qedi_ep); - if (ret) { - QEDI_ERR(&qedi->dbg_ctx, - "offload error: iscsi_cid=%u, qedi_ep=%p, ret=%d\n", - qedi_ep->iscsi_cid, qedi_ep, ret); - qedi_ep->state = EP_STATE_OFLDCONN_FAILED; - return; - } - - ret = wait_event_interruptible_timeout(qedi_ep->tcp_ofld_wait, - (qedi_ep->state == - EP_STATE_OFLDCONN_COMPL), - wait_delay); - if ((ret <= 0) || (qedi_ep->state != EP_STATE_OFLDCONN_COMPL)) { - qedi_ep->state = EP_STATE_OFLDCONN_FAILED; - QEDI_ERR(&qedi->dbg_ctx, - "Offload conn TIMEOUT iscsi_cid=%u, qedi_ep=%p\n", - qedi_ep->iscsi_cid, qedi_ep); - } -} - static int qedi_set_path(struct Scsi_Host *shost, struct iscsi_path *path_data) { struct qedi_ctx *qedi; @@ -1307,7 +1307,6 @@ static int qedi_set_path(struct Scsi_Host *shost, struct iscsi_path *path_data) qedi_ep->dst_addr, qedi_ep->dst_port); } - INIT_WORK(&qedi_ep->offload_work, qedi_offload_work); queue_work(qedi->offload_thread, &qedi_ep->offload_work); ret = 0; diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 1ec42c5f0b2a0..92c4a367b7bd7 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -1553,7 +1553,7 @@ static int qedi_alloc_global_queues(struct qedi_ctx *qedi) { u32 *list; int i; - int status = 0, rc; + int status; u32 *pbl; dma_addr_t page; int num_pages; @@ -1564,14 +1564,14 @@ static int qedi_alloc_global_queues(struct qedi_ctx *qedi) */ if (!qedi->num_queues) { QEDI_ERR(&qedi->dbg_ctx, "No MSI-X vectors available!\n"); - return 1; + return -ENOMEM; } /* Make sure we allocated the PBL that will contain the physical * addresses of our queues */ if (!qedi->p_cpuq) { - status = 1; + status = -EINVAL; goto mem_alloc_failure; } @@ -1586,13 +1586,13 @@ static int qedi_alloc_global_queues(struct qedi_ctx *qedi) "qedi->global_queues=%p.\n", qedi->global_queues); /* Allocate DMA coherent buffers for BDQ */ - rc = qedi_alloc_bdq(qedi); - if (rc) + status = qedi_alloc_bdq(qedi); + if (status) goto mem_alloc_failure; /* Allocate DMA coherent buffers for NVM_ISCSI_CFG */ - rc = qedi_alloc_nvm_iscsi_cfg(qedi); - if (rc) + status = qedi_alloc_nvm_iscsi_cfg(qedi); + if (status) goto mem_alloc_failure; /* Allocate a CQ and an associated PBL for each MSI-X diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 580d30cd5c35c..59f5dc9876cc5 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -527,7 +527,7 @@ qla2x00_sysfs_read_vpd(struct file *filp, struct kobject *kobj, if (!capable(CAP_SYS_ADMIN)) return -EINVAL; - if (IS_NOCACHE_VPD_TYPE(ha)) + if (!IS_NOCACHE_VPD_TYPE(ha)) goto skip; faddr = ha->flt_region_vpd << 2; @@ -710,7 +710,7 @@ qla2x00_sysfs_write_reset(struct file *filp, struct kobject *kobj, ql_log(ql_log_info, vha, 0x706f, "Issuing MPI reset.\n"); - if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) { + if (IS_QLA83XX(ha)) { uint32_t idc_control; qla83xx_idc_lock(vha, 0); @@ -1020,9 +1020,6 @@ qla2x00_free_sysfs_attr(scsi_qla_host_t *vha, bool stop_beacon) continue; if (iter->type == 3 && !(IS_CNA_CAPABLE(ha))) continue; - if (iter->type == 0x27 && - (!IS_QLA27XX(ha) || !IS_QLA28XX(ha))) - continue; sysfs_remove_bin_file(&host->shost_gendev.kobj, iter->attr); @@ -1759,6 +1756,18 @@ qla2x00_port_speed_store(struct device *dev, struct device_attribute *attr, return strlen(buf); } +static const struct { + u16 rate; + char *str; +} port_speed_str[] = { + { PORT_SPEED_4GB, "4" }, + { PORT_SPEED_8GB, "8" }, + { PORT_SPEED_16GB, "16" }, + { PORT_SPEED_32GB, "32" }, + { PORT_SPEED_64GB, "64" }, + { PORT_SPEED_10GB, "10" }, +}; + static ssize_t qla2x00_port_speed_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -1766,7 +1775,8 @@ qla2x00_port_speed_show(struct device *dev, struct device_attribute *attr, struct scsi_qla_host *vha = shost_priv(dev_to_shost(dev)); struct qla_hw_data *ha = vha->hw; ssize_t rval; - char *spd[7] = {"0", "0", "0", "4", "8", "16", "32"}; + u16 i; + char *speed = "Unknown"; rval = qla2x00_get_data_rate(vha); if (rval != QLA_SUCCESS) { @@ -1775,7 +1785,14 @@ qla2x00_port_speed_show(struct device *dev, struct device_attribute *attr, return -EINVAL; } - return scnprintf(buf, PAGE_SIZE, "%s\n", spd[ha->link_data_rate]); + for (i = 0; i < ARRAY_SIZE(port_speed_str); i++) { + if (port_speed_str[i].rate != ha->link_data_rate) + continue; + speed = port_speed_str[i].str; + break; + } + + return scnprintf(buf, PAGE_SIZE, "%s\n", speed); } /* ----- */ diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 7c22f8eea3ead..caf28c5281aa5 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -2725,7 +2725,11 @@ struct ct_fdmiv2_hba_attributes { #define FDMI_PORT_SPEED_8GB 0x10 #define FDMI_PORT_SPEED_16GB 0x20 #define FDMI_PORT_SPEED_32GB 0x40 -#define FDMI_PORT_SPEED_64GB 0x80 +#define FDMI_PORT_SPEED_20GB 0x80 +#define FDMI_PORT_SPEED_40GB 0x100 +#define FDMI_PORT_SPEED_128GB 0x200 +#define FDMI_PORT_SPEED_64GB 0x400 +#define FDMI_PORT_SPEED_256GB 0x800 #define FDMI_PORT_SPEED_UNKNOWN 0x8000 #define FC_CLASS_2 0x04 @@ -4866,4 +4870,8 @@ struct sff_8247_a0 { #include "qla_gbl.h" #include "qla_dbg.h" #include "qla_inline.h" + +#define IS_SESSION_DELETED(_fcport) (_fcport->disc_state == DSC_DELETE_PEND || \ + _fcport->disc_state == DSC_DELETED) + #endif diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index 7aa233771ec86..1a98e37c9be22 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -156,7 +156,6 @@ extern int ql2xasynctmfenable; extern int ql2xgffidenable; extern int ql2xenabledif; extern int ql2xenablehba_err_chk; -extern int ql2xtargetreset; extern int ql2xdontresethba; extern uint64_t ql2xmaxlun; extern int ql2xmdcapmask; @@ -770,7 +769,6 @@ extern void qlafx00_abort_iocb(srb_t *, struct abort_iocb_entry_fx00 *); extern void qlafx00_fxdisc_iocb(srb_t *, struct fxdisc_entry_fx00 *); extern void qlafx00_timer_routine(scsi_qla_host_t *); extern int qlafx00_rescan_isp(scsi_qla_host_t *); -extern int qlafx00_loop_reset(scsi_qla_host_t *vha); /* qla82xx related functions */ diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c index d9b5ea77fde99..e9f03370afba3 100644 --- a/drivers/scsi/qla2xxx/qla_gs.c +++ b/drivers/scsi/qla2xxx/qla_gs.c @@ -675,8 +675,7 @@ qla2x00_rff_id(scsi_qla_host_t *vha, u8 type) return (QLA_SUCCESS); } - return qla_async_rffid(vha, &vha->d_id, qlt_rff_id(vha), - FC4_TYPE_FCP_SCSI); + return qla_async_rffid(vha, &vha->d_id, qlt_rff_id(vha), type); } static int qla_async_rffid(scsi_qla_host_t *vha, port_id_t *d_id, @@ -726,7 +725,7 @@ static int qla_async_rffid(scsi_qla_host_t *vha, port_id_t *d_id, /* Prepare CT arguments -- port_id, FC-4 feature, FC-4 type */ ct_req->req.rff_id.port_id = port_id_to_be_id(*d_id); ct_req->req.rff_id.fc4_feature = fc4feature; - ct_req->req.rff_id.fc4_type = fc4type; /* SCSI - FCP */ + ct_req->req.rff_id.fc4_type = fc4type; /* SCSI-FCP or FC-NVMe */ sp->u.iocb_cmd.u.ctarg.req_size = RFF_ID_REQ_SIZE; sp->u.iocb_cmd.u.ctarg.rsp_size = RFF_ID_RSP_SIZE; diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 643b8ae36cbeb..8d8a0ead61474 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -572,6 +572,14 @@ qla2x00_async_adisc(struct scsi_qla_host *vha, fc_port_t *fcport, struct srb_iocb *lio; int rval = QLA_FUNCTION_FAILED; + if (IS_SESSION_DELETED(fcport)) { + ql_log(ql_log_warn, vha, 0xffff, + "%s: %8phC is being delete - not sending command.\n", + __func__, fcport->port_name); + fcport->flags &= ~FCF_ASYNC_ACTIVE; + return rval; + } + if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT)) return rval; @@ -955,6 +963,9 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha, set_bit(RELOGIN_NEEDED, &vha->dpc_flags); } break; + case ISP_CFG_NL: + qla24xx_fcport_handle_login(vha, fcport); + break; default: break; } @@ -978,8 +989,6 @@ static void qla24xx_async_gnl_sp_done(srb_t *sp, int res) sp->name, res, sp->u.iocb_cmd.u.mbx.in_mb[1], sp->u.iocb_cmd.u.mbx.in_mb[2]); - if (res == QLA_FUNCTION_TIMEOUT) - return; sp->fcport->flags &= ~(FCF_ASYNC_SENT|FCF_ASYNC_ACTIVE); memset(&ea, 0, sizeof(ea)); @@ -1017,8 +1026,8 @@ static void qla24xx_async_gnl_sp_done(srb_t *sp, int res) spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); list_for_each_entry_safe(fcport, tf, &h, gnl_entry) { - list_del_init(&fcport->gnl_entry); spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); + list_del_init(&fcport->gnl_entry); fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); ea.fcport = fcport; @@ -1313,14 +1322,21 @@ int qla24xx_async_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt) struct port_database_24xx *pd; struct qla_hw_data *ha = vha->hw; - if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT) || - fcport->loop_id == FC_NO_LOOP_ID) { + if (IS_SESSION_DELETED(fcport)) { ql_log(ql_log_warn, vha, 0xffff, - "%s: %8phC - not sending command.\n", - __func__, fcport->port_name); + "%s: %8phC is being delete - not sending command.\n", + __func__, fcport->port_name); + fcport->flags &= ~FCF_ASYNC_ACTIVE; return rval; } + if (!vha->flags.online || fcport->flags & FCF_ASYNC_SENT) { + ql_log(ql_log_warn, vha, 0xffff, + "%s: %8phC online %d flags %x - not sending command.\n", + __func__, fcport->port_name, vha->flags.online, fcport->flags); + goto done; + } + sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) goto done; @@ -1479,6 +1495,11 @@ static void qla_chk_n2n_b4_login(struct scsi_qla_host *vha, fc_port_t *fcport) u8 login = 0; int rc; + ql_dbg(ql_dbg_disc, vha, 0x307b, + "%s %8phC DS %d LS %d lid %d retries=%d\n", + __func__, fcport->port_name, fcport->disc_state, + fcport->fw_login_state, fcport->loop_id, fcport->login_retry); + if (qla_tgt_mode_enabled(vha)) return; @@ -1536,7 +1557,8 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport) fcport->conflict, fcport->last_rscn_gen, fcport->rscn_gen, fcport->login_gen, fcport->loop_id, fcport->scan_state); - if (fcport->scan_state != QLA_FCPORT_FOUND) + if (fcport->scan_state != QLA_FCPORT_FOUND || + fcport->disc_state == DSC_DELETE_PEND) return 0; if ((fcport->loop_id != FC_NO_LOOP_ID) && @@ -1557,7 +1579,7 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport) if (vha->host->active_mode == MODE_TARGET) return 0; - if (fcport->flags & FCF_ASYNC_SENT) { + if (fcport->flags & (FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE)) { set_bit(RELOGIN_NEEDED, &vha->dpc_flags); return 0; } @@ -2264,8 +2286,18 @@ qla2x00_initialize_adapter(scsi_qla_host_t *vha) ql_dbg(ql_dbg_init, vha, 0x0061, "Configure NVRAM parameters...\n"); + /* Let priority default to FCP, can be overridden by nvram_config */ + ha->fc4_type_priority = FC4_PRIORITY_FCP; + ha->isp_ops->nvram_config(vha); + if (ha->fc4_type_priority != FC4_PRIORITY_FCP && + ha->fc4_type_priority != FC4_PRIORITY_NVME) + ha->fc4_type_priority = FC4_PRIORITY_FCP; + + ql_log(ql_log_info, vha, 0xffff, "FC4 priority set to %s\n", + ha->fc4_type_priority == FC4_PRIORITY_FCP ? "FCP" : "NVMe"); + if (ha->flags.disable_serdes) { /* Mask HBA via NVRAM settings? */ ql_log(ql_log_info, vha, 0x0077, @@ -3225,6 +3257,14 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) struct rsp_que *rsp = ha->rsp_q_map[0]; struct qla2xxx_fw_dump *fw_dump; + if (ha->fw_dump) { + ql_dbg(ql_dbg_init, vha, 0x00bd, + "Firmware dump already allocated.\n"); + return; + } + + ha->fw_dumped = 0; + ha->fw_dump_cap_flags = 0; dump_size = fixed_size = mem_size = eft_size = fce_size = mq_size = 0; req_q_size = rsp_q_size = 0; @@ -3235,7 +3275,7 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) mem_size = (ha->fw_memory_size - 0x11000 + 1) * sizeof(uint16_t); } else if (IS_FWI2_CAPABLE(ha)) { - if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) + if (IS_QLA83XX(ha)) fixed_size = offsetof(struct qla83xx_fw_dump, ext_mem); else if (IS_QLA81XX(ha)) fixed_size = offsetof(struct qla81xx_fw_dump, ext_mem); @@ -3247,8 +3287,7 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) mem_size = (ha->fw_memory_size - 0x100000 + 1) * sizeof(uint32_t); if (ha->mqenable) { - if (!IS_QLA83XX(ha) && !IS_QLA27XX(ha) && - !IS_QLA28XX(ha)) + if (!IS_QLA83XX(ha)) mq_size = sizeof(struct qla2xxx_mq_chain); /* * Allocate maximum buffer size for all queues - Q0. @@ -3745,8 +3784,7 @@ qla2x00_setup_chip(scsi_qla_host_t *vha) ha->fw_major_version, ha->fw_minor_version, ha->fw_subminor_version); - if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || - IS_QLA28XX(ha)) { + if (IS_QLA83XX(ha)) { ha->flags.fac_supported = 0; rval = QLA_SUCCESS; } @@ -5219,6 +5257,13 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha) memcpy(fcport->node_name, new_fcport->node_name, WWN_SIZE); fcport->scan_state = QLA_FCPORT_FOUND; + if (fcport->login_retry == 0) { + fcport->login_retry = vha->hw->login_retry_count; + ql_dbg(ql_dbg_disc, vha, 0x2135, + "Port login retry %8phN, lid 0x%04x retry cnt=%d.\n", + fcport->port_name, fcport->loop_id, + fcport->login_retry); + } found++; break; } @@ -5353,6 +5398,8 @@ qla2x00_reg_remote_port(scsi_qla_host_t *vha, fc_port_t *fcport) if (atomic_read(&fcport->state) == FCS_ONLINE) return; + qla2x00_set_fcport_state(fcport, FCS_ONLINE); + rport_ids.node_name = wwn_to_u64(fcport->node_name); rport_ids.port_name = wwn_to_u64(fcport->port_name); rport_ids.port_id = fcport->d_id.b.domain << 16 | @@ -5448,6 +5495,7 @@ qla2x00_update_fcport(scsi_qla_host_t *vha, fc_port_t *fcport) qla2x00_reg_remote_port(vha, fcport); break; case MODE_TARGET: + qla2x00_set_fcport_state(fcport, FCS_ONLINE); if (!vha->vha_tgt.qla_tgt->tgt_stop && !vha->vha_tgt.qla_tgt->tgt_stopped) qlt_fc_port_added(vha, fcport); @@ -5462,8 +5510,6 @@ qla2x00_update_fcport(scsi_qla_host_t *vha, fc_port_t *fcport) break; } - qla2x00_set_fcport_state(fcport, FCS_ONLINE); - if (IS_IIDMA_CAPABLE(vha->hw) && vha->hw->flags.gpsc_supported) { if (fcport->id_changed) { fcport->id_changed = 0; @@ -6803,7 +6849,8 @@ qla2x00_abort_isp(scsi_qla_host_t *vha) return 0; break; case QLA2XXX_INI_MODE_DUAL: - if (!qla_dual_mode_enabled(vha)) + if (!qla_dual_mode_enabled(vha) && + !qla_ini_mode_enabled(vha)) return 0; break; case QLA2XXX_INI_MODE_ENABLED: @@ -8565,8 +8612,6 @@ qla81xx_nvram_config(scsi_qla_host_t *vha) /* Determine NVMe/FCP priority for target ports */ ha->fc4_type_priority = qla2xxx_get_fc4_priority(vha); - ql_log(ql_log_info, vha, 0xffff, "FC4 priority set to %s\n", - ha->fc4_type_priority & BIT_0 ? "FCP" : "NVMe"); if (rval) { ql_log(ql_log_warn, vha, 0x0076, @@ -9002,7 +9047,7 @@ struct qla_qpair *qla2xxx_create_qpair(struct scsi_qla_host *vha, int qos, qpair->rsp->req = qpair->req; qpair->rsp->qpair = qpair; /* init qpair to this cpu. Will adjust at run time. */ - qla_cpu_update(qpair, smp_processor_id()); + qla_cpu_update(qpair, raw_smp_processor_id()); if (IS_T10_PI_CAPABLE(ha) && ql2xenabledif) { if (ha->fw_attributes & BIT_4) diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 936103604d02d..103288b0377e0 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -2859,6 +2859,7 @@ static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res) set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); qla2xxx_wake_dpc(vha); + break; } /* fall through */ default: @@ -2868,9 +2869,7 @@ static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res) fw_status[0], fw_status[1], fw_status[2]); fcport->flags &= ~FCF_ASYNC_SENT; - qla2x00_set_fcport_disc_state(fcport, - DSC_LOGIN_FAILED); - set_bit(RELOGIN_NEEDED, &vha->dpc_flags); + qlt_schedule_sess_for_deletion(fcport); break; } break; @@ -2882,8 +2881,7 @@ static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res) fw_status[0], fw_status[1], fw_status[2]); sp->fcport->flags &= ~FCF_ASYNC_SENT; - qla2x00_set_fcport_disc_state(fcport, DSC_LOGIN_FAILED); - set_bit(RELOGIN_NEEDED, &vha->dpc_flags); + qlt_schedule_sess_for_deletion(fcport); break; } diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 3e9c5768815e5..d7cf7f9570874 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1839,6 +1839,7 @@ qla24xx_tm_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, void *tsk) iocb->u.tmf.data = QLA_FUNCTION_FAILED; } else if ((le16_to_cpu(sts->scsi_status) & SS_RESPONSE_INFO_LEN_VALID)) { + host_to_fcp_swap(sts->data, sizeof(sts->data)); if (le32_to_cpu(sts->rsp_data_len) < 4) { ql_log(ql_log_warn, fcport->vha, 0x503b, "Async-%s error - hdl=%x not enough response(%d).\n", diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 098388a12febc..29f2730fbf66a 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -10,6 +10,12 @@ #include #include +#ifdef CONFIG_PPC +#define IS_PPCARCH true +#else +#define IS_PPCARCH false +#endif + static struct mb_cmd_name { uint16_t cmd; const char *str; @@ -731,6 +737,9 @@ qla2x00_execute_fw(scsi_qla_host_t *vha, uint32_t risc_addr) vha->min_supported_speed = nv->min_supported_speed; } + + if (IS_PPCARCH) + mcp->mb[11] |= BIT_4; } if (ha->flags.exlogins_enabled) @@ -2897,8 +2906,7 @@ qla2x00_get_resource_cnts(scsi_qla_host_t *vha) ha->orig_fw_iocb_count = mcp->mb[10]; if (ha->flags.npiv_supported) ha->max_npiv_vports = mcp->mb[11]; - if (IS_QLA81XX(ha) || IS_QLA83XX(ha) || IS_QLA27XX(ha) || - IS_QLA28XX(ha)) + if (IS_QLA81XX(ha) || IS_QLA83XX(ha)) ha->fw_max_fcf_count = mcp->mb[12]; } @@ -5391,7 +5399,7 @@ qla2x00_get_data_rate(scsi_qla_host_t *vha) mcp->out_mb = MBX_1|MBX_0; mcp->in_mb = MBX_2|MBX_1|MBX_0; if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) - mcp->in_mb |= MBX_3; + mcp->in_mb |= MBX_4|MBX_3; mcp->tov = MBX_TOV_SECONDS; mcp->flags = 0; rval = qla2x00_mailbox_command(vha, mcp); diff --git a/drivers/scsi/qla2xxx/qla_mr.c b/drivers/scsi/qla2xxx/qla_mr.c index 605b59c76c901..badd09c5dd429 100644 --- a/drivers/scsi/qla2xxx/qla_mr.c +++ b/drivers/scsi/qla2xxx/qla_mr.c @@ -740,29 +740,6 @@ qlafx00_lun_reset(fc_port_t *fcport, uint64_t l, int tag) return qla2x00_async_tm_cmd(fcport, TCF_LUN_RESET, l, tag); } -int -qlafx00_loop_reset(scsi_qla_host_t *vha) -{ - int ret; - struct fc_port *fcport; - struct qla_hw_data *ha = vha->hw; - - if (ql2xtargetreset) { - list_for_each_entry(fcport, &vha->vp_fcports, list) { - if (fcport->port_type != FCT_TARGET) - continue; - - ret = ha->isp_ops->target_reset(fcport, 0, 0); - if (ret != QLA_SUCCESS) { - ql_dbg(ql_dbg_taskm, vha, 0x803d, - "Bus Reset failed: Reset=%d " - "d_id=%x.\n", ret, fcport->d_id.b24); - } - } - } - return QLA_SUCCESS; -} - int qlafx00_iospace_config(struct qla_hw_data *ha) { diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index 11656e864fca9..a15af048cd820 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -36,6 +36,11 @@ int qla_nvme_register_remote(struct scsi_qla_host *vha, struct fc_port *fcport) (fcport->nvme_flag & NVME_FLAG_REGISTERED)) return 0; + if (atomic_read(&fcport->state) == FCS_ONLINE) + return 0; + + qla2x00_set_fcport_state(fcport, FCS_ONLINE); + fcport->nvme_flag &= ~NVME_FLAG_RESETTING; memset(&req, 0, sizeof(struct nvme_fc_port_info)); @@ -84,8 +89,9 @@ static int qla_nvme_alloc_queue(struct nvme_fc_local_port *lport, struct qla_hw_data *ha; struct qla_qpair *qpair; - if (!qidx) - qidx++; + /* Map admin queue and 1st IO queue to index 0 */ + if (qidx) + qidx--; vha = (struct scsi_qla_host *)lport->private; ha = vha->hw; @@ -151,6 +157,18 @@ static void qla_nvme_release_fcp_cmd_kref(struct kref *kref) qla2xxx_rel_qpair_sp(sp->qpair, sp); } +static void qla_nvme_ls_unmap(struct srb *sp, struct nvmefc_ls_req *fd) +{ + if (sp->flags & SRB_DMA_VALID) { + struct srb_iocb *nvme = &sp->u.iocb_cmd; + struct qla_hw_data *ha = sp->fcport->vha->hw; + + dma_unmap_single(&ha->pdev->dev, nvme->u.nvme.cmd_dma, + fd->rqstlen, DMA_TO_DEVICE); + sp->flags &= ~SRB_DMA_VALID; + } +} + static void qla_nvme_release_ls_cmd_kref(struct kref *kref) { struct srb *sp = container_of(kref, struct srb, cmd_kref); @@ -167,6 +185,8 @@ static void qla_nvme_release_ls_cmd_kref(struct kref *kref) spin_unlock_irqrestore(&priv->cmd_lock, flags); fd = priv->fd; + + qla_nvme_ls_unmap(sp, fd); fd->done(fd, priv->comp_status); out: qla2x00_rel_sp(sp); @@ -313,6 +333,8 @@ static int qla_nvme_ls_req(struct nvme_fc_local_port *lport, dma_sync_single_for_device(&ha->pdev->dev, nvme->u.nvme.cmd_dma, fd->rqstlen, DMA_TO_DEVICE); + sp->flags |= SRB_DMA_VALID; + rval = qla2x00_start_sp(sp); if (rval != QLA_SUCCESS) { ql_log(ql_log_warn, vha, 0x700e, @@ -320,6 +342,7 @@ static int qla_nvme_ls_req(struct nvme_fc_local_port *lport, wake_up(&sp->nvme_ls_waitq); sp->priv = NULL; priv->sp = NULL; + qla_nvme_ls_unmap(sp, fd); qla2x00_rel_sp(sp); return rval; } diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 052ce78814075..a5dbaa3491f82 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -190,12 +191,6 @@ MODULE_PARM_DESC(ql2xdbwr, " 0 -- Regular doorbell.\n" " 1 -- CAMRAM doorbell (faster).\n"); -int ql2xtargetreset = 1; -module_param(ql2xtargetreset, int, S_IRUGO); -MODULE_PARM_DESC(ql2xtargetreset, - "Enable target reset." - "Default is 1 - use hw defaults."); - int ql2xgffidenable; module_param(ql2xgffidenable, int, S_IRUGO); MODULE_PARM_DESC(ql2xgffidenable, @@ -1228,6 +1223,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd) uint32_t ratov_j; struct qla_qpair *qpair; unsigned long flags; + int fast_fail_status = SUCCESS; if (qla2x00_isp_reg_stat(ha)) { ql_log(ql_log_info, vha, 0x8042, @@ -1235,15 +1231,16 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd) return FAILED; } + /* Save any FAST_IO_FAIL value to return later if abort succeeds */ ret = fc_block_scsi_eh(cmd); if (ret != 0) - return ret; + fast_fail_status = ret; sp = scsi_cmd_priv(cmd); qpair = sp->qpair; if ((sp->fcport && sp->fcport->deleted) || !qpair) - return SUCCESS; + return fast_fail_status != SUCCESS ? fast_fail_status : FAILED; spin_lock_irqsave(qpair->qp_lock_ptr, flags); if (sp->completed) { @@ -1289,7 +1286,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd) __func__, ha->r_a_tov/10); ret = FAILED; } else { - ret = SUCCESS; + ret = fast_fail_status; } break; default: @@ -1635,27 +1632,10 @@ int qla2x00_loop_reset(scsi_qla_host_t *vha) { int ret; - struct fc_port *fcport; struct qla_hw_data *ha = vha->hw; - if (IS_QLAFX00(ha)) { - return qlafx00_loop_reset(vha); - } - - if (ql2xtargetreset == 1 && ha->flags.enable_target_reset) { - list_for_each_entry(fcport, &vha->vp_fcports, list) { - if (fcport->port_type != FCT_TARGET) - continue; - - ret = ha->isp_ops->target_reset(fcport, 0, 0); - if (ret != QLA_SUCCESS) { - ql_dbg(ql_dbg_taskm, vha, 0x802c, - "Bus Reset failed: Reset=%d " - "d_id=%x.\n", ret, fcport->d_id.b24); - } - } - } - + if (IS_QLAFX00(ha)) + return QLA_SUCCESS; if (ha->flags.enable_lip_full_login && !IS_CNA_CAPABLE(ha)) { atomic_set(&vha->loop_state, LOOP_DOWN); @@ -2799,6 +2779,11 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) return ret; } + if (is_kdump_kernel()) { + ql2xmqsupport = 0; + ql2xallocfwdump = 0; + } + /* This may fail but that's ok */ pci_enable_pcie_error_reporting(pdev); @@ -3642,8 +3627,7 @@ qla2x00_unmap_iobases(struct qla_hw_data *ha) if (ha->mqiobase) iounmap(ha->mqiobase); - if ((IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) && - ha->msixbase) + if (ha->msixbase) iounmap(ha->msixbase); } } @@ -4058,7 +4042,7 @@ qla2x00_mem_alloc(struct qla_hw_data *ha, uint16_t req_len, uint16_t rsp_len, ql_dbg_pci(ql_dbg_init, ha->pdev, 0xe0ee, "%s: failed alloc dsd\n", __func__); - return 1; + return -ENOMEM; } ha->dif_bundle_kallocs++; @@ -5363,6 +5347,11 @@ void qla2x00_relogin(struct scsi_qla_host *vha) memset(&ea, 0, sizeof(ea)); ea.fcport = fcport; qla24xx_handle_relogin_event(vha, &ea); + } else if (vha->hw->current_topology == + ISP_CFG_NL && + IS_QLA2XXX_MIDTYPE(vha->hw)) { + (void)qla24xx_fcport_handle_login(vha, + fcport); } else if (vha->hw->current_topology == ISP_CFG_NL) { fcport->login_retry--; diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c index bbe90354f49b0..d306f3ca0f3bf 100644 --- a/drivers/scsi/qla2xxx/qla_sup.c +++ b/drivers/scsi/qla2xxx/qla_sup.c @@ -843,7 +843,7 @@ qla2xxx_get_flt_info(scsi_qla_host_t *vha, uint32_t flt_addr) ha->flt_region_nvram = start; break; case FLT_REG_IMG_PRI_27XX: - if (IS_QLA27XX(ha) && !IS_QLA28XX(ha)) + if (IS_QLA27XX(ha) || IS_QLA28XX(ha)) ha->flt_region_img_status_pri = start; break; case FLT_REG_IMG_SEC_27XX: @@ -1355,7 +1355,7 @@ qla24xx_write_flash_data(scsi_qla_host_t *vha, uint32_t *dwptr, uint32_t faddr, flash_data_addr(ha, faddr), cpu_to_le32(*dwptr)); if (ret) { ql_dbg(ql_dbg_user, vha, 0x7006, - "Failed slopw write %x (%x)\n", faddr, *dwptr); + "Failed slow write %x (%x)\n", faddr, *dwptr); break; } } diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 57068e2faef54..cb97565b6a333 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -3251,8 +3251,8 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type, "RESET-RSP online/active/old-count/new-count = %d/%d/%d/%d.\n", vha->flags.online, qla2x00_reset_active(vha), cmd->reset_count, qpair->chip_reset); - spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); - return 0; + res = 0; + goto out_unmap_unlock; } /* Does F/W have an IOCBs for this request */ @@ -3375,10 +3375,6 @@ int qlt_rdy_to_xfer(struct qla_tgt_cmd *cmd) prm.sg = NULL; prm.req_cnt = 1; - /* Calculate number of entries and segments required */ - if (qlt_pci_map_calc_cnt(&prm) != 0) - return -EAGAIN; - if (!qpair->fw_started || (cmd->reset_count != qpair->chip_reset) || (cmd->sess && cmd->sess->deleted)) { /* @@ -3396,6 +3392,10 @@ int qlt_rdy_to_xfer(struct qla_tgt_cmd *cmd) return 0; } + /* Calculate number of entries and segments required */ + if (qlt_pci_map_calc_cnt(&prm) != 0) + return -EAGAIN; + spin_lock_irqsave(qpair->qp_lock_ptr, flags); /* Does F/W have an IOCBs for this request */ res = qlt_check_reserve_free_req(qpair, prm.req_cnt); @@ -3768,6 +3768,9 @@ int qlt_abort_cmd(struct qla_tgt_cmd *cmd) spin_lock_irqsave(&cmd->cmd_lock, flags); if (cmd->aborted) { + if (cmd->sg_mapped) + qlt_unmap_sg(vha, cmd); + spin_unlock_irqrestore(&cmd->cmd_lock, flags); /* * It's normal to see 2 calls in this path: @@ -3800,9 +3803,6 @@ void qlt_free_cmd(struct qla_tgt_cmd *cmd) BUG_ON(cmd->cmd_in_wq); - if (cmd->sg_mapped) - qlt_unmap_sg(cmd->vha, cmd); - if (!cmd->q_full) qlt_decr_num_pend_cmds(cmd->vha); @@ -7087,8 +7087,7 @@ qlt_probe_one_stage1(struct scsi_qla_host *base_vha, struct qla_hw_data *ha) if (!QLA_TGT_MODE_ENABLED()) return; - if ((ql2xenablemsix == 0) || IS_QLA83XX(ha) || IS_QLA27XX(ha) || - IS_QLA28XX(ha)) { + if (ha->mqenable || IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) { ISP_ATIO_Q_IN(base_vha) = &ha->mqiobase->isp25mq.atio_q_in; ISP_ATIO_Q_OUT(base_vha) = &ha->mqiobase->isp25mq.atio_q_out; } else { diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 1f5b5c8a7f726..1ce3f90f782fd 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -555,8 +555,10 @@ EXPORT_SYMBOL(scsi_device_get); */ void scsi_device_put(struct scsi_device *sdev) { - module_put(sdev->host->hostt->module); + struct module *mod = sdev->host->hostt->module; + put_device(&sdev->sdev_gendev); + module_put(mod); } EXPORT_SYMBOL(scsi_device_put); diff --git a/drivers/scsi/scsi_debugfs.c b/drivers/scsi/scsi_debugfs.c index c19ea7ab54cbd..d9a18124cfc9d 100644 --- a/drivers/scsi/scsi_debugfs.c +++ b/drivers/scsi/scsi_debugfs.c @@ -10,6 +10,7 @@ static const char *const scsi_cmd_flags[] = { SCSI_CMD_FLAG_NAME(TAGGED), SCSI_CMD_FLAG_NAME(UNCHECKED_ISA_DMA), SCSI_CMD_FLAG_NAME(INITIALIZED), + SCSI_CMD_FLAG_NAME(LAST), }; #undef SCSI_CMD_FLAG_NAME diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 8e6d7ba95df14..98e363d0025b4 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1719,8 +1719,7 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx, case BLK_STS_OK: break; case BLK_STS_RESOURCE: - if (atomic_read(&sdev->device_busy) || - scsi_device_blocked(sdev)) + if (scsi_device_blocked(sdev)) ret = BLK_STS_DEV_RESOURCE; break; default: diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 0e3c46e6b8ab7..6faf1d6451b0c 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -438,9 +438,12 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work) struct list_head *this, *tmp; struct scsi_vpd *vpd_pg80 = NULL, *vpd_pg83 = NULL; unsigned long flags; + struct module *mod; sdev = container_of(work, struct scsi_device, ew.work); + mod = sdev->host->hostt->module; + scsi_dh_release_device(sdev); parent = sdev->sdev_gendev.parent; @@ -481,11 +484,17 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work) if (parent) put_device(parent); + module_put(mod); } static void scsi_device_dev_release(struct device *dev) { struct scsi_device *sdp = to_scsi_device(dev); + + /* Set module pointer as NULL in case of module unloading */ + if (!try_module_get(sdp->host->hostt->module)) + sdp->host->hostt->module = NULL; + execute_in_process_context(scsi_device_dev_release_usercontext, &sdp->ew); } diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 6f21cb75d95fd..f6cce0befa7de 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -1894,12 +1894,12 @@ static void session_recovery_timedout(struct work_struct *work) } spin_unlock_irqrestore(&session->lock, flags); - if (session->transport->session_recovery_timedout) - session->transport->session_recovery_timedout(session); - ISCSI_DBG_TRANS_SESSION(session, "Unblocking SCSI target\n"); scsi_target_unblock(&session->dev, SDEV_TRANSPORT_OFFLINE); ISCSI_DBG_TRANS_SESSION(session, "Completed unblocking SCSI target\n"); + + if (session->transport->session_recovery_timedout) + session->transport->session_recovery_timedout(session); } static void __iscsi_unblock_session(struct work_struct *work) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index f55249766d224..152b486051522 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3345,15 +3345,16 @@ static int sd_probe(struct device *dev) } device_initialize(&sdkp->dev); - sdkp->dev.parent = dev; + sdkp->dev.parent = get_device(dev); sdkp->dev.class = &sd_disk_class; dev_set_name(&sdkp->dev, "%s", dev_name(dev)); error = device_add(&sdkp->dev); - if (error) - goto out_free_index; + if (error) { + put_device(&sdkp->dev); + goto out; + } - get_device(dev); dev_set_drvdata(dev, sdkp); gd->major = sd_major((index & 0xf0) >> 4); diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c index c2afba2a5414d..0a1734f34587d 100644 --- a/drivers/scsi/ses.c +++ b/drivers/scsi/ses.c @@ -87,9 +87,16 @@ static int ses_recv_diag(struct scsi_device *sdev, int page_code, 0 }; unsigned char recv_page_code; + unsigned int retries = SES_RETRIES; + struct scsi_sense_hdr sshdr; + + do { + ret = scsi_execute_req(sdev, cmd, DMA_FROM_DEVICE, buf, bufflen, + &sshdr, SES_TIMEOUT, 1, NULL); + } while (ret > 0 && --retries && scsi_sense_valid(&sshdr) && + (sshdr.sense_key == NOT_READY || + (sshdr.sense_key == UNIT_ATTENTION && sshdr.asc == 0x29))); - ret = scsi_execute_req(sdev, cmd, DMA_FROM_DEVICE, buf, bufflen, - NULL, SES_TIMEOUT, SES_RETRIES, NULL); if (unlikely(ret)) return ret; @@ -111,7 +118,7 @@ static int ses_recv_diag(struct scsi_device *sdev, int page_code, static int ses_send_diag(struct scsi_device *sdev, int page_code, void *buf, int bufflen) { - u32 result; + int result; unsigned char cmd[] = { SEND_DIAGNOSTIC, @@ -121,9 +128,16 @@ static int ses_send_diag(struct scsi_device *sdev, int page_code, bufflen & 0xff, 0 }; + struct scsi_sense_hdr sshdr; + unsigned int retries = SES_RETRIES; + + do { + result = scsi_execute_req(sdev, cmd, DMA_TO_DEVICE, buf, bufflen, + &sshdr, SES_TIMEOUT, 1, NULL); + } while (result > 0 && --retries && scsi_sense_valid(&sshdr) && + (sshdr.sense_key == NOT_READY || + (sshdr.sense_key == UNIT_ATTENTION && sshdr.asc == 0x29))); - result = scsi_execute_req(sdev, cmd, DMA_TO_DEVICE, buf, bufflen, - NULL, SES_TIMEOUT, SES_RETRIES, NULL); if (result) sdev_printk(KERN_ERR, sdev, "SEND DIAGNOSTIC result: %8x\n", result); diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index 9bc451004184f..80ff00025c03d 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -1192,6 +1192,7 @@ static int pqi_get_raid_map(struct pqi_ctrl_info *ctrl_info, "Requested %d bytes, received %d bytes", raid_map_size, get_unaligned_le32(&raid_map->structure_size)); + rc = -EINVAL; goto error; } } diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 279dea628620d..310da62cda263 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -887,7 +887,7 @@ static void get_capabilities(struct scsi_cd *cd) /* allocate transfer buffer */ - buffer = kmalloc(512, GFP_KERNEL | GFP_DMA); + buffer = kmalloc(512, GFP_KERNEL); if (!buffer) { sr_printk(KERN_ERR, cd, "out of memory.\n"); return; diff --git a/drivers/scsi/sr_vendor.c b/drivers/scsi/sr_vendor.c index b9db2ec6d0361..996bccadd3866 100644 --- a/drivers/scsi/sr_vendor.c +++ b/drivers/scsi/sr_vendor.c @@ -113,7 +113,7 @@ int sr_set_blocklength(Scsi_CD *cd, int blocklength) if (cd->vendor == VENDOR_TOSHIBA) density = (blocklength > 2048) ? 0x81 : 0x83; - buffer = kmalloc(512, GFP_KERNEL | GFP_DMA); + buffer = kmalloc(512, GFP_KERNEL); if (!buffer) return -ENOMEM; @@ -161,7 +161,7 @@ int sr_cd_check(struct cdrom_device_info *cdi) if (cd->cdi.mask & CDC_MULTI_SESSION) return 0; - buffer = kmalloc(512, GFP_KERNEL | GFP_DMA); + buffer = kmalloc(512, GFP_KERNEL); if (!buffer) return -ENOMEM; diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c index 2ca018ce796f7..f455243bdb9be 100644 --- a/drivers/scsi/sym53c8xx_2/sym_glue.c +++ b/drivers/scsi/sym53c8xx_2/sym_glue.c @@ -1743,7 +1743,7 @@ static void sym2_remove(struct pci_dev *pdev) * @state: current state of the PCI slot */ static pci_ers_result_t sym2_io_error_detected(struct pci_dev *pdev, - enum pci_channel_state state) + pci_channel_state_t state) { /* If slot is permanently frozen, turn everything off */ if (state == pci_channel_io_perm_failure) { diff --git a/drivers/scsi/ufs/tc-dwc-g210-pci.c b/drivers/scsi/ufs/tc-dwc-g210-pci.c index 67a6a61154b71..4e471484539d2 100644 --- a/drivers/scsi/ufs/tc-dwc-g210-pci.c +++ b/drivers/scsi/ufs/tc-dwc-g210-pci.c @@ -135,7 +135,6 @@ tc_dwc_g210_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return err; } - pci_set_drvdata(pdev, hba); pm_runtime_put_noidle(&pdev->dev); pm_runtime_allow(&pdev->dev); diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c index 4f066e3b19af1..7c9664c0c4c4f 100644 --- a/drivers/scsi/ufs/ufs-qcom.c +++ b/drivers/scsi/ufs/ufs-qcom.c @@ -781,8 +781,11 @@ static void ufs_qcom_dev_ref_clk_ctrl(struct ufs_qcom_host *host, bool enable) writel_relaxed(temp, host->dev_ref_clk_ctrl_mmio); - /* ensure that ref_clk is enabled/disabled before we return */ - wmb(); + /* + * Make sure the write to ref_clk reaches the destination and + * not stored in a Write Buffer (WB). + */ + readl(host->dev_ref_clk_ctrl_mmio); /* * If we call hibern8 exit after this, we need to make sure that diff --git a/drivers/scsi/ufs/ufshcd-pltfrm.c b/drivers/scsi/ufs/ufshcd-pltfrm.c index 8d40dc918f4e1..bfc589f4baf53 100644 --- a/drivers/scsi/ufs/ufshcd-pltfrm.c +++ b/drivers/scsi/ufs/ufshcd-pltfrm.c @@ -125,9 +125,20 @@ static int ufshcd_parse_clock_info(struct ufs_hba *hba) return ret; } +static bool phandle_exists(const struct device_node *np, + const char *phandle_name, int index) +{ + struct device_node *parse_np = of_parse_phandle(np, phandle_name, index); + + if (parse_np) + of_node_put(parse_np); + + return parse_np != NULL; +} + #define MAX_PROP_SIZE 32 static int ufshcd_populate_vreg(struct device *dev, const char *name, - struct ufs_vreg **out_vreg) + struct ufs_vreg **out_vreg) { int ret = 0; char prop_name[MAX_PROP_SIZE]; @@ -140,7 +151,7 @@ static int ufshcd_populate_vreg(struct device *dev, const char *name, } snprintf(prop_name, MAX_PROP_SIZE, "%s-supply", name); - if (!of_parse_phandle(np, prop_name, 0)) { + if (!phandle_exists(np, prop_name, 0)) { dev_info(dev, "%s: Unable to find %s regulator, assuming enabled\n", __func__, prop_name); goto out; @@ -436,8 +447,6 @@ int ufshcd_pltfrm_init(struct platform_device *pdev, goto dealloc_host; } - platform_set_drvdata(pdev, hba); - pm_runtime_set_active(&pdev->dev); pm_runtime_enable(&pdev->dev); diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 0429ba5d7d23c..670f4c7934f85 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -118,8 +118,13 @@ int ufshcd_dump_regs(struct ufs_hba *hba, size_t offset, size_t len, if (!regs) return -ENOMEM; - for (pos = 0; pos < len; pos += 4) + for (pos = 0; pos < len; pos += 4) { + if (offset == 0 && + pos >= REG_UIC_ERROR_CODE_PHY_ADAPTER_LAYER && + pos <= REG_UIC_ERROR_CODE_DME) + continue; regs[pos / 4] = ufshcd_readl(hba, offset + pos); + } ufshcd_hex_dump(prefix, regs, len); kfree(regs); @@ -320,8 +325,7 @@ static void ufshcd_add_query_upiu_trace(struct ufs_hba *hba, unsigned int tag, static void ufshcd_add_tm_upiu_trace(struct ufs_hba *hba, unsigned int tag, const char *str) { - int off = (int)tag - hba->nutrs; - struct utp_task_req_desc *descp = &hba->utmrdl_base_addr[off]; + struct utp_task_req_desc *descp = &hba->utmrdl_base_addr[tag]; trace_ufshcd_upiu(dev_name(hba->dev), str, &descp->req_header, &descp->input_param1); @@ -4749,7 +4753,8 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) break; } /* end of switch */ - if ((host_byte(result) != DID_OK) && !hba->silence_err_logs) + if ((host_byte(result) != DID_OK) && + (host_byte(result) != DID_REQUEUE) && !hba->silence_err_logs) ufshcd_print_trs(hba, 1 << lrbp->task_tag, true); return result; } @@ -5662,9 +5667,12 @@ static irqreturn_t ufshcd_intr(int irq, void *__hba) intr_status = ufshcd_readl(hba, REG_INTERRUPT_STATUS); } - if (retval == IRQ_NONE) { - dev_err(hba->dev, "%s: Unhandled interrupt 0x%08x\n", - __func__, intr_status); + if (enabled_intr_status && retval == IRQ_NONE && + !ufshcd_eh_in_progress(hba)) { + dev_err(hba->dev, "%s: Unhandled interrupt 0x%08x (-, 0x%08x)\n", + __func__, + intr_status, + enabled_intr_status); ufshcd_dump_regs(hba, 0, UFSHCI_REG_SPACE_SIZE, "host_regs: "); } @@ -5706,7 +5714,10 @@ static int __ufshcd_issue_tm_cmd(struct ufs_hba *hba, /* * blk_get_request() is used here only to get a free tag. */ - req = blk_get_request(q, REQ_OP_DRV_OUT, BLK_MQ_REQ_RESERVED); + req = blk_get_request(q, REQ_OP_DRV_OUT, 0); + if (IS_ERR(req)) + return PTR_ERR(req); + req->end_io_data = &wait; ufshcd_hold(hba, false); @@ -8322,6 +8333,13 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) struct Scsi_Host *host = hba->host; struct device *dev = hba->dev; + /* + * dev_set_drvdata() must be called before any callbacks are registered + * that use dev_get_drvdata() (frequency scaling, clock scaling, hwmon, + * sysfs). + */ + dev_set_drvdata(dev, hba); + if (!mmio_base) { dev_err(hba->dev, "Invalid memory reference for mmio_base is NULL\n"); diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index bfec84aacd90b..cb833c5fb9ce2 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -297,7 +297,7 @@ static void virtscsi_handle_transport_reset(struct virtio_scsi *vscsi, } break; default: - pr_info("Unsupport virtio scsi event reason %x\n", event->reason); + pr_info("Unsupported virtio scsi event reason %x\n", event->reason); } } @@ -381,7 +381,7 @@ static void virtscsi_handle_event(struct work_struct *work) virtscsi_handle_param_change(vscsi, event); break; default: - pr_err("Unsupport virtio scsi event %x\n", event->event); + pr_err("Unsupported virtio scsi event %x\n", event->event); } virtscsi_kick_event(vscsi, event_node); } diff --git a/drivers/scsi/vmw_pvscsi.h b/drivers/scsi/vmw_pvscsi.h index 75966d3f326e0..d87c12324c032 100644 --- a/drivers/scsi/vmw_pvscsi.h +++ b/drivers/scsi/vmw_pvscsi.h @@ -333,8 +333,8 @@ struct PVSCSIRingReqDesc { u8 tag; u8 bus; u8 target; - u8 vcpuHint; - u8 unused[59]; + u16 vcpuHint; + u8 unused[58]; } __packed; /* diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c index f0068e96a177f..39e39869a1ad9 100644 --- a/drivers/scsi/xen-scsifront.c +++ b/drivers/scsi/xen-scsifront.c @@ -233,12 +233,11 @@ static void scsifront_gnttab_done(struct vscsifrnt_info *info, return; for (i = 0; i < shadow->nr_grants; i++) { - if (unlikely(gnttab_query_foreign_access(shadow->gref[i]))) { + if (unlikely(!gnttab_try_end_foreign_access(shadow->gref[i]))) { shost_printk(KERN_ALERT, info->host, KBUILD_MODNAME "grant still in use by backend\n"); BUG(); } - gnttab_end_foreign_access(shadow->gref[i], 0, 0UL); } kfree(shadow->sg); diff --git a/drivers/scsi/zorro7xx.c b/drivers/scsi/zorro7xx.c index 27b9e2baab1a6..7acf9193a9e80 100644 --- a/drivers/scsi/zorro7xx.c +++ b/drivers/scsi/zorro7xx.c @@ -159,6 +159,8 @@ static void zorro7xx_remove_one(struct zorro_dev *z) scsi_remove_host(host); NCR_700_release(host); + if (host->base > 0x01000000) + iounmap(hostdata->base); kfree(hostdata); free_irq(host->irq, host); zorro_release_device(z); diff --git a/drivers/sh/maple/maple.c b/drivers/sh/maple/maple.c index e5d7fb81ad665..44a931d41a132 100644 --- a/drivers/sh/maple/maple.c +++ b/drivers/sh/maple/maple.c @@ -835,8 +835,10 @@ static int __init maple_bus_init(void) maple_queue_cache = KMEM_CACHE(maple_buffer, SLAB_HWCACHE_ALIGN); - if (!maple_queue_cache) + if (!maple_queue_cache) { + retval = -ENOMEM; goto cleanup_bothirqs; + } INIT_LIST_HEAD(&maple_waitq); INIT_LIST_HEAD(&maple_sentq); @@ -849,6 +851,7 @@ static int __init maple_bus_init(void) if (!mdev[i]) { while (i-- > 0) maple_free_dev(mdev[i]); + retval = -ENOMEM; goto cleanup_cache; } baseunits[i] = mdev[i]; diff --git a/drivers/slimbus/qcom-ctrl.c b/drivers/slimbus/qcom-ctrl.c index a444badd8df5a..0a6f7eeb7755b 100644 --- a/drivers/slimbus/qcom-ctrl.c +++ b/drivers/slimbus/qcom-ctrl.c @@ -515,9 +515,9 @@ static int qcom_slim_probe(struct platform_device *pdev) } ctrl->irq = platform_get_irq(pdev, 0); - if (!ctrl->irq) { + if (ctrl->irq < 0) { dev_err(&pdev->dev, "no slimbus IRQ\n"); - return -ENODEV; + return ctrl->irq; } sctrl = &ctrl->ctrl; diff --git a/drivers/soc/aspeed/aspeed-lpc-ctrl.c b/drivers/soc/aspeed/aspeed-lpc-ctrl.c index 01ed21e8bfee5..040c7dc1d4792 100644 --- a/drivers/soc/aspeed/aspeed-lpc-ctrl.c +++ b/drivers/soc/aspeed/aspeed-lpc-ctrl.c @@ -46,7 +46,7 @@ static int aspeed_lpc_ctrl_mmap(struct file *file, struct vm_area_struct *vma) unsigned long vsize = vma->vm_end - vma->vm_start; pgprot_t prot = vma->vm_page_prot; - if (vma->vm_pgoff + vsize > lpc_ctrl->mem_base + lpc_ctrl->mem_size) + if (vma->vm_pgoff + vma_pages(vma) > lpc_ctrl->mem_size >> PAGE_SHIFT) return -EINVAL; /* ast2400/2500 AHB accesses are not cache coherent */ diff --git a/drivers/soc/aspeed/aspeed-p2a-ctrl.c b/drivers/soc/aspeed/aspeed-p2a-ctrl.c index b60fbeaffcbd0..20b5fb2a207cc 100644 --- a/drivers/soc/aspeed/aspeed-p2a-ctrl.c +++ b/drivers/soc/aspeed/aspeed-p2a-ctrl.c @@ -110,7 +110,7 @@ static int aspeed_p2a_mmap(struct file *file, struct vm_area_struct *vma) vsize = vma->vm_end - vma->vm_start; prot = vma->vm_page_prot; - if (vma->vm_pgoff + vsize > ctrl->mem_base + ctrl->mem_size) + if (vma->vm_pgoff + vma_pages(vma) > ctrl->mem_size >> PAGE_SHIFT) return -EINVAL; /* ast2400/2500 AHB accesses are not cache coherent */ diff --git a/drivers/soc/bcm/brcmstb/pm/pm-arm.c b/drivers/soc/bcm/brcmstb/pm/pm-arm.c index b1062334e6089..c6ec7d95bcfcc 100644 --- a/drivers/soc/bcm/brcmstb/pm/pm-arm.c +++ b/drivers/soc/bcm/brcmstb/pm/pm-arm.c @@ -780,6 +780,7 @@ static int brcmstb_pm_probe(struct platform_device *pdev) } ret = brcmstb_init_sram(dn); + of_node_put(dn); if (ret) { pr_err("error setting up SRAM for PM\n"); return ret; diff --git a/drivers/soc/fsl/dpaa2-console.c b/drivers/soc/fsl/dpaa2-console.c index 27243f706f376..53917410f2bdb 100644 --- a/drivers/soc/fsl/dpaa2-console.c +++ b/drivers/soc/fsl/dpaa2-console.c @@ -231,6 +231,7 @@ static ssize_t dpaa2_console_read(struct file *fp, char __user *buf, cd->cur_ptr += bytes; written += bytes; + kfree(kbuf); return written; err_free_buf: diff --git a/drivers/soc/fsl/qe/qe_io.c b/drivers/soc/fsl/qe/qe_io.c index 3657e296a8a27..058c2fec9a4b4 100644 --- a/drivers/soc/fsl/qe/qe_io.c +++ b/drivers/soc/fsl/qe/qe_io.c @@ -37,6 +37,8 @@ int par_io_init(struct device_node *np) if (ret) return ret; par_io = ioremap(res.start, resource_size(&res)); + if (!par_io) + return -ENOMEM; num_ports = of_get_property(np, "num-ports", NULL); if (num_ports) diff --git a/drivers/soc/ixp4xx/ixp4xx-npe.c b/drivers/soc/ixp4xx/ixp4xx-npe.c index 6065aaab67403..8482a4892b83b 100644 --- a/drivers/soc/ixp4xx/ixp4xx-npe.c +++ b/drivers/soc/ixp4xx/ixp4xx-npe.c @@ -735,7 +735,7 @@ static const struct of_device_id ixp4xx_npe_of_match[] = { static struct platform_driver ixp4xx_npe_driver = { .driver = { .name = "ixp4xx-npe", - .of_match_table = of_match_ptr(ixp4xx_npe_of_match), + .of_match_table = ixp4xx_npe_of_match, }, .probe = ixp4xx_npe_probe, .remove = ixp4xx_npe_remove, diff --git a/drivers/soc/qcom/mdt_loader.c b/drivers/soc/qcom/mdt_loader.c index eba7f76f9d61a..6034cd8992b0e 100644 --- a/drivers/soc/qcom/mdt_loader.c +++ b/drivers/soc/qcom/mdt_loader.c @@ -98,7 +98,7 @@ void *qcom_mdt_read_metadata(const struct firmware *fw, size_t *data_len) if (ehdr->e_phnum < 2) return ERR_PTR(-EINVAL); - if (phdrs[0].p_type == PT_LOAD || phdrs[1].p_type == PT_LOAD) + if (phdrs[0].p_type == PT_LOAD) return ERR_PTR(-EINVAL); if ((phdrs[1].p_flags & QCOM_MDT_TYPE_MASK) != QCOM_MDT_TYPE_HASH) diff --git a/drivers/soc/qcom/qcom_aoss.c b/drivers/soc/qcom/qcom_aoss.c index 33a27e6c6d67d..f16d6ec780644 100644 --- a/drivers/soc/qcom/qcom_aoss.c +++ b/drivers/soc/qcom/qcom_aoss.c @@ -472,12 +472,12 @@ static int qmp_cooling_device_add(struct qmp *qmp, static int qmp_cooling_devices_register(struct qmp *qmp) { struct device_node *np, *child; - int count = QMP_NUM_COOLING_RESOURCES; + int count = 0; int ret; np = qmp->dev->of_node; - qmp->cooling_devs = devm_kcalloc(qmp->dev, count, + qmp->cooling_devs = devm_kcalloc(qmp->dev, QMP_NUM_COOLING_RESOURCES, sizeof(*qmp->cooling_devs), GFP_KERNEL); @@ -493,12 +493,16 @@ static int qmp_cooling_devices_register(struct qmp *qmp) goto unroll; } + if (!count) + devm_kfree(qmp->dev, qmp->cooling_devs); + return 0; unroll: while (--count >= 0) thermal_cooling_device_unregister (qmp->cooling_devs[count].cdev); + devm_kfree(qmp->dev, qmp->cooling_devs); return ret; } @@ -540,7 +544,7 @@ static int qmp_probe(struct platform_device *pdev) } irq = platform_get_irq(pdev, 0); - ret = devm_request_irq(&pdev->dev, irq, qmp_intr, IRQF_ONESHOT, + ret = devm_request_irq(&pdev->dev, irq, qmp_intr, 0, "aoss-qmp", qmp); if (ret < 0) { dev_err(&pdev->dev, "failed to request interrupt\n"); diff --git a/drivers/soc/qcom/rpmhpd.c b/drivers/soc/qcom/rpmhpd.c index 51850cc68b701..aa24237a78405 100644 --- a/drivers/soc/qcom/rpmhpd.c +++ b/drivers/soc/qcom/rpmhpd.c @@ -235,12 +235,11 @@ static int rpmhpd_power_on(struct generic_pm_domain *domain) static int rpmhpd_power_off(struct generic_pm_domain *domain) { struct rpmhpd *pd = domain_to_rpmhpd(domain); - int ret = 0; + int ret; mutex_lock(&rpmhpd_lock); - ret = rpmhpd_aggregate_corner(pd, pd->level[0]); - + ret = rpmhpd_aggregate_corner(pd, 0); if (!ret) pd->enabled = false; diff --git a/drivers/soc/qcom/rpmpd.c b/drivers/soc/qcom/rpmpd.c index 3c1a55cf25d62..4715acfecff49 100644 --- a/drivers/soc/qcom/rpmpd.c +++ b/drivers/soc/qcom/rpmpd.c @@ -362,6 +362,9 @@ static int rpmpd_probe(struct platform_device *pdev) data->domains = devm_kcalloc(&pdev->dev, num, sizeof(*data->domains), GFP_KERNEL); + if (!data->domains) + return -ENOMEM; + data->num_domains = num; for (i = 0; i < num; i++) { diff --git a/drivers/soc/qcom/smp2p.c b/drivers/soc/qcom/smp2p.c index 42e0b8f647aef..d42bcca3b98e2 100644 --- a/drivers/soc/qcom/smp2p.c +++ b/drivers/soc/qcom/smp2p.c @@ -420,6 +420,7 @@ static int smp2p_parse_ipc(struct qcom_smp2p *smp2p) } smp2p->ipc_regmap = syscon_node_to_regmap(syscon); + of_node_put(syscon); if (IS_ERR(smp2p->ipc_regmap)) return PTR_ERR(smp2p->ipc_regmap); diff --git a/drivers/soc/qcom/smsm.c b/drivers/soc/qcom/smsm.c index 70c3c90b997c9..6564f15c53190 100644 --- a/drivers/soc/qcom/smsm.c +++ b/drivers/soc/qcom/smsm.c @@ -109,7 +109,7 @@ struct smsm_entry { DECLARE_BITMAP(irq_enabled, 32); DECLARE_BITMAP(irq_rising, 32); DECLARE_BITMAP(irq_falling, 32); - u32 last_value; + unsigned long last_value; u32 *remote_state; u32 *subscription; @@ -204,8 +204,7 @@ static irqreturn_t smsm_intr(int irq, void *data) u32 val; val = readl(entry->remote_state); - changed = val ^ entry->last_value; - entry->last_value = val; + changed = val ^ xchg(&entry->last_value, val); for_each_set_bit(i, entry->irq_enabled, 32) { if (!(changed & BIT(i))) @@ -266,6 +265,12 @@ static void smsm_unmask_irq(struct irq_data *irqd) struct qcom_smsm *smsm = entry->smsm; u32 val; + /* Make sure our last cached state is up-to-date */ + if (readl(entry->remote_state) & BIT(irq)) + set_bit(irq, &entry->last_value); + else + clear_bit(irq, &entry->last_value); + set_bit(irq, entry->irq_enabled); if (entry->subscription) { @@ -354,6 +359,7 @@ static int smsm_parse_ipc(struct qcom_smsm *smsm, unsigned host_id) return 0; host->ipc_regmap = syscon_node_to_regmap(syscon); + of_node_put(syscon); if (IS_ERR(host->ipc_regmap)) return PTR_ERR(host->ipc_regmap); diff --git a/drivers/soc/qcom/socinfo.c b/drivers/soc/qcom/socinfo.c index 176696f8f38d1..3303bcaf67154 100644 --- a/drivers/soc/qcom/socinfo.c +++ b/drivers/soc/qcom/socinfo.c @@ -447,7 +447,7 @@ static int qcom_socinfo_probe(struct platform_device *pdev) /* Feed the soc specific unique data into entropy pool */ add_device_randomness(info, item_size); - platform_set_drvdata(pdev, qs->soc_dev); + platform_set_drvdata(pdev, qs); return 0; } diff --git a/drivers/soc/rockchip/Kconfig b/drivers/soc/rockchip/Kconfig index b71b73bf5fc5c..785990720479c 100644 --- a/drivers/soc/rockchip/Kconfig +++ b/drivers/soc/rockchip/Kconfig @@ -6,8 +6,8 @@ if ARCH_ROCKCHIP || COMPILE_TEST # config ROCKCHIP_GRF - bool - default y + bool "Rockchip General Register Files support" if COMPILE_TEST + default y if ARCH_ROCKCHIP help The General Register Files are a central component providing special additional settings registers for a lot of soc-components. diff --git a/drivers/soc/rockchip/grf.c b/drivers/soc/rockchip/grf.c index 494cf2b5bf7b6..343ff61ccccbb 100644 --- a/drivers/soc/rockchip/grf.c +++ b/drivers/soc/rockchip/grf.c @@ -148,12 +148,14 @@ static int __init rockchip_grf_init(void) return -ENODEV; if (!match || !match->data) { pr_err("%s: missing grf data\n", __func__); + of_node_put(np); return -EINVAL; } grf_info = match->data; grf = syscon_node_to_regmap(np); + of_node_put(np); if (IS_ERR(grf)) { pr_err("%s: could not get grf syscon\n", __func__); return PTR_ERR(grf); diff --git a/drivers/soc/tegra/fuse/fuse-tegra.c b/drivers/soc/tegra/fuse/fuse-tegra.c index 3eb44e65b3261..1a54bac512b69 100644 --- a/drivers/soc/tegra/fuse/fuse-tegra.c +++ b/drivers/soc/tegra/fuse/fuse-tegra.c @@ -172,7 +172,7 @@ static struct platform_driver tegra_fuse_driver = { }; builtin_platform_driver(tegra_fuse_driver); -bool __init tegra_fuse_read_spare(unsigned int spare) +u32 __init tegra_fuse_read_spare(unsigned int spare) { unsigned int offset = fuse->soc->info->spare + spare * 4; diff --git a/drivers/soc/tegra/fuse/fuse.h b/drivers/soc/tegra/fuse/fuse.h index 7230cb3305033..6996cfc7cbca3 100644 --- a/drivers/soc/tegra/fuse/fuse.h +++ b/drivers/soc/tegra/fuse/fuse.h @@ -53,7 +53,7 @@ struct tegra_fuse { void tegra_init_revision(void); void tegra_init_apbmisc(void); -bool __init tegra_fuse_read_spare(unsigned int spare); +u32 __init tegra_fuse_read_spare(unsigned int spare); u32 __init tegra_fuse_read_early(unsigned int offset); #ifdef CONFIG_ARCH_TEGRA_2x_SOC diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c index 0447afa970f5e..d11320a4dfcf9 100644 --- a/drivers/soc/tegra/pmc.c +++ b/drivers/soc/tegra/pmc.c @@ -579,7 +579,7 @@ static int tegra_powergate_power_up(struct tegra_powergate *pg, err = tegra_powergate_enable_clocks(pg); if (err) - goto disable_clks; + goto powergate_off; usleep_range(10, 20); @@ -591,7 +591,7 @@ static int tegra_powergate_power_up(struct tegra_powergate *pg, err = reset_control_deassert(pg->reset); if (err) - goto powergate_off; + goto disable_clks; usleep_range(10, 20); diff --git a/drivers/soc/ti/wkup_m3_ipc.c b/drivers/soc/ti/wkup_m3_ipc.c index e9ece45d7a333..ef3f95fefab58 100644 --- a/drivers/soc/ti/wkup_m3_ipc.c +++ b/drivers/soc/ti/wkup_m3_ipc.c @@ -447,9 +447,9 @@ static int wkup_m3_ipc_probe(struct platform_device *pdev) } irq = platform_get_irq(pdev, 0); - if (!irq) { + if (irq < 0) { dev_err(&pdev->dev, "no irq resource\n"); - return -ENXIO; + return irq; } ret = devm_request_irq(dev, irq, wkup_m3_txev_handler, diff --git a/drivers/spi/atmel-quadspi.c b/drivers/spi/atmel-quadspi.c index 5fd929e023e18..b4d85fd62ce91 100644 --- a/drivers/spi/atmel-quadspi.c +++ b/drivers/spi/atmel-quadspi.c @@ -202,6 +202,9 @@ static int atmel_qspi_find_mode(const struct spi_mem_op *op) static bool atmel_qspi_supports_op(struct spi_mem *mem, const struct spi_mem_op *op) { + if (!spi_mem_default_supports_op(mem, op)) + return false; + if (atmel_qspi_find_mode(op) < 0) return false; diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c index e450ee17787f0..4a4e2877414a1 100644 --- a/drivers/spi/spi-armada-3700.c +++ b/drivers/spi/spi-armada-3700.c @@ -906,7 +906,7 @@ static int a3700_spi_probe(struct platform_device *pdev) return 0; error_clk: - clk_disable_unprepare(spi->clk); + clk_unprepare(spi->clk); error: spi_master_put(master); out: diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index 8a4be34bccfd2..d933a6eda5fdc 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -509,7 +509,7 @@ static void bcm_qspi_chip_select(struct bcm_qspi *qspi, int cs) u32 rd = 0; u32 wr = 0; - if (qspi->base[CHIP_SELECT]) { + if (cs >= 0 && qspi->base[CHIP_SELECT]) { rd = bcm_qspi_read(qspi, CHIP_SELECT, 0); wr = (rd & ~0xff) | (1 << cs); if (rd == wr) @@ -960,7 +960,7 @@ static int bcm_qspi_exec_mem_op(struct spi_mem *mem, addr = op->addr.val; len = op->data.nbytes; - if (bcm_qspi_bspi_ver_three(qspi) == true) { + if (has_bspi(qspi) && bcm_qspi_bspi_ver_three(qspi) == true) { /* * The address coming into this function is a raw flash offset. * But for BSPI <= V3, we need to convert it to a remapped BSPI @@ -979,7 +979,7 @@ static int bcm_qspi_exec_mem_op(struct spi_mem *mem, len < 4) mspi_read = true; - if (mspi_read) + if (!has_bspi(qspi) || mspi_read) return bcm_qspi_mspi_exec_mem_op(spi, op); ret = bcm_qspi_bspi_set_mode(qspi, op, 0); @@ -1300,7 +1300,7 @@ int bcm_qspi_probe(struct platform_device *pdev, &qspi->dev_ids[val]); if (ret < 0) { dev_err(&pdev->dev, "IRQ %s not found\n", name); - goto qspi_probe_err; + goto qspi_unprepare_err; } qspi->dev_ids[val].dev = qspi; @@ -1315,7 +1315,7 @@ int bcm_qspi_probe(struct platform_device *pdev, if (!num_ints) { dev_err(&pdev->dev, "no IRQs registered, cannot init driver\n"); ret = -EINVAL; - goto qspi_probe_err; + goto qspi_unprepare_err; } /* @@ -1359,6 +1359,7 @@ int bcm_qspi_probe(struct platform_device *pdev, qspi_reg_err: bcm_qspi_hw_uninit(qspi); +qspi_unprepare_err: clk_disable_unprepare(qspi->clk); qspi_probe_err: kfree(qspi->dev_ids); diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c index c86c3ac6097dd..b1003876cb350 100644 --- a/drivers/spi/spi-bcm2835.c +++ b/drivers/spi/spi-bcm2835.c @@ -1159,10 +1159,14 @@ static void bcm2835_spi_handle_err(struct spi_controller *ctlr, struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr); /* if an error occurred and we have an active dma, then terminate */ - dmaengine_terminate_sync(ctlr->dma_tx); - bs->tx_dma_active = false; - dmaengine_terminate_sync(ctlr->dma_rx); - bs->rx_dma_active = false; + if (ctlr->dma_tx) { + dmaengine_terminate_sync(ctlr->dma_tx); + bs->tx_dma_active = false; + } + if (ctlr->dma_rx) { + dmaengine_terminate_sync(ctlr->dma_rx); + bs->rx_dma_active = false; + } bcm2835_spi_undo_prologue(bs); /* and reset */ diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index 40dccc580e866..3e0200618af30 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -423,6 +423,7 @@ static int dspi_request_dma(struct fsl_dspi *dspi, phys_addr_t phy_addr) goto err_rx_dma_buf; } + memset(&cfg, 0, sizeof(cfg)); cfg.src_addr = phy_addr + SPI_POPR; cfg.dst_addr = phy_addr + SPI_PUSHR; cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; diff --git a/drivers/spi/spi-img-spfi.c b/drivers/spi/spi-img-spfi.c index e9ef80983b791..5a6b02843f2bc 100644 --- a/drivers/spi/spi-img-spfi.c +++ b/drivers/spi/spi-img-spfi.c @@ -771,7 +771,7 @@ static int img_spfi_resume(struct device *dev) int ret; ret = pm_runtime_get_sync(dev); - if (ret) { + if (ret < 0) { pm_runtime_put_noidle(dev); return ret; } diff --git a/drivers/spi/spi-meson-spicc.c b/drivers/spi/spi-meson-spicc.c index 3c841ae0a3e91..c9c4c0f2e8b7a 100644 --- a/drivers/spi/spi-meson-spicc.c +++ b/drivers/spi/spi-meson-spicc.c @@ -527,6 +527,11 @@ static int meson_spicc_probe(struct platform_device *pdev) writel_relaxed(0, spicc->base + SPICC_INTREG); irq = platform_get_irq(pdev, 0); + if (irq < 0) { + ret = irq; + goto out_master; + } + ret = devm_request_irq(&pdev->dev, irq, meson_spicc_irq, 0, NULL, spicc); if (ret) { diff --git a/drivers/spi/spi-meson-spifc.c b/drivers/spi/spi-meson-spifc.c index c7b0399802913..cae934464f3dd 100644 --- a/drivers/spi/spi-meson-spifc.c +++ b/drivers/spi/spi-meson-spifc.c @@ -349,6 +349,7 @@ static int meson_spifc_probe(struct platform_device *pdev) return 0; out_clk: clk_disable_unprepare(spifc->clk); + pm_runtime_disable(spifc->dev); out_err: spi_master_put(master); return ret; diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index cafb773f78616..29d44f5d5212c 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -533,7 +533,7 @@ static irqreturn_t mtk_spi_interrupt(int irq, void *dev_id) else mdata->state = MTK_SPI_IDLE; - if (!master->can_dma(master, master->cur_msg->spi, trans)) { + if (!master->can_dma(master, NULL, trans)) { if (trans->rx_buf) { cnt = mdata->xfer_len / 4; ioread32_rep(mdata->base + SPI_RX_DATA_REG, diff --git a/drivers/spi/spi-mxic.c b/drivers/spi/spi-mxic.c index eba706d5671e2..d0b5db88cd164 100644 --- a/drivers/spi/spi-mxic.c +++ b/drivers/spi/spi-mxic.c @@ -304,25 +304,21 @@ static int mxic_spi_data_xfer(struct mxic_spi *mxic, const void *txbuf, writel(data, mxic->regs + TXD(nbytes % 4)); + ret = readl_poll_timeout(mxic->regs + INT_STS, sts, + sts & INT_TX_EMPTY, 0, USEC_PER_SEC); + if (ret) + return ret; + + ret = readl_poll_timeout(mxic->regs + INT_STS, sts, + sts & INT_RX_NOT_EMPTY, 0, + USEC_PER_SEC); + if (ret) + return ret; + + data = readl(mxic->regs + RXD); if (rxbuf) { - ret = readl_poll_timeout(mxic->regs + INT_STS, sts, - sts & INT_TX_EMPTY, 0, - USEC_PER_SEC); - if (ret) - return ret; - - ret = readl_poll_timeout(mxic->regs + INT_STS, sts, - sts & INT_RX_NOT_EMPTY, 0, - USEC_PER_SEC); - if (ret) - return ret; - - data = readl(mxic->regs + RXD); data >>= (8 * (4 - nbytes)); memcpy(rxbuf + pos, &data, nbytes); - WARN_ON(readl(mxic->regs + INT_STS) & INT_RX_NOT_EMPTY); - } else { - readl(mxic->regs + RXD); } WARN_ON(readl(mxic->regs + INT_STS) & INT_RX_NOT_EMPTY); diff --git a/drivers/spi/spi-pic32.c b/drivers/spi/spi-pic32.c index 8272bde5d706f..b5268b0d7b4c8 100644 --- a/drivers/spi/spi-pic32.c +++ b/drivers/spi/spi-pic32.c @@ -361,6 +361,7 @@ static int pic32_spi_dma_config(struct pic32_spi *pic32s, u32 dma_width) struct dma_slave_config cfg; int ret; + memset(&cfg, 0, sizeof(cfg)); cfg.device_fc = true; cfg.src_addr = pic32s->dma_base + buf_offset; cfg.dst_addr = pic32s->dma_base + buf_offset; diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c index 7fedea67159c5..8523bb4f6a62e 100644 --- a/drivers/spi/spi-pl022.c +++ b/drivers/spi/spi-pl022.c @@ -1720,12 +1720,13 @@ static int verify_controller_parameters(struct pl022 *pl022, return -EINVAL; } } else { - if (chip_info->duplex != SSP_MICROWIRE_CHANNEL_FULL_DUPLEX) + if (chip_info->duplex != SSP_MICROWIRE_CHANNEL_FULL_DUPLEX) { dev_err(&pl022->adev->dev, "Microwire half duplex mode requested," " but this is only available in the" " ST version of PL022\n"); - return -EINVAL; + return -EINVAL; + } } } return 0; diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c index aafac128bb5f1..4eb979a096c78 100644 --- a/drivers/spi/spi-pxa2xx-pci.c +++ b/drivers/spi/spi-pxa2xx-pci.c @@ -74,14 +74,23 @@ static bool lpss_dma_filter(struct dma_chan *chan, void *param) return true; } +static void lpss_dma_put_device(void *dma_dev) +{ + pci_dev_put(dma_dev); +} + static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) { struct pci_dev *dma_dev; + int ret; c->num_chipselect = 1; c->max_clk_rate = 50000000; dma_dev = pci_get_slot(dev->bus, PCI_DEVFN(PCI_SLOT(dev->devfn), 0)); + ret = devm_add_action_or_reset(&dev->dev, lpss_dma_put_device, dma_dev); + if (ret) + return ret; if (c->tx_param) { struct dw_dma_slave *slave = c->tx_param; @@ -105,8 +114,9 @@ static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) { - struct pci_dev *dma_dev = pci_get_slot(dev->bus, PCI_DEVFN(21, 0)); struct dw_dma_slave *tx, *rx; + struct pci_dev *dma_dev; + int ret; switch (PCI_FUNC(dev->devfn)) { case 0: @@ -131,6 +141,11 @@ static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) return -ENODEV; } + dma_dev = pci_get_slot(dev->bus, PCI_DEVFN(21, 0)); + ret = devm_add_action_or_reset(&dev->dev, lpss_dma_put_device, dma_dev); + if (ret) + return ret; + tx = c->tx_param; tx->dma_dev = &dma_dev->dev; diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c index 7222c7689c3c4..0524741d73b90 100644 --- a/drivers/spi/spi-rspi.c +++ b/drivers/spi/spi-rspi.c @@ -1044,14 +1044,11 @@ static struct dma_chan *rspi_request_dma_chan(struct device *dev, } memset(&cfg, 0, sizeof(cfg)); + cfg.dst_addr = port_addr + RSPI_SPDR; + cfg.src_addr = port_addr + RSPI_SPDR; + cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE; + cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE; cfg.direction = dir; - if (dir == DMA_MEM_TO_DEV) { - cfg.dst_addr = port_addr; - cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE; - } else { - cfg.src_addr = port_addr; - cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE; - } ret = dmaengine_slave_config(chan, &cfg); if (ret) { @@ -1082,12 +1079,12 @@ static int rspi_request_dma(struct device *dev, struct spi_controller *ctlr, } ctlr->dma_tx = rspi_request_dma_chan(dev, DMA_MEM_TO_DEV, dma_tx_id, - res->start + RSPI_SPDR); + res->start); if (!ctlr->dma_tx) return -ENODEV; ctlr->dma_rx = rspi_request_dma_chan(dev, DMA_DEV_TO_MEM, dma_rx_id, - res->start + RSPI_SPDR); + res->start); if (!ctlr->dma_rx) { dma_release_channel(ctlr->dma_tx); ctlr->dma_tx = NULL; diff --git a/drivers/spi/spi-sprd-adi.c b/drivers/spi/spi-sprd-adi.c index 09f983524d51b..e804a3854c351 100644 --- a/drivers/spi/spi-sprd-adi.c +++ b/drivers/spi/spi-sprd-adi.c @@ -102,7 +102,7 @@ #define HWRST_STATUS_WATCHDOG 0xf0 /* Use default timeout 50 ms that converts to watchdog values */ -#define WDG_LOAD_VAL ((50 * 1000) / 32768) +#define WDG_LOAD_VAL ((50 * 32768) / 1000) #define WDG_LOAD_MASK GENMASK(15, 0) #define WDG_UNLOCK_KEY 0xe551 diff --git a/drivers/spi/spi-stm32-qspi.c b/drivers/spi/spi-stm32-qspi.c index ea77d915216a2..8070b74202170 100644 --- a/drivers/spi/spi-stm32-qspi.c +++ b/drivers/spi/spi-stm32-qspi.c @@ -293,7 +293,8 @@ static int stm32_qspi_wait_cmd(struct stm32_qspi *qspi, if (!op->data.nbytes) goto wait_nobusy; - if (readl_relaxed(qspi->io_base + QSPI_SR) & SR_TCF) + if ((readl_relaxed(qspi->io_base + QSPI_SR) & SR_TCF) || + qspi->fmode == CCR_FMODE_APM) goto out; reinit_completion(&qspi->data_completion); diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c index 594905bf89aa8..3f7a64b2a5d06 100644 --- a/drivers/spi/spi-tegra114.c +++ b/drivers/spi/spi-tegra114.c @@ -1352,6 +1352,10 @@ static int tegra_spi_probe(struct platform_device *pdev) tspi->phys = r->start; spi_irq = platform_get_irq(pdev, 0); + if (spi_irq < 0) { + ret = spi_irq; + goto exit_free_master; + } tspi->irq = spi_irq; tspi->clk = devm_clk_get(&pdev->dev, "spi"); diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c index 2a1905c43a0b7..e6b12f78c8f01 100644 --- a/drivers/spi/spi-tegra20-slink.c +++ b/drivers/spi/spi-tegra20-slink.c @@ -1011,14 +1011,8 @@ static int tegra_slink_probe(struct platform_device *pdev) struct resource *r; int ret, spi_irq; const struct tegra_slink_chip_data *cdata = NULL; - const struct of_device_id *match; - match = of_match_device(tegra_slink_of_match, &pdev->dev); - if (!match) { - dev_err(&pdev->dev, "Error: No device match found\n"); - return -ENODEV; - } - cdata = match->data; + cdata = of_device_get_match_data(&pdev->dev); master = spi_alloc_master(&pdev->dev, sizeof(*tspi)); if (!master) { @@ -1205,7 +1199,7 @@ static int tegra_slink_resume(struct device *dev) } #endif -static int tegra_slink_runtime_suspend(struct device *dev) +static int __maybe_unused tegra_slink_runtime_suspend(struct device *dev) { struct spi_master *master = dev_get_drvdata(dev); struct tegra_slink_data *tspi = spi_master_get_devdata(master); @@ -1217,7 +1211,7 @@ static int tegra_slink_runtime_suspend(struct device *dev) return 0; } -static int tegra_slink_runtime_resume(struct device *dev) +static int __maybe_unused tegra_slink_runtime_resume(struct device *dev) { struct spi_master *master = dev_get_drvdata(dev); struct tegra_slink_data *tspi = spi_master_get_devdata(master); diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c index 6b6ef89442837..4bbad00244ab8 100644 --- a/drivers/spi/spi-ti-qspi.c +++ b/drivers/spi/spi-ti-qspi.c @@ -401,6 +401,7 @@ static int ti_qspi_dma_xfer(struct ti_qspi *qspi, dma_addr_t dma_dst, enum dma_ctrl_flags flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT; struct dma_async_tx_descriptor *tx; int ret; + unsigned long time_left; tx = dmaengine_prep_dma_memcpy(chan, dma_dst, dma_src, len, flags); if (!tx) { @@ -420,9 +421,9 @@ static int ti_qspi_dma_xfer(struct ti_qspi *qspi, dma_addr_t dma_dst, } dma_async_issue_pending(chan); - ret = wait_for_completion_timeout(&qspi->transfer_complete, + time_left = wait_for_completion_timeout(&qspi->transfer_complete, msecs_to_jiffies(len)); - if (ret <= 0) { + if (time_left == 0) { dmaengine_terminate_sync(chan); dev_err(qspi->dev, "DMA wait_for_completion_timeout\n"); return -ETIMEDOUT; diff --git a/drivers/spi/spi-zynq-qspi.c b/drivers/spi/spi-zynq-qspi.c index 5cf6993ddce57..b3588240eb39b 100644 --- a/drivers/spi/spi-zynq-qspi.c +++ b/drivers/spi/spi-zynq-qspi.c @@ -533,7 +533,7 @@ static int zynq_qspi_exec_mem_op(struct spi_mem *mem, zynq_qspi_write_op(xqspi, ZYNQ_QSPI_FIFO_DEPTH, true); zynq_qspi_write(xqspi, ZYNQ_QSPI_IEN_OFFSET, ZYNQ_QSPI_IXR_RXTX_MASK); - if (!wait_for_completion_interruptible_timeout(&xqspi->data_completion, + if (!wait_for_completion_timeout(&xqspi->data_completion, msecs_to_jiffies(1000))) err = -ETIMEDOUT; } @@ -551,13 +551,16 @@ static int zynq_qspi_exec_mem_op(struct spi_mem *mem, zynq_qspi_write_op(xqspi, ZYNQ_QSPI_FIFO_DEPTH, true); zynq_qspi_write(xqspi, ZYNQ_QSPI_IEN_OFFSET, ZYNQ_QSPI_IXR_RXTX_MASK); - if (!wait_for_completion_interruptible_timeout(&xqspi->data_completion, + if (!wait_for_completion_timeout(&xqspi->data_completion, msecs_to_jiffies(1000))) err = -ETIMEDOUT; } if (op->dummy.nbytes) { tmpbuf = kzalloc(op->dummy.nbytes, GFP_KERNEL); + if (!tmpbuf) + return -ENOMEM; + memset(tmpbuf, 0xff, op->dummy.nbytes); reinit_completion(&xqspi->data_completion); xqspi->txbuf = tmpbuf; @@ -567,7 +570,7 @@ static int zynq_qspi_exec_mem_op(struct spi_mem *mem, zynq_qspi_write_op(xqspi, ZYNQ_QSPI_FIFO_DEPTH, true); zynq_qspi_write(xqspi, ZYNQ_QSPI_IEN_OFFSET, ZYNQ_QSPI_IXR_RXTX_MASK); - if (!wait_for_completion_interruptible_timeout(&xqspi->data_completion, + if (!wait_for_completion_timeout(&xqspi->data_completion, msecs_to_jiffies(1000))) err = -ETIMEDOUT; @@ -591,7 +594,7 @@ static int zynq_qspi_exec_mem_op(struct spi_mem *mem, zynq_qspi_write_op(xqspi, ZYNQ_QSPI_FIFO_DEPTH, true); zynq_qspi_write(xqspi, ZYNQ_QSPI_IEN_OFFSET, ZYNQ_QSPI_IXR_RXTX_MASK); - if (!wait_for_completion_interruptible_timeout(&xqspi->data_completion, + if (!wait_for_completion_timeout(&xqspi->data_completion, msecs_to_jiffies(1000))) err = -ETIMEDOUT; } diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index ac05c9c864884..b18ae50db1f52 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -844,10 +844,10 @@ int spi_map_buf(struct spi_controller *ctlr, struct device *dev, int i, ret; if (vmalloced_buf || kmap_buf) { - desc_len = min_t(int, max_seg_size, PAGE_SIZE); + desc_len = min_t(unsigned long, max_seg_size, PAGE_SIZE); sgs = DIV_ROUND_UP(len + offset_in_page(buf), desc_len); } else if (virt_addr_valid(buf)) { - desc_len = min_t(int, max_seg_size, ctlr->max_dma_len); + desc_len = min_t(size_t, max_seg_size, ctlr->max_dma_len); sgs = DIV_ROUND_UP(len, desc_len); } else { return -EINVAL; diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c index e6b1ca141b930..88888cc5d1932 100644 --- a/drivers/staging/android/ion/ion.c +++ b/drivers/staging/android/ion/ion.c @@ -114,6 +114,9 @@ static void *ion_buffer_kmap_get(struct ion_buffer *buffer) void *vaddr; if (buffer->kmap_cnt) { + if (buffer->kmap_cnt == INT_MAX) + return ERR_PTR(-EOVERFLOW); + buffer->kmap_cnt++; return buffer->vaddr; } diff --git a/drivers/staging/board/board.c b/drivers/staging/board/board.c index cb6feb34dd401..f980af0373452 100644 --- a/drivers/staging/board/board.c +++ b/drivers/staging/board/board.c @@ -136,6 +136,7 @@ int __init board_staging_register_clock(const struct board_staging_clk *bsc) static int board_staging_add_dev_domain(struct platform_device *pdev, const char *domain) { + struct device *dev = &pdev->dev; struct of_phandle_args pd_args; struct device_node *np; @@ -148,7 +149,11 @@ static int board_staging_add_dev_domain(struct platform_device *pdev, pd_args.np = np; pd_args.args_count = 0; - return of_genpd_add_device(&pd_args, &pdev->dev); + /* Initialization similar to device_pm_init_common() */ + spin_lock_init(&dev->power.lock); + dev->power.early_init = true; + + return of_genpd_add_device(&pd_args, dev); } #else static inline int board_staging_add_dev_domain(struct platform_device *pdev, diff --git a/drivers/staging/comedi/drivers/dt9812.c b/drivers/staging/comedi/drivers/dt9812.c index 634f57730c1e0..704b04d2980d3 100644 --- a/drivers/staging/comedi/drivers/dt9812.c +++ b/drivers/staging/comedi/drivers/dt9812.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include "../comedi_usb.h" @@ -237,22 +238,42 @@ static int dt9812_read_info(struct comedi_device *dev, { struct usb_device *usb = comedi_to_usb_dev(dev); struct dt9812_private *devpriv = dev->private; - struct dt9812_usb_cmd cmd; + struct dt9812_usb_cmd *cmd; + size_t tbuf_size; int count, ret; + void *tbuf; - cmd.cmd = cpu_to_le32(DT9812_R_FLASH_DATA); - cmd.u.flash_data_info.address = + tbuf_size = max(sizeof(*cmd), buf_size); + + tbuf = kzalloc(tbuf_size, GFP_KERNEL); + if (!tbuf) + return -ENOMEM; + + cmd = tbuf; + + cmd->cmd = cpu_to_le32(DT9812_R_FLASH_DATA); + cmd->u.flash_data_info.address = cpu_to_le16(DT9812_DIAGS_BOARD_INFO_ADDR + offset); - cmd.u.flash_data_info.numbytes = cpu_to_le16(buf_size); + cmd->u.flash_data_info.numbytes = cpu_to_le16(buf_size); /* DT9812 only responds to 32 byte writes!! */ ret = usb_bulk_msg(usb, usb_sndbulkpipe(usb, devpriv->cmd_wr.addr), - &cmd, 32, &count, DT9812_USB_TIMEOUT); + cmd, sizeof(*cmd), &count, DT9812_USB_TIMEOUT); if (ret) - return ret; + goto out; + + ret = usb_bulk_msg(usb, usb_rcvbulkpipe(usb, devpriv->cmd_rd.addr), + tbuf, buf_size, &count, DT9812_USB_TIMEOUT); + if (!ret) { + if (count == buf_size) + memcpy(buf, tbuf, buf_size); + else + ret = -EREMOTEIO; + } +out: + kfree(tbuf); - return usb_bulk_msg(usb, usb_rcvbulkpipe(usb, devpriv->cmd_rd.addr), - buf, buf_size, &count, DT9812_USB_TIMEOUT); + return ret; } static int dt9812_read_multiple_registers(struct comedi_device *dev, @@ -261,22 +282,42 @@ static int dt9812_read_multiple_registers(struct comedi_device *dev, { struct usb_device *usb = comedi_to_usb_dev(dev); struct dt9812_private *devpriv = dev->private; - struct dt9812_usb_cmd cmd; + struct dt9812_usb_cmd *cmd; int i, count, ret; + size_t buf_size; + void *buf; - cmd.cmd = cpu_to_le32(DT9812_R_MULTI_BYTE_REG); - cmd.u.read_multi_info.count = reg_count; + buf_size = max_t(size_t, sizeof(*cmd), reg_count); + + buf = kzalloc(buf_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + cmd = buf; + + cmd->cmd = cpu_to_le32(DT9812_R_MULTI_BYTE_REG); + cmd->u.read_multi_info.count = reg_count; for (i = 0; i < reg_count; i++) - cmd.u.read_multi_info.address[i] = address[i]; + cmd->u.read_multi_info.address[i] = address[i]; /* DT9812 only responds to 32 byte writes!! */ ret = usb_bulk_msg(usb, usb_sndbulkpipe(usb, devpriv->cmd_wr.addr), - &cmd, 32, &count, DT9812_USB_TIMEOUT); + cmd, sizeof(*cmd), &count, DT9812_USB_TIMEOUT); if (ret) - return ret; + goto out; + + ret = usb_bulk_msg(usb, usb_rcvbulkpipe(usb, devpriv->cmd_rd.addr), + buf, reg_count, &count, DT9812_USB_TIMEOUT); + if (!ret) { + if (count == reg_count) + memcpy(value, buf, reg_count); + else + ret = -EREMOTEIO; + } +out: + kfree(buf); - return usb_bulk_msg(usb, usb_rcvbulkpipe(usb, devpriv->cmd_rd.addr), - value, reg_count, &count, DT9812_USB_TIMEOUT); + return ret; } static int dt9812_write_multiple_registers(struct comedi_device *dev, @@ -285,19 +326,27 @@ static int dt9812_write_multiple_registers(struct comedi_device *dev, { struct usb_device *usb = comedi_to_usb_dev(dev); struct dt9812_private *devpriv = dev->private; - struct dt9812_usb_cmd cmd; + struct dt9812_usb_cmd *cmd; int i, count; + int ret; + + cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); + if (!cmd) + return -ENOMEM; - cmd.cmd = cpu_to_le32(DT9812_W_MULTI_BYTE_REG); - cmd.u.read_multi_info.count = reg_count; + cmd->cmd = cpu_to_le32(DT9812_W_MULTI_BYTE_REG); + cmd->u.read_multi_info.count = reg_count; for (i = 0; i < reg_count; i++) { - cmd.u.write_multi_info.write[i].address = address[i]; - cmd.u.write_multi_info.write[i].value = value[i]; + cmd->u.write_multi_info.write[i].address = address[i]; + cmd->u.write_multi_info.write[i].value = value[i]; } /* DT9812 only responds to 32 byte writes!! */ - return usb_bulk_msg(usb, usb_sndbulkpipe(usb, devpriv->cmd_wr.addr), - &cmd, 32, &count, DT9812_USB_TIMEOUT); + ret = usb_bulk_msg(usb, usb_sndbulkpipe(usb, devpriv->cmd_wr.addr), + cmd, sizeof(*cmd), &count, DT9812_USB_TIMEOUT); + kfree(cmd); + + return ret; } static int dt9812_rmw_multiple_registers(struct comedi_device *dev, @@ -306,17 +355,25 @@ static int dt9812_rmw_multiple_registers(struct comedi_device *dev, { struct usb_device *usb = comedi_to_usb_dev(dev); struct dt9812_private *devpriv = dev->private; - struct dt9812_usb_cmd cmd; + struct dt9812_usb_cmd *cmd; int i, count; + int ret; + + cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); + if (!cmd) + return -ENOMEM; - cmd.cmd = cpu_to_le32(DT9812_RMW_MULTI_BYTE_REG); - cmd.u.rmw_multi_info.count = reg_count; + cmd->cmd = cpu_to_le32(DT9812_RMW_MULTI_BYTE_REG); + cmd->u.rmw_multi_info.count = reg_count; for (i = 0; i < reg_count; i++) - cmd.u.rmw_multi_info.rmw[i] = rmw[i]; + cmd->u.rmw_multi_info.rmw[i] = rmw[i]; /* DT9812 only responds to 32 byte writes!! */ - return usb_bulk_msg(usb, usb_sndbulkpipe(usb, devpriv->cmd_wr.addr), - &cmd, 32, &count, DT9812_USB_TIMEOUT); + ret = usb_bulk_msg(usb, usb_sndbulkpipe(usb, devpriv->cmd_wr.addr), + cmd, sizeof(*cmd), &count, DT9812_USB_TIMEOUT); + kfree(cmd); + + return ret; } static int dt9812_digital_in(struct comedi_device *dev, u8 *bits) diff --git a/drivers/staging/comedi/drivers/ni_usb6501.c b/drivers/staging/comedi/drivers/ni_usb6501.c index 360e86a19fe32..311632004f08a 100644 --- a/drivers/staging/comedi/drivers/ni_usb6501.c +++ b/drivers/staging/comedi/drivers/ni_usb6501.c @@ -144,6 +144,10 @@ static const u8 READ_COUNTER_RESPONSE[] = {0x00, 0x01, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00}; +/* Largest supported packets */ +static const size_t TX_MAX_SIZE = sizeof(SET_PORT_DIR_REQUEST); +static const size_t RX_MAX_SIZE = sizeof(READ_PORT_RESPONSE); + enum commands { READ_PORT, WRITE_PORT, @@ -501,6 +505,12 @@ static int ni6501_find_endpoints(struct comedi_device *dev) if (!devpriv->ep_rx || !devpriv->ep_tx) return -ENODEV; + if (usb_endpoint_maxp(devpriv->ep_rx) < RX_MAX_SIZE) + return -ENODEV; + + if (usb_endpoint_maxp(devpriv->ep_tx) < TX_MAX_SIZE) + return -ENODEV; + return 0; } diff --git a/drivers/staging/comedi/drivers/vmk80xx.c b/drivers/staging/comedi/drivers/vmk80xx.c index 7956abcbae22b..ccc65cfc519f5 100644 --- a/drivers/staging/comedi/drivers/vmk80xx.c +++ b/drivers/staging/comedi/drivers/vmk80xx.c @@ -90,6 +90,9 @@ enum { #define IC3_VERSION BIT(0) #define IC6_VERSION BIT(1) +#define MIN_BUF_SIZE 64 +#define PACKET_TIMEOUT 10000 /* ms */ + enum vmk80xx_model { VMK8055_MODEL, VMK8061_MODEL @@ -157,22 +160,21 @@ static void vmk80xx_do_bulk_msg(struct comedi_device *dev) __u8 rx_addr; unsigned int tx_pipe; unsigned int rx_pipe; - size_t size; + size_t tx_size; + size_t rx_size; tx_addr = devpriv->ep_tx->bEndpointAddress; rx_addr = devpriv->ep_rx->bEndpointAddress; tx_pipe = usb_sndbulkpipe(usb, tx_addr); rx_pipe = usb_rcvbulkpipe(usb, rx_addr); + tx_size = usb_endpoint_maxp(devpriv->ep_tx); + rx_size = usb_endpoint_maxp(devpriv->ep_rx); - /* - * The max packet size attributes of the K8061 - * input/output endpoints are identical - */ - size = usb_endpoint_maxp(devpriv->ep_tx); + usb_bulk_msg(usb, tx_pipe, devpriv->usb_tx_buf, tx_size, NULL, + PACKET_TIMEOUT); - usb_bulk_msg(usb, tx_pipe, devpriv->usb_tx_buf, - size, NULL, devpriv->ep_tx->bInterval); - usb_bulk_msg(usb, rx_pipe, devpriv->usb_rx_buf, size, NULL, HZ * 10); + usb_bulk_msg(usb, rx_pipe, devpriv->usb_rx_buf, rx_size, NULL, + PACKET_TIMEOUT); } static int vmk80xx_read_packet(struct comedi_device *dev) @@ -191,7 +193,7 @@ static int vmk80xx_read_packet(struct comedi_device *dev) pipe = usb_rcvintpipe(usb, ep->bEndpointAddress); return usb_interrupt_msg(usb, pipe, devpriv->usb_rx_buf, usb_endpoint_maxp(ep), NULL, - HZ * 10); + PACKET_TIMEOUT); } static int vmk80xx_write_packet(struct comedi_device *dev, int cmd) @@ -212,7 +214,7 @@ static int vmk80xx_write_packet(struct comedi_device *dev, int cmd) pipe = usb_sndintpipe(usb, ep->bEndpointAddress); return usb_interrupt_msg(usb, pipe, devpriv->usb_tx_buf, usb_endpoint_maxp(ep), NULL, - HZ * 10); + PACKET_TIMEOUT); } static int vmk80xx_reset_device(struct comedi_device *dev) @@ -678,12 +680,12 @@ static int vmk80xx_alloc_usb_buffers(struct comedi_device *dev) struct vmk80xx_private *devpriv = dev->private; size_t size; - size = usb_endpoint_maxp(devpriv->ep_rx); + size = max(usb_endpoint_maxp(devpriv->ep_rx), MIN_BUF_SIZE); devpriv->usb_rx_buf = kzalloc(size, GFP_KERNEL); if (!devpriv->usb_rx_buf) return -ENOMEM; - size = usb_endpoint_maxp(devpriv->ep_tx); + size = max(usb_endpoint_maxp(devpriv->ep_tx), MIN_BUF_SIZE); devpriv->usb_tx_buf = kzalloc(size, GFP_KERNEL); if (!devpriv->usb_tx_buf) return -ENOMEM; diff --git a/drivers/staging/fbtft/fb_ssd1351.c b/drivers/staging/fbtft/fb_ssd1351.c index cf263a58a1489..6fd549a424d53 100644 --- a/drivers/staging/fbtft/fb_ssd1351.c +++ b/drivers/staging/fbtft/fb_ssd1351.c @@ -187,7 +187,6 @@ static struct fbtft_display display = { }, }; -#ifdef CONFIG_FB_BACKLIGHT static int update_onboard_backlight(struct backlight_device *bd) { struct fbtft_par *par = bl_get_data(bd); @@ -231,9 +230,6 @@ static void register_onboard_backlight(struct fbtft_par *par) if (!par->fbtftops.unregister_backlight) par->fbtftops.unregister_backlight = fbtft_unregister_backlight; } -#else -static void register_onboard_backlight(struct fbtft_par *par) { }; -#endif FBTFT_REGISTER_DRIVER(DRVNAME, "solomon,ssd1351", &display); diff --git a/drivers/staging/fbtft/fb_st7789v.c b/drivers/staging/fbtft/fb_st7789v.c index 3c3f387936e80..30086ae03605f 100644 --- a/drivers/staging/fbtft/fb_st7789v.c +++ b/drivers/staging/fbtft/fb_st7789v.c @@ -76,6 +76,8 @@ enum st7789v_command { */ static int init_display(struct fbtft_par *par) { + par->fbtftops.reset(par); + /* turn off sleep mode */ write_reg(par, MIPI_DCS_EXIT_SLEEP_MODE); mdelay(120); diff --git a/drivers/staging/fbtft/fbtft-core.c b/drivers/staging/fbtft/fbtft-core.c index bc53d68bfcaa3..771697508cec8 100644 --- a/drivers/staging/fbtft/fbtft-core.c +++ b/drivers/staging/fbtft/fbtft-core.c @@ -136,7 +136,6 @@ static int fbtft_request_gpios_dt(struct fbtft_par *par) } #endif -#ifdef CONFIG_FB_BACKLIGHT static int fbtft_backlight_update_status(struct backlight_device *bd) { struct fbtft_par *par = bl_get_data(bd); @@ -169,6 +168,7 @@ void fbtft_unregister_backlight(struct fbtft_par *par) par->info->bl_dev = NULL; } } +EXPORT_SYMBOL(fbtft_unregister_backlight); static const struct backlight_ops fbtft_bl_ops = { .get_brightness = fbtft_backlight_get_brightness, @@ -206,12 +206,7 @@ void fbtft_register_backlight(struct fbtft_par *par) if (!par->fbtftops.unregister_backlight) par->fbtftops.unregister_backlight = fbtft_unregister_backlight; } -#else -void fbtft_register_backlight(struct fbtft_par *par) { }; -void fbtft_unregister_backlight(struct fbtft_par *par) { }; -#endif EXPORT_SYMBOL(fbtft_register_backlight); -EXPORT_SYMBOL(fbtft_unregister_backlight); static void fbtft_set_addr_win(struct fbtft_par *par, int xs, int ys, int xe, int ye) @@ -860,13 +855,11 @@ int fbtft_register_framebuffer(struct fb_info *fb_info) fb_info->fix.smem_len >> 10, text1, HZ / fb_info->fbdefio->delay, text2); -#ifdef CONFIG_FB_BACKLIGHT /* Turn on backlight if available */ if (fb_info->bl_dev) { fb_info->bl_dev->props.power = FB_BLANK_UNBLANK; fb_info->bl_dev->ops->update_status(fb_info->bl_dev); } -#endif return 0; diff --git a/drivers/staging/fbtft/fbtft.h b/drivers/staging/fbtft/fbtft.h index 9b6bdb62093d7..736cd4955b733 100644 --- a/drivers/staging/fbtft/fbtft.h +++ b/drivers/staging/fbtft/fbtft.h @@ -332,7 +332,10 @@ static int __init fbtft_driver_module_init(void) \ ret = spi_register_driver(&fbtft_driver_spi_driver); \ if (ret < 0) \ return ret; \ - return platform_driver_register(&fbtft_driver_platform_driver); \ + ret = platform_driver_register(&fbtft_driver_platform_driver); \ + if (ret < 0) \ + spi_unregister_driver(&fbtft_driver_spi_driver); \ + return ret; \ } \ \ static void __exit fbtft_driver_module_exit(void) \ diff --git a/drivers/staging/fieldbus/anybuss/host.c b/drivers/staging/fieldbus/anybuss/host.c index f69dc49304571..b7a91bdef6f41 100644 --- a/drivers/staging/fieldbus/anybuss/host.c +++ b/drivers/staging/fieldbus/anybuss/host.c @@ -1384,7 +1384,7 @@ anybuss_host_common_probe(struct device *dev, goto err_device; return cd; err_device: - device_unregister(&cd->client->dev); + put_device(&cd->client->dev); err_kthread: kthread_stop(cd->qthread); err_reset: diff --git a/drivers/staging/gdm724x/gdm_lte.c b/drivers/staging/gdm724x/gdm_lte.c index 8093d06086388..4dba62aed10b8 100644 --- a/drivers/staging/gdm724x/gdm_lte.c +++ b/drivers/staging/gdm724x/gdm_lte.c @@ -76,14 +76,15 @@ static void tx_complete(void *arg) static int gdm_lte_rx(struct sk_buff *skb, struct nic *nic, int nic_type) { - int ret; + int ret, len; + len = skb->len + ETH_HLEN; ret = netif_rx_ni(skb); if (ret == NET_RX_DROP) { nic->stats.rx_dropped++; } else { nic->stats.rx_packets++; - nic->stats.rx_bytes += skb->len + ETH_HLEN; + nic->stats.rx_bytes += len; } return 0; diff --git a/drivers/staging/greybus/audio_codec.c b/drivers/staging/greybus/audio_codec.c index 3259bf02ba25e..2418fbf1d2ab9 100644 --- a/drivers/staging/greybus/audio_codec.c +++ b/drivers/staging/greybus/audio_codec.c @@ -620,8 +620,8 @@ static int gbcodec_mute_stream(struct snd_soc_dai *dai, int mute, int stream) break; } if (!data) { - dev_err(dai->dev, "%s:%s DATA connection missing\n", - dai->name, module->name); + dev_err(dai->dev, "%s DATA connection missing\n", + dai->name); mutex_unlock(&codec->lock); return -ENODEV; } diff --git a/drivers/staging/greybus/audio_topology.c b/drivers/staging/greybus/audio_topology.c index cc329b990e165..3e2fbcd20598a 100644 --- a/drivers/staging/greybus/audio_topology.c +++ b/drivers/staging/greybus/audio_topology.c @@ -145,6 +145,9 @@ static const char **gb_generate_enum_strings(struct gbaudio_module_info *gb, items = le32_to_cpu(gbenum->items); strings = devm_kcalloc(gb->dev, items, sizeof(char *), GFP_KERNEL); + if (!strings) + return NULL; + data = gbenum->names; for (i = 0; i < items; i++) { @@ -662,6 +665,8 @@ static int gbaudio_tplg_create_enum_kctl(struct gbaudio_module_info *gb, /* since count=1, and reg is dummy */ gbe->max = le32_to_cpu(gb_enum->items); gbe->texts = gb_generate_enum_strings(gb, gb_enum); + if (!gbe->texts) + return -ENOMEM; /* debug enum info */ dev_dbg(gb->dev, "Max:%d, name_length:%d\n", gbe->max, @@ -871,6 +876,8 @@ static int gbaudio_tplg_create_enum_ctl(struct gbaudio_module_info *gb, /* since count=1, and reg is dummy */ gbe->max = le32_to_cpu(gb_enum->items); gbe->texts = gb_generate_enum_strings(gb, gb_enum); + if (!gbe->texts) + return -ENOMEM; /* debug enum info */ dev_dbg(gb->dev, "Max:%d, name_length:%d\n", gbe->max, @@ -983,6 +990,44 @@ static int gbaudio_widget_event(struct snd_soc_dapm_widget *w, return ret; } +static const struct snd_soc_dapm_widget gbaudio_widgets[] = { + [snd_soc_dapm_spk] = SND_SOC_DAPM_SPK(NULL, gbcodec_event_spk), + [snd_soc_dapm_hp] = SND_SOC_DAPM_HP(NULL, gbcodec_event_hp), + [snd_soc_dapm_mic] = SND_SOC_DAPM_MIC(NULL, gbcodec_event_int_mic), + [snd_soc_dapm_output] = SND_SOC_DAPM_OUTPUT(NULL), + [snd_soc_dapm_input] = SND_SOC_DAPM_INPUT(NULL), + [snd_soc_dapm_switch] = SND_SOC_DAPM_SWITCH_E(NULL, SND_SOC_NOPM, + 0, 0, NULL, + gbaudio_widget_event, + SND_SOC_DAPM_PRE_PMU | + SND_SOC_DAPM_POST_PMD), + [snd_soc_dapm_pga] = SND_SOC_DAPM_PGA_E(NULL, SND_SOC_NOPM, + 0, 0, NULL, 0, + gbaudio_widget_event, + SND_SOC_DAPM_PRE_PMU | + SND_SOC_DAPM_POST_PMD), + [snd_soc_dapm_mixer] = SND_SOC_DAPM_MIXER_E(NULL, SND_SOC_NOPM, + 0, 0, NULL, 0, + gbaudio_widget_event, + SND_SOC_DAPM_PRE_PMU | + SND_SOC_DAPM_POST_PMD), + [snd_soc_dapm_mux] = SND_SOC_DAPM_MUX_E(NULL, SND_SOC_NOPM, + 0, 0, NULL, + gbaudio_widget_event, + SND_SOC_DAPM_PRE_PMU | + SND_SOC_DAPM_POST_PMD), + [snd_soc_dapm_aif_in] = SND_SOC_DAPM_AIF_IN_E(NULL, NULL, 0, + SND_SOC_NOPM, 0, 0, + gbaudio_widget_event, + SND_SOC_DAPM_PRE_PMU | + SND_SOC_DAPM_POST_PMD), + [snd_soc_dapm_aif_out] = SND_SOC_DAPM_AIF_OUT_E(NULL, NULL, 0, + SND_SOC_NOPM, 0, 0, + gbaudio_widget_event, + SND_SOC_DAPM_PRE_PMU | + SND_SOC_DAPM_POST_PMD), +}; + static int gbaudio_tplg_create_widget(struct gbaudio_module_info *module, struct snd_soc_dapm_widget *dw, struct gb_audio_widget *w, int *w_size) @@ -1043,6 +1088,10 @@ static int gbaudio_tplg_create_widget(struct gbaudio_module_info *module, csize += le16_to_cpu(gbenum->names_length); control->texts = (const char * const *) gb_generate_enum_strings(module, gbenum); + if (!control->texts) { + ret = -ENOMEM; + goto error; + } control->items = le32_to_cpu(gbenum->items); } else { csize = sizeof(struct gb_audio_control); @@ -1061,77 +1110,37 @@ static int gbaudio_tplg_create_widget(struct gbaudio_module_info *module, switch (w->type) { case snd_soc_dapm_spk: - *dw = (struct snd_soc_dapm_widget) - SND_SOC_DAPM_SPK(w->name, gbcodec_event_spk); + *dw = gbaudio_widgets[w->type]; module->op_devices |= GBAUDIO_DEVICE_OUT_SPEAKER; break; case snd_soc_dapm_hp: - *dw = (struct snd_soc_dapm_widget) - SND_SOC_DAPM_HP(w->name, gbcodec_event_hp); + *dw = gbaudio_widgets[w->type]; module->op_devices |= (GBAUDIO_DEVICE_OUT_WIRED_HEADSET | GBAUDIO_DEVICE_OUT_WIRED_HEADPHONE); module->ip_devices |= GBAUDIO_DEVICE_IN_WIRED_HEADSET; break; case snd_soc_dapm_mic: - *dw = (struct snd_soc_dapm_widget) - SND_SOC_DAPM_MIC(w->name, gbcodec_event_int_mic); + *dw = gbaudio_widgets[w->type]; module->ip_devices |= GBAUDIO_DEVICE_IN_BUILTIN_MIC; break; case snd_soc_dapm_output: - *dw = (struct snd_soc_dapm_widget)SND_SOC_DAPM_OUTPUT(w->name); - break; case snd_soc_dapm_input: - *dw = (struct snd_soc_dapm_widget)SND_SOC_DAPM_INPUT(w->name); - break; case snd_soc_dapm_switch: - *dw = (struct snd_soc_dapm_widget) - SND_SOC_DAPM_SWITCH_E(w->name, SND_SOC_NOPM, 0, 0, - widget_kctls, - gbaudio_widget_event, - SND_SOC_DAPM_PRE_PMU | - SND_SOC_DAPM_POST_PMD); - break; case snd_soc_dapm_pga: - *dw = (struct snd_soc_dapm_widget) - SND_SOC_DAPM_PGA_E(w->name, SND_SOC_NOPM, 0, 0, NULL, 0, - gbaudio_widget_event, - SND_SOC_DAPM_PRE_PMU | - SND_SOC_DAPM_POST_PMD); - break; case snd_soc_dapm_mixer: - *dw = (struct snd_soc_dapm_widget) - SND_SOC_DAPM_MIXER_E(w->name, SND_SOC_NOPM, 0, 0, NULL, - 0, gbaudio_widget_event, - SND_SOC_DAPM_PRE_PMU | - SND_SOC_DAPM_POST_PMD); - break; case snd_soc_dapm_mux: - *dw = (struct snd_soc_dapm_widget) - SND_SOC_DAPM_MUX_E(w->name, SND_SOC_NOPM, 0, 0, - widget_kctls, gbaudio_widget_event, - SND_SOC_DAPM_PRE_PMU | - SND_SOC_DAPM_POST_PMD); + *dw = gbaudio_widgets[w->type]; break; case snd_soc_dapm_aif_in: - *dw = (struct snd_soc_dapm_widget) - SND_SOC_DAPM_AIF_IN_E(w->name, w->sname, 0, - SND_SOC_NOPM, - 0, 0, gbaudio_widget_event, - SND_SOC_DAPM_PRE_PMU | - SND_SOC_DAPM_POST_PMD); - break; case snd_soc_dapm_aif_out: - *dw = (struct snd_soc_dapm_widget) - SND_SOC_DAPM_AIF_OUT_E(w->name, w->sname, 0, - SND_SOC_NOPM, - 0, 0, gbaudio_widget_event, - SND_SOC_DAPM_PRE_PMU | - SND_SOC_DAPM_POST_PMD); + *dw = gbaudio_widgets[w->type]; + dw->sname = w->sname; break; default: ret = -EINVAL; goto error; } + dw->name = w->name; dev_dbg(module->dev, "%s: widget of type %d created\n", dw->name, dw->id); @@ -1192,6 +1201,10 @@ static int gbaudio_tplg_process_kcontrols(struct gbaudio_module_info *module, csize += le16_to_cpu(gbenum->names_length); control->texts = (const char * const *) gb_generate_enum_strings(module, gbenum); + if (!control->texts) { + ret = -ENOMEM; + goto error; + } control->items = le32_to_cpu(gbenum->items); } else { csize = sizeof(struct gb_audio_control); diff --git a/drivers/staging/greybus/uart.c b/drivers/staging/greybus/uart.c index fc1bd68889c91..0e6bebc20695d 100644 --- a/drivers/staging/greybus/uart.c +++ b/drivers/staging/greybus/uart.c @@ -789,6 +789,17 @@ static void gb_tty_port_shutdown(struct tty_port *port) gbphy_runtime_put_autosuspend(gb_tty->gbphy_dev); } +static void gb_tty_port_destruct(struct tty_port *port) +{ + struct gb_tty *gb_tty = container_of(port, struct gb_tty, port); + + if (gb_tty->minor != GB_NUM_MINORS) + release_minor(gb_tty); + kfifo_free(&gb_tty->write_fifo); + kfree(gb_tty->buffer); + kfree(gb_tty); +} + static const struct tty_operations gb_ops = { .install = gb_tty_install, .open = gb_tty_open, @@ -814,6 +825,7 @@ static const struct tty_port_operations gb_port_ops = { .dtr_rts = gb_tty_dtr_rts, .activate = gb_tty_port_activate, .shutdown = gb_tty_port_shutdown, + .destruct = gb_tty_port_destruct, }; static int gb_uart_probe(struct gbphy_device *gbphy_dev, @@ -826,17 +838,11 @@ static int gb_uart_probe(struct gbphy_device *gbphy_dev, int retval; int minor; - gb_tty = kzalloc(sizeof(*gb_tty), GFP_KERNEL); - if (!gb_tty) - return -ENOMEM; - connection = gb_connection_create(gbphy_dev->bundle, le16_to_cpu(gbphy_dev->cport_desc->id), gb_uart_request_handler); - if (IS_ERR(connection)) { - retval = PTR_ERR(connection); - goto exit_tty_free; - } + if (IS_ERR(connection)) + return PTR_ERR(connection); max_payload = gb_operation_get_payload_size_max(connection); if (max_payload < sizeof(struct gb_uart_send_data_request)) { @@ -844,13 +850,23 @@ static int gb_uart_probe(struct gbphy_device *gbphy_dev, goto exit_connection_destroy; } + gb_tty = kzalloc(sizeof(*gb_tty), GFP_KERNEL); + if (!gb_tty) { + retval = -ENOMEM; + goto exit_connection_destroy; + } + + tty_port_init(&gb_tty->port); + gb_tty->port.ops = &gb_port_ops; + gb_tty->minor = GB_NUM_MINORS; + gb_tty->buffer_payload_max = max_payload - sizeof(struct gb_uart_send_data_request); gb_tty->buffer = kzalloc(gb_tty->buffer_payload_max, GFP_KERNEL); if (!gb_tty->buffer) { retval = -ENOMEM; - goto exit_connection_destroy; + goto exit_put_port; } INIT_WORK(&gb_tty->tx_work, gb_uart_tx_write_work); @@ -858,7 +874,7 @@ static int gb_uart_probe(struct gbphy_device *gbphy_dev, retval = kfifo_alloc(&gb_tty->write_fifo, GB_UART_WRITE_FIFO_SIZE, GFP_KERNEL); if (retval) - goto exit_buf_free; + goto exit_put_port; gb_tty->credits = GB_UART_FIRMWARE_CREDITS; init_completion(&gb_tty->credits_complete); @@ -872,7 +888,7 @@ static int gb_uart_probe(struct gbphy_device *gbphy_dev, } else { retval = minor; } - goto exit_kfifo_free; + goto exit_put_port; } gb_tty->minor = minor; @@ -881,9 +897,6 @@ static int gb_uart_probe(struct gbphy_device *gbphy_dev, init_waitqueue_head(&gb_tty->wioctl); mutex_init(&gb_tty->mutex); - tty_port_init(&gb_tty->port); - gb_tty->port.ops = &gb_port_ops; - gb_tty->connection = connection; gb_tty->gbphy_dev = gbphy_dev; gb_connection_set_data(connection, gb_tty); @@ -891,7 +904,7 @@ static int gb_uart_probe(struct gbphy_device *gbphy_dev, retval = gb_connection_enable_tx(connection); if (retval) - goto exit_release_minor; + goto exit_put_port; send_control(gb_tty, gb_tty->ctrlout); @@ -918,16 +931,10 @@ static int gb_uart_probe(struct gbphy_device *gbphy_dev, exit_connection_disable: gb_connection_disable(connection); -exit_release_minor: - release_minor(gb_tty); -exit_kfifo_free: - kfifo_free(&gb_tty->write_fifo); -exit_buf_free: - kfree(gb_tty->buffer); +exit_put_port: + tty_port_put(&gb_tty->port); exit_connection_destroy: gb_connection_destroy(connection); -exit_tty_free: - kfree(gb_tty); return retval; } @@ -958,15 +965,10 @@ static void gb_uart_remove(struct gbphy_device *gbphy_dev) gb_connection_disable_rx(connection); tty_unregister_device(gb_tty_driver, gb_tty->minor); - /* FIXME - free transmit / receive buffers */ - gb_connection_disable(connection); - tty_port_destroy(&gb_tty->port); gb_connection_destroy(connection); - release_minor(gb_tty); - kfifo_free(&gb_tty->write_fifo); - kfree(gb_tty->buffer); - kfree(gb_tty); + + tty_port_put(&gb_tty->port); } static int gb_tty_init(void) diff --git a/drivers/staging/iio/adc/ad7280a.c b/drivers/staging/iio/adc/ad7280a.c index 19a5f244dcae2..d8886c5c0d1f2 100644 --- a/drivers/staging/iio/adc/ad7280a.c +++ b/drivers/staging/iio/adc/ad7280a.c @@ -107,9 +107,9 @@ static unsigned int ad7280a_devaddr(unsigned int addr) { return ((addr & 0x1) << 4) | - ((addr & 0x2) << 3) | + ((addr & 0x2) << 2) | (addr & 0x4) | - ((addr & 0x8) >> 3) | + ((addr & 0x8) >> 2) | ((addr & 0x10) >> 4); } diff --git a/drivers/staging/ks7010/Kconfig b/drivers/staging/ks7010/Kconfig index 0987fdc2f70db..8ea6c09286798 100644 --- a/drivers/staging/ks7010/Kconfig +++ b/drivers/staging/ks7010/Kconfig @@ -5,6 +5,9 @@ config KS7010 select WIRELESS_EXT select WEXT_PRIV select FW_LOADER + select CRYPTO + select CRYPTO_HASH + select CRYPTO_MICHAEL_MIC help This is a driver for KeyStream KS7010 based SDIO WIFI cards. It is found on at least later Spectec SDW-821 (FCC-ID "S2Y-WLAN-11G-K" only, diff --git a/drivers/staging/ks7010/ks7010_sdio.c b/drivers/staging/ks7010/ks7010_sdio.c index 4b379542ecd50..3fbe223d59b8e 100644 --- a/drivers/staging/ks7010/ks7010_sdio.c +++ b/drivers/staging/ks7010/ks7010_sdio.c @@ -938,9 +938,9 @@ static void ks7010_private_init(struct ks_wlan_private *priv, memset(&priv->wstats, 0, sizeof(priv->wstats)); /* sleep mode */ + atomic_set(&priv->sleepstatus.status, 0); atomic_set(&priv->sleepstatus.doze_request, 0); atomic_set(&priv->sleepstatus.wakeup_request, 0); - atomic_set(&priv->sleepstatus.wakeup_request, 0); trx_device_init(priv); hostif_init(priv); diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c index 32e5966ba5c5f..58cf44045b396 100644 --- a/drivers/staging/media/hantro/hantro_drv.c +++ b/drivers/staging/media/hantro/hantro_drv.c @@ -823,7 +823,7 @@ static int hantro_probe(struct platform_device *pdev) ret = clk_bulk_prepare(vpu->variant->num_clocks, vpu->clocks); if (ret) { dev_err(&pdev->dev, "Failed to prepare clocks\n"); - return ret; + goto err_pm_disable; } ret = v4l2_device_register(&pdev->dev, &vpu->v4l2_dev); @@ -879,6 +879,7 @@ static int hantro_probe(struct platform_device *pdev) v4l2_device_unregister(&vpu->v4l2_dev); err_clk_unprepare: clk_bulk_unprepare(vpu->variant->num_clocks, vpu->clocks); +err_pm_disable: pm_runtime_dont_use_autosuspend(vpu->dev); pm_runtime_disable(vpu->dev); return ret; diff --git a/drivers/staging/media/hantro/hantro_h1_jpeg_enc.c b/drivers/staging/media/hantro/hantro_h1_jpeg_enc.c index 8b76f1f13b062..e81a354b88720 100644 --- a/drivers/staging/media/hantro/hantro_h1_jpeg_enc.c +++ b/drivers/staging/media/hantro/hantro_h1_jpeg_enc.c @@ -23,7 +23,7 @@ static void hantro_h1_set_src_img_ctrl(struct hantro_dev *vpu, reg = H1_REG_IN_IMG_CTRL_ROW_LEN(pix_fmt->width) | H1_REG_IN_IMG_CTRL_OVRFLR_D4(0) - | H1_REG_IN_IMG_CTRL_OVRFLB_D4(0) + | H1_REG_IN_IMG_CTRL_OVRFLB(0) | H1_REG_IN_IMG_CTRL_FMT(ctx->vpu_src_fmt->enc_fmt); vepu_write_relaxed(vpu, reg, H1_REG_IN_IMG_CTRL); } diff --git a/drivers/staging/media/hantro/hantro_h1_regs.h b/drivers/staging/media/hantro/hantro_h1_regs.h index d6e9825bb5c7b..30e7e7b920b55 100644 --- a/drivers/staging/media/hantro/hantro_h1_regs.h +++ b/drivers/staging/media/hantro/hantro_h1_regs.h @@ -47,7 +47,7 @@ #define H1_REG_IN_IMG_CTRL 0x03c #define H1_REG_IN_IMG_CTRL_ROW_LEN(x) ((x) << 12) #define H1_REG_IN_IMG_CTRL_OVRFLR_D4(x) ((x) << 10) -#define H1_REG_IN_IMG_CTRL_OVRFLB_D4(x) ((x) << 6) +#define H1_REG_IN_IMG_CTRL_OVRFLB(x) ((x) << 6) #define H1_REG_IN_IMG_CTRL_FMT(x) ((x) << 2) #define H1_REG_ENC_CTRL0 0x040 #define H1_REG_ENC_CTRL0_INIT_QP(x) ((x) << 26) diff --git a/drivers/staging/media/imx/imx-media-dev-common.c b/drivers/staging/media/imx/imx-media-dev-common.c index 66b505f7e8dff..137e414cda186 100644 --- a/drivers/staging/media/imx/imx-media-dev-common.c +++ b/drivers/staging/media/imx/imx-media-dev-common.c @@ -373,6 +373,8 @@ struct imx_media_dev *imx_media_dev_init(struct device *dev, imxmd->v4l2_dev.notify = imx_media_notify; strscpy(imxmd->v4l2_dev.name, "imx-media", sizeof(imxmd->v4l2_dev.name)); + snprintf(imxmd->md.bus_info, sizeof(imxmd->md.bus_info), + "platform:%s", dev_name(imxmd->md.dev)); media_device_init(&imxmd->md); diff --git a/drivers/staging/media/ipu3/ipu3-css-fw.c b/drivers/staging/media/ipu3/ipu3-css-fw.c index 45aff76198e2c..981693eed8155 100644 --- a/drivers/staging/media/ipu3/ipu3-css-fw.c +++ b/drivers/staging/media/ipu3/ipu3-css-fw.c @@ -124,12 +124,11 @@ int imgu_css_fw_init(struct imgu_css *css) /* Check and display fw header info */ css->fwp = (struct imgu_fw_header *)css->fw->data; - if (css->fw->size < sizeof(struct imgu_fw_header *) || + if (css->fw->size < struct_size(css->fwp, binary_header, 1) || css->fwp->file_header.h_size != sizeof(struct imgu_fw_bi_file_h)) goto bad_fw; - if (sizeof(struct imgu_fw_bi_file_h) + - css->fwp->file_header.binary_nr * sizeof(struct imgu_fw_info) > - css->fw->size) + if (struct_size(css->fwp, binary_header, + css->fwp->file_header.binary_nr) > css->fw->size) goto bad_fw; dev_info(dev, "loaded firmware version %.64s, %u binaries, %zu bytes\n", diff --git a/drivers/staging/media/ipu3/ipu3-css-fw.h b/drivers/staging/media/ipu3/ipu3-css-fw.h index 79ffa70451390..650fd25fc79ee 100644 --- a/drivers/staging/media/ipu3/ipu3-css-fw.h +++ b/drivers/staging/media/ipu3/ipu3-css-fw.h @@ -170,7 +170,7 @@ struct imgu_fw_bi_file_h { struct imgu_fw_header { struct imgu_fw_bi_file_h file_header; - struct imgu_fw_info binary_header[1]; /* binary_nr items */ + struct imgu_fw_info binary_header[]; /* binary_nr items */ }; /******************* Firmware functions *******************/ diff --git a/drivers/staging/media/ipu3/ipu3-v4l2.c b/drivers/staging/media/ipu3/ipu3-v4l2.c index 908ae74aa970d..53239ea67fe48 100644 --- a/drivers/staging/media/ipu3/ipu3-v4l2.c +++ b/drivers/staging/media/ipu3/ipu3-v4l2.c @@ -594,11 +594,12 @@ static const struct imgu_fmt *find_format(struct v4l2_format *f, u32 type) static int imgu_vidioc_querycap(struct file *file, void *fh, struct v4l2_capability *cap) { - struct imgu_video_device *node = file_to_intel_imgu_node(file); + struct imgu_device *imgu = video_drvdata(file); strscpy(cap->driver, IMGU_NAME, sizeof(cap->driver)); strscpy(cap->card, IMGU_NAME, sizeof(cap->card)); - snprintf(cap->bus_info, sizeof(cap->bus_info), "PCI:%s", node->name); + snprintf(cap->bus_info, sizeof(cap->bus_info), "PCI:%s", + pci_name(imgu->pci_dev)); return 0; } @@ -695,7 +696,7 @@ static int imgu_fmt(struct imgu_device *imgu, unsigned int pipe, int node, /* CSS expects some format on OUT queue */ if (i != IPU3_CSS_QUEUE_OUT && - !imgu_pipe->nodes[inode].enabled) { + !imgu_pipe->nodes[inode].enabled && !try) { fmts[i] = NULL; continue; } diff --git a/drivers/staging/mt7621-dts/gbpc1.dts b/drivers/staging/mt7621-dts/gbpc1.dts index 1fb560ff059c8..1713283e60d4e 100644 --- a/drivers/staging/mt7621-dts/gbpc1.dts +++ b/drivers/staging/mt7621-dts/gbpc1.dts @@ -11,7 +11,8 @@ memory@0 { device_type = "memory"; - reg = <0x0 0x1c000000>, <0x20000000 0x4000000>; + reg = <0x00000000 0x1c000000>, + <0x20000000 0x04000000>; }; chosen { @@ -37,24 +38,16 @@ gpio-leds { compatible = "gpio-leds"; - system { - label = "gb-pc1:green:system"; + power { + label = "green:power"; gpios = <&gpio 6 GPIO_ACTIVE_LOW>; + linux,default-trigger = "default-on"; }; - status { - label = "gb-pc1:green:status"; + system { + label = "green:system"; gpios = <&gpio 8 GPIO_ACTIVE_LOW>; - }; - - lan1 { - label = "gb-pc1:green:lan1"; - gpios = <&gpio 24 GPIO_ACTIVE_LOW>; - }; - - lan2 { - label = "gb-pc1:green:lan2"; - gpios = <&gpio 25 GPIO_ACTIVE_LOW>; + linux,default-trigger = "disk-activity"; }; }; }; @@ -94,9 +87,8 @@ partition@50000 { label = "firmware"; - reg = <0x50000 0x1FB0000>; + reg = <0x50000 0x1fb0000>; }; - }; }; @@ -118,9 +110,12 @@ }; &pinctrl { - state_default: pinctrl0 { - default_gpio: gpio { - groups = "wdt", "rgmii2", "uart3"; + pinctrl-names = "default"; + pinctrl-0 = <&state_default>; + + state_default: state-default { + gpio-pinmux { + groups = "rgmii2", "uart3", "wdt"; function = "gpio"; }; }; @@ -129,12 +124,13 @@ &switch0 { ports { port@0 { + status = "okay"; label = "ethblack"; - status = "ok"; }; + port@4 { + status = "okay"; label = "ethblue"; - status = "ok"; }; }; }; diff --git a/drivers/staging/mt7621-pinctrl/pinctrl-rt2880.c b/drivers/staging/mt7621-pinctrl/pinctrl-rt2880.c index 0ba4e4e070a9f..7cfbdfb10e23e 100644 --- a/drivers/staging/mt7621-pinctrl/pinctrl-rt2880.c +++ b/drivers/staging/mt7621-pinctrl/pinctrl-rt2880.c @@ -267,6 +267,8 @@ static int rt2880_pinmux_pins(struct rt2880_priv *p) p->func[i]->pin_count, sizeof(int), GFP_KERNEL); + if (!p->func[i]->pins) + return -ENOMEM; for (j = 0; j < p->func[i]->pin_count; j++) p->func[i]->pins[j] = p->func[i]->pin_first + j; diff --git a/drivers/staging/qlge/qlge_main.c b/drivers/staging/qlge/qlge_main.c index 6cae33072496c..1205a5478f575 100644 --- a/drivers/staging/qlge/qlge_main.c +++ b/drivers/staging/qlge/qlge_main.c @@ -4862,7 +4862,7 @@ static void ql_eeh_close(struct net_device *ndev) * a PCI bus error is detected. */ static pci_ers_result_t qlge_io_error_detected(struct pci_dev *pdev, - enum pci_channel_state state) + pci_channel_state_t state) { struct net_device *ndev = pci_get_drvdata(pdev); struct ql_adapter *qdev = netdev_priv(ndev); diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c index c702ee9691b1d..bcbf0c8cd4209 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c @@ -2559,13 +2559,14 @@ static void _rtl92e_pci_disconnect(struct pci_dev *pdev) free_irq(dev->irq, dev); priv->irq = 0; } - free_rtllib(dev); if (dev->mem_start != 0) { iounmap((void __iomem *)dev->mem_start); release_mem_region(pci_resource_start(pdev, 1), pci_resource_len(pdev, 1)); } + + free_rtllib(dev); } else { priv = rtllib_priv(dev); } diff --git a/drivers/staging/rtl8192e/rtllib.h b/drivers/staging/rtl8192e/rtllib.h index 2eeb9a43734e3..49bf3ad31f912 100644 --- a/drivers/staging/rtl8192e/rtllib.h +++ b/drivers/staging/rtl8192e/rtllib.h @@ -1982,7 +1982,7 @@ void rtllib_softmac_xmit(struct rtllib_txb *txb, struct rtllib_device *ieee); void rtllib_stop_send_beacons(struct rtllib_device *ieee); void notify_wx_assoc_event(struct rtllib_device *ieee); void rtllib_start_ibss(struct rtllib_device *ieee); -void rtllib_softmac_init(struct rtllib_device *ieee); +int rtllib_softmac_init(struct rtllib_device *ieee); void rtllib_softmac_free(struct rtllib_device *ieee); void rtllib_disassociate(struct rtllib_device *ieee); void rtllib_stop_scan(struct rtllib_device *ieee); diff --git a/drivers/staging/rtl8192e/rtllib_module.c b/drivers/staging/rtl8192e/rtllib_module.c index 64d9feee1f392..f00ac94b2639b 100644 --- a/drivers/staging/rtl8192e/rtllib_module.c +++ b/drivers/staging/rtl8192e/rtllib_module.c @@ -88,7 +88,7 @@ struct net_device *alloc_rtllib(int sizeof_priv) err = rtllib_networks_allocate(ieee); if (err) { pr_err("Unable to allocate beacon storage: %d\n", err); - goto failed; + goto free_netdev; } rtllib_networks_initialize(ieee); @@ -121,11 +121,13 @@ struct net_device *alloc_rtllib(int sizeof_priv) ieee->hwsec_active = 0; memset(ieee->swcamtable, 0, sizeof(struct sw_cam_table) * 32); - rtllib_softmac_init(ieee); + err = rtllib_softmac_init(ieee); + if (err) + goto free_crypt_info; ieee->pHTInfo = kzalloc(sizeof(struct rt_hi_throughput), GFP_KERNEL); if (!ieee->pHTInfo) - return NULL; + goto free_softmac; HTUpdateDefaultSetting(ieee); HTInitializeHTInfo(ieee); @@ -141,8 +143,14 @@ struct net_device *alloc_rtllib(int sizeof_priv) return dev; - failed: +free_softmac: + rtllib_softmac_free(ieee); +free_crypt_info: + lib80211_crypt_info_free(&ieee->crypt_info); + rtllib_networks_free(ieee); +free_netdev: free_netdev(dev); + return NULL; } EXPORT_SYMBOL(alloc_rtllib); diff --git a/drivers/staging/rtl8192e/rtllib_softmac.c b/drivers/staging/rtl8192e/rtllib_softmac.c index f2f7529e7c80e..0154f5791b121 100644 --- a/drivers/staging/rtl8192e/rtllib_softmac.c +++ b/drivers/staging/rtl8192e/rtllib_softmac.c @@ -651,9 +651,9 @@ static void rtllib_beacons_stop(struct rtllib_device *ieee) spin_lock_irqsave(&ieee->beacon_lock, flags); ieee->beacon_txing = 0; - del_timer_sync(&ieee->beacon_timer); spin_unlock_irqrestore(&ieee->beacon_lock, flags); + del_timer_sync(&ieee->beacon_timer); } @@ -2952,7 +2952,7 @@ void rtllib_start_protocol(struct rtllib_device *ieee) } } -void rtllib_softmac_init(struct rtllib_device *ieee) +int rtllib_softmac_init(struct rtllib_device *ieee) { int i; @@ -2963,7 +2963,8 @@ void rtllib_softmac_init(struct rtllib_device *ieee) ieee->seq_ctrl[i] = 0; ieee->dot11d_info = kzalloc(sizeof(struct rt_dot11d_info), GFP_ATOMIC); if (!ieee->dot11d_info) - netdev_err(ieee->dev, "Can't alloc memory for DOT11D\n"); + return -ENOMEM; + ieee->LinkDetectInfo.SlotIndex = 0; ieee->LinkDetectInfo.SlotNum = 2; ieee->LinkDetectInfo.NumRecvBcnInPeriod = 0; @@ -3031,6 +3032,7 @@ void rtllib_softmac_init(struct rtllib_device *ieee) (void(*)(unsigned long)) rtllib_sta_ps, (unsigned long)ieee); + return 0; } void rtllib_softmac_free(struct rtllib_device *ieee) diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c b/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c index 33a6af7aad225..a869694337f72 100644 --- a/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c +++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c @@ -528,9 +528,9 @@ static void ieee80211_beacons_stop(struct ieee80211_device *ieee) spin_lock_irqsave(&ieee->beacon_lock, flags); ieee->beacon_txing = 0; - del_timer_sync(&ieee->beacon_timer); spin_unlock_irqrestore(&ieee->beacon_lock, flags); + del_timer_sync(&ieee->beacon_timer); } void ieee80211_stop_send_beacons(struct ieee80211_device *ieee) diff --git a/drivers/staging/rtl8192u/r8192U_core.c b/drivers/staging/rtl8192u/r8192U_core.c index 66cd43f963c9a..6f65a9c9d6cf3 100644 --- a/drivers/staging/rtl8192u/r8192U_core.c +++ b/drivers/staging/rtl8192u/r8192U_core.c @@ -236,7 +236,7 @@ int write_nic_byte_E(struct net_device *dev, int indx, u8 data) status = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), RTL8187_REQ_SET_REGS, RTL8187_REQT_WRITE, - indx | 0xfe00, 0, usbdata, 1, HZ / 2); + indx | 0xfe00, 0, usbdata, 1, 500); kfree(usbdata); if (status < 0) { @@ -258,7 +258,7 @@ int read_nic_byte_E(struct net_device *dev, int indx, u8 *data) status = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), RTL8187_REQ_GET_REGS, RTL8187_REQT_READ, - indx | 0xfe00, 0, usbdata, 1, HZ / 2); + indx | 0xfe00, 0, usbdata, 1, 500); *data = *usbdata; kfree(usbdata); @@ -286,7 +286,7 @@ int write_nic_byte(struct net_device *dev, int indx, u8 data) status = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), RTL8187_REQ_SET_REGS, RTL8187_REQT_WRITE, (indx & 0xff) | 0xff00, (indx >> 8) & 0x0f, - usbdata, 1, HZ / 2); + usbdata, 1, 500); kfree(usbdata); if (status < 0) { @@ -313,7 +313,7 @@ int write_nic_word(struct net_device *dev, int indx, u16 data) status = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), RTL8187_REQ_SET_REGS, RTL8187_REQT_WRITE, (indx & 0xff) | 0xff00, (indx >> 8) & 0x0f, - usbdata, 2, HZ / 2); + usbdata, 2, 500); kfree(usbdata); if (status < 0) { @@ -340,7 +340,7 @@ int write_nic_dword(struct net_device *dev, int indx, u32 data) status = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), RTL8187_REQ_SET_REGS, RTL8187_REQT_WRITE, (indx & 0xff) | 0xff00, (indx >> 8) & 0x0f, - usbdata, 4, HZ / 2); + usbdata, 4, 500); kfree(usbdata); @@ -367,7 +367,7 @@ int read_nic_byte(struct net_device *dev, int indx, u8 *data) status = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), RTL8187_REQ_GET_REGS, RTL8187_REQT_READ, (indx & 0xff) | 0xff00, (indx >> 8) & 0x0f, - usbdata, 1, HZ / 2); + usbdata, 1, 500); *data = *usbdata; kfree(usbdata); @@ -394,7 +394,7 @@ int read_nic_word(struct net_device *dev, int indx, u16 *data) status = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), RTL8187_REQ_GET_REGS, RTL8187_REQT_READ, (indx & 0xff) | 0xff00, (indx >> 8) & 0x0f, - usbdata, 2, HZ / 2); + usbdata, 2, 500); *data = *usbdata; kfree(usbdata); @@ -418,7 +418,7 @@ static int read_nic_word_E(struct net_device *dev, int indx, u16 *data) status = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), RTL8187_REQ_GET_REGS, RTL8187_REQT_READ, - indx | 0xfe00, 0, usbdata, 2, HZ / 2); + indx | 0xfe00, 0, usbdata, 2, 500); *data = *usbdata; kfree(usbdata); @@ -444,7 +444,7 @@ int read_nic_dword(struct net_device *dev, int indx, u32 *data) status = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), RTL8187_REQ_GET_REGS, RTL8187_REQT_READ, (indx & 0xff) | 0xff00, (indx >> 8) & 0x0f, - usbdata, 4, HZ / 2); + usbdata, 4, 500); *data = *usbdata; kfree(usbdata); @@ -4338,7 +4338,7 @@ static void TranslateRxSignalStuff819xUsb(struct sk_buff *skb, bpacket_match_bssid = (type != IEEE80211_FTYPE_CTL) && (ether_addr_equal(priv->ieee80211->current_network.bssid, (fc & IEEE80211_FCTL_TODS) ? hdr->addr1 : (fc & IEEE80211_FCTL_FROMDS) ? hdr->addr2 : hdr->addr3)) && (!pstats->bHwError) && (!pstats->bCRC) && (!pstats->bICV); - bpacket_toself = bpacket_match_bssid & + bpacket_toself = bpacket_match_bssid && (ether_addr_equal(praddr, priv->ieee80211->dev->dev_addr)); if (WLAN_FC_GET_FRAMETYPE(fc) == IEEE80211_STYPE_BEACON) diff --git a/drivers/staging/rtl8712/usb_intf.c b/drivers/staging/rtl8712/usb_intf.c index 49188ab046123..f7c1258eaa394 100644 --- a/drivers/staging/rtl8712/usb_intf.c +++ b/drivers/staging/rtl8712/usb_intf.c @@ -539,13 +539,13 @@ static int r871xu_drv_init(struct usb_interface *pusb_intf, } else { AutoloadFail = false; } - if (((mac[0] == 0xff) && (mac[1] == 0xff) && + if ((!AutoloadFail) || + ((mac[0] == 0xff) && (mac[1] == 0xff) && (mac[2] == 0xff) && (mac[3] == 0xff) && (mac[4] == 0xff) && (mac[5] == 0xff)) || ((mac[0] == 0x00) && (mac[1] == 0x00) && (mac[2] == 0x00) && (mac[3] == 0x00) && - (mac[4] == 0x00) && (mac[5] == 0x00)) || - (!AutoloadFail)) { + (mac[4] == 0x00) && (mac[5] == 0x00))) { mac[0] = 0x00; mac[1] = 0xe0; mac[2] = 0x4c; diff --git a/drivers/staging/rtl8712/usb_ops.c b/drivers/staging/rtl8712/usb_ops.c index e64845e6adf3d..af9966d03979c 100644 --- a/drivers/staging/rtl8712/usb_ops.c +++ b/drivers/staging/rtl8712/usb_ops.c @@ -29,7 +29,8 @@ static u8 usb_read8(struct intf_hdl *intfhdl, u32 addr) u16 wvalue; u16 index; u16 len; - __le32 data; + int status; + __le32 data = 0; struct intf_priv *intfpriv = intfhdl->pintfpriv; request = 0x05; @@ -37,8 +38,10 @@ static u8 usb_read8(struct intf_hdl *intfhdl, u32 addr) index = 0; wvalue = (u16)(addr & 0x0000ffff); len = 1; - r8712_usbctrl_vendorreq(intfpriv, request, wvalue, index, &data, len, - requesttype); + status = r8712_usbctrl_vendorreq(intfpriv, request, wvalue, index, + &data, len, requesttype); + if (status < 0) + return 0; return (u8)(le32_to_cpu(data) & 0x0ff); } @@ -49,7 +52,8 @@ static u16 usb_read16(struct intf_hdl *intfhdl, u32 addr) u16 wvalue; u16 index; u16 len; - __le32 data; + int status; + __le32 data = 0; struct intf_priv *intfpriv = intfhdl->pintfpriv; request = 0x05; @@ -57,8 +61,10 @@ static u16 usb_read16(struct intf_hdl *intfhdl, u32 addr) index = 0; wvalue = (u16)(addr & 0x0000ffff); len = 2; - r8712_usbctrl_vendorreq(intfpriv, request, wvalue, index, &data, len, - requesttype); + status = r8712_usbctrl_vendorreq(intfpriv, request, wvalue, index, + &data, len, requesttype); + if (status < 0) + return 0; return (u16)(le32_to_cpu(data) & 0xffff); } @@ -69,7 +75,8 @@ static u32 usb_read32(struct intf_hdl *intfhdl, u32 addr) u16 wvalue; u16 index; u16 len; - __le32 data; + int status; + __le32 data = 0; struct intf_priv *intfpriv = intfhdl->pintfpriv; request = 0x05; @@ -77,8 +84,10 @@ static u32 usb_read32(struct intf_hdl *intfhdl, u32 addr) index = 0; wvalue = (u16)(addr & 0x0000ffff); len = 4; - r8712_usbctrl_vendorreq(intfpriv, request, wvalue, index, &data, len, - requesttype); + status = r8712_usbctrl_vendorreq(intfpriv, request, wvalue, index, + &data, len, requesttype); + if (status < 0) + return 0; return le32_to_cpu(data); } diff --git a/drivers/staging/rtl8712/usb_ops_linux.c b/drivers/staging/rtl8712/usb_ops_linux.c index 9d290bc2fdb7f..2202a0caa252e 100644 --- a/drivers/staging/rtl8712/usb_ops_linux.c +++ b/drivers/staging/rtl8712/usb_ops_linux.c @@ -493,7 +493,7 @@ int r8712_usbctrl_vendorreq(struct intf_priv *pintfpriv, u8 request, u16 value, memcpy(pIo_buf, pdata, len); } status = usb_control_msg(udev, pipe, request, reqtype, value, index, - pIo_buf, len, HZ / 2); + pIo_buf, len, 500); if (status > 0) { /* Success this control transfer. */ if (requesttype == 0x01) { /* For Control read transfer, we have to copy the read diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c index d8d44fd9a92f4..ea2fd3a73c3a8 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c @@ -1351,9 +1351,11 @@ static int rtw_wx_set_scan(struct net_device *dev, struct iw_request_info *a, sec_len = *(pos++); len-= 1; - if (sec_len>0 && sec_len<=len) { + if (sec_len > 0 && + sec_len <= len && + sec_len <= 32) { ssid[ssid_index].SsidLength = sec_len; - memcpy(ssid[ssid_index].Ssid, pos, ssid[ssid_index].SsidLength); + memcpy(ssid[ssid_index].Ssid, pos, sec_len); /* DBG_871X("%s COMBO_SCAN with specific ssid:%s, %d\n", __func__ */ /* , ssid[ssid_index].Ssid, ssid[ssid_index].SsidLength); */ ssid_index++; diff --git a/drivers/staging/rts5208/rtsx_scsi.c b/drivers/staging/rts5208/rtsx_scsi.c index 1deb74112ad43..11d9d9155eef2 100644 --- a/drivers/staging/rts5208/rtsx_scsi.c +++ b/drivers/staging/rts5208/rtsx_scsi.c @@ -2802,10 +2802,10 @@ static int get_ms_information(struct scsi_cmnd *srb, struct rtsx_chip *chip) } if (dev_info_id == 0x15) { - buf_len = 0x3A; + buf_len = 0x3C; data_len = 0x3A; } else { - buf_len = 0x6A; + buf_len = 0x6C; data_len = 0x6A; } @@ -2855,11 +2855,7 @@ static int get_ms_information(struct scsi_cmnd *srb, struct rtsx_chip *chip) } rtsx_stor_set_xfer_buf(buf, buf_len, srb); - - if (dev_info_id == 0x15) - scsi_set_resid(srb, scsi_bufflen(srb) - 0x3C); - else - scsi_set_resid(srb, scsi_bufflen(srb) - 0x6C); + scsi_set_resid(srb, scsi_bufflen(srb) - buf_len); kfree(buf); return STATUS_SUCCESS; diff --git a/drivers/staging/speakup/spk_ttyio.c b/drivers/staging/speakup/spk_ttyio.c index 472804c3f44dc..aec8222361743 100644 --- a/drivers/staging/speakup/spk_ttyio.c +++ b/drivers/staging/speakup/spk_ttyio.c @@ -88,7 +88,7 @@ static int spk_ttyio_receive_buf2(struct tty_struct *tty, } if (!ldisc_data->buf_free) - /* ttyio_in will tty_schedule_flip */ + /* ttyio_in will tty_flip_buffer_push */ return 0; /* Make sure the consumer has read buf before we have seen @@ -325,7 +325,7 @@ static unsigned char ttyio_in(int timeout) mb(); ldisc_data->buf_free = true; /* Let TTY push more characters */ - tty_schedule_flip(speakup_tty->port); + tty_flip_buffer_push(speakup_tty->port); return rv; } diff --git a/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c b/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c index c3cd6f389a989..2a369fdaf0cbb 100644 --- a/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c +++ b/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c @@ -97,8 +97,7 @@ static void cfg_scan_result(enum scan_event scan_event, info->frame_len, (s32)info->rssi * 100, GFP_KERNEL); - if (!bss) - cfg80211_put_bss(wiphy, bss); + cfg80211_put_bss(wiphy, bss); } else if (scan_event == SCAN_EVENT_DONE) { mutex_lock(&priv->scan_req_lock); diff --git a/drivers/staging/wlan-ng/hfa384x_usb.c b/drivers/staging/wlan-ng/hfa384x_usb.c index ed4ff78dd02aa..71cca6e1dea46 100644 --- a/drivers/staging/wlan-ng/hfa384x_usb.c +++ b/drivers/staging/wlan-ng/hfa384x_usb.c @@ -3779,18 +3779,18 @@ static void hfa384x_usb_throttlefn(struct timer_list *t) spin_lock_irqsave(&hw->ctlxq.lock, flags); - /* - * We need to check BOTH the RX and the TX throttle controls, - * so we use the bitwise OR instead of the logical OR. - */ pr_debug("flags=0x%lx\n", hw->usb_flags); - if (!hw->wlandev->hwremoved && - ((test_and_clear_bit(THROTTLE_RX, &hw->usb_flags) && - !test_and_set_bit(WORK_RX_RESUME, &hw->usb_flags)) | - (test_and_clear_bit(THROTTLE_TX, &hw->usb_flags) && - !test_and_set_bit(WORK_TX_RESUME, &hw->usb_flags)) - )) { - schedule_work(&hw->usb_work); + if (!hw->wlandev->hwremoved) { + bool rx_throttle = test_and_clear_bit(THROTTLE_RX, &hw->usb_flags) && + !test_and_set_bit(WORK_RX_RESUME, &hw->usb_flags); + bool tx_throttle = test_and_clear_bit(THROTTLE_TX, &hw->usb_flags) && + !test_and_set_bit(WORK_TX_RESUME, &hw->usb_flags); + /* + * We need to check BOTH the RX and the TX throttle controls, + * so we use the bitwise OR instead of the logical OR. + */ + if (rx_throttle | tx_throttle) + schedule_work(&hw->usb_work); } spin_unlock_irqrestore(&hw->ctlxq.lock, flags); diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 8075f60fd02c3..2d5cf1714ae05 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -443,6 +443,9 @@ static bool iscsit_tpg_check_network_portal( break; } spin_unlock(&tpg->tpg_np_lock); + + if (match) + break; } spin_unlock(&tiqn->tiqn_tpg_lock); diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 385e4cf9cfa63..0fc3135d3e4f6 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -1702,7 +1702,6 @@ int core_alua_set_tg_pt_gp_id( pr_err("Maximum ALUA alua_tg_pt_gps_count:" " 0x0000ffff reached\n"); spin_unlock(&dev->t10_alua.tg_pt_gps_lock); - kmem_cache_free(t10_alua_tg_pt_gp_cache, tg_pt_gp); return -ENOSPC; } again: diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 2d19f0e332b01..20fe287039857 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -758,6 +758,8 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) INIT_LIST_HEAD(&dev->t10_alua.lba_map_list); spin_lock_init(&dev->t10_alua.lba_map_lock); + INIT_WORK(&dev->delayed_cmd_work, target_do_delayed_work); + dev->t10_wwn.t10_dev = dev; dev->t10_alua.t10_dev = dev; diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index e7b3c6e5d5744..e4f072a680d41 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -150,6 +150,7 @@ int transport_dump_vpd_ident(struct t10_vpd *, unsigned char *, int); void transport_clear_lun_ref(struct se_lun *); sense_reason_t target_cmd_size_check(struct se_cmd *cmd, unsigned int size); void target_qf_do_work(struct work_struct *work); +void target_do_delayed_work(struct work_struct *work); bool target_check_wce(struct se_device *dev); bool target_check_fua(struct se_device *dev); void __target_execute_cmd(struct se_cmd *, bool); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 5cf9e7677926f..f52fe40002259 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2021,32 +2021,35 @@ static bool target_handle_task_attr(struct se_cmd *cmd) */ switch (cmd->sam_task_attr) { case TCM_HEAD_TAG: + atomic_inc_mb(&dev->non_ordered); pr_debug("Added HEAD_OF_QUEUE for CDB: 0x%02x\n", cmd->t_task_cdb[0]); return false; case TCM_ORDERED_TAG: - atomic_inc_mb(&dev->dev_ordered_sync); + atomic_inc_mb(&dev->delayed_cmd_count); pr_debug("Added ORDERED for CDB: 0x%02x to ordered list\n", cmd->t_task_cdb[0]); - - /* - * Execute an ORDERED command if no other older commands - * exist that need to be completed first. - */ - if (!atomic_read(&dev->simple_cmds)) - return false; break; default: /* * For SIMPLE and UNTAGGED Task Attribute commands */ - atomic_inc_mb(&dev->simple_cmds); + atomic_inc_mb(&dev->non_ordered); + + if (atomic_read(&dev->delayed_cmd_count) == 0) + return false; break; } - if (atomic_read(&dev->dev_ordered_sync) == 0) - return false; + if (cmd->sam_task_attr != TCM_ORDERED_TAG) { + atomic_inc_mb(&dev->delayed_cmd_count); + /* + * We will account for this when we dequeue from the delayed + * list. + */ + atomic_dec_mb(&dev->non_ordered); + } spin_lock(&dev->delayed_cmd_lock); list_add_tail(&cmd->se_delayed_node, &dev->delayed_cmd_list); @@ -2054,6 +2057,12 @@ static bool target_handle_task_attr(struct se_cmd *cmd) pr_debug("Added CDB: 0x%02x Task Attr: 0x%02x to delayed CMD listn", cmd->t_task_cdb[0], cmd->sam_task_attr); + /* + * We may have no non ordered cmds when this function started or we + * could have raced with the last simple/head cmd completing, so kick + * the delayed handler here. + */ + schedule_work(&dev->delayed_cmd_work); return true; } @@ -2091,29 +2100,48 @@ EXPORT_SYMBOL(target_execute_cmd); * Process all commands up to the last received ORDERED task attribute which * requires another blocking boundary */ -static void target_restart_delayed_cmds(struct se_device *dev) +void target_do_delayed_work(struct work_struct *work) { - for (;;) { + struct se_device *dev = container_of(work, struct se_device, + delayed_cmd_work); + + spin_lock(&dev->delayed_cmd_lock); + while (!dev->ordered_sync_in_progress) { struct se_cmd *cmd; - spin_lock(&dev->delayed_cmd_lock); - if (list_empty(&dev->delayed_cmd_list)) { - spin_unlock(&dev->delayed_cmd_lock); + if (list_empty(&dev->delayed_cmd_list)) break; - } cmd = list_entry(dev->delayed_cmd_list.next, struct se_cmd, se_delayed_node); + + if (cmd->sam_task_attr == TCM_ORDERED_TAG) { + /* + * Check if we started with: + * [ordered] [simple] [ordered] + * and we are now at the last ordered so we have to wait + * for the simple cmd. + */ + if (atomic_read(&dev->non_ordered) > 0) + break; + + dev->ordered_sync_in_progress = true; + } + list_del(&cmd->se_delayed_node); + atomic_dec_mb(&dev->delayed_cmd_count); spin_unlock(&dev->delayed_cmd_lock); + if (cmd->sam_task_attr != TCM_ORDERED_TAG) + atomic_inc_mb(&dev->non_ordered); + cmd->transport_state |= CMD_T_SENT; __target_execute_cmd(cmd, true); - if (cmd->sam_task_attr == TCM_ORDERED_TAG) - break; + spin_lock(&dev->delayed_cmd_lock); } + spin_unlock(&dev->delayed_cmd_lock); } /* @@ -2131,14 +2159,17 @@ static void transport_complete_task_attr(struct se_cmd *cmd) goto restart; if (cmd->sam_task_attr == TCM_SIMPLE_TAG) { - atomic_dec_mb(&dev->simple_cmds); + atomic_dec_mb(&dev->non_ordered); dev->dev_cur_ordered_id++; } else if (cmd->sam_task_attr == TCM_HEAD_TAG) { + atomic_dec_mb(&dev->non_ordered); dev->dev_cur_ordered_id++; pr_debug("Incremented dev_cur_ordered_id: %u for HEAD_OF_QUEUE\n", dev->dev_cur_ordered_id); } else if (cmd->sam_task_attr == TCM_ORDERED_TAG) { - atomic_dec_mb(&dev->dev_ordered_sync); + spin_lock(&dev->delayed_cmd_lock); + dev->ordered_sync_in_progress = false; + spin_unlock(&dev->delayed_cmd_lock); dev->dev_cur_ordered_id++; pr_debug("Incremented dev_cur_ordered_id: %u for ORDERED\n", @@ -2147,7 +2178,8 @@ static void transport_complete_task_attr(struct se_cmd *cmd) cmd->se_cmd_flags &= ~SCF_TASK_ATTR_SET; restart: - target_restart_delayed_cmds(dev); + if (atomic_read(&dev->delayed_cmd_count) > 0) + schedule_work(&dev->delayed_cmd_work); } static void transport_complete_qf(struct se_cmd *cmd) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 71144e33272a3..5e4a52fee37a3 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1213,7 +1213,7 @@ static bool tcmu_handle_completions(struct tcmu_dev *udev) if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags)) { pr_err("ring broken, not handling completions\n"); - return 0; + return false; } mb = udev->mb_addr; @@ -1488,6 +1488,7 @@ static struct page *tcmu_try_get_block_page(struct tcmu_dev *udev, uint32_t dbi) mutex_lock(&udev->cmdr_lock); page = tcmu_get_block_page(udev, dbi); if (likely(page)) { + get_page(page); mutex_unlock(&udev->cmdr_lock); return page; } @@ -1526,6 +1527,7 @@ static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf) /* For the vmalloc()ed cmd area pages */ addr = (void *)(unsigned long)info->mem[mi].addr + offset; page = vmalloc_to_page(addr); + get_page(page); } else { uint32_t dbi; @@ -1536,7 +1538,6 @@ static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf) return VM_FAULT_SIGBUS; } - get_page(page); vmf->page = page; return 0; } diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 596ad3edec9c0..48fabece76443 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -533,7 +533,6 @@ void target_xcopy_release_pt(void) * @cdb: SCSI CDB to be copied into @xpt_cmd. * @remote_port: If false, use the LUN through which the XCOPY command has * been received. If true, use @se_dev->xcopy_lun. - * @alloc_mem: Whether or not to allocate an SGL list. * * Set up a SCSI command (READ or WRITE) that will be used to execute an * XCOPY command. @@ -543,12 +542,9 @@ static int target_xcopy_setup_pt_cmd( struct xcopy_op *xop, struct se_device *se_dev, unsigned char *cdb, - bool remote_port, - bool alloc_mem) + bool remote_port) { struct se_cmd *cmd = &xpt_cmd->se_cmd; - sense_reason_t sense_rc; - int ret = 0, rc; /* * Setup LUN+port to honor reservations based upon xop->op_origin for @@ -564,46 +560,17 @@ static int target_xcopy_setup_pt_cmd( cmd->se_cmd_flags |= SCF_SE_LUN_CMD; cmd->tag = 0; - sense_rc = target_setup_cmd_from_cdb(cmd, cdb); - if (sense_rc) { - ret = -EINVAL; - goto out; - } + if (target_setup_cmd_from_cdb(cmd, cdb)) + return -EINVAL; - if (alloc_mem) { - rc = target_alloc_sgl(&cmd->t_data_sg, &cmd->t_data_nents, - cmd->data_length, false, false); - if (rc < 0) { - ret = rc; - goto out; - } - /* - * Set this bit so that transport_free_pages() allows the - * caller to release SGLs + physical memory allocated by - * transport_generic_get_mem().. - */ - cmd->se_cmd_flags |= SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC; - } else { - /* - * Here the previously allocated SGLs for the internal READ - * are mapped zero-copy to the internal WRITE. - */ - sense_rc = transport_generic_map_mem_to_cmd(cmd, - xop->xop_data_sg, xop->xop_data_nents, - NULL, 0); - if (sense_rc) { - ret = -EINVAL; - goto out; - } + if (transport_generic_map_mem_to_cmd(cmd, xop->xop_data_sg, + xop->xop_data_nents, NULL, 0)) + return -EINVAL; - pr_debug("Setup PASSTHROUGH_NOALLOC t_data_sg: %p t_data_nents:" - " %u\n", cmd->t_data_sg, cmd->t_data_nents); - } + pr_debug("Setup PASSTHROUGH_NOALLOC t_data_sg: %p t_data_nents:" + " %u\n", cmd->t_data_sg, cmd->t_data_nents); return 0; - -out: - return ret; } static int target_xcopy_issue_pt_cmd(struct xcopy_pt_cmd *xpt_cmd) @@ -660,15 +627,13 @@ static int target_xcopy_read_source( xop->src_pt_cmd = xpt_cmd; rc = target_xcopy_setup_pt_cmd(xpt_cmd, xop, src_dev, &cdb[0], - remote_port, true); + remote_port); if (rc < 0) { ec_cmd->scsi_status = xpt_cmd->se_cmd.scsi_status; transport_generic_free_cmd(se_cmd, 0); return rc; } - xop->xop_data_sg = se_cmd->t_data_sg; - xop->xop_data_nents = se_cmd->t_data_nents; pr_debug("XCOPY-READ: Saved xop->xop_data_sg: %p, num: %u for READ" " memory\n", xop->xop_data_sg, xop->xop_data_nents); @@ -678,12 +643,6 @@ static int target_xcopy_read_source( transport_generic_free_cmd(se_cmd, 0); return rc; } - /* - * Clear off the allocated t_data_sg, that has been saved for - * zero-copy WRITE submission reuse in struct xcopy_op.. - */ - se_cmd->t_data_sg = NULL; - se_cmd->t_data_nents = 0; return 0; } @@ -722,19 +681,9 @@ static int target_xcopy_write_destination( xop->dst_pt_cmd = xpt_cmd; rc = target_xcopy_setup_pt_cmd(xpt_cmd, xop, dst_dev, &cdb[0], - remote_port, false); + remote_port); if (rc < 0) { - struct se_cmd *src_cmd = &xop->src_pt_cmd->se_cmd; ec_cmd->scsi_status = xpt_cmd->se_cmd.scsi_status; - /* - * If the failure happened before the t_mem_list hand-off in - * target_xcopy_setup_pt_cmd(), Reset memory + clear flag so that - * core releases this memory on error during X-COPY WRITE I/O. - */ - src_cmd->se_cmd_flags &= ~SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC; - src_cmd->t_data_sg = xop->xop_data_sg; - src_cmd->t_data_nents = xop->xop_data_nents; - transport_generic_free_cmd(se_cmd, 0); return rc; } @@ -742,7 +691,6 @@ static int target_xcopy_write_destination( rc = target_xcopy_issue_pt_cmd(xpt_cmd); if (rc < 0) { ec_cmd->scsi_status = xpt_cmd->se_cmd.scsi_status; - se_cmd->se_cmd_flags &= ~SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC; transport_generic_free_cmd(se_cmd, 0); return rc; } @@ -758,7 +706,7 @@ static void target_xcopy_do_work(struct work_struct *work) sector_t src_lba, dst_lba, end_lba; unsigned int max_sectors; int rc = 0; - unsigned short nolb, cur_nolb, max_nolb, copied_nolb = 0; + unsigned short nolb, max_nolb, copied_nolb = 0; if (target_parse_xcopy_cmd(xop) != TCM_NO_SENSE) goto err_free; @@ -788,7 +736,23 @@ static void target_xcopy_do_work(struct work_struct *work) (unsigned long long)src_lba, (unsigned long long)dst_lba); while (src_lba < end_lba) { - cur_nolb = min(nolb, max_nolb); + unsigned short cur_nolb = min(nolb, max_nolb); + u32 cur_bytes = cur_nolb * src_dev->dev_attrib.block_size; + + if (cur_bytes != xop->xop_data_bytes) { + /* + * (Re)allocate a buffer large enough to hold the XCOPY + * I/O size, which can be reused each read / write loop. + */ + target_free_sgl(xop->xop_data_sg, xop->xop_data_nents); + rc = target_alloc_sgl(&xop->xop_data_sg, + &xop->xop_data_nents, + cur_bytes, + false, false); + if (rc < 0) + goto out; + xop->xop_data_bytes = cur_bytes; + } pr_debug("target_xcopy_do_work: Calling read src_dev: %p src_lba: %llu," " cur_nolb: %hu\n", src_dev, (unsigned long long)src_lba, cur_nolb); @@ -819,12 +783,11 @@ static void target_xcopy_do_work(struct work_struct *work) nolb -= cur_nolb; transport_generic_free_cmd(&xop->src_pt_cmd->se_cmd, 0); - xop->dst_pt_cmd->se_cmd.se_cmd_flags &= ~SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC; - transport_generic_free_cmd(&xop->dst_pt_cmd->se_cmd, 0); } xcopy_pt_undepend_remotedev(xop); + target_free_sgl(xop->xop_data_sg, xop->xop_data_nents); kfree(xop); pr_debug("target_xcopy_do_work: Final src_lba: %llu, dst_lba: %llu\n", @@ -838,6 +801,7 @@ static void target_xcopy_do_work(struct work_struct *work) out: xcopy_pt_undepend_remotedev(xop); + target_free_sgl(xop->xop_data_sg, xop->xop_data_nents); err_free: kfree(xop); diff --git a/drivers/target/target_core_xcopy.h b/drivers/target/target_core_xcopy.h index 974bc1e19ff2b..a1805a14eea07 100644 --- a/drivers/target/target_core_xcopy.h +++ b/drivers/target/target_core_xcopy.h @@ -41,6 +41,7 @@ struct xcopy_op { struct xcopy_pt_cmd *src_pt_cmd; struct xcopy_pt_cmd *dst_pt_cmd; + u32 xop_data_bytes; u32 xop_data_nents; struct scatterlist *xop_data_sg; struct work_struct xop_work; diff --git a/drivers/tee/optee/call.c b/drivers/tee/optee/call.c index f58f383e8ef21..ba19876368dd8 100644 --- a/drivers/tee/optee/call.c +++ b/drivers/tee/optee/call.c @@ -149,8 +149,7 @@ u32 optee_do_call_with_arg(struct tee_context *ctx, phys_addr_t parg) */ optee_cq_wait_for_completion(&optee->call_queue, &w); } else if (OPTEE_SMC_RETURN_IS_RPC(res.a0)) { - if (need_resched()) - cond_resched(); + cond_resched(); param.a0 = res.a0; param.a1 = res.a1; param.a2 = res.a2; diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c index 4bb4c8f28cbd7..344f48c909181 100644 --- a/drivers/tee/optee/core.c +++ b/drivers/tee/optee/core.c @@ -552,6 +552,7 @@ static struct optee *optee_probe(struct device_node *np) struct optee *optee = NULL; void *memremaped_shm = NULL; struct tee_device *teedev; + struct tee_context *ctx; u32 sec_caps; int rc; @@ -582,6 +583,9 @@ static struct optee *optee_probe(struct device_node *np) if (sec_caps & OPTEE_SMC_SEC_CAP_DYNAMIC_SHM) pool = optee_config_dyn_shm(); + /* Unregister OP-TEE specific client devices on TEE bus */ + optee_unregister_devices(); + /* * If dynamic shared memory is not available or failed - try static one */ @@ -628,6 +632,12 @@ static struct optee *optee_probe(struct device_node *np) optee_supp_init(&optee->supp); optee->memremaped_shm = memremaped_shm; optee->pool = pool; + ctx = teedev_open(optee->teedev); + if (IS_ERR(ctx)) { + rc = PTR_ERR(ctx); + goto err; + } + optee->ctx = ctx; /* * Ensure that there are no pre-existing shm objects before enabling @@ -664,6 +674,7 @@ static struct optee *optee_probe(struct device_node *np) static void optee_remove(struct optee *optee) { + teedev_close_context(optee->ctx); /* * Ask OP-TEE to free all cached shared memory objects to decrease * reference counters and also avoid wild pointers in secure world diff --git a/drivers/tee/optee/device.c b/drivers/tee/optee/device.c index e3a148521ec1d..acff7dd677d67 100644 --- a/drivers/tee/optee/device.c +++ b/drivers/tee/optee/device.c @@ -65,6 +65,13 @@ static int get_devices(struct tee_context *ctx, u32 session, return 0; } +static void optee_release_device(struct device *dev) +{ + struct tee_client_device *optee_device = to_tee_client_device(dev); + + kfree(optee_device); +} + static int optee_register_device(const uuid_t *device_uuid, u32 device_id) { struct tee_client_device *optee_device = NULL; @@ -75,6 +82,7 @@ static int optee_register_device(const uuid_t *device_uuid, u32 device_id) return -ENOMEM; optee_device->dev.bus = &tee_bus_type; + optee_device->dev.release = optee_release_device; dev_set_name(&optee_device->dev, "optee-clnt%u", device_id); uuid_copy(&optee_device->id.uuid, device_uuid); @@ -158,3 +166,17 @@ int optee_enumerate_devices(void) return rc; } + +static int __optee_unregister_device(struct device *dev, void *data) +{ + if (!strncmp(dev_name(dev), "optee-clnt", strlen("optee-clnt"))) + device_unregister(dev); + + return 0; +} + +void optee_unregister_devices(void) +{ + bus_for_each_dev(&tee_bus_type, NULL, NULL, + __optee_unregister_device); +} diff --git a/drivers/tee/optee/optee_private.h b/drivers/tee/optee/optee_private.h index 3eeaad2a28686..0250cfde6312d 100644 --- a/drivers/tee/optee/optee_private.h +++ b/drivers/tee/optee/optee_private.h @@ -69,6 +69,7 @@ struct optee_supp { * struct optee - main service struct * @supp_teedev: supplicant device * @teedev: client device + * @ctx: driver internal TEE context * @invoke_fn: function to issue smc or hvc * @call_queue: queue of threads waiting to call @invoke_fn * @wait_queue: queue of threads from secure world waiting for a @@ -83,6 +84,7 @@ struct optee { struct tee_device *supp_teedev; struct tee_device *teedev; optee_invoke_fn *invoke_fn; + struct tee_context *ctx; struct optee_call_queue call_queue; struct optee_wait_queue wait_queue; struct optee_supp supp; @@ -175,6 +177,7 @@ void optee_fill_pages_list(u64 *dst, struct page **pages, int num_pages, size_t page_offset); int optee_enumerate_devices(void); +void optee_unregister_devices(void); /* * Small helpers diff --git a/drivers/tee/optee/rpc.c b/drivers/tee/optee/rpc.c index aecf62016e7b8..be45ee6202991 100644 --- a/drivers/tee/optee/rpc.c +++ b/drivers/tee/optee/rpc.c @@ -191,6 +191,7 @@ static struct tee_shm *cmd_alloc_suppl(struct tee_context *ctx, size_t sz) } static void handle_rpc_func_cmd_shm_alloc(struct tee_context *ctx, + struct optee *optee, struct optee_msg_arg *arg, struct optee_call_ctx *call_ctx) { @@ -220,7 +221,8 @@ static void handle_rpc_func_cmd_shm_alloc(struct tee_context *ctx, shm = cmd_alloc_suppl(ctx, sz); break; case OPTEE_MSG_RPC_SHM_TYPE_KERNEL: - shm = tee_shm_alloc(ctx, sz, TEE_SHM_MAPPED | TEE_SHM_PRIV); + shm = tee_shm_alloc(optee->ctx, sz, + TEE_SHM_MAPPED | TEE_SHM_PRIV); break; default: arg->ret = TEEC_ERROR_BAD_PARAMETERS; @@ -377,7 +379,7 @@ static void handle_rpc_func_cmd(struct tee_context *ctx, struct optee *optee, break; case OPTEE_MSG_RPC_CMD_SHM_ALLOC: free_pages_list(call_ctx); - handle_rpc_func_cmd_shm_alloc(ctx, arg, call_ctx); + handle_rpc_func_cmd_shm_alloc(ctx, optee, arg, call_ctx); break; case OPTEE_MSG_RPC_CMD_SHM_FREE: handle_rpc_func_cmd_shm_free(ctx, arg); @@ -405,7 +407,7 @@ void optee_handle_rpc(struct tee_context *ctx, struct optee_rpc_param *param, switch (OPTEE_SMC_RETURN_GET_RPC_FUNC(param->a0)) { case OPTEE_SMC_RPC_FUNC_ALLOC: - shm = tee_shm_alloc(ctx, param->a1, + shm = tee_shm_alloc(optee->ctx, param->a1, TEE_SHM_MAPPED | TEE_SHM_PRIV); if (!IS_ERR(shm) && !tee_shm_get_pa(shm, 0, &pa)) { reg_pair_from_64(¶m->a1, ¶m->a2, pa); diff --git a/drivers/tee/optee/shm_pool.c b/drivers/tee/optee/shm_pool.c index c41a9a501a6e9..fa75024f16f7f 100644 --- a/drivers/tee/optee/shm_pool.c +++ b/drivers/tee/optee/shm_pool.c @@ -41,10 +41,8 @@ static int pool_op_alloc(struct tee_shm_pool_mgr *poolm, goto err; } - for (i = 0; i < nr_pages; i++) { - pages[i] = page; - page++; - } + for (i = 0; i < nr_pages; i++) + pages[i] = page + i; shm->flags |= TEE_SHM_REGISTER; rc = optee_shm_register(shm->ctx, shm, pages, nr_pages, diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c index 0f16d9ffd8d12..a7ccd4d2bd106 100644 --- a/drivers/tee/tee_core.c +++ b/drivers/tee/tee_core.c @@ -28,7 +28,7 @@ static DEFINE_SPINLOCK(driver_lock); static struct class *tee_class; static dev_t tee_devt; -static struct tee_context *teedev_open(struct tee_device *teedev) +struct tee_context *teedev_open(struct tee_device *teedev) { int rc; struct tee_context *ctx; @@ -56,6 +56,7 @@ static struct tee_context *teedev_open(struct tee_device *teedev) return ERR_PTR(rc); } +EXPORT_SYMBOL_GPL(teedev_open); void teedev_ctx_get(struct tee_context *ctx) { @@ -82,11 +83,14 @@ void teedev_ctx_put(struct tee_context *ctx) kref_put(&ctx->refcount, teedev_ctx_release); } -static void teedev_close_context(struct tee_context *ctx) +void teedev_close_context(struct tee_context *ctx) { - tee_device_put(ctx->teedev); + struct tee_device *teedev = ctx->teedev; + teedev_ctx_put(ctx); + tee_device_put(teedev); } +EXPORT_SYMBOL_GPL(teedev_close_context); static int tee_open(struct inode *inode, struct file *filp) { diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index d6491e973fa4c..0d5ae80530498 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -1,26 +1,18 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (c) 2015-2016, Linaro Limited + * Copyright (c) 2015-2017, 2019-2021 Linaro Limited */ +#include #include -#include -#include #include +#include #include #include #include #include "tee_private.h" -static void tee_shm_release(struct tee_shm *shm) +static void tee_shm_release(struct tee_device *teedev, struct tee_shm *shm) { - struct tee_device *teedev = shm->teedev; - - mutex_lock(&teedev->mutex); - idr_remove(&teedev->idr, shm->id); - if (shm->ctx) - list_del(&shm->link); - mutex_unlock(&teedev->mutex); - if (shm->flags & TEE_SHM_POOL) { struct tee_shm_pool_mgr *poolm; @@ -52,51 +44,6 @@ static void tee_shm_release(struct tee_shm *shm) tee_device_put(teedev); } -static struct sg_table *tee_shm_op_map_dma_buf(struct dma_buf_attachment - *attach, enum dma_data_direction dir) -{ - return NULL; -} - -static void tee_shm_op_unmap_dma_buf(struct dma_buf_attachment *attach, - struct sg_table *table, - enum dma_data_direction dir) -{ -} - -static void tee_shm_op_release(struct dma_buf *dmabuf) -{ - struct tee_shm *shm = dmabuf->priv; - - tee_shm_release(shm); -} - -static void *tee_shm_op_map(struct dma_buf *dmabuf, unsigned long pgnum) -{ - return NULL; -} - -static int tee_shm_op_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) -{ - struct tee_shm *shm = dmabuf->priv; - size_t size = vma->vm_end - vma->vm_start; - - /* Refuse sharing shared memory provided by application */ - if (shm->flags & TEE_SHM_REGISTER) - return -EINVAL; - - return remap_pfn_range(vma, vma->vm_start, shm->paddr >> PAGE_SHIFT, - size, vma->vm_page_prot); -} - -static const struct dma_buf_ops tee_shm_dma_buf_ops = { - .map_dma_buf = tee_shm_op_map_dma_buf, - .unmap_dma_buf = tee_shm_op_unmap_dma_buf, - .release = tee_shm_op_release, - .map = tee_shm_op_map, - .mmap = tee_shm_op_mmap, -}; - static struct tee_shm *__tee_shm_alloc(struct tee_context *ctx, struct tee_device *teedev, size_t size, u32 flags) @@ -137,6 +84,7 @@ static struct tee_shm *__tee_shm_alloc(struct tee_context *ctx, goto err_dev_put; } + refcount_set(&shm->refcount, 1); shm->flags = flags | TEE_SHM_POOL; shm->teedev = teedev; shm->ctx = ctx; @@ -159,21 +107,6 @@ static struct tee_shm *__tee_shm_alloc(struct tee_context *ctx, goto err_pool_free; } - if (flags & TEE_SHM_DMA_BUF) { - DEFINE_DMA_BUF_EXPORT_INFO(exp_info); - - exp_info.ops = &tee_shm_dma_buf_ops; - exp_info.size = shm->size; - exp_info.flags = O_RDWR; - exp_info.priv = shm; - - shm->dmabuf = dma_buf_export(&exp_info); - if (IS_ERR(shm->dmabuf)) { - ret = ERR_CAST(shm->dmabuf); - goto err_rem; - } - } - if (ctx) { teedev_ctx_get(ctx); mutex_lock(&teedev->mutex); @@ -182,10 +115,6 @@ static struct tee_shm *__tee_shm_alloc(struct tee_context *ctx, } return shm; -err_rem: - mutex_lock(&teedev->mutex); - idr_remove(&teedev->idr, shm->id); - mutex_unlock(&teedev->mutex); err_pool_free: poolm->ops->free(poolm, shm); err_kfree: @@ -268,6 +197,7 @@ struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr, goto err; } + refcount_set(&shm->refcount, 1); shm->flags = flags | TEE_SHM_REGISTER; shm->teedev = teedev; shm->ctx = ctx; @@ -309,22 +239,6 @@ struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr, goto err; } - if (flags & TEE_SHM_DMA_BUF) { - DEFINE_DMA_BUF_EXPORT_INFO(exp_info); - - exp_info.ops = &tee_shm_dma_buf_ops; - exp_info.size = shm->size; - exp_info.flags = O_RDWR; - exp_info.priv = shm; - - shm->dmabuf = dma_buf_export(&exp_info); - if (IS_ERR(shm->dmabuf)) { - ret = ERR_CAST(shm->dmabuf); - teedev->desc->ops->shm_unregister(ctx, shm); - goto err; - } - } - mutex_lock(&teedev->mutex); list_add_tail(&shm->link, &ctx->list_shm); mutex_unlock(&teedev->mutex); @@ -352,6 +266,35 @@ struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr, } EXPORT_SYMBOL_GPL(tee_shm_register); +static int tee_shm_fop_release(struct inode *inode, struct file *filp) +{ + tee_shm_put(filp->private_data); + return 0; +} + +static int tee_shm_fop_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct tee_shm *shm = filp->private_data; + size_t size = vma->vm_end - vma->vm_start; + + /* Refuse sharing shared memory provided by application */ + if (shm->flags & TEE_SHM_USER_MAPPED) + return -EINVAL; + + /* check for overflowing the buffer's size */ + if (vma->vm_pgoff + vma_pages(vma) > shm->size >> PAGE_SHIFT) + return -EINVAL; + + return remap_pfn_range(vma, vma->vm_start, shm->paddr >> PAGE_SHIFT, + size, vma->vm_page_prot); +} + +static const struct file_operations tee_shm_fops = { + .owner = THIS_MODULE, + .release = tee_shm_fop_release, + .mmap = tee_shm_fop_mmap, +}; + /** * tee_shm_get_fd() - Increase reference count and return file descriptor * @shm: Shared memory handle @@ -364,10 +307,11 @@ int tee_shm_get_fd(struct tee_shm *shm) if (!(shm->flags & TEE_SHM_DMA_BUF)) return -EINVAL; - get_dma_buf(shm->dmabuf); - fd = dma_buf_fd(shm->dmabuf, O_CLOEXEC); + /* matched by tee_shm_put() in tee_shm_op_release() */ + refcount_inc(&shm->refcount); + fd = anon_inode_getfd("tee_shm", &tee_shm_fops, shm, O_RDWR); if (fd < 0) - dma_buf_put(shm->dmabuf); + tee_shm_put(shm); return fd; } @@ -377,17 +321,7 @@ int tee_shm_get_fd(struct tee_shm *shm) */ void tee_shm_free(struct tee_shm *shm) { - /* - * dma_buf_put() decreases the dmabuf reference counter and will - * call tee_shm_release() when the last reference is gone. - * - * In the case of driver private memory we call tee_shm_release - * directly instead as it doesn't have a reference counter. - */ - if (shm->flags & TEE_SHM_DMA_BUF) - dma_buf_put(shm->dmabuf); - else - tee_shm_release(shm); + tee_shm_put(shm); } EXPORT_SYMBOL_GPL(tee_shm_free); @@ -494,10 +428,15 @@ struct tee_shm *tee_shm_get_from_id(struct tee_context *ctx, int id) teedev = ctx->teedev; mutex_lock(&teedev->mutex); shm = idr_find(&teedev->idr, id); + /* + * If the tee_shm was found in the IDR it must have a refcount + * larger than 0 due to the guarantee in tee_shm_put() below. So + * it's safe to use refcount_inc(). + */ if (!shm || shm->ctx != ctx) shm = ERR_PTR(-EINVAL); - else if (shm->flags & TEE_SHM_DMA_BUF) - get_dma_buf(shm->dmabuf); + else + refcount_inc(&shm->refcount); mutex_unlock(&teedev->mutex); return shm; } @@ -509,7 +448,25 @@ EXPORT_SYMBOL_GPL(tee_shm_get_from_id); */ void tee_shm_put(struct tee_shm *shm) { - if (shm->flags & TEE_SHM_DMA_BUF) - dma_buf_put(shm->dmabuf); + struct tee_device *teedev = shm->teedev; + bool do_release = false; + + mutex_lock(&teedev->mutex); + if (refcount_dec_and_test(&shm->refcount)) { + /* + * refcount has reached 0, we must now remove it from the + * IDR before releasing the mutex. This will guarantee that + * the refcount_inc() in tee_shm_get_from_id() never starts + * from 0. + */ + idr_remove(&teedev->idr, shm->id); + if (shm->ctx) + list_del(&shm->link); + do_release = true; + } + mutex_unlock(&teedev->mutex); + + if (do_release) + tee_shm_release(teedev, shm); } EXPORT_SYMBOL_GPL(tee_shm_put); diff --git a/drivers/thermal/broadcom/sr-thermal.c b/drivers/thermal/broadcom/sr-thermal.c index 475ce29007713..85ab9edd580cc 100644 --- a/drivers/thermal/broadcom/sr-thermal.c +++ b/drivers/thermal/broadcom/sr-thermal.c @@ -60,6 +60,9 @@ static int sr_thermal_probe(struct platform_device *pdev) return -ENOMEM; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENOENT; + sr_thermal->regs = (void __iomem *)devm_memremap(&pdev->dev, res->start, resource_size(res), MEMREMAP_WB); diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c index 3517883b5cdb9..3a1a28a79d463 100644 --- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c +++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c @@ -49,7 +49,7 @@ struct int3400_thermal_priv { struct art *arts; int trt_count; struct trt *trts; - u8 uuid_bitmap; + u32 uuid_bitmap; int rel_misc_dev_res; int current_uuid_index; }; @@ -216,6 +216,10 @@ static void int3400_notify(acpi_handle handle, thermal_prop[4] = NULL; kobject_uevent_env(&priv->thermal->device.kobj, KOBJ_CHANGE, thermal_prop); + kfree(thermal_prop[0]); + kfree(thermal_prop[1]); + kfree(thermal_prop[2]); + kfree(thermal_prop[3]); break; default: /* Ignore unknown notification codes sent to INT3400 device */ diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c index 576523d0326c8..a902e2a053ee3 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c @@ -179,7 +179,7 @@ static int tcc_offset_update(unsigned int tcc) return 0; } -static unsigned int tcc_offset_save; +static int tcc_offset_save = -1; static ssize_t tcc_offset_degree_celsius_store(struct device *dev, struct device_attribute *attr, const char *buf, @@ -703,7 +703,8 @@ static int proc_thermal_resume(struct device *dev) proc_dev = dev_get_drvdata(dev); proc_thermal_read_ppcc(proc_dev); - tcc_offset_update(tcc_offset_save); + if (tcc_offset_save >= 0) + tcc_offset_update(tcc_offset_save); return 0; } diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c index 68d0c181ec7bb..1f38da5da6e45 100644 --- a/drivers/thermal/of-thermal.c +++ b/drivers/thermal/of-thermal.c @@ -91,7 +91,7 @@ static int of_thermal_get_temp(struct thermal_zone_device *tz, { struct __thermal_zone *data = tz->devdata; - if (!data->ops->get_temp) + if (!data->ops || !data->ops->get_temp) return -EINVAL; return data->ops->get_temp(data->sensor_data, temp); @@ -188,6 +188,9 @@ static int of_thermal_set_emul_temp(struct thermal_zone_device *tz, { struct __thermal_zone *data = tz->devdata; + if (!data->ops || !data->ops->set_emul_temp) + return -EINVAL; + return data->ops->set_emul_temp(data->sensor_data, temp); } @@ -196,7 +199,7 @@ static int of_thermal_get_trend(struct thermal_zone_device *tz, int trip, { struct __thermal_zone *data = tz->devdata; - if (!data->ops->get_trend) + if (!data->ops || !data->ops->get_trend) return -EINVAL; return data->ops->get_trend(data->sensor_data, trip, trend); @@ -336,7 +339,7 @@ static int of_thermal_set_trip_temp(struct thermal_zone_device *tz, int trip, if (trip >= data->ntrips || trip < 0) return -EDOM; - if (data->ops->set_trip_temp) { + if (data->ops && data->ops->set_trip_temp) { int ret; ret = data->ops->set_trip_temp(data->sensor_data, trip, temp); diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c index fb2c55123a99e..059e3d1610c98 100644 --- a/drivers/thermal/samsung/exynos_tmu.c +++ b/drivers/thermal/samsung/exynos_tmu.c @@ -1070,6 +1070,7 @@ static int exynos_tmu_probe(struct platform_device *pdev) data->sclk = devm_clk_get(&pdev->dev, "tmu_sclk"); if (IS_ERR(data->sclk)) { dev_err(&pdev->dev, "Failed to get sclk\n"); + ret = PTR_ERR(data->sclk); goto err_clk; } else { ret = clk_prepare_enable(data->sclk); diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index f526ce31f5a2f..f4490b8120176 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -228,15 +228,14 @@ int thermal_build_list_of_policies(char *buf) { struct thermal_governor *pos; ssize_t count = 0; - ssize_t size = PAGE_SIZE; mutex_lock(&thermal_governor_lock); list_for_each_entry(pos, &thermal_governor_list, governor_list) { - size = PAGE_SIZE - count; - count += scnprintf(buf + count, size, "%s ", pos->name); + count += scnprintf(buf + count, PAGE_SIZE - count, "%s ", + pos->name); } - count += scnprintf(buf + count, size, "\n"); + count += scnprintf(buf + count, PAGE_SIZE - count, "\n"); mutex_unlock(&thermal_governor_lock); @@ -461,6 +460,8 @@ static void thermal_zone_device_init(struct thermal_zone_device *tz) { struct thermal_instance *pos; tz->temperature = THERMAL_TEMP_INVALID; + tz->prev_low_trip = -INT_MAX; + tz->prev_high_trip = INT_MAX; list_for_each_entry(pos, &tz->thermal_instances, tz_node) pos->initialized = false; } diff --git a/drivers/tty/cyclades.c b/drivers/tty/cyclades.c index 4562c8060d09e..26581d2456c8f 100644 --- a/drivers/tty/cyclades.c +++ b/drivers/tty/cyclades.c @@ -556,7 +556,7 @@ static void cyy_chip_rx(struct cyclades_card *cinfo, int chip, } info->idle_stats.recv_idle = jiffies; } - tty_schedule_flip(port); + tty_flip_buffer_push(port); /* end of service */ cyy_writeb(info, CyRIR, save_xir & 0x3f); @@ -996,7 +996,7 @@ static void cyz_handle_rx(struct cyclades_port *info) mod_timer(&info->rx_full_timer, jiffies + 1); #endif info->idle_stats.recv_idle = jiffies; - tty_schedule_flip(&info->port); + tty_flip_buffer_push(&info->port); /* Update rx_get */ cy_writel(&buf_ctrl->rx_get, new_rx_get); @@ -1172,7 +1172,7 @@ static void cyz_handle_cmd(struct cyclades_card *cinfo) if (delta_count) wake_up_interruptible(&info->port.delta_msr_wait); if (special_count) - tty_schedule_flip(&info->port); + tty_flip_buffer_push(&info->port); } } diff --git a/drivers/tty/goldfish.c b/drivers/tty/goldfish.c index c8c5cdfc5e199..d6e82eb61fc2d 100644 --- a/drivers/tty/goldfish.c +++ b/drivers/tty/goldfish.c @@ -151,7 +151,7 @@ static irqreturn_t goldfish_tty_interrupt(int irq, void *dev_id) address = (unsigned long)(void *)buf; goldfish_tty_rw(qtty, address, count, 0); - tty_schedule_flip(&qtty->port); + tty_flip_buffer_push(&qtty->port); return IRQ_HANDLED; } @@ -407,6 +407,7 @@ static int goldfish_tty_probe(struct platform_device *pdev) err_tty_register_device_failed: free_irq(irq, qtty); err_dec_line_count: + tty_port_destroy(&qtty->port); goldfish_tty_current_line_count--; if (goldfish_tty_current_line_count == 0) goldfish_tty_delete_driver(); @@ -427,7 +428,8 @@ static int goldfish_tty_remove(struct platform_device *pdev) tty_unregister_device(goldfish_tty_driver, qtty->console.index); iounmap(qtty->base); qtty->base = NULL; - free_irq(qtty->irq, pdev); + free_irq(qtty->irq, qtty); + tty_port_destroy(&qtty->port); goldfish_tty_current_line_count--; if (goldfish_tty_current_line_count == 0) goldfish_tty_delete_driver(); diff --git a/drivers/tty/hvc/hvc_iucv.c b/drivers/tty/hvc/hvc_iucv.c index 2af1e5751bd63..796fbff623f6e 100644 --- a/drivers/tty/hvc/hvc_iucv.c +++ b/drivers/tty/hvc/hvc_iucv.c @@ -1470,7 +1470,9 @@ static int __init hvc_iucv_init(void) */ static int __init hvc_iucv_config(char *val) { - return kstrtoul(val, 10, &hvc_iucv_devices); + if (kstrtoul(val, 10, &hvc_iucv_devices)) + pr_warn("hvc_iucv= invalid parameter value '%s'\n", val); + return 1; } diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index 5ef08905fe05c..2d2d04c071401 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -37,6 +37,8 @@ struct xencons_info { struct xenbus_device *xbdev; struct xencons_interface *intf; unsigned int evtchn; + XENCONS_RING_IDX out_cons; + unsigned int out_cons_same; struct hvc_struct *hvc; int irq; int vtermno; @@ -86,7 +88,11 @@ static int __write_console(struct xencons_info *xencons, cons = intf->out_cons; prod = intf->out_prod; mb(); /* update queue values before going on */ - BUG_ON((prod - cons) > sizeof(intf->out)); + + if ((prod - cons) > sizeof(intf->out)) { + pr_err_once("xencons: Illegal ring page indices"); + return -EINVAL; + } while ((sent < len) && ((prod - cons) < sizeof(intf->out))) intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = data[sent++]; @@ -114,7 +120,10 @@ static int domU_write_console(uint32_t vtermno, const char *data, int len) */ while (len) { int sent = __write_console(cons, data, len); - + + if (sent < 0) + return sent; + data += sent; len -= sent; @@ -131,6 +140,8 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len) XENCONS_RING_IDX cons, prod; int recv = 0; struct xencons_info *xencons = vtermno_to_xencons(vtermno); + unsigned int eoiflag = 0; + if (xencons == NULL) return -EINVAL; intf = xencons->intf; @@ -138,7 +149,11 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len) cons = intf->in_cons; prod = intf->in_prod; mb(); /* get pointers before reading ring */ - BUG_ON((prod - cons) > sizeof(intf->in)); + + if ((prod - cons) > sizeof(intf->in)) { + pr_err_once("xencons: Illegal ring page indices"); + return -EINVAL; + } while (cons != prod && recv < len) buf[recv++] = intf->in[MASK_XENCONS_IDX(cons++, intf->in)]; @@ -146,7 +161,27 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len) mb(); /* read ring before consuming */ intf->in_cons = cons; - notify_daemon(xencons); + /* + * When to mark interrupt having been spurious: + * - there was no new data to be read, and + * - the backend did not consume some output bytes, and + * - the previous round with no read data didn't see consumed bytes + * (we might have a race with an interrupt being in flight while + * updating xencons->out_cons, so account for that by allowing one + * round without any visible reason) + */ + if (intf->out_cons != xencons->out_cons) { + xencons->out_cons = intf->out_cons; + xencons->out_cons_same = 0; + } + if (recv) { + notify_daemon(xencons); + } else if (xencons->out_cons_same++ > 1) { + eoiflag = XEN_EOI_FLAG_SPURIOUS; + } + + xen_irq_lateeoi(xencons->irq, eoiflag); + return recv; } @@ -375,7 +410,7 @@ static int xencons_connect_backend(struct xenbus_device *dev, if (ret) return ret; info->evtchn = evtchn; - irq = bind_evtchn_to_irq(evtchn); + irq = bind_interdomain_evtchn_to_irq_lateeoi(dev->otherend_id, evtchn); if (irq < 0) return irq; info->irq = irq; @@ -539,7 +574,7 @@ static int __init xen_hvc_init(void) return r; info = vtermno_to_xencons(HVC_COOKIE); - info->irq = bind_evtchn_to_irq(info->evtchn); + info->irq = bind_evtchn_to_irq_lateeoi(info->evtchn); } if (info->irq < 0) info->irq = 0; /* NO_IRQ */ diff --git a/drivers/tty/hvc/hvsi.c b/drivers/tty/hvc/hvsi.c index 66f95f758be05..73226337f5610 100644 --- a/drivers/tty/hvc/hvsi.c +++ b/drivers/tty/hvc/hvsi.c @@ -1038,7 +1038,7 @@ static const struct tty_operations hvsi_ops = { static int __init hvsi_init(void) { - int i; + int i, ret; hvsi_driver = alloc_tty_driver(hvsi_count); if (!hvsi_driver) @@ -1069,12 +1069,25 @@ static int __init hvsi_init(void) } hvsi_wait = wait_for_state; /* irqs active now */ - if (tty_register_driver(hvsi_driver)) - panic("Couldn't register hvsi console driver\n"); + ret = tty_register_driver(hvsi_driver); + if (ret) { + pr_err("Couldn't register hvsi console driver\n"); + goto err_free_irq; + } printk(KERN_DEBUG "HVSI: registered %i devices\n", hvsi_count); return 0; +err_free_irq: + hvsi_wait = poll_for_state; + for (i = 0; i < hvsi_count; i++) { + struct hvsi_struct *hp = &hvsi_ports[i]; + + free_irq(hp->virq, hp); + } + tty_driver_kref_put(hvsi_driver); + + return ret; } device_initcall(hvsi_init); diff --git a/drivers/tty/moxa.c b/drivers/tty/moxa.c index 1254b39074edb..e67a1aef1fd0d 100644 --- a/drivers/tty/moxa.c +++ b/drivers/tty/moxa.c @@ -1385,7 +1385,7 @@ static int moxa_poll_port(struct moxa_port *p, unsigned int handle, if (inited && !tty_throttled(tty) && MoxaPortRxQueue(p) > 0) { /* RX */ MoxaPortReadData(p); - tty_schedule_flip(&p->port); + tty_flip_buffer_push(&p->port); } } else { clear_bit(EMPTYWAIT, &p->statusflags); @@ -1410,7 +1410,7 @@ static int moxa_poll_port(struct moxa_port *p, unsigned int handle, if (tty && (intr & IntrBreak) && !I_IGNBRK(tty)) { /* BREAK */ tty_insert_flip_char(&p->port, 0, TTY_BREAK); - tty_schedule_flip(&p->port); + tty_flip_buffer_push(&p->port); } if (intr & IntrLine) diff --git a/drivers/tty/mxser.c b/drivers/tty/mxser.c index 9d00ff5ef9611..085dc8dd1327b 100644 --- a/drivers/tty/mxser.c +++ b/drivers/tty/mxser.c @@ -861,6 +861,7 @@ static int mxser_activate(struct tty_port *port, struct tty_struct *tty) struct mxser_port *info = container_of(port, struct mxser_port, port); unsigned long page; unsigned long flags; + int ret; page = __get_free_page(GFP_KERNEL); if (!page) @@ -870,9 +871,9 @@ static int mxser_activate(struct tty_port *port, struct tty_struct *tty) if (!info->ioaddr || !info->type) { set_bit(TTY_IO_ERROR, &tty->flags); - free_page(page); spin_unlock_irqrestore(&info->slock, flags); - return 0; + ret = 0; + goto err_free_xmit; } info->port.xmit_buf = (unsigned char *) page; @@ -898,8 +899,10 @@ static int mxser_activate(struct tty_port *port, struct tty_struct *tty) if (capable(CAP_SYS_ADMIN)) { set_bit(TTY_IO_ERROR, &tty->flags); return 0; - } else - return -ENODEV; + } + + ret = -ENODEV; + goto err_free_xmit; } /* @@ -944,6 +947,10 @@ static int mxser_activate(struct tty_port *port, struct tty_struct *tty) spin_unlock_irqrestore(&info->slock, flags); return 0; +err_free_xmit: + free_page(page); + info->port.xmit_buf = NULL; + return ret; } /* diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 38eb49ba361f0..907a4d0784ac0 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -72,6 +72,8 @@ module_param(debug, int, 0600); */ #define MAX_MRU 1500 #define MAX_MTU 1500 +/* SOF, ADDR, CTRL, LEN1, LEN2, ..., FCS, EOF */ +#define PROT_OVERHEAD 7 #define GSM_NET_TX_TIMEOUT (HZ*10) /** @@ -313,6 +315,7 @@ static struct tty_driver *gsm_tty_driver; #define GSM1_ESCAPE_BITS 0x20 #define XON 0x11 #define XOFF 0x13 +#define ISO_IEC_646_MASK 0x7F static const struct tty_port_operations gsm_port_ops; @@ -427,7 +430,7 @@ static u8 gsm_encode_modem(const struct gsm_dlci *dlci) modembits |= MDM_RTR; if (dlci->modem_tx & TIOCM_RI) modembits |= MDM_IC; - if (dlci->modem_tx & TIOCM_CD) + if (dlci->modem_tx & TIOCM_CD || dlci->gsm->initiator) modembits |= MDM_DV; return modembits; } @@ -531,7 +534,8 @@ static int gsm_stuff_frame(const u8 *input, u8 *output, int len) int olen = 0; while (len--) { if (*input == GSM1_SOF || *input == GSM1_ESCAPE - || *input == XON || *input == XOFF) { + || (*input & ISO_IEC_646_MASK) == XON + || (*input & ISO_IEC_646_MASK) == XOFF) { *output++ = GSM1_ESCAPE; *output++ = *input++ ^ GSM1_ESCAPE_BITS; olen++; @@ -821,7 +825,7 @@ static int gsm_dlci_data_output(struct gsm_mux *gsm, struct gsm_dlci *dlci) break; case 2: /* Unstructed with modem bits. Always one byte as we never send inline break data */ - *dp++ = gsm_encode_modem(dlci); + *dp++ = (gsm_encode_modem(dlci) << 1) | EA; break; } WARN_ON(kfifo_out_locked(dlci->fifo, dp , len, &dlci->lock) != len); @@ -1298,11 +1302,12 @@ static void gsm_control_response(struct gsm_mux *gsm, unsigned int command, static void gsm_control_transmit(struct gsm_mux *gsm, struct gsm_control *ctrl) { - struct gsm_msg *msg = gsm_data_alloc(gsm, 0, ctrl->len + 1, gsm->ftype); + struct gsm_msg *msg = gsm_data_alloc(gsm, 0, ctrl->len + 2, gsm->ftype); if (msg == NULL) return; - msg->data[0] = (ctrl->cmd << 1) | 2 | EA; /* command */ - memcpy(msg->data + 1, ctrl->data, ctrl->len); + msg->data[0] = (ctrl->cmd << 1) | CR | EA; /* command */ + msg->data[1] = (ctrl->len << 1) | EA; + memcpy(msg->data + 2, ctrl->data, ctrl->len); gsm_data_queue(gsm->dlci[0], msg); } @@ -1325,7 +1330,6 @@ static void gsm_control_retransmit(struct timer_list *t) spin_lock_irqsave(&gsm->control_lock, flags); ctrl = gsm->pending_cmd; if (ctrl) { - gsm->cretries--; if (gsm->cretries == 0) { gsm->pending_cmd = NULL; ctrl->error = -ETIMEDOUT; @@ -1334,6 +1338,7 @@ static void gsm_control_retransmit(struct timer_list *t) wake_up(&gsm->event); return; } + gsm->cretries--; gsm_control_transmit(gsm, ctrl); mod_timer(&gsm->t2_timer, jiffies + gsm->t2 * HZ / 100); } @@ -1374,7 +1379,7 @@ static struct gsm_control *gsm_control_send(struct gsm_mux *gsm, /* If DLCI0 is in ADM mode skip retries, it won't respond */ if (gsm->dlci[0]->mode == DLCI_MODE_ADM) - gsm->cretries = 1; + gsm->cretries = 0; else gsm->cretries = gsm->n2; @@ -1488,7 +1493,7 @@ static void gsm_dlci_t1(struct timer_list *t) dlci->mode = DLCI_MODE_ADM; gsm_dlci_open(dlci); } else { - gsm_dlci_close(dlci); + gsm_dlci_begin_close(dlci); /* prevent half open link */ } break; @@ -1720,7 +1725,12 @@ static void gsm_dlci_release(struct gsm_dlci *dlci) gsm_destroy_network(dlci); mutex_unlock(&dlci->mutex); - tty_hangup(tty); + /* We cannot use tty_hangup() because in tty_kref_put() the tty + * driver assumes that the hangup queue is free and reuses it to + * queue release_one_tty() -> NULL pointer panic in + * process_one_work(). + */ + tty_vhangup(tty); tty_port_tty_set(&dlci->port, NULL); tty_kref_put(tty); @@ -1803,7 +1813,6 @@ static void gsm_queue(struct gsm_mux *gsm) gsm_response(gsm, address, UA); gsm_dlci_close(dlci); break; - case UA: case UA|PF: if (cr == 0 || dlci == NULL) break; @@ -1954,7 +1963,8 @@ static void gsm1_receive(struct gsm_mux *gsm, unsigned char c) } /* Any partial frame was a runt so go back to start */ if (gsm->state != GSM_START) { - gsm->malformed++; + if (gsm->state != GSM_SEARCH) + gsm->malformed++; gsm->state = GSM_START; } /* A SOF in GSM_START means we are still reading idling or @@ -2091,6 +2101,7 @@ static void gsm_cleanup_mux(struct gsm_mux *gsm) gsm_dlci_release(gsm->dlci[i]); mutex_unlock(&gsm->mutex); /* Now wipe the queues */ + tty_ldisc_flush(gsm->tty); list_for_each_entry_safe(txq, ntxq, &gsm->tx_list, list) kfree(txq); INIT_LIST_HEAD(&gsm->tx_list); @@ -2201,7 +2212,7 @@ static struct gsm_mux *gsm_alloc_mux(void) kfree(gsm); return NULL; } - gsm->txframe = kmalloc(2 * MAX_MRU + 2, GFP_KERNEL); + gsm->txframe = kmalloc(2 * (MAX_MTU + PROT_OVERHEAD - 1), GFP_KERNEL); if (gsm->txframe == NULL) { kfree(gsm->buf); kfree(gsm); @@ -2248,6 +2259,7 @@ static void gsm_copy_config_values(struct gsm_mux *gsm, static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c) { + int ret = 0; int need_close = 0; int need_restart = 0; @@ -2257,7 +2269,7 @@ static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c) /* Check the MRU/MTU range looks sane */ if (c->mru > MAX_MRU || c->mtu > MAX_MTU || c->mru < 8 || c->mtu < 8) return -EINVAL; - if (c->n2 < 3) + if (c->n2 > 255) return -EINVAL; if (c->encapsulation > 1) /* Basic, advanced, no I */ return -EINVAL; @@ -2323,10 +2335,13 @@ static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c) * FIXME: We need to separate activation/deactivation from adding * and removing from the mux array */ - if (need_restart) - gsm_activate_mux(gsm); - if (gsm->initiator && need_close) - gsm_dlci_begin_open(gsm->dlci[0]); + if (gsm->dead) { + ret = gsm_activate_mux(gsm); + if (ret) + return ret; + if (gsm->initiator) + gsm_dlci_begin_open(gsm->dlci[0]); + } return 0; } @@ -2872,19 +2887,17 @@ static struct tty_ldisc_ops tty_ldisc_packet = { static int gsmtty_modem_update(struct gsm_dlci *dlci, u8 brk) { - u8 modembits[5]; + u8 modembits[3]; struct gsm_control *ctrl; int len = 2; - if (brk) + modembits[0] = (dlci->addr << 2) | 2 | EA; /* DLCI, Valid, EA */ + modembits[1] = (gsm_encode_modem(dlci) << 1) | EA; + if (brk) { + modembits[2] = (brk << 4) | 2 | EA; /* Length, Break, EA */ len++; - - modembits[0] = len << 1 | EA; /* Data bytes */ - modembits[1] = dlci->addr << 2 | 3; /* DLCI, EA, 1 */ - modembits[2] = gsm_encode_modem(dlci) << 1 | EA; - if (brk) - modembits[3] = brk << 4 | 2 | EA; /* Valid, EA */ - ctrl = gsm_control_send(dlci->gsm, CMD_MSC, modembits, len + 1); + } + ctrl = gsm_control_send(dlci->gsm, CMD_MSC, modembits, len); if (ctrl == NULL) return -ENOMEM; return gsm_control_wait(dlci->gsm, ctrl); diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index f9c584244f72c..6a55f06c0af26 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -1377,7 +1377,7 @@ n_tty_receive_char_special(struct tty_struct *tty, unsigned char c) put_tty_queue(c, ldata); smp_store_release(&ldata->canon_head, ldata->read_head); kill_fasync(&tty->fasync, SIGIO, POLL_IN); - wake_up_interruptible_poll(&tty->read_wait, EPOLLIN); + wake_up_interruptible_poll(&tty->read_wait, EPOLLIN | EPOLLRDNORM); return 0; } } @@ -1658,7 +1658,7 @@ static void __receive_buf(struct tty_struct *tty, const unsigned char *cp, if (read_cnt(ldata)) { kill_fasync(&tty->fasync, SIGIO, POLL_IN); - wake_up_interruptible_poll(&tty->read_wait, EPOLLIN); + wake_up_interruptible_poll(&tty->read_wait, EPOLLIN | EPOLLRDNORM); } } diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index c6a1d8c4e6894..73226e482e919 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -111,21 +111,11 @@ static void pty_unthrottle(struct tty_struct *tty) static int pty_write(struct tty_struct *tty, const unsigned char *buf, int c) { struct tty_struct *to = tty->link; - unsigned long flags; - if (tty->stopped) + if (tty->stopped || !c) return 0; - if (c > 0) { - spin_lock_irqsave(&to->port->lock, flags); - /* Stuff the data into the input queue of the other end */ - c = tty_insert_flip_string(to->port, buf, c); - spin_unlock_irqrestore(&to->port->lock, flags); - /* And shovel */ - if (c) - tty_flip_buffer_push(to->port); - } - return c; + return tty_insert_flip_string_and_push_buffer(to->port, buf, c); } /** diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index 51a7d3b19b394..381c5117aec1b 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -660,7 +660,7 @@ static struct platform_driver dw8250_platform_driver = { .name = "dw-apb-uart", .pm = &dw8250_pm_ops, .of_match_table = dw8250_of_match, - .acpi_match_table = ACPI_PTR(dw8250_acpi_match), + .acpi_match_table = dw8250_acpi_match, }, .probe = dw8250_probe, .remove = dw8250_remove, diff --git a/drivers/tty/serial/8250/8250_fintek.c b/drivers/tty/serial/8250/8250_fintek.c index 31c91c2f8c6e7..9b1cddbfc75c0 100644 --- a/drivers/tty/serial/8250/8250_fintek.c +++ b/drivers/tty/serial/8250/8250_fintek.c @@ -197,12 +197,12 @@ static int fintek_8250_rs485_config(struct uart_port *port, if (!pdata) return -EINVAL; - /* Hardware do not support same RTS level on send and receive */ - if (!(rs485->flags & SER_RS485_RTS_ON_SEND) == - !(rs485->flags & SER_RS485_RTS_AFTER_SEND)) - return -EINVAL; if (rs485->flags & SER_RS485_ENABLED) { + /* Hardware do not support same RTS level on send and receive */ + if (!(rs485->flags & SER_RS485_RTS_ON_SEND) == + !(rs485->flags & SER_RS485_RTS_AFTER_SEND)) + return -EINVAL; memset(rs485->padding, 0, sizeof(rs485->padding)); config |= RS485_URA; } else { @@ -285,24 +285,6 @@ static void fintek_8250_set_max_fifo(struct fintek_8250 *pdata) } } -static void fintek_8250_goto_highspeed(struct uart_8250_port *uart, - struct fintek_8250 *pdata) -{ - sio_write_reg(pdata, LDN, pdata->index); - - switch (pdata->pid) { - case CHIP_ID_F81866: /* set uart clock for high speed serial mode */ - sio_write_mask_reg(pdata, F81866_UART_CLK, - F81866_UART_CLK_MASK, - F81866_UART_CLK_14_769MHZ); - - uart->port.uartclk = 921600 * 16; - break; - default: /* leave clock speed untouched */ - break; - } -} - static void fintek_8250_set_termios(struct uart_port *port, struct ktermios *termios, struct ktermios *old) @@ -422,7 +404,6 @@ static int probe_setup_port(struct fintek_8250 *pdata, fintek_8250_set_irq_mode(pdata, level_mode); fintek_8250_set_max_fifo(pdata); - fintek_8250_goto_highspeed(uart, pdata); fintek_8250_exit_key(addr[i]); diff --git a/drivers/tty/serial/8250/8250_gsc.c b/drivers/tty/serial/8250/8250_gsc.c index 0809ae2aa9b14..51cc985216ff3 100644 --- a/drivers/tty/serial/8250/8250_gsc.c +++ b/drivers/tty/serial/8250/8250_gsc.c @@ -26,7 +26,7 @@ static int __init serial_init_chip(struct parisc_device *dev) unsigned long address; int err; -#ifdef CONFIG_64BIT +#if defined(CONFIG_64BIT) && defined(CONFIG_IOSAPIC) if (!dev->irq && (dev->id.sversion == 0xad)) dev->irq = iosapic_serial_irq(dev); #endif diff --git a/drivers/tty/serial/8250/8250_mid.c b/drivers/tty/serial/8250/8250_mid.c index efa0515139f8e..e6c1791609ddf 100644 --- a/drivers/tty/serial/8250/8250_mid.c +++ b/drivers/tty/serial/8250/8250_mid.c @@ -73,6 +73,11 @@ static int pnw_setup(struct mid8250 *mid, struct uart_port *p) return 0; } +static void pnw_exit(struct mid8250 *mid) +{ + pci_dev_put(mid->dma_dev); +} + static int tng_handle_irq(struct uart_port *p) { struct mid8250 *mid = p->private_data; @@ -124,6 +129,11 @@ static int tng_setup(struct mid8250 *mid, struct uart_port *p) return 0; } +static void tng_exit(struct mid8250 *mid) +{ + pci_dev_put(mid->dma_dev); +} + static int dnv_handle_irq(struct uart_port *p) { struct mid8250 *mid = p->private_data; @@ -330,9 +340,9 @@ static int mid8250_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_drvdata(pdev, mid); return 0; + err: - if (mid->board->exit) - mid->board->exit(mid); + mid->board->exit(mid); return ret; } @@ -342,8 +352,7 @@ static void mid8250_remove(struct pci_dev *pdev) serial8250_unregister_port(mid->line); - if (mid->board->exit) - mid->board->exit(mid); + mid->board->exit(mid); } static const struct mid8250_board pnw_board = { @@ -351,6 +360,7 @@ static const struct mid8250_board pnw_board = { .freq = 50000000, .base_baud = 115200, .setup = pnw_setup, + .exit = pnw_exit, }; static const struct mid8250_board tng_board = { @@ -358,6 +368,7 @@ static const struct mid8250_board tng_board = { .freq = 38400000, .base_baud = 1843200, .setup = tng_setup, + .exit = tng_exit, }; static const struct mid8250_board dnv_board = { diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c index 98e68f25a5f34..a79ab53f8e87d 100644 --- a/drivers/tty/serial/8250/8250_mtk.c +++ b/drivers/tty/serial/8250/8250_mtk.c @@ -36,6 +36,7 @@ #define MTK_UART_IER_RTSI 0x40 /* Enable RTS Modem status interrupt */ #define MTK_UART_IER_CTSI 0x80 /* Enable CTS Modem status interrupt */ +#define MTK_UART_EFR 38 /* I/O: Extended Features Register */ #define MTK_UART_EFR_EN 0x10 /* Enable enhancement feature */ #define MTK_UART_EFR_RTS 0x40 /* Enable hardware rx flow control */ #define MTK_UART_EFR_CTS 0x80 /* Enable hardware tx flow control */ @@ -52,6 +53,9 @@ #define MTK_UART_TX_TRIGGER 1 #define MTK_UART_RX_TRIGGER MTK_UART_RX_SIZE +#define MTK_UART_XON1 40 /* I/O: Xon character 1 */ +#define MTK_UART_XOFF1 42 /* I/O: Xoff character 1 */ + #ifdef CONFIG_SERIAL_8250_DMA enum dma_rx_status { DMA_RX_START = 0, @@ -168,7 +172,7 @@ static void mtk8250_dma_enable(struct uart_8250_port *up) MTK_UART_DMA_EN_RX | MTK_UART_DMA_EN_TX); serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); - serial_out(up, UART_EFR, UART_EFR_ECB); + serial_out(up, MTK_UART_EFR, UART_EFR_ECB); serial_out(up, UART_LCR, lcr); if (dmaengine_slave_config(dma->rxchan, &dma->rxconf) != 0) @@ -231,7 +235,7 @@ static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode) int lcr = serial_in(up, UART_LCR); serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); - serial_out(up, UART_EFR, UART_EFR_ECB); + serial_out(up, MTK_UART_EFR, UART_EFR_ECB); serial_out(up, UART_LCR, lcr); lcr = serial_in(up, UART_LCR); @@ -240,7 +244,7 @@ static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode) serial_out(up, MTK_UART_ESCAPE_DAT, MTK_UART_ESCAPE_CHAR); serial_out(up, MTK_UART_ESCAPE_EN, 0x00); serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); - serial_out(up, UART_EFR, serial_in(up, UART_EFR) & + serial_out(up, MTK_UART_EFR, serial_in(up, MTK_UART_EFR) & (~(MTK_UART_EFR_HW_FC | MTK_UART_EFR_SW_FC_MASK))); serial_out(up, UART_LCR, lcr); mtk8250_disable_intrs(up, MTK_UART_IER_XOFFI | @@ -254,8 +258,8 @@ static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode) serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); /*enable hw flow control*/ - serial_out(up, UART_EFR, MTK_UART_EFR_HW_FC | - (serial_in(up, UART_EFR) & + serial_out(up, MTK_UART_EFR, MTK_UART_EFR_HW_FC | + (serial_in(up, MTK_UART_EFR) & (~(MTK_UART_EFR_HW_FC | MTK_UART_EFR_SW_FC_MASK)))); serial_out(up, UART_LCR, lcr); @@ -269,12 +273,12 @@ static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode) serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); /*enable sw flow control */ - serial_out(up, UART_EFR, MTK_UART_EFR_XON1_XOFF1 | - (serial_in(up, UART_EFR) & + serial_out(up, MTK_UART_EFR, MTK_UART_EFR_XON1_XOFF1 | + (serial_in(up, MTK_UART_EFR) & (~(MTK_UART_EFR_HW_FC | MTK_UART_EFR_SW_FC_MASK)))); - serial_out(up, UART_XON1, START_CHAR(port->state->port.tty)); - serial_out(up, UART_XOFF1, STOP_CHAR(port->state->port.tty)); + serial_out(up, MTK_UART_XON1, START_CHAR(port->state->port.tty)); + serial_out(up, MTK_UART_XOFF1, STOP_CHAR(port->state->port.tty)); serial_out(up, UART_LCR, lcr); mtk8250_disable_intrs(up, MTK_UART_IER_CTSI|MTK_UART_IER_RTSI); mtk8250_enable_intrs(up, MTK_UART_IER_XOFFI); diff --git a/drivers/tty/serial/8250/8250_of.c b/drivers/tty/serial/8250/8250_of.c index 9ba31701a372e..a9b0a84b1e433 100644 --- a/drivers/tty/serial/8250/8250_of.c +++ b/drivers/tty/serial/8250/8250_of.c @@ -105,8 +105,17 @@ static int of_platform_serial_setup(struct platform_device *ofdev, port->mapsize = resource_size(&resource); /* Check for shifted address mapping */ - if (of_property_read_u32(np, "reg-offset", &prop) == 0) + if (of_property_read_u32(np, "reg-offset", &prop) == 0) { + if (prop >= port->mapsize) { + dev_warn(&ofdev->dev, "reg-offset %u exceeds region size %pa\n", + prop, &port->mapsize); + ret = -EINVAL; + goto err_unprepare; + } + port->mapbase += prop; + port->mapsize -= prop; + } port->iotype = UPIO_MEM; if (of_property_read_u32(np, "reg-io-width", &prop) == 0) { diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 43fc5b6a25d35..92e2ee2785239 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -89,7 +89,7 @@ static void moan_device(const char *str, struct pci_dev *dev) static int setup_port(struct serial_private *priv, struct uart_8250_port *port, - int bar, int offset, int regshift) + u8 bar, unsigned int offset, int regshift) { struct pci_dev *dev = priv->dev; @@ -1351,29 +1351,33 @@ pericom_do_set_divisor(struct uart_port *port, unsigned int baud, { int scr; int lcr; - int actual_baud; - int tolerance; - for (scr = 5 ; scr <= 15 ; scr++) { - actual_baud = 921600 * 16 / scr; - tolerance = actual_baud / 50; + for (scr = 16; scr > 4; scr--) { + unsigned int maxrate = port->uartclk / scr; + unsigned int divisor = max(maxrate / baud, 1U); + int delta = maxrate / divisor - baud; - if ((baud < actual_baud + tolerance) && - (baud > actual_baud - tolerance)) { + if (baud > maxrate + baud / 50) + continue; + if (delta > baud / 50) + divisor++; + + if (divisor > 0xffff) + continue; + + /* Update delta due to possible divisor change */ + delta = maxrate / divisor - baud; + if (abs(delta) < baud / 50) { lcr = serial_port_in(port, UART_LCR); serial_port_out(port, UART_LCR, lcr | 0x80); - - serial_port_out(port, UART_DLL, 1); - serial_port_out(port, UART_DLM, 0); + serial_port_out(port, UART_DLL, divisor & 0xff); + serial_port_out(port, UART_DLM, divisor >> 8 & 0xff); serial_port_out(port, 2, 16 - scr); serial_port_out(port, UART_LCR, lcr); return; - } else if (baud > actual_baud) { - break; } } - serial8250_do_set_divisor(port, baud, quot, quot_frac); } static int pci_pericom_setup(struct serial_private *priv, const struct pciserial_board *board, @@ -2285,12 +2289,19 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = { .setup = pci_pericom_setup_four_at_eight, }, { - .vendor = PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4S, + .vendor = PCI_VENDOR_ID_ACCESIO, .device = PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_4, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, .setup = pci_pericom_setup_four_at_eight, }, + { + .vendor = PCI_VENDOR_ID_ACCESIO, + .device = PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4S, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup_four_at_eight, + }, { .vendor = PCI_VENDOR_ID_ACCESIO, .device = PCI_DEVICE_ID_ACCESIO_MPCIE_ICM232_4, @@ -2896,7 +2907,7 @@ enum pci_board_num_t { pbn_panacom2, pbn_panacom4, pbn_plx_romulus, - pbn_endrun_2_4000000, + pbn_endrun_2_3906250, pbn_oxsemi, pbn_oxsemi_1_4000000, pbn_oxsemi_2_4000000, @@ -3423,10 +3434,10 @@ static struct pciserial_board pci_boards[] = { * signal now many ports are available * 2 port 952 Uart support */ - [pbn_endrun_2_4000000] = { + [pbn_endrun_2_3906250] = { .flags = FL_BASE0, .num_ports = 2, - .base_baud = 4000000, + .base_baud = 3906250, .uart_offset = 0x200, .first_offset = 0x1000, }, @@ -4334,7 +4345,7 @@ static const struct pci_device_id serial_pci_tbl[] = { */ { PCI_VENDOR_ID_ENDRUN, PCI_DEVICE_ID_ENDRUN_1588, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_endrun_2_4000000 }, + pbn_endrun_2_3906250 }, /* * Quatech cards. These actually have configurable clocks but for * now we just use the default. @@ -5119,8 +5130,30 @@ static const struct pci_device_id serial_pci_tbl[] = { { PCI_VENDOR_ID_INTASHIELD, PCI_DEVICE_ID_INTASHIELD_IS400, PCI_ANY_ID, PCI_ANY_ID, 0, 0, /* 135a.0dc0 */ pbn_b2_4_115200 }, + /* Brainboxes Devices */ + /* + * Brainboxes UC-101 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0BA1, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, /* - * BrainBoxes UC-260 + * Brainboxes UC-235/246 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0AA1, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_1_115200 }, + /* + * Brainboxes UC-257 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0861, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + /* + * Brainboxes UC-260/271/701/756 */ { PCI_VENDOR_ID_INTASHIELD, 0x0D21, PCI_ANY_ID, PCI_ANY_ID, @@ -5128,7 +5161,81 @@ static const struct pci_device_id serial_pci_tbl[] = { pbn_b2_4_115200 }, { PCI_VENDOR_ID_INTASHIELD, 0x0E34, PCI_ANY_ID, PCI_ANY_ID, - PCI_CLASS_COMMUNICATION_MULTISERIAL << 8, 0xffff00, + PCI_CLASS_COMMUNICATION_MULTISERIAL << 8, 0xffff00, + pbn_b2_4_115200 }, + /* + * Brainboxes UC-268 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0841, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_4_115200 }, + /* + * Brainboxes UC-275/279 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0881, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_8_115200 }, + /* + * Brainboxes UC-302 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x08E1, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + /* + * Brainboxes UC-310 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x08C1, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + /* + * Brainboxes UC-313 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x08A3, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + /* + * Brainboxes UC-320/324 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0A61, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_1_115200 }, + /* + * Brainboxes UC-346 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0B02, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_4_115200 }, + /* + * Brainboxes UC-357 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0A81, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + { PCI_VENDOR_ID_INTASHIELD, 0x0A83, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + /* + * Brainboxes UC-368 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0C41, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_4_115200 }, + /* + * Brainboxes UC-420/431 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0921, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, pbn_b2_4_115200 }, /* * Perle PCI-RAS cards diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 8a7c6d65f10ef..f8819f72304a8 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -125,7 +125,8 @@ static const struct serial8250_config uart_config[] = { .name = "16C950/954", .fifo_size = 128, .tx_loadsz = 128, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01, + .rxtrig_bytes = {16, 32, 112, 120}, /* UART_CAP_EFR breaks billionon CF bluetooth card. */ .flags = UART_CAP_FIFO | UART_CAP_SLEEP, }, @@ -1475,6 +1476,8 @@ static inline void __stop_tx(struct uart_8250_port *p) if (em485) { unsigned char lsr = serial_in(p, UART_LSR); + p->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; + /* * To provide required timeing and allow FIFO transfer, * __stop_tx_rs485() must be called only when both FIFO and @@ -1544,6 +1547,18 @@ static inline void start_tx_rs485(struct uart_port *port) if (!(up->port.rs485.flags & SER_RS485_RX_DURING_TX)) serial8250_stop_rx(&up->port); + /* + * While serial8250_em485_handle_stop_tx() is a noop if + * em485->active_timer != &em485->stop_tx_timer, it might happen that + * the timer is still armed and triggers only after the current bunch of + * chars is send and em485->active_timer == &em485->stop_tx_timer again. + * So cancel the timer. There is still a theoretical race condition if + * the timer is already running and only comes around to check for + * em485->active_timer when &em485->stop_tx_timer is armed again. + */ + if (em485->active_timer == &em485->stop_tx_timer) + hrtimer_try_to_cancel(&em485->stop_tx_timer); + em485->active_timer = NULL; mcr = serial8250_in_MCR(up); @@ -2813,8 +2828,10 @@ static int serial8250_request_std_resource(struct uart_8250_port *up) case UPIO_MEM32BE: case UPIO_MEM16: case UPIO_MEM: - if (!port->mapbase) + if (!port->mapbase) { + ret = -EINVAL; break; + } if (!request_mem_region(port->mapbase, size, "serial")) { ret = -EBUSY; @@ -3171,7 +3188,7 @@ static void serial8250_console_restore(struct uart_8250_port *up) serial8250_set_divisor(port, baud, quot, frac); serial_port_out(port, UART_LCR, up->lcr); - serial8250_out_MCR(up, UART_MCR_DTR | UART_MCR_RTS); + serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS); } /* diff --git a/drivers/tty/serial/amba-pl010.c b/drivers/tty/serial/amba-pl010.c index 2c37d11726aba..13f882e5e7b76 100644 --- a/drivers/tty/serial/amba-pl010.c +++ b/drivers/tty/serial/amba-pl010.c @@ -452,14 +452,11 @@ pl010_set_termios(struct uart_port *port, struct ktermios *termios, if ((termios->c_cflag & CREAD) == 0) uap->port.ignore_status_mask |= UART_DUMMY_RSR_RX; - /* first, disable everything */ old_cr = readb(uap->port.membase + UART010_CR) & ~UART010_CR_MSIE; if (UART_ENABLE_MS(port, termios->c_cflag)) old_cr |= UART010_CR_MSIE; - writel(0, uap->port.membase + UART010_CR); - /* Set baud rate */ quot -= 1; writel((quot & 0xf00) >> 8, uap->port.membase + UART010_LCRM); diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 16720c97a4dde..52b7d559b44bf 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -1335,6 +1335,15 @@ static void pl011_stop_rx(struct uart_port *port) pl011_dma_rx_stop(uap); } +static void pl011_throttle_rx(struct uart_port *port) +{ + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + pl011_stop_rx(port); + spin_unlock_irqrestore(&port->lock, flags); +} + static void pl011_enable_ms(struct uart_port *port) { struct uart_amba_port *uap = @@ -1728,9 +1737,10 @@ static int pl011_allocate_irq(struct uart_amba_port *uap) */ static void pl011_enable_interrupts(struct uart_amba_port *uap) { + unsigned long flags; unsigned int i; - spin_lock_irq(&uap->port.lock); + spin_lock_irqsave(&uap->port.lock, flags); /* Clear out any spuriously appearing RX interrupts */ pl011_write(UART011_RTIS | UART011_RXIS, uap, REG_ICR); @@ -1752,7 +1762,14 @@ static void pl011_enable_interrupts(struct uart_amba_port *uap) if (!pl011_dma_rx_running(uap)) uap->im |= UART011_RXIM; pl011_write(uap->im, uap, REG_IMSC); - spin_unlock_irq(&uap->port.lock); + spin_unlock_irqrestore(&uap->port.lock, flags); +} + +static void pl011_unthrottle_rx(struct uart_port *port) +{ + struct uart_amba_port *uap = container_of(port, struct uart_amba_port, port); + + pl011_enable_interrupts(uap); } static int pl011_startup(struct uart_port *port) @@ -2094,32 +2111,13 @@ static const char *pl011_type(struct uart_port *port) return uap->port.type == PORT_AMBA ? uap->type : NULL; } -/* - * Release the memory region(s) being used by 'port' - */ -static void pl011_release_port(struct uart_port *port) -{ - release_mem_region(port->mapbase, SZ_4K); -} - -/* - * Request the memory region(s) being used by 'port' - */ -static int pl011_request_port(struct uart_port *port) -{ - return request_mem_region(port->mapbase, SZ_4K, "uart-pl011") - != NULL ? 0 : -EBUSY; -} - /* * Configure/autoconfigure the port. */ static void pl011_config_port(struct uart_port *port, int flags) { - if (flags & UART_CONFIG_TYPE) { + if (flags & UART_CONFIG_TYPE) port->type = PORT_AMBA; - pl011_request_port(port); - } } /* @@ -2134,6 +2132,8 @@ static int pl011_verify_port(struct uart_port *port, struct serial_struct *ser) ret = -EINVAL; if (ser->baud_base < 9600) ret = -EINVAL; + if (port->mapbase != (unsigned long) ser->iomem_base) + ret = -EINVAL; return ret; } @@ -2144,6 +2144,8 @@ static const struct uart_ops amba_pl011_pops = { .stop_tx = pl011_stop_tx, .start_tx = pl011_start_tx, .stop_rx = pl011_stop_rx, + .throttle = pl011_throttle_rx, + .unthrottle = pl011_unthrottle_rx, .enable_ms = pl011_enable_ms, .break_ctl = pl011_break_ctl, .startup = pl011_startup, @@ -2151,8 +2153,6 @@ static const struct uart_ops amba_pl011_pops = { .flush_buffer = pl011_dma_flush_buffer, .set_termios = pl011_set_termios, .type = pl011_type, - .release_port = pl011_release_port, - .request_port = pl011_request_port, .config_port = pl011_config_port, .verify_port = pl011_verify_port, #ifdef CONFIG_CONSOLE_POLL @@ -2182,8 +2182,6 @@ static const struct uart_ops sbsa_uart_pops = { .shutdown = sbsa_uart_shutdown, .set_termios = sbsa_uart_set_termios, .type = pl011_type, - .release_port = pl011_release_port, - .request_port = pl011_request_port, .config_port = pl011_config_port, .verify_port = pl011_verify_port, #ifdef CONFIG_CONSOLE_POLL @@ -2770,6 +2768,7 @@ MODULE_DEVICE_TABLE(of, sbsa_uart_of_match); static const struct acpi_device_id sbsa_uart_acpi_match[] = { { "ARMH0011", 0 }, + { "ARMHB000", 0 }, {}, }; MODULE_DEVICE_TABLE(acpi, sbsa_uart_acpi_match); diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 8a909d5561859..3b2c25bd2e06b 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -1002,6 +1002,13 @@ static void atmel_tx_dma(struct uart_port *port) desc->callback = atmel_complete_tx_dma; desc->callback_param = atmel_port; atmel_port->cookie_tx = dmaengine_submit(desc); + if (dma_submit_error(atmel_port->cookie_tx)) { + dev_err(port->dev, "dma_submit_error %d\n", + atmel_port->cookie_tx); + return; + } + + dma_async_issue_pending(chan); } if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) @@ -1262,6 +1269,13 @@ static int atmel_prepare_rx_dma(struct uart_port *port) desc->callback_param = port; atmel_port->desc_rx = desc; atmel_port->cookie_rx = dmaengine_submit(desc); + if (dma_submit_error(atmel_port->cookie_rx)) { + dev_err(port->dev, "dma_submit_error %d\n", + atmel_port->cookie_rx); + goto chan_err; + } + + dma_async_issue_pending(atmel_port->chan_rx); return 0; diff --git a/drivers/tty/serial/digicolor-usart.c b/drivers/tty/serial/digicolor-usart.c index 13ac36e2da4f0..e06967ca62fa6 100644 --- a/drivers/tty/serial/digicolor-usart.c +++ b/drivers/tty/serial/digicolor-usart.c @@ -309,6 +309,8 @@ static void digicolor_uart_set_termios(struct uart_port *port, case CS8: default: config |= UA_CONFIG_CHAR_LEN; + termios->c_cflag &= ~CSIZE; + termios->c_cflag |= CS8; break; } @@ -472,10 +474,10 @@ static int digicolor_uart_probe(struct platform_device *pdev) return PTR_ERR(uart_clk); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - dp->port.mapbase = res->start; dp->port.membase = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(dp->port.membase)) return PTR_ERR(dp->port.membase); + dp->port.mapbase = res->start; irq = platform_get_irq(pdev, 0); if (irq < 0) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index b053345dfd1ae..4bdc12908146e 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -233,8 +233,6 @@ /* IMX lpuart has four extra unused regs located at the beginning */ #define IMX_REG_OFF 0x10 -static DEFINE_IDA(fsl_lpuart_ida); - enum lpuart_type { VF610_LPUART, LS1021A_LPUART, @@ -269,7 +267,6 @@ struct lpuart_port { int rx_dma_rng_buf_len; unsigned int dma_tx_nents; wait_queue_head_t dma_wait; - bool id_allocated; }; struct lpuart_soc_data { @@ -2414,7 +2411,7 @@ static int lpuart_probe(struct platform_device *pdev) return PTR_ERR(sport->port.membase); sport->port.membase += sdata->reg_off; - sport->port.mapbase = res->start; + sport->port.mapbase = res->start + sdata->reg_off; sport->port.dev = &pdev->dev; sport->port.type = PORT_LPUART; sport->devtype = sdata->devtype; @@ -2450,23 +2447,18 @@ static int lpuart_probe(struct platform_device *pdev) ret = of_alias_get_id(np, "serial"); if (ret < 0) { - ret = ida_simple_get(&fsl_lpuart_ida, 0, UART_NR, GFP_KERNEL); - if (ret < 0) { - dev_err(&pdev->dev, "port line is full, add device failed\n"); - return ret; - } - sport->id_allocated = true; + dev_err(&pdev->dev, "failed to get alias id, errno %d\n", ret); + return ret; } if (ret >= ARRAY_SIZE(lpuart_ports)) { dev_err(&pdev->dev, "serial%d out of range\n", ret); - ret = -EINVAL; - goto failed_out_of_range; + return -EINVAL; } sport->port.line = ret; ret = lpuart_enable_clks(sport); if (ret) - goto failed_clock_enable; + return ret; sport->port.uartclk = lpuart_get_baud_clk_rate(sport); lpuart_ports[sport->port.line] = sport; @@ -2516,10 +2508,6 @@ static int lpuart_probe(struct platform_device *pdev) failed_attach_port: failed_irq_request: lpuart_disable_clks(sport); -failed_clock_enable: -failed_out_of_range: - if (sport->id_allocated) - ida_simple_remove(&fsl_lpuart_ida, sport->port.line); return ret; } @@ -2529,9 +2517,6 @@ static int lpuart_remove(struct platform_device *pdev) uart_remove_one_port(&lpuart_reg, &sport->port); - if (sport->id_allocated) - ida_simple_remove(&fsl_lpuart_ida, sport->port.line); - lpuart_disable_clks(sport); if (sport->dma_tx_chan) @@ -2663,7 +2648,6 @@ static int __init lpuart_serial_init(void) static void __exit lpuart_serial_exit(void) { - ida_destroy(&fsl_lpuart_ida); platform_driver_unregister(&lpuart_driver); uart_unregister_driver(&lpuart_reg); } diff --git a/drivers/tty/serial/icom.c b/drivers/tty/serial/icom.c index 624f3d541c687..d047380259b53 100644 --- a/drivers/tty/serial/icom.c +++ b/drivers/tty/serial/icom.c @@ -1499,7 +1499,7 @@ static int icom_probe(struct pci_dev *dev, retval = pci_read_config_dword(dev, PCI_COMMAND, &command_reg); if (retval) { dev_err(&dev->dev, "PCI Config read FAILED\n"); - return retval; + goto probe_exit0; } pci_write_config_dword(dev, PCI_COMMAND, diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index e5ed4ab2b08df..8b41a783b37ee 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -1401,7 +1401,7 @@ static int imx_uart_startup(struct uart_port *port) imx_uart_writel(sport, ucr1, UCR1); ucr4 = imx_uart_readl(sport, UCR4) & ~UCR4_OREN; - if (!sport->dma_is_enabled) + if (!dma_is_inited) ucr4 |= UCR4_OREN; imx_uart_writel(sport, ucr4, UCR4); diff --git a/drivers/tty/serial/jsm/jsm_neo.c b/drivers/tty/serial/jsm/jsm_neo.c index bf0e2a4cb0cef..c6f927a76c3be 100644 --- a/drivers/tty/serial/jsm/jsm_neo.c +++ b/drivers/tty/serial/jsm/jsm_neo.c @@ -815,7 +815,9 @@ static void neo_parse_isr(struct jsm_board *brd, u32 port) /* Parse any modem signal changes */ jsm_dbg(INTR, &ch->ch_bd->pci_dev, "MOD_STAT: sending to parse_modem_sigs\n"); + spin_lock_irqsave(&ch->uart_port.lock, lock_flags); neo_parse_modem(ch, readb(&ch->ch_neo_uart->msr)); + spin_unlock_irqrestore(&ch->uart_port.lock, lock_flags); } } diff --git a/drivers/tty/serial/jsm/jsm_tty.c b/drivers/tty/serial/jsm/jsm_tty.c index 689774c073ca4..8438454ca653f 100644 --- a/drivers/tty/serial/jsm/jsm_tty.c +++ b/drivers/tty/serial/jsm/jsm_tty.c @@ -187,6 +187,7 @@ static void jsm_tty_break(struct uart_port *port, int break_state) static int jsm_tty_open(struct uart_port *port) { + unsigned long lock_flags; struct jsm_board *brd; struct jsm_channel *channel = container_of(port, struct jsm_channel, uart_port); @@ -240,6 +241,7 @@ static int jsm_tty_open(struct uart_port *port) channel->ch_cached_lsr = 0; channel->ch_stops_sent = 0; + spin_lock_irqsave(&port->lock, lock_flags); termios = &port->state->port.tty->termios; channel->ch_c_cflag = termios->c_cflag; channel->ch_c_iflag = termios->c_iflag; @@ -259,6 +261,7 @@ static int jsm_tty_open(struct uart_port *port) jsm_carrier(channel); channel->ch_open_count++; + spin_unlock_irqrestore(&port->lock, lock_flags); jsm_dbg(OPEN, &channel->ch_bd->pci_dev, "finish\n"); return 0; diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c index f5608ad68ae1a..6d4792ec9e5fa 100644 --- a/drivers/tty/serial/kgdboc.c +++ b/drivers/tty/serial/kgdboc.c @@ -391,16 +391,16 @@ static int kgdboc_option_setup(char *opt) { if (!opt) { pr_err("config string not provided\n"); - return -EINVAL; + return 1; } if (strlen(opt) >= MAX_CONFIG_LEN) { pr_err("config string too long\n"); - return -ENOSPC; + return 1; } strcpy(config, opt); - return 0; + return 1; } __setup("kgdboc=", kgdboc_option_setup); diff --git a/drivers/tty/serial/lpc32xx_hs.c b/drivers/tty/serial/lpc32xx_hs.c index 9a836dcac157c..a03618f89c0dd 100644 --- a/drivers/tty/serial/lpc32xx_hs.c +++ b/drivers/tty/serial/lpc32xx_hs.c @@ -345,7 +345,7 @@ static irqreturn_t serial_lpc32xx_interrupt(int irq, void *dev_id) LPC32XX_HSUART_IIR(port->membase)); port->icount.overrun++; tty_insert_flip_char(tport, 0, TTY_OVERRUN); - tty_schedule_flip(tport); + tty_flip_buffer_push(tport); } /* Data received? */ diff --git a/drivers/tty/serial/meson_uart.c b/drivers/tty/serial/meson_uart.c index fbc5bc022a392..849ce8c1ef392 100644 --- a/drivers/tty/serial/meson_uart.c +++ b/drivers/tty/serial/meson_uart.c @@ -256,6 +256,14 @@ static const char *meson_uart_type(struct uart_port *port) return (port->type == PORT_MESON) ? "meson_uart" : NULL; } +/* + * This function is called only from probe() using a temporary io mapping + * in order to perform a reset before setting up the device. Since the + * temporarily mapped region was successfully requested, there can be no + * console on this port at this time. Hence it is not necessary for this + * function to acquire the port->lock. (Since there is no console on this + * port at this time, the port->lock is not initialized yet.) + */ static void meson_uart_reset(struct uart_port *port) { u32 val; @@ -270,9 +278,12 @@ static void meson_uart_reset(struct uart_port *port) static int meson_uart_startup(struct uart_port *port) { + unsigned long flags; u32 val; int ret = 0; + spin_lock_irqsave(&port->lock, flags); + val = readl(port->membase + AML_UART_CONTROL); val |= AML_UART_CLEAR_ERR; writel(val, port->membase + AML_UART_CONTROL); @@ -288,6 +299,8 @@ static int meson_uart_startup(struct uart_port *port) val = (AML_UART_RECV_IRQ(1) | AML_UART_XMIT_IRQ(port->fifosize / 2)); writel(val, port->membase + AML_UART_MISC); + spin_unlock_irqrestore(&port->lock, flags); + ret = request_irq(port->irq, meson_uart_interrupt, 0, port->name, port); diff --git a/drivers/tty/serial/msm_serial.c b/drivers/tty/serial/msm_serial.c index e0718ee5d42a7..aac96659694d6 100644 --- a/drivers/tty/serial/msm_serial.c +++ b/drivers/tty/serial/msm_serial.c @@ -603,6 +603,9 @@ static void msm_start_rx_dma(struct msm_port *msm_port) u32 val; int ret; + if (IS_ENABLED(CONFIG_CONSOLE_POLL)) + return; + if (!dma->chan) return; @@ -1576,6 +1579,7 @@ static inline struct uart_port *msm_get_port_from_line(unsigned int line) static void __msm_console_write(struct uart_port *port, const char *s, unsigned int count, bool is_uartdm) { + unsigned long flags; int i; int num_newlines = 0; bool replaced = false; @@ -1593,6 +1597,8 @@ static void __msm_console_write(struct uart_port *port, const char *s, num_newlines++; count += num_newlines; + local_irq_save(flags); + if (port->sysrq) locked = 0; else if (oops_in_progress) @@ -1638,6 +1644,8 @@ static void __msm_console_write(struct uart_port *port, const char *s, if (locked) spin_unlock(&port->lock); + + local_irq_restore(flags); } static void msm_console_write(struct console *co, const char *s, diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c index 51b4d8d1dcaca..2ce0d05be3681 100644 --- a/drivers/tty/serial/mvebu-uart.c +++ b/drivers/tty/serial/mvebu-uart.c @@ -164,7 +164,7 @@ static unsigned int mvebu_uart_tx_empty(struct uart_port *port) st = readl(port->membase + UART_STAT); spin_unlock_irqrestore(&port->lock, flags); - return (st & STAT_TX_FIFO_EMP) ? TIOCSER_TEMT : 0; + return (st & STAT_TX_EMP) ? TIOCSER_TEMT : 0; } static unsigned int mvebu_uart_get_mctrl(struct uart_port *port) @@ -443,13 +443,13 @@ static void mvebu_uart_shutdown(struct uart_port *port) } } -static int mvebu_uart_baud_rate_set(struct uart_port *port, unsigned int baud) +static unsigned int mvebu_uart_baud_rate_set(struct uart_port *port, unsigned int baud) { unsigned int d_divisor, m_divisor; u32 brdv, osamp; if (!port->uartclk) - return -EOPNOTSUPP; + return 0; /* * The baudrate is derived from the UART clock thanks to two divisors: @@ -473,7 +473,7 @@ static int mvebu_uart_baud_rate_set(struct uart_port *port, unsigned int baud) osamp &= ~OSAMP_DIVISORS_MASK; writel(osamp, port->membase + UART_OSAMP); - return 0; + return DIV_ROUND_CLOSEST(port->uartclk, d_divisor * m_divisor); } static void mvebu_uart_set_termios(struct uart_port *port, @@ -510,15 +510,11 @@ static void mvebu_uart_set_termios(struct uart_port *port, max_baud = 230400; baud = uart_get_baud_rate(port, termios, old, min_baud, max_baud); - if (mvebu_uart_baud_rate_set(port, baud)) { - /* No clock available, baudrate cannot be changed */ - if (old) - baud = uart_get_baud_rate(port, old, NULL, - min_baud, max_baud); - } else { - tty_termios_encode_baud_rate(termios, baud, baud); - uart_update_timeout(port, termios->c_cflag, baud); - } + baud = mvebu_uart_baud_rate_set(port, baud); + + /* In case baudrate cannot be changed, report previous old value */ + if (baud == 0 && old) + baud = tty_termios_baud_rate(old); /* Only the following flag changes are supported */ if (old) { @@ -529,6 +525,11 @@ static void mvebu_uart_set_termios(struct uart_port *port, termios->c_cflag |= CS8; } + if (baud != 0) { + tty_termios_encode_baud_rate(termios, baud, baud); + uart_update_timeout(port, termios->c_cflag, baud); + } + spin_unlock_irqrestore(&port->lock, flags); } diff --git a/drivers/tty/serial/owl-uart.c b/drivers/tty/serial/owl-uart.c index c55c8507713c3..e87953f8a7685 100644 --- a/drivers/tty/serial/owl-uart.c +++ b/drivers/tty/serial/owl-uart.c @@ -695,6 +695,7 @@ static int owl_uart_probe(struct platform_device *pdev) owl_port->port.uartclk = clk_get_rate(owl_port->clk); if (owl_port->port.uartclk == 0) { dev_err(&pdev->dev, "clock rate is zero\n"); + clk_disable_unprepare(owl_port->clk); return -EINVAL; } owl_port->port.flags = UPF_BOOT_AUTOCONF | UPF_IOREMAP | UPF_LOW_LATENCY; diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c index c16234bca78fb..77f18445bb988 100644 --- a/drivers/tty/serial/pch_uart.c +++ b/drivers/tty/serial/pch_uart.c @@ -635,22 +635,6 @@ static int push_rx(struct eg20t_port *priv, const unsigned char *buf, return 0; } -static int pop_tx_x(struct eg20t_port *priv, unsigned char *buf) -{ - int ret = 0; - struct uart_port *port = &priv->port; - - if (port->x_char) { - dev_dbg(priv->port.dev, "%s:X character send %02x (%lu)\n", - __func__, port->x_char, jiffies); - buf[0] = port->x_char; - port->x_char = 0; - ret = 1; - } - - return ret; -} - static int dma_push_rx(struct eg20t_port *priv, int size) { int room; @@ -900,9 +884,10 @@ static unsigned int handle_tx(struct eg20t_port *priv) fifo_size = max(priv->fifo_size, 1); tx_empty = 1; - if (pop_tx_x(priv, xmit->buf)) { - pch_uart_hal_write(priv, xmit->buf, 1); + if (port->x_char) { + pch_uart_hal_write(priv, &port->x_char, 1); port->icount.tx++; + port->x_char = 0; tx_empty = 0; fifo_size--; } @@ -957,9 +942,11 @@ static unsigned int dma_handle_tx(struct eg20t_port *priv) } fifo_size = max(priv->fifo_size, 1); - if (pop_tx_x(priv, xmit->buf)) { - pch_uart_hal_write(priv, xmit->buf, 1); + + if (port->x_char) { + pch_uart_hal_write(priv, &port->x_char, 1); port->icount.tx++; + port->x_char = 0; fifo_size--; } diff --git a/drivers/tty/serial/rda-uart.c b/drivers/tty/serial/rda-uart.c index ff9a27d48bca8..877d86ff68190 100644 --- a/drivers/tty/serial/rda-uart.c +++ b/drivers/tty/serial/rda-uart.c @@ -262,6 +262,8 @@ static void rda_uart_set_termios(struct uart_port *port, /* Fall through */ case CS7: ctrl &= ~RDA_UART_DBITS_8; + termios->c_cflag &= ~CSIZE; + termios->c_cflag |= CS7; break; default: ctrl |= RDA_UART_DBITS_8; diff --git a/drivers/tty/serial/sa1100.c b/drivers/tty/serial/sa1100.c index 8e618129e65c9..ff4b44bdf6b67 100644 --- a/drivers/tty/serial/sa1100.c +++ b/drivers/tty/serial/sa1100.c @@ -454,6 +454,8 @@ sa1100_set_termios(struct uart_port *port, struct ktermios *termios, baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk/16); quot = uart_get_divisor(port, baud); + del_timer_sync(&sport->timer); + spin_lock_irqsave(&sport->port.lock, flags); sport->port.read_status_mask &= UTSR0_TO_SM(UTSR0_TFS); @@ -484,8 +486,6 @@ sa1100_set_termios(struct uart_port *port, struct ktermios *termios, UTSR1_TO_SM(UTSR1_ROR); } - del_timer_sync(&sport->timer); - /* * Update the per-port timeout. */ diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c index c7683beb3412a..1df74bad10630 100644 --- a/drivers/tty/serial/samsung.c +++ b/drivers/tty/serial/samsung.c @@ -238,8 +238,7 @@ static void enable_tx_dma(struct s3c24xx_uart_port *ourport) /* Enable tx dma mode */ ucon = rd_regl(port, S3C2410_UCON); ucon &= ~(S3C64XX_UCON_TXBURST_MASK | S3C64XX_UCON_TXMODE_MASK); - ucon |= (dma_get_cache_alignment() >= 16) ? - S3C64XX_UCON_TXBURST_16 : S3C64XX_UCON_TXBURST_1; + ucon |= S3C64XX_UCON_TXBURST_1; ucon |= S3C64XX_UCON_TXMODE_DMA; wr_regl(port, S3C2410_UCON, ucon); @@ -512,7 +511,7 @@ static void enable_rx_dma(struct s3c24xx_uart_port *ourport) S3C64XX_UCON_DMASUS_EN | S3C64XX_UCON_TIMEOUT_EN | S3C64XX_UCON_RXMODE_MASK); - ucon |= S3C64XX_UCON_RXBURST_16 | + ucon |= S3C64XX_UCON_RXBURST_1 | 0xf << S3C64XX_UCON_TIMEOUT_SHIFT | S3C64XX_UCON_EMPTYINT_EN | S3C64XX_UCON_TIMEOUT_EN | @@ -761,11 +760,8 @@ static irqreturn_t s3c24xx_serial_tx_chars(int irq, void *id) goto out; } - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) { - spin_unlock(&port->lock); + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) uart_write_wakeup(port); - spin_lock(&port->lock); - } if (uart_circ_empty(xmit)) s3c24xx_serial_stop_tx(port); diff --git a/drivers/tty/serial/serial-tegra.c b/drivers/tty/serial/serial-tegra.c index 64f18bf1e694a..74c21152367ae 100644 --- a/drivers/tty/serial/serial-tegra.c +++ b/drivers/tty/serial/serial-tegra.c @@ -1494,7 +1494,7 @@ static struct tegra_uart_chip_data tegra20_uart_chip_data = { .fifo_mode_enable_status = false, .uart_max_port = 5, .max_dma_burst_bytes = 4, - .error_tolerance_low_range = 0, + .error_tolerance_low_range = -4, .error_tolerance_high_range = 4, }; @@ -1505,7 +1505,7 @@ static struct tegra_uart_chip_data tegra30_uart_chip_data = { .fifo_mode_enable_status = false, .uart_max_port = 5, .max_dma_burst_bytes = 4, - .error_tolerance_low_range = 0, + .error_tolerance_low_range = -4, .error_tolerance_high_range = 4, }; diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index fa3bd8a97b244..7cc7bd8aca0f5 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -160,7 +160,7 @@ static void uart_port_dtr_rts(struct uart_port *uport, int raise) int RTS_after_send = !!(uport->rs485.flags & SER_RS485_RTS_AFTER_SEND); if (raise) { - if (rs485_on && !RTS_after_send) { + if (rs485_on && RTS_after_send) { uart_set_mctrl(uport, TIOCM_DTR); uart_clear_mctrl(uport, TIOCM_RTS); } else { @@ -169,7 +169,7 @@ static void uart_port_dtr_rts(struct uart_port *uport, int raise) } else { unsigned int clear = TIOCM_DTR; - clear |= (!rs485_on || !RTS_after_send) ? TIOCM_RTS : 0; + clear |= (!rs485_on || RTS_after_send) ? TIOCM_RTS : 0; uart_clear_mctrl(uport, clear); } } @@ -220,7 +220,11 @@ static int uart_port_startup(struct tty_struct *tty, struct uart_state *state, if (retval == 0) { if (uart_console(uport) && uport->cons->cflag) { tty->termios.c_cflag = uport->cons->cflag; + tty->termios.c_ispeed = uport->cons->ispeed; + tty->termios.c_ospeed = uport->cons->ospeed; uport->cons->cflag = 0; + uport->cons->ispeed = 0; + uport->cons->ospeed = 0; } /* * Initialise the hardware port settings. @@ -288,8 +292,11 @@ static void uart_shutdown(struct tty_struct *tty, struct uart_state *state) /* * Turn off DTR and RTS early. */ - if (uport && uart_console(uport) && tty) + if (uport && uart_console(uport) && tty) { uport->cons->cflag = tty->termios.c_cflag; + uport->cons->ispeed = tty->termios.c_ispeed; + uport->cons->ospeed = tty->termios.c_ospeed; + } if (!tty || C_HUPCL(tty)) uart_port_dtr_rts(uport, 0); @@ -1566,6 +1573,7 @@ static void uart_tty_port_shutdown(struct tty_port *port) { struct uart_state *state = container_of(port, struct uart_state, port); struct uart_port *uport = uart_port_check(state); + char *buf; /* * At this point, we stop accepting input. To do this, we @@ -1587,8 +1595,18 @@ static void uart_tty_port_shutdown(struct tty_port *port) */ tty_port_set_suspended(port, 0); - uart_change_pm(state, UART_PM_STATE_OFF); + /* + * Free the transmit buffer. + */ + spin_lock_irq(&uport->lock); + buf = state->xmit.buf; + state->xmit.buf = NULL; + spin_unlock_irq(&uport->lock); + + if (buf) + free_page((unsigned long)buf); + uart_change_pm(state, UART_PM_STATE_OFF); } static void uart_wait_until_sent(struct tty_struct *tty, int timeout) @@ -2110,8 +2128,11 @@ uart_set_options(struct uart_port *port, struct console *co, * Allow the setting of the UART parameters with a NULL console * too: */ - if (co) + if (co) { co->cflag = termios.c_cflag; + co->ispeed = termios.c_ispeed; + co->ospeed = termios.c_ospeed; + } return 0; } @@ -2245,6 +2266,8 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *uport) */ memset(&termios, 0, sizeof(struct ktermios)); termios.c_cflag = uport->cons->cflag; + termios.c_ispeed = uport->cons->ispeed; + termios.c_ospeed = uport->cons->ospeed; /* * If that's unset, use the tty termios setting. @@ -2372,7 +2395,8 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state, * We probably don't need a spinlock around this, but */ spin_lock_irqsave(&port->lock, flags); - port->ops->set_mctrl(port, port->mctrl & TIOCM_DTR); + port->mctrl &= TIOCM_DTR; + port->ops->set_mctrl(port, port->mctrl); spin_unlock_irqrestore(&port->lock, flags); /* diff --git a/drivers/tty/serial/serial_txx9.c b/drivers/tty/serial/serial_txx9.c index 8507f18900d09..2783baa5dfe59 100644 --- a/drivers/tty/serial/serial_txx9.c +++ b/drivers/tty/serial/serial_txx9.c @@ -648,6 +648,8 @@ serial_txx9_set_termios(struct uart_port *port, struct ktermios *termios, case CS6: /* not supported */ case CS8: cval |= TXX9_SILCR_UMODE_8BIT; + termios->c_cflag &= ~CSIZE; + termios->c_cflag |= CS8; break; } diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 97ee1fc1cd247..c066bb7f07b01 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -1763,6 +1763,10 @@ static irqreturn_t sci_br_interrupt(int irq, void *ptr) /* Handle BREAKs */ sci_handle_breaks(port); + + /* drop invalid character received before break was detected */ + serial_port_in(port, SCxRDR); + sci_clear_SCxSR(port, SCxSR_BREAK_CLEAR(port)); return IRQ_HANDLED; @@ -1842,7 +1846,8 @@ static irqreturn_t sci_mpxed_interrupt(int irq, void *ptr) ret = sci_er_interrupt(irq, ptr); /* Break Interrupt */ - if ((ssr_status & SCxSR_BRK(port)) && err_enabled) + if (s->irqs[SCIx_ERI_IRQ] != s->irqs[SCIx_BRI_IRQ] && + (ssr_status & SCxSR_BRK(port)) && err_enabled) ret = sci_br_interrupt(irq, ptr); /* Overrun Interrupt */ @@ -2390,8 +2395,12 @@ static void sci_set_termios(struct uart_port *port, struct ktermios *termios, int best_clk = -1; unsigned long flags; - if ((termios->c_cflag & CSIZE) == CS7) + if ((termios->c_cflag & CSIZE) == CS7) { smr_val |= SCSMR_CHR; + } else { + termios->c_cflag &= ~CSIZE; + termios->c_cflag |= CS8; + } if (termios->c_cflag & PARENB) smr_val |= SCSMR_PE; if (termios->c_cflag & PARODD) diff --git a/drivers/tty/serial/sifive.c b/drivers/tty/serial/sifive.c index 6a2dc823ea828..7015632c49905 100644 --- a/drivers/tty/serial/sifive.c +++ b/drivers/tty/serial/sifive.c @@ -667,12 +667,16 @@ static void sifive_serial_set_termios(struct uart_port *port, int rate; char nstop; - if ((termios->c_cflag & CSIZE) != CS8) + if ((termios->c_cflag & CSIZE) != CS8) { dev_err_once(ssp->port.dev, "only 8-bit words supported\n"); + termios->c_cflag &= ~CSIZE; + termios->c_cflag |= CS8; + } if (termios->c_iflag & (INPCK | PARMRK)) dev_err_once(ssp->port.dev, "parity checking not supported\n"); if (termios->c_iflag & BRKINT) dev_err_once(ssp->port.dev, "BREAK detection not supported\n"); + termios->c_iflag &= ~(INPCK|PARMRK|BRKINT); /* Set number of stop bits */ nstop = (termios->c_cflag & CSTOPB) ? 2 : 1; @@ -973,7 +977,7 @@ static int sifive_serial_probe(struct platform_device *pdev) /* Set up clock divider */ ssp->clkin_rate = clk_get_rate(ssp->clk); ssp->baud_rate = SIFIVE_DEFAULT_BAUD_RATE; - ssp->port.uartclk = ssp->baud_rate * 16; + ssp->port.uartclk = ssp->clkin_rate; __ssp_update_div(ssp); platform_set_drvdata(pdev, ssp); diff --git a/drivers/tty/serial/st-asc.c b/drivers/tty/serial/st-asc.c index 7971997cdead7..ce35e3a131b16 100644 --- a/drivers/tty/serial/st-asc.c +++ b/drivers/tty/serial/st-asc.c @@ -540,10 +540,14 @@ static void asc_set_termios(struct uart_port *port, struct ktermios *termios, /* set character length */ if ((cflag & CSIZE) == CS7) { ctrl_val |= ASC_CTL_MODE_7BIT_PAR; + cflag |= PARENB; } else { ctrl_val |= (cflag & PARENB) ? ASC_CTL_MODE_8BIT_PAR : ASC_CTL_MODE_8BIT; + cflag &= ~CSIZE; + cflag |= CS8; } + termios->c_cflag = cflag; /* set stop bit */ ctrl_val |= (cflag & CSTOPB) ? ASC_CTL_STOP_2BIT : ASC_CTL_STOP_1BIT; diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index 23b7bdae173c8..5e0b9f783cacb 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -73,6 +73,8 @@ static void stm32_config_reg_rs485(u32 *cr1, u32 *cr3, u32 delay_ADE, *cr3 |= USART_CR3_DEM; over8 = *cr1 & USART_CR1_OVER8; + *cr1 &= ~(USART_CR1_DEDT_MASK | USART_CR1_DEAT_MASK); + if (over8) rs485_deat_dedt = delay_ADE * baud * 8; else @@ -536,7 +538,7 @@ static void stm32_start_tx(struct uart_port *port) { struct circ_buf *xmit = &port->state->xmit; - if (uart_circ_empty(xmit)) + if (uart_circ_empty(xmit) && !port->x_char) return; stm32_transmit_chars(port); @@ -745,13 +747,22 @@ static void stm32_set_termios(struct uart_port *port, struct ktermios *termios, * CS8 or (CS7 + parity), 8 bits word aka [M1:M0] = 0b00 * M0 and M1 already cleared by cr1 initialization. */ - if (bits == 9) + if (bits == 9) { cr1 |= USART_CR1_M0; - else if ((bits == 7) && cfg->has_7bits_data) + } else if ((bits == 7) && cfg->has_7bits_data) { cr1 |= USART_CR1_M1; - else if (bits != 8) + } else if (bits != 8) { dev_dbg(port->dev, "Unsupported data bits config: %u bits\n" , bits); + cflag &= ~CSIZE; + cflag |= CS8; + termios->c_cflag = cflag; + bits = 8; + if (cflag & PARENB) { + bits++; + cr1 |= USART_CR1_M0; + } + } if (ofs->rtor != UNDEF_REG && (stm32_port->rx_ch || stm32_port->fifoen)) { diff --git a/drivers/tty/serial/uartlite.c b/drivers/tty/serial/uartlite.c index 56066d93a65b8..9a4049c894f7a 100644 --- a/drivers/tty/serial/uartlite.c +++ b/drivers/tty/serial/uartlite.c @@ -618,7 +618,7 @@ static struct uart_driver ulite_uart_driver = { * * Returns: 0 on success, <0 otherwise */ -static int ulite_assign(struct device *dev, int id, u32 base, int irq, +static int ulite_assign(struct device *dev, int id, phys_addr_t base, int irq, struct uartlite_data *pdata) { struct uart_port *port; diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c index 9359c80fbb9f5..a1409251fbcc3 100644 --- a/drivers/tty/serial/xilinx_uartps.c +++ b/drivers/tty/serial/xilinx_uartps.c @@ -595,9 +595,10 @@ static void cdns_uart_start_tx(struct uart_port *port) if (uart_circ_empty(&port->state->xmit)) return; + writel(CDNS_UART_IXR_TXEMPTY, port->membase + CDNS_UART_ISR); + cdns_uart_handle_tx(port); - writel(CDNS_UART_IXR_TXEMPTY, port->membase + CDNS_UART_ISR); /* Enable the TX Empty interrupt */ writel(CDNS_UART_IXR_TXEMPTY, port->membase + CDNS_UART_IER); } diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c index 36f1a4d870eb1..b72471373c71d 100644 --- a/drivers/tty/synclink_gt.c +++ b/drivers/tty/synclink_gt.c @@ -137,37 +137,14 @@ MODULE_PARM_DESC(maxframe, "Maximum frame size used by device (4096 to 65535)"); */ static struct tty_driver *serial_driver; -static int open(struct tty_struct *tty, struct file * filp); -static void close(struct tty_struct *tty, struct file * filp); -static void hangup(struct tty_struct *tty); -static void set_termios(struct tty_struct *tty, struct ktermios *old_termios); - -static int write(struct tty_struct *tty, const unsigned char *buf, int count); -static int put_char(struct tty_struct *tty, unsigned char ch); -static void send_xchar(struct tty_struct *tty, char ch); static void wait_until_sent(struct tty_struct *tty, int timeout); -static int write_room(struct tty_struct *tty); -static void flush_chars(struct tty_struct *tty); static void flush_buffer(struct tty_struct *tty); -static void tx_hold(struct tty_struct *tty); static void tx_release(struct tty_struct *tty); -static int ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg); -static int chars_in_buffer(struct tty_struct *tty); -static void throttle(struct tty_struct * tty); -static void unthrottle(struct tty_struct * tty); -static int set_break(struct tty_struct *tty, int break_state); - /* - * generic HDLC support and callbacks + * generic HDLC support */ -#if SYNCLINK_GENERIC_HDLC #define dev_to_port(D) (dev_to_hdlc(D)->priv) -static void hdlcdev_tx_done(struct slgt_info *info); -static void hdlcdev_rx(struct slgt_info *info, char *buf, int size); -static int hdlcdev_init(struct slgt_info *info); -static void hdlcdev_exit(struct slgt_info *info); -#endif /* @@ -186,9 +163,6 @@ struct cond_wait { wait_queue_entry_t wait; unsigned int data; }; -static void init_cond_wait(struct cond_wait *w, unsigned int data); -static void add_cond_wait(struct cond_wait **head, struct cond_wait *w); -static void remove_cond_wait(struct cond_wait **head, struct cond_wait *w); static void flush_cond_wait(struct cond_wait **head); /* @@ -443,12 +417,8 @@ static void shutdown(struct slgt_info *info); static void program_hw(struct slgt_info *info); static void change_params(struct slgt_info *info); -static int register_test(struct slgt_info *info); -static int irq_test(struct slgt_info *info); -static int loopback_test(struct slgt_info *info); static int adapter_test(struct slgt_info *info); -static void reset_adapter(struct slgt_info *info); static void reset_port(struct slgt_info *info); static void async_mode(struct slgt_info *info); static void sync_mode(struct slgt_info *info); @@ -457,41 +427,23 @@ static void rx_stop(struct slgt_info *info); static void rx_start(struct slgt_info *info); static void reset_rbufs(struct slgt_info *info); static void free_rbufs(struct slgt_info *info, unsigned int first, unsigned int last); -static void rdma_reset(struct slgt_info *info); static bool rx_get_frame(struct slgt_info *info); static bool rx_get_buf(struct slgt_info *info); static void tx_start(struct slgt_info *info); static void tx_stop(struct slgt_info *info); static void tx_set_idle(struct slgt_info *info); -static unsigned int free_tbuf_count(struct slgt_info *info); static unsigned int tbuf_bytes(struct slgt_info *info); static void reset_tbufs(struct slgt_info *info); static void tdma_reset(struct slgt_info *info); static bool tx_load(struct slgt_info *info, const char *buf, unsigned int count); -static void get_signals(struct slgt_info *info); -static void set_signals(struct slgt_info *info); -static void enable_loopback(struct slgt_info *info); +static void get_gtsignals(struct slgt_info *info); +static void set_gtsignals(struct slgt_info *info); static void set_rate(struct slgt_info *info, u32 data_rate); -static int bh_action(struct slgt_info *info); -static void bh_handler(struct work_struct *work); static void bh_transmit(struct slgt_info *info); -static void isr_serial(struct slgt_info *info); -static void isr_rdma(struct slgt_info *info); static void isr_txeom(struct slgt_info *info, unsigned short status); -static void isr_tdma(struct slgt_info *info); - -static int alloc_dma_bufs(struct slgt_info *info); -static void free_dma_bufs(struct slgt_info *info); -static int alloc_desc(struct slgt_info *info); -static void free_desc(struct slgt_info *info); -static int alloc_bufs(struct slgt_info *info, struct slgt_desc *bufs, int count); -static void free_bufs(struct slgt_info *info, struct slgt_desc *bufs, int count); - -static int alloc_tmp_rbuf(struct slgt_info *info); -static void free_tmp_rbuf(struct slgt_info *info); static void tx_timeout(struct timer_list *t); static void rx_timeout(struct timer_list *t); @@ -509,10 +461,6 @@ static int tx_abort(struct slgt_info *info); static int rx_enable(struct slgt_info *info, int enable); static int modem_input_wait(struct slgt_info *info,int arg); static int wait_mgsl_event(struct slgt_info *info, int __user *mask_ptr); -static int tiocmget(struct tty_struct *tty); -static int tiocmset(struct tty_struct *tty, - unsigned int set, unsigned int clear); -static int set_break(struct tty_struct *tty, int break_state); static int get_interface(struct slgt_info *info, int __user *if_mode); static int set_interface(struct slgt_info *info, int if_mode); static int set_gpio(struct slgt_info *info, struct gpio_desc __user *gpio); @@ -526,9 +474,6 @@ static int set_xctrl(struct slgt_info *info, int if_mode); /* * driver functions */ -static void add_device(struct slgt_info *info); -static void device_init(int adapter_num, struct pci_dev *pdev); -static int claim_resources(struct slgt_info *info); static void release_resources(struct slgt_info *info); /* @@ -776,7 +721,7 @@ static void set_termios(struct tty_struct *tty, struct ktermios *old_termios) if ((old_termios->c_cflag & CBAUD) && !C_BAUD(tty)) { info->signals &= ~(SerialSignal_RTS | SerialSignal_DTR); spin_lock_irqsave(&info->lock,flags); - set_signals(info); + set_gtsignals(info); spin_unlock_irqrestore(&info->lock,flags); } @@ -786,7 +731,7 @@ static void set_termios(struct tty_struct *tty, struct ktermios *old_termios) if (!C_CRTSCTS(tty) || !tty_throttled(tty)) info->signals |= SerialSignal_RTS; spin_lock_irqsave(&info->lock,flags); - set_signals(info); + set_gtsignals(info); spin_unlock_irqrestore(&info->lock,flags); } @@ -1237,7 +1182,7 @@ static inline void line_info(struct seq_file *m, struct slgt_info *info) /* output current serial signal states */ spin_lock_irqsave(&info->lock,flags); - get_signals(info); + get_gtsignals(info); spin_unlock_irqrestore(&info->lock,flags); stat_buf[0] = 0; @@ -1337,7 +1282,7 @@ static void throttle(struct tty_struct * tty) if (C_CRTSCTS(tty)) { spin_lock_irqsave(&info->lock,flags); info->signals &= ~SerialSignal_RTS; - set_signals(info); + set_gtsignals(info); spin_unlock_irqrestore(&info->lock,flags); } } @@ -1362,7 +1307,7 @@ static void unthrottle(struct tty_struct * tty) if (C_CRTSCTS(tty)) { spin_lock_irqsave(&info->lock,flags); info->signals |= SerialSignal_RTS; - set_signals(info); + set_gtsignals(info); spin_unlock_irqrestore(&info->lock,flags); } } @@ -1534,7 +1479,7 @@ static int hdlcdev_open(struct net_device *dev) /* inform generic HDLC layer of current DCD status */ spin_lock_irqsave(&info->lock, flags); - get_signals(info); + get_gtsignals(info); spin_unlock_irqrestore(&info->lock, flags); if (info->signals & SerialSignal_DCD) netif_carrier_on(dev); @@ -1807,6 +1752,8 @@ static int hdlcdev_init(struct slgt_info *info) */ static void hdlcdev_exit(struct slgt_info *info) { + if (!info->netdev) + return; unregister_hdlc_device(info->netdev); free_netdev(info->netdev); info->netdev = NULL; @@ -2290,7 +2237,7 @@ static void isr_txeom(struct slgt_info *info, unsigned short status) if (info->params.mode != MGSL_MODE_ASYNC && info->drop_rts_on_tx_done) { info->signals &= ~SerialSignal_RTS; info->drop_rts_on_tx_done = false; - set_signals(info); + set_gtsignals(info); } #if SYNCLINK_GENERIC_HDLC @@ -2455,7 +2402,7 @@ static void shutdown(struct slgt_info *info) if (!info->port.tty || info->port.tty->termios.c_cflag & HUPCL) { info->signals &= ~(SerialSignal_RTS | SerialSignal_DTR); - set_signals(info); + set_gtsignals(info); } flush_cond_wait(&info->gpio_wait_q); @@ -2483,7 +2430,7 @@ static void program_hw(struct slgt_info *info) else async_mode(info); - set_signals(info); + set_gtsignals(info); info->dcd_chkcount = 0; info->cts_chkcount = 0; @@ -2491,7 +2438,7 @@ static void program_hw(struct slgt_info *info) info->dsr_chkcount = 0; slgt_irq_on(info, IRQ_DCD | IRQ_CTS | IRQ_DSR | IRQ_RI); - get_signals(info); + get_gtsignals(info); if (info->netcount || (info->port.tty && info->port.tty->termios.c_cflag & CREAD)) @@ -2735,7 +2682,7 @@ static int wait_mgsl_event(struct slgt_info *info, int __user *mask_ptr) spin_lock_irqsave(&info->lock,flags); /* return immediately if state matches requested events */ - get_signals(info); + get_gtsignals(info); s = info->signals; events = mask & @@ -3153,7 +3100,7 @@ static int tiocmget(struct tty_struct *tty) unsigned long flags; spin_lock_irqsave(&info->lock,flags); - get_signals(info); + get_gtsignals(info); spin_unlock_irqrestore(&info->lock,flags); result = ((info->signals & SerialSignal_RTS) ? TIOCM_RTS:0) + @@ -3192,7 +3139,7 @@ static int tiocmset(struct tty_struct *tty, info->signals &= ~SerialSignal_DTR; spin_lock_irqsave(&info->lock,flags); - set_signals(info); + set_gtsignals(info); spin_unlock_irqrestore(&info->lock,flags); return 0; } @@ -3203,7 +3150,7 @@ static int carrier_raised(struct tty_port *port) struct slgt_info *info = container_of(port, struct slgt_info, port); spin_lock_irqsave(&info->lock,flags); - get_signals(info); + get_gtsignals(info); spin_unlock_irqrestore(&info->lock,flags); return (info->signals & SerialSignal_DCD) ? 1 : 0; } @@ -3218,7 +3165,7 @@ static void dtr_rts(struct tty_port *port, int on) info->signals |= SerialSignal_RTS | SerialSignal_DTR; else info->signals &= ~(SerialSignal_RTS | SerialSignal_DTR); - set_signals(info); + set_gtsignals(info); spin_unlock_irqrestore(&info->lock,flags); } @@ -4017,10 +3964,10 @@ static void tx_start(struct slgt_info *info) if (info->params.mode != MGSL_MODE_ASYNC) { if (info->params.flags & HDLC_FLAG_AUTO_RTS) { - get_signals(info); + get_gtsignals(info); if (!(info->signals & SerialSignal_RTS)) { info->signals |= SerialSignal_RTS; - set_signals(info); + set_gtsignals(info); info->drop_rts_on_tx_done = true; } } @@ -4074,7 +4021,7 @@ static void reset_port(struct slgt_info *info) rx_stop(info); info->signals &= ~(SerialSignal_RTS | SerialSignal_DTR); - set_signals(info); + set_gtsignals(info); slgt_irq_off(info, IRQ_ALL | IRQ_MASTER); } @@ -4496,7 +4443,7 @@ static void tx_set_idle(struct slgt_info *info) /* * get state of V24 status (input) signals */ -static void get_signals(struct slgt_info *info) +static void get_gtsignals(struct slgt_info *info) { unsigned short status = rd_reg16(info, SSR); @@ -4558,7 +4505,7 @@ static void msc_set_vcr(struct slgt_info *info) /* * set state of V24 control (output) signals */ -static void set_signals(struct slgt_info *info) +static void set_gtsignals(struct slgt_info *info) { unsigned char val = rd_reg8(info, VCR); if (info->signals & SerialSignal_DTR) diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index ec145a59f1993..49f39c041c351 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -172,7 +172,8 @@ static struct tty_buffer *tty_buffer_alloc(struct tty_port *port, size_t size) have queued and recycle that ? */ if (atomic_read(&port->buf.mem_used) > port->buf.mem_limit) return NULL; - p = kmalloc(sizeof(struct tty_buffer) + 2 * size, GFP_ATOMIC); + p = kmalloc(sizeof(struct tty_buffer) + 2 * size, + GFP_ATOMIC | __GFP_NOWARN); if (p == NULL) return NULL; @@ -393,27 +394,6 @@ int __tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag) } EXPORT_SYMBOL(__tty_insert_flip_char); -/** - * tty_schedule_flip - push characters to ldisc - * @port: tty port to push from - * - * Takes any pending buffers and transfers their ownership to the - * ldisc side of the queue. It then schedules those characters for - * processing by the line discipline. - */ - -void tty_schedule_flip(struct tty_port *port) -{ - struct tty_bufhead *buf = &port->buf; - - /* paired w/ acquire in flush_to_ldisc(); ensures - * flush_to_ldisc() sees buffer data. - */ - smp_store_release(&buf->tail->commit, buf->tail->used); - queue_work(system_unbound_wq, &buf->work); -} -EXPORT_SYMBOL(tty_schedule_flip); - /** * tty_prepare_flip_string - make room for characters * @port: tty port @@ -534,12 +514,24 @@ static void flush_to_ldisc(struct work_struct *work) if (!count) break; head->read += count; + + if (need_resched()) + cond_resched(); } mutex_unlock(&buf->lock); } +static inline void tty_flip_buffer_commit(struct tty_buffer *tail) +{ + /* + * Paired w/ acquire in flush_to_ldisc(); ensures flush_to_ldisc() sees + * buffer data. + */ + smp_store_release(&tail->commit, tail->used); +} + /** * tty_flip_buffer_push - terminal * @port: tty port to push @@ -553,10 +545,44 @@ static void flush_to_ldisc(struct work_struct *work) void tty_flip_buffer_push(struct tty_port *port) { - tty_schedule_flip(port); + struct tty_bufhead *buf = &port->buf; + + tty_flip_buffer_commit(buf->tail); + queue_work(system_unbound_wq, &buf->work); } EXPORT_SYMBOL(tty_flip_buffer_push); +/** + * tty_insert_flip_string_and_push_buffer - add characters to the tty buffer and + * push + * @port: tty port + * @chars: characters + * @size: size + * + * The function combines tty_insert_flip_string() and tty_flip_buffer_push() + * with the exception of properly holding the @port->lock. + * + * To be used only internally (by pty currently). + * + * Returns: the number added. + */ +int tty_insert_flip_string_and_push_buffer(struct tty_port *port, + const unsigned char *chars, size_t size) +{ + struct tty_bufhead *buf = &port->buf; + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + size = tty_insert_flip_string(port, chars, size); + if (size) + tty_flip_buffer_commit(buf->tail); + spin_unlock_irqrestore(&port->lock, flags); + + queue_work(system_unbound_wq, &buf->work); + + return size; +} + /** * tty_buffer_init - prepare a tty buffer structure * @tty: tty to initialise diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index cee7514c3aaf2..ddfe873b5fccb 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -2176,8 +2176,6 @@ static int tty_fasync(int fd, struct file *filp, int on) * Locking: * Called functions take tty_ldiscs_lock * current->signal->tty check is safe without locks - * - * FIXME: may race normal receive processing */ static int tiocsti(struct tty_struct *tty, char __user *p) @@ -2193,8 +2191,10 @@ static int tiocsti(struct tty_struct *tty, char __user *p) ld = tty_ldisc_ref_wait(tty); if (!ld) return -EIO; + tty_buffer_lock_exclusive(tty->port); if (ld->ops->receive_buf) ld->ops->receive_buf(tty, &ch, &mbz, 1); + tty_buffer_unlock_exclusive(tty->port); tty_ldisc_deref(ld); return 0; } diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index b6e78fdbfdff9..68643f61f6f90 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -310,7 +310,7 @@ int kbd_rate(struct kbd_repeat *rpt) static void put_queue(struct vc_data *vc, int ch) { tty_insert_flip_char(&vc->port, ch, 0); - tty_schedule_flip(&vc->port); + tty_flip_buffer_push(&vc->port); } static void puts_queue(struct vc_data *vc, char *cp) @@ -319,7 +319,7 @@ static void puts_queue(struct vc_data *vc, char *cp) tty_insert_flip_char(&vc->port, *cp, 0); cp++; } - tty_schedule_flip(&vc->port); + tty_flip_buffer_push(&vc->port); } static void applkey(struct vc_data *vc, int key, char mode) @@ -564,7 +564,7 @@ static void fn_inc_console(struct vc_data *vc) static void fn_send_intr(struct vc_data *vc) { tty_insert_flip_char(&vc->port, 0, TTY_BREAK); - tty_schedule_flip(&vc->port); + tty_flip_buffer_push(&vc->port); } static void fn_scroll_forw(struct vc_data *vc) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 6f013d7f5bd0f..9d9e056f94ac8 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -1215,8 +1215,25 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc, new_row_size = new_cols << 1; new_screen_size = new_row_size * new_rows; - if (new_cols == vc->vc_cols && new_rows == vc->vc_rows) - return 0; + if (new_cols == vc->vc_cols && new_rows == vc->vc_rows) { + /* + * This function is being called here to cover the case + * where the userspace calls the FBIOPUT_VSCREENINFO twice, + * passing the same fb_var_screeninfo containing the fields + * yres/xres equal to a number non-multiple of vc_font.height + * and yres_virtual/xres_virtual equal to number lesser than the + * vc_font.height and yres/xres. + * In the second call, the struct fb_var_screeninfo isn't + * being modified by the underlying driver because of the + * if above, and this causes the fbcon_display->vrows to become + * negative and it eventually leads to out-of-bound + * access by the imageblit function. + * To give the correct values to the struct and to not have + * to deal with possible errors from the code below, we call + * the resize_screen here as well. + */ + return resize_screen(vc, new_cols, new_rows, user); + } if (new_screen_size > KMALLOC_MAX_SIZE || !new_screen_size) return -EINVAL; @@ -1820,7 +1837,7 @@ static void respond_string(const char *p, struct tty_port *port) tty_insert_flip_char(port, *p, 0); p++; } - tty_schedule_flip(port); + tty_flip_buffer_push(port); } static void cursor_report(struct vc_data *vc, struct tty_struct *tty) @@ -2070,7 +2087,7 @@ static void restore_cur(struct vc_data *vc) enum { ESnormal, ESesc, ESsquare, ESgetpars, ESfunckey, EShash, ESsetG0, ESsetG1, ESpercent, EScsiignore, ESnonstd, - ESpalette, ESosc }; + ESpalette, ESosc, ESapc, ESpm, ESdcs }; /* console_lock is held (except via vc_init()) */ static void reset_terminal(struct vc_data *vc, int do_clear) @@ -2124,20 +2141,28 @@ static void reset_terminal(struct vc_data *vc, int do_clear) csi_J(vc, 2); } +/* is this state an ANSI control string? */ +static bool ansi_control_string(unsigned int state) +{ + if (state == ESosc || state == ESapc || state == ESpm || state == ESdcs) + return true; + return false; +} + /* console_lock is held */ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) { /* * Control characters can be used in the _middle_ - * of an escape sequence. + * of an escape sequence, aside from ANSI control strings. */ - if (vc->vc_state == ESosc && c>=8 && c<=13) /* ... except for OSC */ + if (ansi_control_string(vc->vc_state) && c >= 8 && c <= 13) return; switch (c) { case 0: return; case 7: - if (vc->vc_state == ESosc) + if (ansi_control_string(vc->vc_state)) vc->vc_state = ESnormal; else if (vc->vc_bell_duration) kd_mksound(vc->vc_bell_pitch, vc->vc_bell_duration); @@ -2196,6 +2221,12 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) case ']': vc->vc_state = ESnonstd; return; + case '_': + vc->vc_state = ESapc; + return; + case '^': + vc->vc_state = ESpm; + return; case '%': vc->vc_state = ESpercent; return; @@ -2212,6 +2243,9 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) case 'H': vc->vc_tab_stop[7 & (vc->vc_x >> 5)] |= (1 << (vc->vc_x & 31)); return; + case 'P': + vc->vc_state = ESdcs; + return; case 'Z': respond_ID(tty); return; @@ -2531,8 +2565,14 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) vc->vc_translate = set_translate(vc->vc_G1_charset, vc); vc->vc_state = ESnormal; return; + case ESapc: + return; case ESosc: return; + case ESpm: + return; + case ESdcs: + return; default: vc->vc_state = ESnormal; } @@ -4501,16 +4541,8 @@ static int con_font_get(struct vc_data *vc, struct console_font_op *op) if (op->data && font.charcount > op->charcount) rc = -ENOSPC; - if (!(op->flags & KD_FONT_FLAG_OLD)) { - if (font.width > op->width || font.height > op->height) - rc = -ENOSPC; - } else { - if (font.width != 8) - rc = -EIO; - else if ((op->height && font.height > op->height) || - font.height > 32) - rc = -ENOSPC; - } + if (font.width > op->width || font.height > op->height) + rc = -ENOSPC; if (rc) goto out; @@ -4538,7 +4570,7 @@ static int con_font_set(struct vc_data *vc, struct console_font_op *op) return -EINVAL; if (op->charcount > 512) return -EINVAL; - if (op->width <= 0 || op->width > 32 || op->height > 32) + if (op->width <= 0 || op->width > 32 || !op->height || op->height > 32) return -EINVAL; size = (op->width+7)/8 * 32 * op->charcount; if (size > max_font_size) @@ -4548,31 +4580,6 @@ static int con_font_set(struct vc_data *vc, struct console_font_op *op) if (IS_ERR(font.data)) return PTR_ERR(font.data); - if (!op->height) { /* Need to guess font height [compat] */ - int h, i; - u8 *charmap = font.data; - - /* - * If from KDFONTOP ioctl, don't allow things which can be done - * in userland,so that we can get rid of this soon - */ - if (!(op->flags & KD_FONT_FLAG_OLD)) { - kfree(font.data); - return -EINVAL; - } - - for (h = 32; h > 0; h--) - for (i = 0; i < op->charcount; i++) - if (charmap[32*i+h-1]) - goto nonzero; - - kfree(font.data); - return -EINVAL; - - nonzero: - op->height = h; - } - font.charcount = op->charcount; font.width = op->width; font.height = op->height; diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index b5d2ad900ec09..f623b3859e980 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -241,48 +241,6 @@ int vt_waitactive(int n) #define GPLAST 0x3df #define GPNUM (GPLAST - GPFIRST + 1) - - -static inline int -do_fontx_ioctl(struct vc_data *vc, int cmd, struct consolefontdesc __user *user_cfd, int perm, struct console_font_op *op) -{ - struct consolefontdesc cfdarg; - int i; - - if (copy_from_user(&cfdarg, user_cfd, sizeof(struct consolefontdesc))) - return -EFAULT; - - switch (cmd) { - case PIO_FONTX: - if (!perm) - return -EPERM; - op->op = KD_FONT_OP_SET; - op->flags = KD_FONT_FLAG_OLD; - op->width = 8; - op->height = cfdarg.charheight; - op->charcount = cfdarg.charcount; - op->data = cfdarg.chardata; - return con_font_op(vc, op); - - case GIO_FONTX: - op->op = KD_FONT_OP_GET; - op->flags = KD_FONT_FLAG_OLD; - op->width = 8; - op->height = cfdarg.charheight; - op->charcount = cfdarg.charcount; - op->data = cfdarg.chardata; - i = con_font_op(vc, op); - if (i) - return i; - cfdarg.charheight = op->height; - cfdarg.charcount = op->charcount; - if (copy_to_user(user_cfd, &cfdarg, sizeof(struct consolefontdesc))) - return -EFAULT; - return 0; - } - return -EINVAL; -} - static inline int do_unimap_ioctl(int cmd, struct unimapdesc __user *user_ud, int perm, struct vc_data *vc) { @@ -484,16 +442,19 @@ int vt_ioctl(struct tty_struct *tty, ret = -EINVAL; goto out; } - /* FIXME: this needs the console lock extending */ - if (vc->vc_mode == (unsigned char) arg) + console_lock(); + if (vc->vc_mode == (unsigned char) arg) { + console_unlock(); break; + } vc->vc_mode = (unsigned char) arg; - if (console != fg_console) + if (console != fg_console) { + console_unlock(); break; + } /* * explicitly blank/unblank the screen if switching modes */ - console_lock(); if (arg == KD_TEXT) do_unblank_screen(1); else @@ -688,6 +649,7 @@ int vt_ioctl(struct tty_struct *tty, ret = -ENXIO; else { arg--; + arg = array_index_nospec(arg, MAX_NR_CONSOLES); console_lock(); ret = vc_allocate(arg); console_unlock(); @@ -712,9 +674,9 @@ int vt_ioctl(struct tty_struct *tty, if (vsa.console == 0 || vsa.console > MAX_NR_CONSOLES) ret = -ENXIO; else { - vsa.console = array_index_nospec(vsa.console, - MAX_NR_CONSOLES + 1); vsa.console--; + vsa.console = array_index_nospec(vsa.console, + MAX_NR_CONSOLES); console_lock(); ret = vc_allocate(vsa.console); if (ret == 0) { @@ -915,30 +877,6 @@ int vt_ioctl(struct tty_struct *tty, break; } - case PIO_FONT: { - if (!perm) - return -EPERM; - op.op = KD_FONT_OP_SET; - op.flags = KD_FONT_FLAG_OLD | KD_FONT_FLAG_DONT_RECALC; /* Compatibility */ - op.width = 8; - op.height = 0; - op.charcount = 256; - op.data = up; - ret = con_font_op(vc, &op); - break; - } - - case GIO_FONT: { - op.op = KD_FONT_OP_GET; - op.flags = KD_FONT_FLAG_OLD; - op.width = 8; - op.height = 32; - op.charcount = 256; - op.data = up; - ret = con_font_op(vc, &op); - break; - } - case PIO_CMAP: if (!perm) ret = -EPERM; @@ -950,36 +888,6 @@ int vt_ioctl(struct tty_struct *tty, ret = con_get_cmap(up); break; - case PIO_FONTX: - case GIO_FONTX: - ret = do_fontx_ioctl(vc, cmd, up, perm, &op); - break; - - case PIO_FONTRESET: - { - if (!perm) - return -EPERM; - -#ifdef BROKEN_GRAPHICS_PROGRAMS - /* With BROKEN_GRAPHICS_PROGRAMS defined, the default - font is not saved. */ - ret = -ENOSYS; - break; -#else - { - op.op = KD_FONT_OP_SET_DEFAULT; - op.data = NULL; - ret = con_font_op(vc, &op); - if (ret) - break; - console_lock(); - con_set_default_unimap(vc); - console_unlock(); - break; - } -#endif - } - case KDFONTOP: { if (copy_from_user(&op, up, sizeof(op))) { ret = -EFAULT; @@ -1093,54 +1001,6 @@ void vc_SAK(struct work_struct *work) #ifdef CONFIG_COMPAT -struct compat_consolefontdesc { - unsigned short charcount; /* characters in font (256 or 512) */ - unsigned short charheight; /* scan lines per character (1-32) */ - compat_caddr_t chardata; /* font data in expanded form */ -}; - -static inline int -compat_fontx_ioctl(struct vc_data *vc, int cmd, - struct compat_consolefontdesc __user *user_cfd, - int perm, struct console_font_op *op) -{ - struct compat_consolefontdesc cfdarg; - int i; - - if (copy_from_user(&cfdarg, user_cfd, sizeof(struct compat_consolefontdesc))) - return -EFAULT; - - switch (cmd) { - case PIO_FONTX: - if (!perm) - return -EPERM; - op->op = KD_FONT_OP_SET; - op->flags = KD_FONT_FLAG_OLD; - op->width = 8; - op->height = cfdarg.charheight; - op->charcount = cfdarg.charcount; - op->data = compat_ptr(cfdarg.chardata); - return con_font_op(vc, op); - - case GIO_FONTX: - op->op = KD_FONT_OP_GET; - op->flags = KD_FONT_FLAG_OLD; - op->width = 8; - op->height = cfdarg.charheight; - op->charcount = cfdarg.charcount; - op->data = compat_ptr(cfdarg.chardata); - i = con_font_op(vc, op); - if (i) - return i; - cfdarg.charheight = op->height; - cfdarg.charcount = op->charcount; - if (copy_to_user(user_cfd, &cfdarg, sizeof(struct compat_consolefontdesc))) - return -EFAULT; - return 0; - } - return -EINVAL; -} - struct compat_console_font_op { compat_uint_t op; /* operation code KD_FONT_OP_* */ compat_uint_t flags; /* KD_FONT_FLAG_* */ @@ -1217,9 +1077,6 @@ long vt_compat_ioctl(struct tty_struct *tty, /* * these need special handlers for incompatible data structures */ - case PIO_FONTX: - case GIO_FONTX: - return compat_fontx_ioctl(vc, cmd, up, perm, &op); case KDFONTOP: return compat_kdfontop_ioctl(up, perm, &op, vc); diff --git a/drivers/usb/cdns3/gadget.c b/drivers/usb/cdns3/gadget.c index c3bf54cc530f9..296f2ee1b6803 100644 --- a/drivers/usb/cdns3/gadget.c +++ b/drivers/usb/cdns3/gadget.c @@ -807,6 +807,19 @@ static void cdns3_wa1_tray_restore_cycle_bit(struct cdns3_device *priv_dev, cdns3_wa1_restore_cycle_bit(priv_ep); } +static void cdns3_rearm_drdy_if_needed(struct cdns3_endpoint *priv_ep) +{ + struct cdns3_device *priv_dev = priv_ep->cdns3_dev; + + if (priv_dev->dev_ver < DEV_VER_V3) + return; + + if (readl(&priv_dev->regs->ep_sts) & EP_STS_TRBERR) { + writel(EP_STS_TRBERR, &priv_dev->regs->ep_sts); + writel(EP_CMD_DRDY, &priv_dev->regs->ep_cmd); + } +} + /** * cdns3_ep_run_transfer - start transfer on no-default endpoint hardware * @priv_ep: endpoint object @@ -1003,6 +1016,7 @@ int cdns3_ep_run_transfer(struct cdns3_endpoint *priv_ep, /*clearing TRBERR and EP_STS_DESCMIS before seting DRDY*/ writel(EP_STS_TRBERR | EP_STS_DESCMIS, &priv_dev->regs->ep_sts); writel(EP_CMD_DRDY, &priv_dev->regs->ep_cmd); + cdns3_rearm_drdy_if_needed(priv_ep); trace_cdns3_doorbell_epx(priv_ep->name, readl(&priv_dev->regs->ep_traddr)); } diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index b7da2a273c451..74cdc7ef476ae 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -534,7 +534,7 @@ int hw_device_reset(struct ci_hdrc *ci) return 0; } -static irqreturn_t ci_irq(int irq, void *data) +static irqreturn_t ci_irq_handler(int irq, void *data) { struct ci_hdrc *ci = data; irqreturn_t ret = IRQ_NONE; @@ -587,6 +587,15 @@ static irqreturn_t ci_irq(int irq, void *data) return ret; } +static void ci_irq(struct ci_hdrc *ci) +{ + unsigned long flags; + + local_irq_save(flags); + ci_irq_handler(ci->irq, ci); + local_irq_restore(flags); +} + static int ci_cable_notifier(struct notifier_block *nb, unsigned long event, void *ptr) { @@ -596,7 +605,7 @@ static int ci_cable_notifier(struct notifier_block *nb, unsigned long event, cbl->connected = event; cbl->changed = true; - ci_irq(ci->irq, ci); + ci_irq(ci); return NOTIFY_DONE; } @@ -634,7 +643,7 @@ static int ci_usb_role_switch_set(struct device *dev, enum usb_role role) if (cable) { cable->changed = true; cable->connected = false; - ci_irq(ci->irq, ci); + ci_irq(ci); spin_unlock_irqrestore(&ci->lock, flags); if (ci->wq && role != USB_ROLE_NONE) flush_workqueue(ci->wq); @@ -652,7 +661,7 @@ static int ci_usb_role_switch_set(struct device *dev, enum usb_role role) if (cable) { cable->changed = true; cable->connected = true; - ci_irq(ci->irq, ci); + ci_irq(ci); } spin_unlock_irqrestore(&ci->lock, flags); pm_runtime_put_sync(ci->dev); @@ -1156,7 +1165,7 @@ static int ci_hdrc_probe(struct platform_device *pdev) } } - ret = devm_request_irq(dev, ci->irq, ci_irq, IRQF_SHARED, + ret = devm_request_irq(dev, ci->irq, ci_irq_handler, IRQF_SHARED, ci->platdata->name, ci); if (ret) goto stop; @@ -1277,11 +1286,11 @@ static void ci_extcon_wakeup_int(struct ci_hdrc *ci) if (!IS_ERR(cable_id->edev) && ci->is_otg && (otgsc & OTGSC_IDIE) && (otgsc & OTGSC_IDIS)) - ci_irq(ci->irq, ci); + ci_irq(ci); if (!IS_ERR(cable_vbus->edev) && ci->is_otg && (otgsc & OTGSC_BSVIE) && (otgsc & OTGSC_BSVIS)) - ci_irq(ci->irq, ci); + ci_irq(ci); } static int ci_controller_resume(struct device *dev) diff --git a/drivers/usb/chipidea/host.c b/drivers/usb/chipidea/host.c index 48e4a5ca18359..f5f56ee07729f 100644 --- a/drivers/usb/chipidea/host.c +++ b/drivers/usb/chipidea/host.c @@ -233,18 +233,26 @@ static int ci_ehci_hub_control( ) { struct ehci_hcd *ehci = hcd_to_ehci(hcd); + unsigned int ports = HCS_N_PORTS(ehci->hcs_params); u32 __iomem *status_reg; - u32 temp; + u32 temp, port_index; unsigned long flags; int retval = 0; struct device *dev = hcd->self.controller; struct ci_hdrc *ci = dev_get_drvdata(dev); - status_reg = &ehci->regs->port_status[(wIndex & 0xff) - 1]; + port_index = wIndex & 0xff; + port_index -= (port_index > 0); + status_reg = &ehci->regs->port_status[port_index]; spin_lock_irqsave(&ehci->lock, flags); if (typeReq == SetPortFeature && wValue == USB_PORT_FEAT_SUSPEND) { + if (!wIndex || wIndex > ports) { + retval = -EPIPE; + goto done; + } + temp = ehci_readl(ehci, status_reg); if ((temp & PORT_PE) == 0 || (temp & PORT_RESET) != 0) { retval = -EPIPE; @@ -273,7 +281,7 @@ static int ci_ehci_hub_control( ehci_writel(ehci, temp, status_reg); } - set_bit((wIndex & 0xff) - 1, &ehci->suspended_ports); + set_bit(port_index, &ehci->suspended_ports); goto done; } diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index 8f18e7b6cadf4..21c299c85505d 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -921,6 +921,9 @@ isr_setup_status_complete(struct usb_ep *ep, struct usb_request *req) struct ci_hdrc *ci = req->context; unsigned long flags; + if (req->status < 0) + return; + if (ci->setaddr) { hw_usb_set_address(ci, ci->address); ci->setaddr = false; diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index c0604c60ebd01..5dc8827ede7e8 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -339,6 +339,9 @@ static void acm_process_notification(struct acm *acm, unsigned char *buf) acm->iocount.overrun++; spin_unlock_irqrestore(&acm->read_lock, flags); + if (newctrl & ACM_CTRL_BRK) + tty_flip_buffer_push(&acm->port); + if (difference) wake_up_all(&acm->wioctl); @@ -474,11 +477,16 @@ static int acm_submit_read_urbs(struct acm *acm, gfp_t mem_flags) static void acm_process_read_urb(struct acm *acm, struct urb *urb) { + unsigned long flags; + if (!urb->actual_length) return; + spin_lock_irqsave(&acm->read_lock, flags); tty_insert_flip_string(&acm->port, urb->transfer_buffer, urb->actual_length); + spin_unlock_irqrestore(&acm->read_lock, flags); + tty_flip_buffer_push(&acm->port); } @@ -725,7 +733,8 @@ static void acm_port_destruct(struct tty_port *port) { struct acm *acm = container_of(port, struct acm, port); - acm_release_minor(acm); + if (acm->minor != ACM_MINOR_INVALID) + acm_release_minor(acm); usb_put_intf(acm->control); kfree(acm->country_codes); kfree(acm); @@ -1356,8 +1365,10 @@ static int acm_probe(struct usb_interface *intf, usb_get_intf(acm->control); /* undone in destruct() */ minor = acm_alloc_minor(acm); - if (minor < 0) + if (minor < 0) { + acm->minor = ACM_MINOR_INVALID; goto alloc_fail1; + } acm->minor = minor; acm->dev = usb_dev; diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h index b95ff769072e7..ef7fe5eacff69 100644 --- a/drivers/usb/class/cdc-acm.h +++ b/drivers/usb/class/cdc-acm.h @@ -22,6 +22,8 @@ #define ACM_TTY_MAJOR 166 #define ACM_TTY_MINORS 256 +#define ACM_MINOR_INVALID ACM_TTY_MINORS + /* * Requests. */ diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index de7bb8e6a1efc..98022584f7092 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -755,6 +755,7 @@ static int wdm_release(struct inode *inode, struct file *file) poison_urbs(desc); spin_lock_irq(&desc->iuspin); desc->resp_count = 0; + clear_bit(WDM_RESPONDING, &desc->flags); spin_unlock_irq(&desc->iuspin); desc->manage_power(desc->intf, 0); unpoison_urbs(desc); diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index 3922a6f8c50a6..77b1802f829b3 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -1889,6 +1889,7 @@ static int usbtmc_ioctl_request(struct usbtmc_device_data *data, struct usbtmc_ctrlrequest request; u8 *buffer = NULL; int rv; + unsigned int is_in, pipe; unsigned long res; res = copy_from_user(&request, arg, sizeof(struct usbtmc_ctrlrequest)); @@ -1898,12 +1899,14 @@ static int usbtmc_ioctl_request(struct usbtmc_device_data *data, if (request.req.wLength > USBTMC_BUFSIZE) return -EMSGSIZE; + is_in = request.req.bRequestType & USB_DIR_IN; + if (request.req.wLength) { buffer = kmalloc(request.req.wLength, GFP_KERNEL); if (!buffer) return -ENOMEM; - if ((request.req.bRequestType & USB_DIR_IN) == 0) { + if (!is_in) { /* Send control data to device */ res = copy_from_user(buffer, request.data, request.req.wLength); @@ -1914,8 +1917,12 @@ static int usbtmc_ioctl_request(struct usbtmc_device_data *data, } } + if (is_in) + pipe = usb_rcvctrlpipe(data->usb_dev, 0); + else + pipe = usb_sndctrlpipe(data->usb_dev, 0); rv = usb_control_msg(data->usb_dev, - usb_rcvctrlpipe(data->usb_dev, 0), + pipe, request.req.bRequest, request.req.bRequestType, request.req.wValue, @@ -1927,7 +1934,7 @@ static int usbtmc_ioctl_request(struct usbtmc_device_data *data, goto exit; } - if (rv && (request.req.bRequestType & USB_DIR_IN)) { + if (rv && is_in) { /* Read control data from device */ res = copy_to_user(request.data, buffer, rv); if (res) diff --git a/drivers/usb/common/Kconfig b/drivers/usb/common/Kconfig index d611477aae414..196f4a3975871 100644 --- a/drivers/usb/common/Kconfig +++ b/drivers/usb/common/Kconfig @@ -6,8 +6,7 @@ config USB_COMMON config USB_LED_TRIG bool "USB LED Triggers" - depends on LEDS_CLASS && LEDS_TRIGGERS - select USB_COMMON + depends on LEDS_CLASS && USB_COMMON && LEDS_TRIGGERS help This option adds LED triggers for USB host and/or gadget activity. diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c index 9a2ab6751a23c..c42c152bbc335 100644 --- a/drivers/usb/common/ulpi.c +++ b/drivers/usb/common/ulpi.c @@ -39,8 +39,11 @@ static int ulpi_match(struct device *dev, struct device_driver *driver) struct ulpi *ulpi = to_ulpi_dev(dev); const struct ulpi_device_id *id; - /* Some ULPI devices don't have a vendor id so rely on OF match */ - if (ulpi->id.vendor == 0) + /* + * Some ULPI devices don't have a vendor id + * or provide an id_table so rely on OF match. + */ + if (ulpi->id.vendor == 0 || !drv->id_table) return of_driver_match_device(dev, driver); for (id = drv->id_table; id->vendor; id++) @@ -129,6 +132,7 @@ static const struct attribute_group *ulpi_dev_attr_groups[] = { static void ulpi_dev_release(struct device *dev) { + of_node_put(dev->of_node); kfree(to_ulpi_dev(dev)); } @@ -245,12 +249,16 @@ static int ulpi_register(struct device *dev, struct ulpi *ulpi) return ret; ret = ulpi_read_id(ulpi); - if (ret) + if (ret) { + of_node_put(ulpi->dev.of_node); return ret; + } ret = device_register(&ulpi->dev); - if (ret) + if (ret) { + put_device(&ulpi->dev); return ret; + } dev_dbg(&ulpi->dev, "registered ULPI PHY: vendor %04x, product %04x\n", ulpi->id.vendor, ulpi->id.product); @@ -297,7 +305,6 @@ EXPORT_SYMBOL_GPL(ulpi_register_interface); */ void ulpi_unregister_interface(struct ulpi *ulpi) { - of_node_put(ulpi->dev.of_node); device_unregister(&ulpi->dev); } EXPORT_SYMBOL_GPL(ulpi_unregister_interface); diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index c68217b7dace4..a8d97773d3d98 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -409,7 +409,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, * the USB-2 spec requires such endpoints to have wMaxPacketSize = 0 * (see the end of section 5.6.3), so don't warn about them. */ - maxp = usb_endpoint_maxp(&endpoint->desc); + maxp = le16_to_cpu(endpoint->desc.wMaxPacketSize); if (maxp == 0 && !(usb_endpoint_xfer_isoc(d) && asnum == 0)) { dev_warn(ddev, "config %d interface %d altsetting %d endpoint 0x%X has invalid wMaxPacketSize 0\n", cfgno, inum, asnum, d->bEndpointAddress); @@ -425,9 +425,9 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, maxpacket_maxes = full_speed_maxpacket_maxes; break; case USB_SPEED_HIGH: - /* Bits 12..11 are allowed only for HS periodic endpoints */ + /* Multiple-transactions bits are allowed only for HS periodic endpoints */ if (usb_endpoint_xfer_int(d) || usb_endpoint_xfer_isoc(d)) { - i = maxp & (BIT(12) | BIT(11)); + i = maxp & USB_EP_MAXP_MULT_MASK; maxp &= ~i; } /* fallthrough */ diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c index 9e26b0143a59a..db16efe293e0b 100644 --- a/drivers/usb/core/hcd-pci.c +++ b/drivers/usb/core/hcd-pci.c @@ -604,10 +604,10 @@ const struct dev_pm_ops usb_hcd_pci_pm_ops = { .suspend_noirq = hcd_pci_suspend_noirq, .resume_noirq = hcd_pci_resume_noirq, .resume = hcd_pci_resume, - .freeze = check_root_hub_suspended, + .freeze = hcd_pci_suspend, .freeze_noirq = check_root_hub_suspended, .thaw_noirq = NULL, - .thaw = NULL, + .thaw = hcd_pci_resume, .poweroff = hcd_pci_suspend, .poweroff_noirq = hcd_pci_suspend_noirq, .restore_noirq = hcd_pci_resume_noirq, diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index d0f45600b6698..fde211519a973 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -753,6 +753,7 @@ void usb_hcd_poll_rh_status(struct usb_hcd *hcd) { struct urb *urb; int length; + int status; unsigned long flags; char buffer[6]; /* Any root hubs with > 31 ports? */ @@ -770,11 +771,17 @@ void usb_hcd_poll_rh_status(struct usb_hcd *hcd) if (urb) { clear_bit(HCD_FLAG_POLL_PENDING, &hcd->flags); hcd->status_urb = NULL; + if (urb->transfer_buffer_length >= length) { + status = 0; + } else { + status = -EOVERFLOW; + length = urb->transfer_buffer_length; + } urb->actual_length = length; memcpy(urb->transfer_buffer, buffer, length); usb_hcd_unlink_urb_from_ep(hcd, urb); - usb_hcd_giveback_urb(hcd, urb, 0); + usb_hcd_giveback_urb(hcd, urb, status); } else { length = 0; set_bit(HCD_FLAG_POLL_PENDING, &hcd->flags); @@ -1560,6 +1567,13 @@ int usb_hcd_submit_urb (struct urb *urb, gfp_t mem_flags) urb->hcpriv = NULL; INIT_LIST_HEAD(&urb->urb_list); atomic_dec(&urb->use_count); + /* + * Order the write of urb->use_count above before the read + * of urb->reject below. Pairs with the memory barriers in + * usb_kill_urb() and usb_poison_urb(). + */ + smp_mb__after_atomic(); + atomic_dec(&urb->dev->urbnum); if (atomic_read(&urb->reject)) wake_up(&usb_kill_urb_queue); @@ -1655,6 +1669,13 @@ static void __usb_hcd_giveback_urb(struct urb *urb) usb_anchor_resume_wakeups(anchor); atomic_dec(&urb->use_count); + /* + * Order the write of urb->use_count above before the read + * of urb->reject below. Pairs with the memory barriers in + * usb_kill_urb() and usb_poison_urb(). + */ + smp_mb__after_atomic(); + if (unlikely(atomic_read(&urb->reject))) wake_up(&usb_kill_urb_queue); usb_put_urb(urb); @@ -2636,6 +2657,7 @@ int usb_add_hcd(struct usb_hcd *hcd, { int retval; struct usb_device *rhdev; + struct usb_hcd *shared_hcd; if (!hcd->skip_phy_initialization && usb_hcd_is_primary_hcd(hcd)) { hcd->phy_roothub = usb_phy_roothub_alloc(hcd->self.sysdev); @@ -2792,13 +2814,26 @@ int usb_add_hcd(struct usb_hcd *hcd, goto err_hcd_driver_start; } + /* starting here, usbcore will pay attention to the shared HCD roothub */ + shared_hcd = hcd->shared_hcd; + if (!usb_hcd_is_primary_hcd(hcd) && shared_hcd && HCD_DEFER_RH_REGISTER(shared_hcd)) { + retval = register_root_hub(shared_hcd); + if (retval != 0) + goto err_register_root_hub; + + if (shared_hcd->uses_new_polling && HCD_POLL_RH(shared_hcd)) + usb_hcd_poll_rh_status(shared_hcd); + } + /* starting here, usbcore will pay attention to this root hub */ - retval = register_root_hub(hcd); - if (retval != 0) - goto err_register_root_hub; + if (!HCD_DEFER_RH_REGISTER(hcd)) { + retval = register_root_hub(hcd); + if (retval != 0) + goto err_register_root_hub; - if (hcd->uses_new_polling && HCD_POLL_RH(hcd)) - usb_hcd_poll_rh_status(hcd); + if (hcd->uses_new_polling && HCD_POLL_RH(hcd)) + usb_hcd_poll_rh_status(hcd); + } return retval; @@ -2841,6 +2876,7 @@ EXPORT_SYMBOL_GPL(usb_add_hcd); void usb_remove_hcd(struct usb_hcd *hcd) { struct usb_device *rhdev = hcd->self.root_hub; + bool rh_registered; dev_info(hcd->self.controller, "remove, state %x\n", hcd->state); @@ -2851,6 +2887,7 @@ void usb_remove_hcd(struct usb_hcd *hcd) dev_dbg(hcd->self.controller, "roothub graceful disconnect\n"); spin_lock_irq (&hcd_root_hub_lock); + rh_registered = hcd->rh_registered; hcd->rh_registered = 0; spin_unlock_irq (&hcd_root_hub_lock); @@ -2860,7 +2897,8 @@ void usb_remove_hcd(struct usb_hcd *hcd) cancel_work_sync(&hcd->died_work); mutex_lock(&usb_bus_idr_lock); - usb_disconnect(&rhdev); /* Sets rhdev to NULL */ + if (rh_registered) + usb_disconnect(&rhdev); /* Sets rhdev to NULL */ mutex_unlock(&usb_bus_idr_lock); /* diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 303e8b3c1bdae..4cf0dc7f330dd 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1108,7 +1108,10 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) } else { hub_power_on(hub, true); } - } + /* Give some time on remote wakeup to let links to transit to U0 */ + } else if (hub_is_superspeed(hub->hdev)) + msleep(20); + init2: /* @@ -1223,7 +1226,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) */ if (portchange || (hub_is_superspeed(hub->hdev) && port_resumed)) - set_bit(port1, hub->change_bits); + set_bit(port1, hub->event_bits); } else if (udev->persist_enabled) { #ifdef CONFIG_PM @@ -4609,8 +4612,6 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, if (oldspeed == USB_SPEED_LOW) delay = HUB_LONG_RESET_TIME; - mutex_lock(hcd->address0_mutex); - /* Reset the device; full speed may morph to high speed */ /* FIXME a USB 2.0 device may morph into SuperSpeed on reset. */ retval = hub_port_reset(hub, port1, udev, delay, false); @@ -4925,7 +4926,6 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, hub_port_disable(hub, port1, 0); update_devnum(udev, devnum); /* for disconnect processing */ } - mutex_unlock(hcd->address0_mutex); return retval; } @@ -5015,6 +5015,7 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, struct usb_port *port_dev = hub->ports[port1 - 1]; struct usb_device *udev = port_dev->child; static int unreliable_port = -1; + bool retry_locked; /* Disconnect any existing devices under this port */ if (udev) { @@ -5070,7 +5071,11 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, unit_load = 100; status = 0; + for (i = 0; i < SET_CONFIG_TRIES; i++) { + usb_lock_port(port_dev); + mutex_lock(hcd->address0_mutex); + retry_locked = true; /* reallocate for each attempt, since references * to the previous one can escape in various ways @@ -5079,6 +5084,8 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, if (!udev) { dev_err(&port_dev->dev, "couldn't allocate usb_device\n"); + mutex_unlock(hcd->address0_mutex); + usb_unlock_port(port_dev); goto done; } @@ -5100,12 +5107,14 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, } /* reset (non-USB 3.0 devices) and get descriptor */ - usb_lock_port(port_dev); status = hub_port_init(hub, udev, port1, i); - usb_unlock_port(port_dev); if (status < 0) goto loop; + mutex_unlock(hcd->address0_mutex); + usb_unlock_port(port_dev); + retry_locked = false; + if (udev->quirks & USB_QUIRK_DELAY_INIT) msleep(2000); @@ -5198,6 +5207,10 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, usb_ep0_reinit(udev); release_devnum(udev); hub_free_dev(udev); + if (retry_locked) { + mutex_unlock(hcd->address0_mutex); + usb_unlock_port(port_dev); + } usb_put_dev(udev); if ((status == -ENOTCONN) || (status == -ENOTSUPP)) break; @@ -5794,6 +5807,8 @@ static int usb_reset_and_verify_device(struct usb_device *udev) bos = udev->bos; udev->bos = NULL; + mutex_lock(hcd->address0_mutex); + for (i = 0; i < SET_CONFIG_TRIES; ++i) { /* ep0 maxpacket size may change; let the HCD know about it. @@ -5803,6 +5818,7 @@ static int usb_reset_and_verify_device(struct usb_device *udev) if (ret >= 0 || ret == -ENOTCONN || ret == -ENODEV) break; } + mutex_unlock(hcd->address0_mutex); if (ret < 0) goto re_enumerate; diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index d97544fd339b1..f8f2de7899a94 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -404,6 +404,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0b05, 0x17e0), .driver_info = USB_QUIRK_IGNORE_REMOTE_WAKEUP }, + /* Realtek Semiconductor Corp. Mass Storage Device (Multicard Reader)*/ + { USB_DEVICE(0x0bda, 0x0151), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, + /* Realtek hub in Dell WD19 (Type-C) */ { USB_DEVICE(0x0bda, 0x0487), .driver_info = USB_QUIRK_NO_LPM }, { USB_DEVICE(0x0bda, 0x5487), .driver_info = USB_QUIRK_RESET_RESUME }, @@ -435,6 +438,12 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x1532, 0x0116), .driver_info = USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, + /* Lenovo USB-C to Ethernet Adapter RTL8153-04 */ + { USB_DEVICE(0x17ef, 0x720c), .driver_info = USB_QUIRK_NO_LPM }, + + /* Lenovo Powered USB-C Travel Hub (4X90S92381, RTL8153 GigE) */ + { USB_DEVICE(0x17ef, 0x721e), .driver_info = USB_QUIRK_NO_LPM }, + /* Lenovo ThinkCenter A630Z TI024Gen3 usb-audio */ { USB_DEVICE(0x17ef, 0xa012), .driver_info = USB_QUIRK_DISCONNECT_SUSPEND }, @@ -502,6 +511,12 @@ static const struct usb_device_id usb_quirk_list[] = { /* DJI CineSSD */ { USB_DEVICE(0x2ca3, 0x0031), .driver_info = USB_QUIRK_NO_LPM }, + /* DELL USB GEN2 */ + { USB_DEVICE(0x413c, 0xb062), .driver_info = USB_QUIRK_NO_LPM | USB_QUIRK_RESET_RESUME }, + + /* VCOM device */ + { USB_DEVICE(0x4296, 0x7570), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, + /* INTEL VALUE SSD */ { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME }, diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c index 31ca5abb4c12a..0045bbc3627dd 100644 --- a/drivers/usb/core/urb.c +++ b/drivers/usb/core/urb.c @@ -691,6 +691,12 @@ void usb_kill_urb(struct urb *urb) if (!(urb && urb->dev && urb->ep)) return; atomic_inc(&urb->reject); + /* + * Order the write of urb->reject above before the read + * of urb->use_count below. Pairs with the barriers in + * __usb_hcd_giveback_urb() and usb_hcd_submit_urb(). + */ + smp_mb__after_atomic(); usb_hcd_unlink_urb(urb, -ENOENT); wait_event(usb_kill_urb_queue, atomic_read(&urb->use_count) == 0); @@ -732,6 +738,12 @@ void usb_poison_urb(struct urb *urb) if (!urb) return; atomic_inc(&urb->reject); + /* + * Order the write of urb->reject above before the read + * of urb->use_count below. Pairs with the barriers in + * __usb_hcd_giveback_urb() and usb_hcd_submit_urb(). + */ + smp_mb__after_atomic(); if (!urb->dev || !urb->ep) return; diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 66dfcdbd1e03a..8fd6eefc671c7 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -115,10 +115,16 @@ static inline bool using_desc_dma(struct dwc2_hsotg *hsotg) */ static inline void dwc2_gadget_incr_frame_num(struct dwc2_hsotg_ep *hs_ep) { + struct dwc2_hsotg *hsotg = hs_ep->parent; + u16 limit = DSTS_SOFFN_LIMIT; + + if (hsotg->gadget.speed != USB_SPEED_HIGH) + limit >>= 3; + hs_ep->target_frame += hs_ep->interval; - if (hs_ep->target_frame > DSTS_SOFFN_LIMIT) { + if (hs_ep->target_frame > limit) { hs_ep->frame_overrun = true; - hs_ep->target_frame &= DSTS_SOFFN_LIMIT; + hs_ep->target_frame &= limit; } else { hs_ep->frame_overrun = false; } @@ -136,10 +142,16 @@ static inline void dwc2_gadget_incr_frame_num(struct dwc2_hsotg_ep *hs_ep) */ static inline void dwc2_gadget_dec_frame_num_by_one(struct dwc2_hsotg_ep *hs_ep) { + struct dwc2_hsotg *hsotg = hs_ep->parent; + u16 limit = DSTS_SOFFN_LIMIT; + + if (hsotg->gadget.speed != USB_SPEED_HIGH) + limit >>= 3; + if (hs_ep->target_frame) hs_ep->target_frame -= 1; else - hs_ep->target_frame = DSTS_SOFFN_LIMIT; + hs_ep->target_frame = limit; } /** @@ -1018,6 +1030,12 @@ static void dwc2_gadget_start_isoc_ddma(struct dwc2_hsotg_ep *hs_ep) dwc2_writel(hsotg, ctrl, depctl); } +static bool dwc2_gadget_target_frame_elapsed(struct dwc2_hsotg_ep *hs_ep); +static void dwc2_hsotg_complete_request(struct dwc2_hsotg *hsotg, + struct dwc2_hsotg_ep *hs_ep, + struct dwc2_hsotg_req *hs_req, + int result); + /** * dwc2_hsotg_start_req - start a USB request from an endpoint's queue * @hsotg: The controller state. @@ -1170,14 +1188,21 @@ static void dwc2_hsotg_start_req(struct dwc2_hsotg *hsotg, } } - if (hs_ep->isochronous && hs_ep->interval == 1) { - hs_ep->target_frame = dwc2_hsotg_read_frameno(hsotg); - dwc2_gadget_incr_frame_num(hs_ep); - - if (hs_ep->target_frame & 0x1) - ctrl |= DXEPCTL_SETODDFR; - else - ctrl |= DXEPCTL_SETEVENFR; + if (hs_ep->isochronous) { + if (!dwc2_gadget_target_frame_elapsed(hs_ep)) { + if (hs_ep->interval == 1) { + if (hs_ep->target_frame & 0x1) + ctrl |= DXEPCTL_SETODDFR; + else + ctrl |= DXEPCTL_SETEVENFR; + } + ctrl |= DXEPCTL_CNAK; + } else { + hs_req->req.frame_number = hs_ep->target_frame; + hs_req->req.actual = 0; + dwc2_hsotg_complete_request(hsotg, hs_ep, hs_req, -ENODATA); + return; + } } ctrl |= DXEPCTL_EPENA; /* ensure ep enabled */ @@ -1325,12 +1350,16 @@ static bool dwc2_gadget_target_frame_elapsed(struct dwc2_hsotg_ep *hs_ep) u32 target_frame = hs_ep->target_frame; u32 current_frame = hsotg->frame_number; bool frame_overrun = hs_ep->frame_overrun; + u16 limit = DSTS_SOFFN_LIMIT; + + if (hsotg->gadget.speed != USB_SPEED_HIGH) + limit >>= 3; if (!frame_overrun && current_frame >= target_frame) return true; if (frame_overrun && current_frame >= target_frame && - ((current_frame - target_frame) < DSTS_SOFFN_LIMIT / 2)) + ((current_frame - target_frame) < limit / 2)) return true; return false; @@ -1712,11 +1741,9 @@ static struct dwc2_hsotg_req *get_ep_head(struct dwc2_hsotg_ep *hs_ep) */ static void dwc2_gadget_start_next_request(struct dwc2_hsotg_ep *hs_ep) { - u32 mask; struct dwc2_hsotg *hsotg = hs_ep->parent; int dir_in = hs_ep->dir_in; struct dwc2_hsotg_req *hs_req; - u32 epmsk_reg = dir_in ? DIEPMSK : DOEPMSK; if (!list_empty(&hs_ep->queue)) { hs_req = get_ep_head(hs_ep); @@ -1732,9 +1759,6 @@ static void dwc2_gadget_start_next_request(struct dwc2_hsotg_ep *hs_ep) } else { dev_dbg(hsotg->dev, "%s: No more ISOC-OUT requests\n", __func__); - mask = dwc2_readl(hsotg, epmsk_reg); - mask |= DOEPMSK_OUTTKNEPDISMSK; - dwc2_writel(hsotg, mask, epmsk_reg); } } @@ -2304,19 +2328,6 @@ static void dwc2_hsotg_ep0_zlp(struct dwc2_hsotg *hsotg, bool dir_in) dwc2_hsotg_program_zlp(hsotg, hsotg->eps_out[0]); } -static void dwc2_hsotg_change_ep_iso_parity(struct dwc2_hsotg *hsotg, - u32 epctl_reg) -{ - u32 ctrl; - - ctrl = dwc2_readl(hsotg, epctl_reg); - if (ctrl & DXEPCTL_EOFRNUM) - ctrl |= DXEPCTL_SETEVENFR; - else - ctrl |= DXEPCTL_SETODDFR; - dwc2_writel(hsotg, ctrl, epctl_reg); -} - /* * dwc2_gadget_get_xfersize_ddma - get transferred bytes amount from desc * @hs_ep - The endpoint on which transfer went @@ -2437,20 +2448,11 @@ static void dwc2_hsotg_handle_outdone(struct dwc2_hsotg *hsotg, int epnum) dwc2_hsotg_ep0_zlp(hsotg, true); } - /* - * Slave mode OUT transfers do not go through XferComplete so - * adjust the ISOC parity here. - */ - if (!using_dma(hsotg)) { - if (hs_ep->isochronous && hs_ep->interval == 1) - dwc2_hsotg_change_ep_iso_parity(hsotg, DOEPCTL(epnum)); - else if (hs_ep->isochronous && hs_ep->interval > 1) - dwc2_gadget_incr_frame_num(hs_ep); - } - /* Set actual frame number for completed transfers */ - if (!using_desc_dma(hsotg) && hs_ep->isochronous) - req->frame_number = hsotg->frame_number; + if (!using_desc_dma(hsotg) && hs_ep->isochronous) { + req->frame_number = hs_ep->target_frame; + dwc2_gadget_incr_frame_num(hs_ep); + } dwc2_hsotg_complete_request(hsotg, hs_ep, hs_req, result); } @@ -2764,6 +2766,12 @@ static void dwc2_hsotg_complete_in(struct dwc2_hsotg *hsotg, return; } + /* Set actual frame number for completed transfers */ + if (!using_desc_dma(hsotg) && hs_ep->isochronous) { + hs_req->req.frame_number = hs_ep->target_frame; + dwc2_gadget_incr_frame_num(hs_ep); + } + dwc2_hsotg_complete_request(hsotg, hs_ep, hs_req, 0); } @@ -2824,23 +2832,18 @@ static void dwc2_gadget_handle_ep_disabled(struct dwc2_hsotg_ep *hs_ep) dwc2_hsotg_txfifo_flush(hsotg, hs_ep->fifo_index); - if (hs_ep->isochronous) { - dwc2_hsotg_complete_in(hsotg, hs_ep); - return; - } - if ((epctl & DXEPCTL_STALL) && (epctl & DXEPCTL_EPTYPE_BULK)) { int dctl = dwc2_readl(hsotg, DCTL); dctl |= DCTL_CGNPINNAK; dwc2_writel(hsotg, dctl, DCTL); } - return; - } + } else { - if (dctl & DCTL_GOUTNAKSTS) { - dctl |= DCTL_CGOUTNAK; - dwc2_writel(hsotg, dctl, DCTL); + if (dctl & DCTL_GOUTNAKSTS) { + dctl |= DCTL_CGOUTNAK; + dwc2_writel(hsotg, dctl, DCTL); + } } if (!hs_ep->isochronous) @@ -2854,15 +2857,16 @@ static void dwc2_gadget_handle_ep_disabled(struct dwc2_hsotg_ep *hs_ep) do { hs_req = get_ep_head(hs_ep); - if (hs_req) + if (hs_req) { + hs_req->req.frame_number = hs_ep->target_frame; + hs_req->req.actual = 0; dwc2_hsotg_complete_request(hsotg, hs_ep, hs_req, -ENODATA); + } dwc2_gadget_incr_frame_num(hs_ep); /* Update current frame number value. */ hsotg->frame_number = dwc2_hsotg_read_frameno(hsotg); } while (dwc2_gadget_target_frame_elapsed(hs_ep)); - - dwc2_gadget_start_next_request(hs_ep); } /** @@ -2879,8 +2883,8 @@ static void dwc2_gadget_handle_ep_disabled(struct dwc2_hsotg_ep *hs_ep) static void dwc2_gadget_handle_out_token_ep_disabled(struct dwc2_hsotg_ep *ep) { struct dwc2_hsotg *hsotg = ep->parent; + struct dwc2_hsotg_req *hs_req; int dir_in = ep->dir_in; - u32 doepmsk; if (dir_in || !ep->isochronous) return; @@ -2894,28 +2898,42 @@ static void dwc2_gadget_handle_out_token_ep_disabled(struct dwc2_hsotg_ep *ep) return; } - if (ep->interval > 1 && - ep->target_frame == TARGET_FRAME_INITIAL) { + if (ep->target_frame == TARGET_FRAME_INITIAL) { u32 ctrl; ep->target_frame = hsotg->frame_number; - dwc2_gadget_incr_frame_num(ep); + if (ep->interval > 1) { + ctrl = dwc2_readl(hsotg, DOEPCTL(ep->index)); + if (ep->target_frame & 0x1) + ctrl |= DXEPCTL_SETODDFR; + else + ctrl |= DXEPCTL_SETEVENFR; - ctrl = dwc2_readl(hsotg, DOEPCTL(ep->index)); - if (ep->target_frame & 0x1) - ctrl |= DXEPCTL_SETODDFR; - else - ctrl |= DXEPCTL_SETEVENFR; + dwc2_writel(hsotg, ctrl, DOEPCTL(ep->index)); + } + } + + while (dwc2_gadget_target_frame_elapsed(ep)) { + hs_req = get_ep_head(ep); + if (hs_req) { + hs_req->req.frame_number = ep->target_frame; + hs_req->req.actual = 0; + dwc2_hsotg_complete_request(hsotg, ep, hs_req, -ENODATA); + } - dwc2_writel(hsotg, ctrl, DOEPCTL(ep->index)); + dwc2_gadget_incr_frame_num(ep); + /* Update current frame number value. */ + hsotg->frame_number = dwc2_hsotg_read_frameno(hsotg); } - dwc2_gadget_start_next_request(ep); - doepmsk = dwc2_readl(hsotg, DOEPMSK); - doepmsk &= ~DOEPMSK_OUTTKNEPDISMSK; - dwc2_writel(hsotg, doepmsk, DOEPMSK); + if (!ep->req) + dwc2_gadget_start_next_request(ep); + } +static void dwc2_hsotg_ep_stop_xfr(struct dwc2_hsotg *hsotg, + struct dwc2_hsotg_ep *hs_ep); + /** * dwc2_gadget_handle_nak - handle NAK interrupt * @hs_ep: The endpoint on which interrupt is asserted. @@ -2933,7 +2951,9 @@ static void dwc2_gadget_handle_out_token_ep_disabled(struct dwc2_hsotg_ep *ep) static void dwc2_gadget_handle_nak(struct dwc2_hsotg_ep *hs_ep) { struct dwc2_hsotg *hsotg = hs_ep->parent; + struct dwc2_hsotg_req *hs_req; int dir_in = hs_ep->dir_in; + u32 ctrl; if (!dir_in || !hs_ep->isochronous) return; @@ -2975,13 +2995,32 @@ static void dwc2_gadget_handle_nak(struct dwc2_hsotg_ep *hs_ep) dwc2_writel(hsotg, ctrl, DIEPCTL(hs_ep->index)); } - - dwc2_hsotg_complete_request(hsotg, hs_ep, - get_ep_head(hs_ep), 0); } - if (!using_desc_dma(hsotg)) + if (using_desc_dma(hsotg)) + return; + + ctrl = dwc2_readl(hsotg, DIEPCTL(hs_ep->index)); + if (ctrl & DXEPCTL_EPENA) + dwc2_hsotg_ep_stop_xfr(hsotg, hs_ep); + else + dwc2_hsotg_txfifo_flush(hsotg, hs_ep->fifo_index); + + while (dwc2_gadget_target_frame_elapsed(hs_ep)) { + hs_req = get_ep_head(hs_ep); + if (hs_req) { + hs_req->req.frame_number = hs_ep->target_frame; + hs_req->req.actual = 0; + dwc2_hsotg_complete_request(hsotg, hs_ep, hs_req, -ENODATA); + } + dwc2_gadget_incr_frame_num(hs_ep); + /* Update current frame number value. */ + hsotg->frame_number = dwc2_hsotg_read_frameno(hsotg); + } + + if (!hs_ep->req) + dwc2_gadget_start_next_request(hs_ep); } /** @@ -3039,21 +3078,15 @@ static void dwc2_hsotg_epint(struct dwc2_hsotg *hsotg, unsigned int idx, /* In DDMA handle isochronous requests separately */ if (using_desc_dma(hsotg) && hs_ep->isochronous) { - /* XferCompl set along with BNA */ - if (!(ints & DXEPINT_BNAINTR)) - dwc2_gadget_complete_isoc_request_ddma(hs_ep); + dwc2_gadget_complete_isoc_request_ddma(hs_ep); } else if (dir_in) { /* * We get OutDone from the FIFO, so we only * need to look at completing IN requests here * if operating slave mode */ - if (hs_ep->isochronous && hs_ep->interval > 1) - dwc2_gadget_incr_frame_num(hs_ep); - - dwc2_hsotg_complete_in(hsotg, hs_ep); - if (ints & DXEPINT_NAKINTRPT) - ints &= ~DXEPINT_NAKINTRPT; + if (!hs_ep->isochronous || !(ints & DXEPINT_NAKINTRPT)) + dwc2_hsotg_complete_in(hsotg, hs_ep); if (idx == 0 && !hs_ep->req) dwc2_hsotg_enqueue_setup(hsotg); @@ -3062,10 +3095,8 @@ static void dwc2_hsotg_epint(struct dwc2_hsotg *hsotg, unsigned int idx, * We're using DMA, we need to fire an OutDone here * as we ignore the RXFIFO. */ - if (hs_ep->isochronous && hs_ep->interval > 1) - dwc2_gadget_incr_frame_num(hs_ep); - - dwc2_hsotg_handle_outdone(hsotg, idx); + if (!hs_ep->isochronous || !(ints & DXEPINT_OUTTKNEPDIS)) + dwc2_hsotg_handle_outdone(hsotg, idx); } } @@ -4055,6 +4086,7 @@ static int dwc2_hsotg_ep_enable(struct usb_ep *ep, mask |= DIEPMSK_NAKMSK; dwc2_writel(hsotg, mask, DIEPMSK); } else { + epctrl |= DXEPCTL_SNAK; mask = dwc2_readl(hsotg, DOEPMSK); mask |= DOEPMSK_OUTTKNEPDISMSK; dwc2_writel(hsotg, mask, DOEPMSK); @@ -4454,7 +4486,6 @@ static int dwc2_hsotg_udc_start(struct usb_gadget *gadget, WARN_ON(hsotg->driver); - driver->driver.bus = NULL; hsotg->driver = driver; hsotg->gadget.dev.of_node = hsotg->dev->of_node; hsotg->gadget.speed = USB_SPEED_UNKNOWN; @@ -4947,7 +4978,7 @@ int dwc2_hsotg_suspend(struct dwc2_hsotg *hsotg) hsotg->gadget.speed = USB_SPEED_UNKNOWN; spin_unlock_irqrestore(&hsotg->lock, flags); - for (ep = 0; ep < hsotg->num_of_eps; ep++) { + for (ep = 1; ep < hsotg->num_of_eps; ep++) { if (hsotg->eps_in[ep]) dwc2_hsotg_ep_disable_lock(&hsotg->eps_in[ep]->ep); if (hsotg->eps_out[ep]) diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index f29fbadb05487..a412ef67af18d 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -5074,6 +5074,10 @@ int dwc2_hcd_init(struct dwc2_hsotg *hsotg) hcd->has_tt = 1; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + retval = -EINVAL; + goto error2; + } hcd->rsrc_start = res->start; hcd->rsrc_len = resource_size(res); diff --git a/drivers/usb/dwc2/hcd_queue.c b/drivers/usb/dwc2/hcd_queue.c index 68bbac64b7536..94af71e9856f2 100644 --- a/drivers/usb/dwc2/hcd_queue.c +++ b/drivers/usb/dwc2/hcd_queue.c @@ -59,7 +59,7 @@ #define DWC2_UNRESERVE_DELAY (msecs_to_jiffies(5)) /* If we get a NAK, wait this long before retrying */ -#define DWC2_RETRY_WAIT_DELAY 1*1E6L +#define DWC2_RETRY_WAIT_DELAY (1 * NSEC_PER_MSEC) /** * dwc2_periodic_channel_available() - Checks that a channel is available for a diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 2c68ec60d39a3..8cc81f193e9c1 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1229,10 +1229,10 @@ static void dwc3_get_properties(struct dwc3 *dwc) u8 lpm_nyet_threshold; u8 tx_de_emphasis; u8 hird_threshold; - u8 rx_thr_num_pkt_prd; - u8 rx_max_burst_prd; - u8 tx_thr_num_pkt_prd; - u8 tx_max_burst_prd; + u8 rx_thr_num_pkt_prd = 0; + u8 rx_max_burst_prd = 0; + u8 tx_thr_num_pkt_prd = 0; + u8 tx_max_burst_prd = 0; /* default to highest possible threshold */ lpm_nyet_threshold = 0xf; diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c index e8acad49a53a8..81ee1cd794f7d 100644 --- a/drivers/usb/dwc3/dwc3-omap.c +++ b/drivers/usb/dwc3/dwc3-omap.c @@ -242,7 +242,7 @@ static void dwc3_omap_set_mailbox(struct dwc3_omap *omap, break; case OMAP_DWC3_ID_FLOAT: - if (omap->vbus_reg) + if (omap->vbus_reg && regulator_is_enabled(omap->vbus_reg)) regulator_disable(omap->vbus_reg); val = dwc3_omap_read_utmi_ctrl(omap); val |= USBOTGSS_UTMI_OTG_CTRL_IDDIG; diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 5a7c152c9ee39..955bf820f4102 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -81,8 +81,8 @@ static const struct acpi_gpio_mapping acpi_dwc3_byt_gpios[] = { static struct gpiod_lookup_table platform_bytcr_gpios = { .dev_id = "0000:00:16.0", .table = { - GPIO_LOOKUP("INT33FC:00", 54, "reset", GPIO_ACTIVE_HIGH), - GPIO_LOOKUP("INT33FC:02", 14, "cs", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("INT33FC:00", 54, "cs", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("INT33FC:02", 14, "reset", GPIO_ACTIVE_HIGH), {} }, }; @@ -211,7 +211,7 @@ static void dwc3_pci_resume_work(struct work_struct *work) int ret; ret = pm_runtime_get_sync(&dwc3->dev); - if (ret) { + if (ret < 0) { pm_runtime_put_sync_autosuspend(&dwc3->dev); return; } diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 8a3752fcf7b46..80fee7ea83ca4 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -894,19 +894,19 @@ static struct dwc3_trb *dwc3_ep_prev_trb(struct dwc3_ep *dep, u8 index) static u32 dwc3_calc_trbs_left(struct dwc3_ep *dep) { - struct dwc3_trb *tmp; u8 trbs_left; /* - * If enqueue & dequeue are equal than it is either full or empty. - * - * One way to know for sure is if the TRB right before us has HWO bit - * set or not. If it has, then we're definitely full and can't fit any - * more transfers in our ring. + * If the enqueue & dequeue are equal then the TRB ring is either full + * or empty. It's considered full when there are DWC3_TRB_NUM-1 of TRBs + * pending to be processed by the driver. */ if (dep->trb_enqueue == dep->trb_dequeue) { - tmp = dwc3_ep_prev_trb(dep, dep->trb_enqueue); - if (tmp->ctrl & DWC3_TRB_CTRL_HWO) + /* + * If there is any request remained in the started_list at + * this point, that means there is no TRB available. + */ + if (!list_empty(&dep->started_list)) return 0; return DWC3_TRB_NUM - 1; @@ -1020,6 +1020,19 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb, if (usb_endpoint_xfer_bulk(dep->endpoint.desc) && dep->stream_capable) trb->ctrl |= DWC3_TRB_CTRL_SID_SOFN(stream_id); + /* + * As per data book 4.2.3.2TRB Control Bit Rules section + * + * The controller autonomously checks the HWO field of a TRB to determine if the + * entire TRB is valid. Therefore, software must ensure that the rest of the TRB + * is valid before setting the HWO field to '1'. In most systems, this means that + * software must update the fourth DWORD of a TRB last. + * + * However there is a possibility of CPU re-ordering here which can cause + * controller to observe the HWO bit set prematurely. + * Add a write memory barrier to prevent CPU re-ordering. + */ + wmb(); trb->ctrl |= DWC3_TRB_CTRL_HWO; dwc3_ep_inc_enq(dep); @@ -2012,10 +2025,8 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) ret = wait_for_completion_timeout(&dwc->ep0_in_setup, msecs_to_jiffies(DWC3_PULL_UP_TIMEOUT)); - if (ret == 0) { - dev_err(dwc->dev, "timed out waiting for SETUP phase\n"); - return -ETIMEDOUT; - } + if (ret == 0) + dev_warn(dwc->dev, "timed out waiting for SETUP phase\n"); } /* @@ -2217,6 +2228,7 @@ static int __dwc3_gadget_start(struct dwc3 *dwc) /* begin to receive SETUP packets */ dwc->ep0state = EP0_SETUP_PHASE; dwc->link_state = DWC3_LINK_STATE_SS_DIS; + dwc->delayed_status = false; dwc3_ep0_out_start(dwc); dwc3_gadget_enable_irq(dwc); @@ -2716,6 +2728,7 @@ static int dwc3_gadget_ep_cleanup_completed_request(struct dwc3_ep *dep, const struct dwc3_event_depevt *event, struct dwc3_request *req, int status) { + int request_status; int ret; if (req->request.num_mapped_sgs) @@ -2745,7 +2758,35 @@ static int dwc3_gadget_ep_cleanup_completed_request(struct dwc3_ep *dep, req->needs_extra_trb = false; } - dwc3_gadget_giveback(dep, req, status); + /* + * The event status only reflects the status of the TRB with IOC set. + * For the requests that don't set interrupt on completion, the driver + * needs to check and return the status of the completed TRBs associated + * with the request. Use the status of the last TRB of the request. + */ + if (req->request.no_interrupt) { + struct dwc3_trb *trb; + + trb = dwc3_ep_prev_trb(dep, dep->trb_dequeue); + switch (DWC3_TRB_SIZE_TRBSTS(trb->size)) { + case DWC3_TRBSTS_MISSED_ISOC: + /* Isoc endpoint only */ + request_status = -EXDEV; + break; + case DWC3_TRB_STS_XFER_IN_PROG: + /* Applicable when End Transfer with ForceRM=0 */ + case DWC3_TRBSTS_SETUP_PENDING: + /* Control endpoint only */ + case DWC3_TRBSTS_OK: + default: + request_status = 0; + break; + } + } else { + request_status = status; + } + + dwc3_gadget_giveback(dep, req, request_status); out: return ret; @@ -3494,7 +3535,6 @@ static irqreturn_t dwc3_process_event_buf(struct dwc3_event_buffer *evt) } evt->count = 0; - evt->flags &= ~DWC3_EVENT_PENDING; ret = IRQ_HANDLED; /* Unmask interrupt */ @@ -3507,6 +3547,9 @@ static irqreturn_t dwc3_process_event_buf(struct dwc3_event_buffer *evt) dwc3_writel(dwc->regs, DWC3_DEV_IMOD(0), dwc->imod_interval); } + /* Keep the clearing of DWC3_EVENT_PENDING at the end */ + evt->flags &= ~DWC3_EVENT_PENDING; + return ret; } @@ -3517,9 +3560,11 @@ static irqreturn_t dwc3_thread_interrupt(int irq, void *_evt) unsigned long flags; irqreturn_t ret = IRQ_NONE; + local_bh_disable(); spin_lock_irqsave(&dwc->lock, flags); ret = dwc3_process_event_buf(evt); spin_unlock_irqrestore(&dwc->lock, flags); + local_bh_enable(); return ret; } diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 24dad1d78d1ea..a3106b179562c 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -481,7 +481,7 @@ static u8 encode_bMaxPower(enum usb_device_speed speed, { unsigned val; - if (c->MaxPower) + if (c->MaxPower || (c->bmAttributes & USB_CONFIG_ATT_SELFPOWER)) val = c->MaxPower; else val = CONFIG_USB_GADGET_VBUS_DRAW; @@ -905,7 +905,11 @@ static int set_config(struct usb_composite_dev *cdev, } /* when we return, be sure our power usage is valid */ - power = c->MaxPower ? c->MaxPower : CONFIG_USB_GADGET_VBUS_DRAW; + if (c->MaxPower || (c->bmAttributes & USB_CONFIG_ATT_SELFPOWER)) + power = c->MaxPower; + else + power = CONFIG_USB_GADGET_VBUS_DRAW; + if (gadget->speed < USB_SPEED_SUPER) power = min(power, 500U); else @@ -1644,6 +1648,18 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) struct usb_function *f = NULL; u8 endp; + if (w_length > USB_COMP_EP0_BUFSIZ) { + if (ctrl->bRequestType & USB_DIR_IN) { + /* Cast away the const, we are going to overwrite on purpose. */ + __le16 *temp = (__le16 *)&ctrl->wLength; + + *temp = cpu_to_le16(USB_COMP_EP0_BUFSIZ); + w_length = USB_COMP_EP0_BUFSIZ; + } else { + goto done; + } + } + /* partial re-init of the response message; the function or the * gadget might need to intercept e.g. a control-OUT completion * when we delegate to it. @@ -1928,6 +1944,9 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) if (w_index != 0x5 || (w_value >> 8)) break; interface = w_value & 0xFF; + if (interface >= MAX_CONFIG_INTERFACES || + !os_desc_cfg->interface[interface]) + break; buf[6] = w_index; count = count_ext_prop(os_desc_cfg, interface); @@ -2157,7 +2176,7 @@ int composite_dev_prepare(struct usb_composite_driver *composite, if (!cdev->req) return -ENOMEM; - cdev->req->buf = kmalloc(USB_COMP_EP0_BUFSIZ, GFP_KERNEL); + cdev->req->buf = kzalloc(USB_COMP_EP0_BUFSIZ, GFP_KERNEL); if (!cdev->req->buf) goto fail; diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 3d4710cc34bc1..2350e97a1662c 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1412,6 +1412,8 @@ static void configfs_composite_unbind(struct usb_gadget *gadget) usb_ep_autoconfig_reset(cdev->gadget); spin_lock_irqsave(&gi->spinlock, flags); cdev->gadget = NULL; + cdev->deactivations = 0; + gadget->deactivated = false; set_gadget_data(gadget, NULL); spin_unlock_irqrestore(&gi->spinlock, flags); } diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 5f28646d909df..25c1333badd32 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -614,7 +614,7 @@ static int ffs_ep0_open(struct inode *inode, struct file *file) file->private_data = ffs; ffs_data_opened(ffs); - return 0; + return stream_open(inode, file); } static int ffs_ep0_release(struct inode *inode, struct file *file) @@ -1152,7 +1152,7 @@ ffs_epfile_open(struct inode *inode, struct file *file) file->private_data = epfile; ffs_data_opened(epfile->ffs); - return 0; + return stream_open(inode, file); } static int ffs_aio_cancel(struct kiocb *kiocb) @@ -1725,16 +1725,24 @@ static void ffs_data_put(struct ffs_data *ffs) static void ffs_data_closed(struct ffs_data *ffs) { + struct ffs_epfile *epfiles; + unsigned long flags; + ENTER(); if (atomic_dec_and_test(&ffs->opened)) { if (ffs->no_disconnect) { ffs->state = FFS_DEACTIVATED; - if (ffs->epfiles) { - ffs_epfiles_destroy(ffs->epfiles, - ffs->eps_count); - ffs->epfiles = NULL; - } + spin_lock_irqsave(&ffs->eps_lock, flags); + epfiles = ffs->epfiles; + ffs->epfiles = NULL; + spin_unlock_irqrestore(&ffs->eps_lock, + flags); + + if (epfiles) + ffs_epfiles_destroy(epfiles, + ffs->eps_count); + if (ffs->setup_state == FFS_SETUP_PENDING) __ffs_ep0_stall(ffs); } else { @@ -1781,17 +1789,34 @@ static struct ffs_data *ffs_data_new(const char *dev_name) static void ffs_data_clear(struct ffs_data *ffs) { + struct ffs_epfile *epfiles; + unsigned long flags; + ENTER(); ffs_closed(ffs); BUG_ON(ffs->gadget); - if (ffs->epfiles) - ffs_epfiles_destroy(ffs->epfiles, ffs->eps_count); + spin_lock_irqsave(&ffs->eps_lock, flags); + epfiles = ffs->epfiles; + ffs->epfiles = NULL; + spin_unlock_irqrestore(&ffs->eps_lock, flags); - if (ffs->ffs_eventfd) + /* + * potential race possible between ffs_func_eps_disable + * & ffs_epfile_release therefore maintaining a local + * copy of epfile will save us from use-after-free. + */ + if (epfiles) { + ffs_epfiles_destroy(epfiles, ffs->eps_count); + ffs->epfiles = NULL; + } + + if (ffs->ffs_eventfd) { eventfd_ctx_put(ffs->ffs_eventfd); + ffs->ffs_eventfd = NULL; + } kfree(ffs->raw_descs_data); kfree(ffs->raw_strings); @@ -1804,7 +1829,6 @@ static void ffs_data_reset(struct ffs_data *ffs) ffs_data_clear(ffs); - ffs->epfiles = NULL; ffs->raw_descs_data = NULL; ffs->raw_descs = NULL; ffs->raw_strings = NULL; @@ -1933,12 +1957,15 @@ static void ffs_epfiles_destroy(struct ffs_epfile *epfiles, unsigned count) static void ffs_func_eps_disable(struct ffs_function *func) { - struct ffs_ep *ep = func->eps; - struct ffs_epfile *epfile = func->ffs->epfiles; - unsigned count = func->ffs->eps_count; + struct ffs_ep *ep; + struct ffs_epfile *epfile; + unsigned short count; unsigned long flags; spin_lock_irqsave(&func->ffs->eps_lock, flags); + count = func->ffs->eps_count; + epfile = func->ffs->epfiles; + ep = func->eps; while (count--) { /* pending requests get nuked */ if (likely(ep->ep)) @@ -1956,14 +1983,18 @@ static void ffs_func_eps_disable(struct ffs_function *func) static int ffs_func_eps_enable(struct ffs_function *func) { - struct ffs_data *ffs = func->ffs; - struct ffs_ep *ep = func->eps; - struct ffs_epfile *epfile = ffs->epfiles; - unsigned count = ffs->eps_count; + struct ffs_data *ffs; + struct ffs_ep *ep; + struct ffs_epfile *epfile; + unsigned short count; unsigned long flags; int ret = 0; spin_lock_irqsave(&func->ffs->eps_lock, flags); + ffs = func->ffs; + ep = func->eps; + epfile = ffs->epfiles; + count = ffs->eps_count; while(count--) { ep->ep->driver_data = ep; diff --git a/drivers/usb/gadget/function/f_sourcesink.c b/drivers/usb/gadget/function/f_sourcesink.c index 282737e4609ce..2c65a9bb3c81b 100644 --- a/drivers/usb/gadget/function/f_sourcesink.c +++ b/drivers/usb/gadget/function/f_sourcesink.c @@ -583,6 +583,7 @@ static int source_sink_start_ep(struct f_sourcesink *ss, bool is_in, if (is_iso) { switch (speed) { + case USB_SPEED_SUPER_PLUS: case USB_SPEED_SUPER: size = ss->isoc_maxpacket * (ss->isoc_mult + 1) * diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index dd960cea642f3..11cc6056b5902 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -176,7 +176,7 @@ static struct uac2_input_terminal_descriptor io_in_it_desc = { .bDescriptorSubtype = UAC_INPUT_TERMINAL, /* .bTerminalID = DYNAMIC */ - .wTerminalType = cpu_to_le16(UAC_INPUT_TERMINAL_UNDEFINED), + .wTerminalType = cpu_to_le16(UAC_INPUT_TERMINAL_MICROPHONE), .bAssocTerminal = 0, /* .bCSourceID = DYNAMIC */ .iChannelNames = 0, @@ -204,7 +204,7 @@ static struct uac2_output_terminal_descriptor io_out_ot_desc = { .bDescriptorSubtype = UAC_OUTPUT_TERMINAL, /* .bTerminalID = DYNAMIC */ - .wTerminalType = cpu_to_le16(UAC_OUTPUT_TERMINAL_UNDEFINED), + .wTerminalType = cpu_to_le16(UAC_OUTPUT_TERMINAL_SPEAKER), .bAssocTerminal = 0, /* .bSourceID = DYNAMIC */ /* .bCSourceID = DYNAMIC */ diff --git a/drivers/usb/gadget/function/rndis.c b/drivers/usb/gadget/function/rndis.c index 04c142c130759..fa0c173a0d26f 100644 --- a/drivers/usb/gadget/function/rndis.c +++ b/drivers/usb/gadget/function/rndis.c @@ -637,14 +637,18 @@ static int rndis_set_response(struct rndis_params *params, rndis_set_cmplt_type *resp; rndis_resp_t *r; + BufLength = le32_to_cpu(buf->InformationBufferLength); + BufOffset = le32_to_cpu(buf->InformationBufferOffset); + if ((BufLength > RNDIS_MAX_TOTAL_SIZE) || + (BufOffset > RNDIS_MAX_TOTAL_SIZE) || + (BufOffset + 8 >= RNDIS_MAX_TOTAL_SIZE)) + return -EINVAL; + r = rndis_add_response(params, sizeof(rndis_set_cmplt_type)); if (!r) return -ENOMEM; resp = (rndis_set_cmplt_type *)r->buf; - BufLength = le32_to_cpu(buf->InformationBufferLength); - BufOffset = le32_to_cpu(buf->InformationBufferOffset); - #ifdef VERBOSE_DEBUG pr_debug("%s: Length: %d\n", __func__, BufLength); pr_debug("%s: Offset: %d\n", __func__, BufOffset); @@ -919,6 +923,7 @@ struct rndis_params *rndis_register(void (*resp_avail)(void *v), void *v) params->resp_avail = resp_avail; params->v = v; INIT_LIST_HEAD(¶ms->resp_queue); + spin_lock_init(¶ms->resp_lock); pr_debug("%s: configNr = %d\n", __func__, i); return params; @@ -1012,12 +1017,14 @@ void rndis_free_response(struct rndis_params *params, u8 *buf) { rndis_resp_t *r, *n; + spin_lock(¶ms->resp_lock); list_for_each_entry_safe(r, n, ¶ms->resp_queue, list) { if (r->buf == buf) { list_del(&r->list); kfree(r); } } + spin_unlock(¶ms->resp_lock); } EXPORT_SYMBOL_GPL(rndis_free_response); @@ -1027,14 +1034,17 @@ u8 *rndis_get_next_response(struct rndis_params *params, u32 *length) if (!length) return NULL; + spin_lock(¶ms->resp_lock); list_for_each_entry_safe(r, n, ¶ms->resp_queue, list) { if (!r->send) { r->send = 1; *length = r->length; + spin_unlock(¶ms->resp_lock); return r->buf; } } + spin_unlock(¶ms->resp_lock); return NULL; } EXPORT_SYMBOL_GPL(rndis_get_next_response); @@ -1051,7 +1061,9 @@ static rndis_resp_t *rndis_add_response(struct rndis_params *params, u32 length) r->length = length; r->send = 0; + spin_lock(¶ms->resp_lock); list_add_tail(&r->list, ¶ms->resp_queue); + spin_unlock(¶ms->resp_lock); return r; } diff --git a/drivers/usb/gadget/function/rndis.h b/drivers/usb/gadget/function/rndis.h index c7e3a70ce6c1f..c996ba28bcb77 100644 --- a/drivers/usb/gadget/function/rndis.h +++ b/drivers/usb/gadget/function/rndis.h @@ -174,6 +174,7 @@ typedef struct rndis_params { void (*resp_avail)(void *v); void *v; struct list_head resp_queue; + spinlock_t resp_lock; } rndis_params; /* RNDIS Message parser and other useless functions */ diff --git a/drivers/usb/gadget/function/u_audio.c b/drivers/usb/gadget/function/u_audio.c index 223029fa84459..4e01ba0ab8ecb 100644 --- a/drivers/usb/gadget/function/u_audio.c +++ b/drivers/usb/gadget/function/u_audio.c @@ -349,8 +349,6 @@ static inline void free_ep(struct uac_rtd_params *prm, struct usb_ep *ep) if (!prm->ep_enabled) return; - prm->ep_enabled = false; - audio_dev = uac->audio_dev; params = &audio_dev->params; @@ -368,11 +366,12 @@ static inline void free_ep(struct uac_rtd_params *prm, struct usb_ep *ep) } } + prm->ep_enabled = false; + if (usb_ep_disable(ep)) dev_err(uac->card->dev, "%s:%d Error!\n", __func__, __LINE__); } - int u_audio_start_capture(struct g_audio *audio_dev) { struct snd_uac_chip *uac = audio_dev->uac; diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index 99b840daf3d94..3f053b11e2cee 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -491,8 +491,9 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb, } spin_unlock_irqrestore(&dev->lock, flags); - if (skb && !in) { - dev_kfree_skb_any(skb); + if (!in) { + if (skb) + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } @@ -771,9 +772,13 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g, dev->qmult = qmult; snprintf(net->name, sizeof(net->name), "%s%%d", netname); - if (get_ether_addr(dev_addr, net->dev_addr)) + if (get_ether_addr(dev_addr, net->dev_addr)) { + net->addr_assign_type = NET_ADDR_RANDOM; dev_warn(&g->dev, "using random %s ethernet address\n", "self"); + } else { + net->addr_assign_type = NET_ADDR_SET; + } if (get_ether_addr(host_addr, dev->host_mac)) dev_warn(&g->dev, "using random %s ethernet address\n", "host"); @@ -830,6 +835,9 @@ struct net_device *gether_setup_name_default(const char *netname) INIT_LIST_HEAD(&dev->tx_reqs); INIT_LIST_HEAD(&dev->rx_reqs); + /* by default we always have a random MAC address */ + net->addr_assign_type = NET_ADDR_RANDOM; + skb_queue_head_init(&dev->rx_frames); /* network device setup */ @@ -859,19 +867,22 @@ int gether_register_netdev(struct net_device *net) { struct eth_dev *dev; struct usb_gadget *g; - struct sockaddr sa; int status; if (!net->dev.parent) return -EINVAL; dev = netdev_priv(net); g = dev->gadget; + + memcpy(net->dev_addr, dev->dev_mac, ETH_ALEN); + status = register_netdev(net); if (status < 0) { dev_dbg(&g->dev, "register_netdev failed, %d\n", status); return status; } else { INFO(dev, "HOST MAC %pM\n", dev->host_mac); + INFO(dev, "MAC %pM\n", dev->dev_mac); /* two kinds of host-initiated state changes: * - iff DATA transfer is active, carrier is "on" @@ -879,15 +890,6 @@ int gether_register_netdev(struct net_device *net) */ netif_carrier_off(net); } - sa.sa_family = net->type; - memcpy(sa.sa_data, dev->dev_mac, ETH_ALEN); - rtnl_lock(); - status = dev_set_mac_address(net, &sa, NULL); - rtnl_unlock(); - if (status) - pr_warn("cannot set self ethernet address: %d\n", status); - else - INFO(dev, "MAC %pM\n", dev->dev_mac); return status; } @@ -912,6 +914,7 @@ int gether_set_dev_addr(struct net_device *net, const char *dev_addr) if (get_ether_addr(dev_addr, new_addr)) return -EINVAL; memcpy(dev->dev_mac, new_addr, ETH_ALEN); + net->addr_assign_type = NET_ADDR_SET; return 0; } EXPORT_SYMBOL_GPL(gether_set_dev_addr); diff --git a/drivers/usb/gadget/function/uvc_queue.c b/drivers/usb/gadget/function/uvc_queue.c index 61e2c94cc0b0c..cab1e30462c24 100644 --- a/drivers/usb/gadget/function/uvc_queue.c +++ b/drivers/usb/gadget/function/uvc_queue.c @@ -242,6 +242,8 @@ void uvcg_queue_cancel(struct uvc_video_queue *queue, int disconnect) buf->state = UVC_BUF_STATE_ERROR; vb2_buffer_done(&buf->buf.vb2_buf, VB2_BUF_STATE_ERROR); } + queue->buf_used = 0; + /* This must be protected by the irqlock spinlock to avoid race * conditions between uvc_queue_buffer and the disconnection event that * could result in an interruptible wait in uvc_dequeue_buffer. Do not diff --git a/drivers/usb/gadget/legacy/dbgp.c b/drivers/usb/gadget/legacy/dbgp.c index e1d566c9918ae..6bcbad3825802 100644 --- a/drivers/usb/gadget/legacy/dbgp.c +++ b/drivers/usb/gadget/legacy/dbgp.c @@ -137,7 +137,7 @@ static int dbgp_enable_ep_req(struct usb_ep *ep) goto fail_1; } - req->buf = kmalloc(DBGP_REQ_LEN, GFP_KERNEL); + req->buf = kzalloc(DBGP_REQ_LEN, GFP_KERNEL); if (!req->buf) { err = -ENOMEM; stp = 2; @@ -345,6 +345,19 @@ static int dbgp_setup(struct usb_gadget *gadget, void *data = NULL; u16 len = 0; + if (length > DBGP_REQ_LEN) { + if (ctrl->bRequestType & USB_DIR_IN) { + /* Cast away the const, we are going to overwrite on purpose. */ + __le16 *temp = (__le16 *)&ctrl->wLength; + + *temp = cpu_to_le16(DBGP_REQ_LEN); + length = DBGP_REQ_LEN; + } else { + return err; + } + } + + if (request == USB_REQ_GET_DESCRIPTOR) { switch (value>>8) { case USB_DT_DEVICE: diff --git a/drivers/usb/gadget/legacy/hid.c b/drivers/usb/gadget/legacy/hid.c index 5b27d289443fe..3912cc805f3af 100644 --- a/drivers/usb/gadget/legacy/hid.c +++ b/drivers/usb/gadget/legacy/hid.c @@ -99,8 +99,10 @@ static int do_config(struct usb_configuration *c) list_for_each_entry(e, &hidg_func_list, node) { e->f = usb_get_function(e->fi); - if (IS_ERR(e->f)) + if (IS_ERR(e->f)) { + status = PTR_ERR(e->f); goto put; + } status = usb_add_function(c, e->f); if (status < 0) { usb_put_function(e->f); diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index ecd808839cc5b..0d183c2ef3877 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -110,6 +110,8 @@ enum ep0_state { /* enough for the whole queue: most events invalidate others */ #define N_EVENT 5 +#define RBUF_SIZE 256 + struct dev_data { spinlock_t lock; refcount_t count; @@ -144,7 +146,7 @@ struct dev_data { struct dentry *dentry; /* except this scratch i/o buffer for ep0 */ - u8 rbuf [256]; + u8 rbuf[RBUF_SIZE]; }; static inline void get_dev (struct dev_data *data) @@ -1333,6 +1335,18 @@ gadgetfs_setup (struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) u16 w_value = le16_to_cpu(ctrl->wValue); u16 w_length = le16_to_cpu(ctrl->wLength); + if (w_length > RBUF_SIZE) { + if (ctrl->bRequestType & USB_DIR_IN) { + /* Cast away the const, we are going to overwrite on purpose. */ + __le16 *temp = (__le16 *)&ctrl->wLength; + + *temp = cpu_to_le16(RBUF_SIZE); + w_length = RBUF_SIZE; + } else { + return value; + } + } + spin_lock (&dev->lock); dev->setup_abort = 0; if (dev->state == STATE_DEV_UNCONNECTED) { @@ -1815,8 +1829,9 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) spin_lock_irq (&dev->lock); value = -EINVAL; if (dev->buf) { + spin_unlock_irq(&dev->lock); kfree(kbuf); - goto fail; + return value; } dev->buf = kbuf; @@ -1863,8 +1878,8 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) value = usb_gadget_probe_driver(&gadgetfs_driver); if (value != 0) { - kfree (dev->buf); - dev->buf = NULL; + spin_lock_irq(&dev->lock); + goto fail; } else { /* at this point "good" hardware has for the first time * let the USB the host see us. alternatively, if users @@ -1881,6 +1896,9 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) return value; fail: + dev->config = NULL; + dev->hs_config = NULL; + dev->dev = NULL; spin_unlock_irq (&dev->lock); pr_debug ("%s: %s fail %zd, %p\n", shortname, __func__, value, dev); kfree (dev->buf); diff --git a/drivers/usb/gadget/udc/Kconfig b/drivers/usb/gadget/udc/Kconfig index d354036ff6c86..f985bb4a42db2 100644 --- a/drivers/usb/gadget/udc/Kconfig +++ b/drivers/usb/gadget/udc/Kconfig @@ -330,6 +330,7 @@ config USB_AMD5536UDC config USB_FSL_QE tristate "Freescale QE/CPM USB Device Controller" depends on FSL_SOC && (QUICC_ENGINE || CPM) + depends on !64BIT || BROKEN help Some of Freescale PowerPC processors have a Full Speed QE/CPM2 USB controller, which support device mode with 4 diff --git a/drivers/usb/gadget/udc/at91_udc.c b/drivers/usb/gadget/udc/at91_udc.c index 194ffb1ed4620..d7714c94b1196 100644 --- a/drivers/usb/gadget/udc/at91_udc.c +++ b/drivers/usb/gadget/udc/at91_udc.c @@ -1878,7 +1878,9 @@ static int at91udc_probe(struct platform_device *pdev) clk_disable(udc->iclk); /* request UDC and maybe VBUS irqs */ - udc->udp_irq = platform_get_irq(pdev, 0); + udc->udp_irq = retval = platform_get_irq(pdev, 0); + if (retval < 0) + goto err_unprepare_iclk; retval = devm_request_irq(dev, udc->udp_irq, at91_udc_irq, 0, driver_name, udc); if (retval) { diff --git a/drivers/usb/gadget/udc/bdc/bdc_core.c b/drivers/usb/gadget/udc/bdc/bdc_core.c index 3d33499db50b5..845aead48d85b 100644 --- a/drivers/usb/gadget/udc/bdc/bdc_core.c +++ b/drivers/usb/gadget/udc/bdc/bdc_core.c @@ -565,7 +565,8 @@ static int bdc_probe(struct platform_device *pdev) if (ret) { dev_err(dev, "No suitable DMA config available, abort\n"); - return -ENOTSUPP; + ret = -ENOTSUPP; + goto phycleanup; } dev_dbg(dev, "Using 32-bit address\n"); } diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index e41f67cd3d469..f9d2737aabe82 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -1303,7 +1303,6 @@ static void usb_gadget_remove_driver(struct usb_udc *udc) usb_gadget_udc_stop(udc); udc->driver = NULL; - udc->dev.driver = NULL; udc->gadget->dev.driver = NULL; } @@ -1352,7 +1351,6 @@ static int udc_bind_to_driver(struct usb_udc *udc, struct usb_gadget_driver *dri driver->function); udc->driver = driver; - udc->dev.driver = &driver->driver; udc->gadget->dev.driver = &driver->driver; usb_gadget_udc_set_speed(udc, driver->max_speed); @@ -1374,7 +1372,6 @@ static int udc_bind_to_driver(struct usb_udc *udc, struct usb_gadget_driver *dri dev_err(&udc->dev, "failed to start %s: %d\n", udc->driver->function, ret); udc->driver = NULL; - udc->dev.driver = NULL; udc->gadget->dev.driver = NULL; return ret; } diff --git a/drivers/usb/gadget/udc/lpc32xx_udc.c b/drivers/usb/gadget/udc/lpc32xx_udc.c index 6d2f1f98f13df..f71f61bcfe1e1 100644 --- a/drivers/usb/gadget/udc/lpc32xx_udc.c +++ b/drivers/usb/gadget/udc/lpc32xx_udc.c @@ -3027,6 +3027,7 @@ static int lpc32xx_udc_probe(struct platform_device *pdev) } udc->isp1301_i2c_client = isp1301_get_client(isp1301_node); + of_node_put(isp1301_node); if (!udc->isp1301_i2c_client) { return -EPROBE_DEFER; } diff --git a/drivers/usb/gadget/udc/mv_u3d_core.c b/drivers/usb/gadget/udc/mv_u3d_core.c index 35e02a8d0091a..bdba3f48c0527 100644 --- a/drivers/usb/gadget/udc/mv_u3d_core.c +++ b/drivers/usb/gadget/udc/mv_u3d_core.c @@ -1922,14 +1922,6 @@ static int mv_u3d_probe(struct platform_device *dev) goto err_get_irq; } u3d->irq = r->start; - if (request_irq(u3d->irq, mv_u3d_irq, - IRQF_SHARED, driver_name, u3d)) { - u3d->irq = 0; - dev_err(&dev->dev, "Request irq %d for u3d failed\n", - u3d->irq); - retval = -ENODEV; - goto err_request_irq; - } /* initialize gadget structure */ u3d->gadget.ops = &mv_u3d_ops; /* usb_gadget_ops */ @@ -1942,6 +1934,15 @@ static int mv_u3d_probe(struct platform_device *dev) mv_u3d_eps_init(u3d); + if (request_irq(u3d->irq, mv_u3d_irq, + IRQF_SHARED, driver_name, u3d)) { + u3d->irq = 0; + dev_err(&dev->dev, "Request irq %d for u3d failed\n", + u3d->irq); + retval = -ENODEV; + goto err_request_irq; + } + /* external vbus detection */ if (u3d->vbus) { u3d->clock_gating = 1; @@ -1965,8 +1966,8 @@ static int mv_u3d_probe(struct platform_device *dev) err_unregister: free_irq(u3d->irq, u3d); -err_request_irq: err_get_irq: +err_request_irq: kfree(u3d->status_req); err_alloc_status_req: kfree(u3d->eps); diff --git a/drivers/usb/gadget/udc/r8a66597-udc.c b/drivers/usb/gadget/udc/r8a66597-udc.c index a766476fd742e..ca0aebb5bd0cc 100644 --- a/drivers/usb/gadget/udc/r8a66597-udc.c +++ b/drivers/usb/gadget/udc/r8a66597-udc.c @@ -1250,7 +1250,7 @@ static void set_feature(struct r8a66597 *r8a66597, struct usb_ctrlrequest *ctrl) do { tmp = r8a66597_read(r8a66597, INTSTS0) & CTSQ; udelay(1); - } while (tmp != CS_IDST || timeout-- > 0); + } while (tmp != CS_IDST && timeout-- > 0); if (tmp == CS_IDST) r8a66597_bset(r8a66597, diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c index 08a93cf68efff..0bbd180022aa7 100644 --- a/drivers/usb/gadget/udc/renesas_usb3.c +++ b/drivers/usb/gadget/udc/renesas_usb3.c @@ -2363,6 +2363,8 @@ static void handle_ext_role_switch_states(struct device *dev, switch (role) { case USB_ROLE_NONE: usb3->connection_state = USB_ROLE_NONE; + if (cur_role == USB_ROLE_HOST) + device_release_driver(host); if (usb3->driver) usb3_disconnect(usb3); usb3_vbus_out(usb3, false); @@ -2692,10 +2694,15 @@ static const struct renesas_usb3_priv renesas_usb3_priv_r8a77990 = { static const struct of_device_id usb3_of_match[] = { { + .compatible = "renesas,r8a774c0-usb3-peri", + .data = &renesas_usb3_priv_r8a77990, + }, { .compatible = "renesas,r8a7795-usb3-peri", .data = &renesas_usb3_priv_gen3, - }, - { + }, { + .compatible = "renesas,r8a77990-usb3-peri", + .data = &renesas_usb3_priv_r8a77990, + }, { .compatible = "renesas,rcar-gen3-usb3-peri", .data = &renesas_usb3_priv_gen3, }, @@ -2704,18 +2711,10 @@ static const struct of_device_id usb3_of_match[] = { MODULE_DEVICE_TABLE(of, usb3_of_match); static const struct soc_device_attribute renesas_usb3_quirks_match[] = { - { - .soc_id = "r8a774c0", - .data = &renesas_usb3_priv_r8a77990, - }, { .soc_id = "r8a7795", .revision = "ES1.*", .data = &renesas_usb3_priv_r8a7795_es1, }, - { - .soc_id = "r8a77990", - .data = &renesas_usb3_priv_r8a77990, - }, { /* sentinel */ }, }; diff --git a/drivers/usb/gadget/udc/udc-xilinx.c b/drivers/usb/gadget/udc/udc-xilinx.c index 29d8e5f8bb583..de22dd5436538 100644 --- a/drivers/usb/gadget/udc/udc-xilinx.c +++ b/drivers/usb/gadget/udc/udc-xilinx.c @@ -1613,6 +1613,8 @@ static void xudc_getstatus(struct xusb_udc *udc) break; case USB_RECIP_ENDPOINT: epnum = udc->setup.wIndex & USB_ENDPOINT_NUMBER_MASK; + if (epnum >= XUSB_MAX_ENDPOINTS) + goto stall; target_ep = &udc->ep[epnum]; epcfgreg = udc->read_fn(udc->addr + target_ep->offset); halt = epcfgreg & XUSB_EP_CFG_STALL_MASK; @@ -1680,6 +1682,10 @@ static void xudc_set_clear_feature(struct xusb_udc *udc) case USB_RECIP_ENDPOINT: if (!udc->setup.wValue) { endpoint = udc->setup.wIndex & USB_ENDPOINT_NUMBER_MASK; + if (endpoint >= XUSB_MAX_ENDPOINTS) { + xudc_ep0_stall(udc); + return; + } target_ep = &udc->ep[endpoint]; outinbit = udc->setup.wIndex & USB_ENDPOINT_DIR_MASK; outinbit = outinbit >> 7; diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index 5c6ce1ef3f4b0..c0e344c3a8487 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -634,7 +634,16 @@ static int ehci_run (struct usb_hcd *hcd) /* Wait until HC become operational */ ehci_readl(ehci, &ehci->regs->command); /* unblock posted writes */ msleep(5); - rc = ehci_handshake(ehci, &ehci->regs->status, STS_HALT, 0, 100 * 1000); + + /* For Aspeed, STS_HALT also depends on ASS/PSS status. + * Check CMD_RUN instead. + */ + if (ehci->is_aspeed) + rc = ehci_handshake(ehci, &ehci->regs->command, CMD_RUN, + 1, 100 * 1000); + else + rc = ehci_handshake(ehci, &ehci->regs->status, STS_HALT, + 0, 100 * 1000); up_write(&ehci_cf_port_reset_rwsem); diff --git a/drivers/usb/host/ehci-mv.c b/drivers/usb/host/ehci-mv.c index b6f196f5e252e..b0e0f8ea98a9c 100644 --- a/drivers/usb/host/ehci-mv.c +++ b/drivers/usb/host/ehci-mv.c @@ -41,26 +41,25 @@ struct ehci_hcd_mv { int (*set_vbus)(unsigned int vbus); }; -static void ehci_clock_enable(struct ehci_hcd_mv *ehci_mv) +static int mv_ehci_enable(struct ehci_hcd_mv *ehci_mv) { - clk_prepare_enable(ehci_mv->clk); -} + int retval; -static void ehci_clock_disable(struct ehci_hcd_mv *ehci_mv) -{ - clk_disable_unprepare(ehci_mv->clk); -} + retval = clk_prepare_enable(ehci_mv->clk); + if (retval) + return retval; -static int mv_ehci_enable(struct ehci_hcd_mv *ehci_mv) -{ - ehci_clock_enable(ehci_mv); - return phy_init(ehci_mv->phy); + retval = phy_init(ehci_mv->phy); + if (retval) + clk_disable_unprepare(ehci_mv->clk); + + return retval; } static void mv_ehci_disable(struct ehci_hcd_mv *ehci_mv) { phy_exit(ehci_mv->phy); - ehci_clock_disable(ehci_mv); + clk_disable_unprepare(ehci_mv->clk); } static int mv_ehci_reset(struct usb_hcd *hcd) diff --git a/drivers/usb/host/ehci-orion.c b/drivers/usb/host/ehci-orion.c index a319b1df3011c..3626758b3e2aa 100644 --- a/drivers/usb/host/ehci-orion.c +++ b/drivers/usb/host/ehci-orion.c @@ -264,8 +264,11 @@ static int ehci_orion_drv_probe(struct platform_device *pdev) * the clock does not exists. */ priv->clk = devm_clk_get(&pdev->dev, NULL); - if (!IS_ERR(priv->clk)) - clk_prepare_enable(priv->clk); + if (!IS_ERR(priv->clk)) { + err = clk_prepare_enable(priv->clk); + if (err) + goto err_put_hcd; + } priv->phy = devm_phy_optional_get(&pdev->dev, "usb"); if (IS_ERR(priv->phy)) { @@ -311,6 +314,7 @@ static int ehci_orion_drv_probe(struct platform_device *pdev) err_dis_clk: if (!IS_ERR(priv->clk)) clk_disable_unprepare(priv->clk); +err_put_hcd: usb_put_hcd(hcd); err: dev_err(&pdev->dev, "init %s fail, %d\n", diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c index 774ccaa5aceea..1962140a59f20 100644 --- a/drivers/usb/host/ehci-pci.c +++ b/drivers/usb/host/ehci-pci.c @@ -21,6 +21,9 @@ static const char hcd_name[] = "ehci-pci"; /* defined here to avoid adding to pci_ids.h for single instance use */ #define PCI_DEVICE_ID_INTEL_CE4100_USB 0x2e70 +#define PCI_VENDOR_ID_ASPEED 0x1a03 +#define PCI_DEVICE_ID_ASPEED_EHCI 0x2603 + /*-------------------------------------------------------------------------*/ #define PCI_DEVICE_ID_INTEL_QUARK_X1000_SOC 0x0939 static inline bool is_intel_quark_x1000(struct pci_dev *pdev) @@ -223,6 +226,12 @@ static int ehci_pci_setup(struct usb_hcd *hcd) ehci->has_synopsys_hc_bug = 1; } break; + case PCI_VENDOR_ID_ASPEED: + if (pdev->device == PCI_DEVICE_ID_ASPEED_EHCI) { + ehci_info(ehci, "applying Aspeed HC workaround\n"); + ehci->is_aspeed = 1; + } + break; } /* optional debug port, normally in the first BAR */ diff --git a/drivers/usb/host/ehci-platform.c b/drivers/usb/host/ehci-platform.c index e4fc3f66d43bf..accbcc989c503 100644 --- a/drivers/usb/host/ehci-platform.c +++ b/drivers/usb/host/ehci-platform.c @@ -286,6 +286,12 @@ static int ehci_platform_probe(struct platform_device *dev) "has-transaction-translator")) hcd->has_tt = 1; + if (of_device_is_compatible(dev->dev.of_node, + "aspeed,ast2500-ehci") || + of_device_is_compatible(dev->dev.of_node, + "aspeed,ast2600-ehci")) + ehci->is_aspeed = 1; + if (soc_device_match(quirk_poll_match)) priv->quirk_poll = true; diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h index ac5e967907d14..356738462ce45 100644 --- a/drivers/usb/host/ehci.h +++ b/drivers/usb/host/ehci.h @@ -218,6 +218,7 @@ struct ehci_hcd { /* one per controller */ unsigned frame_index_bug:1; /* MosChip (AKA NetMos) */ unsigned need_oc_pp_cycle:1; /* MPC834X port power */ unsigned imx28_write_fix:1; /* For Freescale i.MX28 */ + unsigned is_aspeed:1; /* required for usb32 quirk */ #define OHCI_CTRL_HCFS (3 << 6) diff --git a/drivers/usb/host/fotg210-hcd.c b/drivers/usb/host/fotg210-hcd.c index c3f74d6674e1d..f457e083a6f89 100644 --- a/drivers/usb/host/fotg210-hcd.c +++ b/drivers/usb/host/fotg210-hcd.c @@ -2511,11 +2511,6 @@ static unsigned qh_completions(struct fotg210_hcd *fotg210, return count; } -/* high bandwidth multiplier, as encoded in highspeed endpoint descriptors */ -#define hb_mult(wMaxPacketSize) (1 + (((wMaxPacketSize) >> 11) & 0x03)) -/* ... and packet size, for any kind of endpoint descriptor */ -#define max_packet(wMaxPacketSize) ((wMaxPacketSize) & 0x07ff) - /* reverse of qh_urb_transaction: free a list of TDs. * used for cleanup after errors, before HC sees an URB's TDs. */ @@ -2601,7 +2596,7 @@ static struct list_head *qh_urb_transaction(struct fotg210_hcd *fotg210, token |= (1 /* "in" */ << 8); /* else it's already initted to "out" pid (0 << 8) */ - maxpacket = max_packet(usb_maxpacket(urb->dev, urb->pipe, !is_input)); + maxpacket = usb_maxpacket(urb->dev, urb->pipe, !is_input); /* * buffer gets wrapped in one or more qtds; @@ -2715,9 +2710,11 @@ static struct fotg210_qh *qh_make(struct fotg210_hcd *fotg210, struct urb *urb, gfp_t flags) { struct fotg210_qh *qh = fotg210_qh_alloc(fotg210, flags); + struct usb_host_endpoint *ep; u32 info1 = 0, info2 = 0; int is_input, type; int maxp = 0; + int mult; struct usb_tt *tt = urb->dev->tt; struct fotg210_qh_hw *hw; @@ -2732,14 +2729,15 @@ static struct fotg210_qh *qh_make(struct fotg210_hcd *fotg210, struct urb *urb, is_input = usb_pipein(urb->pipe); type = usb_pipetype(urb->pipe); - maxp = usb_maxpacket(urb->dev, urb->pipe, !is_input); + ep = usb_pipe_endpoint(urb->dev, urb->pipe); + maxp = usb_endpoint_maxp(&ep->desc); + mult = usb_endpoint_maxp_mult(&ep->desc); /* 1024 byte maxpacket is a hardware ceiling. High bandwidth * acts like up to 3KB, but is built from smaller packets. */ - if (max_packet(maxp) > 1024) { - fotg210_dbg(fotg210, "bogus qh maxpacket %d\n", - max_packet(maxp)); + if (maxp > 1024) { + fotg210_dbg(fotg210, "bogus qh maxpacket %d\n", maxp); goto done; } @@ -2753,8 +2751,7 @@ static struct fotg210_qh *qh_make(struct fotg210_hcd *fotg210, struct urb *urb, */ if (type == PIPE_INTERRUPT) { qh->usecs = NS_TO_US(usb_calc_bus_time(USB_SPEED_HIGH, - is_input, 0, - hb_mult(maxp) * max_packet(maxp))); + is_input, 0, mult * maxp)); qh->start = NO_FRAME; if (urb->dev->speed == USB_SPEED_HIGH) { @@ -2791,7 +2788,7 @@ static struct fotg210_qh *qh_make(struct fotg210_hcd *fotg210, struct urb *urb, think_time = tt ? tt->think_time : 0; qh->tt_usecs = NS_TO_US(think_time + usb_calc_bus_time(urb->dev->speed, - is_input, 0, max_packet(maxp))); + is_input, 0, maxp)); qh->period = urb->interval; if (qh->period > fotg210->periodic_size) { qh->period = fotg210->periodic_size; @@ -2854,11 +2851,11 @@ static struct fotg210_qh *qh_make(struct fotg210_hcd *fotg210, struct urb *urb, * to help them do so. So now people expect to use * such nonconformant devices with Linux too; sigh. */ - info1 |= max_packet(maxp) << 16; + info1 |= maxp << 16; info2 |= (FOTG210_TUNE_MULT_HS << 30); } else { /* PIPE_INTERRUPT */ - info1 |= max_packet(maxp) << 16; - info2 |= hb_mult(maxp) << 30; + info1 |= maxp << 16; + info2 |= mult << 30; } break; default: @@ -3928,6 +3925,7 @@ static void iso_stream_init(struct fotg210_hcd *fotg210, int is_input; long bandwidth; unsigned multi; + struct usb_host_endpoint *ep; /* * this might be a "high bandwidth" highspeed endpoint, @@ -3935,14 +3933,14 @@ static void iso_stream_init(struct fotg210_hcd *fotg210, */ epnum = usb_pipeendpoint(pipe); is_input = usb_pipein(pipe) ? USB_DIR_IN : 0; - maxp = usb_maxpacket(dev, pipe, !is_input); + ep = usb_pipe_endpoint(dev, pipe); + maxp = usb_endpoint_maxp(&ep->desc); if (is_input) buf1 = (1 << 11); else buf1 = 0; - maxp = max_packet(maxp); - multi = hb_mult(maxp); + multi = usb_endpoint_maxp_mult(&ep->desc); buf1 |= maxp; maxp *= multi; @@ -4463,13 +4461,12 @@ static bool itd_complete(struct fotg210_hcd *fotg210, struct fotg210_itd *itd) /* HC need not update length with this error */ if (!(t & FOTG210_ISOC_BABBLE)) { - desc->actual_length = - fotg210_itdlen(urb, desc, t); + desc->actual_length = FOTG210_ITD_LENGTH(t); urb->actual_length += desc->actual_length; } } else if (likely((t & FOTG210_ISOC_ACTIVE) == 0)) { desc->status = 0; - desc->actual_length = fotg210_itdlen(urb, desc, t); + desc->actual_length = FOTG210_ITD_LENGTH(t); urb->actual_length += desc->actual_length; } else { /* URB was too late */ diff --git a/drivers/usb/host/fotg210.h b/drivers/usb/host/fotg210.h index 1b4db95e5c43a..291add93d84ee 100644 --- a/drivers/usb/host/fotg210.h +++ b/drivers/usb/host/fotg210.h @@ -686,11 +686,6 @@ static inline unsigned fotg210_read_frame_index(struct fotg210_hcd *fotg210) return fotg210_readl(fotg210, &fotg210->regs->frame_index); } -#define fotg210_itdlen(urb, desc, t) ({ \ - usb_pipein((urb)->pipe) ? \ - (desc)->length - FOTG210_ITD_LENGTH(t) : \ - FOTG210_ITD_LENGTH(t); \ -}) /*-------------------------------------------------------------------------*/ #endif /* __LINUX_FOTG210_H */ diff --git a/drivers/usb/host/isp116x-hcd.c b/drivers/usb/host/isp116x-hcd.c index a87c0b26279e7..00a4e12a1f158 100644 --- a/drivers/usb/host/isp116x-hcd.c +++ b/drivers/usb/host/isp116x-hcd.c @@ -1541,10 +1541,12 @@ static int isp116x_remove(struct platform_device *pdev) iounmap(isp116x->data_reg); res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - release_mem_region(res->start, 2); + if (res) + release_mem_region(res->start, 2); iounmap(isp116x->addr_reg); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - release_mem_region(res->start, 2); + if (res) + release_mem_region(res->start, 2); usb_put_hcd(hcd); return 0; diff --git a/drivers/usb/host/max3421-hcd.c b/drivers/usb/host/max3421-hcd.c index 5ef0747225f6b..e0b25dd869094 100644 --- a/drivers/usb/host/max3421-hcd.c +++ b/drivers/usb/host/max3421-hcd.c @@ -125,8 +125,6 @@ struct max3421_hcd { struct task_struct *spi_thread; - struct max3421_hcd *next; - enum max3421_rh_state rh_state; /* lower 16 bits contain port status, upper 16 bits the change mask: */ u32 port_status; @@ -174,8 +172,6 @@ struct max3421_ep { u8 retransmit; /* packet needs retransmission */ }; -static struct max3421_hcd *max3421_hcd_list; - #define MAX3421_FIFO_SIZE 64 #define MAX3421_SPI_DIR_RD 0 /* read register from MAX3421 */ @@ -1882,9 +1878,8 @@ max3421_probe(struct spi_device *spi) } set_bit(HCD_FLAG_POLL_RH, &hcd->flags); max3421_hcd = hcd_to_max3421(hcd); - max3421_hcd->next = max3421_hcd_list; - max3421_hcd_list = max3421_hcd; INIT_LIST_HEAD(&max3421_hcd->ep_list); + spi_set_drvdata(spi, max3421_hcd); max3421_hcd->tx = kmalloc(sizeof(*max3421_hcd->tx), GFP_KERNEL); if (!max3421_hcd->tx) @@ -1934,28 +1929,18 @@ max3421_probe(struct spi_device *spi) static int max3421_remove(struct spi_device *spi) { - struct max3421_hcd *max3421_hcd = NULL, **prev; - struct usb_hcd *hcd = NULL; + struct max3421_hcd *max3421_hcd; + struct usb_hcd *hcd; unsigned long flags; - for (prev = &max3421_hcd_list; *prev; prev = &(*prev)->next) { - max3421_hcd = *prev; - hcd = max3421_to_hcd(max3421_hcd); - if (hcd->self.controller == &spi->dev) - break; - } - if (!max3421_hcd) { - dev_err(&spi->dev, "no MAX3421 HCD found for SPI device %p\n", - spi); - return -ENODEV; - } + max3421_hcd = spi_get_drvdata(spi); + hcd = max3421_to_hcd(max3421_hcd); usb_remove_hcd(hcd); spin_lock_irqsave(&max3421_hcd->lock, flags); kthread_stop(max3421_hcd->spi_thread); - *prev = max3421_hcd->next; spin_unlock_irqrestore(&max3421_hcd->lock, flags); diff --git a/drivers/usb/host/ohci-tmio.c b/drivers/usb/host/ohci-tmio.c index fb6f5e9ae5c62..b611c8b09a89f 100644 --- a/drivers/usb/host/ohci-tmio.c +++ b/drivers/usb/host/ohci-tmio.c @@ -199,9 +199,12 @@ static int ohci_hcd_tmio_drv_probe(struct platform_device *dev) if (usb_disabled()) return -ENODEV; - if (!cell) + if (!cell || !regs || !config || !sram) return -EINVAL; + if (irq < 0) + return irq; + hcd = usb_create_hcd(&ohci_tmio_hc_driver, &dev->dev, dev_name(&dev->dev)); if (!hcd) { ret = -ENOMEM; diff --git a/drivers/usb/host/oxu210hp-hcd.c b/drivers/usb/host/oxu210hp-hcd.c index 65985247fc00f..f05b6f2b08656 100644 --- a/drivers/usb/host/oxu210hp-hcd.c +++ b/drivers/usb/host/oxu210hp-hcd.c @@ -3906,8 +3906,10 @@ static int oxu_bus_suspend(struct usb_hcd *hcd) } } + spin_unlock_irq(&oxu->lock); /* turn off now-idle HC */ del_timer_sync(&oxu->watchdog); + spin_lock_irq(&oxu->lock); ehci_halt(oxu); hcd->state = HC_STATE_SUSPENDED; diff --git a/drivers/usb/host/uhci-platform.c b/drivers/usb/host/uhci-platform.c index 70dbd95c3f063..be9e9db7cad10 100644 --- a/drivers/usb/host/uhci-platform.c +++ b/drivers/usb/host/uhci-platform.c @@ -113,7 +113,8 @@ static int uhci_hcd_platform_probe(struct platform_device *pdev) num_ports); } if (of_device_is_compatible(np, "aspeed,ast2400-uhci") || - of_device_is_compatible(np, "aspeed,ast2500-uhci")) { + of_device_is_compatible(np, "aspeed,ast2500-uhci") || + of_device_is_compatible(np, "aspeed,ast2600-uhci")) { uhci->is_aspeed = 1; dev_info(&pdev->dev, "Enabled Aspeed implementation workarounds\n"); diff --git a/drivers/usb/host/xhci-debugfs.c b/drivers/usb/host/xhci-debugfs.c index 448d7b11dec4c..f5c8e4eb62ae6 100644 --- a/drivers/usb/host/xhci-debugfs.c +++ b/drivers/usb/host/xhci-debugfs.c @@ -197,12 +197,13 @@ static void xhci_ring_dump_segment(struct seq_file *s, int i; dma_addr_t dma; union xhci_trb *trb; + char str[XHCI_MSG_MAX]; for (i = 0; i < TRBS_PER_SEGMENT; i++) { trb = &seg->trbs[i]; dma = seg->dma + i * sizeof(*trb); seq_printf(s, "%pad: %s\n", &dma, - xhci_decode_trb(le32_to_cpu(trb->generic.field[0]), + xhci_decode_trb(str, XHCI_MSG_MAX, le32_to_cpu(trb->generic.field[0]), le32_to_cpu(trb->generic.field[1]), le32_to_cpu(trb->generic.field[2]), le32_to_cpu(trb->generic.field[3]))); @@ -340,9 +341,10 @@ static int xhci_portsc_show(struct seq_file *s, void *unused) { struct xhci_port *port = s->private; u32 portsc; + char str[XHCI_MSG_MAX]; portsc = readl(port->addr); - seq_printf(s, "%s\n", xhci_decode_portsc(portsc)); + seq_printf(s, "%s\n", xhci_decode_portsc(str, portsc)); return 0; } diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 6358d4e0653ed..9c066d1c512b1 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -171,7 +171,6 @@ static void xhci_common_hub_descriptor(struct xhci_hcd *xhci, { u16 temp; - desc->bPwrOn2PwrGood = 10; /* xhci section 5.4.9 says 20ms max */ desc->bHubContrCurrent = 0; desc->bNbrPorts = ports; @@ -206,6 +205,7 @@ static void xhci_usb2_hub_descriptor(struct usb_hcd *hcd, struct xhci_hcd *xhci, desc->bDescriptorType = USB_DT_HUB; temp = 1 + (ports / 8); desc->bDescLength = USB_DT_HUB_NONVAR_SIZE + 2 * temp; + desc->bPwrOn2PwrGood = 10; /* xhci section 5.4.8 says 20ms */ /* The Device Removable bits are reported on a byte granularity. * If the port doesn't exist within that byte, the bit is set to 0. @@ -258,6 +258,7 @@ static void xhci_usb3_hub_descriptor(struct usb_hcd *hcd, struct xhci_hcd *xhci, xhci_common_hub_descriptor(xhci, desc, ports); desc->bDescriptorType = USB_DT_SS_HUB; desc->bDescLength = USB_DT_SS_HUB_SIZE; + desc->bPwrOn2PwrGood = 50; /* usb 3.1 may fail if less than 100ms */ /* header decode latency should be zero for roothubs, * see section 4.23.5.2. @@ -565,7 +566,7 @@ struct xhci_hub *xhci_get_rhub(struct usb_hcd *hcd) * It will release and re-aquire the lock while calling ACPI * method. */ -static void xhci_set_port_power(struct xhci_hcd *xhci, struct usb_hcd *hcd, +void xhci_set_port_power(struct xhci_hcd *xhci, struct usb_hcd *hcd, u16 index, bool on, unsigned long *flags) { struct xhci_hub *rhub; @@ -628,6 +629,7 @@ static int xhci_enter_test_mode(struct xhci_hcd *xhci, continue; retval = xhci_disable_slot(xhci, i); + xhci_free_virt_device(xhci, i); if (retval) xhci_err(xhci, "Failed to disable slot %d, %d. Enter test mode anyway\n", i, retval); @@ -672,7 +674,7 @@ static int xhci_exit_test_mode(struct xhci_hcd *xhci) } pm_runtime_allow(xhci_to_hcd(xhci)->self.controller); xhci->test_mode = 0; - return xhci_reset(xhci); + return xhci_reset(xhci, XHCI_RESET_SHORT_USEC); } void xhci_set_link_state(struct xhci_hcd *xhci, struct xhci_port *port, @@ -998,6 +1000,9 @@ static void xhci_get_usb2_port_status(struct xhci_port *port, u32 *status, if (link_state == XDEV_U2) *status |= USB_PORT_STAT_L1; if (link_state == XDEV_U0) { + if (bus_state->resume_done[portnum]) + usb_hcd_end_port_resume(&port->rhub->hcd->self, + portnum); bus_state->resume_done[portnum] = 0; clear_bit(portnum, &bus_state->resuming_ports); if (bus_state->suspended_ports & (1 << portnum)) { @@ -1338,7 +1343,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, } spin_unlock_irqrestore(&xhci->lock, flags); if (!wait_for_completion_timeout(&bus_state->u3exit_done[wIndex], - msecs_to_jiffies(100))) + msecs_to_jiffies(500))) xhci_dbg(xhci, "missing U0 port change event for port %d\n", wIndex); spin_lock_irqsave(&xhci->lock, flags); diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 160caef09c5ea..ef23a69c6553a 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -2583,7 +2583,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) fail: xhci_halt(xhci); - xhci_reset(xhci); + xhci_reset(xhci, XHCI_RESET_SHORT_USEC); xhci_mem_cleanup(xhci); return -ENOMEM; } diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index d242779297ba7..e7533787db411 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -28,6 +28,7 @@ #define PCI_VENDOR_ID_FRESCO_LOGIC 0x1b73 #define PCI_DEVICE_ID_FRESCO_LOGIC_PDK 0x1000 #define PCI_DEVICE_ID_FRESCO_LOGIC_FL1009 0x1009 +#define PCI_DEVICE_ID_FRESCO_LOGIC_FL1100 0x1100 #define PCI_DEVICE_ID_FRESCO_LOGIC_FL1400 0x1400 #define PCI_VENDOR_ID_ETRON 0x1b6f @@ -58,6 +59,15 @@ #define PCI_DEVICE_ID_AMD_PROMONTORYA_3 0x43ba #define PCI_DEVICE_ID_AMD_PROMONTORYA_2 0x43bb #define PCI_DEVICE_ID_AMD_PROMONTORYA_1 0x43bc +#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_1 0x161a +#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_2 0x161b +#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_3 0x161d +#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_4 0x161e +#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_5 0x15d6 +#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_6 0x15d7 +#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_7 0x161c +#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_8 0x161f + #define PCI_DEVICE_ID_ASMEDIA_1042_XHCI 0x1042 #define PCI_DEVICE_ID_ASMEDIA_1042A_XHCI 0x1142 #define PCI_DEVICE_ID_ASMEDIA_1142_XHCI 0x1242 @@ -132,6 +142,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1009) xhci->quirks |= XHCI_BROKEN_STREAMS; + if (pdev->vendor == PCI_VENDOR_ID_FRESCO_LOGIC && + pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1100) + xhci->quirks |= XHCI_TRUST_TX_LENGTH; + if (pdev->vendor == PCI_VENDOR_ID_NEC) xhci->quirks |= XHCI_NEC_HOST; @@ -288,6 +302,17 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_4)) xhci->quirks |= XHCI_NO_SOFT_RETRY; + if (pdev->vendor == PCI_VENDOR_ID_AMD && + (pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_1 || + pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_2 || + pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_3 || + pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_4 || + pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_5 || + pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_6 || + pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_7 || + pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_8)) + xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW; + if (xhci->quirks & XHCI_RESET_ON_RESUME) xhci_dbg_trace(xhci, trace_xhci_dbg_quirks, "QUIRK: Resetting on resume"); diff --git a/drivers/usb/host/xhci-rcar.c b/drivers/usb/host/xhci-rcar.c index c1025d321a417..3da75b367f952 100644 --- a/drivers/usb/host/xhci-rcar.c +++ b/drivers/usb/host/xhci-rcar.c @@ -134,6 +134,13 @@ static int xhci_rcar_download_firmware(struct usb_hcd *hcd) const struct soc_device_attribute *attr; const char *firmware_name; + /* + * According to the datasheet, "Upon the completion of FW Download, + * there is no need to write or reload FW". + */ + if (readl(regs + RCAR_USB3_DL_CTRL) & RCAR_USB3_DL_CTRL_FW_SUCCESS) + return 0; + attr = soc_device_match(rcar_quirks_match); if (attr) quirks = (uintptr_t)attr->data; diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 1228b3d92db06..6cedae902adf9 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -339,16 +339,29 @@ static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci, /* Must be called with xhci->lock held, releases and aquires lock back */ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci, unsigned long flags) { - u64 temp_64; + struct xhci_segment *new_seg = xhci->cmd_ring->deq_seg; + union xhci_trb *new_deq = xhci->cmd_ring->dequeue; + u64 crcr; int ret; xhci_dbg(xhci, "Abort command ring\n"); reinit_completion(&xhci->cmd_ring_stop_completion); - temp_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring); - xhci_write_64(xhci, temp_64 | CMD_RING_ABORT, - &xhci->op_regs->cmd_ring); + /* + * The control bits like command stop, abort are located in lower + * dword of the command ring control register. + * Some controllers require all 64 bits to be written to abort the ring. + * Make sure the upper dword is valid, pointing to the next command, + * avoiding corrupting the command ring pointer in case the command ring + * is stopped by the time the upper dword is written. + */ + next_trb(xhci, NULL, &new_seg, &new_deq); + if (trb_is_link(new_deq)) + next_trb(xhci, NULL, &new_seg, &new_deq); + + crcr = xhci_trb_virt_to_dma(new_seg, new_deq); + xhci_write_64(xhci, crcr | CMD_RING_ABORT, &xhci->op_regs->cmd_ring); /* Section 4.6.1.2 of xHCI 1.0 spec says software should also time the * completion of the Command Abort operation. If CRR is not negated in 5 @@ -1252,7 +1265,6 @@ static void xhci_handle_cmd_disable_slot(struct xhci_hcd *xhci, int slot_id) if (xhci->quirks & XHCI_EP_LIMIT_QUIRK) /* Delete default control endpoint resources */ xhci_free_device_endpoint_resources(xhci, virt_dev, true); - xhci_free_virt_device(xhci, slot_id); } static void xhci_handle_cmd_config_ep(struct xhci_hcd *xhci, int slot_id, @@ -2920,6 +2932,8 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) if (event_loop++ < TRBS_PER_SEGMENT / 2) continue; xhci_update_erst_dequeue(xhci, event_ring_deq); + event_ring_deq = xhci->event_ring->dequeue; + event_loop = 0; } diff --git a/drivers/usb/host/xhci-trace.h b/drivers/usb/host/xhci-trace.h index 87da9098fb347..dab2af3f2c4f7 100644 --- a/drivers/usb/host/xhci-trace.h +++ b/drivers/usb/host/xhci-trace.h @@ -25,8 +25,6 @@ #include "xhci.h" #include "xhci-dbgcap.h" -#define XHCI_MSG_MAX 500 - DECLARE_EVENT_CLASS(xhci_log_msg, TP_PROTO(struct va_format *vaf), TP_ARGS(vaf), @@ -122,6 +120,7 @@ DECLARE_EVENT_CLASS(xhci_log_trb, __field(u32, field1) __field(u32, field2) __field(u32, field3) + __dynamic_array(char, str, XHCI_MSG_MAX) ), TP_fast_assign( __entry->type = ring->type; @@ -131,7 +130,7 @@ DECLARE_EVENT_CLASS(xhci_log_trb, __entry->field3 = le32_to_cpu(trb->field[3]); ), TP_printk("%s: %s", xhci_ring_type_string(__entry->type), - xhci_decode_trb(__entry->field0, __entry->field1, + xhci_decode_trb(__get_str(str), XHCI_MSG_MAX, __entry->field0, __entry->field1, __entry->field2, __entry->field3) ) ); @@ -523,6 +522,7 @@ DECLARE_EVENT_CLASS(xhci_log_portsc, TP_STRUCT__entry( __field(u32, portnum) __field(u32, portsc) + __dynamic_array(char, str, XHCI_MSG_MAX) ), TP_fast_assign( __entry->portnum = portnum; @@ -530,7 +530,7 @@ DECLARE_EVENT_CLASS(xhci_log_portsc, ), TP_printk("port-%d: %s", __entry->portnum, - xhci_decode_portsc(__entry->portsc) + xhci_decode_portsc(__get_str(str), __entry->portsc) ) ); diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index a3813c75a3de8..9fe35bb67731e 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -66,7 +66,7 @@ static bool td_on_ring(struct xhci_td *td, struct xhci_ring *ring) * handshake done). There are two failure modes: "usec" have passed (major * hardware flakeout), or the register reads as all-ones (hardware removed). */ -int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, int usec) +int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, u64 timeout_us) { u32 result; int ret; @@ -74,7 +74,7 @@ int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, int usec) ret = readl_poll_timeout_atomic(ptr, result, (result & mask) == done || result == U32_MAX, - 1, usec); + 1, timeout_us); if (result == U32_MAX) /* card removed */ return -ENODEV; @@ -163,7 +163,7 @@ int xhci_start(struct xhci_hcd *xhci) * Transactions will be terminated immediately, and operational registers * will be set to their defaults. */ -int xhci_reset(struct xhci_hcd *xhci) +int xhci_reset(struct xhci_hcd *xhci, u64 timeout_us) { u32 command; u32 state; @@ -196,8 +196,7 @@ int xhci_reset(struct xhci_hcd *xhci) if (xhci->quirks & XHCI_INTEL_HOST) udelay(1000); - ret = xhci_handshake(&xhci->op_regs->command, - CMD_RESET, 0, 10 * 1000 * 1000); + ret = xhci_handshake(&xhci->op_regs->command, CMD_RESET, 0, timeout_us); if (ret) return ret; @@ -210,8 +209,7 @@ int xhci_reset(struct xhci_hcd *xhci) * xHCI cannot write to any doorbells or operational registers other * than status until the "Controller Not Ready" flag is cleared. */ - ret = xhci_handshake(&xhci->op_regs->status, - STS_CNR, 0, 10 * 1000 * 1000); + ret = xhci_handshake(&xhci->op_regs->status, STS_CNR, 0, timeout_us); xhci->usb2_rhub.bus_state.port_c_suspend = 0; xhci->usb2_rhub.bus_state.suspended_ports = 0; @@ -732,7 +730,7 @@ static void xhci_stop(struct usb_hcd *hcd) xhci->xhc_state |= XHCI_STATE_HALTED; xhci->cmd_ring_state = CMD_RING_STATE_STOPPED; xhci_halt(xhci); - xhci_reset(xhci); + xhci_reset(xhci, XHCI_RESET_SHORT_USEC); spin_unlock_irq(&xhci->lock); xhci_cleanup_msix(xhci); @@ -777,16 +775,38 @@ static void xhci_stop(struct usb_hcd *hcd) void xhci_shutdown(struct usb_hcd *hcd) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); + unsigned long flags; + int i; if (xhci->quirks & XHCI_SPURIOUS_REBOOT) usb_disable_xhci_ports(to_pci_dev(hcd->self.sysdev)); - spin_lock_irq(&xhci->lock); + /* Don't poll the roothubs after shutdown. */ + xhci_dbg(xhci, "%s: stopping usb%d port polling.\n", + __func__, hcd->self.busnum); + clear_bit(HCD_FLAG_POLL_RH, &hcd->flags); + del_timer_sync(&hcd->rh_timer); + + if (xhci->shared_hcd) { + clear_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags); + del_timer_sync(&xhci->shared_hcd->rh_timer); + } + + spin_lock_irqsave(&xhci->lock, flags); xhci_halt(xhci); + + /* Power off USB2 ports*/ + for (i = 0; i < xhci->usb2_rhub.num_ports; i++) + xhci_set_port_power(xhci, xhci->main_hcd, i, false, &flags); + + /* Power off USB3 ports*/ + for (i = 0; i < xhci->usb3_rhub.num_ports; i++) + xhci_set_port_power(xhci, xhci->shared_hcd, i, false, &flags); + /* Workaround for spurious wakeups at shutdown with HSW */ if (xhci->quirks & XHCI_SPURIOUS_WAKEUP) - xhci_reset(xhci); - spin_unlock_irq(&xhci->lock); + xhci_reset(xhci, XHCI_RESET_SHORT_USEC); + spin_unlock_irqrestore(&xhci->lock, flags); xhci_cleanup_msix(xhci); @@ -1091,6 +1111,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) int retval = 0; bool comp_timer_running = false; bool pending_portevent = false; + bool reinit_xhc = false; if (!hcd->state) return 0; @@ -1107,10 +1128,11 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) set_bit(HCD_FLAG_HW_ACCESSIBLE, &xhci->shared_hcd->flags); spin_lock_irq(&xhci->lock); - if ((xhci->quirks & XHCI_RESET_ON_RESUME) || xhci->broken_suspend) - hibernated = true; - if (!hibernated) { + if (hibernated || xhci->quirks & XHCI_RESET_ON_RESUME || xhci->broken_suspend) + reinit_xhc = true; + + if (!reinit_xhc) { /* * Some controllers might lose power during suspend, so wait * for controller not ready bit to clear, just as in xHC init. @@ -1143,12 +1165,17 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) spin_unlock_irq(&xhci->lock); return -ETIMEDOUT; } - temp = readl(&xhci->op_regs->status); } - /* If restore operation fails, re-initialize the HC during resume */ - if ((temp & STS_SRE) || hibernated) { + temp = readl(&xhci->op_regs->status); + + /* re-initialize the HC on Restore Error, or Host Controller Error */ + if (temp & (STS_SRE | STS_HCE)) { + reinit_xhc = true; + xhci_warn(xhci, "xHC error in resume, USBSTS 0x%x, Reinit\n", temp); + } + if (reinit_xhc) { if ((xhci->quirks & XHCI_COMP_MODE_QUIRK) && !(xhci_all_ports_seen_u0(xhci))) { del_timer_sync(&xhci->comp_mode_recovery_timer); @@ -1163,7 +1190,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) xhci_dbg(xhci, "Stop HCD\n"); xhci_halt(xhci); xhci_zero_64b_regs(xhci); - retval = xhci_reset(xhci); + retval = xhci_reset(xhci, XHCI_RESET_LONG_USEC); spin_unlock_irq(&xhci->lock); if (retval) return retval; @@ -1480,9 +1507,12 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag struct urb_priv *urb_priv; int num_tds; - if (!urb || xhci_check_args(hcd, urb->dev, urb->ep, - true, true, __func__) <= 0) + if (!urb) return -EINVAL; + ret = xhci_check_args(hcd, urb->dev, urb->ep, + true, true, __func__); + if (ret <= 0) + return ret ? ret : -EINVAL; slot_id = urb->dev->slot_id; ep_index = xhci_get_endpoint_index(&urb->ep->desc); @@ -3172,10 +3202,13 @@ static void xhci_endpoint_reset(struct usb_hcd *hcd, return; /* Bail out if toggle is already being cleared by a endpoint reset */ + spin_lock_irqsave(&xhci->lock, flags); if (ep->ep_state & EP_HARD_CLEAR_TOGGLE) { ep->ep_state &= ~EP_HARD_CLEAR_TOGGLE; + spin_unlock_irqrestore(&xhci->lock, flags); return; } + spin_unlock_irqrestore(&xhci->lock, flags); /* Only interrupt and bulk ep's use data toggle, USB2 spec 5.5.4-> */ if (usb_endpoint_xfer_control(&host_ep->desc) || usb_endpoint_xfer_isoc(&host_ep->desc)) @@ -3261,8 +3294,10 @@ static void xhci_endpoint_reset(struct usb_hcd *hcd, xhci_free_command(xhci, cfg_cmd); cleanup: xhci_free_command(xhci, stop_cmd); + spin_lock_irqsave(&xhci->lock, flags); if (ep->ep_state & EP_SOFT_CLEAR_TOGGLE) ep->ep_state &= ~EP_SOFT_CLEAR_TOGGLE; + spin_unlock_irqrestore(&xhci->lock, flags); } static int xhci_check_streams_endpoint(struct xhci_hcd *xhci, @@ -3277,7 +3312,7 @@ static int xhci_check_streams_endpoint(struct xhci_hcd *xhci, return -EINVAL; ret = xhci_check_args(xhci_to_hcd(xhci), udev, ep, 1, true, __func__); if (ret <= 0) - return -EINVAL; + return ret ? ret : -EINVAL; if (usb_ss_max_streams(&ep->ss_ep_comp) == 0) { xhci_warn(xhci, "WARN: SuperSpeed Endpoint Companion" " descriptor for ep 0x%x does not support streams\n", @@ -3884,7 +3919,6 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) struct xhci_slot_ctx *slot_ctx; int i, ret; -#ifndef CONFIG_USB_DEFAULT_PERSIST /* * We called pm_runtime_get_noresume when the device was attached. * Decrement the counter here to allow controller to runtime suspend @@ -3892,7 +3926,6 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) */ if (xhci->quirks & XHCI_RESET_ON_RESUME) pm_runtime_put_noidle(hcd->self.controller); -#endif ret = xhci_check_args(hcd, udev, NULL, 0, true, __func__); /* If the host is halted due to driver unload, we still need to free the @@ -3911,9 +3944,8 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) del_timer_sync(&virt_dev->eps[i].stop_cmd_timer); } virt_dev->udev = NULL; - ret = xhci_disable_slot(xhci, udev->slot_id); - if (ret) - xhci_free_virt_device(xhci, udev->slot_id); + xhci_disable_slot(xhci, udev->slot_id); + xhci_free_virt_device(xhci, udev->slot_id); } int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id) @@ -3923,7 +3955,7 @@ int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id) u32 state; int ret = 0; - command = xhci_alloc_command(xhci, false, GFP_KERNEL); + command = xhci_alloc_command(xhci, true, GFP_KERNEL); if (!command) return -ENOMEM; @@ -3948,6 +3980,15 @@ int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id) } xhci_ring_cmd_db(xhci); spin_unlock_irqrestore(&xhci->lock, flags); + + wait_for_completion(command->completion); + + if (command->status != COMP_SUCCESS) + xhci_warn(xhci, "Unsuccessful disable slot %u command, status %d\n", + slot_id, command->status); + + xhci_free_command(xhci, command); + return ret; } @@ -4044,23 +4085,20 @@ int xhci_alloc_dev(struct usb_hcd *hcd, struct usb_device *udev) xhci_debugfs_create_slot(xhci, slot_id); -#ifndef CONFIG_USB_DEFAULT_PERSIST /* * If resetting upon resume, we can't put the controller into runtime * suspend if there is a device attached. */ if (xhci->quirks & XHCI_RESET_ON_RESUME) pm_runtime_get_noresume(hcd->self.controller); -#endif /* Is this a LS or FS device under a HS hub? */ /* Hub or peripherial? */ return 1; disable_slot: - ret = xhci_disable_slot(xhci, udev->slot_id); - if (ret) - xhci_free_virt_device(xhci, udev->slot_id); + xhci_disable_slot(xhci, udev->slot_id); + xhci_free_virt_device(xhci, udev->slot_id); return 0; } @@ -4190,6 +4228,7 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, mutex_unlock(&xhci->mutex); ret = xhci_disable_slot(xhci, udev->slot_id); + xhci_free_virt_device(xhci, udev->slot_id); if (!ret) xhci_alloc_dev(hcd, udev); kfree(command->completion); @@ -4662,19 +4701,19 @@ static u16 xhci_calculate_u1_timeout(struct xhci_hcd *xhci, { unsigned long long timeout_ns; - if (xhci->quirks & XHCI_INTEL_HOST) - timeout_ns = xhci_calculate_intel_u1_timeout(udev, desc); - else - timeout_ns = udev->u1_params.sel; - /* Prevent U1 if service interval is shorter than U1 exit latency */ if (usb_endpoint_xfer_int(desc) || usb_endpoint_xfer_isoc(desc)) { - if (xhci_service_interval_to_ns(desc) <= timeout_ns) { + if (xhci_service_interval_to_ns(desc) <= udev->u1_params.mel) { dev_dbg(&udev->dev, "Disable U1, ESIT shorter than exit latency\n"); return USB3_LPM_DISABLED; } } + if (xhci->quirks & XHCI_INTEL_HOST) + timeout_ns = xhci_calculate_intel_u1_timeout(udev, desc); + else + timeout_ns = udev->u1_params.sel; + /* The U1 timeout is encoded in 1us intervals. * Don't return a timeout of zero, because that's USB3_LPM_DISABLED. */ @@ -4726,19 +4765,19 @@ static u16 xhci_calculate_u2_timeout(struct xhci_hcd *xhci, { unsigned long long timeout_ns; - if (xhci->quirks & XHCI_INTEL_HOST) - timeout_ns = xhci_calculate_intel_u2_timeout(udev, desc); - else - timeout_ns = udev->u2_params.sel; - /* Prevent U2 if service interval is shorter than U2 exit latency */ if (usb_endpoint_xfer_int(desc) || usb_endpoint_xfer_isoc(desc)) { - if (xhci_service_interval_to_ns(desc) <= timeout_ns) { + if (xhci_service_interval_to_ns(desc) <= udev->u2_params.mel) { dev_dbg(&udev->dev, "Disable U2, ESIT shorter than exit latency\n"); return USB3_LPM_DISABLED; } } + if (xhci->quirks & XHCI_INTEL_HOST) + timeout_ns = xhci_calculate_intel_u2_timeout(udev, desc); + else + timeout_ns = udev->u2_params.sel; + /* The U2 timeout is encoded in 256us intervals */ timeout_ns = DIV_ROUND_UP_ULL(timeout_ns, 256 * 1000); /* If the necessary timeout value is bigger than what we can set in the @@ -5253,7 +5292,7 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks) xhci_dbg(xhci, "Resetting HCD\n"); /* Reset the internal HC memory state and registers. */ - retval = xhci_reset(xhci); + retval = xhci_reset(xhci, XHCI_RESET_LONG_USEC); if (retval) return retval; xhci_dbg(xhci, "Reset complete\n"); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 834f32fe99308..a9031f4949842 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -22,6 +22,9 @@ #include "xhci-ext-caps.h" #include "pci-quirks.h" +/* max buffer size for trace and debug messages */ +#define XHCI_MSG_MAX 500 + /* xHCI PCI Configuration Registers */ #define XHCI_SBRN_OFFSET (0x60) @@ -226,6 +229,9 @@ struct xhci_op_regs { #define CMD_ETE (1 << 14) /* bits 15:31 are reserved (and should be preserved on writes). */ +#define XHCI_RESET_LONG_USEC (10 * 1000 * 1000) +#define XHCI_RESET_SHORT_USEC (250 * 1000) + /* IMAN - Interrupt Management Register */ #define IMAN_IE (1 << 1) #define IMAN_IP (1 << 0) @@ -2058,11 +2064,11 @@ void xhci_free_container_ctx(struct xhci_hcd *xhci, /* xHCI host controller glue */ typedef void (*xhci_get_quirks_t)(struct device *, struct xhci_hcd *); -int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, int usec); +int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, u64 timeout_us); void xhci_quiesce(struct xhci_hcd *xhci); int xhci_halt(struct xhci_hcd *xhci); int xhci_start(struct xhci_hcd *xhci); -int xhci_reset(struct xhci_hcd *xhci); +int xhci_reset(struct xhci_hcd *xhci, u64 timeout_us); int xhci_run(struct usb_hcd *hcd); int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks); void xhci_shutdown(struct usb_hcd *hcd); @@ -2149,6 +2155,8 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, u16 wIndex, int xhci_hub_status_data(struct usb_hcd *hcd, char *buf); int xhci_find_raw_port_number(struct usb_hcd *hcd, int port1); struct xhci_hub *xhci_get_rhub(struct usb_hcd *hcd); +void xhci_set_port_power(struct xhci_hcd *xhci, struct usb_hcd *hcd, u16 index, + bool on, unsigned long *flags); void xhci_hc_died(struct xhci_hcd *xhci); @@ -2217,15 +2225,14 @@ static inline char *xhci_slot_state_string(u32 state) } } -static inline const char *xhci_decode_trb(u32 field0, u32 field1, u32 field2, - u32 field3) +static inline const char *xhci_decode_trb(char *str, size_t size, + u32 field0, u32 field1, u32 field2, u32 field3) { - static char str[256]; int type = TRB_FIELD_TO_TYPE(field3); switch (type) { case TRB_LINK: - sprintf(str, + snprintf(str, size, "LINK %08x%08x intr %d type '%s' flags %c:%c:%c:%c", field1, field0, GET_INTR_TARGET(field2), xhci_trb_type_string(type), @@ -2242,7 +2249,7 @@ static inline const char *xhci_decode_trb(u32 field0, u32 field1, u32 field2, case TRB_HC_EVENT: case TRB_DEV_NOTE: case TRB_MFINDEX_WRAP: - sprintf(str, + snprintf(str, size, "TRB %08x%08x status '%s' len %d slot %d ep %d type '%s' flags %c:%c", field1, field0, xhci_trb_comp_code_string(GET_COMP_CODE(field2)), @@ -2255,7 +2262,8 @@ static inline const char *xhci_decode_trb(u32 field0, u32 field1, u32 field2, break; case TRB_SETUP: - sprintf(str, "bRequestType %02x bRequest %02x wValue %02x%02x wIndex %02x%02x wLength %d length %d TD size %d intr %d type '%s' flags %c:%c:%c", + snprintf(str, size, + "bRequestType %02x bRequest %02x wValue %02x%02x wIndex %02x%02x wLength %d length %d TD size %d intr %d type '%s' flags %c:%c:%c", field0 & 0xff, (field0 & 0xff00) >> 8, (field0 & 0xff000000) >> 24, @@ -2272,7 +2280,8 @@ static inline const char *xhci_decode_trb(u32 field0, u32 field1, u32 field2, field3 & TRB_CYCLE ? 'C' : 'c'); break; case TRB_DATA: - sprintf(str, "Buffer %08x%08x length %d TD size %d intr %d type '%s' flags %c:%c:%c:%c:%c:%c:%c", + snprintf(str, size, + "Buffer %08x%08x length %d TD size %d intr %d type '%s' flags %c:%c:%c:%c:%c:%c:%c", field1, field0, TRB_LEN(field2), GET_TD_SIZE(field2), GET_INTR_TARGET(field2), xhci_trb_type_string(type), @@ -2285,7 +2294,8 @@ static inline const char *xhci_decode_trb(u32 field0, u32 field1, u32 field2, field3 & TRB_CYCLE ? 'C' : 'c'); break; case TRB_STATUS: - sprintf(str, "Buffer %08x%08x length %d TD size %d intr %d type '%s' flags %c:%c:%c:%c", + snprintf(str, size, + "Buffer %08x%08x length %d TD size %d intr %d type '%s' flags %c:%c:%c:%c", field1, field0, TRB_LEN(field2), GET_TD_SIZE(field2), GET_INTR_TARGET(field2), xhci_trb_type_string(type), @@ -2298,7 +2308,7 @@ static inline const char *xhci_decode_trb(u32 field0, u32 field1, u32 field2, case TRB_ISOC: case TRB_EVENT_DATA: case TRB_TR_NOOP: - sprintf(str, + snprintf(str, size, "Buffer %08x%08x length %d TD size %d intr %d type '%s' flags %c:%c:%c:%c:%c:%c:%c:%c", field1, field0, TRB_LEN(field2), GET_TD_SIZE(field2), GET_INTR_TARGET(field2), @@ -2315,21 +2325,21 @@ static inline const char *xhci_decode_trb(u32 field0, u32 field1, u32 field2, case TRB_CMD_NOOP: case TRB_ENABLE_SLOT: - sprintf(str, + snprintf(str, size, "%s: flags %c", xhci_trb_type_string(type), field3 & TRB_CYCLE ? 'C' : 'c'); break; case TRB_DISABLE_SLOT: case TRB_NEG_BANDWIDTH: - sprintf(str, + snprintf(str, size, "%s: slot %d flags %c", xhci_trb_type_string(type), TRB_TO_SLOT_ID(field3), field3 & TRB_CYCLE ? 'C' : 'c'); break; case TRB_ADDR_DEV: - sprintf(str, + snprintf(str, size, "%s: ctx %08x%08x slot %d flags %c:%c", xhci_trb_type_string(type), field1, field0, @@ -2338,7 +2348,7 @@ static inline const char *xhci_decode_trb(u32 field0, u32 field1, u32 field2, field3 & TRB_CYCLE ? 'C' : 'c'); break; case TRB_CONFIG_EP: - sprintf(str, + snprintf(str, size, "%s: ctx %08x%08x slot %d flags %c:%c", xhci_trb_type_string(type), field1, field0, @@ -2347,7 +2357,7 @@ static inline const char *xhci_decode_trb(u32 field0, u32 field1, u32 field2, field3 & TRB_CYCLE ? 'C' : 'c'); break; case TRB_EVAL_CONTEXT: - sprintf(str, + snprintf(str, size, "%s: ctx %08x%08x slot %d flags %c", xhci_trb_type_string(type), field1, field0, @@ -2355,7 +2365,7 @@ static inline const char *xhci_decode_trb(u32 field0, u32 field1, u32 field2, field3 & TRB_CYCLE ? 'C' : 'c'); break; case TRB_RESET_EP: - sprintf(str, + snprintf(str, size, "%s: ctx %08x%08x slot %d ep %d flags %c:%c", xhci_trb_type_string(type), field1, field0, @@ -2376,7 +2386,7 @@ static inline const char *xhci_decode_trb(u32 field0, u32 field1, u32 field2, field3 & TRB_CYCLE ? 'C' : 'c'); break; case TRB_SET_DEQ: - sprintf(str, + snprintf(str, size, "%s: deq %08x%08x stream %d slot %d ep %d flags %c", xhci_trb_type_string(type), field1, field0, @@ -2387,14 +2397,14 @@ static inline const char *xhci_decode_trb(u32 field0, u32 field1, u32 field2, field3 & TRB_CYCLE ? 'C' : 'c'); break; case TRB_RESET_DEV: - sprintf(str, + snprintf(str, size, "%s: slot %d flags %c", xhci_trb_type_string(type), TRB_TO_SLOT_ID(field3), field3 & TRB_CYCLE ? 'C' : 'c'); break; case TRB_FORCE_EVENT: - sprintf(str, + snprintf(str, size, "%s: event %08x%08x vf intr %d vf id %d flags %c", xhci_trb_type_string(type), field1, field0, @@ -2403,14 +2413,14 @@ static inline const char *xhci_decode_trb(u32 field0, u32 field1, u32 field2, field3 & TRB_CYCLE ? 'C' : 'c'); break; case TRB_SET_LT: - sprintf(str, + snprintf(str, size, "%s: belt %d flags %c", xhci_trb_type_string(type), TRB_TO_BELT(field3), field3 & TRB_CYCLE ? 'C' : 'c'); break; case TRB_GET_BW: - sprintf(str, + snprintf(str, size, "%s: ctx %08x%08x slot %d speed %d flags %c", xhci_trb_type_string(type), field1, field0, @@ -2419,7 +2429,7 @@ static inline const char *xhci_decode_trb(u32 field0, u32 field1, u32 field2, field3 & TRB_CYCLE ? 'C' : 'c'); break; case TRB_FORCE_HEADER: - sprintf(str, + snprintf(str, size, "%s: info %08x%08x%08x pkt type %d roothub port %d flags %c", xhci_trb_type_string(type), field2, field1, field0 & 0xffffffe0, @@ -2428,7 +2438,7 @@ static inline const char *xhci_decode_trb(u32 field0, u32 field1, u32 field2, field3 & TRB_CYCLE ? 'C' : 'c'); break; default: - sprintf(str, + snprintf(str, size, "type '%s' -> raw %08x %08x %08x %08x", xhci_trb_type_string(type), field0, field1, field2, field3); @@ -2444,6 +2454,8 @@ static inline const char *xhci_decode_ctrl_ctx(unsigned long drop, unsigned int bit; int ret = 0; + str[0] = '\0'; + if (drop) { ret = sprintf(str, "Drop:"); for_each_set_bit(bit, &drop, 32) @@ -2553,9 +2565,8 @@ static inline const char *xhci_portsc_link_state_string(u32 portsc) return "Unknown"; } -static inline const char *xhci_decode_portsc(u32 portsc) +static inline const char *xhci_decode_portsc(char *str, u32 portsc) { - static char str[256]; int ret; ret = sprintf(str, "%s %s %s Link:%s PortSpeed:%d ", diff --git a/drivers/usb/misc/ftdi-elan.c b/drivers/usb/misc/ftdi-elan.c index cdee3af33ad7b..684800c66bb4d 100644 --- a/drivers/usb/misc/ftdi-elan.c +++ b/drivers/usb/misc/ftdi-elan.c @@ -202,6 +202,7 @@ static void ftdi_elan_delete(struct kref *kref) mutex_unlock(&ftdi_module_lock); kfree(ftdi->bulk_in_buffer); ftdi->bulk_in_buffer = NULL; + kfree(ftdi); } static void ftdi_elan_put_kref(struct usb_ftdi *ftdi) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index 103c69c692bad..888defdea546f 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -99,10 +99,6 @@ struct iowarrior { /* globals */ /*--------------*/ -/* - * USB spec identifies 5 second timeouts. - */ -#define GET_TIMEOUT 5 #define USB_REQ_GET_REPORT 0x01 //#if 0 static int usb_get_report(struct usb_device *dev, @@ -114,7 +110,7 @@ static int usb_get_report(struct usb_device *dev, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, (type << 8) + id, inter->desc.bInterfaceNumber, buf, size, - GET_TIMEOUT*HZ); + USB_CTRL_GET_TIMEOUT); } //#endif @@ -129,7 +125,7 @@ static int usb_set_report(struct usb_interface *intf, unsigned char type, USB_TYPE_CLASS | USB_RECIP_INTERFACE, (type << 8) + id, intf->cur_altsetting->desc.bInterfaceNumber, buf, - size, HZ); + size, 1000); } /*---------------------*/ diff --git a/drivers/usb/misc/uss720.c b/drivers/usb/misc/uss720.c index 748139d262633..0be8efcda15d5 100644 --- a/drivers/usb/misc/uss720.c +++ b/drivers/usb/misc/uss720.c @@ -71,6 +71,7 @@ static void destroy_priv(struct kref *kref) dev_dbg(&priv->usbdev->dev, "destroying priv datastructure\n"); usb_put_dev(priv->usbdev); + priv->usbdev = NULL; kfree(priv); } @@ -736,7 +737,6 @@ static int uss720_probe(struct usb_interface *intf, parport_announce_port(pp); usb_set_intfdata(intf, pp); - usb_put_dev(usbdev); return 0; probe_abort: @@ -754,7 +754,6 @@ static void uss720_disconnect(struct usb_interface *intf) usb_set_intfdata(intf, NULL); if (pp) { priv = pp->private_data; - priv->usbdev = NULL; priv->pp = NULL; dev_dbg(&intf->dev, "parport_remove_port\n"); parport_remove_port(pp); diff --git a/drivers/usb/mtu3/mtu3_dr.c b/drivers/usb/mtu3/mtu3_dr.c index 08e18448e8b82..db30365345e0f 100644 --- a/drivers/usb/mtu3/mtu3_dr.c +++ b/drivers/usb/mtu3/mtu3_dr.c @@ -41,10 +41,8 @@ static char *mailbox_state_string(enum mtu3_vbus_id_state state) static void toggle_opstate(struct ssusb_mtk *ssusb) { - if (!ssusb->otg_switch.is_u3_drd) { - mtu3_setbits(ssusb->mac_base, U3D_DEVICE_CONTROL, DC_SESSION); - mtu3_setbits(ssusb->mac_base, U3D_POWER_MANAGEMENT, SOFT_CONN); - } + mtu3_setbits(ssusb->mac_base, U3D_DEVICE_CONTROL, DC_SESSION); + mtu3_setbits(ssusb->mac_base, U3D_POWER_MANAGEMENT, SOFT_CONN); } /* only port0 supports dual-role mode */ diff --git a/drivers/usb/mtu3/mtu3_gadget.c b/drivers/usb/mtu3/mtu3_gadget.c index 08db02f3172df..061da9b82b967 100644 --- a/drivers/usb/mtu3/mtu3_gadget.c +++ b/drivers/usb/mtu3/mtu3_gadget.c @@ -72,14 +72,12 @@ static int mtu3_ep_enable(struct mtu3_ep *mep) u32 interval = 0; u32 mult = 0; u32 burst = 0; - int max_packet; int ret; desc = mep->desc; comp_desc = mep->comp_desc; mep->type = usb_endpoint_type(desc); - max_packet = usb_endpoint_maxp(desc); - mep->maxp = max_packet & GENMASK(10, 0); + mep->maxp = usb_endpoint_maxp(desc); switch (mtu->g.speed) { case USB_SPEED_SUPER: @@ -87,7 +85,7 @@ static int mtu3_ep_enable(struct mtu3_ep *mep) if (usb_endpoint_xfer_int(desc) || usb_endpoint_xfer_isoc(desc)) { interval = desc->bInterval; - interval = clamp_val(interval, 1, 16) - 1; + interval = clamp_val(interval, 1, 16); if (usb_endpoint_xfer_isoc(desc) && comp_desc) mult = comp_desc->bmAttributes; } @@ -99,9 +97,16 @@ static int mtu3_ep_enable(struct mtu3_ep *mep) if (usb_endpoint_xfer_isoc(desc) || usb_endpoint_xfer_int(desc)) { interval = desc->bInterval; - interval = clamp_val(interval, 1, 16) - 1; - burst = (max_packet & GENMASK(12, 11)) >> 11; + interval = clamp_val(interval, 1, 16); + mult = usb_endpoint_maxp_mult(desc) - 1; } + break; + case USB_SPEED_FULL: + if (usb_endpoint_xfer_isoc(desc)) + interval = clamp_val(desc->bInterval, 1, 16); + else if (usb_endpoint_xfer_int(desc)) + interval = clamp_val(desc->bInterval, 1, 255); + break; default: break; /*others are ignored */ @@ -247,6 +252,7 @@ struct usb_request *mtu3_alloc_request(struct usb_ep *ep, gfp_t gfp_flags) mreq->request.dma = DMA_ADDR_INVALID; mreq->epnum = mep->epnum; mreq->mep = mep; + INIT_LIST_HEAD(&mreq->list); trace_mtu3_alloc_request(mreq); return &mreq->request; diff --git a/drivers/usb/mtu3/mtu3_qmu.c b/drivers/usb/mtu3/mtu3_qmu.c index 3f414f91b5899..2ea3157ddb6e2 100644 --- a/drivers/usb/mtu3/mtu3_qmu.c +++ b/drivers/usb/mtu3/mtu3_qmu.c @@ -273,6 +273,8 @@ static int mtu3_prepare_tx_gpd(struct mtu3_ep *mep, struct mtu3_request *mreq) gpd->dw3_info |= cpu_to_le32(GPD_EXT_FLAG_ZLP); } + /* prevent reorder, make sure GPD's HWO is set last */ + mb(); gpd->dw0_info |= cpu_to_le32(GPD_FLAGS_IOC | GPD_FLAGS_HWO); mreq->gpd = gpd; @@ -306,6 +308,8 @@ static int mtu3_prepare_rx_gpd(struct mtu3_ep *mep, struct mtu3_request *mreq) gpd->next_gpd = cpu_to_le32(lower_32_bits(enq_dma)); ext_addr |= GPD_EXT_NGP(mtu, upper_32_bits(enq_dma)); gpd->dw3_info = cpu_to_le32(ext_addr); + /* prevent reorder, make sure GPD's HWO is set last */ + mb(); gpd->dw0_info |= cpu_to_le32(GPD_FLAGS_IOC | GPD_FLAGS_HWO); mreq->gpd = gpd; @@ -445,7 +449,8 @@ static void qmu_tx_zlp_error_handler(struct mtu3 *mtu, u8 epnum) return; } mtu3_setbits(mbase, MU3D_EP_TXCR0(mep->epnum), TX_TXPKTRDY); - + /* prevent reorder, make sure GPD's HWO is set last */ + mb(); /* by pass the current GDP */ gpd_current->dw0_info |= cpu_to_le32(GPD_FLAGS_BPS | GPD_FLAGS_HWO); diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c index 327d4f7baaf7c..1b9ebd756f097 100644 --- a/drivers/usb/musb/musb_dsps.c +++ b/drivers/usb/musb/musb_dsps.c @@ -890,23 +890,24 @@ static int dsps_probe(struct platform_device *pdev) if (!glue->usbss_base) return -ENXIO; - if (usb_get_dr_mode(&pdev->dev) == USB_DR_MODE_PERIPHERAL) { - ret = dsps_setup_optional_vbus_irq(pdev, glue); - if (ret) - goto err_iounmap; - } - platform_set_drvdata(pdev, glue); pm_runtime_enable(&pdev->dev); ret = dsps_create_musb_pdev(glue, pdev); if (ret) goto err; + if (usb_get_dr_mode(&pdev->dev) == USB_DR_MODE_PERIPHERAL) { + ret = dsps_setup_optional_vbus_irq(pdev, glue); + if (ret) + goto unregister_pdev; + } + return 0; +unregister_pdev: + platform_device_unregister(glue->musb); err: pm_runtime_disable(&pdev->dev); -err_iounmap: iounmap(glue->usbss_base); return ret; } diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index ffe462a657b15..4622400ba4ddb 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -1248,9 +1248,11 @@ static int musb_gadget_queue(struct usb_ep *ep, struct usb_request *req, status = musb_queue_resume_work(musb, musb_ep_restart_resume_work, request); - if (status < 0) + if (status < 0) { dev_err(musb->controller, "%s resume work: %i\n", __func__, status); + list_del(&request->list); + } } unlock: diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index 5c93226e0e20a..8def19fc50250 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -433,6 +433,7 @@ static int omap2430_probe(struct platform_device *pdev) control_node = of_parse_phandle(np, "ctrl-module", 0); if (control_node) { control_pdev = of_find_device_by_node(control_node); + of_node_put(control_node); if (!control_pdev) { dev_err(&pdev->dev, "Failed to get control device\n"); ret = -EINVAL; diff --git a/drivers/usb/musb/tusb6010.c b/drivers/usb/musb/tusb6010.c index 39453287b5c36..902507da8aa85 100644 --- a/drivers/usb/musb/tusb6010.c +++ b/drivers/usb/musb/tusb6010.c @@ -190,6 +190,7 @@ tusb_fifo_write_unaligned(void __iomem *fifo, const u8 *buf, u16 len) } if (len > 0) { /* Write the rest 1 - 3 bytes to FIFO */ + val = 0; memcpy(&val, buf, len); musb_writel(fifo, 0, val); } @@ -1102,6 +1103,11 @@ static int tusb_musb_init(struct musb *musb) /* dma address for async dma */ mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!mem) { + pr_debug("no async dma resource?\n"); + ret = -ENODEV; + goto done; + } musb->async = mem->start; /* dma address for sync dma */ diff --git a/drivers/usb/phy/phy-fsl-usb.c b/drivers/usb/phy/phy-fsl-usb.c index b451f4695f3f0..446c7bf67873c 100644 --- a/drivers/usb/phy/phy-fsl-usb.c +++ b/drivers/usb/phy/phy-fsl-usb.c @@ -873,6 +873,8 @@ int usb_otg_start(struct platform_device *pdev) /* request irq */ p_otg->irq = platform_get_irq(pdev, 0); + if (p_otg->irq < 0) + return p_otg->irq; status = request_irq(p_otg->irq, fsl_otg_isr, IRQF_SHARED, driver_name, p_otg); if (status) { diff --git a/drivers/usb/phy/phy-tahvo.c b/drivers/usb/phy/phy-tahvo.c index baebb1f5a9737..a3e043e3e4aae 100644 --- a/drivers/usb/phy/phy-tahvo.c +++ b/drivers/usb/phy/phy-tahvo.c @@ -393,7 +393,9 @@ static int tahvo_usb_probe(struct platform_device *pdev) dev_set_drvdata(&pdev->dev, tu); - tu->irq = platform_get_irq(pdev, 0); + tu->irq = ret = platform_get_irq(pdev, 0); + if (ret < 0) + return ret; ret = request_threaded_irq(tu->irq, NULL, tahvo_usb_vbus_interrupt, IRQF_ONESHOT, "tahvo-vbus", tu); diff --git a/drivers/usb/phy/phy-twl6030-usb.c b/drivers/usb/phy/phy-twl6030-usb.c index 9a7e655d52801..9337c30f0743b 100644 --- a/drivers/usb/phy/phy-twl6030-usb.c +++ b/drivers/usb/phy/phy-twl6030-usb.c @@ -348,6 +348,11 @@ static int twl6030_usb_probe(struct platform_device *pdev) twl->irq2 = platform_get_irq(pdev, 1); twl->linkstat = MUSB_UNKNOWN; + if (twl->irq1 < 0) + return twl->irq1; + if (twl->irq2 < 0) + return twl->irq2; + twl->comparator.set_vbus = twl6030_set_vbus; twl->comparator.start_srp = twl6030_start_srp; diff --git a/drivers/usb/serial/Kconfig b/drivers/usb/serial/Kconfig index 67279c6bce338..31ad8682ad9b8 100644 --- a/drivers/usb/serial/Kconfig +++ b/drivers/usb/serial/Kconfig @@ -66,6 +66,7 @@ config USB_SERIAL_SIMPLE - Libtransistor USB console - a number of Motorola phones - Motorola Tetra devices + - Nokia mobile phones - Novatel Wireless GPS receivers - Siemens USB/MPI adapter. - ViVOtech ViVOpay USB device. diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index e9c39a41faae9..f06a09e59d8ba 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -80,10 +80,10 @@ #define CH341_LCR_CS5 0x00 static const struct usb_device_id id_table[] = { - { USB_DEVICE(0x1a86, 0x5512) }, { USB_DEVICE(0x1a86, 0x5523) }, { USB_DEVICE(0x1a86, 0x7522) }, { USB_DEVICE(0x1a86, 0x7523) }, + { USB_DEVICE(0x2184, 0x0057) }, { USB_DEVICE(0x4348, 0x5523) }, { USB_DEVICE(0x9986, 0x7523) }, { }, @@ -678,7 +678,6 @@ static struct usb_serial_driver ch341_device = { .owner = THIS_MODULE, .name = "ch341-uart", }, - .bulk_in_size = 512, .id_table = id_table, .num_ports = 1, .open = ch341_open, diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 8e5878ec656d0..d5a1832aa48eb 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -52,6 +52,7 @@ static int cp210x_port_remove(struct usb_serial_port *); static void cp210x_dtr_rts(struct usb_serial_port *p, int on); static const struct usb_device_id id_table[] = { + { USB_DEVICE(0x0404, 0x034C) }, /* NCR Retail IO Box */ { USB_DEVICE(0x045B, 0x0053) }, /* Renesas RX610 RX-Stick */ { USB_DEVICE(0x0471, 0x066A) }, /* AKTAKOM ACE-1001 cable */ { USB_DEVICE(0x0489, 0xE000) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */ @@ -69,6 +70,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x0FCF, 0x1004) }, /* Dynastream ANT2USB */ { USB_DEVICE(0x0FCF, 0x1006) }, /* Dynastream ANT development board */ { USB_DEVICE(0x0FDE, 0xCA05) }, /* OWL Wireless Electricity Monitor CM-160 */ + { USB_DEVICE(0x106F, 0x0003) }, /* CPI / Money Controls Bulk Coin Recycler */ { USB_DEVICE(0x10A6, 0xAA26) }, /* Knock-off DCU-11 cable */ { USB_DEVICE(0x10AB, 0x10C5) }, /* Siemens MC60 Cable */ { USB_DEVICE(0x10B5, 0xAC70) }, /* Nokia CA-42 USB */ @@ -193,6 +195,8 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x16DC, 0x0015) }, /* W-IE-NE-R Plein & Baus GmbH CML Control, Monitoring and Data Logger */ { USB_DEVICE(0x17A8, 0x0001) }, /* Kamstrup Optical Eye/3-wire */ { USB_DEVICE(0x17A8, 0x0005) }, /* Kamstrup M-Bus Master MultiPort 250D */ + { USB_DEVICE(0x17A8, 0x0101) }, /* Kamstrup 868 MHz wM-Bus C-Mode Meter Reader (Int Ant) */ + { USB_DEVICE(0x17A8, 0x0102) }, /* Kamstrup 868 MHz wM-Bus C-Mode Meter Reader (Ext Ant) */ { USB_DEVICE(0x17F4, 0xAAAA) }, /* Wavesense Jazz blood glucose meter */ { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */ { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */ @@ -234,6 +238,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1FB9, 0x0602) }, /* Lake Shore Model 648 Magnet Power Supply */ { USB_DEVICE(0x1FB9, 0x0700) }, /* Lake Shore Model 737 VSM Controller */ { USB_DEVICE(0x1FB9, 0x0701) }, /* Lake Shore Model 776 Hall Matrix */ + { USB_DEVICE(0x2184, 0x0030) }, /* GW Instek GDM-834x Digital Multimeter */ { USB_DEVICE(0x2626, 0xEA60) }, /* Aruba Networks 7xxx USB Serial Console */ { USB_DEVICE(0x3195, 0xF190) }, /* Link Instruments MSO-19 */ { USB_DEVICE(0x3195, 0xF280) }, /* Link Instruments MSO-28 */ @@ -1551,6 +1556,8 @@ static int cp2105_gpioconf_init(struct usb_serial *serial) /* 2 banks of GPIO - One for the pins taken from each serial port */ if (intf_num == 0) { + priv->gc.ngpio = 2; + if (mode.eci == CP210X_PIN_MODE_MODEM) { /* mark all GPIOs of this interface as reserved */ priv->gpio_altfunc = 0xff; @@ -1561,8 +1568,9 @@ static int cp2105_gpioconf_init(struct usb_serial *serial) priv->gpio_pushpull = (u8)((le16_to_cpu(config.gpio_mode) & CP210X_ECI_GPIO_MODE_MASK) >> CP210X_ECI_GPIO_MODE_OFFSET); - priv->gc.ngpio = 2; } else if (intf_num == 1) { + priv->gc.ngpio = 3; + if (mode.sci == CP210X_PIN_MODE_MODEM) { /* mark all GPIOs of this interface as reserved */ priv->gpio_altfunc = 0xff; @@ -1573,7 +1581,6 @@ static int cp2105_gpioconf_init(struct usb_serial *serial) priv->gpio_pushpull = (u8)((le16_to_cpu(config.gpio_mode) & CP210X_SCI_GPIO_MODE_MASK) >> CP210X_SCI_GPIO_MODE_OFFSET); - priv->gc.ngpio = 3; } else { return -ENODEV; } diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 1ec623e46214b..d671e096594bb 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -969,6 +969,7 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_VX_023_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_VX_034_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_101_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_159_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_1_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_2_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_3_PID) }, @@ -977,12 +978,14 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_6_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_7_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_8_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_235_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_257_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_1_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_2_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_3_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_4_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_313_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_320_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_324_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_346_1_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_346_2_PID) }, @@ -1020,6 +1023,9 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_DISPLAY_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_LITE_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_ANALOG_PID) }, + /* Belimo Automation devices */ + { USB_DEVICE(FTDI_VID, BELIMO_ZTH_PID) }, + { USB_DEVICE(FTDI_VID, BELIMO_ZIP_PID) }, /* ICP DAS I-756xU devices */ { USB_DEVICE(ICPDAS_VID, ICPDAS_I7560U_PID) }, { USB_DEVICE(ICPDAS_VID, ICPDAS_I7561U_PID) }, diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 755858ca20bac..4e92c165c86bf 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -1506,6 +1506,9 @@ #define BRAINBOXES_VX_023_PID 0x1003 /* VX-023 ExpressCard 1 Port RS422/485 */ #define BRAINBOXES_VX_034_PID 0x1004 /* VX-034 ExpressCard 2 Port RS422/485 */ #define BRAINBOXES_US_101_PID 0x1011 /* US-101 1xRS232 */ +#define BRAINBOXES_US_159_PID 0x1021 /* US-159 1xRS232 */ +#define BRAINBOXES_US_235_PID 0x1017 /* US-235 1xRS232 */ +#define BRAINBOXES_US_320_PID 0x1019 /* US-320 1xRS422/485 */ #define BRAINBOXES_US_324_PID 0x1013 /* US-324 1xRS422/485 1Mbaud */ #define BRAINBOXES_US_606_1_PID 0x2001 /* US-606 6 Port RS232 Serial Port 1 and 2 */ #define BRAINBOXES_US_606_2_PID 0x2002 /* US-606 6 Port RS232 Serial Port 3 and 4 */ @@ -1565,6 +1568,12 @@ #define CHETCO_SEASMART_LITE_PID 0xA5AE /* SeaSmart Lite USB Adapter */ #define CHETCO_SEASMART_ANALOG_PID 0xA5AF /* SeaSmart Analog Adapter */ +/* + * Belimo Automation + */ +#define BELIMO_ZTH_PID 0x8050 +#define BELIMO_ZIP_PID 0xC811 + /* * Unjo AB */ diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index c327d4cf79285..03bcab3b9bd09 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -168,6 +168,7 @@ static const struct usb_device_id edgeport_2port_id_table[] = { { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_8S) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_416) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_416B) }, + { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_E5805A) }, { } }; @@ -206,6 +207,7 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_8S) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_416) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_416B) }, + { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_E5805A) }, { } }; diff --git a/drivers/usb/serial/io_usbvend.h b/drivers/usb/serial/io_usbvend.h index c38e87ac5ea9e..7ca5ca49adff8 100644 --- a/drivers/usb/serial/io_usbvend.h +++ b/drivers/usb/serial/io_usbvend.h @@ -212,6 +212,7 @@ // // Definitions for other product IDs #define ION_DEVICE_ID_MT4X56USB 0x1403 // OEM device +#define ION_DEVICE_ID_E5805A 0x1A01 // OEM device (rebranded Edgeport/4) #define GENERATION_ID_FROM_USB_PRODUCT_ID(ProductId) \ diff --git a/drivers/usb/serial/keyspan.c b/drivers/usb/serial/keyspan.c index aa3dbce22cfbe..451759f38b573 100644 --- a/drivers/usb/serial/keyspan.c +++ b/drivers/usb/serial/keyspan.c @@ -2910,22 +2910,22 @@ static int keyspan_port_probe(struct usb_serial_port *port) for (i = 0; i < ARRAY_SIZE(p_priv->in_buffer); ++i) { p_priv->in_buffer[i] = kzalloc(IN_BUFLEN, GFP_KERNEL); if (!p_priv->in_buffer[i]) - goto err_in_buffer; + goto err_free_in_buffer; } for (i = 0; i < ARRAY_SIZE(p_priv->out_buffer); ++i) { p_priv->out_buffer[i] = kzalloc(OUT_BUFLEN, GFP_KERNEL); if (!p_priv->out_buffer[i]) - goto err_out_buffer; + goto err_free_out_buffer; } p_priv->inack_buffer = kzalloc(INACK_BUFLEN, GFP_KERNEL); if (!p_priv->inack_buffer) - goto err_inack_buffer; + goto err_free_out_buffer; p_priv->outcont_buffer = kzalloc(OUTCONT_BUFLEN, GFP_KERNEL); if (!p_priv->outcont_buffer) - goto err_outcont_buffer; + goto err_free_inack_buffer; p_priv->device_details = d_details; @@ -2971,15 +2971,14 @@ static int keyspan_port_probe(struct usb_serial_port *port) return 0; -err_outcont_buffer: +err_free_inack_buffer: kfree(p_priv->inack_buffer); -err_inack_buffer: +err_free_out_buffer: for (i = 0; i < ARRAY_SIZE(p_priv->out_buffer); ++i) kfree(p_priv->out_buffer[i]); -err_out_buffer: +err_free_in_buffer: for (i = 0; i < ARRAY_SIZE(p_priv->in_buffer); ++i) kfree(p_priv->in_buffer[i]); -err_in_buffer: kfree(p_priv); return -ENOMEM; diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index aefc1b58d9563..f5caf38ba2be0 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -226,8 +226,10 @@ static int read_mos_reg(struct usb_serial *serial, unsigned int serial_portnum, int status; buf = kmalloc(1, GFP_KERNEL); - if (!buf) + if (!buf) { + *data = 0; return -ENOMEM; + } status = usb_control_msg(usbdev, pipe, request, requesttype, value, index, buf, 1, MOS_WDR_TIMEOUT); diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 2b8a0d4b66fce..84b6093ed1d28 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -114,7 +114,6 @@ #define BANDB_DEVICE_ID_USOPTL4_2P 0xBC02 #define BANDB_DEVICE_ID_USOPTL4_4 0xAC44 #define BANDB_DEVICE_ID_USOPTL4_4P 0xBC03 -#define BANDB_DEVICE_ID_USOPTL2_4 0xAC24 /* This driver also supports * ATEN UC2324 device using Moschip MCS7840 @@ -196,7 +195,6 @@ static const struct usb_device_id id_table[] = { {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL4_2P)}, {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL4_4)}, {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL4_4P)}, - {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL2_4)}, {USB_DEVICE(USB_VENDOR_ID_ATENINTL, ATENINTL_DEVICE_ID_UC2324)}, {USB_DEVICE(USB_VENDOR_ID_ATENINTL, ATENINTL_DEVICE_ID_UC2322)}, {USB_DEVICE(USB_VENDOR_ID_MOXA, MOXA_DEVICE_ID_2210)}, diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 793530f241ceb..2317ed357d8ef 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -198,6 +198,8 @@ static void option_instat_callback(struct urb *urb); #define DELL_PRODUCT_5821E 0x81d7 #define DELL_PRODUCT_5821E_ESIM 0x81e0 +#define DELL_PRODUCT_5829E_ESIM 0x81e4 +#define DELL_PRODUCT_5829E 0x81e6 #define KYOCERA_VENDOR_ID 0x0c88 #define KYOCERA_PRODUCT_KPC650 0x17da @@ -246,12 +248,16 @@ static void option_instat_callback(struct urb *urb); /* These Quectel products use Quectel's vendor ID */ #define QUECTEL_PRODUCT_EC21 0x0121 #define QUECTEL_PRODUCT_EC25 0x0125 +#define QUECTEL_PRODUCT_EG91 0x0191 #define QUECTEL_PRODUCT_EG95 0x0195 #define QUECTEL_PRODUCT_BG96 0x0296 #define QUECTEL_PRODUCT_EP06 0x0306 +#define QUECTEL_PRODUCT_EM05G 0x030a #define QUECTEL_PRODUCT_EM12 0x0512 #define QUECTEL_PRODUCT_RM500Q 0x0800 +#define QUECTEL_PRODUCT_EC200S_CN 0x6002 #define QUECTEL_PRODUCT_EC200T 0x6026 +#define QUECTEL_PRODUCT_RM500K 0x7001 #define CMOTECH_VENDOR_ID 0x16d8 #define CMOTECH_PRODUCT_6001 0x6001 @@ -428,6 +434,10 @@ static void option_instat_callback(struct urb *urb); #define CINTERION_PRODUCT_CLS8 0x00b0 #define CINTERION_PRODUCT_MV31_MBIM 0x00b3 #define CINTERION_PRODUCT_MV31_RMNET 0x00b7 +#define CINTERION_PRODUCT_MV31_2_MBIM 0x00b8 +#define CINTERION_PRODUCT_MV31_2_RMNET 0x00b9 +#define CINTERION_PRODUCT_MV32_WA 0x00f1 +#define CINTERION_PRODUCT_MV32_WB 0x00f2 /* Olivetti products */ #define OLIVETTI_VENDOR_ID 0x0b3c @@ -1061,6 +1071,10 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(0) | RSVD(1) | RSVD(6) }, { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5821E_ESIM), .driver_info = RSVD(0) | RSVD(1) | RSVD(6) }, + { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5829E), + .driver_info = RSVD(0) | RSVD(6) }, + { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5829E_ESIM), + .driver_info = RSVD(0) | RSVD(6) }, { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_E100A) }, /* ADU-E100, ADU-310 */ { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_500A) }, { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_620UW) }, @@ -1111,6 +1125,9 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC25, 0xff, 0xff, 0xff), .driver_info = NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC25, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EG91, 0xff, 0xff, 0xff), + .driver_info = NUMEP2 }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EG91, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EG95, 0xff, 0xff, 0xff), .driver_info = NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EG95, 0xff, 0, 0) }, @@ -1119,16 +1136,22 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0xff, 0xff), .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) }, + { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G, 0xff), + .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0xff, 0xff), .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, 0x0620, 0xff, 0xff, 0x30) }, /* EM160R-GL */ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, 0x0620, 0xff, 0, 0) }, + { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, 0x0700, 0xff), /* BG95 */ + .driver_info = RSVD(3) | ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x10), .driver_info = ZLP }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200S_CN, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200T, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500K, 0xff, 0x00, 0x00) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, @@ -1205,6 +1228,28 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1056, 0xff), /* Telit FD980 */ .driver_info = NCTRL(2) | RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1057, 0xff), /* Telit FN980 */ + .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1058, 0xff), /* Telit FN980 (PCIe) */ + .driver_info = NCTRL(0) | RSVD(1) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1060, 0xff), /* Telit LN920 (rmnet) */ + .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1061, 0xff), /* Telit LN920 (MBIM) */ + .driver_info = NCTRL(0) | RSVD(1) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1062, 0xff), /* Telit LN920 (RNDIS) */ + .driver_info = NCTRL(2) | RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1063, 0xff), /* Telit LN920 (ECM) */ + .driver_info = NCTRL(0) | RSVD(1) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1070, 0xff), /* Telit FN990 (rmnet) */ + .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1071, 0xff), /* Telit FN990 (MBIM) */ + .driver_info = NCTRL(0) | RSVD(1) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1072, 0xff), /* Telit FN990 (RNDIS) */ + .driver_info = NCTRL(2) | RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1073, 0xff), /* Telit FN990 (ECM) */ + .driver_info = NCTRL(0) | RSVD(1) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1075, 0xff), /* Telit FN990 (PCIe) */ + .driver_info = RSVD(0) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), @@ -1219,6 +1264,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1203, 0xff), /* Telit LE910Cx (RNDIS) */ .driver_info = NCTRL(2) | RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1204, 0xff), /* Telit LE910Cx (MBIM) */ + .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920), @@ -1237,6 +1284,7 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1231, 0xff), /* Telit LE910Cx (RNDIS) */ .driver_info = NCTRL(2) | RSVD(3) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x1250, 0xff, 0x00, 0x00) }, /* Telit LE910Cx (rmnet) */ { USB_DEVICE(TELIT_VENDOR_ID, 0x1260), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE(TELIT_VENDOR_ID, 0x1261), @@ -1249,8 +1297,16 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(2) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x7011, 0xff), /* Telit LE910-S1 (ECM) */ .driver_info = NCTRL(2) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x701a, 0xff), /* Telit LE910R1 (RNDIS) */ + .driver_info = NCTRL(2) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x701b, 0xff), /* Telit LE910R1 (ECM) */ + .driver_info = NCTRL(2) }, { USB_DEVICE(TELIT_VENDOR_ID, 0x9010), /* Telit SBL FN980 flashing device */ .driver_info = NCTRL(0) | ZLP }, + { USB_DEVICE(TELIT_VENDOR_ID, 0x9200), /* Telit LE910S1 flashing device */ + .driver_info = NCTRL(0) | ZLP }, + { USB_DEVICE(TELIT_VENDOR_ID, 0x9201), /* Telit LE910R1 flashing device */ + .driver_info = NCTRL(0) | ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0002, 0xff, 0xff, 0xff), .driver_info = RSVD(1) }, @@ -1623,6 +1679,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(2) }, { USB_DEVICE_INTERFACE_CLASS(ZTE_VENDOR_ID, 0x1476, 0xff) }, /* GosunCn ZTE WeLink ME3630 (ECM/NCM mode) */ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1481, 0xff, 0x00, 0x00) }, /* ZTE MF871A */ + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1485, 0xff, 0xff, 0xff), /* ZTE MF286D */ + .driver_info = RSVD(5) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1533, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1534, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1535, 0xff, 0xff, 0xff) }, @@ -1650,7 +1708,6 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0060, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0070, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0073, 0xff, 0xff, 0xff) }, - { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0094, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0130, 0xff, 0xff, 0xff), .driver_info = RSVD(1) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0133, 0xff, 0xff, 0xff), @@ -1930,6 +1987,14 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(3)}, { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV31_RMNET, 0xff), .driver_info = RSVD(0)}, + { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV31_2_MBIM, 0xff), + .driver_info = RSVD(3)}, + { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV31_2_RMNET, 0xff), + .driver_info = RSVD(0)}, + { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV32_WA, 0xff), + .driver_info = RSVD(3)}, + { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV32_WB, 0xff), + .driver_info = RSVD(3)}, { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD100), .driver_info = RSVD(4) }, { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD120), @@ -2068,13 +2133,24 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(0) | RSVD(1) | RSVD(6) }, { USB_DEVICE(0x0489, 0xe0b5), /* Foxconn T77W968 ESIM */ .driver_info = RSVD(0) | RSVD(1) | RSVD(6) }, + { USB_DEVICE_INTERFACE_CLASS(0x0489, 0xe0db, 0xff), /* Foxconn T99W265 MBIM */ + .driver_info = RSVD(3) }, { USB_DEVICE(0x1508, 0x1001), /* Fibocom NL668 (IOT version) */ .driver_info = RSVD(4) | RSVD(5) | RSVD(6) }, + { USB_DEVICE(0x1782, 0x4d10) }, /* Fibocom L610 (AT mode) */ + { USB_DEVICE_INTERFACE_CLASS(0x1782, 0x4d11, 0xff) }, /* Fibocom L610 (ECM/RNDIS mode) */ { USB_DEVICE(0x2cb7, 0x0104), /* Fibocom NL678 series */ .driver_info = RSVD(4) | RSVD(5) }, { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0105, 0xff), /* Fibocom NL678 series */ .driver_info = RSVD(6) }, + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0106, 0xff) }, /* Fibocom MA510 (ECM mode w/ diag intf.) */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x010a, 0xff) }, /* Fibocom MA510 (ECM mode) */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0xff, 0x30) }, /* Fibocom FG150 Diag */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0, 0) }, /* Fibocom FG150 AT */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a2, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a4, 0xff), /* Fibocom FM101-GL (laptop MBIM) */ + .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x2df3, 0x9d03, 0xff) }, /* LongSung M5710 */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1404, 0xff) }, /* GosunCn GM500 RNDIS */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1405, 0xff) }, /* GosunCn GM500 MBIM */ diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 9600cee957697..386894fe59eef 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -100,6 +100,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(HP_VENDOR_ID, HP_LCM220_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LCM960_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LM920_PRODUCT_ID) }, + { USB_DEVICE(HP_VENDOR_ID, HP_LM930_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LM940_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_TD620_PRODUCT_ID) }, { USB_DEVICE(CRESSI_VENDOR_ID, CRESSI_EDY_PRODUCT_ID) }, @@ -110,6 +111,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(ADLINK_VENDOR_ID, ADLINK_ND6530GC_PRODUCT_ID) }, { USB_DEVICE(SMART_VENDOR_ID, SMART_PRODUCT_ID) }, { USB_DEVICE(AT_VENDOR_ID, AT_VTKIT3_PRODUCT_ID) }, + { USB_DEVICE(IBM_VENDOR_ID, IBM_PRODUCT_ID) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h index 3e5442573fe4e..873e50088a36e 100644 --- a/drivers/usb/serial/pl2303.h +++ b/drivers/usb/serial/pl2303.h @@ -29,6 +29,9 @@ #define ATEN_PRODUCT_UC232B 0x2022 #define ATEN_PRODUCT_ID2 0x2118 +#define IBM_VENDOR_ID 0x04b3 +#define IBM_PRODUCT_ID 0x4016 + #define IODATA_VENDOR_ID 0x04bb #define IODATA_PRODUCT_ID 0x0a03 #define IODATA_PRODUCT_ID_RSAQ5 0x0a0e @@ -126,6 +129,7 @@ #define HP_TD620_PRODUCT_ID 0x0956 #define HP_LD960_PRODUCT_ID 0x0b39 #define HP_LD381_PRODUCT_ID 0x0f7f +#define HP_LM930_PRODUCT_ID 0x0f9b #define HP_LCM220_PRODUCT_ID 0x3139 #define HP_LCM960_PRODUCT_ID 0x3239 #define HP_LD220_PRODUCT_ID 0x3524 diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 0f60363c1bbc8..3f437f07356b4 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -165,6 +165,9 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x1199, 0x907b)}, /* Sierra Wireless EM74xx */ {DEVICE_SWI(0x1199, 0x9090)}, /* Sierra Wireless EM7565 QDL */ {DEVICE_SWI(0x1199, 0x9091)}, /* Sierra Wireless EM7565 */ + {DEVICE_SWI(0x1199, 0x90d2)}, /* Sierra Wireless EM9191 QDL */ + {DEVICE_SWI(0x1199, 0xc080)}, /* Sierra Wireless EM7590 QDL */ + {DEVICE_SWI(0x1199, 0xc081)}, /* Sierra Wireless EM7590 */ {DEVICE_SWI(0x413c, 0x81a2)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a3)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a4)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */ diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index bd23a7cb1be2b..4c6747889a194 100644 --- a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -91,6 +91,11 @@ DEVICE(moto_modem, MOTO_IDS); { USB_DEVICE(0x0cad, 0x9016) } /* TPG2200 */ DEVICE(motorola_tetra, MOTOROLA_TETRA_IDS); +/* Nokia mobile phone driver */ +#define NOKIA_IDS() \ + { USB_DEVICE(0x0421, 0x069a) } /* Nokia 130 (RM-1035) */ +DEVICE(nokia, NOKIA_IDS); + /* Novatel Wireless GPS driver */ #define NOVATEL_IDS() \ { USB_DEVICE(0x09d7, 0x0100) } /* NovAtel FlexPack GPS */ @@ -123,6 +128,7 @@ static struct usb_serial_driver * const serial_drivers[] = { &vivopay_device, &moto_modem_device, &motorola_tetra_device, + &nokia_device, &novatel_gps_device, &hp4x_device, &suunto_device, @@ -140,6 +146,7 @@ static const struct usb_device_id id_table[] = { VIVOPAY_IDS(), MOTO_IDS(), MOTOROLA_TETRA_IDS(), + NOKIA_IDS(), NOVATEL_IDS(), HP4X_IDS(), SUUNTO_IDS(), diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c index ca3bd58f2025a..5576cfa50bc80 100644 --- a/drivers/usb/serial/whiteheat.c +++ b/drivers/usb/serial/whiteheat.c @@ -599,9 +599,8 @@ static int firm_send_command(struct usb_serial_port *port, __u8 command, switch (command) { case WHITEHEAT_GET_DTR_RTS: info = usb_get_serial_port_data(port); - memcpy(&info->mcr, command_info->result_buffer, - sizeof(struct whiteheat_dr_info)); - break; + info->mcr = command_info->result_buffer[0]; + break; } } exit: diff --git a/drivers/usb/storage/ene_ub6250.c b/drivers/usb/storage/ene_ub6250.c index 8b1b730654218..9c984f3c7248a 100644 --- a/drivers/usb/storage/ene_ub6250.c +++ b/drivers/usb/storage/ene_ub6250.c @@ -237,36 +237,33 @@ static struct us_unusual_dev ene_ub6250_unusual_dev_list[] = { #define memstick_logaddr(logadr1, logadr0) ((((u16)(logadr1)) << 8) | (logadr0)) -struct SD_STATUS { - u8 Insert:1; - u8 Ready:1; - u8 MediaChange:1; - u8 IsMMC:1; - u8 HiCapacity:1; - u8 HiSpeed:1; - u8 WtP:1; - u8 Reserved:1; -}; - -struct MS_STATUS { - u8 Insert:1; - u8 Ready:1; - u8 MediaChange:1; - u8 IsMSPro:1; - u8 IsMSPHG:1; - u8 Reserved1:1; - u8 WtP:1; - u8 Reserved2:1; -}; - -struct SM_STATUS { - u8 Insert:1; - u8 Ready:1; - u8 MediaChange:1; - u8 Reserved:3; - u8 WtP:1; - u8 IsMS:1; -}; +/* SD_STATUS bits */ +#define SD_Insert BIT(0) +#define SD_Ready BIT(1) +#define SD_MediaChange BIT(2) +#define SD_IsMMC BIT(3) +#define SD_HiCapacity BIT(4) +#define SD_HiSpeed BIT(5) +#define SD_WtP BIT(6) + /* Bit 7 reserved */ + +/* MS_STATUS bits */ +#define MS_Insert BIT(0) +#define MS_Ready BIT(1) +#define MS_MediaChange BIT(2) +#define MS_IsMSPro BIT(3) +#define MS_IsMSPHG BIT(4) + /* Bit 5 reserved */ +#define MS_WtP BIT(6) + /* Bit 7 reserved */ + +/* SM_STATUS bits */ +#define SM_Insert BIT(0) +#define SM_Ready BIT(1) +#define SM_MediaChange BIT(2) + /* Bits 3-5 reserved */ +#define SM_WtP BIT(6) +#define SM_IsMS BIT(7) struct ms_bootblock_cis { u8 bCistplDEVICE[6]; /* 0 */ @@ -437,9 +434,9 @@ struct ene_ub6250_info { u8 *bbuf; /* for 6250 code */ - struct SD_STATUS SD_Status; - struct MS_STATUS MS_Status; - struct SM_STATUS SM_Status; + u8 SD_Status; + u8 MS_Status; + u8 SM_Status; /* ----- SD Control Data ---------------- */ /*SD_REGISTER SD_Regs; */ @@ -602,7 +599,7 @@ static int sd_scsi_test_unit_ready(struct us_data *us, struct scsi_cmnd *srb) { struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra; - if (info->SD_Status.Insert && info->SD_Status.Ready) + if ((info->SD_Status & SD_Insert) && (info->SD_Status & SD_Ready)) return USB_STOR_TRANSPORT_GOOD; else { ene_sd_init(us); @@ -622,7 +619,7 @@ static int sd_scsi_mode_sense(struct us_data *us, struct scsi_cmnd *srb) 0x0b, 0x00, 0x80, 0x08, 0x00, 0x00, 0x71, 0xc0, 0x00, 0x00, 0x02, 0x00 }; - if (info->SD_Status.WtP) + if (info->SD_Status & SD_WtP) usb_stor_set_xfer_buf(mediaWP, 12, srb); else usb_stor_set_xfer_buf(mediaNoWP, 12, srb); @@ -641,9 +638,9 @@ static int sd_scsi_read_capacity(struct us_data *us, struct scsi_cmnd *srb) struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra; usb_stor_dbg(us, "sd_scsi_read_capacity\n"); - if (info->SD_Status.HiCapacity) { + if (info->SD_Status & SD_HiCapacity) { bl_len = 0x200; - if (info->SD_Status.IsMMC) + if (info->SD_Status & SD_IsMMC) bl_num = info->HC_C_SIZE-1; else bl_num = (info->HC_C_SIZE + 1) * 1024 - 1; @@ -693,7 +690,7 @@ static int sd_scsi_read(struct us_data *us, struct scsi_cmnd *srb) return USB_STOR_TRANSPORT_ERROR; } - if (info->SD_Status.HiCapacity) + if (info->SD_Status & SD_HiCapacity) bnByte = bn; /* set up the command wrapper */ @@ -733,7 +730,7 @@ static int sd_scsi_write(struct us_data *us, struct scsi_cmnd *srb) return USB_STOR_TRANSPORT_ERROR; } - if (info->SD_Status.HiCapacity) + if (info->SD_Status & SD_HiCapacity) bnByte = bn; /* set up the command wrapper */ @@ -1455,7 +1452,7 @@ static int ms_scsi_test_unit_ready(struct us_data *us, struct scsi_cmnd *srb) struct ene_ub6250_info *info = (struct ene_ub6250_info *)(us->extra); /* pr_info("MS_SCSI_Test_Unit_Ready\n"); */ - if (info->MS_Status.Insert && info->MS_Status.Ready) { + if ((info->MS_Status & MS_Insert) && (info->MS_Status & MS_Ready)) { return USB_STOR_TRANSPORT_GOOD; } else { ene_ms_init(us); @@ -1475,7 +1472,7 @@ static int ms_scsi_mode_sense(struct us_data *us, struct scsi_cmnd *srb) 0x0b, 0x00, 0x80, 0x08, 0x00, 0x00, 0x71, 0xc0, 0x00, 0x00, 0x02, 0x00 }; - if (info->MS_Status.WtP) + if (info->MS_Status & MS_WtP) usb_stor_set_xfer_buf(mediaWP, 12, srb); else usb_stor_set_xfer_buf(mediaNoWP, 12, srb); @@ -1494,7 +1491,7 @@ static int ms_scsi_read_capacity(struct us_data *us, struct scsi_cmnd *srb) usb_stor_dbg(us, "ms_scsi_read_capacity\n"); bl_len = 0x200; - if (info->MS_Status.IsMSPro) + if (info->MS_Status & MS_IsMSPro) bl_num = info->MSP_TotalBlock - 1; else bl_num = info->MS_Lib.NumberOfLogBlock * info->MS_Lib.blockSize * 2 - 1; @@ -1649,7 +1646,7 @@ static int ms_scsi_read(struct us_data *us, struct scsi_cmnd *srb) if (bn > info->bl_num) return USB_STOR_TRANSPORT_ERROR; - if (info->MS_Status.IsMSPro) { + if (info->MS_Status & MS_IsMSPro) { result = ene_load_bincode(us, MSP_RW_PATTERN); if (result != USB_STOR_XFER_GOOD) { usb_stor_dbg(us, "Load MPS RW pattern Fail !!\n"); @@ -1750,7 +1747,7 @@ static int ms_scsi_write(struct us_data *us, struct scsi_cmnd *srb) if (bn > info->bl_num) return USB_STOR_TRANSPORT_ERROR; - if (info->MS_Status.IsMSPro) { + if (info->MS_Status & MS_IsMSPro) { result = ene_load_bincode(us, MSP_RW_PATTERN); if (result != USB_STOR_XFER_GOOD) { pr_info("Load MSP RW pattern Fail !!\n"); @@ -1858,12 +1855,12 @@ static int ene_get_card_status(struct us_data *us, u8 *buf) tmpreg = (u16) reg4b; reg4b = *(u32 *)(&buf[0x14]); - if (info->SD_Status.HiCapacity && !info->SD_Status.IsMMC) + if ((info->SD_Status & SD_HiCapacity) && !(info->SD_Status & SD_IsMMC)) info->HC_C_SIZE = (reg4b >> 8) & 0x3fffff; info->SD_C_SIZE = ((tmpreg & 0x03) << 10) | (u16)(reg4b >> 22); info->SD_C_SIZE_MULT = (u8)(reg4b >> 7) & 0x07; - if (info->SD_Status.HiCapacity && info->SD_Status.IsMMC) + if ((info->SD_Status & SD_HiCapacity) && (info->SD_Status & SD_IsMMC)) info->HC_C_SIZE = *(u32 *)(&buf[0x100]); if (info->SD_READ_BL_LEN > SD_BLOCK_LEN) { @@ -2075,6 +2072,7 @@ static int ene_ms_init(struct us_data *us) u16 MSP_BlockSize, MSP_UserAreaBlocks; struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra; u8 *bbuf = info->bbuf; + unsigned int s; printk(KERN_INFO "transport --- ENE_MSInit\n"); @@ -2099,15 +2097,16 @@ static int ene_ms_init(struct us_data *us) return USB_STOR_TRANSPORT_ERROR; } /* the same part to test ENE */ - info->MS_Status = *(struct MS_STATUS *) bbuf; - - if (info->MS_Status.Insert && info->MS_Status.Ready) { - printk(KERN_INFO "Insert = %x\n", info->MS_Status.Insert); - printk(KERN_INFO "Ready = %x\n", info->MS_Status.Ready); - printk(KERN_INFO "IsMSPro = %x\n", info->MS_Status.IsMSPro); - printk(KERN_INFO "IsMSPHG = %x\n", info->MS_Status.IsMSPHG); - printk(KERN_INFO "WtP= %x\n", info->MS_Status.WtP); - if (info->MS_Status.IsMSPro) { + info->MS_Status = bbuf[0]; + + s = info->MS_Status; + if ((s & MS_Insert) && (s & MS_Ready)) { + printk(KERN_INFO "Insert = %x\n", !!(s & MS_Insert)); + printk(KERN_INFO "Ready = %x\n", !!(s & MS_Ready)); + printk(KERN_INFO "IsMSPro = %x\n", !!(s & MS_IsMSPro)); + printk(KERN_INFO "IsMSPHG = %x\n", !!(s & MS_IsMSPHG)); + printk(KERN_INFO "WtP= %x\n", !!(s & MS_WtP)); + if (s & MS_IsMSPro) { MSP_BlockSize = (bbuf[6] << 8) | bbuf[7]; MSP_UserAreaBlocks = (bbuf[10] << 8) | bbuf[11]; info->MSP_TotalBlock = MSP_BlockSize * MSP_UserAreaBlocks; @@ -2168,17 +2167,17 @@ static int ene_sd_init(struct us_data *us) return USB_STOR_TRANSPORT_ERROR; } - info->SD_Status = *(struct SD_STATUS *) bbuf; - if (info->SD_Status.Insert && info->SD_Status.Ready) { - struct SD_STATUS *s = &info->SD_Status; + info->SD_Status = bbuf[0]; + if ((info->SD_Status & SD_Insert) && (info->SD_Status & SD_Ready)) { + unsigned int s = info->SD_Status; ene_get_card_status(us, bbuf); - usb_stor_dbg(us, "Insert = %x\n", s->Insert); - usb_stor_dbg(us, "Ready = %x\n", s->Ready); - usb_stor_dbg(us, "IsMMC = %x\n", s->IsMMC); - usb_stor_dbg(us, "HiCapacity = %x\n", s->HiCapacity); - usb_stor_dbg(us, "HiSpeed = %x\n", s->HiSpeed); - usb_stor_dbg(us, "WtP = %x\n", s->WtP); + usb_stor_dbg(us, "Insert = %x\n", !!(s & SD_Insert)); + usb_stor_dbg(us, "Ready = %x\n", !!(s & SD_Ready)); + usb_stor_dbg(us, "IsMMC = %x\n", !!(s & SD_IsMMC)); + usb_stor_dbg(us, "HiCapacity = %x\n", !!(s & SD_HiCapacity)); + usb_stor_dbg(us, "HiSpeed = %x\n", !!(s & SD_HiSpeed)); + usb_stor_dbg(us, "WtP = %x\n", !!(s & SD_WtP)); } else { usb_stor_dbg(us, "SD Card Not Ready --- %x\n", bbuf[0]); return USB_STOR_TRANSPORT_ERROR; @@ -2200,14 +2199,14 @@ static int ene_init(struct us_data *us) misc_reg03 = bbuf[0]; if (misc_reg03 & 0x01) { - if (!info->SD_Status.Ready) { + if (!(info->SD_Status & SD_Ready)) { result = ene_sd_init(us); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; } } if (misc_reg03 & 0x02) { - if (!info->MS_Status.Ready) { + if (!(info->MS_Status & MS_Ready)) { result = ene_ms_init(us); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; @@ -2306,14 +2305,14 @@ static int ene_transport(struct scsi_cmnd *srb, struct us_data *us) /*US_DEBUG(usb_stor_show_command(us, srb)); */ scsi_set_resid(srb, 0); - if (unlikely(!(info->SD_Status.Ready || info->MS_Status.Ready))) + if (unlikely(!(info->SD_Status & SD_Ready) || (info->MS_Status & MS_Ready))) result = ene_init(us); if (result == USB_STOR_XFER_GOOD) { result = USB_STOR_TRANSPORT_ERROR; - if (info->SD_Status.Ready) + if (info->SD_Status & SD_Ready) result = sd_scsi_irp(us, srb); - if (info->MS_Status.Ready) + if (info->MS_Status & MS_Ready) result = ms_scsi_irp(us, srb); } return result; @@ -2377,7 +2376,6 @@ static int ene_ub6250_probe(struct usb_interface *intf, static int ene_ub6250_resume(struct usb_interface *iface) { - u8 tmp = 0; struct us_data *us = usb_get_intfdata(iface); struct ene_ub6250_info *info = (struct ene_ub6250_info *)(us->extra); @@ -2389,17 +2387,16 @@ static int ene_ub6250_resume(struct usb_interface *iface) mutex_unlock(&us->dev_mutex); info->Power_IsResum = true; - /*info->SD_Status.Ready = 0; */ - info->SD_Status = *(struct SD_STATUS *)&tmp; - info->MS_Status = *(struct MS_STATUS *)&tmp; - info->SM_Status = *(struct SM_STATUS *)&tmp; + /* info->SD_Status &= ~SD_Ready; */ + info->SD_Status = 0; + info->MS_Status = 0; + info->SM_Status = 0; return 0; } static int ene_ub6250_reset_resume(struct usb_interface *iface) { - u8 tmp = 0; struct us_data *us = usb_get_intfdata(iface); struct ene_ub6250_info *info = (struct ene_ub6250_info *)(us->extra); @@ -2411,10 +2408,10 @@ static int ene_ub6250_reset_resume(struct usb_interface *iface) * the device */ info->Power_IsResum = true; - /*info->SD_Status.Ready = 0; */ - info->SD_Status = *(struct SD_STATUS *)&tmp; - info->MS_Status = *(struct MS_STATUS *)&tmp; - info->SM_Status = *(struct SM_STATUS *)&tmp; + /* info->SD_Status &= ~SD_Ready; */ + info->SD_Status = 0; + info->MS_Status = 0; + info->SM_Status = 0; return 0; } diff --git a/drivers/usb/storage/karma.c b/drivers/usb/storage/karma.c index 05cec81dcd3f2..38ddfedef629c 100644 --- a/drivers/usb/storage/karma.c +++ b/drivers/usb/storage/karma.c @@ -174,24 +174,25 @@ static void rio_karma_destructor(void *extra) static int rio_karma_init(struct us_data *us) { - int ret = 0; struct karma_data *data = kzalloc(sizeof(struct karma_data), GFP_NOIO); if (!data) - goto out; + return -ENOMEM; data->recv = kmalloc(RIO_RECV_LEN, GFP_NOIO); if (!data->recv) { kfree(data); - goto out; + return -ENOMEM; } us->extra = data; us->extra_destructor = rio_karma_destructor; - ret = rio_karma_send_command(RIO_ENTER_STORAGE, us); - data->in_storage = (ret == 0); -out: - return ret; + if (rio_karma_send_command(RIO_ENTER_STORAGE, us)) + return -EIO; + + data->in_storage = 1; + + return 0; } static struct scsi_host_template karma_host_template; diff --git a/drivers/usb/storage/realtek_cr.c b/drivers/usb/storage/realtek_cr.c index 3789698d9d3c6..0c423916d7bfa 100644 --- a/drivers/usb/storage/realtek_cr.c +++ b/drivers/usb/storage/realtek_cr.c @@ -365,7 +365,7 @@ static int rts51x_read_mem(struct us_data *us, u16 addr, u8 *data, u16 len) buf = kmalloc(len, GFP_NOIO); if (buf == NULL) - return USB_STOR_TRANSPORT_ERROR; + return -ENOMEM; usb_stor_dbg(us, "addr = 0x%x, len = %d\n", addr, len); diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 861153d294b67..66e7f5d123c46 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -406,6 +406,16 @@ UNUSUAL_DEV( 0x04b8, 0x0602, 0x0110, 0x0110, "785EPX Storage", USB_SC_SCSI, USB_PR_BULK, NULL, US_FL_SINGLE_LUN), +/* + * Reported by James Buren + * Virtual ISOs cannot be remounted if ejected while the device is locked + * Disable locking to mimic Windows behavior that bypasses the issue + */ +UNUSUAL_DEV( 0x04c5, 0x2028, 0x0001, 0x0001, + "iODD", + "2531/2541", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NOT_LOCKABLE), + /* * Not sure who reported this originally but * Pavel Machek reported that the extra US_FL_SINGLE_LUN @@ -416,9 +426,16 @@ UNUSUAL_DEV( 0x04cb, 0x0100, 0x0000, 0x2210, USB_SC_UFI, USB_PR_DEVICE, NULL, US_FL_FIX_INQUIRY | US_FL_SINGLE_LUN), /* - * Reported by Ondrej Zary + * Reported by Ondrej Zary * The device reports one sector more and breaks when that sector is accessed + * Firmwares older than 2.6c (the latest one and the only that claims Linux + * support) have also broken tag handling */ +UNUSUAL_DEV( 0x04ce, 0x0002, 0x0000, 0x026b, + "ScanLogic", + "SL11R-IDE", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_FIX_CAPACITY | US_FL_BULK_IGNORE_TAG), UNUSUAL_DEV( 0x04ce, 0x0002, 0x026c, 0x026c, "ScanLogic", "SL11R-IDE", @@ -2284,6 +2301,16 @@ UNUSUAL_DEV( 0x2027, 0xa001, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_euscsi_init, US_FL_SCM_MULT_TARG ), +/* + * Reported by DocMAX + * and Thomas Weißschuh + */ +UNUSUAL_DEV( 0x2109, 0x0715, 0x9999, 0x9999, + "VIA Labs, Inc.", + "VL817 SATA Bridge", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_IGNORE_UAS), + UNUSUAL_DEV( 0x2116, 0x0320, 0x0001, 0x0001, "ST", "2A", diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index c7db6c943ba51..2f72753c3e225 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -50,7 +50,7 @@ UNUSUAL_DEV(0x059f, 0x1061, 0x0000, 0x9999, "LaCie", "Rugged USB3-FW", USB_SC_DEVICE, USB_PR_DEVICE, NULL, - US_FL_IGNORE_UAS), + US_FL_NO_REPORT_OPCODES | US_FL_NO_SAME), /* * Apricorn USB3 dongle sometimes returns "USBSUSBSUSBS" in response to SCSI diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index 526509ed92e5c..db926641b79da 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -1383,6 +1383,7 @@ void typec_set_pwr_opmode(struct typec_port *port, partner->usb_pd = 1; sysfs_notify(&partner_dev->kobj, NULL, "supports_usb_power_delivery"); + kobject_uevent(&partner_dev->kobj, KOBJ_CHANGE); } put_device(partner_dev); } diff --git a/drivers/usb/typec/tcpm/fusb302.c b/drivers/usb/typec/tcpm/fusb302.c index b498960ff72b5..5e661bae39972 100644 --- a/drivers/usb/typec/tcpm/fusb302.c +++ b/drivers/usb/typec/tcpm/fusb302.c @@ -669,25 +669,27 @@ static int tcpm_set_cc(struct tcpc_dev *dev, enum typec_cc_status cc) ret = fusb302_i2c_mask_write(chip, FUSB_REG_MASK, FUSB_REG_MASK_BC_LVL | FUSB_REG_MASK_COMP_CHNG, - FUSB_REG_MASK_COMP_CHNG); + FUSB_REG_MASK_BC_LVL); if (ret < 0) { fusb302_log(chip, "cannot set SRC interrupt, ret=%d", ret); goto done; } chip->intr_comp_chng = true; + chip->intr_bc_lvl = false; break; case TYPEC_CC_RD: ret = fusb302_i2c_mask_write(chip, FUSB_REG_MASK, FUSB_REG_MASK_BC_LVL | FUSB_REG_MASK_COMP_CHNG, - FUSB_REG_MASK_BC_LVL); + FUSB_REG_MASK_COMP_CHNG); if (ret < 0) { fusb302_log(chip, "cannot set SRC interrupt, ret=%d", ret); goto done; } chip->intr_bc_lvl = true; + chip->intr_comp_chng = false; break; default: break; diff --git a/drivers/usb/typec/tcpm/tcpci.c b/drivers/usb/typec/tcpm/tcpci.c index 59e304a341f8a..6caed68ce1be7 100644 --- a/drivers/usb/typec/tcpm/tcpci.c +++ b/drivers/usb/typec/tcpm/tcpci.c @@ -611,7 +611,7 @@ static int tcpci_remove(struct i2c_client *client) /* Disable chip interrupts before unregistering port */ err = tcpci_write16(chip->tcpci, TCPC_ALERT_MASK, 0); if (err < 0) - return err; + dev_warn(&client->dev, "Failed to disable irqs (%pe)\n", ERR_PTR(err)); tcpci_unregister_port(chip->tcpci); diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index b40db48f8874d..fb18264b702e6 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -3118,11 +3118,7 @@ static void run_state_machine(struct tcpm_port *port) tcpm_try_src(port) ? SRC_TRY : SNK_ATTACHED, 0); - else - /* Wait for VBUS, but not forever */ - tcpm_set_state(port, PORT_RESET, PD_T_PS_SOURCE_ON); break; - case SRC_TRY: port->try_src_count++; tcpm_set_cc(port, tcpm_rp_cc(port)); @@ -3679,6 +3675,7 @@ static void _tcpm_cc_change(struct tcpm_port *port, enum typec_cc_status cc1, tcpm_set_state(port, SRC_ATTACH_WAIT, 0); break; case SRC_ATTACHED: + case SRC_STARTUP: case SRC_SEND_CAPABILITIES: case SRC_READY: if (tcpm_port_is_disconnected(port) || @@ -3906,7 +3903,8 @@ static void _tcpm_pd_vbus_off(struct tcpm_port *port) case SNK_TRYWAIT_DEBOUNCE: break; case SNK_ATTACH_WAIT: - tcpm_set_state(port, SNK_UNATTACHED, 0); + case SNK_DEBOUNCED: + /* Do nothing, as TCPM is still waiting for vbus to reaach VSAFE5V to connect */ break; case SNK_NEGOTIATE_CAPABILITIES: diff --git a/drivers/usb/typec/tps6598x.c b/drivers/usb/typec/tps6598x.c index a38d1409f15b7..67bebee693301 100644 --- a/drivers/usb/typec/tps6598x.c +++ b/drivers/usb/typec/tps6598x.c @@ -109,7 +109,7 @@ tps6598x_block_read(struct tps6598x *tps, u8 reg, void *val, size_t len) u8 data[TPS_MAX_LEN + 1]; int ret; - if (WARN_ON(len + 1 > sizeof(data))) + if (len + 1 > sizeof(data)) return -EINVAL; if (!tps->i2c_protocol) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index f3934f2d3c443..0268e654cae68 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -748,14 +748,18 @@ ucsi_dr_swap(const struct typec_capability *cap, enum typec_data_role role) if (ret < 0) goto out_unlock; + mutex_unlock(&con->lock); + if (!wait_for_completion_timeout(&con->complete, - msecs_to_jiffies(UCSI_SWAP_TIMEOUT_MS))) - ret = -ETIMEDOUT; + msecs_to_jiffies(UCSI_SWAP_TIMEOUT_MS))) + return -ETIMEDOUT; + + return 0; out_unlock: mutex_unlock(&con->lock); - return ret < 0 ? ret : 0; + return ret; } static int @@ -780,11 +784,13 @@ ucsi_pr_swap(const struct typec_capability *cap, enum typec_role role) if (ret < 0) goto out_unlock; + mutex_unlock(&con->lock); + if (!wait_for_completion_timeout(&con->complete, - msecs_to_jiffies(UCSI_SWAP_TIMEOUT_MS))) { - ret = -ETIMEDOUT; - goto out_unlock; - } + msecs_to_jiffies(UCSI_SWAP_TIMEOUT_MS))) + return -ETIMEDOUT; + + mutex_lock(&con->lock); /* Something has gone wrong while swapping the role */ if (con->status.pwr_op_mode != UCSI_CONSTAT_PWR_OPMODE_PD) { diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c index d772fce519057..0a38f98f78650 100644 --- a/drivers/usb/typec/ucsi/ucsi_ccg.c +++ b/drivers/usb/typec/ucsi/ucsi_ccg.c @@ -304,7 +304,7 @@ static int ucsi_ccg_init(struct ucsi_ccg *uc) if (status < 0) return status; - if (!data) + if (!(data & DEV_INT)) return 0; status = ccg_write(uc, CCGX_RAB_INTR_REG, &data, sizeof(data)); diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c index d8d3892e5a69a..3c6d452e3bf40 100644 --- a/drivers/usb/usbip/stub_dev.c +++ b/drivers/usb/usbip/stub_dev.c @@ -393,7 +393,6 @@ static int stub_probe(struct usb_device *udev) err_port: dev_set_drvdata(&udev->dev, NULL); - usb_put_dev(udev); /* we already have busid_priv, just lock busid_lock */ spin_lock(&busid_priv->busid_lock); @@ -408,6 +407,7 @@ static int stub_probe(struct usb_device *udev) put_busid_priv(busid_priv); sdev_free: + usb_put_dev(udev); stub_device_free(sdev); return rc; diff --git a/drivers/usb/usbip/stub_rx.c b/drivers/usb/usbip/stub_rx.c index e2b0195322340..d3d360ff0d24e 100644 --- a/drivers/usb/usbip/stub_rx.c +++ b/drivers/usb/usbip/stub_rx.c @@ -138,7 +138,9 @@ static int tweak_set_configuration_cmd(struct urb *urb) req = (struct usb_ctrlrequest *) urb->setup_packet; config = le16_to_cpu(req->wValue); + usb_lock_device(sdev->udev); err = usb_set_configuration(sdev->udev, config); + usb_unlock_device(sdev->udev); if (err && err != -ENODEV) dev_err(&sdev->udev->dev, "can't set config #%d, error %d\n", config, err); diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index 98636fbf71882..170abb06a8a4d 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -455,8 +455,14 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, vhci_hcd->port_status[rhport] &= ~(1 << USB_PORT_FEAT_RESET); vhci_hcd->re_timeout = 0; + /* + * A few drivers do usb reset during probe when + * the device could be in VDEV_ST_USED state + */ if (vhci_hcd->vdev[rhport].ud.status == - VDEV_ST_NOTASSIGNED) { + VDEV_ST_NOTASSIGNED || + vhci_hcd->vdev[rhport].ud.status == + VDEV_ST_USED) { usbip_dbg_vhci_rh( " enable rhport %d (status %u)\n", rhport, @@ -952,8 +958,32 @@ static void vhci_device_unlink_cleanup(struct vhci_device *vdev) spin_lock(&vdev->priv_lock); list_for_each_entry_safe(unlink, tmp, &vdev->unlink_tx, list) { + struct urb *urb; + + /* give back urb of unsent unlink request */ pr_info("unlink cleanup tx %lu\n", unlink->unlink_seqnum); + + urb = pickup_urb_and_free_priv(vdev, unlink->unlink_seqnum); + if (!urb) { + list_del(&unlink->list); + kfree(unlink); + continue; + } + + urb->status = -ENODEV; + + usb_hcd_unlink_urb_from_ep(hcd, urb); + list_del(&unlink->list); + + spin_unlock(&vdev->priv_lock); + spin_unlock_irqrestore(&vhci->lock, flags); + + usb_hcd_giveback_urb(hcd, urb, urb->status); + + spin_lock_irqsave(&vhci->lock, flags); + spin_lock(&vdev->priv_lock); + kfree(unlink); } diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig index 1192765845b2d..c0556378b0376 100644 --- a/drivers/vdpa/Kconfig +++ b/drivers/vdpa/Kconfig @@ -11,7 +11,7 @@ if VDPA config VDPA_SIM tristate "vDPA device simulator core" - depends on RUNTIME_TESTING_MENU && HAS_DMA + depends on RUNTIME_TESTING_MENU && HAS_DMA && VHOST_DPN select VHOST_RING select IOMMU_IOVA help diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index 167a20fbb1494..e19dee2a19149 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -438,9 +438,9 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa, dev, &ifc_vdpa_ops, NULL, false); - if (adapter == NULL) { + if (IS_ERR(adapter)) { IFCVF_ERR(pdev, "Failed to allocate vDPA structure"); - return -ENOMEM; + return PTR_ERR(adapter); } pci_set_master(pdev); diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h index 5c92a576edae8..b6cc53ba980cc 100644 --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h @@ -4,9 +4,13 @@ #ifndef __MLX5_VDPA_H__ #define __MLX5_VDPA_H__ +#include +#include #include #include +#define MLX5V_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN) + struct mlx5_vdpa_direct_mr { u64 start; u64 end; @@ -15,6 +19,7 @@ struct mlx5_vdpa_direct_mr { struct sg_table sg_head; int log_size; int nsg; + int nent; struct list_head list; u64 offset; }; diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa_ifc.h b/drivers/vdpa/mlx5/core/mlx5_vdpa_ifc.h index f6f57a29b38ef..3c3bae2a3b6e2 100644 --- a/drivers/vdpa/mlx5/core/mlx5_vdpa_ifc.h +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa_ifc.h @@ -13,13 +13,15 @@ enum { }; enum { - MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT = 0x1, // do I check this caps? - MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED = 0x2, + MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT = 0, + MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED = 1, }; enum { - MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT = 0, - MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED = 1, + MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT = + BIT(MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT), + MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED = + BIT(MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED), }; struct mlx5_ifc_virtio_q_bits { diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index 4b6195666c589..32c9925de4736 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -25,17 +25,6 @@ static int get_octo_len(u64 len, int page_shift) return (npages + 1) / 2; } -static void fill_sg(struct mlx5_vdpa_direct_mr *mr, void *in) -{ - struct scatterlist *sg; - __be64 *pas; - int i; - - pas = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); - for_each_sg(mr->sg_head.sgl, sg, mr->nsg, i) - (*pas) = cpu_to_be64(sg_dma_address(sg)); -} - static void mlx5_set_access_mode(void *mkc, int mode) { MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3); @@ -45,10 +34,18 @@ static void mlx5_set_access_mode(void *mkc, int mode) static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt) { struct scatterlist *sg; + int nsg = mr->nsg; + u64 dma_addr; + u64 dma_len; + int j = 0; int i; - for_each_sg(mr->sg_head.sgl, sg, mr->nsg, i) - mtt[i] = cpu_to_be64(sg_dma_address(sg)); + for_each_sg(mr->sg_head.sgl, sg, mr->nent, i) { + for (dma_addr = sg_dma_address(sg), dma_len = sg_dma_len(sg); + nsg && dma_len; + nsg--, dma_addr += BIT(mr->log_size), dma_len -= BIT(mr->log_size)) + mtt[j++] = cpu_to_be64(dma_addr); + } } static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) @@ -64,7 +61,6 @@ static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct return -ENOMEM; MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid); - fill_sg(mr, in); mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); MLX5_SET(mkc, mkc, lw, !!(mr->perm & VHOST_MAP_WO)); MLX5_SET(mkc, mkc, lr, !!(mr->perm & VHOST_MAP_RO)); @@ -276,9 +272,11 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr done: mr->log_size = log_entity_size; mr->nsg = nsg; - err = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); - if (!err) + mr->nent = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); + if (!mr->nent) { + err = -ENOMEM; goto err_map; + } err = create_direct_mr(mvdev, mr); if (err) @@ -456,11 +454,6 @@ void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev) mutex_unlock(&mr->mkey_mtx); } -static bool map_empty(struct vhost_iotlb *iotlb) -{ - return !vhost_iotlb_itree_first(iotlb, 0, U64_MAX); -} - int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, bool *change_map) { @@ -468,10 +461,6 @@ int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *io int err = 0; *change_map = false; - if (map_empty(iotlb)) { - mlx5_vdpa_destroy_mr(mvdev); - return 0; - } mutex_lock(&mr->mkey_mtx); if (mr->initialized) { mlx5_vdpa_info(mvdev, "memory map update\n"); diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 92d20f680e1ce..34b73a96895b7 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "mlx5_vnet.h" #include "mlx5_vdpa_ifc.h" #include "mlx5_vdpa.h" @@ -77,6 +78,7 @@ struct mlx5_vq_restore_info { u64 device_addr; u64 driver_addr; u16 avail_index; + u16 used_index; bool ready; struct vdpa_callback cb; bool restore; @@ -111,6 +113,7 @@ struct mlx5_vdpa_virtqueue { u32 virtq_id; struct mlx5_vdpa_net *ndev; u16 avail_idx; + u16 used_idx; int fw_state; /* keep last in the struct */ @@ -464,6 +467,11 @@ static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq) static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num) { mlx5_cq_set_ci(&mvq->cq.mcq); + + /* make sure CQ cosumer update is visible to the hardware before updating + * RX doorbell record. + */ + dma_wmb(); rx_post(&mvq->vqqp, num); if (mvq->event_cb.callback) mvq->event_cb.callback(mvq->event_cb.private); @@ -503,7 +511,6 @@ static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent) void __iomem *uar_page = ndev->mvdev.res.uar->map; u32 out[MLX5_ST_SZ_DW(create_cq_out)]; struct mlx5_vdpa_cq *vcq = &mvq->cq; - unsigned int irqn; __be64 *pas; int inlen; void *cqc; @@ -543,7 +550,7 @@ static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent) /* Use vector 0 by default. Consider adding code to choose least used * vector. */ - err = mlx5_vector2eqn(mdev, 0, &eqn, &irqn); + err = mlx5_vector2eqn(mdev, 0, &eqn); if (err) goto err_vec; @@ -588,8 +595,8 @@ static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx) mlx5_db_free(ndev->mvdev.mdev, &vcq->db); } -static int umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num, - struct mlx5_vdpa_umem **umemp) +static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num, + struct mlx5_vdpa_umem **umemp) { struct mlx5_core_dev *mdev = ndev->mvdev.mdev; int p_a; @@ -612,7 +619,7 @@ static int umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq *umemp = &mvq->umem3; break; } - return p_a * mvq->num_ent + p_b; + (*umemp)->size = p_a * mvq->num_ent + p_b; } static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem) @@ -628,15 +635,10 @@ static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *m void *in; int err; __be64 *pas; - int size; struct mlx5_vdpa_umem *umem; - size = umem_size(ndev, mvq, num, &umem); - if (size < 0) - return size; - - umem->size = size; - err = umem_frag_buf_alloc(ndev, umem, size); + set_umem_size(ndev, mvq, num, &umem); + err = umem_frag_buf_alloc(ndev, umem, umem->size); if (err) return err; @@ -735,12 +737,12 @@ static int get_queue_type(struct mlx5_vdpa_net *ndev) type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type); /* prefer split queue */ - if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED) - return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED; + if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT) + return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT; - WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)); + WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED)); - return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT; + return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED; } static bool vq_is_tx(u16 idx) @@ -784,6 +786,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context); MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx); + MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx); MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3, get_features_12_3(ndev->mvdev.actual_features)); vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context); @@ -797,7 +800,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn); MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent); MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, - !!(ndev->mvdev.actual_features & VIRTIO_F_VERSION_1)); + !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1))); MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr); MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr); MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr); @@ -805,12 +808,10 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id); MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size); MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id); - MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem1.size); + MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size); MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id); - MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem1.size); + MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size); MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn); - if (MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, eth_frame_offload_type)) - MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 1); err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); if (err) @@ -1002,6 +1003,7 @@ static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *m struct mlx5_virtq_attr { u8 state; u16 available_index; + u16 used_index; }; static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, @@ -1032,6 +1034,7 @@ static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueu memset(attr, 0, sizeof(*attr)); attr->state = MLX5_GET(virtio_net_q_object, obj_context, state); attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index); + attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index); kfree(out); return 0; @@ -1144,6 +1147,7 @@ static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *m return; } mvq->avail_idx = attr.available_index; + mvq->used_idx = attr.used_index; } static void suspend_vqs(struct mlx5_vdpa_net *ndev) @@ -1401,6 +1405,7 @@ static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx, return -EINVAL; } + mvq->used_idx = state->avail_index; mvq->avail_idx = state->avail_index; return 0; } @@ -1418,7 +1423,11 @@ static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa * that cares about emulating the index after vq is stopped. */ if (!mvq->initialized) { - state->avail_index = mvq->avail_idx; + /* Firmware returns a wrong value for the available index. + * Since both values should be identical, we take the value of + * used_idx which is reported correctly. + */ + state->avail_index = mvq->used_idx; return 0; } @@ -1427,7 +1436,7 @@ static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n"); return err; } - state->avail_index = attr.available_index; + state->avail_index = attr.used_index; return 0; } @@ -1519,7 +1528,7 @@ static void teardown_virtqueues(struct mlx5_vdpa_net *ndev) static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev) { return virtio_legacy_is_little_endian() || - (mvdev->actual_features & (1ULL << VIRTIO_F_VERSION_1)); + (mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1)); } static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val) @@ -1590,6 +1599,7 @@ static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqu return err; ri->avail_index = attr.available_index; + ri->used_index = attr.used_index; ri->ready = mvq->ready; ri->num_ent = mvq->num_ent; ri->desc_addr = mvq->desc_addr; @@ -1634,6 +1644,7 @@ static void restore_channels_info(struct mlx5_vdpa_net *ndev) continue; mvq->avail_idx = ri->avail_index; + mvq->used_idx = ri->used_index; mvq->ready = ri->ready; mvq->num_ent = ri->num_ent; mvq->desc_addr = ri->desc_addr; @@ -1738,6 +1749,14 @@ static void teardown_driver(struct mlx5_vdpa_net *ndev) mutex_unlock(&ndev->reslock); } +static void clear_vqs_ready(struct mlx5_vdpa_net *ndev) +{ + int i; + + for (i = 0; i < ndev->mvdev.max_vqs; i++) + ndev->vqs[i].ready = false; +} + static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); @@ -1748,6 +1767,7 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) if (!status) { mlx5_vdpa_info(mvdev, "performing device reset\n"); teardown_driver(ndev); + clear_vqs_ready(ndev); mlx5_vdpa_destroy_mr(&ndev->mvdev); ndev->mvdev.status = 0; ndev->mvdev.mlx_features = 0; @@ -1826,11 +1846,16 @@ static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb static void mlx5_vdpa_free(struct vdpa_device *vdev) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_core_dev *pfmdev; struct mlx5_vdpa_net *ndev; ndev = to_mlx5_vdpa_ndev(mvdev); free_resources(ndev); + if (!is_zero_ether_addr(ndev->config.mac)) { + pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev)); + mlx5_mpfs_del_mac(pfmdev, ndev->config.mac); + } mlx5_vdpa_free_resources(&ndev->mvdev); mutex_destroy(&ndev->reslock); } @@ -1875,6 +1900,19 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = { .free = mlx5_vdpa_free, }; +static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu) +{ + u16 hw_mtu; + int err; + + err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu); + if (err) + return err; + + *mtu = hw_mtu - MLX5V_ETH_HARD_MTU; + return 0; +} + static int alloc_resources(struct mlx5_vdpa_net *ndev) { struct mlx5_vdpa_net_resources *res = &ndev->res; @@ -1937,11 +1975,18 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev) void *mlx5_vdpa_add_dev(struct mlx5_core_dev *mdev) { struct virtio_net_config *config; + struct mlx5_core_dev *pfmdev; struct mlx5_vdpa_dev *mvdev; struct mlx5_vdpa_net *ndev; u32 max_vqs; int err; + if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) & + MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) { + dev_warn(mdev->device, "missing support for split virtqueues\n"); + return -EOPNOTSUPP; + } + /* we save one virtqueue for control virtqueue should we require it */ max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues); max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS); @@ -1957,7 +2002,7 @@ void *mlx5_vdpa_add_dev(struct mlx5_core_dev *mdev) init_mvqs(ndev); mutex_init(&ndev->reslock); config = &ndev->config; - err = mlx5_query_nic_vport_mtu(mdev, &ndev->mtu); + err = query_mtu(mdev, &ndev->mtu); if (err) goto err_mtu; @@ -1965,10 +2010,17 @@ void *mlx5_vdpa_add_dev(struct mlx5_core_dev *mdev) if (err) goto err_mtu; + if (!is_zero_ether_addr(config->mac)) { + pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev)); + err = mlx5_mpfs_add_mac(pfmdev, config->mac); + if (err) + goto err_mtu; + } + mvdev->vdev.dma_dev = mdev->device; err = mlx5_vdpa_alloc_resources(&ndev->mvdev); if (err) - goto err_mtu; + goto err_mpfs; err = alloc_resources(ndev); if (err) @@ -1984,6 +2036,9 @@ void *mlx5_vdpa_add_dev(struct mlx5_core_dev *mdev) free_resources(ndev); err_res: mlx5_vdpa_free_resources(&ndev->mvdev); +err_mpfs: + if (!is_zero_ether_addr(config->mac)) + mlx5_mpfs_del_mac(pfmdev, config->mac); err_mtu: mutex_destroy(&ndev->reslock); put_device(&mvdev->vdev.dev); diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 8c99f130a6278..2f88ce3a573bc 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -353,7 +353,8 @@ static int vdpa_mgmtdev_fill(const struct vdpa_mgmt_dev *mdev, struct sk_buff *m goto msg_err; while (mdev->id_table[i].device) { - supported_classes |= BIT(mdev->id_table[i].device); + if (mdev->id_table[i].device <= 63) + supported_classes |= BIT_ULL(mdev->id_table[i].device); i++; } diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 2aba1c076d8e6..872ba1c73138c 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -251,8 +251,10 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, dev_attr->name, false); - if (!vdpasim) + if (IS_ERR(vdpasim)) { + ret = PTR_ERR(vdpasim); goto err_alloc; + } vdpasim->dev_attr = *dev_attr; INIT_WORK(&vdpasim->work, dev_attr->work_fn); @@ -578,8 +580,11 @@ static void vdpasim_free(struct vdpa_device *vdpa) vringh_kiov_cleanup(&vdpasim->vqs[i].in_iov); } - put_iova_domain(&vdpasim->iova); - iova_cache_put(); + if (vdpa_get_dma_dev(vdpa)) { + put_iova_domain(&vdpasim->iova); + iova_cache_put(); + } + kvfree(vdpasim->buffer); if (vdpasim->iommu) vhost_iotlb_free(vdpasim->iommu); diff --git a/drivers/vdpa/vdpa_user/iova_domain.c b/drivers/vdpa/vdpa_user/iova_domain.c index 5ac3f50fad7f4..83949ddd69cbb 100644 --- a/drivers/vdpa/vdpa_user/iova_domain.c +++ b/drivers/vdpa/vdpa_user/iova_domain.c @@ -167,8 +167,9 @@ static void vduse_domain_bounce(struct vduse_iova_domain *domain, map->orig_phys == INVALID_PHYS_ADDR)) return; - addr = page_address(map->bounce_page) + offset; - do_bounce(map->orig_phys + offset, addr, sz, dir); + addr = kmap_atomic(map->bounce_page); + do_bounce(map->orig_phys + offset, addr + offset, sz, dir); + kunmap_atomic(addr); size -= sz; iova += sz; } @@ -239,26 +240,25 @@ vduse_domain_get_bounce_page(struct vduse_iova_domain *domain, u64 iova) struct vduse_bounce_map *map; struct page *page = NULL; - spin_lock(&domain->iotlb_lock); + read_lock(&domain->bounce_lock); map = &domain->bounce_maps[iova >> PAGE_SHIFT]; - if (!map->bounce_page) + if (domain->user_bounce_pages || !map->bounce_page) goto out; page = map->bounce_page; get_page(page); out: - spin_unlock(&domain->iotlb_lock); + read_unlock(&domain->bounce_lock); return page; } static void -vduse_domain_free_bounce_pages(struct vduse_iova_domain *domain) +vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain *domain) { struct vduse_bounce_map *map; unsigned long pfn, bounce_pfns; - spin_lock(&domain->iotlb_lock); bounce_pfns = domain->bounce_size >> PAGE_SHIFT; for (pfn = 0; pfn < bounce_pfns; pfn++) { @@ -272,7 +272,79 @@ vduse_domain_free_bounce_pages(struct vduse_iova_domain *domain) __free_page(map->bounce_page); map->bounce_page = NULL; } - spin_unlock(&domain->iotlb_lock); +} + +int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain, + struct page **pages, int count) +{ + struct vduse_bounce_map *map; + int i, ret; + + /* Now we don't support partial mapping */ + if (count != (domain->bounce_size >> PAGE_SHIFT)) + return -EINVAL; + + write_lock(&domain->bounce_lock); + ret = -EEXIST; + if (domain->user_bounce_pages) + goto out; + + for (i = 0; i < count; i++) { + map = &domain->bounce_maps[i]; + if (map->bounce_page) { + /* Copy kernel page to user page if it's in use */ + if (map->orig_phys != INVALID_PHYS_ADDR) { + char *to = kmap_atomic(pages[i]); + char *from = page_address(map->bounce_page); + + memcpy(to, from, PAGE_SIZE); + kunmap_atomic(to); + } + __free_page(map->bounce_page); + } + map->bounce_page = pages[i]; + get_page(pages[i]); + } + domain->user_bounce_pages = true; + ret = 0; +out: + write_unlock(&domain->bounce_lock); + + return ret; +} + +void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain) +{ + struct vduse_bounce_map *map; + unsigned long i, count; + + write_lock(&domain->bounce_lock); + if (!domain->user_bounce_pages) + goto out; + + count = domain->bounce_size >> PAGE_SHIFT; + for (i = 0; i < count; i++) { + struct page *page = NULL; + + map = &domain->bounce_maps[i]; + if (WARN_ON(!map->bounce_page)) + continue; + + /* Copy user page to kernel page if it's in use */ + if (map->orig_phys != INVALID_PHYS_ADDR) { + char *to, *from = kmap_atomic(map->bounce_page); + + page = alloc_page(GFP_ATOMIC | __GFP_NOFAIL); + to = page_address(page); + memcpy(to, from, PAGE_SIZE); + kunmap_atomic(from); + } + put_page(map->bounce_page); + map->bounce_page = page; + } + domain->user_bounce_pages = false; +out: + write_unlock(&domain->bounce_lock); } void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain) @@ -358,13 +430,18 @@ dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain, if (vduse_domain_init_bounce_map(domain)) goto err; + read_lock(&domain->bounce_lock); if (vduse_domain_map_bounce_page(domain, (u64)iova, (u64)size, pa)) - goto err; + goto err_unlock; if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) vduse_domain_bounce(domain, iova, size, DMA_TO_DEVICE); + read_unlock(&domain->bounce_lock); + return iova; +err_unlock: + read_unlock(&domain->bounce_lock); err: vduse_domain_free_iova(iovad, iova, size); return DMA_MAPPING_ERROR; @@ -376,10 +453,12 @@ void vduse_domain_unmap_page(struct vduse_iova_domain *domain, { struct iova_domain *iovad = &domain->stream_iovad; + read_lock(&domain->bounce_lock); if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE); vduse_domain_unmap_bounce_page(domain, (u64)dma_addr, (u64)size); + read_unlock(&domain->bounce_lock); vduse_domain_free_iova(iovad, dma_addr, size); } @@ -485,8 +564,11 @@ static int vduse_domain_release(struct inode *inode, struct file *file) { struct vduse_iova_domain *domain = file->private_data; - vduse_domain_reset_bounce_map(domain); - vduse_domain_free_bounce_pages(domain); + spin_lock(&domain->iotlb_lock); + vduse_iotlb_del_range(domain, 0, ULLONG_MAX); + vduse_domain_remove_user_bounce_pages(domain); + vduse_domain_free_kernel_bounce_pages(domain); + spin_unlock(&domain->iotlb_lock); put_iova_domain(&domain->stream_iovad); put_iova_domain(&domain->consistent_iovad); vhost_iotlb_free(domain->iotlb); @@ -543,6 +625,7 @@ vduse_domain_create(unsigned long iova_limit, size_t bounce_size) goto err_file; domain->file = file; + rwlock_init(&domain->bounce_lock); spin_lock_init(&domain->iotlb_lock); init_iova_domain(&domain->stream_iovad, PAGE_SIZE, IOVA_START_PFN); diff --git a/drivers/vdpa/vdpa_user/iova_domain.h b/drivers/vdpa/vdpa_user/iova_domain.h index 17b6550fcfe29..2353d293ceca0 100644 --- a/drivers/vdpa/vdpa_user/iova_domain.h +++ b/drivers/vdpa/vdpa_user/iova_domain.h @@ -14,6 +14,7 @@ #include #include #include +#include #define IOVA_START_PFN 1 @@ -34,6 +35,8 @@ struct vduse_iova_domain { struct vhost_iotlb *iotlb; spinlock_t iotlb_lock; struct file *file; + bool user_bounce_pages; + rwlock_t bounce_lock; }; int vduse_domain_set_map(struct vduse_iova_domain *domain, @@ -60,6 +63,11 @@ void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size, void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain); +int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain, + struct page **pages, int count); + +void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain); + void vduse_domain_destroy(struct vduse_iova_domain *domain); struct vduse_iova_domain *vduse_domain_create(unsigned long iova_limit, diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 64027a285d0f9..1fd06e0402875 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -23,6 +23,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -40,18 +43,21 @@ #define VDUSE_DEV_MAX (1U << MINORBITS) #define VDUSE_REQUEST_TIMEOUT 30 +#define MIN_CUSTOM_VIRTIO_ID 63 + struct vduse_dev; struct vduse_virtqueue { u16 index; bool ready; spinlock_t kick_lock; - spinlock_t irq_lock; + struct rw_semaphore irq_lock; struct eventfd_ctx *kickfd; struct vdpa_callback cb; struct work_struct inject; struct kobject kobj; int irq_affinity; + bool irq_use_wq; u16 avail_index; u32 num; u64 desc_addr; @@ -68,6 +74,13 @@ struct vduse_vdpa { struct vduse_dev *dev; }; +struct vduse_umem { + unsigned long iova; + unsigned long npages; + struct page **pages; + struct mm_struct *mm; +}; + struct vduse_dev { struct vduse_vdpa *vdev; struct device dev; @@ -101,6 +114,7 @@ struct vduse_dev { u8 status; struct delayed_work timeout_work; u16 dead_timeout; + int (*dead_handler)(struct vduse_dev *dev, struct vduse_virtqueue *vq); void *shm_addr; u8 dev_shm_size; u8 vq_shm_off; @@ -108,6 +122,8 @@ struct vduse_dev { bool hung; spinlock_t config_lock; void *config; + struct vduse_umem *umem; + struct mutex mem_lock; }; struct vduse_dev_msg { @@ -122,6 +138,10 @@ struct vduse_control { unsigned long api_version; }; +static bool allow_unsafe_device; +module_param(allow_unsafe_device, bool, 0644); +MODULE_PARM_DESC(allow_unsage_device, "Allow emulating unsafe device types"); + static unsigned long max_bounce_size = (1024 * 1024 * 1024UL); module_param(max_bounce_size, ulong, 0444); MODULE_PARM_DESC(max_bounce_size, "Maximum bounce buffer size. (default: 1G)"); @@ -139,6 +159,12 @@ static struct class *vduse_class; static struct workqueue_struct *vduse_irq_wq; static struct workqueue_struct *vduse_irq_bound_wq; +static u32 allowed_device_id[] = { + VIRTIO_ID_BLOCK, + VIRTIO_ID_FS, + VIRTIO_ID_NET, +}; + static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa) { struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa); @@ -474,6 +500,17 @@ static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to) return ret; } +static bool is_mem_zero(const char *ptr, int size) +{ + int i; + + for (i = 0; i < size; i++) { + if (ptr[i]) + return false; + } + return true; +} + static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; @@ -549,11 +586,11 @@ static void vduse_dev_reset(struct vduse_dev *dev) vq->device_addr = 0; vq->avail_index = 0; vq->num = 0; - spin_lock(&vq->irq_lock); + down_write(&vq->irq_lock); vq->ready = false; vq->cb.callback = NULL; vq->cb.private = NULL; - spin_unlock(&vq->irq_lock); + up_write(&vq->irq_lock); } } @@ -603,10 +640,10 @@ static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx, struct vduse_dev *dev = vdpa_to_vduse(vdpa); struct vduse_virtqueue *vq = dev->vqs[idx]; - spin_lock(&vq->irq_lock); + down_write(&vq->irq_lock); vq->cb.callback = cb->callback; vq->cb.private = cb->private; - spin_unlock(&vq->irq_lock); + up_write(&vq->irq_lock); } static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num) @@ -780,6 +817,9 @@ static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset, dev->config_size); } + /* Initialize the buffer in case of partial copy. */ + memset(buf, 0, len); + spin_lock(&dev->config_lock); if (!dev->config) dev->config = config; @@ -990,23 +1030,124 @@ static void vduse_dev_irq_inject(struct work_struct *work) dev->name, dev->status); } -static void vduse_vq_irq_inject(struct work_struct *work) +static void vduse_vq_irq_inject(struct vduse_virtqueue *vq) { - struct vduse_virtqueue *vq = container_of(work, - struct vduse_virtqueue, inject); struct vduse_dev *dev = vq->dev; - bool missed = true; - spin_lock_irq(&vq->irq_lock); - if (dev && (dev->status & VIRTIO_CONFIG_S_DRIVER_OK) && - vq->cb.callback) { + if (!dev) + return; + + down_read(&vq->irq_lock); + if ((dev->status & VIRTIO_CONFIG_S_DRIVER_OK) && + vq->cb.callback) vq->cb.callback(vq->cb.private); - missed = false; - } - spin_unlock_irq(&vq->irq_lock); - if (missed && dev) + else pr_err_ratelimited("Miss vduse [%s] vq%d irq, status: %d\n", dev->name, vq->index, dev->status); + up_read(&vq->irq_lock); +} + +static void vduse_vq_irq_inject_work(struct work_struct *work) +{ + struct vduse_virtqueue *vq = container_of(work, + struct vduse_virtqueue, inject); + vduse_vq_irq_inject(vq); +} + +static int vduse_dev_dereg_umem(struct vduse_dev *dev, + u64 iova, u64 size) +{ + int ret; + + mutex_lock(&dev->mem_lock); + ret = -ENOENT; + if (!dev->umem) + goto unlock; + + ret = -EINVAL; + if (dev->umem->iova != iova || size != dev->domain->bounce_size) + goto unlock; + + vduse_domain_remove_user_bounce_pages(dev->domain); + put_user_pages_dirty_lock(dev->umem->pages, + dev->umem->npages, true); + atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm); + mmdrop(dev->umem->mm); + vfree(dev->umem->pages); + kfree(dev->umem); + dev->umem = NULL; + ret = 0; +unlock: + mutex_unlock(&dev->mem_lock); + return ret; +} + +static int vduse_dev_reg_umem(struct vduse_dev *dev, + u64 iova, u64 uaddr, u64 size) +{ + struct page **page_list = NULL; + struct vduse_umem *umem = NULL; + long pinned = 0; + unsigned long npages, lock_limit; + int ret; + + if (!dev->domain->bounce_map || + size != dev->domain->bounce_size || + iova != 0 || uaddr & ~PAGE_MASK) + return -EINVAL; + + mutex_lock(&dev->mem_lock); + ret = -EEXIST; + if (dev->umem) + goto unlock; + + ret = -ENOMEM; + npages = size >> PAGE_SHIFT; + page_list = __vmalloc(array_size(npages, sizeof(struct page *)), + GFP_KERNEL_ACCOUNT, PAGE_KERNEL); + umem = kzalloc(sizeof(*umem), GFP_KERNEL); + if (!page_list || !umem) + goto unlock; + + mmap_read_lock(current->mm); + + lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK)); + if (npages + atomic64_read(¤t->mm->pinned_vm) > lock_limit) + goto out; + + pinned = get_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE, + page_list, NULL); + if (pinned != npages) { + ret = pinned < 0 ? pinned : -ENOMEM; + goto out; + } + + ret = vduse_domain_add_user_bounce_pages(dev->domain, + page_list, pinned); + if (ret) + goto out; + + atomic64_add(npages, ¤t->mm->pinned_vm); + + umem->pages = page_list; + umem->npages = pinned; + umem->iova = iova; + umem->mm = current->mm; + mmgrab(current->mm); + + dev->umem = umem; +out: + if (ret && pinned > 0) + put_user_pages(page_list, pinned); + + mmap_read_unlock(current->mm); +unlock: + if (ret) { + vfree(page_list); + kfree(umem); + } + mutex_unlock(&dev->mem_lock); + return ret; } static long vduse_dev_ioctl(struct file *file, unsigned int cmd, @@ -1123,12 +1264,8 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd, vq = dev->vqs[vq_index]; ret = 0; - /* virtio-fs driver already uses workqueue in irq handler */ - if (dev->device_id == VIRTIO_ID_FS) { - spin_lock_irq(&vq->irq_lock); - if (vq->ready && vq->cb.callback) - vq->cb.callback(vq->cb.private); - spin_unlock_irq(&vq->irq_lock); + if (!vq->irq_use_wq) { + vduse_vq_irq_inject(vq); break; } if (vq->irq_affinity == -1) @@ -1146,6 +1283,77 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd, spin_unlock(&dev->config_lock); queue_work(vduse_irq_wq, &dev->inject); break; + case VDUSE_IOTLB_REG_UMEM: { + struct vduse_iova_umem umem; + + ret = -EFAULT; + if (copy_from_user(&umem, argp, sizeof(umem))) + break; + + ret = -EINVAL; + if (!is_mem_zero((const char *)umem.reserved, + sizeof(umem.reserved))) + break; + + ret = vduse_dev_reg_umem(dev, umem.iova, + umem.uaddr, umem.size); + break; + } + case VDUSE_IOTLB_DEREG_UMEM: { + struct vduse_iova_umem umem; + + ret = -EFAULT; + if (copy_from_user(&umem, argp, sizeof(umem))) + break; + + ret = -EINVAL; + if (!is_mem_zero((const char *)umem.reserved, + sizeof(umem.reserved))) + break; + + ret = vduse_dev_dereg_umem(dev, umem.iova, + umem.size); + break; + } + case VDUSE_IOTLB_GET_INFO: { + struct vduse_iova_info info; + struct vhost_iotlb_map *map; + struct vduse_iova_domain *domain = dev->domain; + + ret = -EFAULT; + if (copy_from_user(&info, argp, sizeof(info))) + break; + + ret = -EINVAL; + if (info.start > info.last) + break; + + if (!is_mem_zero((const char *)info.reserved, + sizeof(info.reserved))) + break; + + spin_lock(&domain->iotlb_lock); + map = vhost_iotlb_itree_first(domain->iotlb, + info.start, info.last); + if (map) { + info.start = map->start; + info.last = map->last; + info.capability = 0; + if (domain->bounce_map && map->start == 0 && + map->last == domain->bounce_size - 1) + info.capability |= VDUSE_IOVA_CAP_UMEM; + } + spin_unlock(&domain->iotlb_lock); + if (!map) + break; + + ret = -EFAULT; + if (copy_to_user(argp, &info, sizeof(info))) + break; + + ret = 0; + break; + } default: ret = -ENOIOCTLCMD; break; @@ -1205,6 +1413,7 @@ static int vduse_dev_release(struct inode *inode, struct file *file) spin_unlock(&vq->kick_lock); } + vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size); spin_lock(&dev->msg_lock); /* Make sure the inflight messages can processed after reconncection */ list_splice_init(&dev->recv_list, &dev->send_list); @@ -1255,6 +1464,26 @@ static const struct file_operations vduse_dev_fops = { .mmap = vduse_dev_mmap, }; +static ssize_t irq_use_wq_show(struct vduse_virtqueue *vq, char *buf) +{ + return sprintf(buf, "%d\n", vq->irq_use_wq); +} + +static ssize_t irq_use_wq_store(struct vduse_virtqueue *vq, + const char *buf, size_t count) +{ + bool enabled; + int ret; + + ret = kstrtobool(buf, &enabled); + if (ret) + return ret; + + vq->irq_use_wq = enabled; + + return count; +} + static ssize_t irq_affinity_show(struct vduse_virtqueue *vq, char *buf) { return sprintf(buf, "%d\n", vq->irq_affinity); @@ -1312,10 +1541,13 @@ static struct vq_sysfs_entry irq_inject_attr = __ATTR_WO(irq_inject); static struct vq_sysfs_entry kick_attr = __ATTR_WO(kick); +static struct vq_sysfs_entry irq_use_wq_attr = __ATTR_RW(irq_use_wq); + static struct attribute *vq_attrs[] = { &irq_affinity_attr.attr, &irq_inject_attr.attr, &kick_attr.attr, + &irq_use_wq_attr.attr, NULL, }; @@ -1406,13 +1638,17 @@ static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, dev->vqs[i]->index = i; dev->vqs[i]->dev = dev; dev->vqs[i]->irq_affinity = -1; + /* virtio-fs driver already uses workqueue in irq handler */ + dev->vqs[i]->irq_use_wq = (dev->device_id == VIRTIO_ID_FS) ? + false : true; + vringh_set_iotlb(&dev->vqs[i]->vring, dev->domain->iotlb, &dev->domain->iotlb_lock); vringh_kiov_init(&dev->vqs[i]->out_iov, NULL, 0); vringh_kiov_init(&dev->vqs[i]->in_iov, NULL, 0); - INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject); + INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject_work); spin_lock_init(&dev->vqs[i]->kick_lock); - spin_lock_init(&dev->vqs[i]->irq_lock); + init_rwsem(&dev->vqs[i]->irq_lock); kobject_init(&dev->vqs[i]->kobj, &vq_attr_type); ret = kobject_add(&dev->vqs[i]->kobj, &dev->dev.kobj, "vq%d", i); @@ -1434,14 +1670,17 @@ static int vduse_blk_timeout_handler(struct vduse_dev *dev, { size_t len = 0; ssize_t bytes; - unsigned short head; + unsigned short head = vq->vring.vring.num; u8 status; int ret; ret = vringh_getdesc_iotlb(&vq->vring, &vq->out_iov, &vq->in_iov, &head, GFP_ATOMIC); - if (ret != 1) + if (ret != 1) { + if (ret < 0 && head != vq->vring.vring.num) + goto err; return ret; + } if (vq->out_iov.used < 1 || vq->in_iov.used < 1) { pr_err("VDUSE: missing headers - out_iov: %u in_iov %u\n", @@ -1487,53 +1726,68 @@ static int vduse_fs_timeout_handler(struct vduse_dev *dev, { size_t len = 0; ssize_t bytes; - unsigned short head; + unsigned short head = vq->vring.vring.num; int ret; - struct fuse_in_header in; struct fuse_out_header out; ret = vringh_getdesc_iotlb(&vq->vring, &vq->out_iov, &vq->in_iov, &head, GFP_ATOMIC); - if (ret != 1) + if (ret != 1) { + if (ret < 0 && head != vq->vring.vring.num) + goto err; return ret; - - if (vq->out_iov.used < 1) { - pr_err("VDUSE: missing headers - out_iov: %u\n", - vq->out_iov.used); - goto out; - } - - if (vq->out_iov.iov[0].iov_len < sizeof(struct fuse_in_header)) { - pr_err("VDUSE: request in header too short\n"); - goto out; - } - - bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, - &in, sizeof(struct fuse_in_header)); - if (bytes != sizeof(struct fuse_in_header)) { - ret = (bytes >= 0) ? -EINVAL : bytes; - pr_err("VDUSE: read fuse header failed: %ld\n", bytes); - goto err; } len = vringh_kiov_length(&vq->in_iov); if (!len) goto out; - out.unique = in.unique; - out.error = -EIO; + if (vq->index == 0) { + pr_err("VDUSE: invalid req in virtiofs high priority queue\n"); + goto out; + } + out.error = -EIO; bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov, &out, sizeof(struct fuse_out_header)); if (bytes != sizeof(struct fuse_out_header)) { ret = (bytes >= 0) ? -EINVAL : bytes; - pr_err("VDUSE: write fuse header failed: %ld\n", bytes); + pr_err("VDUSE: write fuse header failed in (%u, %u): %ld\n", + head, vq->vring.last_avail_idx, bytes); goto err; } /* Make sure data is wrote before advancing index */ smp_wmb(); out: + ret = vringh_complete_iotlb(&vq->vring, head, len); + if (ret) { + pr_err("VDUSE: update used vring failed in (%u, %u): %ld\n", + head, vq->vring.last_avail_idx, bytes); + goto err; + } + + return 1; +err: + vringh_abandon_iotlb(&vq->vring, 1); + return ret; +} + +static int vduse_default_timeout_handler(struct vduse_dev *dev, + struct vduse_virtqueue *vq) +{ + size_t len = 0; + unsigned short head; + int ret; + + ret = vringh_getdesc_iotlb(&vq->vring, &vq->out_iov, &vq->in_iov, + &head, GFP_ATOMIC); + if (ret != 1) { + if (ret < 0 && head != vq->vring.vring.num) + goto err; + return ret; + } + ret = vringh_complete_iotlb(&vq->vring, head, len); if (ret) { pr_err("VDUSE: update used vring failed\n"); @@ -1560,11 +1814,7 @@ static int vduse_req_timeout_handler(struct vduse_dev *dev, return ret; } - if (dev->device_id == VIRTIO_ID_BLOCK) - ret = vduse_blk_timeout_handler(dev, vq); - else if (dev->device_id == VIRTIO_ID_FS) - ret = vduse_fs_timeout_handler(dev, vq); - + ret = dev->dead_handler(dev, vq); if (ret < 0 && do_retry) { do_retry = false; goto retry; @@ -1590,6 +1840,10 @@ static void vduse_vq_check_inflights(struct vduse_dev *dev, int index) if (inflight->desc[i].inflight) vringh_recover_desc_iotlb(&vq->vring, idx++, i); } + + pr_info("VDUSE: get vq%d I/O for %s, desc %d num %d idx %u inuse %d\n", + index, dev_name(&dev->dev), desc_num, vq->vring.vring.num, + vq->vring.last_avail_idx, idx - vq->vring.last_avail_idx); } static void vduse_dev_timeout_work(struct work_struct *work) @@ -1620,12 +1874,8 @@ static void vduse_dev_timeout_work(struct work_struct *work) } spin_unlock(&dev->msg_lock); - if (dev->device_id != VIRTIO_ID_BLOCK && - dev->device_id != VIRTIO_ID_FS) { - pr_warn("VDUSE: unsupported device type %d in %s\n", - dev->device_id, dev_name(&dev->dev)); + if (!dev->dead_handler) goto unlock; - } if (!dev->shm_addr && check_inflight) { check_inflight = false; @@ -1649,15 +1899,22 @@ static void vduse_dev_timeout_work(struct work_struct *work) dev->vqs[i])) == 1); } while (!vringh_notify_enable_iotlb(&dev->vqs[i]->vring) && ret >= 0); - spin_lock_irq(&dev->vqs[i]->irq_lock); - if (dev->vqs[i]->cb.callback) - dev->vqs[i]->cb.callback(dev->vqs[i]->cb.private); - spin_unlock_irq(&dev->vqs[i]->irq_lock); + vduse_vq_irq_inject(dev->vqs[i]); } unlock: mutex_unlock(&dev->lock); } +static void vduse_set_dead_handler(struct vduse_dev *dev) +{ + if (dev->device_id == VIRTIO_ID_FS) + dev->dead_handler = vduse_fs_timeout_handler; + else if (dev->device_id == VIRTIO_ID_BLOCK) + dev->dead_handler = vduse_blk_timeout_handler; + else + dev->dead_handler = vduse_default_timeout_handler; +} + static ssize_t abort_conn_store(struct device *device, struct device_attribute *attr, const char *buf, size_t count) @@ -1672,8 +1929,68 @@ static ssize_t abort_conn_store(struct device *device, static DEVICE_ATTR_WO(abort_conn); +static ssize_t enable_dead_handler_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct vduse_dev *dev = container_of(device, struct vduse_dev, dev); + + return sprintf(buf, "%d\n", dev->dead_handler ? 1 : 0); +} + +static ssize_t enable_dead_handler_store(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct vduse_dev *dev = container_of(device, struct vduse_dev, dev); + bool enable; + int ret; + + ret = kstrtobool(buf, &enable); + if (ret) + return ret; + + mutex_lock(&dev->lock); + if (enable) + vduse_set_dead_handler(dev); + else + dev->dead_handler = NULL; + mutex_unlock(&dev->lock); + + return count; +} + +static DEVICE_ATTR_RW(enable_dead_handler); + +static ssize_t dead_timeout_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct vduse_dev *dev = container_of(device, struct vduse_dev, dev); + + return sprintf(buf, "%u\n", dev->dead_timeout); +} + +static ssize_t dead_timeout_store(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct vduse_dev *dev = container_of(device, struct vduse_dev, dev); + int ret; + + ret = kstrtou16(buf, 0, &dev->dead_timeout); + if (ret) + return ret; + + return count; +} + +static DEVICE_ATTR_RW(dead_timeout); + static struct attribute *vduse_dev_attrs[] = { &dev_attr_abort_conn.attr, + &dev_attr_enable_dead_handler.attr, + &dev_attr_dead_timeout.attr, NULL }; @@ -1687,6 +2004,7 @@ static struct vduse_dev *vduse_dev_create(void) return NULL; mutex_init(&dev->lock); + mutex_init(&dev->mem_lock); spin_lock_init(&dev->config_lock); spin_lock_init(&dev->msg_lock); INIT_LIST_HEAD(&dev->send_list); @@ -1758,6 +2076,37 @@ static void vduse_release_dev(struct device *device) vduse_dev_destroy(dev); } +static bool device_is_allowed(u32 device_id) +{ + int i; + + if (allow_unsafe_device || device_id >= MIN_CUSTOM_VIRTIO_ID) + return true; + + for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++) + if (allowed_device_id[i] == device_id) + return true; + + return false; +} + +static bool vduse_validate_config(struct vduse_dev_config *config) +{ + if (config->vq_align > PAGE_SIZE) + return false; + + if (config->config_size > PAGE_SIZE) + return false; + + if (config->vq_num > 0xffff) + return false; + + if (!device_is_allowed(config->device_id)) + return false; + + return true; +} + static int vduse_create_dev(struct vduse_dev_config *config, unsigned long api_version) { @@ -1817,6 +2166,7 @@ static int vduse_create_dev(struct vduse_dev_config *config, if (ret) goto err_vqs; + vduse_set_dead_handler(dev); list_add(&dev->list, &vduse_devs); __module_get(THIS_MODULE); @@ -1862,6 +2212,10 @@ static long vduse_ioctl(struct file *file, unsigned int cmd, if (copy_from_user(&config, argp, sizeof(config))) break; + ret = -EINVAL; + if (vduse_validate_config(&config) == false) + break; + ret = vduse_create_dev(&config, control->api_version); break; } diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 503ed2f3fbb5e..65743de8aad11 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -29,7 +29,7 @@ menuconfig VFIO If you don't know what to do here, say N. -menuconfig VFIO_NOIOMMU +config VFIO_NOIOMMU bool "VFIO No-IOMMU support" depends on VFIO help diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index 987b4d311fde9..de7ac0cd6c5c2 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -31,12 +31,14 @@ struct vfio_pci_ioeventfd { struct list_head next; + struct vfio_pci_device *vdev; struct virqfd *virqfd; void __iomem *addr; uint64_t data; loff_t pos; int bar; int count; + bool test_mem; }; struct vfio_pci_irq_ctx { diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index 83f81d24df78e..ea755e02a23bc 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -37,17 +37,70 @@ #define vfio_ioread8 ioread8 #define vfio_iowrite8 iowrite8 +#define VFIO_IOWRITE(size) \ +static int vfio_pci_iowrite##size(struct vfio_pci_device *vdev, \ + bool test_mem, u##size val, void __iomem *io) \ +{ \ + if (test_mem) { \ + down_read(&vdev->memory_lock); \ + if (!__vfio_pci_memory_enabled(vdev)) { \ + up_read(&vdev->memory_lock); \ + return -EIO; \ + } \ + } \ + \ + vfio_iowrite##size(val, io); \ + \ + if (test_mem) \ + up_read(&vdev->memory_lock); \ + \ + return 0; \ +} + +VFIO_IOWRITE(8) +VFIO_IOWRITE(16) +VFIO_IOWRITE(32) +#ifdef iowrite64 +VFIO_IOWRITE(64) +#endif + +#define VFIO_IOREAD(size) \ +static int vfio_pci_ioread##size(struct vfio_pci_device *vdev, \ + bool test_mem, u##size *val, void __iomem *io) \ +{ \ + if (test_mem) { \ + down_read(&vdev->memory_lock); \ + if (!__vfio_pci_memory_enabled(vdev)) { \ + up_read(&vdev->memory_lock); \ + return -EIO; \ + } \ + } \ + \ + *val = vfio_ioread##size(io); \ + \ + if (test_mem) \ + up_read(&vdev->memory_lock); \ + \ + return 0; \ +} + +VFIO_IOREAD(8) +VFIO_IOREAD(16) +VFIO_IOREAD(32) + /* * Read or write from an __iomem region (MMIO or I/O port) with an excluded * range which is inaccessible. The excluded range drops writes and fills * reads with -1. This is intended for handling MSI-X vector tables and * leftover space for ROM BARs. */ -static ssize_t do_io_rw(void __iomem *io, char __user *buf, +static ssize_t do_io_rw(struct vfio_pci_device *vdev, bool test_mem, + void __iomem *io, char __user *buf, loff_t off, size_t count, size_t x_start, size_t x_end, bool iswrite) { ssize_t done = 0; + int ret; while (count) { size_t fillable, filled; @@ -66,9 +119,15 @@ static ssize_t do_io_rw(void __iomem *io, char __user *buf, if (copy_from_user(&val, buf, 4)) return -EFAULT; - vfio_iowrite32(val, io + off); + ret = vfio_pci_iowrite32(vdev, test_mem, + val, io + off); + if (ret) + return ret; } else { - val = vfio_ioread32(io + off); + ret = vfio_pci_ioread32(vdev, test_mem, + &val, io + off); + if (ret) + return ret; if (copy_to_user(buf, &val, 4)) return -EFAULT; @@ -82,9 +141,15 @@ static ssize_t do_io_rw(void __iomem *io, char __user *buf, if (copy_from_user(&val, buf, 2)) return -EFAULT; - vfio_iowrite16(val, io + off); + ret = vfio_pci_iowrite16(vdev, test_mem, + val, io + off); + if (ret) + return ret; } else { - val = vfio_ioread16(io + off); + ret = vfio_pci_ioread16(vdev, test_mem, + &val, io + off); + if (ret) + return ret; if (copy_to_user(buf, &val, 2)) return -EFAULT; @@ -98,9 +163,15 @@ static ssize_t do_io_rw(void __iomem *io, char __user *buf, if (copy_from_user(&val, buf, 1)) return -EFAULT; - vfio_iowrite8(val, io + off); + ret = vfio_pci_iowrite8(vdev, test_mem, + val, io + off); + if (ret) + return ret; } else { - val = vfio_ioread8(io + off); + ret = vfio_pci_ioread8(vdev, test_mem, + &val, io + off); + if (ret) + return ret; if (copy_to_user(buf, &val, 1)) return -EFAULT; @@ -178,14 +249,6 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf, count = min(count, (size_t)(end - pos)); - if (res->flags & IORESOURCE_MEM) { - down_read(&vdev->memory_lock); - if (!__vfio_pci_memory_enabled(vdev)) { - up_read(&vdev->memory_lock); - return -EIO; - } - } - if (bar == PCI_ROM_RESOURCE) { /* * The ROM can fill less space than the BAR, so we start the @@ -213,7 +276,8 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf, x_end = vdev->msix_offset + vdev->msix_size; } - done = do_io_rw(io, buf, pos, count, x_start, x_end, iswrite); + done = do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos, + count, x_start, x_end, iswrite); if (done >= 0) *ppos += done; @@ -221,9 +285,6 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf, if (bar == PCI_ROM_RESOURCE) pci_unmap_rom(pdev, io); out: - if (res->flags & IORESOURCE_MEM) - up_read(&vdev->memory_lock); - return done; } @@ -278,7 +339,12 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf, return ret; } - done = do_io_rw(iomem, buf, off, count, 0, 0, iswrite); + /* + * VGA MMIO is a legacy, non-BAR resource that hopefully allows + * probing, so we don't currently worry about access in relation + * to the memory enable bit in the command register. + */ + done = do_io_rw(vdev, false, iomem, buf, off, count, 0, 0, iswrite); vga_put(vdev->pdev, rsrc); @@ -290,30 +356,60 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf, return done; } -static int vfio_pci_ioeventfd_handler(void *opaque, void *unused) +static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd, + bool test_mem) { - struct vfio_pci_ioeventfd *ioeventfd = opaque; - switch (ioeventfd->count) { case 1: - vfio_iowrite8(ioeventfd->data, ioeventfd->addr); + vfio_pci_iowrite8(ioeventfd->vdev, test_mem, + ioeventfd->data, ioeventfd->addr); break; case 2: - vfio_iowrite16(ioeventfd->data, ioeventfd->addr); + vfio_pci_iowrite16(ioeventfd->vdev, test_mem, + ioeventfd->data, ioeventfd->addr); break; case 4: - vfio_iowrite32(ioeventfd->data, ioeventfd->addr); + vfio_pci_iowrite32(ioeventfd->vdev, test_mem, + ioeventfd->data, ioeventfd->addr); break; #ifdef iowrite64 case 8: - vfio_iowrite64(ioeventfd->data, ioeventfd->addr); + vfio_pci_iowrite64(ioeventfd->vdev, test_mem, + ioeventfd->data, ioeventfd->addr); break; #endif } +} + +static int vfio_pci_ioeventfd_handler(void *opaque, void *unused) +{ + struct vfio_pci_ioeventfd *ioeventfd = opaque; + struct vfio_pci_device *vdev = ioeventfd->vdev; + + if (ioeventfd->test_mem) { + if (!down_read_trylock(&vdev->memory_lock)) + return 1; /* Lock contended, use thread */ + if (!__vfio_pci_memory_enabled(vdev)) { + up_read(&vdev->memory_lock); + return 0; + } + } + + vfio_pci_ioeventfd_do_write(ioeventfd, false); + + if (ioeventfd->test_mem) + up_read(&vdev->memory_lock); return 0; } +static void vfio_pci_ioeventfd_thread(void *opaque, void *unused) +{ + struct vfio_pci_ioeventfd *ioeventfd = opaque; + + vfio_pci_ioeventfd_do_write(ioeventfd, ioeventfd->test_mem); +} + long vfio_pci_ioeventfd(struct vfio_pci_device *vdev, loff_t offset, uint64_t data, int count, int fd) { @@ -378,14 +474,17 @@ long vfio_pci_ioeventfd(struct vfio_pci_device *vdev, loff_t offset, goto out_unlock; } + ioeventfd->vdev = vdev; ioeventfd->addr = vdev->barmap[bar] + pos; ioeventfd->data = data; ioeventfd->pos = pos; ioeventfd->bar = bar; ioeventfd->count = count; + ioeventfd->test_mem = vdev->pdev->resource[bar].flags & IORESOURCE_MEM; ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler, - NULL, NULL, &ioeventfd->virqfd, fd); + vfio_pci_ioeventfd_thread, NULL, + &ioeventfd->virqfd, fd); if (ret) { kfree(ioeventfd); goto out_unlock; diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index bf85b61cfb9c9..ff9cb43bd989f 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -11,6 +11,15 @@ config VHOST_RING This option is selected by any driver which needs to access the host side of a virtio ring. +config VHOST_DPN + bool + depends on !ARM || AEABI + default y + help + Anything selecting VHOST or VHOST_RING must depend on VHOST_DPN. + This excludes the deprecated ARM ABI since that forces a 4 byte + alignment on all structs - incompatible with virtio spec requirements. + config VHOST tristate select VHOST_IOTLB @@ -26,7 +35,7 @@ if VHOST_MENU config VHOST_NET tristate "Host kernel accelerator for virtio net" - depends on NET && EVENTFD && (TUN || !TUN) && (TAP || !TAP) + depends on NET && EVENTFD && (TUN || !TUN) && (TAP || !TAP) && VHOST_DPN select VHOST ---help--- This kernel module can be loaded in host kernel to accelerate @@ -38,7 +47,7 @@ config VHOST_NET config VHOST_SCSI tristate "VHOST_SCSI TCM fabric driver" - depends on TARGET_CORE && EVENTFD + depends on TARGET_CORE && EVENTFD && VHOST_DPN select VHOST default n ---help--- @@ -47,7 +56,7 @@ config VHOST_SCSI config VHOST_VSOCK tristate "vhost virtio-vsock driver" - depends on VSOCKETS && EVENTFD + depends on VSOCKETS && EVENTFD && VHOST_DPN select VHOST select VIRTIO_VSOCKETS_COMMON default n @@ -61,7 +70,7 @@ config VHOST_VSOCK config VHOST_VDPA tristate "Vhost driver for vDPA-based backend" - depends on EVENTFD + depends on EVENTFD && VHOST_DPN select VHOST select IRQ_BYPASS_MANAGER depends on VDPA diff --git a/drivers/vhost/iotlb.c b/drivers/vhost/iotlb.c index 5c99e1112cbb7..483ea5451b520 100644 --- a/drivers/vhost/iotlb.c +++ b/drivers/vhost/iotlb.c @@ -57,6 +57,21 @@ int vhost_iotlb_add_range_ctx(struct vhost_iotlb *iotlb, if (last < start) return -EFAULT; + /* If the range being mapped is [0, ULONG_MAX], split it into two entries + * otherwise its size would overflow u64. + */ + if (start == 0 && last == ULONG_MAX) { + u64 mid = last / 2; + int err = vhost_iotlb_add_range_ctx(iotlb, start, mid, addr, + perm, opaque); + + if (err) + return err; + + addr += mid + 1; + start = mid + 1; + } + if (iotlb->limit && iotlb->nmaps == iotlb->limit && iotlb->flags & VHOST_IOTLB_FLAG_RETIRE) { diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 8b976b3e1bd15..5afb1b28b6e32 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -466,15 +466,25 @@ static void vhost_tx_batch(struct vhost_net *net, .num = nvq->batched_xdp, .ptr = nvq->xdp, }; - int err; + int i, err; if (nvq->batched_xdp == 0) goto signal_used; msghdr->msg_control = &ctl; + msghdr->msg_controllen = sizeof(ctl); err = sock->ops->sendmsg(sock, msghdr, 0); if (unlikely(err < 0)) { vq_err(&nvq->vq, "Fail to batch sending packets\n"); + + /* free pages owned by XDP; since this is an unlikely error path, + * keep it simple and avoid more complex bulk update for the + * used pages + */ + for (i = 0; i < nvq->batched_xdp; ++i) + put_page(virt_to_head_page(nvq->xdp[i].data)); + nvq->batched_xdp = 0; + nvq->done_idx = 0; return; } @@ -1439,13 +1449,9 @@ static struct socket *get_raw_socket(int fd) return ERR_PTR(r); } -static struct ptr_ring *get_tap_ptr_ring(int fd) +static struct ptr_ring *get_tap_ptr_ring(struct file *file) { struct ptr_ring *ring; - struct file *file = fget(fd); - - if (!file) - return NULL; ring = tun_get_tx_ring(file); if (!IS_ERR(ring)) goto out; @@ -1454,7 +1460,6 @@ static struct ptr_ring *get_tap_ptr_ring(int fd) goto out; ring = NULL; out: - fput(file); return ring; } @@ -1541,8 +1546,12 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) r = vhost_net_enable_vq(n, vq); if (r) goto err_used; - if (index == VHOST_NET_VQ_RX) - nvq->rx_ring = get_tap_ptr_ring(fd); + if (index == VHOST_NET_VQ_RX) { + if (sock) + nvq->rx_ring = get_tap_ptr_ring(sock->file); + else + nvq->rx_ring = NULL; + } oldubufs = nvq->ubufs; nvq->ubufs = ubufs; diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index 10897b29e616f..cca0a2b6b0b00 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -120,7 +120,7 @@ static int vhost_test_open(struct inode *inode, struct file *f) vqs[VHOST_TEST_VQ] = &n->vqs[VHOST_TEST_VQ]; n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick; vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV, - VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT); + VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT, true, NULL); f->private_data = n; diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 4eae2fcc04dc1..d3ae221b20eb6 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -192,7 +192,7 @@ static int vhost_vdpa_config_validate(struct vhost_vdpa *v, struct vdpa_device *vdpa = v->vdpa; long size = vdpa->config->get_config_size(vdpa); - if (c->len == 0) + if (c->len == 0 || c->off > size) return -EINVAL; if (c->len > size - c->off) @@ -317,7 +317,7 @@ static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp) struct eventfd_ctx *ctx; cb.callback = vhost_vdpa_config_cb; - cb.private = v->vdpa; + cb.private = v; if (copy_from_user(&fd, argp, sizeof(fd))) return -EFAULT; @@ -809,7 +809,8 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, struct vdpa_device *vdpa = v->vdpa; struct vhost_iotlb *iotlb = dev->iotlb; - if (msg->iova < v->range.first || + if (msg->iova < v->range.first || !msg->size || + msg->iova > U64_MAX - msg->size + 1 || msg->iova + msg->size - 1 > v->range.last) return -EINVAL; @@ -1081,6 +1082,7 @@ static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma) if (vma->vm_end - vma->vm_start != notify.size) return -ENOTSUPP; + vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; vma->vm_ops = &vhost_vdpa_vm_ops; return 0; } diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 0c741df2e34b9..c0958d1472841 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1158,6 +1158,13 @@ ssize_t vhost_chr_write_iter(struct vhost_dev *dev, goto done; } + if ((msg.type == VHOST_IOTLB_UPDATE || + msg.type == VHOST_IOTLB_INVALIDATE) && + msg.size == 0) { + ret = -EINVAL; + goto done; + } + if (dev->msg_handler) ret = dev->msg_handler(dev, &msg); else diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index 65c17f56efe6a..ebfb0f2d92684 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -289,7 +289,7 @@ __vringh_iov(struct vringh *vrh, u16 i, int (*copy)(const struct vringh *vrh, void *dst, const void *src, size_t len)) { - int err, count = 0, up_next, desc_max; + int err, count = 0, indirect_count = 0, up_next, desc_max; struct vring_desc desc, *descs; struct vringh_range range = { -1ULL, 0 }, slowrange; bool slow = false; @@ -346,7 +346,12 @@ __vringh_iov(struct vringh *vrh, u16 i, continue; } - if (count++ == vrh->vring.num) { + if (up_next == -1) + count++; + else + indirect_count++; + + if (count > vrh->vring.num || indirect_count > desc_max) { vringh_bad("Descriptor loop in %p", descs); err = -ELOOP; goto fail; @@ -356,7 +361,7 @@ __vringh_iov(struct vringh *vrh, u16 i, iov = wiov; else { iov = riov; - if (unlikely(wiov && wiov->i)) { + if (unlikely(wiov && wiov->used)) { vringh_bad("Readable desc %p after writable", &descs[i]); err = -EINVAL; @@ -408,6 +413,7 @@ __vringh_iov(struct vringh *vrh, u16 i, i = return_from_indirect(vrh, &up_next, &descs, &desc_max); slow = false; + indirect_count = 0; } else break; } diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 6d9943dad5e1e..43840e7e65ffb 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -491,7 +491,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) virtio_transport_free_pkt(pkt); len += sizeof(pkt->hdr); - vhost_add_used(vq, head, len); + vhost_add_used(vq, head, 0); total_len += len; added = true; } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len))); @@ -570,16 +570,18 @@ static int vhost_vsock_start(struct vhost_vsock *vsock) return ret; } -static int vhost_vsock_stop(struct vhost_vsock *vsock) +static int vhost_vsock_stop(struct vhost_vsock *vsock, bool check_owner) { size_t i; - int ret; + int ret = 0; mutex_lock(&vsock->dev.mutex); - ret = vhost_dev_check_owner(&vsock->dev); - if (ret) - goto err; + if (check_owner) { + ret = vhost_dev_check_owner(&vsock->dev); + if (ret) + goto err; + } for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { struct vhost_virtqueue *vq = &vsock->vqs[i]; @@ -694,7 +696,12 @@ static int vhost_vsock_dev_release(struct inode *inode, struct file *file) * inefficient. Room for improvement here. */ vsock_for_each_connected_socket(vhost_vsock_reset_orphans); - vhost_vsock_stop(vsock); + /* Don't check the owner, because we are in the release path, so we + * need to stop the vsock device in any case. + * vhost_vsock_stop() can not fail in this case, so we don't need to + * check the return code. + */ + vhost_vsock_stop(vsock, false); vhost_vsock_flush(vsock); vhost_dev_stop(&vsock->dev); @@ -792,7 +799,7 @@ static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, if (start) return vhost_vsock_start(vsock); else - return vhost_vsock_stop(vsock); + return vhost_vsock_stop(vsock, true); case VHOST_GET_FEATURES: features = VHOST_VSOCK_FEATURES; if (copy_to_user(argp, &features, sizeof(features))) diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c index cac3e35d76308..813a264a1b119 100644 --- a/drivers/video/backlight/backlight.c +++ b/drivers/video/backlight/backlight.c @@ -630,12 +630,6 @@ struct backlight_device *of_find_backlight(struct device *dev) of_node_put(np); if (!bd) return ERR_PTR(-EPROBE_DEFER); - /* - * Note: gpio_backlight uses brightness as - * power state during probe - */ - if (!bd->props.brightness) - bd->props.brightness = bd->props.max_brightness; } } diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c index 746eebc411dfa..047f80ee37e81 100644 --- a/drivers/video/backlight/pwm_bl.c +++ b/drivers/video/backlight/pwm_bl.c @@ -415,6 +415,33 @@ static bool pwm_backlight_is_linear(struct platform_pwm_backlight_data *data) static int pwm_backlight_initial_power_state(const struct pwm_bl_data *pb) { struct device_node *node = pb->dev->of_node; + bool active = true; + + /* + * If the enable GPIO is present, observable (either as input + * or output) and off then the backlight is not currently active. + * */ + if (pb->enable_gpio && gpiod_get_value_cansleep(pb->enable_gpio) == 0) + active = false; + + if (!regulator_is_enabled(pb->power_supply)) + active = false; + + if (!pwm_is_enabled(pb->pwm)) + active = false; + + /* + * Synchronize the enable_gpio with the observed state of the + * hardware. + */ + if (pb->enable_gpio) + gpiod_direction_output(pb->enable_gpio, active); + + /* + * Do not change pb->enabled here! pb->enabled essentially + * tells us if we own one of the regulator's use counts and + * right now we do not. + */ /* Not booted with device tree or no phandle link to the node */ if (!node || !node->phandle) @@ -426,20 +453,7 @@ static int pwm_backlight_initial_power_state(const struct pwm_bl_data *pb) * assume that another driver will enable the backlight at the * appropriate time. Therefore, if it is disabled, keep it so. */ - - /* if the enable GPIO is disabled, do not enable the backlight */ - if (pb->enable_gpio && gpiod_get_value_cansleep(pb->enable_gpio) == 0) - return FB_BLANK_POWERDOWN; - - /* The regulator is disabled, do not enable the backlight */ - if (!regulator_is_enabled(pb->power_supply)) - return FB_BLANK_POWERDOWN; - - /* The PWM is disabled, keep it like this */ - if (!pwm_is_enabled(pb->pwm)) - return FB_BLANK_POWERDOWN; - - return FB_BLANK_UNBLANK; + return active ? FB_BLANK_UNBLANK: FB_BLANK_POWERDOWN; } static int pwm_backlight_probe(struct platform_device *pdev) @@ -508,18 +522,6 @@ static int pwm_backlight_probe(struct platform_device *pdev) pb->enable_gpio = gpio_to_desc(data->enable_gpio); } - /* - * If the GPIO is not known to be already configured as output, that - * is, if gpiod_get_direction returns either 1 or -EINVAL, change the - * direction to output and set the GPIO as active. - * Do not force the GPIO to active when it was already output as it - * could cause backlight flickering or we would enable the backlight too - * early. Leave the decision of the initial backlight state for later. - */ - if (pb->enable_gpio && - gpiod_get_direction(pb->enable_gpio) != 0) - gpiod_direction_output(pb->enable_gpio, 1); - pb->power_supply = devm_regulator_get(&pdev->dev, "power"); if (IS_ERR(pb->power_supply)) { ret = PTR_ERR(pb->power_supply); diff --git a/drivers/video/console/sticon.c b/drivers/video/console/sticon.c index 79c9bd8d30254..559a7305cadaa 100644 --- a/drivers/video/console/sticon.c +++ b/drivers/video/console/sticon.c @@ -291,13 +291,13 @@ static unsigned long sticon_getxy(struct vc_data *conp, unsigned long pos, static u8 sticon_build_attr(struct vc_data *conp, u8 color, u8 intens, u8 blink, u8 underline, u8 reverse, u8 italic) { - u8 attr = ((color & 0x70) >> 1) | ((color & 7)); + u8 fg = color & 7; + u8 bg = (color & 0x70) >> 4; - if (reverse) { - color = ((color >> 3) & 0x7) | ((color & 0x7) << 3); - } - - return attr; + if (reverse) + return (fg << 3) | bg; + else + return (bg << 3) | fg; } static void sticon_invert_region(struct vc_data *conp, u16 *p, int count) diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c index a992d922b3a71..23f15f42e5cb6 100644 --- a/drivers/video/console/vgacon.c +++ b/drivers/video/console/vgacon.c @@ -370,11 +370,17 @@ static void vgacon_init(struct vc_data *c, int init) struct uni_pagedir *p; /* - * We cannot be loaded as a module, therefore init is always 1, - * but vgacon_init can be called more than once, and init will - * not be 1. + * We cannot be loaded as a module, therefore init will be 1 + * if we are the default console, however if we are a fallback + * console, for example if fbcon has failed registration, then + * init will be 0, so we need to make sure our boot parameters + * have been copied to the console structure for vgacon_resize + * ultimately called by vc_resize. Any subsequent calls to + * vgacon_init init will have init set to 0 too. */ c->vc_can_do_color = vga_can_do_color; + c->vc_scan_lines = vga_scan_lines; + c->vc_font.height = c->vc_cell_height = vga_video_font_height; /* set dimensions manually if init != 0 since vc_resize() will fail */ if (init) { @@ -383,8 +389,6 @@ static void vgacon_init(struct vc_data *c, int init) } else vc_resize(c, vga_video_num_columns, vga_video_num_lines); - c->vc_scan_lines = vga_scan_lines; - c->vc_font.height = c->vc_cell_height = vga_video_font_height; c->vc_complement_mask = 0x7700; if (vga_512_chars) c->vc_hi_font_mask = 0x0800; diff --git a/drivers/video/fbdev/amba-clcd.c b/drivers/video/fbdev/amba-clcd.c index 7de43be6ef2c2..3b7a7c74bf0a5 100644 --- a/drivers/video/fbdev/amba-clcd.c +++ b/drivers/video/fbdev/amba-clcd.c @@ -774,12 +774,15 @@ static int clcdfb_of_vram_setup(struct clcd_fb *fb) return -ENODEV; fb->fb.screen_base = of_iomap(memory, 0); - if (!fb->fb.screen_base) + if (!fb->fb.screen_base) { + of_node_put(memory); return -ENOMEM; + } fb->fb.fix.smem_start = of_translate_address(memory, of_get_address(memory, 0, &size, NULL)); fb->fb.fix.smem_len = size; + of_node_put(memory); return 0; } diff --git a/drivers/video/fbdev/asiliantfb.c b/drivers/video/fbdev/asiliantfb.c index ea31054a28ca8..c1d6e63362259 100644 --- a/drivers/video/fbdev/asiliantfb.c +++ b/drivers/video/fbdev/asiliantfb.c @@ -227,6 +227,9 @@ static int asiliantfb_check_var(struct fb_var_screeninfo *var, { unsigned long Ftarget, ratio, remainder; + if (!var->pixclock) + return -EINVAL; + ratio = 1000000 / var->pixclock; remainder = 1000000 % var->pixclock; Ftarget = 1000000 * ratio + (1000000 * remainder) / var->pixclock; diff --git a/drivers/video/fbdev/atafb.c b/drivers/video/fbdev/atafb.c index 51f5d1c56fd9c..c1ee817d7dcc5 100644 --- a/drivers/video/fbdev/atafb.c +++ b/drivers/video/fbdev/atafb.c @@ -1692,9 +1692,9 @@ static int falcon_setcolreg(unsigned int regno, unsigned int red, ((blue & 0xfc00) >> 8)); if (regno < 16) { shifter_tt.color_reg[regno] = - (((red & 0xe000) >> 13) | ((red & 0x1000) >> 12) << 8) | - (((green & 0xe000) >> 13) | ((green & 0x1000) >> 12) << 4) | - ((blue & 0xe000) >> 13) | ((blue & 0x1000) >> 12); + ((((red & 0xe000) >> 13) | ((red & 0x1000) >> 12)) << 8) | + ((((green & 0xe000) >> 13) | ((green & 0x1000) >> 12)) << 4) | + ((blue & 0xe000) >> 13) | ((blue & 0x1000) >> 12); ((u32 *)info->pseudo_palette)[regno] = ((red & 0xf800) | ((green & 0xfc00) >> 5) | ((blue & 0xf800) >> 11)); @@ -1980,9 +1980,9 @@ static int stste_setcolreg(unsigned int regno, unsigned int red, green >>= 12; if (ATARIHW_PRESENT(EXTD_SHIFTER)) shifter_tt.color_reg[regno] = - (((red & 0xe) >> 1) | ((red & 1) << 3) << 8) | - (((green & 0xe) >> 1) | ((green & 1) << 3) << 4) | - ((blue & 0xe) >> 1) | ((blue & 1) << 3); + ((((red & 0xe) >> 1) | ((red & 1) << 3)) << 8) | + ((((green & 0xe) >> 1) | ((green & 1) << 3)) << 4) | + ((blue & 0xe) >> 1) | ((blue & 1) << 3); else shifter_tt.color_reg[regno] = ((red & 0xe) << 7) | diff --git a/drivers/video/fbdev/atmel_lcdfb.c b/drivers/video/fbdev/atmel_lcdfb.c index cf2bfff2efbf1..b91919861075b 100644 --- a/drivers/video/fbdev/atmel_lcdfb.c +++ b/drivers/video/fbdev/atmel_lcdfb.c @@ -1062,15 +1062,16 @@ static int __init atmel_lcdfb_probe(struct platform_device *pdev) INIT_LIST_HEAD(&info->modelist); - if (pdev->dev.of_node) { - ret = atmel_lcdfb_of_init(sinfo); - if (ret) - goto free_info; - } else { + if (!pdev->dev.of_node) { dev_err(dev, "cannot get default configuration\n"); goto free_info; } + ret = atmel_lcdfb_of_init(sinfo); + if (ret) + goto free_info; + + ret = -ENODEV; if (!sinfo->config) goto free_info; diff --git a/drivers/video/fbdev/chipsfb.c b/drivers/video/fbdev/chipsfb.c index f4dc320dcafe2..80fdd3ee0565f 100644 --- a/drivers/video/fbdev/chipsfb.c +++ b/drivers/video/fbdev/chipsfb.c @@ -331,7 +331,7 @@ static const struct fb_var_screeninfo chipsfb_var = { static void init_chips(struct fb_info *p, unsigned long addr) { - memset(p->screen_base, 0, 0x100000); + fb_memset(p->screen_base, 0, 0x100000); p->fix = chipsfb_fix; p->fix.smem_start = addr; diff --git a/drivers/video/fbdev/cirrusfb.c b/drivers/video/fbdev/cirrusfb.c index e4ce5667b1251..1b0a58f96af25 100644 --- a/drivers/video/fbdev/cirrusfb.c +++ b/drivers/video/fbdev/cirrusfb.c @@ -470,7 +470,7 @@ static int cirrusfb_check_mclk(struct fb_info *info, long freq) return 0; } -static int cirrusfb_check_pixclock(const struct fb_var_screeninfo *var, +static int cirrusfb_check_pixclock(struct fb_var_screeninfo *var, struct fb_info *info) { long freq; @@ -479,9 +479,7 @@ static int cirrusfb_check_pixclock(const struct fb_var_screeninfo *var, unsigned maxclockidx = var->bits_per_pixel >> 3; /* convert from ps to kHz */ - freq = PICOS2KHZ(var->pixclock); - - dev_dbg(info->device, "desired pixclock: %ld kHz\n", freq); + freq = PICOS2KHZ(var->pixclock ? : 1); maxclock = cirrusfb_board_info[cinfo->btype].maxclock[maxclockidx]; cinfo->multiplexing = 0; @@ -489,11 +487,13 @@ static int cirrusfb_check_pixclock(const struct fb_var_screeninfo *var, /* If the frequency is greater than we can support, we might be able * to use multiplexing for the video mode */ if (freq > maxclock) { - dev_err(info->device, - "Frequency greater than maxclock (%ld kHz)\n", - maxclock); - return -EINVAL; + var->pixclock = KHZ2PICOS(maxclock); + + while ((freq = PICOS2KHZ(var->pixclock)) > maxclock) + var->pixclock++; } + dev_dbg(info->device, "desired pixclock: %ld kHz\n", freq); + /* * Additional constraint: 8bpp uses DAC clock doubling to allow maximum * pixel clock diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 75b7705140673..8721b75131362 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -2490,6 +2490,11 @@ static int fbcon_set_font(struct vc_data *vc, struct console_font *font, if (charcount != 256 && charcount != 512) return -EINVAL; + /* font bigger than screen resolution ? */ + if (w > FBCON_SWAP(info->var.rotate, info->var.xres, info->var.yres) || + h > FBCON_SWAP(info->var.rotate, info->var.yres, info->var.xres)) + return -EINVAL; + /* Make sure drawing engine can handle the font */ if (!(info->pixmap.blit_x & (1 << (font->width - 1))) || !(info->pixmap.blit_y & (1 << (font->height - 1)))) @@ -2756,6 +2761,34 @@ void fbcon_update_vcs(struct fb_info *info, bool all) } EXPORT_SYMBOL(fbcon_update_vcs); +/* let fbcon check if it supports a new screen resolution */ +int fbcon_modechange_possible(struct fb_info *info, struct fb_var_screeninfo *var) +{ + struct fbcon_ops *ops = info->fbcon_par; + struct vc_data *vc; + unsigned int i; + + WARN_CONSOLE_UNLOCKED(); + + if (!ops) + return 0; + + /* prevent setting a screen size which is smaller than font size */ + for (i = first_fb_vc; i <= last_fb_vc; i++) { + vc = vc_cons[i].d; + if (!vc || vc->vc_mode != KD_TEXT || + registered_fb[con2fb_map[i]] != info) + continue; + + if (vc->vc_font.width > FBCON_SWAP(var->rotate, var->xres, var->yres) || + vc->vc_font.height > FBCON_SWAP(var->rotate, var->yres, var->xres)) + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL_GPL(fbcon_modechange_possible); + int fbcon_mode_deleted(struct fb_info *info, struct fb_videomode *mode) { @@ -3286,6 +3319,9 @@ static void fbcon_register_existing_fbs(struct work_struct *work) console_lock(); + deferred_takeover = false; + logo_shown = FBCON_LOGO_DONTSHOW; + for_each_registered_fb(i) fbcon_fb_registered(registered_fb[i]); @@ -3303,8 +3339,6 @@ static int fbcon_output_notifier(struct notifier_block *nb, pr_info("fbcon: Taking over console\n"); dummycon_unregister_output_notifier(&fbcon_output_nb); - deferred_takeover = false; - logo_shown = FBCON_LOGO_DONTSHOW; /* We may get called in atomic context */ schedule_work(&fbcon_deferred_takeover_work); diff --git a/drivers/video/fbdev/core/fbcvt.c b/drivers/video/fbdev/core/fbcvt.c index 55d2bd0ce5c02..64843464c6613 100644 --- a/drivers/video/fbdev/core/fbcvt.c +++ b/drivers/video/fbdev/core/fbcvt.c @@ -214,9 +214,11 @@ static u32 fb_cvt_aspect_ratio(struct fb_cvt_data *cvt) static void fb_cvt_print_name(struct fb_cvt_data *cvt) { u32 pixcount, pixcount_mod; - int cnt = 255, offset = 0, read = 0; - u8 *buf = kzalloc(256, GFP_KERNEL); + int size = 256; + int off = 0; + u8 *buf; + buf = kzalloc(size, GFP_KERNEL); if (!buf) return; @@ -224,43 +226,30 @@ static void fb_cvt_print_name(struct fb_cvt_data *cvt) pixcount_mod = (cvt->xres * (cvt->yres/cvt->interlace)) % 1000000; pixcount_mod /= 1000; - read = snprintf(buf+offset, cnt, "fbcvt: %dx%d@%d: CVT Name - ", - cvt->xres, cvt->yres, cvt->refresh); - offset += read; - cnt -= read; + off += scnprintf(buf + off, size - off, "fbcvt: %dx%d@%d: CVT Name - ", + cvt->xres, cvt->yres, cvt->refresh); - if (cvt->status) - snprintf(buf+offset, cnt, "Not a CVT standard - %d.%03d Mega " - "Pixel Image\n", pixcount, pixcount_mod); - else { - if (pixcount) { - read = snprintf(buf+offset, cnt, "%d", pixcount); - cnt -= read; - offset += read; - } + if (cvt->status) { + off += scnprintf(buf + off, size - off, + "Not a CVT standard - %d.%03d Mega Pixel Image\n", + pixcount, pixcount_mod); + } else { + if (pixcount) + off += scnprintf(buf + off, size - off, "%d", pixcount); - read = snprintf(buf+offset, cnt, ".%03dM", pixcount_mod); - cnt -= read; - offset += read; + off += scnprintf(buf + off, size - off, ".%03dM", pixcount_mod); if (cvt->aspect_ratio == 0) - read = snprintf(buf+offset, cnt, "3"); + off += scnprintf(buf + off, size - off, "3"); else if (cvt->aspect_ratio == 3) - read = snprintf(buf+offset, cnt, "4"); + off += scnprintf(buf + off, size - off, "4"); else if (cvt->aspect_ratio == 1 || cvt->aspect_ratio == 4) - read = snprintf(buf+offset, cnt, "9"); + off += scnprintf(buf + off, size - off, "9"); else if (cvt->aspect_ratio == 2) - read = snprintf(buf+offset, cnt, "A"); - else - read = 0; - cnt -= read; - offset += read; - - if (cvt->flags & FB_CVT_FLAG_REDUCED_BLANK) { - read = snprintf(buf+offset, cnt, "-R"); - cnt -= read; - offset += read; - } + off += scnprintf(buf + off, size - off, "A"); + + if (cvt->flags & FB_CVT_FLAG_REDUCED_BLANK) + off += scnprintf(buf + off, size - off, "-R"); } printk(KERN_INFO "%s\n", buf); diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index d87de5d467189..23ce47655c931 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -512,7 +512,7 @@ static int fb_show_logo_line(struct fb_info *info, int rotate, while (n && (n * (logo->width + 8) - 8 > xres)) --n; - image.dx = (xres - n * (logo->width + 8) - 8) / 2; + image.dx = (xres - (n * (logo->width + 8) - 8)) / 2; image.dy = y ?: (yres - logo->height) / 2; } else { image.dx = 0; @@ -957,6 +957,7 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var) struct fb_var_screeninfo old_var; struct fb_videomode mode; struct fb_event event; + u32 unused; if (var->activate & FB_ACTIVATE_INV_MODE) { struct fb_videomode mode1, mode2; @@ -1003,11 +1004,26 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var) if (var->xres < 8 || var->yres < 8) return -EINVAL; + /* Too huge resolution causes multiplication overflow. */ + if (check_mul_overflow(var->xres, var->yres, &unused) || + check_mul_overflow(var->xres_virtual, var->yres_virtual, &unused)) + return -EINVAL; + ret = info->fbops->fb_check_var(var, info); if (ret) return ret; + /* verify that virtual resolution >= physical resolution */ + if (var->xres_virtual < var->xres || + var->yres_virtual < var->yres) { + pr_warn("WARNING: fbcon: Driver '%s' missed to adjust virtual screen size (%ux%u vs. %ux%u)\n", + info->fix.id, + var->xres_virtual, var->yres_virtual, + var->xres, var->yres); + return -EINVAL; + } + if ((var->activate & FB_ACTIVATE_MASK) != FB_ACTIVATE_NOW) return 0; @@ -1098,7 +1114,9 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, return -EFAULT; console_lock(); lock_fb_info(info); - ret = fb_set_var(info, &var); + ret = fbcon_modechange_possible(info, &var); + if (!ret) + ret = fb_set_var(info, &var); if (!ret) fbcon_update_vcs(info, var.activate & FB_ACTIVATE_ALL); unlock_fb_info(info); diff --git a/drivers/video/fbdev/gbefb.c b/drivers/video/fbdev/gbefb.c index b9f6a82a04953..6fdc6ab3ceb87 100644 --- a/drivers/video/fbdev/gbefb.c +++ b/drivers/video/fbdev/gbefb.c @@ -1269,7 +1269,7 @@ static struct platform_device *gbefb_device; static int __init gbefb_init(void) { int ret = platform_driver_register(&gbefb_driver); - if (!ret) { + if (IS_ENABLED(CONFIG_SGI_IP32) && !ret) { gbefb_device = platform_device_alloc("gbefb", 0); if (gbefb_device) { ret = platform_device_add(gbefb_device); diff --git a/drivers/video/fbdev/kyro/fbdev.c b/drivers/video/fbdev/kyro/fbdev.c index a7bd9f25911b5..74bf26b527b91 100644 --- a/drivers/video/fbdev/kyro/fbdev.c +++ b/drivers/video/fbdev/kyro/fbdev.c @@ -372,6 +372,11 @@ static int kyro_dev_overlay_viewport_set(u32 x, u32 y, u32 ulWidth, u32 ulHeight /* probably haven't called CreateOverlay yet */ return -EINVAL; + if (ulWidth == 0 || ulWidth == 0xffffffff || + ulHeight == 0 || ulHeight == 0xffffffff || + (x < 2 && ulWidth + 2 == 0)) + return -EINVAL; + /* Stop Ramdac Output */ DisableRamdacOutput(deviceInfo.pSTGReg); @@ -394,6 +399,9 @@ static int kyrofb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) { struct kyrofb_info *par = info->par; + if (!var->pixclock) + return -EINVAL; + if (var->bits_per_pixel != 16 && var->bits_per_pixel != 32) { printk(KERN_WARNING "kyrofb: depth not supported: %u\n", var->bits_per_pixel); return -EINVAL; diff --git a/drivers/video/fbdev/nvidia/nv_i2c.c b/drivers/video/fbdev/nvidia/nv_i2c.c index d7994a1732459..0b48965a6420c 100644 --- a/drivers/video/fbdev/nvidia/nv_i2c.c +++ b/drivers/video/fbdev/nvidia/nv_i2c.c @@ -86,7 +86,7 @@ static int nvidia_setup_i2c_bus(struct nvidia_i2c_chan *chan, const char *name, { int rc; - strcpy(chan->adapter.name, name); + strscpy(chan->adapter.name, name, sizeof(chan->adapter.name)); chan->adapter.owner = THIS_MODULE; chan->adapter.class = i2c_class; chan->adapter.algo_data = &chan->algo; diff --git a/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c b/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c index b4a1aefff7661..777f6d66c28c3 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c @@ -251,6 +251,7 @@ static int dvic_probe_of(struct platform_device *pdev) adapter_node = of_parse_phandle(node, "ddc-i2c-bus", 0); if (adapter_node) { adapter = of_get_i2c_adapter_by_node(adapter_node); + of_node_put(adapter_node); if (adapter == NULL) { dev_err(&pdev->dev, "failed to parse ddc-i2c-bus\n"); omap_dss_put_device(ddata->in); diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c index 4b0793abdd84b..a2c7c5cb15234 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c @@ -409,7 +409,7 @@ static ssize_t dsicm_num_errors_show(struct device *dev, if (r) return r; - return snprintf(buf, PAGE_SIZE, "%d\n", errors); + return sysfs_emit(buf, "%d\n", errors); } static ssize_t dsicm_hw_revision_show(struct device *dev, @@ -439,7 +439,7 @@ static ssize_t dsicm_hw_revision_show(struct device *dev, if (r) return r; - return snprintf(buf, PAGE_SIZE, "%02x.%02x.%02x\n", id1, id2, id3); + return sysfs_emit(buf, "%02x.%02x.%02x\n", id1, id2, id3); } static ssize_t dsicm_store_ulps(struct device *dev, @@ -487,7 +487,7 @@ static ssize_t dsicm_show_ulps(struct device *dev, t = ddata->ulps_enabled; mutex_unlock(&ddata->lock); - return snprintf(buf, PAGE_SIZE, "%u\n", t); + return sysfs_emit(buf, "%u\n", t); } static ssize_t dsicm_store_ulps_timeout(struct device *dev, @@ -532,7 +532,7 @@ static ssize_t dsicm_show_ulps_timeout(struct device *dev, t = ddata->ulps_timeout; mutex_unlock(&ddata->lock); - return snprintf(buf, PAGE_SIZE, "%u\n", t); + return sysfs_emit(buf, "%u\n", t); } static DEVICE_ATTR(num_dsi_errors, S_IRUGO, dsicm_num_errors_show, NULL); diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c index 1293515e4b169..0cbc5b9183f89 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c @@ -476,7 +476,7 @@ static ssize_t show_cabc_available_modes(struct device *dev, int i; if (!ddata->has_cabc) - return snprintf(buf, PAGE_SIZE, "%s\n", cabc_modes[0]); + return sysfs_emit(buf, "%s\n", cabc_modes[0]); for (i = 0, len = 0; len < PAGE_SIZE && i < ARRAY_SIZE(cabc_modes); i++) diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c index bb85b21f07248..9f6ef9e04d9ce 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c @@ -169,7 +169,7 @@ static ssize_t tpo_td043_vmirror_show(struct device *dev, { struct panel_drv_data *ddata = dev_get_drvdata(dev); - return snprintf(buf, PAGE_SIZE, "%d\n", ddata->vmirror); + return sysfs_emit(buf, "%d\n", ddata->vmirror); } static ssize_t tpo_td043_vmirror_store(struct device *dev, @@ -199,7 +199,7 @@ static ssize_t tpo_td043_mode_show(struct device *dev, { struct panel_drv_data *ddata = dev_get_drvdata(dev); - return snprintf(buf, PAGE_SIZE, "%d\n", ddata->mode); + return sysfs_emit(buf, "%d\n", ddata->mode); } static ssize_t tpo_td043_mode_store(struct device *dev, diff --git a/drivers/video/fbdev/pxa3xx-gcu.c b/drivers/video/fbdev/pxa3xx-gcu.c index 74ffb446e00c9..7c4694d70dac1 100644 --- a/drivers/video/fbdev/pxa3xx-gcu.c +++ b/drivers/video/fbdev/pxa3xx-gcu.c @@ -651,6 +651,7 @@ static int pxa3xx_gcu_probe(struct platform_device *pdev) for (i = 0; i < 8; i++) { ret = pxa3xx_gcu_add_buffer(dev, priv); if (ret) { + pxa3xx_gcu_free_buffers(dev, priv); dev_err(dev, "failed to allocate DMA memory\n"); goto err_disable_clk; } @@ -667,15 +668,15 @@ static int pxa3xx_gcu_probe(struct platform_device *pdev) SHARED_SIZE, irq); return 0; -err_free_dma: - dma_free_coherent(dev, SHARED_SIZE, - priv->shared, priv->shared_phys); +err_disable_clk: + clk_disable_unprepare(priv->clk); err_misc_deregister: misc_deregister(&priv->misc_dev); -err_disable_clk: - clk_disable_unprepare(priv->clk); +err_free_dma: + dma_free_coherent(dev, SHARED_SIZE, + priv->shared, priv->shared_phys); return ret; } @@ -688,6 +689,7 @@ static int pxa3xx_gcu_remove(struct platform_device *pdev) pxa3xx_gcu_wait_idle(priv); misc_deregister(&priv->misc_dev); dma_free_coherent(dev, SHARED_SIZE, priv->shared, priv->shared_phys); + clk_disable_unprepare(priv->clk); pxa3xx_gcu_free_buffers(dev, priv); return 0; diff --git a/drivers/video/fbdev/riva/fbdev.c b/drivers/video/fbdev/riva/fbdev.c index ca593a3e41d74..51c9d9508c0b0 100644 --- a/drivers/video/fbdev/riva/fbdev.c +++ b/drivers/video/fbdev/riva/fbdev.c @@ -1088,6 +1088,9 @@ static int rivafb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) int mode_valid = 0; NVTRACE_ENTER(); + if (!var->pixclock) + return -EINVAL; + switch (var->bits_per_pixel) { case 1 ... 8: var->red.offset = var->green.offset = var->blue.offset = 0; diff --git a/drivers/video/fbdev/sm712fb.c b/drivers/video/fbdev/sm712fb.c index 2466814145779..83d05600360dd 100644 --- a/drivers/video/fbdev/sm712fb.c +++ b/drivers/video/fbdev/sm712fb.c @@ -1047,7 +1047,7 @@ static ssize_t smtcfb_read(struct fb_info *info, char __user *buf, if (count + p > total_size) count = total_size - p; - buffer = kmalloc((count > PAGE_SIZE) ? PAGE_SIZE : count, GFP_KERNEL); + buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!buffer) return -ENOMEM; @@ -1059,25 +1059,14 @@ static ssize_t smtcfb_read(struct fb_info *info, char __user *buf, while (count) { c = (count > PAGE_SIZE) ? PAGE_SIZE : count; dst = buffer; - for (i = c >> 2; i--;) { - *dst = fb_readl(src++); - *dst = big_swap(*dst); + for (i = (c + 3) >> 2; i--;) { + u32 val; + + val = fb_readl(src); + *dst = big_swap(val); + src++; dst++; } - if (c & 3) { - u8 *dst8 = (u8 *)dst; - u8 __iomem *src8 = (u8 __iomem *)src; - - for (i = c & 3; i--;) { - if (i & 1) { - *dst8++ = fb_readb(++src8); - } else { - *dst8++ = fb_readb(--src8); - src8 += 2; - } - } - src = (u32 __iomem *)src8; - } if (copy_to_user(buf, buffer, c)) { err = -EFAULT; @@ -1130,7 +1119,7 @@ static ssize_t smtcfb_write(struct fb_info *info, const char __user *buf, count = total_size - p; } - buffer = kmalloc((count > PAGE_SIZE) ? PAGE_SIZE : count, GFP_KERNEL); + buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!buffer) return -ENOMEM; @@ -1148,24 +1137,11 @@ static ssize_t smtcfb_write(struct fb_info *info, const char __user *buf, break; } - for (i = c >> 2; i--;) { - fb_writel(big_swap(*src), dst++); + for (i = (c + 3) >> 2; i--;) { + fb_writel(big_swap(*src), dst); + dst++; src++; } - if (c & 3) { - u8 *src8 = (u8 *)src; - u8 __iomem *dst8 = (u8 __iomem *)dst; - - for (i = c & 3; i--;) { - if (i & 1) { - fb_writeb(*src8++, ++dst8); - } else { - fb_writeb(*src8++, --dst8); - dst8 += 2; - } - } - dst = (u32 __iomem *)dst8; - } *ppos += c; buf += c; diff --git a/drivers/video/fbdev/smscufx.c b/drivers/video/fbdev/smscufx.c index 0e0f5bbfc5efc..ad60f739c6a19 100644 --- a/drivers/video/fbdev/smscufx.c +++ b/drivers/video/fbdev/smscufx.c @@ -1657,6 +1657,7 @@ static int ufx_usb_probe(struct usb_interface *interface, info->par = dev; info->pseudo_palette = dev->pseudo_palette; info->fbops = &ufx_ops; + INIT_LIST_HEAD(&info->modelist); retval = fb_alloc_cmap(&info->cmap, 256, 0); if (retval < 0) { @@ -1667,8 +1668,6 @@ static int ufx_usb_probe(struct usb_interface *interface, INIT_DELAYED_WORK(&dev->free_framebuffer_work, ufx_free_framebuffer_work); - INIT_LIST_HEAD(&info->modelist); - retval = ufx_reg_read(dev, 0x3000, &id_rev); check_warn_goto_error(retval, "error %d reading 0x3000 register from device", retval); dev_dbg(dev->gdev, "ID_REV register value 0x%08x", id_rev); diff --git a/drivers/video/fbdev/udlfb.c b/drivers/video/fbdev/udlfb.c index ecbfbbf1c1a79..8e8af408998ea 100644 --- a/drivers/video/fbdev/udlfb.c +++ b/drivers/video/fbdev/udlfb.c @@ -1427,7 +1427,7 @@ static ssize_t metrics_bytes_rendered_show(struct device *fbdev, struct device_attribute *a, char *buf) { struct fb_info *fb_info = dev_get_drvdata(fbdev); struct dlfb_data *dlfb = fb_info->par; - return snprintf(buf, PAGE_SIZE, "%u\n", + return sysfs_emit(buf, "%u\n", atomic_read(&dlfb->bytes_rendered)); } @@ -1435,7 +1435,7 @@ static ssize_t metrics_bytes_identical_show(struct device *fbdev, struct device_attribute *a, char *buf) { struct fb_info *fb_info = dev_get_drvdata(fbdev); struct dlfb_data *dlfb = fb_info->par; - return snprintf(buf, PAGE_SIZE, "%u\n", + return sysfs_emit(buf, "%u\n", atomic_read(&dlfb->bytes_identical)); } @@ -1443,7 +1443,7 @@ static ssize_t metrics_bytes_sent_show(struct device *fbdev, struct device_attribute *a, char *buf) { struct fb_info *fb_info = dev_get_drvdata(fbdev); struct dlfb_data *dlfb = fb_info->par; - return snprintf(buf, PAGE_SIZE, "%u\n", + return sysfs_emit(buf, "%u\n", atomic_read(&dlfb->bytes_sent)); } @@ -1451,7 +1451,7 @@ static ssize_t metrics_cpu_kcycles_used_show(struct device *fbdev, struct device_attribute *a, char *buf) { struct fb_info *fb_info = dev_get_drvdata(fbdev); struct dlfb_data *dlfb = fb_info->par; - return snprintf(buf, PAGE_SIZE, "%u\n", + return sysfs_emit(buf, "%u\n", atomic_read(&dlfb->cpu_kcycles_used)); } @@ -1650,8 +1650,9 @@ static int dlfb_usb_probe(struct usb_interface *intf, const struct device_attribute *attr; struct dlfb_data *dlfb; struct fb_info *info; - int retval = -ENOMEM; + int retval; struct usb_device *usbdev = interface_to_usbdev(intf); + struct usb_endpoint_descriptor *out; /* usb initialization */ dlfb = kzalloc(sizeof(*dlfb), GFP_KERNEL); @@ -1665,6 +1666,12 @@ static int dlfb_usb_probe(struct usb_interface *intf, dlfb->udev = usb_get_dev(usbdev); usb_set_intfdata(intf, dlfb); + retval = usb_find_common_endpoints(intf->cur_altsetting, NULL, &out, NULL, NULL); + if (retval) { + dev_err(&intf->dev, "Device should have at lease 1 bulk endpoint!\n"); + goto error; + } + dev_dbg(&intf->dev, "console enable=%d\n", console); dev_dbg(&intf->dev, "fb_defio enable=%d\n", fb_defio); dev_dbg(&intf->dev, "shadow enable=%d\n", shadow); @@ -1674,6 +1681,7 @@ static int dlfb_usb_probe(struct usb_interface *intf, if (!dlfb_parse_vendor_descriptor(dlfb, intf)) { dev_err(&intf->dev, "firmware not recognized, incompatible device?\n"); + retval = -ENODEV; goto error; } @@ -1687,8 +1695,10 @@ static int dlfb_usb_probe(struct usb_interface *intf, /* allocates framebuffer driver structure, not framebuffer memory */ info = framebuffer_alloc(0, &dlfb->udev->dev); - if (!info) + if (!info) { + retval = -ENOMEM; goto error; + } dlfb->info = info; info->par = dlfb; diff --git a/drivers/video/fbdev/w100fb.c b/drivers/video/fbdev/w100fb.c index e30f9427b3355..52ec80bbfe5ea 100644 --- a/drivers/video/fbdev/w100fb.c +++ b/drivers/video/fbdev/w100fb.c @@ -770,12 +770,18 @@ int w100fb_probe(struct platform_device *pdev) fb_dealloc_cmap(&info->cmap); kfree(info->pseudo_palette); } - if (remapped_fbuf != NULL) + if (remapped_fbuf != NULL) { iounmap(remapped_fbuf); - if (remapped_regs != NULL) + remapped_fbuf = NULL; + } + if (remapped_regs != NULL) { iounmap(remapped_regs); - if (remapped_base != NULL) + remapped_regs = NULL; + } + if (remapped_base != NULL) { iounmap(remapped_base); + remapped_base = NULL; + } if (info) framebuffer_release(info); return err; @@ -795,8 +801,11 @@ static int w100fb_remove(struct platform_device *pdev) fb_dealloc_cmap(&info->cmap); iounmap(remapped_base); + remapped_base = NULL; iounmap(remapped_regs); + remapped_regs = NULL; iounmap(remapped_fbuf); + remapped_fbuf = NULL; framebuffer_release(info); diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 59a05f1b81054..cab97cda54304 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -167,14 +167,12 @@ void virtio_add_status(struct virtio_device *dev, unsigned int status) } EXPORT_SYMBOL_GPL(virtio_add_status); -int virtio_finalize_features(struct virtio_device *dev) +/* Do some validation, then set FEATURES_OK */ +static int virtio_features_ok(struct virtio_device *dev) { - int ret = dev->config->finalize_features(dev); unsigned status; might_sleep(); - if (ret) - return ret; if (!virtio_has_feature(dev, VIRTIO_F_VERSION_1)) return 0; @@ -188,7 +186,6 @@ int virtio_finalize_features(struct virtio_device *dev) } return 0; } -EXPORT_SYMBOL_GPL(virtio_finalize_features); static int virtio_dev_probe(struct device *_d) { @@ -235,13 +232,26 @@ static int virtio_dev_probe(struct device *_d) if (device_features & (1ULL << i)) __virtio_set_bit(dev, i); + err = dev->config->finalize_features(dev); + if (err) + goto err; + if (drv->validate) { + u64 features = dev->features; + err = drv->validate(dev); if (err) goto err; + + /* Did validation change any features? Then write them again. */ + if (features != dev->features) { + err = dev->config->finalize_features(dev); + if (err) + goto err; + } } - err = virtio_finalize_features(dev); + err = virtio_features_ok(dev); if (err) goto err; @@ -406,7 +416,11 @@ int virtio_device_restore(struct virtio_device *dev) /* We have a driver! */ virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER); - ret = virtio_finalize_features(dev); + ret = dev->config->finalize_features(dev); + if (ret) + goto err; + + ret = virtio_features_ok(dev); if (ret) goto err; diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index e09edb5c5e065..e781e5e9215f0 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include #include @@ -514,6 +515,28 @@ static const struct virtio_config_ops virtio_mmio_config_ops = { .bus_name = vm_bus_name, }; +#ifdef CONFIG_PM_SLEEP +static int virtio_mmio_freeze(struct device *dev) +{ + struct virtio_mmio_device *vm_dev = dev_get_drvdata(dev); + + return virtio_device_freeze(&vm_dev->vdev); +} + +static int virtio_mmio_restore(struct device *dev) +{ + struct virtio_mmio_device *vm_dev = dev_get_drvdata(dev); + + if (vm_dev->version == 1) + writel(PAGE_SIZE, vm_dev->base + VIRTIO_MMIO_GUEST_PAGE_SIZE); + + return virtio_device_restore(&vm_dev->vdev); +} + +static const struct dev_pm_ops virtio_mmio_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(virtio_mmio_freeze, virtio_mmio_restore) +}; +#endif static void virtio_mmio_release_dev(struct device *_d) { @@ -669,6 +692,7 @@ static int vm_cmdline_set(const char *device, if (!vm_cmdline_parent_registered) { err = device_register(&vm_cmdline_parent); if (err) { + put_device(&vm_cmdline_parent); pr_err("Failed to register parent device!\n"); return err; } @@ -766,6 +790,9 @@ static struct platform_driver virtio_mmio_driver = { .name = "virtio-mmio", .of_match_table = virtio_mmio_match, .acpi_match_table = ACPI_PTR(virtio_mmio_acpi_match), +#ifdef CONFIG_PM_SLEEP + .pm = &virtio_mmio_pm_ops, +#endif }, }; diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 222d630c41fc9..1e890ef176873 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -254,8 +254,7 @@ void vp_del_vqs(struct virtio_device *vdev) if (vp_dev->msix_affinity_masks) { for (i = 0; i < vp_dev->msix_vectors; i++) - if (vp_dev->msix_affinity_masks[i]) - free_cpumask_var(vp_dev->msix_affinity_masks[i]); + free_cpumask_var(vp_dev->msix_affinity_masks[i]); } if (vp_dev->msix_enabled) { @@ -576,6 +575,13 @@ static void virtio_pci_remove(struct pci_dev *pci_dev) struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); struct device *dev = get_device(&vp_dev->vdev.dev); + /* + * Device is marked broken on surprise removal so that virtio upper + * layers can abort any ongoing operation. + */ + if (!pci_device_is_present(pci_dev)) + virtio_break_device(&vp_dev->vdev); + pci_disable_sriov(pci_dev); unregister_virtio_device(&vp_dev->vdev); diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index c04efc9eec3f0..179f957460046 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -263,7 +263,7 @@ size_t virtio_max_dma_size(struct virtio_device *vdev) size_t max_segment_size = SIZE_MAX; if (vring_use_dma_api(vdev)) - max_segment_size = dma_max_mapping_size(&vdev->dev); + max_segment_size = dma_max_mapping_size(vdev->dev.parent); return max_segment_size; } @@ -412,6 +412,218 @@ static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq, return desc; } +dma_addr_t virtio_dma_map_sg(struct virtio_device *dev, + struct scatterlist *sg, + enum dma_data_direction direction) +{ + if (!vring_use_dma_api(dev)) + return (dma_addr_t)sg_phys(sg); + + /* + * We can't use dma_map_sg, because we don't use scatterlists in + * the way it expects (we don't guarantee that the scatterlist + * will exist for the lifetime of the mapping). + */ + return dma_map_page(dev->dev.parent, + sg_page(sg), sg->offset, sg->length, + direction); +} +EXPORT_SYMBOL_GPL(virtio_dma_map_sg); + +dma_addr_t virtio_dma_map(struct virtio_device *dev, void *addr, + unsigned int length, enum dma_data_direction dir) +{ + struct page *page; + size_t offset; + + page = virt_to_page(addr); + offset = offset_in_page(addr); + + if (!vring_use_dma_api(dev)) + return page_to_phys(page) + offset; + + return dma_map_page(dev->dev.parent, page, offset, length, dir); +} +EXPORT_SYMBOL_GPL(virtio_dma_map); + +int virtio_dma_mapping_error(struct virtio_device *dev, dma_addr_t addr) +{ + if (!vring_use_dma_api(dev)) + return 0; + + return dma_mapping_error(dev->dev.parent, addr); +} +EXPORT_SYMBOL_GPL(virtio_dma_mapping_error); + +void virtio_dma_unmap(struct virtio_device *dev, dma_addr_t dma, + unsigned int length, enum dma_data_direction dir) +{ + if (!vring_use_dma_api(dev)) + return; + + dma_unmap_page(dev->dev.parent, dma, length, dir); +} +EXPORT_SYMBOL_GPL(virtio_dma_unmap); + +int virtqueue_add_mapped_buf_split(struct virtqueue *_vq, + struct scatterlist *sgs[], + unsigned int total_sg, + unsigned int out_sgs, + unsigned int in_sgs, + void *data, void *ctx, + gfp_t gfp) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + struct scatterlist *sg; + struct vring_desc *desc; + unsigned int i, n, avail, descs_used, uninitialized_var(prev); + int head; + bool indirect; + + START_USE(vq); + + BUG_ON(data == NULL); + BUG_ON(ctx && vq->indirect); + + if (unlikely(vq->broken)) { + END_USE(vq); + return -EIO; + } + + LAST_ADD_TIME_UPDATE(vq); + + BUG_ON(total_sg == 0); + + head = vq->free_head; + + if (virtqueue_use_indirect(_vq, total_sg)) + desc = alloc_indirect_split(_vq, total_sg, gfp); + else { + desc = NULL; + WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect); + } + + if (desc) { + /* Use a single buffer which doesn't continue */ + indirect = true; + /* Set up rest to use this indirect table. */ + i = 0; + descs_used = 1; + } else { + indirect = false; + desc = vq->split.vring.desc; + i = head; + descs_used = total_sg; + } + + if (vq->vq.num_free < descs_used) { + pr_debug("Can't add buf len %i - avail = %i\n", + descs_used, vq->vq.num_free); + /* FIXME: for historical reasons, we force a notify here if + * there are outgoing parts to the buffer. Presumably the + * host should service the ring ASAP. + */ + if (out_sgs) + vq->notify(&vq->vq); + if (indirect) + kfree(desc); + END_USE(vq); + return -ENOSPC; + } + + for (n = 0; n < out_sgs; n++) { + for (sg = sgs[n]; sg; sg = sg_next(sg)) { + dma_addr_t addr = sg_dma_address(sg); + + desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT); + desc[i].addr = cpu_to_virtio64(_vq->vdev, addr); + desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); + prev = i; + i = virtio16_to_cpu(_vq->vdev, desc[i].next); + } + } + for (; n < (out_sgs + in_sgs); n++) { + for (sg = sgs[n]; sg; sg = sg_next(sg)) { + dma_addr_t addr = sg_dma_address(sg); + + desc[i].flags = cpu_to_virtio16(_vq->vdev, + VRING_DESC_F_NEXT | VRING_DESC_F_WRITE); + desc[i].addr = cpu_to_virtio64(_vq->vdev, addr); + desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); + prev = i; + i = virtio16_to_cpu(_vq->vdev, desc[i].next); + } + } + /* Last one doesn't continue. */ + desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); + + if (indirect) { + /* Now that the indirect table is filled in, map it. */ + dma_addr_t addr = vring_map_single( + vq, desc, total_sg * sizeof(struct vring_desc), + DMA_TO_DEVICE); + if (vring_mapping_error(vq, addr)) + goto unmap_release; + + vq->split.vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, + VRING_DESC_F_INDIRECT); + vq->split.vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, + addr); + + vq->split.vring.desc[head].len = cpu_to_virtio32(_vq->vdev, + total_sg * sizeof(struct vring_desc)); + } + + /* We're using some buffers from the free list. */ + vq->vq.num_free -= descs_used; + + /* Update free pointer */ + if (indirect) + vq->free_head = virtio16_to_cpu(_vq->vdev, + vq->split.vring.desc[head].next); + else + vq->free_head = i; + + /* Store token and indirect buffer state. */ + vq->split.desc_state[head].data = data; + if (indirect) + vq->split.desc_state[head].indir_desc = desc; + else + vq->split.desc_state[head].indir_desc = ctx; + + /* Put entry in available array (but don't update avail->idx until they + * do sync). + */ + avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1); + vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head); + + /* Descriptors and available array need to be set before we expose the + * new available array entries. + */ + virtio_wmb(vq->weak_barriers); + vq->split.avail_idx_shadow++; + vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, + vq->split.avail_idx_shadow); + vq->num_added++; + + pr_debug("Added buffer head %i to %p\n", head, vq); + END_USE(vq); + + /* This is very unlikely, but theoretically possible. Kick + * just in case. + */ + if (unlikely(vq->num_added == (1 << 16) - 1)) + virtqueue_kick(_vq); + + return 0; + +unmap_release: + kfree(desc); + END_USE(vq); + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(virtqueue_add_mapped_buf_split); + static inline int virtqueue_add_split(struct virtqueue *_vq, struct scatterlist *sgs[], unsigned int total_sg, @@ -675,6 +887,137 @@ static inline bool more_used_split(const struct vring_virtqueue *vq) vq->split.vring.used->idx); } +static void vring_unmap_indirect_split(const struct vring_virtqueue *vq, + struct vring_desc *desc) +{ + u16 flags; + + if (!vq->use_dma_api) + return; + + flags = virtio16_to_cpu(vq->vq.vdev, desc->flags); + + if (!(flags & VRING_DESC_F_INDIRECT)) + return; + + dma_unmap_single(vring_dma_dev(vq), + virtio64_to_cpu(vq->vq.vdev, desc->addr), + virtio32_to_cpu(vq->vq.vdev, desc->len), + (flags & VRING_DESC_F_WRITE) ? + DMA_FROM_DEVICE : DMA_TO_DEVICE); +} + +static void detach_mapped_buf_split(struct vring_virtqueue *vq, unsigned int head, + void **ctx) +{ + unsigned int i; + __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); + + /* Clear data ptr. */ + vq->split.desc_state[head].data = NULL; + + /* Put back on free list: unmap first-level descriptors and find end */ + i = head; + + while (vq->split.vring.desc[i].flags & nextflag) { + vring_unmap_indirect_split(vq, &vq->split.vring.desc[i]); + i = virtio16_to_cpu(vq->vq.vdev, vq->split.vring.desc[i].next); + vq->vq.num_free++; + } + + vring_unmap_indirect_split(vq, &vq->split.vring.desc[i]); + vq->split.vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, + vq->free_head); + vq->free_head = head; + + /* Plus final descriptor */ + vq->vq.num_free++; + + if (vq->indirect) { + struct vring_desc *indir_desc = + vq->split.desc_state[head].indir_desc; + u32 len; + + /* Free the indirect table, if any, now that it's unmapped. */ + if (!indir_desc) + return; + + len = virtio32_to_cpu(vq->vq.vdev, + vq->split.vring.desc[head].len); + + BUG_ON(!(vq->split.vring.desc[head].flags & + cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT))); + BUG_ON(len == 0 || len % sizeof(struct vring_desc)); + + kfree(indir_desc); + vq->split.desc_state[head].indir_desc = NULL; + } else if (ctx) { + *ctx = vq->split.desc_state[head].indir_desc; + } +} + +void *virtqueue_get_mapped_buf_split(struct virtqueue *_vq, + unsigned int *len, + void **ctx) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + void *ret; + unsigned int i; + u16 last_used; + + START_USE(vq); + + if (unlikely(vq->broken)) { + END_USE(vq); + return NULL; + } + + if (!more_used_split(vq)) { + pr_debug("No more buffers in queue\n"); + END_USE(vq); + return NULL; + } + + /* Only get used array entries after they have been exposed by host. */ + virtio_rmb(vq->weak_barriers); + + last_used = (vq->last_used_idx & (vq->split.vring.num - 1)); + i = virtio32_to_cpu(_vq->vdev, + vq->split.vring.used->ring[last_used].id); + *len = virtio32_to_cpu(_vq->vdev, + vq->split.vring.used->ring[last_used].len); + + if (unlikely(i >= vq->split.vring.num)) { + BAD_RING(vq, "id %u out of range, usd_idx: %u\n", i, + vq->last_used_idx); + return NULL; + } + if (unlikely(!vq->split.desc_state[i].data)) { + BAD_RING(vq, "id %u is not a head! used_idx: %u\n", i, + vq->last_used_idx); + return NULL; + } + + /* detach_buf_split clears data, so grab it now. */ + ret = vq->split.desc_state[i].data; + detach_mapped_buf_split(vq, i, ctx); + vq->last_used_idx++; + /* If we expect an interrupt for the next entry, tell host + * by writing event index and flush out the write before + * the read in the next get_buf call. + */ + if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) + virtio_store_mb(vq->weak_barriers, + &vring_used_event(&vq->split.vring), + cpu_to_virtio16(_vq->vdev, vq->last_used_idx)); + + LAST_ADD_TIME_INVALID(vq); + + END_USE(vq); + return ret; +} +EXPORT_SYMBOL_GPL(virtqueue_get_mapped_buf_split); + static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, unsigned int *len, void **ctx) @@ -817,6 +1160,34 @@ static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq) return true; } +void *virtqueue_detach_unused_mapped_buf_split(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + unsigned int i; + void *buf; + + START_USE(vq); + + for (i = 0; i < vq->split.vring.num; i++) { + if (!vq->split.desc_state[i].data) + continue; + /* detach_buf_split clears data, so grab it now. */ + buf = vq->split.desc_state[i].data; + detach_mapped_buf_split(vq, i, NULL); + vq->split.avail_idx_shadow--; + vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, + vq->split.avail_idx_shadow); + END_USE(vq); + return buf; + } + /* That should have freed everything. */ + BUG_ON(vq->vq.num_free != vq->split.vring.num); + + END_USE(vq); + return NULL; +} +EXPORT_SYMBOL_GPL(virtqueue_detach_unused_mapped_buf_split); + static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -2270,7 +2641,7 @@ bool virtqueue_is_broken(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); - return vq->broken; + return READ_ONCE(vq->broken); } EXPORT_SYMBOL_GPL(virtqueue_is_broken); @@ -2285,7 +2656,9 @@ void virtio_break_device(struct virtio_device *dev) spin_lock(&dev->vqs_list_lock); list_for_each_entry(_vq, &dev->vqs, list) { struct vring_virtqueue *vq = to_vvq(_vq); - vq->broken = true; + + /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ + WRITE_ONCE(vq->broken, true); } spin_unlock(&dev->vqs_list_lock); } diff --git a/drivers/w1/slaves/w1_ds28e04.c b/drivers/w1/slaves/w1_ds28e04.c index 8a640f1590784..06a9966f8c933 100644 --- a/drivers/w1/slaves/w1_ds28e04.c +++ b/drivers/w1/slaves/w1_ds28e04.c @@ -32,7 +32,7 @@ static int w1_strong_pullup = 1; module_param_named(strong_pullup, w1_strong_pullup, int, 0); /* enable/disable CRC checking on DS28E04-100 memory accesses */ -static char w1_enable_crccheck = 1; +static bool w1_enable_crccheck = true; #define W1_EEPROM_SIZE 512 #define W1_PAGE_COUNT 16 @@ -339,32 +339,18 @@ static BIN_ATTR_RW(pio, 1); static ssize_t crccheck_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (put_user(w1_enable_crccheck + 0x30, buf)) - return -EFAULT; - - return sizeof(w1_enable_crccheck); + return sysfs_emit(buf, "%d\n", w1_enable_crccheck); } static ssize_t crccheck_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - char val; - - if (count != 1 || !buf) - return -EINVAL; + int err = kstrtobool(buf, &w1_enable_crccheck); - if (get_user(val, buf)) - return -EFAULT; + if (err) + return err; - /* convert to decimal */ - val = val - 0x30; - if (val != 0 && val != 1) - return -EINVAL; - - /* set the new value */ - w1_enable_crccheck = val; - - return sizeof(w1_enable_crccheck); + return count; } static DEVICE_ATTR_RW(crccheck); diff --git a/drivers/w1/slaves/w1_therm.c b/drivers/w1/slaves/w1_therm.c index e028e00927991..cf1e01f0dede3 100644 --- a/drivers/w1/slaves/w1_therm.c +++ b/drivers/w1/slaves/w1_therm.c @@ -679,16 +679,20 @@ static ssize_t w1_seq_show(struct device *device, if (sl->reg_num.id == reg_num->id) seq = i; + if (w1_reset_bus(sl->master)) + goto error; + + /* Put the device into chain DONE state */ + w1_write_8(sl->master, W1_MATCH_ROM); + w1_write_block(sl->master, (u8 *)&rn, 8); w1_write_8(sl->master, W1_42_CHAIN); w1_write_8(sl->master, W1_42_CHAIN_DONE); w1_write_8(sl->master, W1_42_CHAIN_DONE_INV); - w1_read_block(sl->master, &ack, sizeof(ack)); /* check for acknowledgment */ ack = w1_read_8(sl->master); if (ack != W1_42_SUCCESS_CONFIRM_BYTE) goto error; - } /* Exit from CHAIN state */ diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 1aa42e879e633..3fabd4177b0ee 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1682,7 +1682,7 @@ config SIBYTE_WDOG config AR7_WDT tristate "TI AR7 Watchdog Timer" - depends on AR7 || (MIPS && COMPILE_TEST) + depends on AR7 || (MIPS && 32BIT && COMPILE_TEST) help Hardware driver for the TI AR7 Watchdog Timer. diff --git a/drivers/watchdog/f71808e_wdt.c b/drivers/watchdog/f71808e_wdt.c index 893cef70c1599..aa57498009c34 100644 --- a/drivers/watchdog/f71808e_wdt.c +++ b/drivers/watchdog/f71808e_wdt.c @@ -228,15 +228,17 @@ static int watchdog_set_timeout(int timeout) mutex_lock(&watchdog.lock); - watchdog.timeout = timeout; if (timeout > 0xff) { watchdog.timer_val = DIV_ROUND_UP(timeout, 60); watchdog.minutes_mode = true; + timeout = watchdog.timer_val * 60; } else { watchdog.timer_val = timeout; watchdog.minutes_mode = false; } + watchdog.timeout = timeout; + mutex_unlock(&watchdog.lock); return 0; diff --git a/drivers/watchdog/omap_wdt.c b/drivers/watchdog/omap_wdt.c index 9b91882fe3c41..6d7ccbc0b666c 100644 --- a/drivers/watchdog/omap_wdt.c +++ b/drivers/watchdog/omap_wdt.c @@ -268,8 +268,12 @@ static int omap_wdt_probe(struct platform_device *pdev) wdev->wdog.bootstatus = WDIOF_CARDRESET; } - if (!early_enable) + if (early_enable) { + omap_wdt_start(&wdev->wdog); + set_bit(WDOG_HW_RUNNING, &wdev->wdog.status); + } else { omap_wdt_disable(wdev); + } ret = watchdog_register_device(&wdev->wdog); if (ret) { diff --git a/drivers/watchdog/orion_wdt.c b/drivers/watchdog/orion_wdt.c index 8e6dfe76f9c9d..4ddb4ea2e4a32 100644 --- a/drivers/watchdog/orion_wdt.c +++ b/drivers/watchdog/orion_wdt.c @@ -52,7 +52,7 @@ #define WDT_A370_RATIO (1 << WDT_A370_RATIO_SHIFT) static bool nowayout = WATCHDOG_NOWAYOUT; -static int heartbeat = -1; /* module parameter (seconds) */ +static int heartbeat; /* module parameter (seconds) */ struct orion_watchdog; diff --git a/drivers/watchdog/ts4800_wdt.c b/drivers/watchdog/ts4800_wdt.c index c137ad2bd5c31..0ea554c7cda57 100644 --- a/drivers/watchdog/ts4800_wdt.c +++ b/drivers/watchdog/ts4800_wdt.c @@ -125,13 +125,16 @@ static int ts4800_wdt_probe(struct platform_device *pdev) ret = of_property_read_u32_index(np, "syscon", 1, ®); if (ret < 0) { dev_err(dev, "no offset in syscon\n"); + of_node_put(syscon_np); return ret; } /* allocate memory for watchdog struct */ wdt = devm_kzalloc(dev, sizeof(*wdt), GFP_KERNEL); - if (!wdt) + if (!wdt) { + of_node_put(syscon_np); return -ENOMEM; + } /* set regmap and offset to know where to write */ wdt->feed_offset = reg; diff --git a/drivers/watchdog/wdat_wdt.c b/drivers/watchdog/wdat_wdt.c index 88c5e6361aa05..fddbb39433bee 100644 --- a/drivers/watchdog/wdat_wdt.c +++ b/drivers/watchdog/wdat_wdt.c @@ -462,6 +462,7 @@ static int wdat_wdt_probe(struct platform_device *pdev) return ret; watchdog_set_nowayout(&wdat->wdd, nowayout); + watchdog_stop_on_reboot(&wdat->wdd); return devm_watchdog_register_device(dev, &wdat->wdd); } diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index ebb05517b6aa1..39bbb127852f9 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -43,6 +43,8 @@ #include #include #include +#include +#include #include #include #include @@ -56,6 +58,7 @@ #include #include #include +#include #include #include @@ -73,6 +76,12 @@ #include #include +#undef MODULE_PARAM_PREFIX +#define MODULE_PARAM_PREFIX "xen." + +static uint __read_mostly balloon_boot_timeout = 180; +module_param(balloon_boot_timeout, uint, 0444); + static int xen_hotplug_unpopulated; #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG @@ -117,7 +126,7 @@ static struct ctl_table xen_root[] = { #define EXTENT_ORDER (fls(XEN_PFN_PER_PAGE) - 1) /* - * balloon_process() state: + * balloon_thread() state: * * BP_DONE: done or nothing to do, * BP_WAIT: wait to be rescheduled, @@ -125,13 +134,15 @@ static struct ctl_table xen_root[] = { * BP_ECANCELED: error, balloon operation canceled. */ -enum bp_state { +static enum bp_state { BP_DONE, BP_WAIT, BP_EAGAIN, BP_ECANCELED -}; +} balloon_state = BP_DONE; +/* Main waiting point for xen-balloon thread. */ +static DECLARE_WAIT_QUEUE_HEAD(balloon_thread_wq); static DEFINE_MUTEX(balloon_mutex); @@ -146,10 +157,6 @@ static xen_pfn_t frame_list[PAGE_SIZE / sizeof(xen_pfn_t)]; static LIST_HEAD(ballooned_pages); static DECLARE_WAIT_QUEUE_HEAD(balloon_wq); -/* Main work function, always executed in process context. */ -static void balloon_process(struct work_struct *work); -static DECLARE_DELAYED_WORK(balloon_worker, balloon_process); - /* When ballooning out (allocating memory to return to Xen) we don't really want the kernel to try too hard since that can trigger the oom killer. */ #define GFP_BALLOON \ @@ -201,18 +208,15 @@ static struct page *balloon_next_page(struct page *page) return list_entry(next, struct page, lru); } -static enum bp_state update_schedule(enum bp_state state) +static void update_schedule(void) { - if (state == BP_WAIT) - return BP_WAIT; - - if (state == BP_ECANCELED) - return BP_ECANCELED; + if (balloon_state == BP_WAIT || balloon_state == BP_ECANCELED) + return; - if (state == BP_DONE) { + if (balloon_state == BP_DONE) { balloon_stats.schedule_delay = 1; balloon_stats.retry_count = 1; - return BP_DONE; + return; } ++balloon_stats.retry_count; @@ -221,7 +225,8 @@ static enum bp_state update_schedule(enum bp_state state) balloon_stats.retry_count > balloon_stats.max_retry_count) { balloon_stats.schedule_delay = 1; balloon_stats.retry_count = 1; - return BP_ECANCELED; + balloon_state = BP_ECANCELED; + return; } balloon_stats.schedule_delay <<= 1; @@ -229,7 +234,7 @@ static enum bp_state update_schedule(enum bp_state state) if (balloon_stats.schedule_delay > balloon_stats.max_schedule_delay) balloon_stats.schedule_delay = balloon_stats.max_schedule_delay; - return BP_EAGAIN; + balloon_state = BP_EAGAIN; } #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG @@ -383,7 +388,7 @@ static void xen_online_page(struct page *page, unsigned int order) static int xen_memory_notifier(struct notifier_block *nb, unsigned long val, void *v) { if (val == MEM_ONLINE) - schedule_delayed_work(&balloon_worker, 0); + wake_up(&balloon_thread_wq); return NOTIFY_OK; } @@ -508,50 +513,79 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) } /* - * As this is a work item it is guaranteed to run as a single instance only. + * Stop waiting if either state is BP_DONE and ballooning action is + * needed, or if the credit has changed while state is not BP_DONE. + */ +static bool balloon_thread_cond(long credit) +{ + if (balloon_state == BP_DONE) + credit = 0; + + return current_credit() != credit || kthread_should_stop(); +} + +/* + * As this is a kthread it is guaranteed to run as a single instance only. * We may of course race updates of the target counts (which are protected * by the balloon lock), or with changes to the Xen hard limit, but we will * recover from these in time. */ -static void balloon_process(struct work_struct *work) +static int balloon_thread(void *unused) { - enum bp_state state = BP_DONE; long credit; + unsigned long timeout; + + set_freezable(); + for (;;) { + switch (balloon_state) { + case BP_DONE: + case BP_ECANCELED: + timeout = 3600 * HZ; + break; + case BP_EAGAIN: + timeout = balloon_stats.schedule_delay * HZ; + break; + case BP_WAIT: + timeout = HZ; + break; + } + + credit = current_credit(); + + wait_event_freezable_timeout(balloon_thread_wq, + balloon_thread_cond(credit), timeout); + if (kthread_should_stop()) + return 0; - do { mutex_lock(&balloon_mutex); credit = current_credit(); if (credit > 0) { if (balloon_is_inflated()) - state = increase_reservation(credit); + balloon_state = increase_reservation(credit); else - state = reserve_additional_memory(); + balloon_state = reserve_additional_memory(); } if (credit < 0) { long n_pages; n_pages = min(-credit, si_mem_available()); - state = decrease_reservation(n_pages, GFP_BALLOON); - if (state == BP_DONE && n_pages != -credit && + balloon_state = decrease_reservation(n_pages, + GFP_BALLOON); + if (balloon_state == BP_DONE && n_pages != -credit && n_pages < totalreserve_pages) - state = BP_EAGAIN; + balloon_state = BP_EAGAIN; } - state = update_schedule(state); + update_schedule(); mutex_unlock(&balloon_mutex); cond_resched(); - - } while (credit && state == BP_DONE); - - /* Schedule more work if there is some still to be done. */ - if (state == BP_EAGAIN) - schedule_delayed_work(&balloon_worker, balloon_stats.schedule_delay * HZ); + } } /* Resets the Xen limit, sets new target, and kicks off processing. */ @@ -559,7 +593,7 @@ void balloon_set_new_target(unsigned long target) { /* No need for lock. Not read-modify-write updates. */ balloon_stats.target_pages = target; - schedule_delayed_work(&balloon_worker, 0); + wake_up(&balloon_thread_wq); } EXPORT_SYMBOL_GPL(balloon_set_new_target); @@ -664,7 +698,7 @@ void free_xenballooned_pages(int nr_pages, struct page **pages) /* The balloon may be too large now. Shrink it if needed. */ if (current_credit()) - schedule_delayed_work(&balloon_worker, 0); + wake_up(&balloon_thread_wq); mutex_unlock(&balloon_mutex); } @@ -696,6 +730,8 @@ static void __init balloon_add_region(unsigned long start_pfn, static int __init balloon_init(void) { + struct task_struct *task; + if (!xen_domain()) return -ENODEV; @@ -739,9 +775,50 @@ static int __init balloon_init(void) } #endif + task = kthread_run(balloon_thread, NULL, "xen-balloon"); + if (IS_ERR(task)) { + pr_err("xen-balloon thread could not be started, ballooning will not work!\n"); + return PTR_ERR(task); + } + /* Init the xen-balloon driver. */ xen_balloon_init(); return 0; } subsys_initcall(balloon_init); + +static int __init balloon_wait_finish(void) +{ + long credit, last_credit = 0; + unsigned long last_changed = 0; + + if (!xen_domain()) + return -ENODEV; + + /* PV guests don't need to wait. */ + if (xen_pv_domain() || !current_credit()) + return 0; + + pr_notice("Waiting for initial ballooning down having finished.\n"); + + while ((credit = current_credit()) < 0) { + if (credit != last_credit) { + last_changed = jiffies; + last_credit = credit; + } + if (balloon_state == BP_ECANCELED) { + pr_warn_once("Initial ballooning failed, %ld pages need to be freed.\n", + -credit); + if (jiffies - last_changed >= HZ * balloon_boot_timeout) + panic("Initial ballooning failed!\n"); + } + + schedule_timeout_interruptible(HZ / 10); + } + + pr_notice("Initial ballooning down finished.\n"); + + return 0; +} +late_initcall_sync(balloon_wait_finish); diff --git a/drivers/xen/features.c b/drivers/xen/features.c index 25c053b096051..2c306de228db3 100644 --- a/drivers/xen/features.c +++ b/drivers/xen/features.c @@ -29,6 +29,6 @@ void xen_setup_features(void) if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0) break; for (j = 0; j < 32; j++) - xen_features[i * 32 + j] = !!(fi.submap & 1<notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) { uint8_t *tmp = kmap(gref->page); tmp[gref->notify.pgoff] = 0; @@ -196,21 +190,16 @@ static void __del_gref(struct gntalloc_gref *gref) gref->notify.flags = 0; if (gref->gref_id) { - if (gnttab_query_foreign_access(gref->gref_id)) - return; - - if (!gnttab_end_foreign_access_ref(gref->gref_id, 0)) - return; - - gnttab_free_grant_reference(gref->gref_id); + if (gref->page) { + addr = (unsigned long)page_to_virt(gref->page); + gnttab_end_foreign_access(gref->gref_id, 0, addr); + } else + gnttab_free_grant_reference(gref->gref_id); } gref_size--; list_del(&gref->next_gref); - if (gref->page) - __free_page(gref->page); - kfree(gref); } diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h index 2f8b949c3eeb1..fab6f5a54d5b2 100644 --- a/drivers/xen/gntdev-common.h +++ b/drivers/xen/gntdev-common.h @@ -15,6 +15,8 @@ #include #include #include +#include +#include struct gntdev_dmabuf_priv; @@ -61,6 +63,7 @@ struct gntdev_grant_map { struct gnttab_unmap_grant_ref *unmap_ops; struct gnttab_map_grant_ref *kmap_ops; struct gnttab_unmap_grant_ref *kunmap_ops; + bool *being_removed; struct page **pages; unsigned long pages_vm_start; @@ -78,6 +81,11 @@ struct gntdev_grant_map { /* Needed to avoid allocation in gnttab_dma_free_pages(). */ xen_pfn_t *frames; #endif + + /* Number of live grants */ + atomic_t live_grants; + /* Needed to avoid allocation in __unmap_grant_pages */ + struct gntab_unmap_queue_data unmap_data; }; struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count, diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 59af76839fca9..0b88e8c868dfc 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -62,11 +63,12 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by " static atomic_t pages_mapped = ATOMIC_INIT(0); +/* True in PV mode, false otherwise */ static int use_ptemod; #define populate_freeable_maps use_ptemod -static int unmap_grant_pages(struct gntdev_grant_map *map, - int offset, int pages); +static void unmap_grant_pages(struct gntdev_grant_map *map, + int offset, int pages); static struct miscdevice gntdev_miscdev; @@ -123,6 +125,7 @@ static void gntdev_free_map(struct gntdev_grant_map *map) kfree(map->unmap_ops); kfree(map->kmap_ops); kfree(map->kunmap_ops); + kfree(map->being_removed); kfree(map); } @@ -142,12 +145,15 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count, add->kmap_ops = kcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL); add->kunmap_ops = kcalloc(count, sizeof(add->kunmap_ops[0]), GFP_KERNEL); add->pages = kcalloc(count, sizeof(add->pages[0]), GFP_KERNEL); + add->being_removed = + kcalloc(count, sizeof(add->being_removed[0]), GFP_KERNEL); if (NULL == add->grants || NULL == add->map_ops || NULL == add->unmap_ops || NULL == add->kmap_ops || NULL == add->kunmap_ops || - NULL == add->pages) + NULL == add->pages || + NULL == add->being_removed) goto err; #ifdef CONFIG_XEN_GRANT_DMA_ALLOC @@ -243,6 +249,35 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map) return; atomic_sub(map->count, &pages_mapped); + if (map->pages && !use_ptemod) { + /* + * Increment the reference count. This ensures that the + * subsequent call to unmap_grant_pages() will not wind up + * re-entering itself. It *can* wind up calling + * gntdev_put_map() recursively, but such calls will be with a + * reference count greater than 1, so they will return before + * this code is reached. The recursion depth is thus limited to + * 1. Do NOT use refcount_inc() here, as it will detect that + * the reference count is zero and WARN(). + */ + refcount_set(&map->users, 1); + + /* + * Unmap the grants. This may or may not be asynchronous, so it + * is possible that the reference count is 1 on return, but it + * could also be greater than 1. + */ + unmap_grant_pages(map, 0, map->count); + + /* Check if the memory now needs to be freed */ + if (!refcount_dec_and_test(&map->users)) + return; + + /* + * All pages have been returned to the hypervisor, so free the + * map. + */ + } if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) { notify_remote_via_evtchn(map->notify.event); @@ -298,6 +333,7 @@ static int set_grant_ptes_as_special(pte_t *pte, unsigned long addr, void *data) int gntdev_map_grant_pages(struct gntdev_grant_map *map) { + size_t alloced = 0; int i, err = 0; if (!use_ptemod) { @@ -346,87 +382,110 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map) map->pages, map->count); for (i = 0; i < map->count; i++) { - if (map->map_ops[i].status == GNTST_okay) + if (map->map_ops[i].status == GNTST_okay) { map->unmap_ops[i].handle = map->map_ops[i].handle; - else if (!err) + if (!use_ptemod) + alloced++; + } else if (!err) err = -EINVAL; if (map->flags & GNTMAP_device_map) map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr; if (use_ptemod) { - if (map->kmap_ops[i].status == GNTST_okay) + if (map->kmap_ops[i].status == GNTST_okay) { + if (map->map_ops[i].status == GNTST_okay) + alloced++; map->kunmap_ops[i].handle = map->kmap_ops[i].handle; - else if (!err) + } else if (!err) err = -EINVAL; } } + atomic_add(alloced, &map->live_grants); return err; } -static int __unmap_grant_pages(struct gntdev_grant_map *map, int offset, - int pages) +static void __unmap_grant_pages_done(int result, + struct gntab_unmap_queue_data *data) { - int i, err = 0; - struct gntab_unmap_queue_data unmap_data; + unsigned int i; + struct gntdev_grant_map *map = data->data; + unsigned int offset = data->unmap_ops - map->unmap_ops; + for (i = 0; i < data->count; i++) { + WARN_ON(map->unmap_ops[offset+i].status && + map->unmap_ops[offset+i].handle != -1); + pr_debug("unmap handle=%d st=%d\n", + map->unmap_ops[offset+i].handle, + map->unmap_ops[offset+i].status); + map->unmap_ops[offset+i].handle = -1; + } + /* + * Decrease the live-grant counter. This must happen after the loop to + * prevent premature reuse of the grants by gnttab_mmap(). + */ + atomic_sub(data->count, &map->live_grants); + + /* Release reference taken by __unmap_grant_pages */ + gntdev_put_map(NULL, map); +} + +static void __unmap_grant_pages(struct gntdev_grant_map *map, int offset, + int pages) +{ if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) { int pgno = (map->notify.addr >> PAGE_SHIFT); + if (pgno >= offset && pgno < offset + pages) { /* No need for kmap, pages are in lowmem */ uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno])); + tmp[map->notify.addr & (PAGE_SIZE-1)] = 0; map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE; } } - unmap_data.unmap_ops = map->unmap_ops + offset; - unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL; - unmap_data.pages = map->pages + offset; - unmap_data.count = pages; + map->unmap_data.unmap_ops = map->unmap_ops + offset; + map->unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL; + map->unmap_data.pages = map->pages + offset; + map->unmap_data.count = pages; + map->unmap_data.done = __unmap_grant_pages_done; + map->unmap_data.data = map; + refcount_inc(&map->users); /* to keep map alive during async call below */ - err = gnttab_unmap_refs_sync(&unmap_data); - if (err) - return err; - - for (i = 0; i < pages; i++) { - if (map->unmap_ops[offset+i].status) - err = -EINVAL; - pr_debug("unmap handle=%d st=%d\n", - map->unmap_ops[offset+i].handle, - map->unmap_ops[offset+i].status); - map->unmap_ops[offset+i].handle = -1; - } - return err; + gnttab_unmap_refs_async(&map->unmap_data); } -static int unmap_grant_pages(struct gntdev_grant_map *map, int offset, - int pages) +static void unmap_grant_pages(struct gntdev_grant_map *map, int offset, + int pages) { - int range, err = 0; + int range; + + if (atomic_read(&map->live_grants) == 0) + return; /* Nothing to do */ pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages); /* It is possible the requested range will have a "hole" where we * already unmapped some of the grants. Only unmap valid ranges. */ - while (pages && !err) { - while (pages && map->unmap_ops[offset].handle == -1) { + while (pages) { + while (pages && map->being_removed[offset]) { offset++; pages--; } range = 0; while (range < pages) { - if (map->unmap_ops[offset+range].handle == -1) + if (map->being_removed[offset + range]) break; + map->being_removed[offset + range] = true; range++; } - err = __unmap_grant_pages(map, offset, range); + if (range) + __unmap_grant_pages(map, offset, range); offset += range; pages -= range; } - - return err; } /* ------------------------------------------------------------------ */ @@ -496,7 +555,6 @@ static int unmap_if_in_range(struct gntdev_grant_map *map, bool blockable) { unsigned long mstart, mend; - int err; if (!in_range(map, start, end)) return 0; @@ -510,10 +568,9 @@ static int unmap_if_in_range(struct gntdev_grant_map *map, map->index, map->count, map->vma->vm_start, map->vma->vm_end, start, end, mstart, mend); - err = unmap_grant_pages(map, + unmap_grant_pages(map, (mstart - map->vma->vm_start) >> PAGE_SHIFT, (mend - mstart) >> PAGE_SHIFT); - WARN_ON(err); return 0; } @@ -554,7 +611,6 @@ static void mn_release(struct mmu_notifier *mn, { struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn); struct gntdev_grant_map *map; - int err; mutex_lock(&priv->lock); list_for_each_entry(map, &priv->maps, next) { @@ -563,8 +619,7 @@ static void mn_release(struct mmu_notifier *mn, pr_debug("map %d+%d (%lx %lx)\n", map->index, map->count, map->vma->vm_start, map->vma->vm_end); - err = unmap_grant_pages(map, /* offset */ 0, map->count); - WARN_ON(err); + unmap_grant_pages(map, /* offset */ 0, map->count); } list_for_each_entry(map, &priv->freeable_maps, next) { if (!map->vma) @@ -572,8 +627,7 @@ static void mn_release(struct mmu_notifier *mn, pr_debug("map %d+%d (%lx %lx)\n", map->index, map->count, map->vma->vm_start, map->vma->vm_end); - err = unmap_grant_pages(map, /* offset */ 0, map->count); - WARN_ON(err); + unmap_grant_pages(map, /* offset */ 0, map->count); } mutex_unlock(&priv->lock); } @@ -1102,6 +1156,10 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) goto unlock_out; } + if (atomic_read(&map->live_grants)) { + err = -EAGAIN; + goto unlock_out; + } refcount_inc(&map->users); vma->vm_ops = &gntdev_vmops; diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 49b381e104efa..c75dc17d1a617 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -135,12 +135,9 @@ struct gnttab_ops { */ unsigned long (*end_foreign_transfer_ref)(grant_ref_t ref); /* - * Query the status of a grant entry. Ref parameter is reference of - * queried grant entry, return value is the status of queried entry. - * Detailed status(writing/reading) can be gotten from the return value - * by bit operations. + * Read the frame number related to a given grant reference. */ - int (*query_foreign_access)(grant_ref_t ref); + unsigned long (*read_frame)(grant_ref_t ref); }; struct unmap_refs_callback_data { @@ -285,22 +282,6 @@ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, } EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access); -static int gnttab_query_foreign_access_v1(grant_ref_t ref) -{ - return gnttab_shared.v1[ref].flags & (GTF_reading|GTF_writing); -} - -static int gnttab_query_foreign_access_v2(grant_ref_t ref) -{ - return grstatus[ref] & (GTF_reading|GTF_writing); -} - -int gnttab_query_foreign_access(grant_ref_t ref) -{ - return gnttab_interface->query_foreign_access(ref); -} -EXPORT_SYMBOL_GPL(gnttab_query_foreign_access); - static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref, int readonly) { u16 flags, nflags; @@ -354,6 +335,16 @@ int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly) } EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref); +static unsigned long gnttab_read_frame_v1(grant_ref_t ref) +{ + return gnttab_shared.v1[ref].frame; +} + +static unsigned long gnttab_read_frame_v2(grant_ref_t ref) +{ + return gnttab_shared.v2[ref].full_page.frame; +} + struct deferred_entry { struct list_head list; grant_ref_t ref; @@ -383,12 +374,9 @@ static void gnttab_handle_deferred(struct timer_list *unused) spin_unlock_irqrestore(&gnttab_list_lock, flags); if (_gnttab_end_foreign_access_ref(entry->ref, entry->ro)) { put_free_entry(entry->ref); - if (entry->page) { - pr_debug("freeing g.e. %#x (pfn %#lx)\n", - entry->ref, page_to_pfn(entry->page)); - put_page(entry->page); - } else - pr_info("freeing g.e. %#x\n", entry->ref); + pr_debug("freeing g.e. %#x (pfn %#lx)\n", + entry->ref, page_to_pfn(entry->page)); + put_page(entry->page); kfree(entry); entry = NULL; } else { @@ -413,9 +401,18 @@ static void gnttab_handle_deferred(struct timer_list *unused) static void gnttab_add_deferred(grant_ref_t ref, bool readonly, struct page *page) { - struct deferred_entry *entry = kmalloc(sizeof(*entry), GFP_ATOMIC); + struct deferred_entry *entry; + gfp_t gfp = (in_atomic() || irqs_disabled()) ? GFP_ATOMIC : GFP_KERNEL; const char *what = KERN_WARNING "leaking"; + entry = kmalloc(sizeof(*entry), gfp); + if (!page) { + unsigned long gfn = gnttab_interface->read_frame(ref); + + page = pfn_to_page(gfn_to_pfn(gfn)); + get_page(page); + } + if (entry) { unsigned long flags; @@ -436,11 +433,21 @@ static void gnttab_add_deferred(grant_ref_t ref, bool readonly, what, ref, page ? page_to_pfn(page) : -1); } +int gnttab_try_end_foreign_access(grant_ref_t ref) +{ + int ret = _gnttab_end_foreign_access_ref(ref, 0); + + if (ret) + put_free_entry(ref); + + return ret; +} +EXPORT_SYMBOL_GPL(gnttab_try_end_foreign_access); + void gnttab_end_foreign_access(grant_ref_t ref, int readonly, unsigned long page) { - if (gnttab_end_foreign_access_ref(ref, readonly)) { - put_free_entry(ref); + if (gnttab_try_end_foreign_access(ref)) { if (page != 0) put_page(virt_to_page(page)); } else @@ -1297,7 +1304,7 @@ static const struct gnttab_ops gnttab_v1_ops = { .update_entry = gnttab_update_entry_v1, .end_foreign_access_ref = gnttab_end_foreign_access_ref_v1, .end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v1, - .query_foreign_access = gnttab_query_foreign_access_v1, + .read_frame = gnttab_read_frame_v1, }; static const struct gnttab_ops gnttab_v2_ops = { @@ -1309,7 +1316,7 @@ static const struct gnttab_ops gnttab_v2_ops = { .update_entry = gnttab_update_entry_v2, .end_foreign_access_ref = gnttab_end_foreign_access_ref_v2, .end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v2, - .query_foreign_access = gnttab_query_foreign_access_v2, + .read_frame = gnttab_read_frame_v2, }; static bool gnttab_need_v2(void) diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 4ea47de6e3676..3a01a17d614a7 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -810,11 +810,12 @@ static long privcmd_ioctl_mmap_resource(struct file *file, unsigned int domid = (xdata.flags & XENMEM_rsrc_acq_caller_owned) ? DOMID_SELF : kdata.dom; - int num; + int num, *errs = (int *)pfns; + BUILD_BUG_ON(sizeof(*errs) > sizeof(*pfns)); num = xen_remap_domain_mfn_array(vma, kdata.addr & PAGE_MASK, - pfns, kdata.num, (int *)pfns, + pfns, kdata.num, errs, vma->vm_page_prot, domid, vma->vm_private_data); @@ -824,7 +825,7 @@ static long privcmd_ioctl_mmap_resource(struct file *file, unsigned int i; for (i = 0; i < num; i++) { - rc = pfns[i]; + rc = errs[i]; if (rc < 0) break; } diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c index 57592a6b5c9e3..91e52e05555eb 100644 --- a/drivers/xen/pvcalls-front.c +++ b/drivers/xen/pvcalls-front.c @@ -337,8 +337,8 @@ static void free_active_ring(struct sock_mapping *map) if (!map->active.ring) return; - free_pages((unsigned long)map->active.data.in, - map->active.ring->ring_order); + free_pages_exact(map->active.data.in, + PAGE_SIZE << map->active.ring->ring_order); free_page((unsigned long)map->active.ring); } @@ -352,8 +352,8 @@ static int alloc_active_ring(struct sock_mapping *map) goto out; map->active.ring->ring_order = PVCALLS_RING_ORDER; - bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - PVCALLS_RING_ORDER); + bytes = alloc_pages_exact(PAGE_SIZE << PVCALLS_RING_ORDER, + GFP_KERNEL | __GFP_ZERO); if (!bytes) goto out; diff --git a/drivers/xen/xen-pciback/conf_space_capability.c b/drivers/xen/xen-pciback/conf_space_capability.c index e5694133ebe57..42f0f64fcba47 100644 --- a/drivers/xen/xen-pciback/conf_space_capability.c +++ b/drivers/xen/xen-pciback/conf_space_capability.c @@ -160,7 +160,7 @@ static void *pm_ctrl_init(struct pci_dev *dev, int offset) } out: - return ERR_PTR(err); + return err ? ERR_PTR(err) : NULL; } static const struct config_field caplist_pm[] = { diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 81eddb8529ffc..8739dd0ee870d 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -366,7 +366,14 @@ int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr, unsigned int nr_pages, grant_ref_t *grefs) { int err; - int i, j; + unsigned int i; + grant_ref_t gref_head; + + err = gnttab_alloc_grant_references(nr_pages, &gref_head); + if (err) { + xenbus_dev_fatal(dev, err, "granting access to ring page"); + return err; + } for (i = 0; i < nr_pages; i++) { unsigned long gfn; @@ -376,23 +383,14 @@ int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr, else gfn = virt_to_gfn(vaddr); - err = gnttab_grant_foreign_access(dev->otherend_id, gfn, 0); - if (err < 0) { - xenbus_dev_fatal(dev, err, - "granting access to ring page"); - goto fail; - } - grefs[i] = err; + grefs[i] = gnttab_claim_grant_reference(&gref_head); + gnttab_grant_foreign_access_ref(grefs[i], dev->otherend_id, + gfn, 0); vaddr = vaddr + XEN_PAGE_SIZE; } return 0; - -fail: - for (j = 0; j < i; j++) - gnttab_end_foreign_access_ref(grefs[j], 0); - return err; } EXPORT_SYMBOL_GPL(xenbus_grant_ring); diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 652894d619677..b911a91bce6b7 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -846,7 +846,7 @@ static struct notifier_block xenbus_resume_nb = { static int __init xenbus_init(void) { - int err = 0; + int err; uint64_t v = 0; xen_store_domain_type = XS_UNKNOWN; @@ -886,6 +886,29 @@ static int __init xenbus_init(void) err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v); if (err) goto out_error; + /* + * Uninitialized hvm_params are zero and return no error. + * Although it is theoretically possible to have + * HVM_PARAM_STORE_PFN set to zero on purpose, in reality it is + * not zero when valid. If zero, it means that Xenstore hasn't + * been properly initialized. Instead of attempting to map a + * wrong guest physical address return error. + * + * Also recognize all bits set as an invalid value. + */ + if (!v || !~v) { + err = -ENOENT; + goto out_error; + } + /* Avoid truncation on 32-bit. */ +#if BITS_PER_LONG == 32 + if (v > ULONG_MAX) { + pr_err("%s: cannot handle HVM_PARAM_STORE_PFN=%llx > ULONG_MAX\n", + __func__, v); + err = -EINVAL; + goto out_error; + } +#endif xen_store_gfn = (unsigned long)v; xen_store_interface = xen_remap(xen_store_gfn << XEN_PAGE_SHIFT, @@ -920,8 +943,10 @@ static int __init xenbus_init(void) */ proc_create_mount_point("xen"); #endif + return 0; out_error: + xen_store_domain_type = XS_UNKNOWN; return err; } diff --git a/drivers/xen/xlate_mmu.c b/drivers/xen/xlate_mmu.c index 7b1077f0abcb0..c8aa4f5f85db1 100644 --- a/drivers/xen/xlate_mmu.c +++ b/drivers/xen/xlate_mmu.c @@ -261,7 +261,6 @@ int __init xen_xlate_map_ballooned_pages(xen_pfn_t **gfns, void **virt, return 0; } -EXPORT_SYMBOL_GPL(xen_xlate_map_ballooned_pages); struct remap_pfn { struct mm_struct *mm; diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 60328b21c5fbd..9795fe0f1ba21 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -641,14 +641,10 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode, if (stat->st_result_mask & P9_STATS_NLINK) set_nlink(inode, stat->st_nlink); if (stat->st_result_mask & P9_STATS_MODE) { - inode->i_mode = stat->st_mode; - if ((S_ISBLK(inode->i_mode)) || - (S_ISCHR(inode->i_mode))) - init_special_inode(inode, inode->i_mode, - inode->i_rdev); + mode = stat->st_mode & S_IALLUGO; + mode |= inode->i_mode & ~S_IALLUGO; + inode->i_mode = mode; } - if (stat->st_result_mask & P9_STATS_RDEV) - inode->i_rdev = new_decode_dev(stat->st_rdev); if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE) && stat->st_result_mask & P9_STATS_SIZE) v9fs_i_size_write(inode, stat->st_size); diff --git a/fs/Kconfig b/fs/Kconfig index 00b1af97c119b..597de17a36250 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -213,19 +213,27 @@ config HUGETLBFS config HUGETLB_PAGE def_bool HUGETLBFS -config HUGETLB_PAGE_FREE_VMEMMAP +# +# Select this config option from the architecture Kconfig, if it is preferred +# to enable the feature of minimizing overhead of struct page associated with +# each HugeTLB page. +# +config ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP + bool + +config HUGETLB_PAGE_OPTIMIZE_VMEMMAP def_bool HUGETLB_PAGE - depends on X86_64 + depends on ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP depends on SPARSEMEM_VMEMMAP -config HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON - bool "Default freeing vmemmap pages of HugeTLB to on" +config HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON + bool "Default optimizing vmemmap pages of HugeTLB to on" default n - depends on HUGETLB_PAGE_FREE_VMEMMAP + depends on HUGETLB_PAGE_OPTIMIZE_VMEMMAP help - When using HUGETLB_PAGE_FREE_VMEMMAP, the freeing unused vmemmap + When using HUGETLB_PAGE_OPTIMIZE_VMEMMAP, the optimizing unused vmemmap pages associated with each HugeTLB page is default off. Say Y here - to enable freeing vmemmap pages of HugeTLB by default. It can then + to enable optimizing vmemmap pages of HugeTLB by default. It can then be disabled on the command line via hugetlb_free_vmemmap=off. config MEMFD_CREATE diff --git a/fs/afs/dir.c b/fs/afs/dir.c index e7494cd49ce7b..3a355a209919b 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -415,8 +415,11 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode, } /* skip if starts before the current position */ - if (offset < curr) + if (offset < curr) { + if (next > curr) + ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent); continue; + } /* found the next entry */ if (!dir_emit(ctx, dire->u.name, nlen, @@ -977,9 +980,9 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, */ static int afs_d_revalidate_rcu(struct dentry *dentry) { - struct afs_vnode *dvnode, *vnode; + struct afs_vnode *dvnode; struct dentry *parent; - struct inode *dir, *inode; + struct inode *dir; long dir_version, de_version; _enter("%p", dentry); @@ -1009,18 +1012,6 @@ static int afs_d_revalidate_rcu(struct dentry *dentry) return -ECHILD; } - /* Check to see if the vnode referred to by the dentry still - * has a callback. - */ - if (d_really_is_positive(dentry)) { - inode = d_inode_rcu(dentry); - if (inode) { - vnode = AFS_FS_I(inode); - if (!afs_check_validity(vnode)) - return -ECHILD; - } - } - return 1; /* Still valid */ } @@ -1056,17 +1047,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) if (IS_ERR(key)) key = NULL; - if (d_really_is_positive(dentry)) { - inode = d_inode(dentry); - if (inode) { - vnode = AFS_FS_I(inode); - afs_validate(vnode, key); - if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) - goto out_bad; - } - } - - /* lock down the parent dentry so we can peer at it */ + /* Hold the parent dentry so we can peer at it */ parent = dget_parent(dentry); dir = AFS_FS_I(d_inode(parent)); @@ -1075,7 +1056,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) if (test_bit(AFS_VNODE_DELETED, &dir->flags)) { _debug("%pd: parent dir deleted", dentry); - goto out_bad_parent; + goto not_found; } /* We only need to invalidate a dentry if the server's copy changed @@ -1101,12 +1082,12 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) case 0: /* the filename maps to something */ if (d_really_is_negative(dentry)) - goto out_bad_parent; + goto not_found; inode = d_inode(dentry); if (is_bad_inode(inode)) { printk("kAFS: afs_d_revalidate: %pd2 has bad inode\n", dentry); - goto out_bad_parent; + goto not_found; } vnode = AFS_FS_I(inode); @@ -1128,9 +1109,6 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) dentry, fid.unique, vnode->fid.unique, vnode->vfs_inode.i_generation); - write_seqlock(&vnode->cb_lock); - set_bit(AFS_VNODE_DELETED, &vnode->flags); - write_sequnlock(&vnode->cb_lock); goto not_found; } goto out_valid; @@ -1145,7 +1123,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) default: _debug("failed to iterate dir %pd: %d", parent, ret); - goto out_bad_parent; + goto not_found; } out_valid: @@ -1156,16 +1134,9 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) _leave(" = 1 [valid]"); return 1; - /* the dirent, if it exists, now points to a different vnode */ not_found: - spin_lock(&dentry->d_lock); - dentry->d_flags |= DCACHE_NFSFS_RENAMED; - spin_unlock(&dentry->d_lock); - -out_bad_parent: _debug("dropping dentry %pd2", dentry); dput(parent); -out_bad: key_put(key); _leave(" = 0 [bad]"); diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 4f58b28a1edd2..622363af4c1bf 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -734,10 +734,23 @@ int afs_getattr(const struct path *path, struct kstat *stat, { struct inode *inode = d_inode(path->dentry); struct afs_vnode *vnode = AFS_FS_I(inode); - int seq = 0; + struct key *key; + int ret, seq = 0; _enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation); + if (vnode->volume && + !(query_flags & AT_STATX_DONT_SYNC) && + !test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { + key = afs_request_key(vnode->volume->cell); + if (IS_ERR(key)) + return PTR_ERR(key); + ret = afs_validate(vnode, key); + key_put(key); + if (ret < 0) + return ret; + } + do { read_seqbegin_or_lock(&vnode->cb_lock, &seq); generic_fillattr(inode, stat); diff --git a/fs/aio.c b/fs/aio.c index b469aa57239ec..848868f30caa9 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -182,8 +182,9 @@ struct poll_iocb { struct file *file; struct wait_queue_head *head; __poll_t events; - bool done; bool cancelled; + bool work_scheduled; + bool work_need_resched; struct wait_queue_entry wait; struct work_struct work; }; @@ -1625,6 +1626,51 @@ static void aio_poll_put_work(struct work_struct *work) iocb_put(iocb); } +/* + * Safely lock the waitqueue which the request is on, synchronizing with the + * case where the ->poll() provider decides to free its waitqueue early. + * + * Returns true on success, meaning that req->head->lock was locked, req->wait + * is on req->head, and an RCU read lock was taken. Returns false if the + * request was already removed from its waitqueue (which might no longer exist). + */ +static bool poll_iocb_lock_wq(struct poll_iocb *req) +{ + wait_queue_head_t *head; + + /* + * While we hold the waitqueue lock and the waitqueue is nonempty, + * wake_up_pollfree() will wait for us. However, taking the waitqueue + * lock in the first place can race with the waitqueue being freed. + * + * We solve this as eventpoll does: by taking advantage of the fact that + * all users of wake_up_pollfree() will RCU-delay the actual free. If + * we enter rcu_read_lock() and see that the pointer to the queue is + * non-NULL, we can then lock it without the memory being freed out from + * under us, then check whether the request is still on the queue. + * + * Keep holding rcu_read_lock() as long as we hold the queue lock, in + * case the caller deletes the entry from the queue, leaving it empty. + * In that case, only RCU prevents the queue memory from being freed. + */ + rcu_read_lock(); + head = smp_load_acquire(&req->head); + if (head) { + spin_lock(&head->lock); + if (!list_empty(&req->wait.entry)) + return true; + spin_unlock(&head->lock); + } + rcu_read_unlock(); + return false; +} + +static void poll_iocb_unlock_wq(struct poll_iocb *req) +{ + spin_unlock(&req->head->lock); + rcu_read_unlock(); +} + static void aio_poll_complete_work(struct work_struct *work) { struct poll_iocb *req = container_of(work, struct poll_iocb, work); @@ -1644,14 +1690,27 @@ static void aio_poll_complete_work(struct work_struct *work) * avoid further branches in the fast path. */ spin_lock_irq(&ctx->ctx_lock); - if (!mask && !READ_ONCE(req->cancelled)) { - add_wait_queue(req->head, &req->wait); - spin_unlock_irq(&ctx->ctx_lock); - return; - } + if (poll_iocb_lock_wq(req)) { + if (!mask && !READ_ONCE(req->cancelled)) { + /* + * The request isn't actually ready to be completed yet. + * Reschedule completion if another wakeup came in. + */ + if (req->work_need_resched) { + schedule_work(&req->work); + req->work_need_resched = false; + } else { + req->work_scheduled = false; + } + poll_iocb_unlock_wq(req); + spin_unlock_irq(&ctx->ctx_lock); + return; + } + list_del_init(&req->wait.entry); + poll_iocb_unlock_wq(req); + } /* else, POLLFREE has freed the waitqueue, so we must complete */ list_del_init(&iocb->ki_list); iocb->ki_res.res = mangle_poll(mask); - req->done = true; spin_unlock_irq(&ctx->ctx_lock); iocb_put(iocb); @@ -1663,13 +1722,14 @@ static int aio_poll_cancel(struct kiocb *iocb) struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw); struct poll_iocb *req = &aiocb->poll; - spin_lock(&req->head->lock); - WRITE_ONCE(req->cancelled, true); - if (!list_empty(&req->wait.entry)) { - list_del_init(&req->wait.entry); - schedule_work(&aiocb->poll.work); - } - spin_unlock(&req->head->lock); + if (poll_iocb_lock_wq(req)) { + WRITE_ONCE(req->cancelled, true); + if (!req->work_scheduled) { + schedule_work(&aiocb->poll.work); + req->work_scheduled = true; + } + poll_iocb_unlock_wq(req); + } /* else, the request was force-cancelled by POLLFREE already */ return 0; } @@ -1686,20 +1746,26 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, if (mask && !(mask & req->events)) return 0; - list_del_init(&req->wait.entry); - - if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) { + /* + * Complete the request inline if possible. This requires that three + * conditions be met: + * 1. An event mask must have been passed. If a plain wakeup was done + * instead, then mask == 0 and we have to call vfs_poll() to get + * the events, so inline completion isn't possible. + * 2. The completion work must not have already been scheduled. + * 3. ctx_lock must not be busy. We have to use trylock because we + * already hold the waitqueue lock, so this inverts the normal + * locking order. Use irqsave/irqrestore because not all + * filesystems (e.g. fuse) call this function with IRQs disabled, + * yet IRQs have to be disabled before ctx_lock is obtained. + */ + if (mask && !req->work_scheduled && + spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) { struct kioctx *ctx = iocb->ki_ctx; - /* - * Try to complete the iocb inline if we can. Use - * irqsave/irqrestore because not all filesystems (e.g. fuse) - * call this function with IRQs disabled and because IRQs - * have to be disabled before ctx_lock is obtained. - */ + list_del_init(&req->wait.entry); list_del(&iocb->ki_list); iocb->ki_res.res = mangle_poll(mask); - req->done = true; if (iocb->ki_eventfd && eventfd_signal_count()) { iocb = NULL; INIT_WORK(&req->work, aio_poll_put_work); @@ -1709,7 +1775,43 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, if (iocb) iocb_put(iocb); } else { - schedule_work(&req->work); + /* + * Schedule the completion work if needed. If it was already + * scheduled, record that another wakeup came in. + * + * Don't remove the request from the waitqueue here, as it might + * not actually be complete yet (we won't know until vfs_poll() + * is called), and we must not miss any wakeups. POLLFREE is an + * exception to this; see below. + */ + if (req->work_scheduled) { + req->work_need_resched = true; + } else { + schedule_work(&req->work); + req->work_scheduled = true; + } + + /* + * If the waitqueue is being freed early but we can't complete + * the request inline, we have to tear down the request as best + * we can. That means immediately removing the request from its + * waitqueue and preventing all further accesses to the + * waitqueue via the request. We also need to schedule the + * completion work (done above). Also mark the request as + * cancelled, to potentially skip an unneeded call to ->poll(). + */ + if (mask & POLLFREE) { + WRITE_ONCE(req->cancelled, true); + list_del_init(&req->wait.entry); + + /* + * Careful: this *must* be the last step, since as soon + * as req->head is NULL'ed out, the request can be + * completed and freed, since aio_poll_complete_work() + * will no longer need to take the waitqueue lock. + */ + smp_store_release(&req->head, NULL); + } } return 1; } @@ -1717,6 +1819,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, struct aio_poll_table { struct poll_table_struct pt; struct aio_kiocb *iocb; + bool queued; int error; }; @@ -1727,11 +1830,12 @@ aio_poll_queue_proc(struct file *file, struct wait_queue_head *head, struct aio_poll_table *pt = container_of(p, struct aio_poll_table, pt); /* multiple wait queues per file are not supported */ - if (unlikely(pt->iocb->poll.head)) { + if (unlikely(pt->queued)) { pt->error = -EINVAL; return; } + pt->queued = true; pt->error = 0; pt->iocb->poll.head = head; add_wait_queue(head, &pt->iocb->poll.wait); @@ -1756,12 +1860,14 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP; req->head = NULL; - req->done = false; req->cancelled = false; + req->work_scheduled = false; + req->work_need_resched = false; apt.pt._qproc = aio_poll_queue_proc; apt.pt._key = req->events; apt.iocb = aiocb; + apt.queued = false; apt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */ /* initialized the list so that we can do list_empty checks */ @@ -1770,23 +1876,35 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) mask = vfs_poll(req->file, &apt.pt) & req->events; spin_lock_irq(&ctx->ctx_lock); - if (likely(req->head)) { - spin_lock(&req->head->lock); - if (unlikely(list_empty(&req->wait.entry))) { - if (apt.error) + if (likely(apt.queued)) { + bool on_queue = poll_iocb_lock_wq(req); + + if (!on_queue || req->work_scheduled) { + /* + * aio_poll_wake() already either scheduled the async + * completion work, or completed the request inline. + */ + if (apt.error) /* unsupported case: multiple queues */ cancel = true; apt.error = 0; mask = 0; } if (mask || apt.error) { + /* Steal to complete synchronously. */ list_del_init(&req->wait.entry); } else if (cancel) { + /* Cancel if possible (may be too late though). */ WRITE_ONCE(req->cancelled, true); - } else if (!req->done) { /* actually waiting for an event */ + } else if (on_queue) { + /* + * Actually waiting for an event, so add the request to + * active_reqs so that it can be cancelled if needed. + */ list_add_tail(&aiocb->ki_list, &ctx->active_reqs); aiocb->ki_cancel = aio_poll_cancel; } - spin_unlock(&req->head->lock); + if (on_queue) + poll_iocb_unlock_wq(req); } if (mask) { /* no async, we'd stolen it */ aiocb->ki_res.res = mangle_poll(mask); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 7ce3cfd965d25..72cd871544ac0 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -583,7 +583,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, vaddr = eppnt->p_vaddr; if (interp_elf_ex->e_type == ET_EXEC || load_addr_set) - elf_type |= MAP_FIXED_NOREPLACE; + elf_type |= MAP_FIXED; else if (no_base && interp_elf_ex->e_type == ET_DYN) load_addr = -vaddr; diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 196f9f64d075c..c999bc0c0691f 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -422,6 +422,30 @@ static void old_reloc(unsigned long rl) /****************************************************************************/ +static inline u32 __user *skip_got_header(u32 __user *rp) +{ + if (IS_ENABLED(CONFIG_RISCV)) { + /* + * RISC-V has a 16 byte GOT PLT header for elf64-riscv + * and 8 byte GOT PLT header for elf32-riscv. + * Skip the whole GOT PLT header, since it is reserved + * for the dynamic linker (ld.so). + */ + u32 rp_val0, rp_val1; + + if (get_user(rp_val0, rp)) + return rp; + if (get_user(rp_val1, rp + 1)) + return rp; + + if (rp_val0 == 0xffffffff && rp_val1 == 0xffffffff) + rp += 4; + else if (rp_val0 == 0xffffffff) + rp += 2; + } + return rp; +} + static int load_flat_file(struct linux_binprm *bprm, struct lib_info *libinfo, int id, unsigned long *extra_stack) { @@ -769,7 +793,8 @@ static int load_flat_file(struct linux_binprm *bprm, * image. */ if (flags & FLAT_FLAG_GOTPIC) { - for (rp = (u32 __user *)datapos; ; rp++) { + rp = skip_got_header((u32 __user *) datapos); + for (; ; rp++) { u32 addr, rp_val; if (get_user(rp_val, rp)) return -EFAULT; diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 11be02459b876..eb592b92aa9cd 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -237,6 +237,13 @@ static void run_ordered_work(struct __btrfs_workqueue *wq, ordered_list); if (!test_bit(WORK_DONE_BIT, &work->flags)) break; + /* + * Orders all subsequent loads after reading WORK_DONE_BIT, + * paired with the smp_mb__before_atomic in btrfs_work_helper + * this guarantees that the ordered function will see all + * updates from ordinary work function. + */ + smp_rmb(); /* * we are going to call the ordered done function, but @@ -325,6 +332,13 @@ static void btrfs_work_helper(struct work_struct *normal_work) thresh_exec_hook(wq); work->func(work); if (need_order) { + /* + * Ensures all memory accesses done in the work function are + * ordered before setting the WORK_DONE_BIT. Ensuring the thread + * which is going to executed the ordered work sees them. + * Pairs with the smp_rmb in run_ordered_work. + */ + smp_mb__before_atomic(); set_bit(WORK_DONE_BIT, &work->flags); run_ordered_work(wq, work); } diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 7f644a58db511..238120bf5c092 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -391,7 +391,7 @@ static int is_shared_data_backref(struct preftrees *preftrees, u64 bytenr) struct rb_node **p = &preftrees->direct.root.rb_root.rb_node; struct rb_node *parent = NULL; struct prelim_ref *ref = NULL; - struct prelim_ref target = {0}; + struct prelim_ref target = {}; int result; target.parent = bytenr; @@ -1208,7 +1208,12 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0); if (ret < 0) goto out; - BUG_ON(ret == 0); + if (ret == 0) { + /* This shouldn't happen, indicates a bug or fs corruption. */ + ASSERT(ret != 0); + ret = -EUCLEAN; + goto out; + } #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS if (trans && likely(trans->type != __TRANS_DUMMY) && @@ -1356,10 +1361,18 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, goto out; if (!ret && extent_item_pos) { /* - * we've recorded that parent, so we must extend - * its inode list here + * We've recorded that parent, so we must extend + * its inode list here. + * + * However if there was corruption we may not + * have found an eie, return an error in this + * case. */ - BUG_ON(!eie); + ASSERT(eie); + if (!eie) { + ret = -EUCLEAN; + goto out; + } while (eie->next) eie = eie->next; eie->next = ref->inode_list; diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index e98d6ea35ea80..bcf19dfb0af35 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -2388,7 +2388,6 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans) struct btrfs_path *path = NULL; LIST_HEAD(dirty); struct list_head *io = &cur_trans->io_bgs; - int num_started = 0; int loops = 0; spin_lock(&cur_trans->dirty_bgs_lock); @@ -2455,7 +2454,6 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans) cache->io_ctl.inode = NULL; ret = btrfs_write_out_cache(trans, cache, path); if (ret == 0 && cache->io_ctl.inode) { - num_started++; should_put = 0; /* @@ -2556,7 +2554,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans) int should_put; struct btrfs_path *path; struct list_head *io = &cur_trans->io_bgs; - int num_started = 0; path = btrfs_alloc_path(); if (!path) @@ -2614,7 +2611,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans) cache->io_ctl.inode = NULL; ret = btrfs_write_out_cache(trans, cache, path); if (ret == 0 && cache->io_ctl.inode) { - num_started++; should_put = 0; list_add_tail(&cache->io_list, io); } else { diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index f853835c409c1..f3ff57b931580 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -268,6 +268,21 @@ static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode, mod); } +/* + * Called every time after doing a buffered, direct IO or memory mapped write. + * + * This is to ensure that if we write to a file that was previously fsynced in + * the current transaction, then try to fsync it again in the same transaction, + * we will know that there were changes in the file and that it needs to be + * logged. + */ +static inline void btrfs_set_inode_last_sub_trans(struct btrfs_inode *inode) +{ + spin_lock(&inode->lock); + inode->last_sub_trans = inode->root->log_transid; + spin_unlock(&inode->lock); +} + static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation) { int ret = 0; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index dac30b00d14b7..822c615840e84 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2658,12 +2658,9 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root, { struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *b; - int root_lock; + int root_lock = 0; int level = 0; - /* We try very hard to do read locks on the root */ - root_lock = BTRFS_READ_LOCK; - if (p->search_commit_root) { /* * The commit roots are read only so we always do read locks, @@ -2701,6 +2698,9 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root, goto out; } + /* We try very hard to do read locks on the root */ + root_lock = BTRFS_READ_LOCK; + /* * If the level is set to maximum, we can skip trying to get the read * lock. @@ -2727,6 +2727,17 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root, level = btrfs_header_level(b); out: + /* + * The root may have failed to write out at some point, and thus is no + * longer valid, return an error in this case. + */ + if (!extent_buffer_uptodate(b)) { + if (root_lock) + btrfs_tree_unlock_rw(b, root_lock); + free_extent_buffer(b); + return ERR_PTR(-EIO); + } + p->nodes[level] = b; if (!p->skip_locking) p->locks[level] = root_lock; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index dacd67dca43fe..a4b3e6f6bf021 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2894,6 +2894,29 @@ int open_ctree(struct super_block *sb, */ fs_info->compress_type = BTRFS_COMPRESS_ZLIB; + /* + * Flag our filesystem as having big metadata blocks if they are bigger + * than the page size + */ + if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) { + if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA)) + btrfs_info(fs_info, + "flagging fs with big metadata feature"); + features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA; + } + + /* Set up fs_info before parsing mount options */ + nodesize = btrfs_super_nodesize(disk_super); + sectorsize = btrfs_super_sectorsize(disk_super); + stripesize = sectorsize; + fs_info->dirty_metadata_batch = nodesize * (1 + ilog2(nr_cpu_ids)); + fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids)); + + /* Cache block sizes */ + fs_info->nodesize = nodesize; + fs_info->sectorsize = sectorsize; + fs_info->stripesize = stripesize; + ret = btrfs_parse_options(fs_info, options, sb->s_flags); if (ret) { err = ret; @@ -2904,7 +2927,7 @@ int open_ctree(struct super_block *sb, ~BTRFS_FEATURE_INCOMPAT_SUPP; if (features) { btrfs_err(fs_info, - "cannot mount because of unsupported optional features (%llx)", + "cannot mount because of unsupported optional features (0x%llx)", features); err = -EINVAL; goto fail_csum; @@ -2920,28 +2943,6 @@ int open_ctree(struct super_block *sb, if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA) btrfs_info(fs_info, "has skinny extents"); - /* - * flag our filesystem as having big metadata blocks if - * they are bigger than the page size - */ - if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) { - if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA)) - btrfs_info(fs_info, - "flagging fs with big metadata feature"); - features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA; - } - - nodesize = btrfs_super_nodesize(disk_super); - sectorsize = btrfs_super_sectorsize(disk_super); - stripesize = sectorsize; - fs_info->dirty_metadata_batch = nodesize * (1 + ilog2(nr_cpu_ids)); - fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids)); - - /* Cache block sizes */ - fs_info->nodesize = nodesize; - fs_info->sectorsize = sectorsize; - fs_info->stripesize = stripesize; - /* * mixed block groups end up with duplicate but slightly offset * extent buffers for the same range. It leads to corruptions @@ -2964,7 +2965,7 @@ int open_ctree(struct super_block *sb, ~BTRFS_FEATURE_COMPAT_RO_SUPP; if (!sb_rdonly(sb) && features) { btrfs_err(fs_info, - "cannot mount read-write because of unsupported optional features (%llx)", + "cannot mount read-write because of unsupported optional features (0x%llx)", features); err = -EINVAL; goto fail_csum; @@ -3144,7 +3145,8 @@ int open_ctree(struct super_block *sb, goto fail_sysfs; } - if (!sb_rdonly(sb) && !btrfs_check_rw_degradable(fs_info, NULL)) { + if (!sb_rdonly(sb) && fs_info->fs_devices->missing_devices && + !btrfs_check_rw_degradable(fs_info, NULL)) { btrfs_warn(fs_info, "writable mount is not allowed due to too many missing devices"); goto fail_sysfs; @@ -3634,11 +3636,23 @@ static void btrfs_end_empty_barrier(struct bio *bio) */ static void write_dev_flush(struct btrfs_device *device) { - struct request_queue *q = bdev_get_queue(device->bdev); struct bio *bio = device->flush_bio; +#ifndef CONFIG_BTRFS_FS_CHECK_INTEGRITY + /* + * When a disk has write caching disabled, we skip submission of a bio + * with flush and sync requests before writing the superblock, since + * it's not needed. However when the integrity checker is enabled, this + * results in reports that there are metadata blocks referred by a + * superblock that were not properly flushed. So don't skip the bio + * submission only when the integrity checker is enabled for the sake + * of simplicity, since this is a debug tool and not meant for use in + * non-debug builds. + */ + struct request_queue *q = bdev_get_queue(device->bdev); if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) return; +#endif bio_reset(bio); bio->bi_end_io = btrfs_end_empty_barrier; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5273965226534..19d2104c04629 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4596,6 +4596,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, out_free_delayed: btrfs_free_delayed_extent_op(extent_op); out_free_buf: + btrfs_tree_unlock(buf); free_extent_buffer(buf); out_free_reserved: btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 9108a73423f70..95ddeb4777970 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3754,6 +3754,12 @@ static void set_btree_ioerr(struct page *page) if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) return; + /* + * A read may stumble upon this buffer later, make sure that it gets an + * error and knows there was an error. + */ + clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); + /* * If we error out, we should add back the dirty_metadata_bytes * to make it consistent. diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index fcf1807cc8dd7..c8def2bdf2474 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -202,7 +202,7 @@ struct extent_buffer { */ struct extent_changeset { /* How many bytes are set/cleared in this operation */ - unsigned int bytes_changed; + u64 bytes_changed; /* Changed ranges */ struct ulist range_changed; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index bbd77db8dd31b..29677510f91c1 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2004,14 +2004,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, inode_unlock(inode); - /* - * We also have to set last_sub_trans to the current log transid, - * otherwise subsequent syncs to a file that's been synced in this - * transaction will appear to have already occurred. - */ - spin_lock(&BTRFS_I(inode)->lock); - BTRFS_I(inode)->last_sub_trans = root->log_transid; - spin_unlock(&BTRFS_I(inode)->lock); + btrfs_set_inode_last_sub_trans(BTRFS_I(inode)); + if (num_written > 0) num_written = generic_write_sync(iocb, num_written); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 54b607a3cc3f2..7755a0362a3ad 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -543,7 +543,7 @@ static noinline int compress_file_range(struct async_chunk *async_chunk) * inode has not been flagged as nocompress. This flag can * change at any time if we discover bad compression ratios. */ - if (nr_pages > 1 && inode_need_compress(inode, start, end)) { + if (inode_need_compress(inode, start, end)) { WARN_ON(pages); pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS); if (!pages) { @@ -1200,11 +1200,6 @@ static noinline void async_cow_submit(struct btrfs_work *work) nr_pages = (async_chunk->end - async_chunk->start + PAGE_SIZE) >> PAGE_SHIFT; - /* atomic_sub_return implies a barrier */ - if (atomic_sub_return(nr_pages, &fs_info->async_delalloc_pages) < - 5 * SZ_1M) - cond_wake_up_nomb(&fs_info->async_submit_wait); - /* * ->inode could be NULL if async_chunk_start has failed to compress, * in which case we don't have anything to submit, yet we need to @@ -1213,6 +1208,11 @@ static noinline void async_cow_submit(struct btrfs_work *work) */ if (async_chunk->inode) submit_compressed_extents(async_chunk); + + /* atomic_sub_return implies a barrier */ + if (atomic_sub_return(nr_pages, &fs_info->async_delalloc_pages) < + 5 * SZ_1M) + cond_wake_up_nomb(&fs_info->async_submit_wait); } static noinline void async_cow_free(struct btrfs_work *work) @@ -9250,9 +9250,7 @@ vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf) set_page_dirty(page); SetPageUptodate(page); - BTRFS_I(inode)->last_trans = fs_info->generation; - BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; - BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit; + btrfs_set_inode_last_sub_trans(BTRFS_I(inode)); unlock_extent_cached(io_tree, page_start, page_end, &cached_state); @@ -10810,9 +10808,19 @@ static int btrfs_add_swap_extent(struct swap_info_struct *sis, struct btrfs_swap_info *bsi) { unsigned long nr_pages; + unsigned long max_pages; u64 first_ppage, first_ppage_reported, next_ppage; int ret; + /* + * Our swapfile may have had its size extended after the swap header was + * written. In that case activating the swapfile should not go beyond + * the max size set in the swap header. + */ + if (bsi->nr_pages >= sis->max) + return 0; + + max_pages = sis->max - bsi->nr_pages; first_ppage = ALIGN(bsi->block_start, PAGE_SIZE) >> PAGE_SHIFT; next_ppage = ALIGN_DOWN(bsi->block_start + bsi->block_len, PAGE_SIZE) >> PAGE_SHIFT; @@ -10820,6 +10828,7 @@ static int btrfs_add_swap_extent(struct swap_info_struct *sis, if (first_ppage >= next_ppage) return 0; nr_pages = next_ppage - first_ppage; + nr_pages = min(nr_pages, max_pages); first_ppage_reported = first_ppage; if (bsi->start == 0) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index bb034e19a2a8a..5a3006c75d63f 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -890,6 +890,14 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info) int ret = 0; int slot; + /* + * We need to have subvol_sem write locked, to prevent races between + * concurrent tasks trying to enable quotas, because we will unlock + * and relock qgroup_ioctl_lock before setting fs_info->quota_root + * and before setting BTRFS_FS_QUOTA_ENABLED. + */ + lockdep_assert_held_write(&fs_info->subvol_sem); + mutex_lock(&fs_info->qgroup_ioctl_lock); if (fs_info->quota_root) goto out; @@ -1035,8 +1043,19 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info) goto out_free_path; } + mutex_unlock(&fs_info->qgroup_ioctl_lock); + /* + * Commit the transaction while not holding qgroup_ioctl_lock, to avoid + * a deadlock with tasks concurrently doing other qgroup operations, such + * adding/removing qgroups or adding/deleting qgroup relations for example, + * because all qgroup operations first start or join a transaction and then + * lock the qgroup_ioctl_lock mutex. + * We are safe from a concurrent task trying to enable quotas, by calling + * this function, since we are serialized by fs_info->subvol_sem. + */ ret = btrfs_commit_transaction(trans); trans = NULL; + mutex_lock(&fs_info->qgroup_ioctl_lock); if (ret) goto out_free_path; @@ -1086,11 +1105,33 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) struct btrfs_trans_handle *trans = NULL; int ret = 0; + /* + * We need to have subvol_sem write locked, to prevent races between + * concurrent tasks trying to disable quotas, because we will unlock + * and relock qgroup_ioctl_lock across BTRFS_FS_QUOTA_ENABLED changes. + */ + lockdep_assert_held_write(&fs_info->subvol_sem); + mutex_lock(&fs_info->qgroup_ioctl_lock); if (!fs_info->quota_root) goto out; + + /* + * Unlock the qgroup_ioctl_lock mutex before waiting for the rescan worker to + * complete. Otherwise we can deadlock because btrfs_remove_qgroup() needs + * to lock that mutex while holding a transaction handle and the rescan + * worker needs to commit a transaction. + */ mutex_unlock(&fs_info->qgroup_ioctl_lock); + /* + * Request qgroup rescan worker to complete and wait for it. This wait + * must be done before transaction start for quota disable since it may + * deadlock with transaction by the qgroup rescan worker. + */ + clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); + btrfs_qgroup_wait_for_completion(fs_info, false); + /* * 1 For the root item * @@ -1106,14 +1147,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) if (IS_ERR(trans)) { ret = PTR_ERR(trans); trans = NULL; + set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); goto out; } if (!fs_info->quota_root) goto out; - clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); - btrfs_qgroup_wait_for_completion(fs_info, false); spin_lock(&fs_info->qgroup_lock); quota_root = fs_info->quota_root; fs_info->quota_root = NULL; @@ -3285,6 +3325,9 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, btrfs_warn(fs_info, "qgroup rescan init failed, qgroup is not enabled"); ret = -EINVAL; + } else if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { + /* Quota disable is in progress */ + ret = -EBUSY; } if (ret) { diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 612411c74550f..0d07ebe511e7f 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -371,7 +371,8 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id, key.offset = ref_id; again: ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); - BUG_ON(ret < 0); + if (ret < 0) + goto out; if (ret == 0) { leaf = path->nodes[0]; ref = btrfs_item_ptr(leaf, path->slots[0], diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index fc688af57c231..e258fc484cea2 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -5005,6 +5005,10 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) lock_page(page); if (!PageUptodate(page)) { unlock_page(page); + btrfs_err(fs_info, + "send: IO error at offset %llu for inode %llu root %llu", + page_offset(page), sctx->cur_ino, + sctx->send_root->root_key.objectid); put_page(page); ret = -EIO; break; diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 90500b6c41fc6..1cd39f6a9c3ad 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -262,9 +262,10 @@ static void __btrfs_dump_space_info(struct btrfs_fs_info *fs_info, { lockdep_assert_held(&info->lock); - btrfs_info(fs_info, "space_info %llu has %llu free, is %sfull", + /* The free space could be negative in case of overcommit */ + btrfs_info(fs_info, "space_info %llu has %lld free, is %sfull", info->flags, - info->total_bytes - btrfs_space_info_used(info, true), + (s64)(info->total_bytes - btrfs_space_info_used(info, true)), info->full ? "" : "not "); btrfs_info(fs_info, "space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu", diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index d8a7d460e436a..cbede328bda5b 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -160,7 +160,7 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, spin_lock(&BTRFS_I(inode)->lock); BTRFS_I(inode)->last_trans = trans->transaction->transid; BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; - BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit; + BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans - 1; spin_unlock(&BTRFS_I(inode)->lock); } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 5412361d0c270..b7bfecfc2ea33 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -719,7 +719,9 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, */ ret = btrfs_lookup_data_extent(fs_info, ins.objectid, ins.offset); - if (ret == 0) { + if (ret < 0) { + goto out; + } else if (ret == 0) { btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, ins.objectid, ins.offset, 0); @@ -898,9 +900,11 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, } /* - * helper function to see if a given name and sequence number found - * in an inode back reference are already in a directory and correctly - * point to this inode + * See if a given name and sequence number found in an inode back reference are + * already in a directory and correctly point to this inode. + * + * Returns: < 0 on error, 0 if the directory entry does not exists and 1 if it + * exists. */ static noinline int inode_in_dir(struct btrfs_root *root, struct btrfs_path *path, @@ -909,29 +913,35 @@ static noinline int inode_in_dir(struct btrfs_root *root, { struct btrfs_dir_item *di; struct btrfs_key location; - int match = 0; + int ret = 0; di = btrfs_lookup_dir_index_item(NULL, root, path, dirid, index, name, name_len, 0); - if (di && !IS_ERR(di)) { + if (IS_ERR(di)) { + if (PTR_ERR(di) != -ENOENT) + ret = PTR_ERR(di); + goto out; + } else if (di) { btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); if (location.objectid != objectid) goto out; - } else + } else { goto out; - btrfs_release_path(path); + } + btrfs_release_path(path); di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0); - if (di && !IS_ERR(di)) { - btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); - if (location.objectid != objectid) - goto out; - } else + if (IS_ERR(di)) { + ret = PTR_ERR(di); goto out; - match = 1; + } else if (di) { + btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); + if (location.objectid == objectid) + ret = 1; + } out: btrfs_release_path(path); - return match; + return ret; } /* @@ -1158,7 +1168,10 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans, /* look for a conflicting sequence number */ di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), ref_index, name, namelen, 0); - if (di && !IS_ERR(di)) { + if (IS_ERR(di)) { + if (PTR_ERR(di) != -ENOENT) + return PTR_ERR(di); + } else if (di) { ret = drop_one_dir_item(trans, root, path, dir, di); if (ret) return ret; @@ -1168,7 +1181,9 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans, /* look for a conflicting name */ di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir), name, namelen, 0); - if (di && !IS_ERR(di)) { + if (IS_ERR(di)) { + return PTR_ERR(di); + } else if (di) { ret = drop_one_dir_item(trans, root, path, dir, di); if (ret) return ret; @@ -1293,6 +1308,15 @@ static int unlink_old_inode_refs(struct btrfs_trans_handle *trans, inode, name, namelen); kfree(name); iput(dir); + /* + * Whenever we need to check if a name exists or not, we + * check the subvolume tree. So after an unlink we must + * run delayed items, so that future checks for a name + * during log replay see that the name does not exists + * anymore. + */ + if (!ret) + ret = btrfs_run_delayed_items(trans); if (ret) goto out; goto again; @@ -1493,10 +1517,12 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, if (ret) goto out; - /* if we already have a perfect match, we're done */ - if (!inode_in_dir(root, path, btrfs_ino(BTRFS_I(dir)), - btrfs_ino(BTRFS_I(inode)), ref_index, - name, namelen)) { + ret = inode_in_dir(root, path, btrfs_ino(BTRFS_I(dir)), + btrfs_ino(BTRFS_I(inode)), ref_index, + name, namelen); + if (ret < 0) { + goto out; + } else if (ret == 0) { /* * look for a conflicting back reference in the * metadata. if we find one we have to unlink that name @@ -1542,6 +1568,15 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, */ if (!ret && inode->i_nlink == 0) inc_nlink(inode); + /* + * Whenever we need to check if a name exists or + * not, we check the subvolume tree. So after an + * unlink we must run delayed items, so that future + * checks for a name during log replay see that the + * name does not exists anymore. + */ + if (!ret) + ret = btrfs_run_delayed_items(trans); } if (ret < 0) goto out; @@ -1554,6 +1589,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, btrfs_update_inode(trans, root, inode); } + /* Else, ret == 1, we already have a perfect match, we're done. */ ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen; kfree(name); @@ -1942,8 +1978,8 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, struct btrfs_key log_key; struct inode *dir; u8 log_type; - int exists; - int ret = 0; + bool exists; + int ret; bool update_size = (key->type == BTRFS_DIR_INDEX_KEY); bool name_added = false; @@ -1963,12 +1999,12 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, name_len); btrfs_dir_item_key_to_cpu(eb, di, &log_key); - exists = btrfs_lookup_inode(trans, root, path, &log_key, 0); - if (exists == 0) - exists = 1; - else - exists = 0; + ret = btrfs_lookup_inode(trans, root, path, &log_key, 0); btrfs_release_path(path); + if (ret < 0) + goto out; + exists = (ret == 0); + ret = 0; if (key->type == BTRFS_DIR_ITEM_KEY) { dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid, @@ -1983,7 +2019,14 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, ret = -EINVAL; goto out; } - if (IS_ERR_OR_NULL(dst_di)) { + + if (dst_di == ERR_PTR(-ENOENT)) + dst_di = NULL; + + if (IS_ERR(dst_di)) { + ret = PTR_ERR(dst_di); + goto out; + } else if (!dst_di) { /* we need a sequence number to insert, so we only * do inserts for the BTRFS_DIR_INDEX_KEY types */ @@ -2465,7 +2508,9 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, else { ret = find_dir_range(log, path, dirid, key_type, &range_start, &range_end); - if (ret != 0) + if (ret < 0) + goto out; + else if (ret > 0) break; } @@ -4222,7 +4267,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans, /* * Log all prealloc extents beyond the inode's i_size to make sure we do not - * lose them after doing a fast fsync and replaying the log. We scan the + * lose them after doing a full/fast fsync and replaying the log. We scan the * subvolume's root instead of iterating the inode's extent map tree because * otherwise we can log incorrect extent items based on extent map conversion. * That can happen due to the fact that extent maps are merged when they @@ -5015,6 +5060,7 @@ static int copy_inode_items_to_log(struct btrfs_trans_handle *trans, struct btrfs_log_ctx *ctx, bool *need_log_inode_item) { + const u64 i_size = i_size_read(&inode->vfs_inode); struct btrfs_root *root = inode->root; int ins_start_slot = 0; int ins_nr = 0; @@ -5035,13 +5081,21 @@ static int copy_inode_items_to_log(struct btrfs_trans_handle *trans, if (min_key->type > max_key->type) break; - if (min_key->type == BTRFS_INODE_ITEM_KEY) + if (min_key->type == BTRFS_INODE_ITEM_KEY) { *need_log_inode_item = false; - - if ((min_key->type == BTRFS_INODE_REF_KEY || - min_key->type == BTRFS_INODE_EXTREF_KEY) && - inode->generation == trans->transid && - !recursive_logging) { + } else if (min_key->type == BTRFS_EXTENT_DATA_KEY && + min_key->offset >= i_size) { + /* + * Extents at and beyond eof are logged with + * btrfs_log_prealloc_extents(). + * Only regular files have BTRFS_EXTENT_DATA_KEY keys, + * and no keys greater than that, so bail out. + */ + break; + } else if ((min_key->type == BTRFS_INODE_REF_KEY || + min_key->type == BTRFS_INODE_EXTREF_KEY) && + inode->generation == trans->transid && + !recursive_logging) { u64 other_ino = 0; u64 other_parent = 0; @@ -5072,10 +5126,8 @@ static int copy_inode_items_to_log(struct btrfs_trans_handle *trans, btrfs_release_path(path); goto next_key; } - } - - /* Skip xattrs, we log them later with btrfs_log_all_xattrs() */ - if (min_key->type == BTRFS_XATTR_ITEM_KEY) { + } else if (min_key->type == BTRFS_XATTR_ITEM_KEY) { + /* Skip xattrs, logged later with btrfs_log_all_xattrs() */ if (ins_nr == 0) goto next_slot; ret = copy_items(trans, inode, dst_path, path, @@ -5128,9 +5180,21 @@ static int copy_inode_items_to_log(struct btrfs_trans_handle *trans, break; } } - if (ins_nr) + if (ins_nr) { ret = copy_items(trans, inode, dst_path, path, ins_start_slot, ins_nr, inode_only, logged_isize); + if (ret) + return ret; + } + + if (inode_only == LOG_INODE_ALL && S_ISREG(inode->vfs_inode.i_mode)) { + /* + * Release the path because otherwise we might attempt to double + * lock the same leaf with btrfs_log_prealloc_extents() below. + */ + btrfs_release_path(path); + ret = btrfs_log_prealloc_extents(trans, inode, dst_path); + } return ret; } @@ -5230,6 +5294,18 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, mutex_lock(&inode->log_mutex); } + /* + * For symlinks, we must always log their content, which is stored in an + * inline extent, otherwise we could end up with an empty symlink after + * log replay, which is invalid on linux (symlink(2) returns -ENOENT if + * one attempts to create an empty symlink). + * We don't need to worry about flushing delalloc, because when we create + * the inline extent when the symlink is created (we never have delalloc + * for symlinks). + */ + if (S_ISLNK(inode->vfs_inode.i_mode)) + inode_only = LOG_INODE_ALL; + /* * a brute force approach to making sure we get the most uptodate * copies of everything. @@ -5643,7 +5719,7 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, } ctx->log_new_dentries = false; - if (type == BTRFS_FT_DIR || type == BTRFS_FT_SYMLINK) + if (type == BTRFS_FT_DIR) log_mode = LOG_INODE_ALL; ret = btrfs_log_inode(trans, root, BTRFS_I(di_inode), log_mode, 0, LLONG_MAX, ctx); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 3e3529c600cb7..c7706a769de12 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -742,6 +742,8 @@ static int btrfs_free_stale_devices(const char *path, struct btrfs_device *device, *tmp_device; int ret = 0; + lockdep_assert_held(&uuid_mutex); + if (path) ret = -ENOENT; @@ -1181,11 +1183,12 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) struct btrfs_device *orig_dev; int ret = 0; + lockdep_assert_held(&uuid_mutex); + fs_devices = alloc_fs_devices(orig->fsid, NULL); if (IS_ERR(fs_devices)) return fs_devices; - mutex_lock(&orig->device_list_mutex); fs_devices->total_devices = orig->total_devices; list_for_each_entry(orig_dev, &orig->devices, dev_list) { @@ -1217,10 +1220,8 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) device->fs_devices = fs_devices; fs_devices->num_devices++; } - mutex_unlock(&orig->device_list_mutex); return fs_devices; error: - mutex_unlock(&orig->device_list_mutex); free_fs_devices(fs_devices); return ERR_PTR(ret); } @@ -1311,8 +1312,13 @@ static void btrfs_close_one_device(struct btrfs_device *device) fs_devices->rw_devices--; } - if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) + if (device->devid == BTRFS_DEV_REPLACE_DEVID) + clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state); + + if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) { + clear_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state); fs_devices->missing_devices--; + } btrfs_close_bdev(device); @@ -2156,8 +2162,11 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, u64 num_devices; int ret = 0; - mutex_lock(&uuid_mutex); - + /* + * The device list in fs_devices is accessed without locks (neither + * uuid_mutex nor device_list_mutex) as it won't change on a mounted + * filesystem and another device rm cannot run. + */ num_devices = btrfs_num_devices(fs_info); ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1); @@ -2168,7 +2177,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, if (IS_ERR(device)) { if (PTR_ERR(device) == -ENOENT && - strcmp(device_path, "missing") == 0) + device_path && strcmp(device_path, "missing") == 0) ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND; else ret = PTR_ERR(device); @@ -2201,11 +2210,9 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, mutex_unlock(&fs_info->chunk_mutex); } - mutex_unlock(&uuid_mutex); ret = btrfs_shrink_device(device, 0); if (!ret) btrfs_reada_remove_dev(device); - mutex_lock(&uuid_mutex); if (ret) goto error_undo; @@ -2287,7 +2294,6 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, } out: - mutex_unlock(&uuid_mutex); return ret; error_undo: @@ -4314,10 +4320,12 @@ static int balance_kthread(void *data) struct btrfs_fs_info *fs_info = data; int ret = 0; + sb_start_write(fs_info->sb); mutex_lock(&fs_info->balance_mutex); if (fs_info->balance_ctl) ret = btrfs_balance(fs_info, fs_info->balance_ctl, NULL); mutex_unlock(&fs_info->balance_mutex); + sb_end_write(fs_info->sb); return ret; } @@ -7375,12 +7383,12 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info) * do another round of validation checks. */ if (total_dev != fs_info->fs_devices->total_devices) { - btrfs_err(fs_info, - "super_num_devices %llu mismatch with num_devices %llu found here", + btrfs_warn(fs_info, +"super block num_devices %llu mismatch with DEV_ITEM count %llu, will be repaired on next transaction commit", btrfs_super_num_devices(fs_info->super_copy), total_dev); - ret = -EINVAL; - goto error; + fs_info->fs_devices->total_devices = total_dev; + btrfs_set_super_num_devices(fs_info->super_copy, total_dev); } if (btrfs_super_total_bytes(fs_info->super_copy) < fs_info->fs_devices->total_rw_bytes) { diff --git a/fs/buffer.c b/fs/buffer.c index 847cfffd9c071..86e21ed6e2eae 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1120,12 +1120,19 @@ EXPORT_SYMBOL(mark_buffer_dirty); void mark_buffer_write_io_error(struct buffer_head *bh) { + struct super_block *sb; + set_buffer_write_io_error(bh); /* FIXME: do we need to set this in both places? */ if (bh->b_page && bh->b_page->mapping) mapping_set_error(bh->b_page->mapping, -EIO); if (bh->b_assoc_map) mapping_set_error(bh->b_assoc_map, -EIO); + rcu_read_lock(); + sb = READ_ONCE(bh->b_bdev->bd_super); + if (sb) + errseq_set(&sb->s_wb_err, -EIO); + rcu_read_unlock(); } EXPORT_SYMBOL(mark_buffer_write_io_error); diff --git a/fs/cachefiles/Kconfig b/fs/cachefiles/Kconfig index ae559ed5b3b33..50d8379686ccc 100644 --- a/fs/cachefiles/Kconfig +++ b/fs/cachefiles/Kconfig @@ -38,3 +38,15 @@ config CACHEFILES_HISTOGRAM See Documentation/filesystems/caching/cachefiles.txt for more information. + +config CACHEFILES_ONDEMAND + bool "Support for on-demand read" + depends on CACHEFILES + default n + help + This permits userspace to enable the cachefiles on-demand read mode. + In this mode, when a cache miss occurs, responsibility for fetching + the data lies with the cachefiles backend instead of with the netfs + and is delegated to userspace. + + If unsure, say N. diff --git a/fs/cachefiles/Makefile b/fs/cachefiles/Makefile index 891dedda59054..5001187f129e1 100644 --- a/fs/cachefiles/Makefile +++ b/fs/cachefiles/Makefile @@ -7,6 +7,7 @@ cachefiles-y := \ bind.o \ daemon.o \ interface.o \ + io.o \ key.o \ main.o \ namei.o \ @@ -15,5 +16,6 @@ cachefiles-y := \ xattr.o cachefiles-$(CONFIG_CACHEFILES_HISTOGRAM) += proc.o +cachefiles-$(CONFIG_CACHEFILES_ONDEMAND) += ondemand.o obj-$(CONFIG_CACHEFILES) := cachefiles.o diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c index dfb14dbddf51d..7b3bc2a250ebe 100644 --- a/fs/cachefiles/bind.c +++ b/fs/cachefiles/bind.c @@ -46,11 +46,6 @@ int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args) cache->bcull_percent < cache->brun_percent && cache->brun_percent < 100); - if (*args) { - pr_err("'bind' command doesn't take an argument\n"); - return -EINVAL; - } - if (!cache->rootdirname) { pr_err("No cache directory specified\n"); return -EINVAL; @@ -62,6 +57,18 @@ int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args) return -EBUSY; } + if (IS_ENABLED(CONFIG_CACHEFILES_ONDEMAND)) { + if (!strcmp(args, "ondemand")) { + set_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags); + } else if (*args) { + pr_err("Invalid argument to the 'bind' command\n"); + return -EINVAL; + } + } else if (*args) { + pr_err("'bind' command doesn't take an argument\n"); + return -EINVAL; + } + /* make sure we have copies of the tag and dirname strings */ if (!cache->tag) { /* the tag string is released by the fops->release() diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c index 752c1e43416f5..8b67432c1c708 100644 --- a/fs/cachefiles/daemon.c +++ b/fs/cachefiles/daemon.c @@ -73,6 +73,10 @@ static const struct cachefiles_daemon_cmd cachefiles_daemon_cmds[] = { { "inuse", cachefiles_daemon_inuse }, { "secctx", cachefiles_daemon_secctx }, { "tag", cachefiles_daemon_tag }, +#ifdef CONFIG_CACHEFILES_ONDEMAND + { "copen", cachefiles_ondemand_copen }, + { "restore", cachefiles_ondemand_restore }, +#endif { "", NULL } }; @@ -105,6 +109,10 @@ static int cachefiles_daemon_open(struct inode *inode, struct file *file) cache->active_nodes = RB_ROOT; rwlock_init(&cache->active_lock); init_waitqueue_head(&cache->daemon_pollwq); + refcount_set(&cache->unbind_pincount, 1); + + xa_init_flags(&cache->reqs, XA_FLAGS_ALLOC); + xa_init_flags(&cache->ondemand_ids, XA_FLAGS_ALLOC1); /* set default caching limits * - limit at 1% free space and/or free files @@ -123,6 +131,53 @@ static int cachefiles_daemon_open(struct inode *inode, struct file *file) return 0; } +static void cachefiles_flush_reqs(struct cachefiles_cache *cache) +{ + struct xarray *xa = &cache->reqs; + struct cachefiles_req *req; + unsigned long index; + + /* + * Make sure the following two operations won't be reordered. + * 1) set CACHEFILES_DEAD bit + * 2) flush requests in the xarray + * Otherwise the request may be enqueued after xarray has been + * flushed, leaving the orphan request never being completed. + * + * CPU 1 CPU 2 + * ===== ===== + * flush requests in the xarray + * test CACHEFILES_DEAD bit + * enqueue the request + * set CACHEFILES_DEAD bit + */ + smp_mb(); + + xa_lock(xa); + xa_for_each(xa, index, req) { + req->error = -EIO; + complete(&req->done); + } + xa_unlock(xa); + xa_destroy(&cache->reqs); + xa_destroy(&cache->ondemand_ids); +} + +void cachefiles_put_unbind_pincount(struct cachefiles_cache *cache) +{ + if (refcount_dec_and_test(&cache->unbind_pincount)) { + cachefiles_daemon_unbind(cache); + ASSERT(!cache->active_nodes.rb_node); + cachefiles_open = 0; + kfree(cache); + } +} + +void cachefiles_get_unbind_pincount(struct cachefiles_cache *cache) +{ + refcount_inc(&cache->unbind_pincount); +} + /* * release a cache */ @@ -136,38 +191,26 @@ static int cachefiles_daemon_release(struct inode *inode, struct file *file) set_bit(CACHEFILES_DEAD, &cache->flags); - cachefiles_daemon_unbind(cache); - - ASSERT(!cache->active_nodes.rb_node); + if (cachefiles_in_ondemand_mode(cache)) + cachefiles_flush_reqs(cache); /* clean up the control file interface */ cache->cachefilesd = NULL; file->private_data = NULL; - cachefiles_open = 0; - - kfree(cache); + cachefiles_put_unbind_pincount(cache); _leave(""); return 0; } -/* - * read the cache state - */ -static ssize_t cachefiles_daemon_read(struct file *file, char __user *_buffer, - size_t buflen, loff_t *pos) +static ssize_t cachefiles_do_daemon_read(struct cachefiles_cache *cache, + char __user *_buffer, size_t buflen, loff_t *pos) { - struct cachefiles_cache *cache = file->private_data; unsigned long long b_released; unsigned f_released; char buffer[256]; int n; - //_enter(",,%zu,", buflen); - - if (!test_bit(CACHEFILES_READY, &cache->flags)) - return 0; - /* check how much space the cache has */ cachefiles_has_space(cache, 0, 0); @@ -205,6 +248,25 @@ static ssize_t cachefiles_daemon_read(struct file *file, char __user *_buffer, return n; } +/* + * read the cache state + */ +static ssize_t cachefiles_daemon_read(struct file *file, + char __user *_buffer, size_t buflen, loff_t *pos) +{ + struct cachefiles_cache *cache = file->private_data; + + //_enter(",,%zu,", buflen); + + if (!test_bit(CACHEFILES_READY, &cache->flags)) + return 0; + + if (cachefiles_in_ondemand_mode(cache)) + return cachefiles_ondemand_daemon_read(cache, _buffer, buflen, pos); + else + return cachefiles_do_daemon_read(cache, _buffer, buflen, pos); +} + /* * command the cache */ @@ -291,13 +353,29 @@ static __poll_t cachefiles_daemon_poll(struct file *file, struct poll_table_struct *poll) { struct cachefiles_cache *cache = file->private_data; + struct xarray *xa = &cache->reqs; + struct cachefiles_req *req; + unsigned long index; __poll_t mask; poll_wait(file, &cache->daemon_pollwq, poll); mask = 0; - if (test_bit(CACHEFILES_STATE_CHANGED, &cache->flags)) - mask |= EPOLLIN; + if (cachefiles_in_ondemand_mode(cache)) { + if (!xa_empty(xa)) { + xa_lock(xa); + xa_for_each_marked(xa, index, req, CACHEFILES_REQ_NEW) { + if (!cachefiles_ondemand_is_reopening_read(req)) { + mask |= EPOLLIN; + break; + } + } + xa_unlock(xa); + } + } else { + if (test_bit(CACHEFILES_STATE_CHANGED, &cache->flags)) + mask |= EPOLLIN; + } if (test_bit(CACHEFILES_CULLING, &cache->flags)) mask |= EPOLLOUT; diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 4cea5fbf695ea..0d60ffe4f5ec9 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -7,6 +7,7 @@ #include #include +#include #include "internal.h" struct cachefiles_lookup_data { @@ -51,6 +52,9 @@ static struct fscache_object *cachefiles_alloc_object( fscache_object_init(&object->fscache, cookie, &cache->cache); + if (cachefiles_ondemand_init_obj_info(object)) + goto nomem_obj_info; + object->type = cookie->def->type; /* get hold of the raw key @@ -102,6 +106,8 @@ static struct fscache_object *cachefiles_alloc_object( nomem_key: kfree(buffer); nomem_buffer: + cachefiles_ondemand_deinit_obj_info(object); +nomem_obj_info: BUG_ON(test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)); kmem_cache_free(cachefiles_object_jar, object); fscache_object_destroyed(&cache->cache); @@ -280,6 +286,8 @@ static void cachefiles_drop_object(struct fscache_object *_object) ASSERT((atomic_read(&object->usage) & 0xffff0000) != 0x6b6b0000); #endif + cachefiles_ondemand_clean_object(object); + /* We need to tidy the object up if we did in fact manage to open it. * It's possible for us to get here before the object is fully * initialised if the parent goes away or the object gets retired @@ -306,6 +314,12 @@ static void cachefiles_drop_object(struct fscache_object *_object) object->backer = NULL; } + /* clean up file descriptor for non-index object */ + if (object->file) { + fput(object->file); + object->file = NULL; + } + /* note that the object is now inactive */ if (test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)) cachefiles_mark_object_inactive(cache, object, i_blocks); @@ -319,8 +333,8 @@ static void cachefiles_drop_object(struct fscache_object *_object) /* * dispose of a reference to an object */ -static void cachefiles_put_object(struct fscache_object *_object, - enum fscache_obj_ref_trace why) +void cachefiles_put_object(struct fscache_object *_object, + enum fscache_obj_ref_trace why) { struct cachefiles_object *object; struct fscache_cache *cache; @@ -337,8 +351,11 @@ static void cachefiles_put_object(struct fscache_object *_object, ASSERT((atomic_read(&object->usage) & 0xffff0000) != 0x6b6b0000); #endif - ASSERTIFCMP(object->fscache.parent, - object->fscache.parent->n_children, >, 0); + if (!cachefiles_in_ondemand_mode(container_of(object->fscache.cache, + struct cachefiles_cache, cache))) { + ASSERTIFCMP(object->fscache.parent, + object->fscache.parent->n_children, >, 0); + } u = atomic_dec_return(&object->usage); trace_cachefiles_ref(object, _object->cookie, @@ -362,6 +379,7 @@ static void cachefiles_put_object(struct fscache_object *_object, } cache = object->fscache.cache; + cachefiles_ondemand_deinit_obj_info(object); fscache_object_destroy(&object->fscache); kmem_cache_free(cachefiles_object_jar, object); fscache_object_destroyed(cache); @@ -568,4 +586,5 @@ const struct fscache_cache_ops cachefiles_cache_ops = { .uncache_page = cachefiles_uncache_page, .dissociate_pages = cachefiles_dissociate_pages, .check_consistency = cachefiles_check_consistency, + .begin_read_operation = cachefiles_begin_read_operation, }; diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index cf9bd6401c2d0..131b96a26cd7a 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -18,6 +18,8 @@ #include #include #include +#include +#include struct cachefiles_cache; struct cachefiles_object; @@ -29,6 +31,19 @@ extern unsigned cachefiles_debug; #define cachefiles_gfp (__GFP_RECLAIM | __GFP_NORETRY | __GFP_NOMEMALLOC) +enum cachefiles_object_state { + CACHEFILES_ONDEMAND_OBJSTATE_close, /* Anonymous fd closed by daemon or initial state */ + CACHEFILES_ONDEMAND_OBJSTATE_open, /* Anonymous fd associated with object is available */ + CACHEFILES_ONDEMAND_OBJSTATE_reopening, /* Object that was closed and is being reopened. */ +}; + +struct cachefiles_ondemand_info { + struct work_struct work; + int ondemand_id; + enum cachefiles_object_state state; + struct cachefiles_object *object; +}; + /* * node records */ @@ -37,6 +52,7 @@ struct cachefiles_object { struct cachefiles_lookup_data *lookup_data; /* cached lookup data */ struct dentry *dentry; /* the file/dir representing this object */ struct dentry *backer; /* backing file */ + struct file *file; /* backing file in on-demand mode */ loff_t i_size; /* object size */ unsigned long flags; #define CACHEFILES_OBJECT_ACTIVE 0 /* T if marked active */ @@ -45,10 +61,13 @@ struct cachefiles_object { uint8_t new; /* T if object new */ spinlock_t work_lock; struct rb_node active_node; /* link in active tree (dentry is key) */ + struct cachefiles_ondemand_info *private; }; extern struct kmem_cache *cachefiles_object_jar; +#define CACHEFILES_ONDEMAND_ID_CLOSED -1 + /* * Cache files cache definition */ @@ -84,11 +103,32 @@ struct cachefiles_cache { #define CACHEFILES_DEAD 1 /* T if cache dead */ #define CACHEFILES_CULLING 2 /* T if cull engaged */ #define CACHEFILES_STATE_CHANGED 3 /* T if state changed (poll trigger) */ +#define CACHEFILES_ONDEMAND_MODE 4 /* T if in on-demand read mode */ char *rootdirname; /* name of cache root directory */ char *secctx; /* LSM security context */ char *tag; /* cache binding tag */ + struct xarray reqs; /* xarray of pending on-demand requests */ + unsigned long req_id_next; + struct xarray ondemand_ids; /* xarray for ondemand_id allocation */ + refcount_t unbind_pincount;/* refcount to do daemon unbind */ + u32 ondemand_id_next; }; +static inline bool cachefiles_in_ondemand_mode(struct cachefiles_cache *cache) +{ + return IS_ENABLED(CONFIG_CACHEFILES_ONDEMAND) && + test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags); +} + +struct cachefiles_req { + struct cachefiles_object *object; + struct completion done; + int error; + struct cachefiles_msg msg; +}; + +#define CACHEFILES_REQ_NEW XA_MARK_1 + /* * backing file read tracking */ @@ -141,6 +181,8 @@ extern void cachefiles_daemon_unbind(struct cachefiles_cache *cache); * daemon.c */ extern const struct file_operations cachefiles_daemon_fops; +extern void cachefiles_get_unbind_pincount(struct cachefiles_cache *cache); +extern void cachefiles_put_unbind_pincount(struct cachefiles_cache *cache); extern int cachefiles_has_space(struct cachefiles_cache *cache, unsigned fnr, unsigned bnr); @@ -150,6 +192,22 @@ extern int cachefiles_has_space(struct cachefiles_cache *cache, */ extern const struct fscache_cache_ops cachefiles_cache_ops; +void cachefiles_put_object(struct fscache_object *_object, + enum fscache_obj_ref_trace why); + +/* + * io.c + */ +extern int __cachefiles_write(struct cachefiles_object *object, + struct file *file, + loff_t start_pos, + struct iov_iter *iter, + netfs_io_terminated_t term_func, + void *term_func_priv); + +extern int cachefiles_prepare_write(struct netfs_cache_resources *cres, + loff_t *_start, size_t *_len, loff_t i_size); + /* * key.c */ @@ -217,6 +275,94 @@ extern int cachefiles_allocate_pages(struct fscache_retrieval *, extern int cachefiles_write_page(struct fscache_storage *, struct page *); extern void cachefiles_uncache_page(struct fscache_object *, struct page *); +/* + * rdwr2.c + */ +extern int cachefiles_begin_read_operation(struct netfs_read_request *, + struct fscache_retrieval *); + +/* + * ondemand.c + */ +#ifdef CONFIG_CACHEFILES_ONDEMAND +extern ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, + char __user *_buffer, size_t buflen, loff_t *pos); + +extern int cachefiles_ondemand_copen(struct cachefiles_cache *cache, + char *args); + +extern int cachefiles_ondemand_restore(struct cachefiles_cache *cache, + char *args); + +extern int cachefiles_ondemand_init_object(struct cachefiles_object *object); +extern void cachefiles_ondemand_clean_object(struct cachefiles_object *object); + +extern int cachefiles_ondemand_read(struct cachefiles_object *object, + loff_t pos, size_t len); + +extern int cachefiles_ondemand_init_obj_info(struct cachefiles_object *object); +extern void cachefiles_ondemand_deinit_obj_info(struct cachefiles_object *object); + +#define CACHEFILES_OBJECT_STATE_FUNCS(_state) \ +static inline bool \ +cachefiles_ondemand_object_is_##_state(const struct cachefiles_object *object) \ +{ \ + return object->private->state == CACHEFILES_ONDEMAND_OBJSTATE_##_state; \ +} \ + \ +static inline void \ +cachefiles_ondemand_set_object_##_state(struct cachefiles_object *object) \ +{ \ + object->private->state = CACHEFILES_ONDEMAND_OBJSTATE_##_state; \ +} + +CACHEFILES_OBJECT_STATE_FUNCS(open); +CACHEFILES_OBJECT_STATE_FUNCS(close); +CACHEFILES_OBJECT_STATE_FUNCS(reopening); + +static inline bool cachefiles_ondemand_is_reopening_read(struct cachefiles_req *req) +{ + return cachefiles_ondemand_object_is_reopening(req->object) && + req->msg.opcode == CACHEFILES_OP_READ; +} + +#else +static inline ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, + char __user *_buffer, size_t buflen, loff_t *pos) +{ + return -EOPNOTSUPP; +} + +static inline int cachefiles_ondemand_init_object(struct cachefiles_object *object) +{ + return 0; +} + +static inline void cachefiles_ondemand_clean_object(struct cachefiles_object *object) +{ +} +static inline int cachefiles_ondemand_read(struct cachefiles_object *object, + loff_t pos, size_t len) +{ + return -EOPNOTSUPP; +} + +static inline int cachefiles_ondemand_init_obj_info(struct cachefiles_object *object) +{ + return 0; +} + +static inline void cachefiles_ondemand_deinit_obj_info(struct cachefiles_object *object) +{ + return; +} + +static inline bool cachefiles_ondemand_is_reopening_read(struct cachefiles_req *req) +{ + return false; +} +#endif + /* * security.c */ diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c new file mode 100644 index 0000000000000..5e4aa4596cb26 --- /dev/null +++ b/fs/cachefiles/io.c @@ -0,0 +1,519 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* kiocb-using read/write + * + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +struct cachefiles_kiocb { + struct kiocb iocb; + refcount_t ki_refcnt; + loff_t start; + union { + size_t skipped; + size_t len; + }; + netfs_io_terminated_t term_func; + void *term_func_priv; + bool was_async; +}; + +static inline void cachefiles_put_kiocb(struct cachefiles_kiocb *ki) +{ + if (refcount_dec_and_test(&ki->ki_refcnt)) { + fput(ki->iocb.ki_filp); + kfree(ki); + } +} + +/* + * Handle completion of a read from the cache. + */ +static void cachefiles_read_complete(struct kiocb *iocb, long ret, long ret2) +{ + struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb); + + _enter("%ld,%ld", ret, ret2); + + if (ki->term_func) { + if (ret >= 0) + ret += ki->skipped; + ki->term_func(ki->term_func_priv, ret, ki->was_async); + } + + cachefiles_put_kiocb(ki); +} + +static void cachefiles_page_copy(struct cachefiles_kiocb *ki, struct iov_iter *iter) +{ + struct address_space *mapping = ki->iocb.ki_filp->f_mapping; + struct kiocb *iocb = &ki->iocb; + loff_t isize = i_size_read(mapping->host); + loff_t end = min_t(loff_t, isize, iocb->ki_pos + iov_iter_count(iter)); + struct pagevec pv; + pgoff_t index; + unsigned int i; + bool writably_mapped; + int error = 0; + + while (iocb->ki_pos < end && !error) { + index = iocb->ki_pos >> PAGE_SHIFT; + pv.nr = find_get_pages_contig(mapping, index, PAGEVEC_SIZE, pv.pages); + if (pv.nr == 0) + break; + writably_mapped = mapping_writably_mapped(mapping); + + for (i = 0; i < pv.nr; i++) { + struct page *page = pv.pages[i]; + unsigned int offset = iocb->ki_pos & ~PAGE_MASK; + unsigned int bytes = min_t(loff_t, end - iocb->ki_pos, + PAGE_SIZE - offset); + unsigned int copied; + + if (page->index * PAGE_SIZE >= end) + break; + if (!PageUptodate(page)) { + error = -1; + break; + } + if (writably_mapped) + flush_dcache_page(page); + copied = copy_page_to_iter(page, offset, bytes, iter); + iocb->ki_pos += copied; + if (copied < bytes) { + error = -1; + break; + } + } + for (i = 0; i < pv.nr; i++) + put_page(pv.pages[i]); + } +} + +/* + * Initiate a read from the cache. + */ +static int cachefiles_read(struct netfs_cache_resources *cres, + loff_t start_pos, + struct iov_iter *iter, + bool seek_data, + netfs_io_terminated_t term_func, + void *term_func_priv) +{ + struct cachefiles_kiocb *ki; + struct file *file = cres->cache_priv2; + unsigned int old_nofs; + ssize_t ret = -ENOBUFS; + size_t len = iov_iter_count(iter), skipped = 0; + struct fscache_retrieval *op = cres->cache_priv; + struct cachefiles_object *object = container_of(op->op.object, + struct cachefiles_object, fscache); + + + _enter("%pD,%li,%llx,%zx/%llx", + file, file_inode(file)->i_ino, start_pos, len, + i_size_read(file->f_inode)); + + /* If the caller asked us to seek for data before doing the read, then + * we should do that now. If we find a gap, we fill it with zeros. + */ + if (seek_data) { + loff_t off = start_pos, off2; + + off2 = vfs_llseek(file, off, SEEK_DATA); + if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO && off2 != -ENXIO) { + skipped = 0; + ret = off2; + goto presubmission_error; + } + + if (off2 == -ENXIO || off2 >= start_pos + len) { + /* The region is beyond the EOF or there's no more data + * in the region, so clear the rest of the buffer and + * return success. + */ + iov_iter_zero(len, iter); + skipped = len; + ret = 0; + goto presubmission_error; + } + + skipped = off2 - off; + iov_iter_zero(skipped, iter); + } + + ret = -ENOBUFS; + ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL); + if (!ki) + goto presubmission_error; + + refcount_set(&ki->ki_refcnt, 2); + ki->iocb.ki_filp = file; + ki->iocb.ki_pos = start_pos + skipped; + ki->iocb.ki_flags = IOCB_DIRECT; + ki->iocb.ki_hint = ki_hint_validate(file_write_hint(file)); + ki->iocb.ki_ioprio = get_current_ioprio(); + ki->skipped = skipped; + ki->term_func = term_func; + ki->term_func_priv = term_func_priv; + ki->was_async = true; + + if (ki->term_func) + ki->iocb.ki_complete = cachefiles_read_complete; + + get_file(ki->iocb.ki_filp); + + old_nofs = memalloc_nofs_save(); + + /* for ondemand mode try to fill iter form pagecache first */ + if (cachefiles_in_ondemand_mode(container_of(object->fscache.cache, + struct cachefiles_cache, cache))) { + cachefiles_page_copy(ki, iter); + if (!iov_iter_count(iter)) { + memalloc_nofs_restore(old_nofs); + ki->was_async = false; + cachefiles_read_complete(&ki->iocb, len - skipped, 0); + ret = 0; + goto in_progress; + } + } + ret = vfs_iocb_iter_read(file, &ki->iocb, iter); + memalloc_nofs_restore(old_nofs); + switch (ret) { + case -EIOCBQUEUED: + goto in_progress; + + case -ERESTARTSYS: + case -ERESTARTNOINTR: + case -ERESTARTNOHAND: + case -ERESTART_RESTARTBLOCK: + /* There's no easy way to restart the syscall since other AIO's + * may be already running. Just fail this IO with EINTR. + */ + ret = -EINTR; + fallthrough; + default: + ki->was_async = false; + cachefiles_read_complete(&ki->iocb, ret, 0); + if (ret > 0) + ret = 0; + break; + } + +in_progress: + cachefiles_put_kiocb(ki); + _leave(" = %zd", ret); + return ret; + +presubmission_error: + if (term_func) + term_func(term_func_priv, ret < 0 ? ret : skipped, false); + return ret; +} + +/* + * Handle completion of a write to the cache. + */ +static void cachefiles_write_complete(struct kiocb *iocb, long ret, long ret2) +{ + struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb); + struct inode *inode = file_inode(ki->iocb.ki_filp); + + _enter("%ld,%ld", ret, ret2); + + /* Tell lockdep we inherited freeze protection from submission thread */ + __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE); + __sb_end_write(inode->i_sb, SB_FREEZE_WRITE); + + if (ki->term_func) + ki->term_func(ki->term_func_priv, ret, ki->was_async); + + cachefiles_put_kiocb(ki); +} + +/* + * Initiate a write to the cache. + */ +int __cachefiles_write(struct cachefiles_object *object, + struct file *file, + loff_t start_pos, + struct iov_iter *iter, + netfs_io_terminated_t term_func, + void *term_func_priv) +{ + struct cachefiles_kiocb *ki; + struct inode *inode; + unsigned int old_nofs; + ssize_t ret = -ENOBUFS; + size_t len = iov_iter_count(iter); + struct cachefiles_cache *cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + + _enter("%pD,%li,%llx,%zx/%llx", + file, file_inode(file)->i_ino, start_pos, len, + i_size_read(file->f_inode)); + + ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL); + if (!ki) + goto presubmission_error; + + refcount_set(&ki->ki_refcnt, 2); + ki->iocb.ki_filp = file; + ki->iocb.ki_pos = start_pos; + if (cachefiles_in_ondemand_mode(cache)) + ki->iocb.ki_flags = IOCB_WRITE; + else + ki->iocb.ki_flags = IOCB_DIRECT | IOCB_WRITE; + ki->iocb.ki_hint = ki_hint_validate(file_write_hint(file)); + ki->iocb.ki_ioprio = get_current_ioprio(); + ki->start = start_pos; + ki->len = len; + ki->term_func = term_func; + ki->term_func_priv = term_func_priv; + ki->was_async = true; + + if (ki->term_func) + ki->iocb.ki_complete = cachefiles_write_complete; + + /* Open-code file_start_write here to grab freeze protection, which + * will be released by another thread in aio_complete_rw(). Fool + * lockdep by telling it the lock got released so that it doesn't + * complain about the held lock when we return to userspace. + */ + inode = file_inode(file); + __sb_start_write(inode->i_sb, SB_FREEZE_WRITE); + __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE); + + get_file(ki->iocb.ki_filp); + + old_nofs = memalloc_nofs_save(); + ret = vfs_iocb_iter_write(file, &ki->iocb, iter); + memalloc_nofs_restore(old_nofs); + switch (ret) { + case -EIOCBQUEUED: + goto in_progress; + + case -ERESTARTSYS: + case -ERESTARTNOINTR: + case -ERESTARTNOHAND: + case -ERESTART_RESTARTBLOCK: + /* There's no easy way to restart the syscall since other AIO's + * may be already running. Just fail this IO with EINTR. + */ + ret = -EINTR; + fallthrough; + default: + ki->was_async = false; + cachefiles_write_complete(&ki->iocb, ret, 0); + if (ret > 0) + ret = 0; + break; + } + +in_progress: + cachefiles_put_kiocb(ki); + _leave(" = %zd", ret); + return ret; + +presubmission_error: + if (term_func) + term_func(term_func_priv, -ENOMEM, false); + return -ENOMEM; +} + +static int cachefiles_write(struct netfs_cache_resources *cres, + loff_t start_pos, + struct iov_iter *iter, + netfs_io_terminated_t term_func, + void *term_func_priv) +{ + struct fscache_retrieval *op = cres->cache_priv; + struct cachefiles_object *object = container_of(op->op.object, + struct cachefiles_object, fscache); + + return __cachefiles_write(object, cres->cache_priv2, start_pos, iter, + term_func, term_func_priv); +} + +/* + * Prepare a read operation, shortening it to a cached/uncached + * boundary as appropriate. + */ +static enum netfs_read_source cachefiles_prepare_read(struct netfs_read_subrequest *subreq, + loff_t i_size) +{ + struct fscache_retrieval *op = subreq->rreq->cache_resources.cache_priv; + struct cachefiles_object *object; + struct cachefiles_cache *cache; + const struct cred *saved_cred; + struct file *file = subreq->rreq->cache_resources.cache_priv2; + loff_t off, to; + int rc; + + _enter("%zx @%llx/%llx", subreq->len, subreq->start, i_size); + + object = container_of(op->op.object, + struct cachefiles_object, fscache); + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + + if (!file) + goto cache_fail_nosec; + + if (subreq->start >= i_size) + return NETFS_FILL_WITH_ZEROES; + + cachefiles_begin_secure(cache, &saved_cred); +retry: + off = vfs_llseek(file, subreq->start, SEEK_DATA); + if (off < 0 && off >= (loff_t)-MAX_ERRNO) { + if (off == (loff_t)-ENXIO) + goto download_and_store; + goto cache_fail; + } + + if (off >= subreq->start + subreq->len) + goto download_and_store; + + if (off > subreq->start) { + off = round_up(off, cache->bsize); + subreq->len = off - subreq->start; + goto download_and_store; + } + + to = vfs_llseek(file, subreq->start, SEEK_HOLE); + if (to < 0 && to >= (loff_t)-MAX_ERRNO) + goto cache_fail; + + if (to < subreq->start + subreq->len) { + if (subreq->start + subreq->len >= i_size) + to = round_up(to, cache->bsize); + else + to = round_down(to, cache->bsize); + subreq->len = to - subreq->start; + } + + cachefiles_end_secure(cache, saved_cred); + return NETFS_READ_FROM_CACHE; + +download_and_store: + if (cachefiles_has_space(cache, 0, (subreq->len + PAGE_SIZE - 1) / PAGE_SIZE) == 0) { + __set_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags); + if (test_bit(NETFS_SREQ_ONDEMAND, &subreq->flags)) { + rc = cachefiles_ondemand_read(object, subreq->start, + subreq->len); + if (!rc) { + __clear_bit(NETFS_SREQ_ONDEMAND, &subreq->flags); + goto retry; + } + cachefiles_end_secure(cache, saved_cred); + return NETFS_INVALID_READ; + } + } +cache_fail: + cachefiles_end_secure(cache, saved_cred); +cache_fail_nosec: + return NETFS_DOWNLOAD_FROM_SERVER; +} + +/* + * Prepare for a write to occur. + */ +int cachefiles_prepare_write(struct netfs_cache_resources *cres, + loff_t *_start, size_t *_len, loff_t i_size) +{ + loff_t start = *_start; + size_t len = *_len, down; + + /* Round to DIO size */ + down = start - round_down(start, PAGE_SIZE); + *_start = start - down; + *_len = round_up(down + len, PAGE_SIZE); + return 0; +} + +/* + * Clean up an operation. + */ +static void cachefiles_end_operation(struct netfs_cache_resources *cres) +{ + struct fscache_retrieval *op = cres->cache_priv; + struct file *file = cres->cache_priv2; + + _enter(""); + + if (file) + fput(file); + if (op) { + fscache_op_complete(&op->op, false); + fscache_put_retrieval(op); + } + + _leave(""); +} + +static const struct netfs_cache_ops cachefiles_netfs_cache_ops = { + .end_operation = cachefiles_end_operation, + .read = cachefiles_read, + .write = cachefiles_write, + .prepare_read = cachefiles_prepare_read, + .prepare_write = cachefiles_prepare_write, +}; + +/* + * Open the cache file when beginning a cache operation. + */ +int cachefiles_begin_read_operation(struct netfs_read_request *rreq, + struct fscache_retrieval *op) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + struct path path; + struct file *file; + + _enter(""); + + object = container_of(op->op.object, + struct cachefiles_object, fscache); + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + + if (cachefiles_in_ondemand_mode(cache) && object->file) { + file = get_file(object->file); + } else { + path.mnt = cache->mnt; + path.dentry = object->backer; + file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT, + d_inode(object->backer), cache->cache_cred); + if (IS_ERR(file)) + return PTR_ERR(file); + if (!S_ISREG(file_inode(file)->i_mode)) + goto error_file; + if (unlikely(!file->f_op->read_iter) || + unlikely(!file->f_op->write_iter)) { + pr_notice("Cache does not support read_iter and write_iter\n"); + goto error_file; + } + } + + fscache_get_retrieval(op); + rreq->cache_resources.cache_priv = op; + rreq->cache_resources.cache_priv2 = file; + rreq->cache_resources.ops = &cachefiles_netfs_cache_ops; + rreq->cookie_debug_id = object->fscache.debug_id; + _leave(""); + return 0; + +error_file: + fput(file); + return -EIO; +} diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index ecc8ecbbfa5ac..b9b159cf90285 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -592,6 +592,10 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent, if (ret < 0) goto no_space_error; + ret = cachefiles_ondemand_init_object(object); + if (ret < 0) + goto create_error; + path.dentry = dir; ret = security_path_mknod(&path, next, S_IFREG, 0); if (ret < 0) @@ -636,6 +640,12 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent, if (!object->new) { _debug("validate '%pd'", next); + ret = cachefiles_ondemand_init_object(object); + if (ret < 0) { + object->dentry = NULL; + goto error; + } + ret = cachefiles_check_object_xattr(object, auxdata); if (ret == -ESTALE) { /* delete the object (the deleter drops the directory @@ -695,6 +705,20 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent, if (object->dentry->d_sb->s_blocksize > PAGE_SIZE) goto check_error; + if (cachefiles_in_ondemand_mode(cache)) { + struct path path; + struct file *file; + + path.mnt = cache->mnt; + path.dentry = object->dentry; + file = dentry_open(&path, O_RDWR | O_LARGEFILE | O_DIRECT, + cache->cache_cred); + if (IS_ERR(file)) + goto check_error; + + object->file = file; + } + object->backer = object->dentry; } else { BUG(); // TODO: open file in data-class subdir diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c new file mode 100644 index 0000000000000..f8ab3379657bd --- /dev/null +++ b/fs/cachefiles/ondemand.c @@ -0,0 +1,604 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include +#include +#include +#include +#include "internal.h" + +static int cachefiles_ondemand_fd_release(struct inode *inode, + struct file *file) +{ + struct cachefiles_object *object = file->private_data; + struct cachefiles_cache *cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + struct cachefiles_ondemand_info *info = object->private; + int object_id = info->ondemand_id; + struct cachefiles_req *req; + XA_STATE(xas, &cache->reqs, 0); + + xa_lock(&cache->reqs); + info->ondemand_id = CACHEFILES_ONDEMAND_ID_CLOSED; + cachefiles_ondemand_set_object_close(object); + + /* Only flush CACHEFILES_REQ_NEW marked req to avoid race with daemon_read */ + xas_for_each_marked(&xas, req, ULONG_MAX, CACHEFILES_REQ_NEW) { + if (req->msg.object_id == object_id && + req->msg.opcode == CACHEFILES_OP_CLOSE) { + complete(&req->done); + xas_store(&xas, NULL); + } + } + xa_unlock(&cache->reqs); + xa_erase(&cache->ondemand_ids, object_id); + object->fscache.cache->ops->put_object(&object->fscache, + cachefiles_obj_put_ondemand_fd); + cachefiles_put_unbind_pincount(cache); + return 0; +} + +static ssize_t cachefiles_ondemand_fd_write_iter(struct kiocb *kiocb, + struct iov_iter *iter) +{ + struct cachefiles_object *object = kiocb->ki_filp->private_data; + struct cachefiles_cache *cache; + const struct cred *saved_cred; + size_t len = iter->count; + loff_t pos = kiocb->ki_pos; + struct file *file = object->file; + int ret; + + if (!file) + return -ENOBUFS; + + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + + cachefiles_begin_secure(cache, &saved_cred); + ret = cachefiles_prepare_write(NULL, &pos, &len, 0); + cachefiles_end_secure(cache, saved_cred); + if (ret < 0) + return ret; + + ret = __cachefiles_write(object, file, pos, iter, NULL, NULL); + if (!ret) + ret = len; + + return len; +} + +static loff_t cachefiles_ondemand_fd_llseek(struct file *filp, loff_t pos, + int whence) +{ + struct cachefiles_object *object = filp->private_data; + struct file *file = object->file; + + if (!file) + return -ENOBUFS; + + return vfs_llseek(file, pos, whence); +} + +static long cachefiles_ondemand_fd_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg) +{ + struct cachefiles_object *object = filp->private_data; + struct cachefiles_cache *cache; + struct cachefiles_req *req; + unsigned long id; + + if (ioctl != CACHEFILES_IOC_READ_COMPLETE) + return -EINVAL; + + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + + if (!test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags)) + return -EOPNOTSUPP; + + id = arg; + req = xa_erase(&cache->reqs, id); + if (!req) + return -EINVAL; + + complete(&req->done); + return 0; +} + +static const struct file_operations cachefiles_ondemand_fd_fops = { + .owner = THIS_MODULE, + .release = cachefiles_ondemand_fd_release, + .write_iter = cachefiles_ondemand_fd_write_iter, + .llseek = cachefiles_ondemand_fd_llseek, + .unlocked_ioctl = cachefiles_ondemand_fd_ioctl, +}; + +/* + * OPEN request Completion (copen) + * - command: "copen ," + * indicates the object size if >=0, error code if negative + */ +int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args) +{ + struct cachefiles_req *req; + struct fscache_cookie *cookie; + char *pid, *psize; + unsigned long id; + long size; + int ret; + + if (!test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags)) + return -EOPNOTSUPP; + + if (!*args) { + pr_err("Empty id specified\n"); + return -EINVAL; + } + + pid = args; + psize = strchr(args, ','); + if (!psize) { + pr_err("Cache size is not specified\n"); + return -EINVAL; + } + + *psize = 0; + psize++; + + ret = kstrtoul(pid, 0, &id); + if (ret) + return ret; + + req = xa_erase(&cache->reqs, id); + if (!req) + return -EINVAL; + + /* fail OPEN request if copen format is invalid */ + ret = kstrtol(psize, 0, &size); + if (ret) { + req->error = ret; + goto out; + } + + /* fail OPEN request if daemon reports an error */ + if (size < 0) { + if (!IS_ERR_VALUE(size)) { + req->error = -EINVAL; + ret = -EINVAL; + } else { + req->error = size; + ret = 0; + } + goto out; + } + + cookie = req->object->fscache.cookie; + fscache_set_store_limit(&req->object->fscache, size); + if (size) + clear_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); + else + set_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); + + cachefiles_ondemand_set_object_open(req->object); + wake_up_all(&cache->daemon_pollwq); + +out: + complete(&req->done); + return ret; +} + +int cachefiles_ondemand_restore(struct cachefiles_cache *cache, char *args) +{ + struct cachefiles_req *req; + + XA_STATE(xas, &cache->reqs, 0); + + if (!test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags)) + return -EOPNOTSUPP; + + /* + * Reset the requests to CACHEFILES_REQ_NEW state, so that the + * requests have been processed halfway before the crash of the + * user daemon could be reprocessed after the recovery. + */ + xas_lock(&xas); + xas_for_each(&xas, req, ULONG_MAX) + xas_set_mark(&xas, CACHEFILES_REQ_NEW); + xas_unlock(&xas); + + wake_up_all(&cache->daemon_pollwq); + return 0; +} + +static int cachefiles_ondemand_get_fd(struct cachefiles_req *req) +{ + struct cachefiles_object *object = req->object; + struct cachefiles_cache *cache; + struct cachefiles_open *load; + struct file *file; + u32 object_id; + int ret, fd; + + ret = object->fscache.cache->ops->grab_object(&object->fscache, + cachefiles_obj_get_ondemand_fd) ? 0 : -EAGAIN; + if (ret) + return ret; + + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + ret = xa_alloc_cyclic(&cache->ondemand_ids, &object_id, NULL, + XA_LIMIT(1, INT_MAX), + &cache->ondemand_id_next, GFP_KERNEL); + if (ret < 0) + goto err; + + fd = get_unused_fd_flags(O_WRONLY); + if (fd < 0) { + ret = fd; + goto err_free_id; + } + + file = anon_inode_getfile("[cachefiles]", &cachefiles_ondemand_fd_fops, + object, O_WRONLY); + if (IS_ERR(file)) { + ret = PTR_ERR(file); + goto err_put_fd; + } + + file->f_mode |= FMODE_PWRITE | FMODE_LSEEK; + fd_install(fd, file); + + load = (void *)req->msg.data; + load->fd = fd; + object->private->ondemand_id = object_id; + + cachefiles_get_unbind_pincount(cache); + return 0; + +err_put_fd: + put_unused_fd(fd); +err_free_id: + xa_erase(&cache->ondemand_ids, object_id); +err: + object->fscache.cache->ops->put_object(&object->fscache, + cachefiles_obj_put_ondemand_fd); + return ret; +} + +static void ondemand_object_worker(struct work_struct *work) +{ + struct cachefiles_object *object = + ((struct cachefiles_ondemand_info *)work)->object; + + cachefiles_ondemand_init_object(object); +} + +/* + * If there are any inflight or subsequent READ requests on the + * closed object, reopen it. + * Skip read requests whose related object is reopening. + */ +static struct cachefiles_req *cachefiles_ondemand_select_req(struct xa_state *xas, + unsigned long xa_max) +{ + struct cachefiles_req *req; + struct cachefiles_object *object; + struct cachefiles_ondemand_info *info; + + xas_for_each_marked(xas, req, xa_max, CACHEFILES_REQ_NEW) { + if (req->msg.opcode != CACHEFILES_OP_READ) + return req; + object = req->object; + info = object->private; + if (cachefiles_ondemand_object_is_close(object)) { + cachefiles_ondemand_set_object_reopening(object); + queue_work(fscache_object_wq, &info->work); + continue; + } else if (cachefiles_ondemand_object_is_reopening(object)) { + continue; + } + return req; + } + return NULL; +} + +ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, + char __user *_buffer, size_t buflen, loff_t *pos) +{ + struct cachefiles_req *req; + struct cachefiles_msg *msg; + unsigned long id = 0; + size_t n; + int ret = 0; + XA_STATE(xas, &cache->reqs, cache->req_id_next); + + /* + * Cyclically search for a request that has not ever been processed, + * to prevent requests from being processed repeatedly, and make + * request distribution fair. + */ + xa_lock(&cache->reqs); + req = cachefiles_ondemand_select_req(&xas, ULONG_MAX); + if (!req && cache->req_id_next > 0) { + xas_set(&xas, 0); + req = cachefiles_ondemand_select_req(&xas, cache->req_id_next - 1); + } + if (!req) { + xa_unlock(&cache->reqs); + return 0; + } + + msg = &req->msg; + n = msg->len; + + if (n > buflen) { + xa_unlock(&cache->reqs); + return -EMSGSIZE; + } + + xas_clear_mark(&xas, CACHEFILES_REQ_NEW); + cache->req_id_next = xas.xa_index + 1; + xa_unlock(&cache->reqs); + + id = xas.xa_index; + + if (msg->opcode == CACHEFILES_OP_OPEN) { + ret = cachefiles_ondemand_get_fd(req); + if (ret) { + cachefiles_ondemand_set_object_close(req->object); + goto error; + } + } + + msg->msg_id = id; + msg->object_id = req->object->private->ondemand_id; + + if (copy_to_user(_buffer, msg, n) != 0) { + ret = -EFAULT; + goto err_put_fd; + } + + /* CLOSE request has no reply */ + if (msg->opcode == CACHEFILES_OP_CLOSE) { + xa_erase(&cache->reqs, id); + complete(&req->done); + } + + return n; + +err_put_fd: + if (msg->opcode == CACHEFILES_OP_OPEN) + __close_fd(current->files, + ((struct cachefiles_open *)msg->data)->fd); +error: + xa_erase(&cache->reqs, id); + req->error = ret; + complete(&req->done); + return ret; +} + +typedef int (*init_req_fn)(struct cachefiles_req *req, void *private); + +static int cachefiles_ondemand_send_req(struct cachefiles_object *object, + enum cachefiles_opcode opcode, + size_t data_len, + init_req_fn init_req, + void *private) +{ + struct cachefiles_cache *cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + struct cachefiles_req *req = NULL; + int ret; + XA_STATE(xas, &cache->reqs, 0); + + if (!test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags)) + return 0; + + if (test_bit(CACHEFILES_DEAD, &cache->flags)) { + ret = -EIO; + goto out; + } + + req = kzalloc(sizeof(*req) + data_len, GFP_KERNEL); + if (!req) { + ret = -ENOMEM; + goto out; + } + + req->object = object; + init_completion(&req->done); + req->msg.opcode = opcode; + req->msg.len = sizeof(struct cachefiles_msg) + data_len; + + ret = init_req(req, private); + if (ret) + goto out; + + do { + /* + * Stop enqueuing the request when daemon is dying. The + * following two operations need to be atomic as a whole. + * 1) check cache state, and + * 2) enqueue request if cache is alive. + * Otherwise the request may be enqueued after xarray has been + * flushed, leaving the orphan request never being completed. + * + * CPU 1 CPU 2 + * ===== ===== + * test CACHEFILES_DEAD bit + * set CACHEFILES_DEAD bit + * flush requests in the xarray + * enqueue the request + */ + xas_lock(&xas); + if (test_bit(CACHEFILES_DEAD, &cache->flags)) { + xas_unlock(&xas); + ret = -EIO; + goto out; + } + + /* coupled with the barrier in cachefiles_flush_reqs() */ + smp_mb(); + + if (opcode == CACHEFILES_OP_CLOSE && + !cachefiles_ondemand_object_is_open(object)) { + WARN_ON_ONCE(object->private->ondemand_id == 0); + xas_unlock(&xas); + ret = -EIO; + goto out; + } + xas.xa_index = 0; + xas_find_marked(&xas, UINT_MAX, XA_FREE_MARK); + if (xas.xa_node == XAS_RESTART) + xas_set_err(&xas, -EBUSY); + xas_store(&xas, req); + xas_clear_mark(&xas, XA_FREE_MARK); + xas_set_mark(&xas, CACHEFILES_REQ_NEW); + xas_unlock(&xas); + } while (xas_nomem(&xas, GFP_KERNEL)); + + ret = xas_error(&xas); + if (ret) + goto out; + + wake_up_all(&cache->daemon_pollwq); + wait_for_completion(&req->done); + ret = req->error; + kfree(req); + return ret; +out: + /* Reset the object to close state in error handling path. + * If error occurs after creating the anonymous fd, + * cachefiles_ondemand_fd_release() will set object to close. + */ + if (opcode == CACHEFILES_OP_OPEN) + cachefiles_ondemand_set_object_close(object); + kfree(req); + return ret; +} + +static int cachefiles_ondemand_init_open_req(struct cachefiles_req *req, + void *private) +{ + struct cachefiles_object *object = req->object; + struct fscache_cookie *cookie = object->fscache.cookie; + struct fscache_cookie *volume = object->fscache.parent->cookie; + struct cachefiles_open *load = (void *)req->msg.data; + size_t volume_key_size, cookie_key_size; + char *cookie_key, *volume_key; + + /* + * cookie_key is a string without trailing '\0', while cachefiles_open + * expects cookie key a string without trailing '\0'. + */ + cookie_key_size = cookie->key_len; + if (cookie->key_len <= sizeof(cookie->inline_key)) + cookie_key = cookie->inline_key; + else + cookie_key = cookie->key; + + /* + * volume_key is a string without trailing '\0', while cachefiles_open + * expects volume key a string with trailing '\0'. + */ + volume_key_size = volume->key_len + 1; + if (volume->key_len <= sizeof(volume->inline_key)) + volume_key = volume->inline_key; + else + volume_key = volume->key; + + load->volume_key_size = volume_key_size; + load->cookie_key_size = cookie_key_size; + memcpy(load->data, volume_key, volume->key_len); + load->data[volume_key_size - 1] = '\0'; + memcpy(load->data + volume_key_size, cookie_key, cookie_key_size); + return 0; +} + +static int cachefiles_ondemand_init_close_req(struct cachefiles_req *req, + void *private) +{ + struct cachefiles_object *object = req->object; + + if (!cachefiles_ondemand_object_is_open(object)) + return -ENOENT; + + return 0; +} + +struct cachefiles_read_ctx { + loff_t off; + size_t len; +}; + +static int cachefiles_ondemand_init_read_req(struct cachefiles_req *req, + void *private) +{ + struct cachefiles_read *load = (void *)req->msg.data; + struct cachefiles_read_ctx *read_ctx = private; + + load->off = read_ctx->off; + load->len = read_ctx->len; + return 0; +} + +int cachefiles_ondemand_init_object(struct cachefiles_object *object) +{ + struct fscache_cookie *cookie = object->fscache.cookie; + size_t volume_key_size, cookie_key_size, data_len; + + /* + * CacheFiles will firstly check the cache file under the root cache + * directory. If the coherency check failed, it will fallback to + * creating a new tmpfile as the cache file. Reuse the previously + * allocated object ID if any. + */ + if (cachefiles_ondemand_object_is_open(object) || + object->type == FSCACHE_COOKIE_TYPE_INDEX) + return 0; + + volume_key_size = object->fscache.parent->cookie->key_len + 1; + cookie_key_size = cookie->key_len; + data_len = sizeof(struct cachefiles_open) + volume_key_size + cookie_key_size; + + return cachefiles_ondemand_send_req(object, CACHEFILES_OP_OPEN, + data_len, cachefiles_ondemand_init_open_req, NULL); +} + +int cachefiles_ondemand_init_obj_info(struct cachefiles_object *object) +{ + if (!cachefiles_in_ondemand_mode(container_of(object->fscache.cache, + struct cachefiles_cache, cache))) + return 0; + + object->private = kzalloc(sizeof(struct cachefiles_ondemand_info), + GFP_KERNEL); + if (!object->private) + return -ENOMEM; + + object->private->object = object; + INIT_WORK(&object->private->work, ondemand_object_worker); + return 0; +} + +void cachefiles_ondemand_deinit_obj_info(struct cachefiles_object *object) +{ + kfree(object->private); + object->private = NULL; +} + +int cachefiles_ondemand_read(struct cachefiles_object *object, + loff_t pos, size_t len) +{ + struct cachefiles_read_ctx read_ctx = {pos, len}; + + return cachefiles_ondemand_send_req(object, CACHEFILES_OP_READ, + sizeof(struct cachefiles_read), + cachefiles_ondemand_init_read_req, &read_ctx); +} + + +void cachefiles_ondemand_clean_object(struct cachefiles_object *object) +{ + cachefiles_ondemand_send_req(object, CACHEFILES_OP_CLOSE, 0, + cachefiles_ondemand_init_close_req, NULL); +} diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index a49bf1fbaea82..06e9b26bf277a 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1775,6 +1775,8 @@ static u64 __mark_caps_flushing(struct inode *inode, * try to invalidate mapping pages without blocking. */ static int try_nonblocking_invalidate(struct inode *inode) + __releases(ci->i_ceph_lock) + __acquires(ci->i_ceph_lock) { struct ceph_inode_info *ci = ceph_inode(inode); u32 invalidating_gen = ci->i_rdcache_gen; @@ -2247,7 +2249,6 @@ static int unsafe_request_wait(struct inode *inode) int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - struct ceph_file_info *fi = file->private_data; struct inode *inode = file->f_mapping->host; struct ceph_inode_info *ci = ceph_inode(inode); u64 flush_tid; @@ -2278,14 +2279,9 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) if (err < 0) ret = err; - if (errseq_check(&ci->i_meta_err, READ_ONCE(fi->meta_err))) { - spin_lock(&file->f_lock); - err = errseq_check_and_advance(&ci->i_meta_err, - &fi->meta_err); - spin_unlock(&file->f_lock); - if (err < 0) - ret = err; - } + err = file_check_and_advance_wb_err(file); + if (err < 0) + ret = err; out: dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret); return ret; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index a10711a6337af..aa1eac6d89f2e 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -234,7 +234,6 @@ static int ceph_init_file_info(struct inode *inode, struct file *file, fi->fmode = fmode; spin_lock_init(&fi->rw_contexts_lock); INIT_LIST_HEAD(&fi->rw_contexts); - fi->meta_err = errseq_sample(&ci->i_meta_err); fi->filp_gen = READ_ONCE(ceph_inode_to_client(inode)->filp_gen); return 0; @@ -1469,32 +1468,26 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) goto out; } - err = file_remove_privs(file); - if (err) + down_read(&osdc->lock); + map_flags = osdc->osdmap->flags; + pool_flags = ceph_pg_pool_flags(osdc->osdmap, ci->i_layout.pool_id); + up_read(&osdc->lock); + if ((map_flags & CEPH_OSDMAP_FULL) || + (pool_flags & CEPH_POOL_FLAG_FULL)) { + err = -ENOSPC; goto out; + } - err = file_update_time(file); + err = file_remove_privs(file); if (err) goto out; - inode_inc_iversion_raw(inode); - if (ci->i_inline_version != CEPH_INLINE_NONE) { err = ceph_uninline_data(file, NULL); if (err < 0) goto out; } - down_read(&osdc->lock); - map_flags = osdc->osdmap->flags; - pool_flags = ceph_pg_pool_flags(osdc->osdmap, ci->i_layout.pool_id); - up_read(&osdc->lock); - if ((map_flags & CEPH_OSDMAP_FULL) || - (pool_flags & CEPH_POOL_FLAG_FULL)) { - err = -ENOSPC; - goto out; - } - dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n", inode, ceph_vinop(inode), pos, count, i_size_read(inode)); if (fi->fmode & CEPH_FILE_MODE_LAZY) @@ -1507,6 +1500,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) if (err < 0) goto out; + err = file_update_time(file); + if (err) + goto out_caps; + + inode_inc_iversion_raw(inode); + dout("aio_write %p %llx.%llx %llu~%zd got cap refs on %s\n", inode, ceph_vinop(inode), pos, count, ceph_cap_string(got)); @@ -1590,6 +1589,8 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) } goto out_unlocked; +out_caps: + ceph_put_cap_refs(ci, got); out: if (direct_lock) ceph_end_io_direct(inode); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 5beebbbb42f09..af85a72376040 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -515,8 +515,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb) ceph_fscache_inode_init(ci); - ci->i_meta_err = 0; - return &ci->vfs_inode; } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 1ef370913c007..37fb71797b341 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1272,7 +1272,6 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc, { struct ceph_mds_request *req; struct rb_node *p; - struct ceph_inode_info *ci; dout("cleanup_session_requests mds%d\n", session->s_mds); mutex_lock(&mdsc->mutex); @@ -1281,16 +1280,10 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc, struct ceph_mds_request, r_unsafe_item); pr_warn_ratelimited(" dropping unsafe request %llu\n", req->r_tid); - if (req->r_target_inode) { - /* dropping unsafe change of inode's attributes */ - ci = ceph_inode(req->r_target_inode); - errseq_set(&ci->i_meta_err, -EIO); - } - if (req->r_unsafe_dir) { - /* dropping unsafe directory operation */ - ci = ceph_inode(req->r_unsafe_dir); - errseq_set(&ci->i_meta_err, -EIO); - } + if (req->r_target_inode) + mapping_set_error(req->r_target_inode->i_mapping, -EIO); + if (req->r_unsafe_dir) + mapping_set_error(req->r_unsafe_dir->i_mapping, -EIO); __unregister_request(mdsc, req); } /* zero r_attempts, so kick_requests() will re-send requests */ @@ -1436,7 +1429,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, spin_unlock(&mdsc->cap_dirty_lock); if (dirty_dropped) { - errseq_set(&ci->i_meta_err, -EIO); + mapping_set_error(inode->i_mapping, -EIO); if (ci->i_wrbuffer_ref_head == 0 && ci->i_wr_ref == 0 && diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 8ffc8e88dd3d2..6db7b3387e1cc 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -402,8 +402,6 @@ struct ceph_inode_info { struct fscache_cookie *fscache; u32 i_fscache_gen; #endif - errseq_t i_meta_err; - struct inode vfs_inode; /* at end */ }; @@ -712,7 +710,6 @@ struct ceph_file_info { spinlock_t rw_contexts_lock; struct list_head rw_contexts; - errseq_t meta_err; u32 filp_gen; atomic_t num_locks; }; diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index cb18ee637cb7b..4bcf0226818dc 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -316,6 +316,14 @@ static ssize_t ceph_vxattrcb_snap_btime(struct ceph_inode_info *ci, char *val, } #define XATTR_RSTAT_FIELD(_type, _name) \ XATTR_NAME_CEPH(_type, _name, VXATTR_FLAG_RSTAT) +#define XATTR_RSTAT_FIELD_UPDATABLE(_type, _name) \ + { \ + .name = CEPH_XATTR_NAME(_type, _name), \ + .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \ + .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \ + .exists_cb = NULL, \ + .flags = VXATTR_FLAG_RSTAT, \ + } #define XATTR_LAYOUT_FIELD(_type, _name, _field) \ { \ .name = CEPH_XATTR_NAME2(_type, _name, _field), \ @@ -353,7 +361,7 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = { XATTR_RSTAT_FIELD(dir, rfiles), XATTR_RSTAT_FIELD(dir, rsubdirs), XATTR_RSTAT_FIELD(dir, rbytes), - XATTR_RSTAT_FIELD(dir, rctime), + XATTR_RSTAT_FIELD_UPDATABLE(dir, rctime), { .name = "ceph.dir.pin", .name_size = sizeof("ceph.dir.pin"), diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 9bd03a2310328..171ad8b42107e 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -358,14 +358,9 @@ cifs_strndup_from_utf16(const char *src, const int maxlen, if (!dst) return NULL; cifs_from_utf16(dst, (__le16 *) src, len, maxlen, codepage, - NO_MAP_UNI_RSVD); + NO_MAP_UNI_RSVD); } else { - len = strnlen(src, maxlen); - len++; - dst = kmalloc(len, GFP_KERNEL); - if (!dst) - return NULL; - strlcpy(dst, src, len); + dst = kstrndup(src, maxlen, GFP_KERNEL); } return dst; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 41b3c5fc958c7..79a18692b84c5 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -855,6 +855,7 @@ cifs_smb3_do_mount(struct file_system_type *fs_type, out_super: deactivate_locked_super(sb); + return root; out: cifs_cleanup_volume_info(volume_info); return root; @@ -888,7 +889,7 @@ cifs_loose_read_iter(struct kiocb *iocb, struct iov_iter *iter) ssize_t rc; struct inode *inode = file_inode(iocb->ki_filp); - if (iocb->ki_filp->f_flags & O_DIRECT) + if (iocb->ki_flags & IOCB_DIRECT) return cifs_user_readv(iocb, iter); rc = cifs_revalidate_mapping(inode); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 9c0e348cb00f7..414936989255a 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1930,11 +1930,13 @@ extern mempool_t *cifs_mid_poolp; /* Operations for different SMB versions */ #define SMB1_VERSION_STRING "1.0" +#define SMB20_VERSION_STRING "2.0" +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY extern struct smb_version_operations smb1_operations; extern struct smb_version_values smb1_values; -#define SMB20_VERSION_STRING "2.0" extern struct smb_version_operations smb20_operations; extern struct smb_version_values smb20_values; +#endif /* CIFS_ALLOW_INSECURE_LEGACY */ #define SMB21_VERSION_STRING "2.1" extern struct smb_version_operations smb21_operations; extern struct smb_version_values smb21_values; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index da0720f41ebcb..86bdebd2ece65 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3691,9 +3691,10 @@ cifs_match_super(struct super_block *sb, void *data) spin_lock(&cifs_tcp_ses_lock); cifs_sb = CIFS_SB(sb); tlink = cifs_get_tlink(cifs_sb_master_tlink(cifs_sb)); - if (IS_ERR(tlink)) { + if (tlink == NULL) { + /* can not match superblock if tlink were ever null */ spin_unlock(&cifs_tcp_ses_lock); - return rc; + return 0; } tcon = tlink_tcon(tlink); ses = tcon->ses; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 1aac8d38f887d..03c85beecec10 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2577,12 +2577,23 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, tcon = tlink_tcon(smbfile->tlink); if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { server = tcon->ses->server; - if (server->ops->flush) - rc = server->ops->flush(xid, tcon, &smbfile->fid); - else + if (server->ops->flush == NULL) { rc = -ENOSYS; + goto strict_fsync_exit; + } + + if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) { + smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY); + if (smbfile) { + rc = server->ops->flush(xid, tcon, &smbfile->fid); + cifsFileInfo_put(smbfile); + } else + cifs_dbg(FYI, "ignore fsync for file not open for write\n"); + } else + rc = server->ops->flush(xid, tcon, &smbfile->fid); } +strict_fsync_exit: free_xid(xid); return rc; } @@ -2594,6 +2605,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) struct cifs_tcon *tcon; struct TCP_Server_Info *server; struct cifsFileInfo *smbfile = file->private_data; + struct inode *inode = file_inode(file); struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); rc = file_write_and_wait_range(file, start, end); @@ -2608,12 +2620,23 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) tcon = tlink_tcon(smbfile->tlink); if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { server = tcon->ses->server; - if (server->ops->flush) - rc = server->ops->flush(xid, tcon, &smbfile->fid); - else + if (server->ops->flush == NULL) { rc = -ENOSYS; + goto fsync_exit; + } + + if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) { + smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY); + if (smbfile) { + rc = server->ops->flush(xid, tcon, &smbfile->fid); + cifsFileInfo_put(smbfile); + } else + cifs_dbg(FYI, "ignore fsync for file not open for write\n"); + } else + rc = server->ops->flush(xid, tcon, &smbfile->fid); } +fsync_exit: free_xid(xid); return rc; } @@ -2989,7 +3012,7 @@ static void collect_uncached_write_data(struct cifs_aio_ctx *ctx) struct cifs_tcon *tcon; struct cifs_sb_info *cifs_sb; struct dentry *dentry = ctx->cfile->dentry; - int rc; + ssize_t rc; tcon = tlink_tcon(ctx->cfile->tlink); cifs_sb = CIFS_SB(dentry->d_sb); diff --git a/fs/cifs/link.c b/fs/cifs/link.c index b736acd3917bb..a24bcbbb50337 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -97,6 +97,9 @@ parse_mf_symlink(const u8 *buf, unsigned int buf_len, unsigned int *_link_len, if (rc != 1) return -EINVAL; + if (link_len > CIFS_MF_SYMLINK_LINK_MAXLEN) + return -EINVAL; + rc = symlink_hash(link_len, link_str, md5_hash); if (rc) { cifs_dbg(FYI, "%s: MD5 hash failure: %d\n", __func__, rc); diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 85bd644f9773b..30f841a880acd 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -610,7 +610,7 @@ sess_alloc_buffer(struct sess_data *sess_data, int wct) return 0; out_free_smb_buf: - kfree(smb_buf); + cifs_small_buf_release(smb_buf); sess_data->iov[0].iov_base = NULL; sess_data->iov[0].iov_len = 0; sess_data->buf0_type = CIFS_NO_BUFFER; diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index defee1d208d22..57164563eec69 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1643,9 +1643,17 @@ smb2_copychunk_range(const unsigned int xid, int chunks_copied = 0; bool chunk_sizes_updated = false; ssize_t bytes_written, total_bytes_written = 0; + struct inode *inode; pcchunk = kmalloc(sizeof(struct copychunk_ioctl), GFP_KERNEL); + /* + * We need to flush all unwritten data before we can send the + * copychunk ioctl to the server. + */ + inode = d_inode(trgtfile->dentry); + filemap_write_and_wait(inode->i_mapping); + if (pcchunk == NULL) return -ENOMEM; @@ -3479,11 +3487,13 @@ smb3_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, } } +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY static bool smb2_is_read_op(__u32 oplock) { return oplock == SMB2_OPLOCK_LEVEL_II; } +#endif /* CIFS_ALLOW_INSECURE_LEGACY */ static bool smb21_is_read_op(__u32 oplock) @@ -4565,7 +4575,7 @@ smb2_make_node(unsigned int xid, struct inode *inode, return rc; } - +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY struct smb_version_operations smb20_operations = { .compare_fids = smb2_compare_fids, .setup_request = smb2_setup_request, @@ -4662,6 +4672,7 @@ struct smb_version_operations smb20_operations = { .fiemap = smb3_fiemap, .llseek = smb3_llseek, }; +#endif /* CIFS_ALLOW_INSECURE_LEGACY */ struct smb_version_operations smb21_operations = { .compare_fids = smb2_compare_fids, @@ -4979,6 +4990,7 @@ struct smb_version_operations smb311_operations = { .llseek = smb3_llseek, }; +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY struct smb_version_values smb20_values = { .version_string = SMB20_VERSION_STRING, .protocol_id = SMB20_PROT_ID, @@ -4999,6 +5011,7 @@ struct smb_version_values smb20_values = { .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, .create_lease_size = sizeof(struct create_lease), }; +#endif /* ALLOW_INSECURE_LEGACY */ struct smb_version_values smb21_values = { .version_string = SMB21_VERSION_STRING, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index e068f82ffeddf..0857eb7a95e28 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -356,6 +356,9 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) rc = -EHOSTDOWN; mutex_unlock(&tcon->ses->session_mutex); goto failed; + } else if (rc) { + mutex_unlock(&ses->session_mutex); + goto out; } } if (rc || !tcon->need_reconnect) { diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index e0226b2138d64..8fcc53d83af2d 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -808,13 +808,6 @@ COMPATIBLE_IOCTL(WDIOC_SETTIMEOUT) COMPATIBLE_IOCTL(WDIOC_GETTIMEOUT) COMPATIBLE_IOCTL(WDIOC_SETPRETIMEOUT) COMPATIBLE_IOCTL(WDIOC_GETPRETIMEOUT) -/* Big R */ -COMPATIBLE_IOCTL(RNDGETENTCNT) -COMPATIBLE_IOCTL(RNDADDTOENTCNT) -COMPATIBLE_IOCTL(RNDGETPOOL) -COMPATIBLE_IOCTL(RNDADDENTROPY) -COMPATIBLE_IOCTL(RNDZAPENTCNT) -COMPATIBLE_IOCTL(RNDCLEARPOOL) /* Bluetooth */ COMPATIBLE_IOCTL(HCIDEVUP) COMPATIBLE_IOCTL(HCIDEVDOWN) diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 2992cebb78661..d73d88d9c2598 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -36,6 +36,14 @@ */ DEFINE_SPINLOCK(configfs_dirent_lock); +/* + * All of link_obj/unlink_obj/link_group/unlink_group require that + * subsys->su_mutex is held. + * But parent configfs_subsystem is NULL when config_item is root. + * Use this mutex when config_item is root. + */ +static DEFINE_MUTEX(configfs_subsystem_mutex); + static void configfs_d_iput(struct dentry * dentry, struct inode * inode) { @@ -1884,7 +1892,9 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) group->cg_item.ci_name = group->cg_item.ci_namebuf; sd = root->d_fsdata; + mutex_lock(&configfs_subsystem_mutex); link_group(to_config_group(sd->s_element), group); + mutex_unlock(&configfs_subsystem_mutex); inode_lock_nested(d_inode(root), I_MUTEX_PARENT); @@ -1909,7 +1919,9 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) inode_unlock(d_inode(root)); if (err) { + mutex_lock(&configfs_subsystem_mutex); unlink_group(group); + mutex_unlock(&configfs_subsystem_mutex); configfs_release_fs(); } put_fragment(frag); @@ -1956,7 +1968,9 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys) dput(dentry); + mutex_lock(&configfs_subsystem_mutex); unlink_group(group); + mutex_unlock(&configfs_subsystem_mutex); configfs_release_fs(); } diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index a5a40a76b8ed7..82575cfbb04db 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -305,3 +305,47 @@ const char *fscrypt_get_symlink(struct inode *inode, const void *caddr, return ERR_PTR(err); } EXPORT_SYMBOL_GPL(fscrypt_get_symlink); + +/** + * fscrypt_symlink_getattr() - set the correct st_size for encrypted symlinks + * @path: the path for the encrypted symlink being queried + * @stat: the struct being filled with the symlink's attributes + * + * Override st_size of encrypted symlinks to be the length of the decrypted + * symlink target (or the no-key encoded symlink target, if the key is + * unavailable) rather than the length of the encrypted symlink target. This is + * necessary for st_size to match the symlink target that userspace actually + * sees. POSIX requires this, and some userspace programs depend on it. + * + * This requires reading the symlink target from disk if needed, setting up the + * inode's encryption key if possible, and then decrypting or encoding the + * symlink target. This makes lstat() more heavyweight than is normally the + * case. However, decrypted symlink targets will be cached in ->i_link, so + * usually the symlink won't have to be read and decrypted again later if/when + * it is actually followed, readlink() is called, or lstat() is called again. + * + * Return: 0 on success, -errno on failure + */ +int fscrypt_symlink_getattr(const struct path *path, struct kstat *stat) +{ + struct dentry *dentry = path->dentry; + struct inode *inode = d_inode(dentry); + const char *link; + DEFINE_DELAYED_CALL(done); + + /* + * To get the symlink target that userspace will see (whether it's the + * decrypted target or the no-key encoded target), we can just get it in + * the same way the VFS does during path resolution and readlink(). + */ + link = READ_ONCE(inode->i_link); + if (!link) { + link = inode->i_op->get_link(dentry, inode, &done); + if (IS_ERR(link)) + return PTR_ERR(link); + } + stat->size = strlen(link); + do_delayed_call(&done); + return 0; +} +EXPORT_SYMBOL_GPL(fscrypt_symlink_getattr); diff --git a/fs/dax.c b/fs/dax.c index 12953e892bb25..bcb7c6b43fb2b 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -819,7 +819,8 @@ static void dax_entry_mkclean(struct address_space *mapping, pgoff_t index, if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp)) goto unlock_pmd; - flush_cache_page(vma, address, pfn); + flush_cache_range(vma, address, + address + HPAGE_PMD_SIZE); pmd = pmdp_invalidate(vma, address, pmdp); pmd = pmd_wrprotect(pmd); pmd = pmd_mkclean(pmd); diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 943637298f650..da87615ad69a7 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -146,7 +146,7 @@ static int debugfs_locked_down(struct inode *inode, struct file *filp, const struct file_operations *real_fops) { - if ((inode->i_mode & 07777) == 0444 && + if ((inode->i_mode & 07777 & ~0444) == 0 && !(filp->f_mode & FMODE_WRITE) && !real_fops->unlocked_ioctl && !real_fops->compat_ioctl && @@ -178,8 +178,10 @@ static int open_proxy_open(struct inode *inode, struct file *filp) if (!fops_get(real_fops)) { #ifdef CONFIG_MODULES if (real_fops->owner && - real_fops->owner->state == MODULE_STATE_GOING) + real_fops->owner->state == MODULE_STATE_GOING) { + r = -ENXIO; goto out; + } #endif /* Huh? Module did not clean up after itself at exit? */ @@ -313,8 +315,10 @@ static int full_proxy_open(struct inode *inode, struct file *filp) if (!fops_get(real_fops)) { #ifdef CONFIG_MODULES if (real_fops->owner && - real_fops->owner->state == MODULE_STATE_GOING) + real_fops->owner->state == MODULE_STATE_GOING) { + r = -ENXIO; goto out; + } #endif /* Huh? Module did not cleanup after itself at exit? */ diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 1c74a7cbf5b19..e595a29bf46e3 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -522,7 +522,7 @@ struct dentry *debugfs_create_file_size(const char *name, umode_t mode, { struct dentry *de = debugfs_create_file(name, mode, parent, data, fops); - if (de) + if (!IS_ERR(de)) d_inode(de)->i_size = file_size; return de; } diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 18d81599522f3..9165bf56c6e8e 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -1551,6 +1551,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype, lkb->lkb_wait_type = 0; lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL; lkb->lkb_wait_count--; + unhold_lkb(lkb); goto out_del; } @@ -1577,6 +1578,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype, log_error(ls, "remwait error %x reply %d wait_type %d overlap", lkb->lkb_id, mstype, lkb->lkb_wait_type); lkb->lkb_wait_count--; + unhold_lkb(lkb); lkb->lkb_wait_type = 0; } @@ -3975,6 +3977,14 @@ static int validate_message(struct dlm_lkb *lkb, struct dlm_message *ms) int from = ms->m_header.h_nodeid; int error = 0; + /* currently mixing of user/kernel locks are not supported */ + if (ms->m_flags & DLM_IFL_USER && ~lkb->lkb_flags & DLM_IFL_USER) { + log_error(lkb->lkb_resource->res_ls, + "got user dlm message for a kernel lock"); + error = -EINVAL; + goto out; + } + switch (ms->m_type) { case DLM_MSG_CONVERT: case DLM_MSG_UNLOCK: @@ -4003,6 +4013,7 @@ static int validate_message(struct dlm_lkb *lkb, struct dlm_message *ms) error = -EINVAL; } +out: if (error) log_error(lkb->lkb_resource->res_ls, "ignore invalid message %d from %d %x %x %x %d", @@ -4056,13 +4067,14 @@ static void send_repeat_remove(struct dlm_ls *ls, char *ms_name, int len) rv = _create_message(ls, sizeof(struct dlm_message) + len, dir_nodeid, DLM_MSG_REMOVE, &ms, &mh); if (rv) - return; + goto out; memcpy(ms->m_extra, name, len); ms->m_hash = hash; send_message(mh, ms); +out: spin_lock(&ls->ls_remove_spin); ls->ls_remove_len = 0; memset(ls->ls_remove_name, 0, DLM_RESNAME_MAXLEN); @@ -5303,11 +5315,16 @@ int dlm_recover_waiters_post(struct dlm_ls *ls) lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK; lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL; lkb->lkb_wait_type = 0; - lkb->lkb_wait_count = 0; + /* drop all wait_count references we still + * hold a reference for this iteration. + */ + while (lkb->lkb_wait_count) { + lkb->lkb_wait_count--; + unhold_lkb(lkb); + } mutex_lock(&ls->ls_waiters_mutex); list_del_init(&lkb->lkb_wait_reply); mutex_unlock(&ls->ls_waiters_mutex); - unhold_lkb(lkb); /* for waiters list */ if (oc || ou) { /* do an unlock or cancel instead of resending */ diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index c38b2b8ffd1d3..a10d2bcfe75a8 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -23,11 +23,11 @@ struct plock_op { struct list_head list; int done; struct dlm_plock_info info; + int (*callback)(struct file_lock *fl, int result); }; struct plock_xop { struct plock_op xop; - int (*callback)(struct file_lock *fl, int result); void *fl; void *file; struct file_lock flc; @@ -129,19 +129,18 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, /* fl_owner is lockd which doesn't distinguish processes on the nfs client */ op->info.owner = (__u64) fl->fl_pid; - xop->callback = fl->fl_lmops->lm_grant; + op->callback = fl->fl_lmops->lm_grant; locks_init_lock(&xop->flc); locks_copy_lock(&xop->flc, fl); xop->fl = fl; xop->file = file; } else { op->info.owner = (__u64)(long) fl->fl_owner; - xop->callback = NULL; } send_op(op); - if (xop->callback == NULL) { + if (!op->callback) { rv = wait_event_interruptible(recv_wq, (op->done != 0)); if (rv == -ERESTARTSYS) { log_debug(ls, "dlm_posix_lock: wait killed %llx", @@ -203,7 +202,7 @@ static int dlm_plock_callback(struct plock_op *op) file = xop->file; flc = &xop->flc; fl = xop->fl; - notify = xop->callback; + notify = op->callback; if (op->info.rv) { notify(fl, op->info.rv); @@ -436,10 +435,9 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count, if (op->info.fsid == info.fsid && op->info.number == info.number && op->info.owner == info.owner) { - struct plock_xop *xop = (struct plock_xop *)op; list_del_init(&op->list); memcpy(&op->info, &info, sizeof(info)); - if (xop->callback) + if (op->callback) do_callback = 1; else op->done = 1; diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig index 9d634d3a18451..0b8b743a5db9f 100644 --- a/fs/erofs/Kconfig +++ b/fs/erofs/Kconfig @@ -3,17 +3,24 @@ config EROFS_FS tristate "EROFS filesystem support" depends on BLOCK + select LIBCRC32C help - EROFS (Enhanced Read-Only File System) is a lightweight - read-only file system with modern designs (eg. page-sized - blocks, inline xattrs/data, etc.) for scenarios which need - high-performance read-only requirements, e.g. Android OS - for mobile phones and LIVECDs. - - It also provides fixed-sized output compression support, - which improves storage density, keeps relatively higher - compression ratios, which is more useful to achieve high - performance for embedded devices with limited memory. + EROFS (Enhanced Read-Only File System) is a lightweight read-only + file system with modern designs (e.g. no buffer heads, inline + xattrs/data, chunk-based deduplication, multiple devices, etc.) for + scenarios which need high-performance read-only solutions, e.g. + smartphones with Android OS, LiveCDs and high-density hosts with + numerous containers; + + It also provides fixed-sized output compression support in order to + improve storage density as well as keep relatively higher compression + ratios and implements in-place decompression to reuse the file page + for compressed data temporarily with proper strategies, which is + quite useful to ensure guaranteed end-to-end runtime decompression + performance under extremely memory pressure without extra cost. + + See the documentation at + for more details. If unsure, say N. @@ -89,3 +96,12 @@ config EROFS_FS_CLUSTER_PAGE_LIMIT less than 2. Otherwise, the image will be refused to mount on this kernel. +config EROFS_FS_ONDEMAND + bool "EROFS fscache-based on-demand read support" + depends on CACHEFILES_ONDEMAND && (EROFS_FS=m && FSCACHE || EROFS_FS=y && FSCACHE=y) + default n + help + This permits EROFS to use fscache-backed data blobs with on-demand + read support. + + If unsure, say N. diff --git a/fs/erofs/Makefile b/fs/erofs/Makefile index 46f2aa4ba46c2..271427e853f3e 100644 --- a/fs/erofs/Makefile +++ b/fs/erofs/Makefile @@ -8,4 +8,4 @@ obj-$(CONFIG_EROFS_FS) += erofs.o erofs-objs := super.o inode.o data.o namei.o dir.o utils.o erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o - +erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h index 07d279fd5d67d..e9b1aa0afe0ec 100644 --- a/fs/erofs/compress.h +++ b/fs/erofs/compress.h @@ -1,8 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2019 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang + * https://www.huawei.com/ */ #ifndef __EROFS_FS_COMPRESS_H #define __EROFS_FS_COMPRESS_H @@ -57,4 +56,3 @@ int z_erofs_decompress(struct z_erofs_decompress_req *rq, struct list_head *pagepool); #endif - diff --git a/fs/erofs/data.c b/fs/erofs/data.c index b22a08ac53a23..5fd1b5b800b9d 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -1,11 +1,12 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2017-2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang + * https://www.huawei.com/ + * Copyright (C) 2021, Alibaba Cloud */ #include "internal.h" #include +#include #include @@ -32,44 +33,83 @@ static void erofs_readendio(struct bio *bio) bio_put(bio); } -struct page *erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr) +void erofs_unmap_metabuf(struct erofs_buf *buf) { - struct address_space *const mapping = sb->s_bdev->bd_inode->i_mapping; - struct page *page; - - page = read_cache_page_gfp(mapping, blkaddr, - mapping_gfp_constraint(mapping, ~__GFP_FS)); - /* should already be PageUptodate */ - if (!IS_ERR(page)) - lock_page(page); - return page; + if (buf->kmap_type == EROFS_KMAP) + kunmap(buf->page); + else if (buf->kmap_type == EROFS_KMAP_ATOMIC) + kunmap_atomic(buf->base); + buf->base = NULL; + buf->kmap_type = EROFS_NO_KMAP; +} + +void erofs_put_metabuf(struct erofs_buf *buf) +{ + if (!buf->page) + return; + erofs_unmap_metabuf(buf); + put_page(buf->page); + buf->page = NULL; +} + +void *erofs_bread(struct erofs_buf *buf, struct inode *inode, + erofs_blk_t blkaddr, enum erofs_kmap_type type) +{ + erofs_off_t offset = blknr_to_addr(blkaddr); + pgoff_t index = offset >> PAGE_SHIFT; + struct page *page = buf->page; + unsigned int nofs_flag; + + if (!page || page->index != index) { + erofs_put_metabuf(buf); + nofs_flag = memalloc_nofs_save(); + page = read_cache_page(inode->i_mapping, index, NULL, NULL); + memalloc_nofs_restore(nofs_flag); + + if (IS_ERR(page)) + return page; + /* should already be PageUptodate, no need to lock page */ + buf->page = page; + } + if (buf->kmap_type == EROFS_NO_KMAP) { + if (type == EROFS_KMAP) + buf->base = kmap(page); + else if (type == EROFS_KMAP_ATOMIC) + buf->base = kmap_atomic(page); + buf->kmap_type = type; + } else if (buf->kmap_type != type) { + DBG_BUGON(1); + return ERR_PTR(-EFAULT); + } + if (type == EROFS_NO_KMAP) + return NULL; + return buf->base + (offset & ~PAGE_MASK); +} + +void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, + erofs_blk_t blkaddr, enum erofs_kmap_type type) +{ + if (erofs_is_fscache_mode(sb)) + return erofs_bread(buf, EROFS_SB(sb)->s_fscache->inode, + blkaddr, type); + + return erofs_bread(buf, sb->s_bdev->bd_inode, blkaddr, type); } static int erofs_map_blocks_flatmode(struct inode *inode, struct erofs_map_blocks *map, int flags) { - int err = 0; erofs_blk_t nblocks, lastblk; u64 offset = map->m_la; struct erofs_inode *vi = EROFS_I(inode); bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE); - trace_erofs_map_blocks_flatmode_enter(inode, map, flags); - - nblocks = DIV_ROUND_UP(inode->i_size, PAGE_SIZE); + nblocks = DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ); lastblk = nblocks - tailendpacking; - if (offset >= inode->i_size) { - /* leave out-of-bound access unmapped */ - map->m_flags = 0; - map->m_plen = 0; - goto out; - } - /* there is no hole in flatmode */ map->m_flags = EROFS_MAP_MAPPED; - if (offset < blknr_to_addr(lastblk)) { map->m_pa = blknr_to_addr(vi->raw_blkaddr) + map->m_la; map->m_plen = blknr_to_addr(lastblk) - offset; @@ -81,47 +121,145 @@ static int erofs_map_blocks_flatmode(struct inode *inode, vi->xattr_isize + erofs_blkoff(map->m_la); map->m_plen = inode->i_size - offset; - /* inline data should be located in one meta block */ - if (erofs_blkoff(map->m_pa) + map->m_plen > PAGE_SIZE) { + /* inline data should be located in the same meta block */ + if (erofs_blkoff(map->m_pa) + map->m_plen > EROFS_BLKSIZ) { erofs_err(inode->i_sb, "inline data cross block boundary @ nid %llu", vi->nid); DBG_BUGON(1); - err = -EFSCORRUPTED; - goto err_out; + return -EFSCORRUPTED; } - map->m_flags |= EROFS_MAP_META; } else { erofs_err(inode->i_sb, "internal error @ nid: %llu (size %llu), m_la 0x%llx", vi->nid, inode->i_size, map->m_la); DBG_BUGON(1); - err = -EIO; - goto err_out; + return -EIO; } - -out: - map->m_llen = map->m_plen; - -err_out: - trace_erofs_map_blocks_flatmode_exit(inode, map, flags, 0); - return err; + return 0; } int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map, int flags) { - if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout)) { - int err = z_erofs_map_blocks_iter(inode, map, flags); + struct super_block *sb = inode->i_sb; + struct erofs_inode *vi = EROFS_I(inode); + struct erofs_inode_chunk_index *idx; + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; + u64 chunknr; + unsigned int unit; + erofs_off_t pos; + void *kaddr; + int err = 0; - if (map->mpage) { - put_page(map->mpage); - map->mpage = NULL; + trace_erofs_map_blocks_enter(inode, map, flags); + map->m_deviceid = 0; + if (map->m_la >= inode->i_size) { + /* leave out-of-bound access unmapped */ + map->m_flags = 0; + map->m_plen = 0; + goto out; + } + + if (vi->datalayout != EROFS_INODE_CHUNK_BASED) { + err = erofs_map_blocks_flatmode(inode, map, flags); + goto out; + } + + if (vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES) + unit = sizeof(*idx); /* chunk index */ + else + unit = EROFS_BLOCK_MAP_ENTRY_SIZE; /* block map */ + + chunknr = map->m_la >> vi->chunkbits; + pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize + + vi->xattr_isize, unit) + unit * chunknr; + + kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), EROFS_KMAP); + if (IS_ERR(kaddr)) { + err = PTR_ERR(kaddr); + goto out; + } + map->m_la = chunknr << vi->chunkbits; + map->m_plen = min_t(erofs_off_t, 1UL << vi->chunkbits, + roundup(inode->i_size - map->m_la, EROFS_BLKSIZ)); + + /* handle block map */ + if (!(vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)) { + __le32 *blkaddr = kaddr + erofs_blkoff(pos); + + if (le32_to_cpu(*blkaddr) == EROFS_NULL_ADDR) { + map->m_flags = 0; + } else { + map->m_pa = blknr_to_addr(le32_to_cpu(*blkaddr)); + map->m_flags = EROFS_MAP_MAPPED; + } + goto out_unlock; + } + /* parse chunk indexes */ + idx = kaddr + erofs_blkoff(pos); + switch (le32_to_cpu(idx->blkaddr)) { + case EROFS_NULL_ADDR: + map->m_flags = 0; + break; + default: + map->m_deviceid = le16_to_cpu(idx->device_id) & + EROFS_SB(sb)->device_id_mask; + map->m_pa = blknr_to_addr(le32_to_cpu(idx->blkaddr)); + map->m_flags = EROFS_MAP_MAPPED; + break; + } +out_unlock: + erofs_put_metabuf(&buf); +out: + if (!err) + map->m_llen = map->m_plen; + trace_erofs_map_blocks_exit(inode, map, flags, 0); + return err; +} + +int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) +{ + struct erofs_dev_context *devs = EROFS_SB(sb)->devs; + struct erofs_device_info *dif; + int id; + + /* primary device by default */ + map->m_bdev = sb->s_bdev; + map->m_fscache = EROFS_SB(sb)->s_fscache; + + if (map->m_deviceid) { + down_read(&devs->rwsem); + dif = idr_find(&devs->tree, map->m_deviceid - 1); + if (!dif) { + up_read(&devs->rwsem); + return -ENODEV; + } + map->m_bdev = dif->bdev; + map->m_fscache = dif->fscache; + up_read(&devs->rwsem); + } else if (devs->extra_devices) { + down_read(&devs->rwsem); + idr_for_each_entry(&devs->tree, dif, id) { + erofs_off_t startoff, length; + + if (!dif->mapped_blkaddr) + continue; + startoff = blknr_to_addr(dif->mapped_blkaddr); + length = blknr_to_addr(dif->blocks); + + if (map->m_pa >= startoff && + map->m_pa < startoff + length) { + map->m_pa -= startoff; + map->m_bdev = dif->bdev; + map->m_fscache = dif->fscache; + break; + } } - return err; + up_read(&devs->rwsem); } - return erofs_map_blocks_flatmode(inode, map, flags); + return 0; } static inline struct bio *erofs_read_raw_page(struct bio *bio, @@ -156,6 +294,7 @@ static inline struct bio *erofs_read_raw_page(struct bio *bio, struct erofs_map_blocks map = { .m_la = blknr_to_addr(current_block), }; + struct erofs_map_dev mdev; erofs_blk_t blknr; unsigned int blkoff; @@ -163,6 +302,14 @@ static inline struct bio *erofs_read_raw_page(struct bio *bio, if (err) goto err_out; + mdev = (struct erofs_map_dev) { + .m_deviceid = map.m_deviceid, + .m_pa = map.m_pa, + }; + err = erofs_map_dev(sb, &mdev); + if (err) + goto err_out; + /* zero out the holed page */ if (!(map.m_flags & EROFS_MAP_MAPPED)) { zero_user_segment(page, 0, PAGE_SIZE); @@ -175,36 +322,31 @@ static inline struct bio *erofs_read_raw_page(struct bio *bio, /* for RAW access mode, m_plen must be equal to m_llen */ DBG_BUGON(map.m_plen != map.m_llen); - blknr = erofs_blknr(map.m_pa); - blkoff = erofs_blkoff(map.m_pa); + blknr = erofs_blknr(mdev.m_pa); + blkoff = erofs_blkoff(mdev.m_pa); /* deal with inline page */ if (map.m_flags & EROFS_MAP_META) { void *vsrc, *vto; - struct page *ipage; + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; DBG_BUGON(map.m_plen > PAGE_SIZE); - ipage = erofs_get_meta_page(inode->i_sb, blknr); - - if (IS_ERR(ipage)) { - err = PTR_ERR(ipage); + vsrc = erofs_read_metabuf(&buf, inode->i_sb, + blknr, EROFS_KMAP_ATOMIC); + if (IS_ERR(vsrc)) { + err = PTR_ERR(vsrc); goto err_out; } - - vsrc = kmap_atomic(ipage); vto = kmap_atomic(page); memcpy(vto, vsrc + blkoff, map.m_plen); memset(vto + map.m_plen, 0, PAGE_SIZE - map.m_plen); kunmap_atomic(vto); - kunmap_atomic(vsrc); flush_dcache_page(page); SetPageUptodate(page); - /* TODO: could we unlock the page earlier? */ - unlock_page(ipage); - put_page(ipage); + erofs_put_metabuf(&buf); /* imply err = 0, see erofs_map_blocks */ goto has_updated; } @@ -221,7 +363,7 @@ static inline struct bio *erofs_read_raw_page(struct bio *bio, bio = bio_alloc(GFP_NOIO, nblocks); bio->bi_end_io = erofs_readendio; - bio_set_dev(bio, sb->s_bdev); + bio_set_dev(bio, mdev.m_bdev); bio->bi_iter.bi_sector = (sector_t)blknr << LOG_SECTORS_PER_BLOCK; bio->bi_opf = REQ_OP_READ; @@ -349,4 +491,3 @@ const struct address_space_operations erofs_raw_access_aops = { .readpages = erofs_raw_access_readpages, .bmap = erofs_bmap, }; - diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index 23b74b8e8f96f..c1e35238b5222 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -1,8 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2019 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang + * https://www.huawei.com/ */ #include "compress.h" #include @@ -125,8 +124,7 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out) support_0padding = false; /* decompression inplace is only safe when 0padding is enabled */ - if (EROFS_SB(rq->sb)->feature_incompat & - EROFS_FEATURE_INCOMPAT_LZ4_0PADDING) { + if (erofs_sb_has_lz4_0padding(EROFS_SB(rq->sb))) { support_0padding = true; while (!src[inputmargin & ~PAGE_MASK]) @@ -333,4 +331,3 @@ int z_erofs_decompress(struct z_erofs_decompress_req *rq, return z_erofs_shifted_transform(rq, pagepool); return z_erofs_decompress_generic(rq, pagepool); } - diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c index d28c623dfef9e..18e59821c5974 100644 --- a/fs/erofs/dir.c +++ b/fs/erofs/dir.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2017-2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang + * https://www.huawei.com/ + * Copyright (C) 2022, Alibaba Cloud */ #include "internal.h" @@ -68,7 +68,7 @@ static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx, static int erofs_readdir(struct file *f, struct dir_context *ctx) { struct inode *dir = file_inode(f); - struct address_space *mapping = dir->i_mapping; + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; const size_t dirsize = i_size_read(dir); unsigned int i = ctx->pos / EROFS_BLKSIZ; unsigned int ofs = ctx->pos % EROFS_BLKSIZ; @@ -76,26 +76,19 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) bool initial = true; while (ctx->pos < dirsize) { - struct page *dentry_page; struct erofs_dirent *de; unsigned int nameoff, maxsize; - dentry_page = read_mapping_page(mapping, i, NULL); - if (dentry_page == ERR_PTR(-ENOMEM)) { - err = -ENOMEM; - break; - } else if (IS_ERR(dentry_page)) { + de = erofs_bread(&buf, dir, i, EROFS_KMAP); + if (IS_ERR(de)) { erofs_err(dir->i_sb, "fail to readdir of logical block %u of nid %llu", i, EROFS_I(dir)->nid); - err = -EFSCORRUPTED; + err = PTR_ERR(de); break; } - de = (struct erofs_dirent *)kmap(dentry_page); - nameoff = le16_to_cpu(de->nameoff); - if (nameoff < sizeof(struct erofs_dirent) || nameoff >= PAGE_SIZE) { erofs_err(dir->i_sb, @@ -120,10 +113,6 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) err = erofs_fill_dentries(dir, ctx, de, &ofs, nameoff, maxsize); skip_this: - kunmap(dentry_page); - - put_page(dentry_page); - ctx->pos = blknr_to_addr(i) + ofs; if (err) @@ -131,6 +120,7 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) ++i; ofs = 0; } + erofs_put_metabuf(&buf); return err < 0 ? err : 0; } @@ -139,4 +129,3 @@ const struct file_operations erofs_dir_fops = { .read = generic_read_dir, .iterate_shared = erofs_readdir, }; - diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h index 52ab961544673..9e27129764299 100644 --- a/fs/erofs/erofs_fs.h +++ b/fs/erofs/erofs_fs.h @@ -3,20 +3,37 @@ * EROFS (Enhanced ROM File System) on-disk format definition * * Copyright (C) 2017-2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang + * https://www.huawei.com/ + * Copyright (C) 2021, Alibaba Cloud */ #ifndef __EROFS_FS_H #define __EROFS_FS_H #define EROFS_SUPER_OFFSET 1024 +#define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001 + /* * Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should * be incompatible with this kernel version. */ #define EROFS_FEATURE_INCOMPAT_LZ4_0PADDING 0x00000001 -#define EROFS_ALL_FEATURE_INCOMPAT EROFS_FEATURE_INCOMPAT_LZ4_0PADDING +#define EROFS_FEATURE_INCOMPAT_CHUNKED_FILE 0x00000004 +#define EROFS_FEATURE_INCOMPAT_DEVICE_TABLE 0x00000008 +#define EROFS_ALL_FEATURE_INCOMPAT \ + (EROFS_FEATURE_INCOMPAT_LZ4_0PADDING | \ + EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | \ + EROFS_FEATURE_INCOMPAT_DEVICE_TABLE) + +#define EROFS_SB_EXTSLOT_SIZE 16 + +struct erofs_deviceslot { + u8 tag[64]; /* digest(sha256), etc. */ + __le32 blocks; /* total fs blocks of this device */ + __le32 mapped_blkaddr; /* map starting at mapped_blkaddr */ + u8 reserved[56]; +}; +#define EROFS_DEVT_SLOT_SIZE sizeof(struct erofs_deviceslot) /* 128-byte erofs on-disk super block */ struct erofs_super_block { @@ -37,8 +54,10 @@ struct erofs_super_block { __u8 uuid[16]; /* 128-bit uuid for volume */ __u8 volume_name[16]; /* volume name */ __le32 feature_incompat; - - __u8 reserved2[44]; + __le16 reserved2; + __le16 extra_devices; /* # of devices besides the primary device */ + __le16 devt_slotoff; /* startoff = devt_slotoff * devt_slotsize */ + __u8 reserved3[38]; }; /* @@ -51,13 +70,16 @@ struct erofs_super_block { * inode, [xattrs], last_inline_data, ... | ... | no-holed data * 3 - inode compression D: * inode, [xattrs], map_header, extents ... | ... - * 4~7 - reserved + * 4 - inode chunk-based E: + * inode, [xattrs], chunk indexes ... | ... + * 5~7 - reserved */ enum { EROFS_INODE_FLAT_PLAIN = 0, EROFS_INODE_FLAT_COMPRESSION_LEGACY = 1, EROFS_INODE_FLAT_INLINE = 2, EROFS_INODE_FLAT_COMPRESSION = 3, + EROFS_INODE_CHUNK_BASED = 4, EROFS_INODE_DATALAYOUT_MAX }; @@ -77,6 +99,19 @@ static inline bool erofs_inode_is_data_compressed(unsigned int datamode) #define EROFS_I_ALL \ ((1 << (EROFS_I_DATALAYOUT_BIT + EROFS_I_DATALAYOUT_BITS)) - 1) +/* indicate chunk blkbits, thus 'chunksize = blocksize << chunk blkbits' */ +#define EROFS_CHUNK_FORMAT_BLKBITS_MASK 0x001F +/* with chunk indexes or just a 4-byte blkaddr array */ +#define EROFS_CHUNK_FORMAT_INDEXES 0x0020 + +#define EROFS_CHUNK_FORMAT_ALL \ + (EROFS_CHUNK_FORMAT_BLKBITS_MASK | EROFS_CHUNK_FORMAT_INDEXES) + +struct erofs_inode_chunk_info { + __le16 format; /* chunk blkbits, etc. */ + __le16 reserved; +}; + /* 32-byte reduced form of an ondisk inode */ struct erofs_inode_compact { __le16 i_format; /* inode format hints */ @@ -94,6 +129,9 @@ struct erofs_inode_compact { /* for device files, used to indicate old/new device # */ __le32 rdev; + + /* for chunk-based files, it contains the summary info */ + struct erofs_inode_chunk_info c; } i_u; __le32 i_ino; /* only used for 32-bit stat compatibility */ __le16 i_uid; @@ -122,6 +160,9 @@ struct erofs_inode_extended { /* for device files, used to indicate old/new device # */ __le32 rdev; + + /* for chunk-based files, it contains the summary info */ + struct erofs_inode_chunk_info c; } i_u; /* only used for 32-bit stat compatibility */ @@ -191,6 +232,19 @@ static inline unsigned int erofs_xattr_entry_size(struct erofs_xattr_entry *e) e->e_name_len + le16_to_cpu(e->e_value_size)); } +/* represent a zeroed chunk (hole) */ +#define EROFS_NULL_ADDR -1 + +/* 4-byte block address array */ +#define EROFS_BLOCK_MAP_ENTRY_SIZE sizeof(__le32) + +/* 8-byte inode chunk indexes */ +struct erofs_inode_chunk_index { + __le16 advise; /* always 0, don't care for now */ + __le16 device_id; /* back-end storage id (with bits masked) */ + __le32 blkaddr; /* start block address of this inode chunk */ +}; + /* available compression algorithm types (for h_algorithmtype) */ enum { Z_EROFS_COMPRESSION_LZ4 = 0, @@ -307,13 +361,18 @@ static inline void erofs_check_ondisk_layout_definitions(void) BUILD_BUG_ON(sizeof(struct erofs_inode_extended) != 64); BUILD_BUG_ON(sizeof(struct erofs_xattr_ibody_header) != 12); BUILD_BUG_ON(sizeof(struct erofs_xattr_entry) != 4); + BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_info) != 4); + BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_index) != 8); BUILD_BUG_ON(sizeof(struct z_erofs_map_header) != 8); BUILD_BUG_ON(sizeof(struct z_erofs_vle_decompressed_index) != 8); BUILD_BUG_ON(sizeof(struct erofs_dirent) != 12); + /* keep in sync between 2 index structures for better extendibility */ + BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_index) != + sizeof(struct z_erofs_vle_decompressed_index)); + BUILD_BUG_ON(sizeof(struct erofs_deviceslot) != 128); BUILD_BUG_ON(BIT(Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) < Z_EROFS_VLE_CLUSTER_TYPE_MAX - 1); } #endif - diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c new file mode 100644 index 0000000000000..65894135e0ad9 --- /dev/null +++ b/fs/erofs/fscache.c @@ -0,0 +1,820 @@ +/* + * Copyright (C) 2022, Alibaba Cloud + * Copyright (C) 2022, Bytedance Inc. All rights reserved. + */ +#define FSCACHE_USE_NEW_IO_API +#include +#include +#include +#include +#include "internal.h" + +static DEFINE_MUTEX(erofs_domain_list_lock); +static DEFINE_MUTEX(erofs_domain_cookies_lock); +static LIST_HEAD(erofs_domain_list); +static struct vfsmount *erofs_pseudo_mnt; + +struct fscache_netfs erofs_fscache_netfs = { + .name = "erofs", + .version = 0, +}; + +int erofs_fscache_register(void) +{ + return fscache_register_netfs(&erofs_fscache_netfs); +} + +void erofs_fscache_unregister(void) +{ + fscache_unregister_netfs(&erofs_fscache_netfs); +} + +const struct fscache_cookie_def erofs_fscache_super_index_def = { + .name = "EROFS.super", + .type = FSCACHE_COOKIE_TYPE_INDEX, + .check_aux = NULL, +}; + +const struct fscache_cookie_def erofs_fscache_inode_object_def = { + .name = "CIFS.uniqueid", + .type = FSCACHE_COOKIE_TYPE_DATAFILE, +}; + +static void erofs_fscache_free_request(struct work_struct *work) +{ + struct netfs_read_request *rreq = + container_of(work, struct netfs_read_request, work); + + if (rreq->cache_resources.ops) + rreq->cache_resources.ops->end_operation(&rreq->cache_resources); + kfree(rreq); +} + +static struct netfs_read_request *erofs_fscache_alloc_request(struct address_space *mapping, + loff_t start, size_t len) +{ + struct netfs_read_request *rreq; + + rreq = kzalloc(sizeof(struct netfs_read_request), GFP_KERNEL); + if (!rreq) + return ERR_PTR(-ENOMEM); + + rreq->start = start; + rreq->len = len; + rreq->mapping = mapping; + INIT_LIST_HEAD(&rreq->subrequests); + INIT_WORK(&rreq->work, erofs_fscache_free_request); + refcount_set(&rreq->usage, 1); + return rreq; +} + +static void erofs_fscache_put_request(struct netfs_read_request *rreq, bool was_async) +{ + if (!refcount_dec_and_test(&rreq->usage)) + return; + if (was_async) { + if (!queue_work(system_unbound_wq, &rreq->work)) + BUG(); + } else + erofs_fscache_free_request(&rreq->work); +} + +static void erofs_fscache_put_subrequest(struct netfs_read_subrequest *subreq, bool was_async) +{ + if (!refcount_dec_and_test(&subreq->usage)) + return; + erofs_fscache_put_request(subreq->rreq, was_async); + kfree(subreq); +} + +static void erofs_fscache_clear_subrequests(struct netfs_read_request *rreq, bool was_async) +{ + struct netfs_read_subrequest *subreq; + + while (!list_empty(&rreq->subrequests)) { + subreq = list_first_entry(&rreq->subrequests, + struct netfs_read_subrequest, rreq_link); + list_del(&subreq->rreq_link); + erofs_fscache_put_subrequest(subreq, was_async); + } +} + +static void erofs_fscache_rreq_unlock_pages(struct netfs_read_request *rreq) +{ + struct netfs_read_subrequest *subreq; + struct page *page; + unsigned int iopos = 0; + pgoff_t start_page = rreq->start / PAGE_SIZE; + pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1; + bool subreq_failed = false; + + XA_STATE(xas, &rreq->mapping->i_pages, start_page); + + subreq = list_first_entry(&rreq->subrequests, + struct netfs_read_subrequest, rreq_link); + subreq_failed = (subreq->error < 0); + + rcu_read_lock(); + xas_for_each(&xas, page, last_page) { + unsigned int pgpos, pgend; + bool pg_failed = false; + + if (xas_retry(&xas, page)) + continue; + + pgpos = (page_index(page) - start_page) * PAGE_SIZE; + pgend = pgpos + PAGE_SIZE; + + for (;;) { + if (!subreq) { + pg_failed = true; + break; + } + + pg_failed |= subreq_failed; + if (pgend < iopos + subreq->len) + break; + + iopos += subreq->len; + if (!list_is_last(&subreq->rreq_link, + &rreq->subrequests)) { + subreq = list_next_entry(subreq, rreq_link); + subreq_failed = (subreq->error < 0); + } else { + subreq = NULL; + subreq_failed = false; + } + if (pgend == iopos) + break; + } + + if (!pg_failed) + SetPageUptodate(page); + + unlock_page(page); + } + rcu_read_unlock(); +} + +static void erofs_fscache_rreq_complete(struct netfs_read_request *rreq, bool was_async) +{ + erofs_fscache_rreq_unlock_pages(rreq); + erofs_fscache_clear_subrequests(rreq, was_async); + erofs_fscache_put_request(rreq, was_async); +} + +static void erofs_fscache_subreq_complete(void *priv, + ssize_t transferred_or_error, bool was_async) +{ + struct netfs_read_subrequest *subreq = priv; + struct netfs_read_request *rreq = subreq->rreq; + + if (IS_ERR_VALUE(transferred_or_error)) + subreq->error = transferred_or_error; + + if (atomic_dec_and_test(&rreq->nr_rd_ops)) + erofs_fscache_rreq_complete(rreq, was_async); + + erofs_fscache_put_subrequest(subreq, was_async); +} + +/* + * Read data from fscache and fill the read data into page cache described by + * @rreq, which shall be both aligned with PAGE_SIZE. @pstart describes + * the start physical address in the cache file. + */ +static int erofs_fscache_data_read_async(struct fscache_cookie *cookie, + struct netfs_read_request *rreq, loff_t pstart) +{ + enum netfs_read_source source; + struct super_block *sb = rreq->mapping->host->i_sb; + struct netfs_read_subrequest *subreq; + struct netfs_cache_resources *cres = &rreq->cache_resources; + struct iov_iter iter; + loff_t start = rreq->start; + size_t len = rreq->len; + size_t done = 0; + int ret; + + if (len == 0) + return 0; + atomic_set(&rreq->nr_rd_ops, 1); + + ret = fscache_begin_read_operation(rreq, cookie); + if (ret) + goto out; + + while (done < len) { + subreq = kzalloc(sizeof(struct netfs_read_subrequest), + GFP_KERNEL); + if (subreq) { + INIT_LIST_HEAD(&subreq->rreq_link); + refcount_set(&subreq->usage, 2); + subreq->rreq = rreq; + refcount_inc(&rreq->usage); + } else { + ret = -ENOMEM; + goto out; + } + + subreq->start = pstart + done; + subreq->len = len - done; + subreq->flags = 1 << NETFS_SREQ_ONDEMAND; + + list_add_tail(&subreq->rreq_link, &rreq->subrequests); + + source = cres->ops->prepare_read(subreq, LLONG_MAX); + if (WARN_ON(subreq->len == 0)) + source = NETFS_INVALID_READ; + if (source != NETFS_READ_FROM_CACHE) { + erofs_err(sb, "failed to fscache prepare_read (source %d)", + source); + ret = -EIO; + subreq->error = ret; + erofs_fscache_put_subrequest(subreq, false); + goto out; + } + + atomic_inc(&rreq->nr_rd_ops); + + iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages, + start + done, subreq->len); + + ret = cres->ops->read(cres, subreq->start, &iter, + true, erofs_fscache_subreq_complete, subreq); + if (ret == -EIOCBQUEUED) + ret = 0; + if (ret) { + erofs_err(sb, "failed to fscache_read (ret %d)", ret); + goto out; + } + done += subreq->len; + } +out: + if (atomic_dec_and_test(&rreq->nr_rd_ops)) + erofs_fscache_rreq_complete(rreq, false); + + return ret; +} + +static int erofs_fscache_meta_readpage(struct file *data, struct page *page) +{ + int ret; + struct super_block *sb = page_mapping(page)->host->i_sb; + struct netfs_read_request *rreq; + struct erofs_map_dev mdev = { + .m_deviceid = 0, + .m_pa = page_offset(page), + }; + + ret = erofs_map_dev(sb, &mdev); + if (ret) + goto out; + + rreq = erofs_fscache_alloc_request(page_mapping(page), + page_offset(page), PAGE_SIZE); + if (IS_ERR(rreq)) { + ret = PTR_ERR(rreq); + goto out; + } + + return erofs_fscache_data_read_async(mdev.m_fscache->cookie, + rreq, mdev.m_pa); +out: + unlock_page(page); + return ret; +} + +static int erofs_fscache_readpage_inline(struct page *page, + struct erofs_map_blocks *map) +{ + struct super_block *sb = page_mapping(page)->host->i_sb; + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; + erofs_blk_t blknr; + size_t offset, len; + void *src, *dst; + + /* For tail packing layout, the offset may be non-zero. */ + offset = erofs_blkoff(map->m_pa); + blknr = erofs_blknr(map->m_pa); + len = map->m_llen; + + src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP); + if (IS_ERR(src)) + return PTR_ERR(src); + + dst = kmap_atomic(page); + memcpy(dst, src + offset, len); + memset(dst + len, 0, PAGE_SIZE - len); + kunmap_atomic(dst); + + erofs_put_metabuf(&buf); + return 0; +} + + +static int erofs_fscache_readpage(struct file *file, struct page *page) +{ + struct inode *inode = page->mapping->host; + struct super_block *sb = inode->i_sb; + struct erofs_map_blocks map; + struct erofs_map_dev mdev; + struct netfs_read_request *rreq; + erofs_off_t pos; + loff_t pstart; + int ret; + + DBG_BUGON(page_size(page) != EROFS_BLKSIZ); + + pos = page_offset(page); + map.m_la = pos; + + ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW); + if (ret) + goto out_unlock; + + if (!(map.m_flags & EROFS_MAP_MAPPED)) { + zero_user_segments(page, 0, page_size(page), 0, 0); + goto out_uptodate; + } + + if (map.m_flags & EROFS_MAP_META) { + ret = erofs_fscache_readpage_inline(page, &map); + goto out_uptodate; + } + + mdev = (struct erofs_map_dev) { + .m_deviceid = map.m_deviceid, + .m_pa = map.m_pa, + }; + + ret = erofs_map_dev(sb, &mdev); + if (ret) + goto out_unlock; + + + rreq = erofs_fscache_alloc_request(page_mapping(page), + page_offset(page), PAGE_SIZE); + if (IS_ERR(rreq)) { + ret = PTR_ERR(rreq); + goto out_unlock; + } + pstart = mdev.m_pa + (pos - map.m_la); + return erofs_fscache_data_read_async(mdev.m_fscache->cookie, + rreq, pstart); + +out_uptodate: + if (!ret) + SetPageUptodate(page); +out_unlock: + unlock_page(page); + return ret; +} + +static int erofs_fscache_readpages(struct file *filp, struct address_space *mapping, + struct list_head *pages, unsigned int nr_pages) +{ + struct inode *inode = mapping->host; + struct super_block *sb = inode->i_sb; + size_t len, count, done = 0; + erofs_off_t pos; + loff_t start, offset; + int ret; + struct page *page; + + if (!nr_pages) + return 0; + + start = page_offset(lru_to_page(pages)); + len = nr_pages * PAGE_SIZE; + + do { + struct erofs_map_blocks map; + struct erofs_map_dev mdev; + struct netfs_read_request *rreq; + unsigned int i, batch; + + pos = start + done; + map.m_la = pos; + + ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW); + if (ret) + return ret; + + offset = start + done; + count = min_t(size_t, map.m_llen - (pos - map.m_la), + len - done); + + if (!(map.m_flags & EROFS_MAP_MAPPED)) { + page = lru_to_page(pages); + + list_del(&page->lru); + ret = add_to_page_cache_lru(page, mapping, page->index, + readahead_gfp_mask(mapping)); + if (ret) { + put_page(page); + return ret; + } + zero_user_segment(page, 0, PAGE_SIZE); + SetPageUptodate(page); + unlock_page(page); + put_page(page); + ret = PAGE_SIZE; + continue; + } + + if (map.m_flags & EROFS_MAP_META) { + page = lru_to_page(pages); + + list_del(&page->lru); + ret = add_to_page_cache_lru(page, mapping, page->index, + readahead_gfp_mask(mapping)); + if (ret) { + put_page(page); + return ret; + } + ret = erofs_fscache_readpage_inline(page, &map); + if (!ret) { + SetPageUptodate(page); + ret = PAGE_SIZE; + } + unlock_page(page); + put_page(page); + continue; + } + + mdev = (struct erofs_map_dev) { + .m_deviceid = map.m_deviceid, + .m_pa = map.m_pa, + }; + ret = erofs_map_dev(sb, &mdev); + if (ret) + return ret; + + rreq = erofs_fscache_alloc_request(mapping, + offset, count); + if (IS_ERR(rreq)) + return PTR_ERR(rreq); + /* + * Drop the ref of page here. Unlock them in + * rreq_unlock_pages() when rreq complete. + */ + if (WARN_ON(count % PAGE_SIZE)) + return 0; + batch = count >> PAGE_SHIFT; + + for (i = 0; i < batch; i++) { + page = lru_to_page(pages); + if (WARN_ON(page_offset(page) != (pos + PAGE_SIZE * i))) { + return 0; + } + list_del(&page->lru); + ret = add_to_page_cache_lru(page, mapping, page->index, + readahead_gfp_mask(mapping)); + if (ret) { + put_page(page); + /* only process pages already added to page cache, + * skip the failed page in next loop. + */ + rreq->len = i * PAGE_SIZE; + count = rreq->len + PAGE_SIZE; + break; + } + put_page(page); + } + + ret = erofs_fscache_data_read_async(mdev.m_fscache->cookie, + rreq, mdev.m_pa + (pos - map.m_la)); + if (!ret) + ret = count; + } while (ret > 0 && ((done += ret) < len)); + return ret; +} + +static const struct address_space_operations erofs_fscache_meta_aops = { + .readpage = erofs_fscache_meta_readpage, +}; + +const struct address_space_operations erofs_fscache_access_aops = { + .readpage = erofs_fscache_readpage, + .readpages = erofs_fscache_readpages, +}; + +static void erofs_fscache_domain_put(struct erofs_domain *domain) +{ + if (!domain) + return; + mutex_lock(&erofs_domain_list_lock); + if (refcount_dec_and_test(&domain->ref)) { + list_del(&domain->list); + if (list_empty(&erofs_domain_list)) { + kern_unmount(erofs_pseudo_mnt); + erofs_pseudo_mnt = NULL; + } + mutex_unlock(&erofs_domain_list_lock); + fscache_relinquish_cookie(domain->volume, NULL, false); + kfree(domain->domain_id); + kfree(domain); + return; + } + mutex_unlock(&erofs_domain_list_lock); +} + +static int erofs_fscache_register_volume(struct super_block *sb) +{ + struct erofs_sb_info *sbi = EROFS_SB(sb); + struct fscache_cookie *volume; + char *domain_id = sbi->domain_id; + char *name; + int ret = 0; + + name = kasprintf(GFP_KERNEL, "erofs,%s", + domain_id ? domain_id : sbi->fsid); + if (!name) + return -ENOMEM; + + volume = fscache_acquire_cookie(erofs_fscache_netfs.primary_index, + &erofs_fscache_super_index_def, name, strlen(name), + NULL, 0, NULL, 0, true); + if (IS_ERR_OR_NULL(volume)) { + erofs_err(sb, "failed to register volume for %s", name); + ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP; + volume = NULL; + } + + sbi->volume = volume; + kfree(name); + return ret; +} + +static int erofs_fscache_init_domain(struct super_block *sb) +{ + int err; + struct erofs_domain *domain; + struct erofs_sb_info *sbi = EROFS_SB(sb); + + domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL); + if (!domain) + return -ENOMEM; + + domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL); + if (!domain->domain_id) { + kfree(domain); + return -ENOMEM; + } + + err = erofs_fscache_register_volume(sb); + if (err) + goto out; + + if (!erofs_pseudo_mnt) { + erofs_pseudo_mnt = kern_mount(&erofs_fs_type); + if (IS_ERR(erofs_pseudo_mnt)) { + err = PTR_ERR(erofs_pseudo_mnt); + goto out; + } + } + + domain->volume = sbi->volume; + refcount_set(&domain->ref, 1); + list_add(&domain->list, &erofs_domain_list); + sbi->domain = domain; + return 0; +out: + kfree(domain->domain_id); + kfree(domain); + return err; +} + +static int erofs_fscache_register_domain(struct super_block *sb) +{ + int err; + struct erofs_domain *domain; + struct erofs_sb_info *sbi = EROFS_SB(sb); + + mutex_lock(&erofs_domain_list_lock); + list_for_each_entry(domain, &erofs_domain_list, list) { + if (!strcmp(domain->domain_id, sbi->domain_id)) { + sbi->domain = domain; + sbi->volume = domain->volume; + refcount_inc(&domain->ref); + mutex_unlock(&erofs_domain_list_lock); + return 0; + } + } + err = erofs_fscache_init_domain(sb); + mutex_unlock(&erofs_domain_list_lock); + return err; +} + +static +struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb, + char *name, + unsigned int flags) +{ + struct erofs_fscache *ctx; + struct fscache_cookie *cookie; + int ret; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return ERR_PTR(-ENOMEM); + + cookie = fscache_acquire_cookie(EROFS_SB(sb)->volume, + &erofs_fscache_inode_object_def, + name, strlen(name), + NULL, 0, NULL, 0, true); + if (!cookie) { + erofs_err(sb, "failed to get cookie for %s", name); + ret = -EINVAL; + goto err; + } + + ctx->cookie = cookie; + + if (flags & EROFS_REG_COOKIE_NEED_INODE) { + struct inode *const inode = new_inode(sb); + + if (!inode) { + erofs_err(sb, "failed to get anon inode for %s", name); + ret = -ENOMEM; + goto err_cookie; + } + + set_nlink(inode, 1); + inode->i_size = OFFSET_MAX; + inode->i_mapping->a_ops = &erofs_fscache_meta_aops; + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); + + ctx->inode = inode; + } + + return ctx; + +err_cookie: + fscache_relinquish_cookie(ctx->cookie, NULL, false); +err: + kfree(ctx); + return ERR_PTR(ret); +} + +static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx) +{ + if (!ctx) + return; + + fscache_relinquish_cookie(ctx->cookie, NULL, false); + iput(ctx->inode); + kfree(ctx->name); + kfree(ctx); +} + +static +struct erofs_fscache *erofs_fscache_domain_init_cookie(struct super_block *sb, + char *name, + unsigned int flags) +{ + int err; + struct inode *inode; + struct erofs_fscache *ctx; + struct erofs_domain *domain = EROFS_SB(sb)->domain; + + ctx = erofs_fscache_acquire_cookie(sb, name, flags); + if (IS_ERR(ctx)) + return ctx; + + ctx->name = kstrdup(name, GFP_KERNEL); + if (!ctx->name) { + err = -ENOMEM; + goto out; + } + + inode = new_inode(erofs_pseudo_mnt->mnt_sb); + if (!inode) { + err = -ENOMEM; + goto out; + } + + ctx->domain = domain; + ctx->anon_inode = inode; + inode->i_private = ctx; + refcount_inc(&domain->ref); + return ctx; +out: + erofs_fscache_relinquish_cookie(ctx); + return ERR_PTR(err); +} + +static +struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb, + char *name, + unsigned int flags) +{ + struct inode *inode; + struct erofs_fscache *ctx; + struct erofs_domain *domain = EROFS_SB(sb)->domain; + struct super_block *psb = erofs_pseudo_mnt->mnt_sb; + + mutex_lock(&erofs_domain_cookies_lock); + spin_lock(&psb->s_inode_list_lock); + list_for_each_entry(inode, &psb->s_inodes, i_sb_list) { + ctx = inode->i_private; + if (!ctx || ctx->domain != domain || strcmp(ctx->name, name)) + continue; + if (!(flags & EROFS_REG_COOKIE_NEED_NOEXIST)) { + igrab(inode); + } else { + erofs_err(sb, "%s already exists in domain %s", name, + domain->domain_id); + ctx = ERR_PTR(-EEXIST); + } + spin_unlock(&psb->s_inode_list_lock); + mutex_unlock(&erofs_domain_cookies_lock); + return ctx; + } + spin_unlock(&psb->s_inode_list_lock); + ctx = erofs_fscache_domain_init_cookie(sb, name, flags); + mutex_unlock(&erofs_domain_cookies_lock); + return ctx; +} + +struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb, + char *name, + unsigned int flags) +{ + if (EROFS_SB(sb)->domain_id) + return erofs_domain_register_cookie(sb, name, flags); + return erofs_fscache_acquire_cookie(sb, name, flags); +} + +void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx) +{ + bool drop; + struct erofs_domain *domain; + + if (!ctx) + return; + domain = ctx->domain; + if (domain) { + mutex_lock(&erofs_domain_cookies_lock); + drop = atomic_read(&ctx->anon_inode->i_count) == 1; + iput(ctx->anon_inode); + mutex_unlock(&erofs_domain_cookies_lock); + if (!drop) + return; + } + + erofs_fscache_relinquish_cookie(ctx); + erofs_fscache_domain_put(domain); +} + +int erofs_fscache_register_fs(struct super_block *sb) +{ + int ret; + struct erofs_sb_info *sbi = EROFS_SB(sb); + struct erofs_fscache *fscache; + unsigned int flags; + + if (sbi->domain_id) + ret = erofs_fscache_register_domain(sb); + else + ret = erofs_fscache_register_volume(sb); + if (ret) + return ret; + + /* + * When shared domain is enabled, using NEED_NOEXIST to guarantee + * the primary data blob (aka fsid) is unique in the shared domain. + * + * For non-shared-domain case, fscache_acquire_volume() invoked by + * erofs_fscache_register_volume() has already guaranteed + * the uniqueness of primary data blob. + * + * Acquired domain/volume will be relinquished in kill_sb() on error. + */ + flags = EROFS_REG_COOKIE_NEED_INODE; + if (sbi->domain_id) + flags |= EROFS_REG_COOKIE_NEED_NOEXIST; + fscache = erofs_fscache_register_cookie(sb, sbi->fsid, flags); + if (IS_ERR(fscache)) + return PTR_ERR(fscache); + + sbi->s_fscache = fscache; + return 0; +} + +void erofs_fscache_unregister_fs(struct super_block *sb) +{ + struct erofs_sb_info *sbi = EROFS_SB(sb); + + erofs_fscache_unregister_cookie(sbi->s_fscache); + + if (sbi->domain) + erofs_fscache_domain_put(sbi->domain); + else + fscache_relinquish_cookie(sbi->volume, NULL, false); + + sbi->s_fscache = NULL; + sbi->volume = NULL; + sbi->domain = NULL; +} diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index 0dbeaf68e1d6e..fc05960e228e0 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2017-2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang + * https://www.huawei.com/ + * Copyright (C) 2021, Alibaba Cloud */ #include "xattr.h" @@ -13,8 +13,8 @@ * the inode payload page if it's an extended inode) in order to fill * inline data if possible. */ -static struct page *erofs_read_inode(struct inode *inode, - unsigned int *ofs) +static void *erofs_read_inode(struct erofs_buf *buf, + struct inode *inode, unsigned int *ofs) { struct super_block *sb = inode->i_sb; struct erofs_sb_info *sbi = EROFS_SB(sb); @@ -22,7 +22,7 @@ static struct page *erofs_read_inode(struct inode *inode, const erofs_off_t inode_loc = iloc(sbi, vi->nid); erofs_blk_t blkaddr, nblks = 0; - struct page *page; + void *kaddr; struct erofs_inode_compact *dic; struct erofs_inode_extended *die, *copied = NULL; unsigned int ifmt; @@ -34,14 +34,14 @@ static struct page *erofs_read_inode(struct inode *inode, erofs_dbg("%s, reading inode nid %llu at %u of blkaddr %u", __func__, vi->nid, *ofs, blkaddr); - page = erofs_get_meta_page(sb, blkaddr); - if (IS_ERR(page)) { + kaddr = erofs_read_metabuf(buf, sb, blkaddr, EROFS_KMAP); + if (IS_ERR(kaddr)) { erofs_err(sb, "failed to get inode (nid: %llu) page, err %ld", - vi->nid, PTR_ERR(page)); - return page; + vi->nid, PTR_ERR(kaddr)); + return kaddr; } - dic = page_address(page) + *ofs; + dic = kaddr + *ofs; ifmt = le16_to_cpu(dic->i_format); if (ifmt & ~EROFS_I_ALL) { @@ -62,12 +62,12 @@ static struct page *erofs_read_inode(struct inode *inode, switch (erofs_inode_version(ifmt)) { case EROFS_INODE_LAYOUT_EXTENDED: vi->inode_isize = sizeof(struct erofs_inode_extended); - /* check if the inode acrosses page boundary */ - if (*ofs + vi->inode_isize <= PAGE_SIZE) { + /* check if the extended inode acrosses block boundary */ + if (*ofs + vi->inode_isize <= EROFS_BLKSIZ) { *ofs += vi->inode_isize; die = (struct erofs_inode_extended *)dic; } else { - const unsigned int gotten = PAGE_SIZE - *ofs; + const unsigned int gotten = EROFS_BLKSIZ - *ofs; copied = kmalloc(vi->inode_isize, GFP_NOFS); if (!copied) { @@ -75,18 +75,16 @@ static struct page *erofs_read_inode(struct inode *inode, goto err_out; } memcpy(copied, dic, gotten); - unlock_page(page); - put_page(page); - - page = erofs_get_meta_page(sb, blkaddr + 1); - if (IS_ERR(page)) { - erofs_err(sb, "failed to get inode payload page (nid: %llu), err %ld", - vi->nid, PTR_ERR(page)); + kaddr = erofs_read_metabuf(buf, sb, blkaddr + 1, + EROFS_KMAP); + if (IS_ERR(kaddr)) { + erofs_err(sb, "failed to get inode payload block (nid: %llu), err %ld", + vi->nid, PTR_ERR(kaddr)); kfree(copied); - return page; + return kaddr; } *ofs = vi->inode_isize - gotten; - memcpy((u8 *)copied + gotten, page_address(page), *ofs); + memcpy((u8 *)copied + gotten, kaddr, *ofs); die = copied; } vi->xattr_isize = erofs_xattr_ibody_size(die->i_xattr_icount); @@ -123,8 +121,11 @@ static struct page *erofs_read_inode(struct inode *inode, /* total blocks for compressed files */ if (erofs_inode_is_data_compressed(vi->datalayout)) nblks = le32_to_cpu(die->i_u.compressed_blocks); - + else if (vi->datalayout == EROFS_INODE_CHUNK_BASED) + /* fill chunked inode summary info */ + vi->chunkformat = le16_to_cpu(die->i_u.c.format); kfree(copied); + copied = NULL; break; case EROFS_INODE_LAYOUT_COMPACT: vi->inode_isize = sizeof(struct erofs_inode_compact); @@ -161,6 +162,8 @@ static struct page *erofs_read_inode(struct inode *inode, inode->i_size = le32_to_cpu(dic->i_size); if (erofs_inode_is_data_compressed(vi->datalayout)) nblks = le32_to_cpu(dic->i_u.compressed_blocks); + else if (vi->datalayout == EROFS_INODE_CHUNK_BASED) + vi->chunkformat = le16_to_cpu(dic->i_u.c.format); break; default: erofs_err(inode->i_sb, @@ -170,6 +173,17 @@ static struct page *erofs_read_inode(struct inode *inode, goto err_out; } + if (vi->datalayout == EROFS_INODE_CHUNK_BASED) { + if (vi->chunkformat & ~EROFS_CHUNK_FORMAT_ALL) { + erofs_err(inode->i_sb, + "unsupported chunk format %x of nid %llu", + vi->chunkformat, vi->nid); + err = -EOPNOTSUPP; + goto err_out; + } + vi->chunkbits = LOG_BLOCK_SIZE + + (vi->chunkformat & EROFS_CHUNK_FORMAT_BLKBITS_MASK); + } inode->i_mtime.tv_sec = inode->i_ctime.tv_sec; inode->i_atime.tv_sec = inode->i_ctime.tv_sec; inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec; @@ -180,7 +194,7 @@ static struct page *erofs_read_inode(struct inode *inode, inode->i_blocks = roundup(inode->i_size, EROFS_BLKSIZ) >> 9; else inode->i_blocks = nblks << LOG_SECTORS_PER_BLOCK; - return page; + return kaddr; bogusimode: erofs_err(inode->i_sb, "bogus i_mode (%o) @ nid %llu", @@ -189,12 +203,11 @@ static struct page *erofs_read_inode(struct inode *inode, err_out: DBG_BUGON(1); kfree(copied); - unlock_page(page); - put_page(page); + erofs_put_metabuf(buf); return ERR_PTR(err); } -static int erofs_fill_symlink(struct inode *inode, void *data, +static int erofs_fill_symlink(struct inode *inode, void *kaddr, unsigned int m_pofs) { struct erofs_inode *vi = EROFS_I(inode); @@ -202,7 +215,7 @@ static int erofs_fill_symlink(struct inode *inode, void *data, /* if it cannot be handled with fast symlink scheme */ if (vi->datalayout != EROFS_INODE_FLAT_INLINE || - inode->i_size >= PAGE_SIZE) { + inode->i_size >= EROFS_BLKSIZ) { inode->i_op = &erofs_symlink_iops; return 0; } @@ -212,8 +225,8 @@ static int erofs_fill_symlink(struct inode *inode, void *data, return -ENOMEM; m_pofs += vi->xattr_isize; - /* inline symlink data shouldn't cross page boundary as well */ - if (m_pofs + inode->i_size > PAGE_SIZE) { + /* inline symlink data shouldn't cross block boundary */ + if (m_pofs + inode->i_size > EROFS_BLKSIZ) { kfree(lnk); erofs_err(inode->i_sb, "inline data cross block boundary @ nid %llu", @@ -221,8 +234,7 @@ static int erofs_fill_symlink(struct inode *inode, void *data, DBG_BUGON(1); return -EFSCORRUPTED; } - - memcpy(lnk, data + m_pofs, inode->i_size); + memcpy(lnk, kaddr + m_pofs, inode->i_size); lnk[inode->i_size] = '\0'; inode->i_link = lnk; @@ -233,16 +245,17 @@ static int erofs_fill_symlink(struct inode *inode, void *data, static int erofs_fill_inode(struct inode *inode, int isdir) { struct erofs_inode *vi = EROFS_I(inode); - struct page *page; + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; + void *kaddr; unsigned int ofs; int err = 0; trace_erofs_fill_inode(inode, isdir); /* read inode base data from disk */ - page = erofs_read_inode(inode, &ofs); - if (IS_ERR(page)) - return PTR_ERR(page); + kaddr = erofs_read_inode(&buf, inode, &ofs); + if (IS_ERR(kaddr)) + return PTR_ERR(kaddr); /* setup the new inode */ switch (inode->i_mode & S_IFMT) { @@ -255,7 +268,7 @@ static int erofs_fill_inode(struct inode *inode, int isdir) inode->i_fop = &erofs_dir_fops; break; case S_IFLNK: - err = erofs_fill_symlink(inode, page_address(page), ofs); + err = erofs_fill_symlink(inode, kaddr, ofs); if (err) goto out_unlock; inode_nohighmem(inode); @@ -273,14 +286,20 @@ static int erofs_fill_inode(struct inode *inode, int isdir) } if (erofs_inode_is_data_compressed(vi->datalayout)) { - err = z_erofs_fill_inode(inode); + if (!erofs_is_fscache_mode(inode->i_sb)) + err = z_erofs_fill_inode(inode); + else + err = -EOPNOTSUPP; goto out_unlock; } inode->i_mapping->a_ops = &erofs_raw_access_aops; +#ifdef CONFIG_EROFS_FS_ONDEMAND + if (erofs_is_fscache_mode(inode->i_sb)) + inode->i_mapping->a_ops = &erofs_fscache_access_aops; +#endif out_unlock: - unlock_page(page); - put_page(page); + erofs_put_metabuf(&buf); return err; } @@ -356,27 +375,20 @@ int erofs_getattr(const struct path *path, struct kstat *stat, const struct inode_operations erofs_generic_iops = { .getattr = erofs_getattr, -#ifdef CONFIG_EROFS_FS_XATTR .listxattr = erofs_listxattr, -#endif .get_acl = erofs_get_acl, }; const struct inode_operations erofs_symlink_iops = { .get_link = page_get_link, .getattr = erofs_getattr, -#ifdef CONFIG_EROFS_FS_XATTR .listxattr = erofs_listxattr, -#endif .get_acl = erofs_get_acl, }; const struct inode_operations erofs_fast_symlink_iops = { .get_link = simple_get_link, .getattr = erofs_getattr, -#ifdef CONFIG_EROFS_FS_XATTR .listxattr = erofs_listxattr, -#endif .get_acl = erofs_get_acl, }; - diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 544a453f3076c..3f49470c3b5b3 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -1,8 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2017-2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang + * https://www.huawei.com/ + * Copyright (C) 2021, Alibaba Cloud */ #ifndef __EROFS_INTERNAL_H #define __EROFS_INTERNAL_H @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -46,7 +47,57 @@ typedef u64 erofs_off_t; /* data type for filesystem-wide blocks number */ typedef u32 erofs_blk_t; +struct erofs_device_info { + char *path; + struct erofs_fscache *fscache; + struct block_device *bdev; + + u32 blocks; + u32 mapped_blkaddr; +}; + +struct erofs_mount_opts { +#ifdef CONFIG_EROFS_FS_ZIP + /* current strategy of how to use managed cache */ + unsigned char cache_strategy; + + /* threshold for decompression synchronously */ + unsigned int max_sync_decompress_pages; +#endif + unsigned int mount_opt; +}; + +struct erofs_dev_context { + struct idr tree; + struct rw_semaphore rwsem; + + unsigned int extra_devices; +}; + +struct erofs_fs_context { + struct erofs_mount_opts opt; + struct erofs_dev_context *devs; + char *fsid; + char *domain_id; +}; + +struct erofs_domain { + refcount_t ref; + struct list_head list; + struct fscache_cookie *volume; + char *domain_id; +}; + +struct erofs_fscache { + struct fscache_cookie *cookie; + struct inode *inode; + struct inode *anon_inode; + struct erofs_domain *domain; + char *name; +}; + struct erofs_sb_info { + struct erofs_mount_opts opt; /* options */ #ifdef CONFIG_EROFS_FS_ZIP /* list for all registered superblocks, mainly for shrinker */ struct list_head list; @@ -55,22 +106,20 @@ struct erofs_sb_info { /* the dedicated workstation for compression */ struct radix_tree_root workstn_tree; - /* threshold for decompression synchronously */ - unsigned int max_sync_decompress_pages; - unsigned int shrinker_run_no; - /* current strategy of how to use managed cache */ - unsigned char cache_strategy; - /* pseudo inode to manage cached pages */ struct inode *managed_cache; #endif /* CONFIG_EROFS_FS_ZIP */ - u32 blocks; + struct erofs_dev_context *devs; + u64 total_blocks; + u32 primarydevice_blocks; + u32 meta_blkaddr; #ifdef CONFIG_EROFS_FS_XATTR u32 xattr_blkaddr; #endif + u16 device_id_mask; /* valid bits of device id to be used */ /* inode slot unit size in bit shift */ unsigned char islotbits; @@ -85,9 +134,15 @@ struct erofs_sb_info { u8 uuid[16]; /* 128-bit uuid for volume */ u8 volume_name[16]; /* volume name */ + u32 feature_compat; u32 feature_incompat; - unsigned int mount_opt; + /* fscache support */ + struct fscache_cookie *volume; + struct erofs_fscache *s_fscache; + struct erofs_domain *domain; + char *fsid; + char *domain_id; }; #define EROFS_SB(sb) ((struct erofs_sb_info *)(sb)->s_fs_info) @@ -97,17 +152,22 @@ struct erofs_sb_info { #define EROFS_MOUNT_XATTR_USER 0x00000010 #define EROFS_MOUNT_POSIX_ACL 0x00000020 -#define clear_opt(sbi, option) ((sbi)->mount_opt &= ~EROFS_MOUNT_##option) -#define set_opt(sbi, option) ((sbi)->mount_opt |= EROFS_MOUNT_##option) -#define test_opt(sbi, option) ((sbi)->mount_opt & EROFS_MOUNT_##option) +#define clear_opt(opt, option) ((opt)->mount_opt &= ~EROFS_MOUNT_##option) +#define set_opt(opt, option) ((opt)->mount_opt |= EROFS_MOUNT_##option) +#define test_opt(opt, option) ((opt)->mount_opt & EROFS_MOUNT_##option) + +static inline bool erofs_is_fscache_mode(struct super_block *sb) +{ + return IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && !sb->s_bdev; +} -#ifdef CONFIG_EROFS_FS_ZIP enum { EROFS_ZIP_CACHE_DISABLED, EROFS_ZIP_CACHE_READAHEAD, EROFS_ZIP_CACHE_READAROUND }; +#ifdef CONFIG_EROFS_FS_ZIP #define EROFS_LOCKED_MAGIC (INT_MIN | 0xE0F510CCL) /* basic unit of the workstation of a super_block */ @@ -199,6 +259,19 @@ static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp) #error erofs cannot be used in this platform #endif +enum erofs_kmap_type { + EROFS_NO_KMAP, /* don't map the buffer */ + EROFS_KMAP, /* use kmap() to map the buffer */ + EROFS_KMAP_ATOMIC, /* use kmap_atomic() to map the buffer */ +}; + +struct erofs_buf { + struct page *page; + void *base; + enum erofs_kmap_type kmap_type; +}; +#define __EROFS_BUF_INITIALIZER ((struct erofs_buf){ .page = NULL }) + #define ROOT_NID(sb) ((sb)->root_nid) #define erofs_blknr(addr) ((addr) / EROFS_BLKSIZ) @@ -210,6 +283,16 @@ static inline erofs_off_t iloc(struct erofs_sb_info *sbi, erofs_nid_t nid) return blknr_to_addr(sbi->meta_blkaddr) + (nid << sbi->islotbits); } +#define EROFS_FEATURE_FUNCS(name, compat, feature) \ +static inline bool erofs_sb_has_##name(struct erofs_sb_info *sbi) \ +{ \ + return sbi->feature_##compat & EROFS_FEATURE_##feature; \ +} + +EROFS_FEATURE_FUNCS(lz4_0padding, incompat, INCOMPAT_LZ4_0PADDING) +EROFS_FEATURE_FUNCS(device_table, incompat, INCOMPAT_DEVICE_TABLE) +EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM) + /* atomic flag definitions */ #define EROFS_I_EA_INITED_BIT 0 #define EROFS_I_Z_INITED_BIT 1 @@ -233,6 +316,10 @@ struct erofs_inode { union { erofs_blk_t raw_blkaddr; + struct { + unsigned short chunkformat; + unsigned char chunkbits; + }; #ifdef CONFIG_EROFS_FS_ZIP struct { unsigned short z_advise; @@ -276,14 +363,13 @@ static inline unsigned int erofs_inode_datalayout(unsigned int value) } extern const struct super_operations erofs_sops; +extern struct file_system_type erofs_fs_type; extern const struct address_space_operations erofs_raw_access_aops; -#ifdef CONFIG_EROFS_FS_ZIP -extern const struct address_space_operations z_erofs_vle_normalaccess_aops; -#endif +extern const struct address_space_operations z_erofs_aops; /* - * Logical to physical block mapping, used by erofs_map_blocks() + * Logical to physical block mapping * * Different with other file systems, it is used for 2 access modes: * @@ -322,15 +408,16 @@ enum { #define EROFS_MAP_FULL_MAPPED (1 << BH_FullMapped) struct erofs_map_blocks { + struct erofs_buf buf; + erofs_off_t m_pa, m_la; u64 m_plen, m_llen; + unsigned short m_deviceid; unsigned int m_flags; - - struct page *mpage; }; -/* Flags used by erofs_map_blocks() */ +/* Flags used by erofs_map_blocks_flatmode() */ #define EROFS_GET_BLOCKS_RAW 0x0001 /* zmap.c */ @@ -349,10 +436,24 @@ static inline int z_erofs_map_blocks_iter(struct inode *inode, } #endif /* !CONFIG_EROFS_FS_ZIP */ -/* data.c */ -struct page *erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr); +struct erofs_map_dev { + struct erofs_fscache *m_fscache; + struct block_device *m_bdev; -int erofs_map_blocks(struct inode *, struct erofs_map_blocks *, int); + erofs_off_t m_pa; + unsigned int m_deviceid; +}; + +/* data.c */ +void erofs_unmap_metabuf(struct erofs_buf *buf); +void erofs_put_metabuf(struct erofs_buf *buf); +void *erofs_bread(struct erofs_buf *buf, struct inode *inode, + erofs_blk_t blkaddr, enum erofs_kmap_type type); +void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, + erofs_blk_t blkaddr, enum erofs_kmap_type type); +int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *dev); +int erofs_map_blocks(struct inode *inode, + struct erofs_map_blocks *map, int flags); /* inode.c */ static inline unsigned long erofs_inode_hash(erofs_nid_t nid) @@ -425,7 +526,48 @@ static inline int z_erofs_init_zip_subsystem(void) { return 0; } static inline void z_erofs_exit_zip_subsystem(void) {} #endif /* !CONFIG_EROFS_FS_ZIP */ +/* flags for erofs_fscache_register_cookie() */ +#define EROFS_REG_COOKIE_NEED_INODE 1 +#define EROFS_REG_COOKIE_NEED_NOEXIST 2 + +/* fscache.c */ +#ifdef CONFIG_EROFS_FS_ONDEMAND +int erofs_fscache_register(void); +void erofs_fscache_unregister(void); +int erofs_fscache_register_fs(struct super_block *sb); +void erofs_fscache_unregister_fs(struct super_block *sb); + +struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb, + char *name, + unsigned int flags); +void erofs_fscache_unregister_cookie(struct erofs_fscache *fscache); + +extern const struct address_space_operations erofs_fscache_access_aops; +#else +static inline int erofs_fscache_register(void) +{ + return 0; +} +static inline void erofs_fscache_unregister(void) {} +static inline int erofs_fscache_register_fs(struct super_block *sb) +{ + return -EOPNOTSUPP; +} +static inline void erofs_fscache_unregister_fs(struct super_block *sb) {} + +static inline +struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb, + char *name, + unsigned int flags) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void erofs_fscache_unregister_cookie(struct erofs_fscache *fscache) +{ +} +#endif + #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #endif /* __EROFS_INTERNAL_H */ - diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c index 3abbecbf73de1..8476e993a3e3a 100644 --- a/fs/erofs/namei.c +++ b/fs/erofs/namei.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2017-2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang + * https://www.huawei.com/ + * Copyright (C) 2022, Alibaba Cloud */ #include "xattr.h" @@ -87,14 +87,14 @@ static struct erofs_dirent *find_target_dirent(struct erofs_qstr *name, return ERR_PTR(-ENOENT); } -static struct page *find_target_block_classic(struct inode *dir, - struct erofs_qstr *name, - int *_ndirents) +static void *find_target_block_classic(struct erofs_buf *target, + struct inode *dir, + struct erofs_qstr *name, + int *_ndirents) { unsigned int startprfx, endprfx; int head, back; - struct address_space *const mapping = dir->i_mapping; - struct page *candidate = ERR_PTR(-ENOENT); + void *candidate = ERR_PTR(-ENOENT); startprfx = endprfx = 0; head = 0; @@ -102,10 +102,11 @@ static struct page *find_target_block_classic(struct inode *dir, while (head <= back) { const int mid = head + (back - head) / 2; - struct page *page = read_mapping_page(mapping, mid, NULL); + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; + struct erofs_dirent *de; - if (!IS_ERR(page)) { - struct erofs_dirent *de = kmap_atomic(page); + de = erofs_bread(&buf, dir, mid, EROFS_KMAP); + if (!IS_ERR(de)) { const int nameoff = nameoff_from_disk(de->nameoff, EROFS_BLKSIZ); const int ndirents = nameoff / sizeof(*de); @@ -114,13 +115,12 @@ static struct page *find_target_block_classic(struct inode *dir, struct erofs_qstr dname; if (!ndirents) { - kunmap_atomic(de); - put_page(page); + erofs_put_metabuf(&buf); erofs_err(dir->i_sb, "corrupted dir block %d @ nid %llu", mid, EROFS_I(dir)->nid); DBG_BUGON(1); - page = ERR_PTR(-EFSCORRUPTED); + de = ERR_PTR(-EFSCORRUPTED); goto out; } @@ -136,7 +136,6 @@ static struct page *find_target_block_classic(struct inode *dir, /* string comparison without already matched prefix */ diff = erofs_dirnamecmp(name, &dname, &matched); - kunmap_atomic(de); if (!diff) { *_ndirents = 0; @@ -146,11 +145,12 @@ static struct page *find_target_block_classic(struct inode *dir, startprfx = matched; if (!IS_ERR(candidate)) - put_page(candidate); - candidate = page; + erofs_put_metabuf(target); + *target = buf; + candidate = de; *_ndirents = ndirents; } else { - put_page(page); + erofs_put_metabuf(&buf); back = mid - 1; endprfx = matched; @@ -159,8 +159,8 @@ static struct page *find_target_block_classic(struct inode *dir, } out: /* free if the candidate is valid */ if (!IS_ERR(candidate)) - put_page(candidate); - return page; + erofs_put_metabuf(target); + return de; } return candidate; } @@ -170,8 +170,7 @@ int erofs_namei(struct inode *dir, erofs_nid_t *nid, unsigned int *d_type) { int ndirents; - struct page *page; - void *data; + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct erofs_dirent *de; struct erofs_qstr qn; @@ -182,26 +181,20 @@ int erofs_namei(struct inode *dir, qn.end = name->name + name->len; ndirents = 0; - page = find_target_block_classic(dir, &qn, &ndirents); - if (IS_ERR(page)) - return PTR_ERR(page); + de = find_target_block_classic(&buf, dir, &qn, &ndirents); + if (IS_ERR(de)) + return PTR_ERR(de); - data = kmap_atomic(page); /* the target page has been mapped */ if (ndirents) - de = find_target_dirent(&qn, data, EROFS_BLKSIZ, ndirents); - else - de = (struct erofs_dirent *)data; + de = find_target_dirent(&qn, (u8 *)de, EROFS_BLKSIZ, ndirents); if (!IS_ERR(de)) { *nid = le64_to_cpu(de->nid); *d_type = de->file_type; } - - kunmap_atomic(data); - put_page(page); - + erofs_put_metabuf(&buf); return PTR_ERR_OR_ZERO(de); } @@ -244,9 +237,6 @@ static struct dentry *erofs_lookup(struct inode *dir, const struct inode_operations erofs_dir_iops = { .lookup = erofs_lookup, .getattr = erofs_getattr, -#ifdef CONFIG_EROFS_FS_XATTR .listxattr = erofs_listxattr, -#endif .get_acl = erofs_get_acl, }; - diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 22e059b4f745c..385380dedb19d 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -1,14 +1,18 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2017-2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang + * https://www.huawei.com/ + * Copyright (C) 2021, Alibaba Cloud */ #include #include #include #include #include +#include +#include +#include +#include #include "xattr.h" #define CREATE_TRACE_POINTS @@ -46,6 +50,30 @@ void _erofs_info(struct super_block *sb, const char *function, va_end(args); } +static int erofs_superblock_csum_verify(struct super_block *sb, void *sbdata) +{ + struct erofs_super_block *dsb; + u32 expected_crc, crc; + + dsb = kmemdup(sbdata + EROFS_SUPER_OFFSET, + EROFS_BLKSIZ - EROFS_SUPER_OFFSET, GFP_KERNEL); + if (!dsb) + return -ENOMEM; + + expected_crc = le32_to_cpu(dsb->checksum); + dsb->checksum = 0; + /* to allow for x86 boot sectors and other oddities. */ + crc = crc32c(~0, dsb, EROFS_BLKSIZ - EROFS_SUPER_OFFSET); + kfree(dsb); + + if (crc != expected_crc) { + erofs_err(sb, "invalid checksum 0x%08x, 0x%08x expected", + crc, expected_crc); + return -EBADMSG; + } + return 0; +} + static void erofs_inode_init_once(void *ptr) { struct erofs_inode *vi = ptr; @@ -95,24 +123,124 @@ static bool check_layout_compatibility(struct super_block *sb, return true; } +static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb, + struct erofs_device_info *dif, erofs_off_t *pos) +{ + struct erofs_sb_info *sbi = EROFS_SB(sb); + struct erofs_fscache *fscache; + struct erofs_deviceslot *dis; + struct block_device *bdev; + void *ptr; + + ptr = erofs_read_metabuf(buf, sb, erofs_blknr(*pos), EROFS_KMAP); + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + dis = ptr + erofs_blkoff(*pos); + + if (!dif->path) { + if (!dis->tag[0]) { + erofs_err(sb, "empty device tag @ pos %llu", *pos); + return -EINVAL; + } + dif->path = kmemdup_nul(dis->tag, sizeof(dis->tag), GFP_KERNEL); + if (!dif->path) + return -ENOMEM; + } + + if (erofs_is_fscache_mode(sb)) { + fscache = erofs_fscache_register_cookie(sb, dif->path, 0); + if (IS_ERR(fscache)) + return PTR_ERR(fscache); + dif->fscache = fscache; + } else { + bdev = blkdev_get_by_path(dif->path, FMODE_READ | FMODE_EXCL, + sb->s_type); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); + dif->bdev = bdev; + } + + dif->blocks = le32_to_cpu(dis->blocks); + dif->mapped_blkaddr = le32_to_cpu(dis->mapped_blkaddr); + sbi->total_blocks += dif->blocks; + *pos += EROFS_DEVT_SLOT_SIZE; + return 0; +} + +static int erofs_scan_devices(struct super_block *sb, + struct erofs_super_block *dsb) +{ + struct erofs_sb_info *sbi = EROFS_SB(sb); + unsigned int ondisk_extradevs; + erofs_off_t pos; + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; + struct erofs_device_info *dif; + int id, err = 0; + + sbi->total_blocks = sbi->primarydevice_blocks; + if (!erofs_sb_has_device_table(sbi)) + ondisk_extradevs = 0; + else + ondisk_extradevs = le16_to_cpu(dsb->extra_devices); + + if (sbi->devs->extra_devices && + ondisk_extradevs != sbi->devs->extra_devices) { + erofs_err(sb, "extra devices don't match (ondisk %u, given %u)", + ondisk_extradevs, sbi->devs->extra_devices); + return -EINVAL; + } + if (!ondisk_extradevs) + return 0; + + sbi->device_id_mask = roundup_pow_of_two(ondisk_extradevs + 1) - 1; + pos = le16_to_cpu(dsb->devt_slotoff) * EROFS_DEVT_SLOT_SIZE; + down_read(&sbi->devs->rwsem); + if (sbi->devs->extra_devices) { + idr_for_each_entry(&sbi->devs->tree, dif, id) { + err = erofs_init_device(&buf, sb, dif, &pos); + if (err) + break; + } + } else { + for (id = 0; id < ondisk_extradevs; id++) { + dif = kzalloc(sizeof(*dif), GFP_KERNEL); + if (!dif) { + err = -ENOMEM; + break; + } + err = idr_alloc(&sbi->devs->tree, dif, 0, 0, GFP_KERNEL); + if (err < 0) { + kfree(dif); + break; + } + ++sbi->devs->extra_devices; + + err = erofs_init_device(&buf, sb, dif, &pos); + if (err) + break; + } + } + up_read(&sbi->devs->rwsem); + erofs_put_metabuf(&buf); + return err; +} + static int erofs_read_superblock(struct super_block *sb) { struct erofs_sb_info *sbi; - struct page *page; + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct erofs_super_block *dsb; unsigned int blkszbits; void *data; int ret; - page = read_mapping_page(sb->s_bdev->bd_inode->i_mapping, 0, NULL); - if (IS_ERR(page)) { + data = erofs_read_metabuf(&buf, sb, 0, EROFS_KMAP); + if (IS_ERR(data)) { erofs_err(sb, "cannot read erofs superblock"); - return PTR_ERR(page); + return PTR_ERR(data); } sbi = EROFS_SB(sb); - - data = kmap_atomic(page); dsb = (struct erofs_super_block *)(data + EROFS_SUPER_OFFSET); ret = -EINVAL; @@ -121,6 +249,14 @@ static int erofs_read_superblock(struct super_block *sb) goto out; } + sbi->feature_compat = le32_to_cpu(dsb->feature_compat); + if (erofs_sb_has_sb_chksum(sbi)) { + ret = erofs_superblock_csum_verify(sb, data); + if (ret) + goto out; + } + + ret = -EINVAL; blkszbits = dsb->blkszbits; /* 9(512 bytes) + LOG_SECTORS_PER_BLOCK == LOG_BLOCK_SIZE */ if (blkszbits != LOG_BLOCK_SIZE) { @@ -132,7 +268,7 @@ static int erofs_read_superblock(struct super_block *sb) if (!check_layout_compatibility(sb, dsb)) goto out; - sbi->blocks = le32_to_cpu(dsb->blocks); + sbi->primarydevice_blocks = le32_to_cpu(dsb->blocks); sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr); #ifdef CONFIG_EROFS_FS_XATTR sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr); @@ -153,60 +289,26 @@ static int erofs_read_superblock(struct super_block *sb) ret = -EFSCORRUPTED; goto out; } - ret = 0; + + /* handle multiple devices */ + ret = erofs_scan_devices(sb, dsb); out: - kunmap_atomic(data); - put_page(page); + erofs_put_metabuf(&buf); return ret; } -#ifdef CONFIG_EROFS_FS_ZIP -static int erofs_build_cache_strategy(struct super_block *sb, - substring_t *args) -{ - struct erofs_sb_info *sbi = EROFS_SB(sb); - const char *cs = match_strdup(args); - int err = 0; - - if (!cs) { - erofs_err(sb, "Not enough memory to store cache strategy"); - return -ENOMEM; - } - - if (!strcmp(cs, "disabled")) { - sbi->cache_strategy = EROFS_ZIP_CACHE_DISABLED; - } else if (!strcmp(cs, "readahead")) { - sbi->cache_strategy = EROFS_ZIP_CACHE_READAHEAD; - } else if (!strcmp(cs, "readaround")) { - sbi->cache_strategy = EROFS_ZIP_CACHE_READAROUND; - } else { - erofs_err(sb, "Unrecognized cache strategy \"%s\"", cs); - err = -EINVAL; - } - kfree(cs); - return err; -} -#else -static int erofs_build_cache_strategy(struct super_block *sb, - substring_t *args) -{ - erofs_info(sb, "EROFS compression is disabled, so cache strategy is ignored"); - return 0; -} -#endif - /* set up default EROFS parameters */ -static void erofs_default_options(struct erofs_sb_info *sbi) +static void erofs_default_options(struct erofs_fs_context *ctx) { #ifdef CONFIG_EROFS_FS_ZIP - sbi->cache_strategy = EROFS_ZIP_CACHE_READAROUND; - sbi->max_sync_decompress_pages = 3; + ctx->opt.cache_strategy = EROFS_ZIP_CACHE_READAROUND; + ctx->opt.max_sync_decompress_pages = 3; #endif #ifdef CONFIG_EROFS_FS_XATTR - set_opt(sbi, XATTR_USER); + set_opt(&ctx->opt, XATTR_USER); #endif #ifdef CONFIG_EROFS_FS_POSIX_ACL - set_opt(sbi, POSIX_ACL); + set_opt(&ctx->opt, POSIX_ACL); #endif } @@ -216,76 +318,124 @@ enum { Opt_acl, Opt_noacl, Opt_cache_strategy, + Opt_device, + Opt_fsid, + Opt_domain_id, Opt_err }; -static match_table_t erofs_tokens = { - {Opt_user_xattr, "user_xattr"}, - {Opt_nouser_xattr, "nouser_xattr"}, - {Opt_acl, "acl"}, - {Opt_noacl, "noacl"}, - {Opt_cache_strategy, "cache_strategy=%s"}, - {Opt_err, NULL} +static const struct constant_table erofs_param_cache_strategy[] = { + {"disabled", EROFS_ZIP_CACHE_DISABLED}, + {"readahead", EROFS_ZIP_CACHE_READAHEAD}, + {"readaround", EROFS_ZIP_CACHE_READAROUND}, + {} }; -static int erofs_parse_options(struct super_block *sb, char *options) -{ - substring_t args[MAX_OPT_ARGS]; - char *p; - int err; +static const struct fs_parameter_spec erofs_param_specs[] = { + fsparam_flag_no("user_xattr", Opt_user_xattr), + fsparam_flag_no("acl", Opt_acl), + fsparam_enum("cache_strategy", Opt_cache_strategy), + fsparam_string("device", Opt_device), + fsparam_string("fsid", Opt_fsid), + fsparam_string("domain_id", Opt_domain_id), + {} +}; - if (!options) - return 0; +static const struct fs_parameter_enum erofs_param_enums[] = { + { Opt_cache_strategy, "disabled", EROFS_ZIP_CACHE_DISABLED }, + { Opt_cache_strategy, "readahead", EROFS_ZIP_CACHE_READAHEAD }, + { Opt_cache_strategy, "readaround", EROFS_ZIP_CACHE_READAROUND }, + {} +}; - while ((p = strsep(&options, ","))) { - int token; +const struct fs_parameter_description erofs_fs_parameters = { + .name = "erofs", + .specs = erofs_param_specs, + .enums = erofs_param_enums, +}; - if (!*p) - continue; +static int erofs_fc_parse_param(struct fs_context *fc, + struct fs_parameter *param) +{ + struct erofs_fs_context *ctx = fc->fs_private; + struct fs_parse_result result; + struct erofs_device_info *dif; + int opt, ret; - args[0].to = args[0].from = NULL; - token = match_token(p, erofs_tokens, args); + opt = fs_parse(fc, &erofs_fs_parameters, param, &result); + if (opt < 0) + return opt; - switch (token) { + switch (opt) { + case Opt_user_xattr: #ifdef CONFIG_EROFS_FS_XATTR - case Opt_user_xattr: - set_opt(EROFS_SB(sb), XATTR_USER); - break; - case Opt_nouser_xattr: - clear_opt(EROFS_SB(sb), XATTR_USER); - break; + if (result.boolean) + set_opt(&ctx->opt, XATTR_USER); + else + clear_opt(&ctx->opt, XATTR_USER); #else - case Opt_user_xattr: - erofs_info(sb, "user_xattr options not supported"); - break; - case Opt_nouser_xattr: - erofs_info(sb, "nouser_xattr options not supported"); - break; + errorf(fc, "{,no}user_xattr options not supported"); #endif + break; + case Opt_acl: #ifdef CONFIG_EROFS_FS_POSIX_ACL - case Opt_acl: - set_opt(EROFS_SB(sb), POSIX_ACL); - break; - case Opt_noacl: - clear_opt(EROFS_SB(sb), POSIX_ACL); - break; + if (result.boolean) + set_opt(&ctx->opt, POSIX_ACL); + else + clear_opt(&ctx->opt, POSIX_ACL); #else - case Opt_acl: - erofs_info(sb, "acl options not supported"); - break; - case Opt_noacl: - erofs_info(sb, "noacl options not supported"); - break; + errorf(fc, "{,no}acl options not supported"); #endif - case Opt_cache_strategy: - err = erofs_build_cache_strategy(sb, args); - if (err) - return err; - break; - default: - erofs_err(sb, "Unrecognized mount option \"%s\" or missing value", p); - return -EINVAL; + break; + case Opt_cache_strategy: +#ifdef CONFIG_EROFS_FS_ZIP + ctx->opt.cache_strategy = result.uint_32; +#else + errorf(fc, "compression not supported, cache_strategy ignored"); +#endif + break; + case Opt_device: + dif = kzalloc(sizeof(*dif), GFP_KERNEL); + if (!dif) + return -ENOMEM; + dif->path = kstrdup(param->string, GFP_KERNEL); + if (!dif->path) { + kfree(dif); + return -ENOMEM; } + down_write(&ctx->devs->rwsem); + ret = idr_alloc(&ctx->devs->tree, dif, 0, 0, GFP_KERNEL); + up_write(&ctx->devs->rwsem); + if (ret < 0) { + kfree(dif->path); + kfree(dif); + return ret; + } + ++ctx->devs->extra_devices; + break; + case Opt_fsid: +#ifdef CONFIG_EROFS_FS_ONDEMAND + kfree(ctx->fsid); + ctx->fsid = kstrdup(param->string, GFP_KERNEL); + if (!ctx->fsid) + return -ENOMEM; +#else + errorf(fc, "fsid option not supported"); + return -EINVAL; +#endif + break; + case Opt_domain_id: +#ifdef CONFIG_EROFS_FS_ONDEMAND + kfree(ctx->domain_id); + ctx->domain_id = kstrdup(param->string, GFP_KERNEL); + if (!ctx->domain_id) + return -ENOMEM; +#else + errorf(fc, "domain_id option not supported"); +#endif + break; + default: + return -ENOPARAM; } return 0; } @@ -349,45 +499,66 @@ static int erofs_init_managed_cache(struct super_block *sb) static int erofs_init_managed_cache(struct super_block *sb) { return 0; } #endif -static int erofs_fill_super(struct super_block *sb, void *data, int silent) +static int erofs_fc_fill_pseudo_super(struct super_block *sb, struct fs_context *fc) +{ + static const struct tree_descr empty_descr = {""}; + + return simple_fill_super(sb, EROFS_SUPER_MAGIC, &empty_descr); +} + +static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) { struct inode *inode; struct erofs_sb_info *sbi; + struct erofs_fs_context *ctx = fc->fs_private; int err; sb->s_magic = EROFS_SUPER_MAGIC; - - if (!sb_set_blocksize(sb, EROFS_BLKSIZ)) { - erofs_err(sb, "failed to set erofs blksize"); - return -EINVAL; - } + sb->s_flags |= SB_RDONLY | SB_NOATIME; + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_op = &erofs_sops; sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) return -ENOMEM; sb->s_fs_info = sbi; + sbi->opt = ctx->opt; + sbi->devs = ctx->devs; + ctx->devs = NULL; + sbi->fsid = ctx->fsid; + ctx->fsid = NULL; + sbi->domain_id = ctx->domain_id; + ctx->domain_id = NULL; + + if (erofs_is_fscache_mode(sb)) { + sb->s_blocksize = EROFS_BLKSIZ; + sb->s_blocksize_bits = LOG_BLOCK_SIZE; + + err = erofs_fscache_register_fs(sb); + if (err) + return err; + + err = super_setup_bdi(sb); + if (err) + return err; + sb->s_bdi->ra_pages = VM_READAHEAD_PAGES; + } else { + if (!sb_set_blocksize(sb, EROFS_BLKSIZ)) { + erofs_err(sb, "failed to set erofs blksize"); + return -EINVAL; + } + } + err = erofs_read_superblock(sb); if (err) return err; - sb->s_flags |= SB_RDONLY | SB_NOATIME; - sb->s_maxbytes = MAX_LFS_FILESIZE; - sb->s_time_gran = 1; - - sb->s_op = &erofs_sops; -#ifdef CONFIG_EROFS_FS_XATTR + sb->s_time_gran = 1; sb->s_xattr = erofs_xattr_handlers; -#endif - /* set erofs default mount options */ - erofs_default_options(sbi); - - err = erofs_parse_options(sb, data); - if (err) - return err; - if (test_opt(sbi, POSIX_ACL)) + if (test_opt(&sbi->opt, POSIX_ACL)) sb->s_flags |= SB_POSIXACL; else sb->s_flags &= ~SB_POSIXACL; @@ -418,15 +589,115 @@ static int erofs_fill_super(struct super_block *sb, void *data, int silent) if (err) return err; - erofs_info(sb, "mounted with opts: %s, root inode @ nid %llu.", - (char *)data, ROOT_NID(sbi)); + erofs_info(sb, "mounted with root inode @ nid %llu.", ROOT_NID(sbi)); return 0; } -static struct dentry *erofs_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) +static int erofs_fc_anon_get_tree(struct fs_context *fc) { - return mount_bdev(fs_type, flags, dev_name, data, erofs_fill_super); + return get_tree_nodev(fc, erofs_fc_fill_pseudo_super); +} + +static int erofs_fc_get_tree(struct fs_context *fc) +{ + struct erofs_fs_context *ctx = fc->fs_private; + + if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && ctx->fsid) + return get_tree_nodev(fc, erofs_fc_fill_super); + + return get_tree_bdev(fc, erofs_fc_fill_super); +} + +static int erofs_fc_reconfigure(struct fs_context *fc) +{ + struct super_block *sb = fc->root->d_sb; + struct erofs_sb_info *sbi = EROFS_SB(sb); + struct erofs_fs_context *ctx = fc->fs_private; + + DBG_BUGON(!sb_rdonly(sb)); + + if (ctx->fsid || ctx->domain_id) + erofs_info(sb, "ignoring reconfiguration for fsid|domain_id."); + + if (test_opt(&ctx->opt, POSIX_ACL)) + fc->sb_flags |= SB_POSIXACL; + else + fc->sb_flags &= ~SB_POSIXACL; + + sbi->opt = ctx->opt; + + fc->sb_flags |= SB_RDONLY; + return 0; +} + +static int erofs_release_device_info(int id, void *ptr, void *data) +{ + struct erofs_device_info *dif = ptr; + + if (dif->bdev) + blkdev_put(dif->bdev, FMODE_READ | FMODE_EXCL); + erofs_fscache_unregister_cookie(dif->fscache); + dif->fscache = NULL; + kfree(dif->path); + kfree(dif); + return 0; +} + +static void erofs_free_dev_context(struct erofs_dev_context *devs) +{ + if (!devs) + return; + idr_for_each(&devs->tree, &erofs_release_device_info, NULL); + idr_destroy(&devs->tree); + kfree(devs); +} + +static void erofs_fc_free(struct fs_context *fc) +{ + struct erofs_fs_context *ctx = fc->fs_private; + + erofs_free_dev_context(ctx->devs); + kfree(ctx->fsid); + kfree(ctx->domain_id); + kfree(ctx); +} + +static const struct fs_context_operations erofs_context_ops = { + .parse_param = erofs_fc_parse_param, + .get_tree = erofs_fc_get_tree, + .reconfigure = erofs_fc_reconfigure, + .free = erofs_fc_free, +}; + +static const struct fs_context_operations erofs_anon_context_ops = { + .get_tree = erofs_fc_anon_get_tree, +}; + +static int erofs_init_fs_context(struct fs_context *fc) +{ + struct erofs_fs_context *ctx; + + /* pseudo mount for anon inodes */ + if (fc->sb_flags & SB_KERNMOUNT) { + fc->ops = &erofs_anon_context_ops; + return 0; + } + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + ctx->devs = kzalloc(sizeof(struct erofs_dev_context), GFP_KERNEL); + if (!ctx->devs) { + kfree(ctx); + return -ENOMEM; + } + fc->fs_private = ctx; + + idr_init(&ctx->devs->tree); + init_rwsem(&ctx->devs->rwsem); + erofs_default_options(ctx); + fc->ops = &erofs_context_ops; + return 0; } /* @@ -439,11 +710,24 @@ static void erofs_kill_sb(struct super_block *sb) WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC); - kill_block_super(sb); + /* pseudo mount for anon inodes */ + if (sb->s_flags & SB_KERNMOUNT) { + kill_anon_super(sb); + return; + } + + if (erofs_is_fscache_mode(sb)) + kill_anon_super(sb); + else + kill_block_super(sb); sbi = EROFS_SB(sb); if (!sbi) return; + erofs_free_dev_context(sbi->devs); + erofs_fscache_unregister_fs(sb); + kfree(sbi->fsid); + kfree(sbi->domain_id); kfree(sbi); sb->s_fs_info = NULL; } @@ -460,12 +744,14 @@ static void erofs_put_super(struct super_block *sb) iput(sbi->managed_cache); sbi->managed_cache = NULL; #endif + erofs_fscache_unregister_cookie(sbi->s_fscache); + sbi->s_fscache = NULL; } -static struct file_system_type erofs_fs_type = { +struct file_system_type erofs_fs_type = { .owner = THIS_MODULE, .name = "erofs", - .mount = erofs_mount, + .init_fs_context = erofs_init_fs_context, .kill_sb = erofs_kill_sb, .fs_flags = FS_REQUIRES_DEV, }; @@ -494,6 +780,10 @@ static int __init erofs_module_init(void) if (err) goto zip_err; + err = erofs_fscache_register(); + if (err) + goto fscache_err; + err = register_filesystem(&erofs_fs_type); if (err) goto fs_err; @@ -501,6 +791,8 @@ static int __init erofs_module_init(void) return 0; fs_err: + erofs_fscache_unregister(); +fscache_err: z_erofs_exit_zip_subsystem(); zip_err: erofs_exit_shrinker(); @@ -513,6 +805,7 @@ static int __init erofs_module_init(void) static void __exit erofs_module_exit(void) { unregister_filesystem(&erofs_fs_type); + erofs_fscache_unregister(); z_erofs_exit_zip_subsystem(); erofs_exit_shrinker(); @@ -526,11 +819,14 @@ static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct erofs_sb_info *sbi = EROFS_SB(sb); - u64 id = huge_encode_dev(sb->s_bdev->bd_dev); + u64 id = 0; + + if (!erofs_is_fscache_mode(sb)) + id = huge_encode_dev(sb->s_bdev->bd_dev); buf->f_type = sb->s_magic; buf->f_bsize = EROFS_BLKSIZ; - buf->f_blocks = sbi->blocks; + buf->f_blocks = sbi->total_blocks; buf->f_bfree = buf->f_bavail = 0; buf->f_files = ULLONG_MAX; @@ -546,64 +842,43 @@ static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf) static int erofs_show_options(struct seq_file *seq, struct dentry *root) { struct erofs_sb_info *sbi __maybe_unused = EROFS_SB(root->d_sb); + struct erofs_mount_opts *opt __maybe_unused = &sbi->opt; #ifdef CONFIG_EROFS_FS_XATTR - if (test_opt(sbi, XATTR_USER)) + if (test_opt(opt, XATTR_USER)) seq_puts(seq, ",user_xattr"); else seq_puts(seq, ",nouser_xattr"); #endif #ifdef CONFIG_EROFS_FS_POSIX_ACL - if (test_opt(sbi, POSIX_ACL)) + if (test_opt(opt, POSIX_ACL)) seq_puts(seq, ",acl"); else seq_puts(seq, ",noacl"); #endif #ifdef CONFIG_EROFS_FS_ZIP - if (sbi->cache_strategy == EROFS_ZIP_CACHE_DISABLED) { + if (opt->cache_strategy == EROFS_ZIP_CACHE_DISABLED) seq_puts(seq, ",cache_strategy=disabled"); - } else if (sbi->cache_strategy == EROFS_ZIP_CACHE_READAHEAD) { + else if (opt->cache_strategy == EROFS_ZIP_CACHE_READAHEAD) seq_puts(seq, ",cache_strategy=readahead"); - } else if (sbi->cache_strategy == EROFS_ZIP_CACHE_READAROUND) { + else if (opt->cache_strategy == EROFS_ZIP_CACHE_READAROUND) seq_puts(seq, ",cache_strategy=readaround"); - } else { - seq_puts(seq, ",cache_strategy=(unknown)"); - DBG_BUGON(1); - } +#endif +#ifdef CONFIG_EROFS_FS_ONDEMAND + if (sbi->fsid) + seq_printf(seq, ",fsid=%s", sbi->fsid); + if (sbi->domain_id) + seq_printf(seq, ",domain_id=%s", sbi->domain_id); #endif return 0; } -static int erofs_remount(struct super_block *sb, int *flags, char *data) -{ - struct erofs_sb_info *sbi = EROFS_SB(sb); - unsigned int org_mnt_opt = sbi->mount_opt; - int err; - - DBG_BUGON(!sb_rdonly(sb)); - err = erofs_parse_options(sb, data); - if (err) - goto out; - - if (test_opt(sbi, POSIX_ACL)) - sb->s_flags |= SB_POSIXACL; - else - sb->s_flags &= ~SB_POSIXACL; - - *flags |= SB_RDONLY; - return 0; -out: - sbi->mount_opt = org_mnt_opt; - return err; -} - const struct super_operations erofs_sops = { .put_super = erofs_put_super, .alloc_inode = erofs_alloc_inode, .free_inode = erofs_free_inode, .statfs = erofs_statfs, .show_options = erofs_show_options, - .remount_fs = erofs_remount, }; module_init(erofs_module_init); @@ -612,4 +887,3 @@ module_exit(erofs_module_exit); MODULE_DESCRIPTION("Enhanced ROM File System"); MODULE_AUTHOR("Gao Xiang, Chao Yu, Miao Xie, CONSUMER BG, HUAWEI Inc."); MODULE_LICENSE("GPL"); - diff --git a/fs/erofs/tagptr.h b/fs/erofs/tagptr.h index a72897c86744c..64ceb7270b5c1 100644 --- a/fs/erofs/tagptr.h +++ b/fs/erofs/tagptr.h @@ -1,8 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * A tagged pointer implementation - * - * Copyright (C) 2018 Gao Xiang */ #ifndef __EROFS_FS_TAGPTR_H #define __EROFS_FS_TAGPTR_H @@ -107,4 +105,3 @@ tagptr_init(o, cmpxchg(&ptptr->v, o.v, n.v)); }) *ptptr; }) #endif /* __EROFS_FS_TAGPTR_H */ - diff --git a/fs/erofs/utils.c b/fs/erofs/utils.c index 3e28fd082df0a..39f753bec8382 100644 --- a/fs/erofs/utils.c +++ b/fs/erofs/utils.c @@ -1,8 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang + * https://www.huawei.com/ */ #include "internal.h" #include @@ -330,4 +329,3 @@ void erofs_exit_shrinker(void) unregister_shrinker(&erofs_shrinker_info); } #endif /* !CONFIG_EROFS_FS_ZIP */ - diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c index f78496776e764..7c55336959cb4 100644 --- a/fs/erofs/xattr.c +++ b/fs/erofs/xattr.c @@ -1,41 +1,21 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2017-2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang + * https://www.huawei.com/ + * Copyright (C) 2021-2022, Alibaba Cloud */ #include #include "xattr.h" struct xattr_iter { struct super_block *sb; - struct page *page; + struct erofs_buf buf; void *kaddr; erofs_blk_t blkaddr; unsigned int ofs; }; -static inline void xattr_iter_end(struct xattr_iter *it, bool atomic) -{ - /* the only user of kunmap() is 'init_inode_xattrs' */ - if (!atomic) - kunmap(it->page); - else - kunmap_atomic(it->kaddr); - - unlock_page(it->page); - put_page(it->page); -} - -static inline void xattr_iter_end_final(struct xattr_iter *it) -{ - if (!it->page) - return; - - xattr_iter_end(it, true); -} - static int init_inode_xattrs(struct inode *inode) { struct erofs_inode *const vi = EROFS_I(inode); @@ -44,7 +24,6 @@ static int init_inode_xattrs(struct inode *inode) struct erofs_xattr_ibody_header *ih; struct super_block *sb; struct erofs_sb_info *sbi; - bool atomic_map; int ret = 0; /* the most case is that xattrs of this inode are initialized. */ @@ -92,26 +71,23 @@ static int init_inode_xattrs(struct inode *inode) sb = inode->i_sb; sbi = EROFS_SB(sb); + it.buf = __EROFS_BUF_INITIALIZER; it.blkaddr = erofs_blknr(iloc(sbi, vi->nid) + vi->inode_isize); it.ofs = erofs_blkoff(iloc(sbi, vi->nid) + vi->inode_isize); - it.page = erofs_get_meta_page(sb, it.blkaddr); - if (IS_ERR(it.page)) { - ret = PTR_ERR(it.page); + /* read in shared xattr array (non-atomic, see kmalloc below) */ + it.kaddr = erofs_read_metabuf(&it.buf, sb, it.blkaddr, EROFS_KMAP); + if (IS_ERR(it.kaddr)) { + ret = PTR_ERR(it.kaddr); goto out_unlock; } - /* read in shared xattr array (non-atomic, see kmalloc below) */ - it.kaddr = kmap(it.page); - atomic_map = false; - ih = (struct erofs_xattr_ibody_header *)(it.kaddr + it.ofs); - vi->xattr_shared_count = ih->h_shared_count; vi->xattr_shared_xattrs = kmalloc_array(vi->xattr_shared_count, sizeof(uint), GFP_KERNEL); if (!vi->xattr_shared_xattrs) { - xattr_iter_end(&it, atomic_map); + erofs_put_metabuf(&it.buf); ret = -ENOMEM; goto out_unlock; } @@ -123,25 +99,22 @@ static int init_inode_xattrs(struct inode *inode) if (it.ofs >= EROFS_BLKSIZ) { /* cannot be unaligned */ DBG_BUGON(it.ofs != EROFS_BLKSIZ); - xattr_iter_end(&it, atomic_map); - it.page = erofs_get_meta_page(sb, ++it.blkaddr); - if (IS_ERR(it.page)) { + it.kaddr = erofs_read_metabuf(&it.buf, sb, ++it.blkaddr, + EROFS_KMAP); + if (IS_ERR(it.kaddr)) { kfree(vi->xattr_shared_xattrs); vi->xattr_shared_xattrs = NULL; - ret = PTR_ERR(it.page); + ret = PTR_ERR(it.kaddr); goto out_unlock; } - - it.kaddr = kmap_atomic(it.page); - atomic_map = true; it.ofs = 0; } vi->xattr_shared_xattrs[i] = le32_to_cpu(*(__le32 *)(it.kaddr + it.ofs)); it.ofs += sizeof(__le32); } - xattr_iter_end(&it, atomic_map); + erofs_put_metabuf(&it.buf); /* paired with smp_mb() at the beginning of the function. */ smp_mb(); @@ -173,19 +146,11 @@ static inline int xattr_iter_fixup(struct xattr_iter *it) if (it->ofs < EROFS_BLKSIZ) return 0; - xattr_iter_end(it, true); - it->blkaddr += erofs_blknr(it->ofs); - - it->page = erofs_get_meta_page(it->sb, it->blkaddr); - if (IS_ERR(it->page)) { - int err = PTR_ERR(it->page); - - it->page = NULL; - return err; - } - - it->kaddr = kmap_atomic(it->page); + it->kaddr = erofs_read_metabuf(&it->buf, it->sb, it->blkaddr, + EROFS_KMAP_ATOMIC); + if (IS_ERR(it->kaddr)) + return PTR_ERR(it->kaddr); it->ofs = erofs_blkoff(it->ofs); return 0; } @@ -208,11 +173,10 @@ static int inline_xattr_iter_begin(struct xattr_iter *it, it->blkaddr = erofs_blknr(iloc(sbi, vi->nid) + inline_xattr_ofs); it->ofs = erofs_blkoff(iloc(sbi, vi->nid) + inline_xattr_ofs); - it->page = erofs_get_meta_page(inode->i_sb, it->blkaddr); - if (IS_ERR(it->page)) - return PTR_ERR(it->page); - - it->kaddr = kmap_atomic(it->page); + it->kaddr = erofs_read_metabuf(&it->buf, inode->i_sb, it->blkaddr, + EROFS_KMAP_ATOMIC); + if (IS_ERR(it->kaddr)) + return PTR_ERR(it->kaddr); return vi->xattr_isize - xattr_header_sz; } @@ -273,7 +237,7 @@ static int xattr_foreach(struct xattr_iter *it, it->ofs = 0; } - slice = min_t(unsigned int, PAGE_SIZE - it->ofs, + slice = min_t(unsigned int, EROFS_BLKSIZ - it->ofs, entry.e_name_len - processed); /* handle name */ @@ -308,7 +272,7 @@ static int xattr_foreach(struct xattr_iter *it, it->ofs = 0; } - slice = min_t(unsigned int, PAGE_SIZE - it->ofs, + slice = min_t(unsigned int, EROFS_BLKSIZ - it->ofs, value_sz - processed); op->value(it, processed, it->kaddr + it->ofs, slice); it->ofs += slice; @@ -387,8 +351,6 @@ static int inline_getxattr(struct inode *inode, struct getxattr_iter *it) if (ret != -ENOATTR) break; } - xattr_iter_end_final(&it->it); - return ret ? ret : it->buffer_size; } @@ -405,32 +367,22 @@ static int shared_getxattr(struct inode *inode, struct getxattr_iter *it) xattrblock_addr(sbi, vi->xattr_shared_xattrs[i]); it->it.ofs = xattrblock_offset(sbi, vi->xattr_shared_xattrs[i]); - - if (!i || blkaddr != it->it.blkaddr) { - if (i) - xattr_iter_end(&it->it, true); - - it->it.page = erofs_get_meta_page(sb, blkaddr); - if (IS_ERR(it->it.page)) - return PTR_ERR(it->it.page); - - it->it.kaddr = kmap_atomic(it->it.page); - it->it.blkaddr = blkaddr; - } + it->it.kaddr = erofs_read_metabuf(&it->it.buf, sb, blkaddr, + EROFS_KMAP_ATOMIC); + if (IS_ERR(it->it.kaddr)) + return PTR_ERR(it->it.kaddr); + it->it.blkaddr = blkaddr; ret = xattr_foreach(&it->it, &find_xattr_handlers, NULL); if (ret != -ENOATTR) break; } - if (vi->xattr_shared_count) - xattr_iter_end_final(&it->it); - return ret ? ret : it->buffer_size; } static bool erofs_xattr_user_list(struct dentry *dentry) { - return test_opt(EROFS_SB(dentry->d_sb), XATTR_USER); + return test_opt(&EROFS_SB(dentry->d_sb)->opt, XATTR_USER); } static bool erofs_xattr_trusted_list(struct dentry *dentry) @@ -453,10 +405,11 @@ int erofs_getxattr(struct inode *inode, int index, return ret; it.index = index; - it.name.len = strlen(name); if (it.name.len > EROFS_NAME_LEN) return -ERANGE; + + it.it.buf = __EROFS_BUF_INITIALIZER; it.name.name = name; it.buffer = buffer; @@ -466,6 +419,7 @@ int erofs_getxattr(struct inode *inode, int index, ret = inline_getxattr(inode, &it); if (ret == -ENOATTR) ret = shared_getxattr(inode, &it); + erofs_put_metabuf(&it.it.buf); return ret; } @@ -477,7 +431,7 @@ static int erofs_xattr_generic_get(const struct xattr_handler *handler, switch (handler->flags) { case EROFS_XATTR_INDEX_USER: - if (!test_opt(sbi, XATTR_USER)) + if (!test_opt(&sbi->opt, XATTR_USER)) return -EOPNOTSUPP; break; case EROFS_XATTR_INDEX_TRUSTED: @@ -608,7 +562,6 @@ static int inline_listxattr(struct listxattr_iter *it) if (ret) break; } - xattr_iter_end_final(&it->it); return ret ? ret : it->buffer_ofs; } @@ -626,25 +579,16 @@ static int shared_listxattr(struct listxattr_iter *it) xattrblock_addr(sbi, vi->xattr_shared_xattrs[i]); it->it.ofs = xattrblock_offset(sbi, vi->xattr_shared_xattrs[i]); - if (!i || blkaddr != it->it.blkaddr) { - if (i) - xattr_iter_end(&it->it, true); - - it->it.page = erofs_get_meta_page(sb, blkaddr); - if (IS_ERR(it->it.page)) - return PTR_ERR(it->it.page); - - it->it.kaddr = kmap_atomic(it->it.page); - it->it.blkaddr = blkaddr; - } + it->it.kaddr = erofs_read_metabuf(&it->it.buf, sb, blkaddr, + EROFS_KMAP_ATOMIC); + if (IS_ERR(it->it.kaddr)) + return PTR_ERR(it->it.kaddr); + it->it.blkaddr = blkaddr; ret = xattr_foreach(&it->it, &list_xattr_handlers, NULL); if (ret) break; } - if (vi->xattr_shared_count) - xattr_iter_end_final(&it->it); - return ret ? ret : it->buffer_ofs; } @@ -660,6 +604,7 @@ ssize_t erofs_listxattr(struct dentry *dentry, if (ret) return ret; + it.it.buf = __EROFS_BUF_INITIALIZER; it.dentry = dentry; it.buffer = buffer; it.buffer_size = buffer_size; @@ -668,9 +613,10 @@ ssize_t erofs_listxattr(struct dentry *dentry, it.it.sb = dentry->d_sb; ret = inline_listxattr(&it); - if (ret < 0 && ret != -ENOATTR) - return ret; - return shared_listxattr(&it); + if (ret >= 0 || ret == -ENOATTR) + ret = shared_listxattr(&it); + erofs_put_metabuf(&it.it.buf); + return ret; } #ifdef CONFIG_EROFS_FS_POSIX_ACL @@ -709,4 +655,3 @@ struct posix_acl *erofs_get_acl(struct inode *inode, int type) return acl; } #endif - diff --git a/fs/erofs/xattr.h b/fs/erofs/xattr.h index 3585b84d2f201..4456137c4fdef 100644 --- a/fs/erofs/xattr.h +++ b/fs/erofs/xattr.h @@ -1,8 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2017-2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang + * https://www.huawei.com/ */ #ifndef __EROFS_XATTR_H #define __EROFS_XATTR_H @@ -75,11 +74,8 @@ static inline int erofs_getxattr(struct inode *inode, int index, return -EOPNOTSUPP; } -static inline ssize_t erofs_listxattr(struct dentry *dentry, - char *buffer, size_t buffer_size) -{ - return -EOPNOTSUPP; -} +#define erofs_listxattr (NULL) +#define erofs_xattr_handlers (NULL) #endif /* !CONFIG_EROFS_FS_XATTR */ #ifdef CONFIG_EROFS_FS_POSIX_ACL @@ -89,4 +85,3 @@ struct posix_acl *erofs_get_acl(struct inode *inode, int type); #endif #endif - diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index fad80c97d2476..0f3d04641f854 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -1,8 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang + * https://www.huawei.com/ */ #include "zdata.h" #include "compress.h" @@ -288,11 +287,10 @@ static inline bool z_erofs_try_inplace_io(struct z_erofs_collector *clt, /* callers must be with collection lock held */ static int z_erofs_attach_page(struct z_erofs_collector *clt, - struct page *page, - enum z_erofs_page_type type) + struct page *page, enum z_erofs_page_type type, + bool pvec_safereuse) { int ret; - bool occupied; /* give priority for inplaceio */ if (clt->mode >= COLLECT_PRIMARY && @@ -300,10 +298,9 @@ static int z_erofs_attach_page(struct z_erofs_collector *clt, z_erofs_try_inplace_io(clt, page)) return 0; - ret = z_erofs_pagevec_enqueue(&clt->vector, - page, type, &occupied); + ret = z_erofs_pagevec_enqueue(&clt->vector, page, type, + pvec_safereuse); clt->cl->vcnt += (unsigned int)ret; - return ret ? 0 : -EAGAIN; } @@ -620,7 +617,7 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, goto err_out; /* preload all compressed pages (maybe downgrade role if necessary) */ - if (should_alloc_managed_pages(fe, sbi->cache_strategy, map->m_la)) + if (should_alloc_managed_pages(fe, sbi->opt.cache_strategy, map->m_la)) cache_strategy = DELAYEDALLOC; else cache_strategy = DONTALLOC; @@ -654,14 +651,15 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, tight &= (clt->mode >= COLLECT_PRIMARY_FOLLOWED); retry: - err = z_erofs_attach_page(clt, page, page_type); + err = z_erofs_attach_page(clt, page, page_type, + clt->mode >= COLLECT_PRIMARY_FOLLOWED); /* should allocate an additional staging page for pagevec */ if (err == -EAGAIN) { struct page *const newpage = __stagingpage_alloc(pagepool, GFP_NOFS); err = z_erofs_attach_page(clt, newpage, - Z_EROFS_PAGE_TYPE_EXCLUSIVE); + Z_EROFS_PAGE_TYPE_EXCLUSIVE, true); if (!err) goto retry; } @@ -698,13 +696,11 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, goto out; } -static void z_erofs_vle_unzip_kickoff(void *ptr, int bios) +static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, + bool sync, int bios) { - tagptr1_t t = tagptr_init(tagptr1_t, ptr); - struct z_erofs_unzip_io *io = tagptr_unfold_ptr(t); - bool background = tagptr_unfold_tags(t); - - if (!background) { + /* wake up the caller thread for sync decompression */ + if (sync) { unsigned long flags; spin_lock_irqsave(&io->u.wait.lock, flags); @@ -718,37 +714,30 @@ static void z_erofs_vle_unzip_kickoff(void *ptr, int bios) queue_work(z_erofs_workqueue, &io->u.work); } -static inline void z_erofs_vle_read_endio(struct bio *bio) +static void z_erofs_decompressqueue_endio(struct bio *bio) { - struct erofs_sb_info *sbi = NULL; + tagptr1_t t = tagptr_init(tagptr1_t, bio->bi_private); + struct z_erofs_decompressqueue *q = tagptr_unfold_ptr(t); blk_status_t err = bio->bi_status; struct bio_vec *bvec; struct bvec_iter_all iter_all; bio_for_each_segment_all(bvec, bio, iter_all) { struct page *page = bvec->bv_page; - bool cachemngd = false; DBG_BUGON(PageUptodate(page)); DBG_BUGON(!page->mapping); - if (!sbi && !z_erofs_page_is_staging(page)) - sbi = EROFS_SB(page->mapping->host->i_sb); - - /* sbi should already be gotten if the page is managed */ - if (sbi) - cachemngd = erofs_page_is_managed(sbi, page); - if (err) SetPageError(page); - else if (cachemngd) - SetPageUptodate(page); - if (cachemngd) + if (erofs_page_is_managed(EROFS_SB(q->sb), page)) { + if (!err) + SetPageUptodate(page); unlock_page(page); + } } - - z_erofs_vle_unzip_kickoff(bio->bi_private, -1); + z_erofs_decompress_kickoff(q, tagptr_unfold_tags(t), -1); bio_put(bio); } @@ -953,9 +942,8 @@ static int z_erofs_decompress_pcluster(struct super_block *sb, return err; } -static void z_erofs_vle_unzip_all(struct super_block *sb, - struct z_erofs_unzip_io *io, - struct list_head *pagepool) +static void z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io, + struct list_head *pagepool) { z_erofs_next_pcluster_t owned = io->head; @@ -971,21 +959,21 @@ static void z_erofs_vle_unzip_all(struct super_block *sb, pcl = container_of(owned, struct z_erofs_pcluster, next); owned = READ_ONCE(pcl->next); - z_erofs_decompress_pcluster(sb, pcl, pagepool); + z_erofs_decompress_pcluster(io->sb, pcl, pagepool); } } -static void z_erofs_vle_unzip_wq(struct work_struct *work) +static void z_erofs_decompressqueue_work(struct work_struct *work) { - struct z_erofs_unzip_io_sb *iosb = - container_of(work, struct z_erofs_unzip_io_sb, io.u.work); + struct z_erofs_decompressqueue *bgq = + container_of(work, struct z_erofs_decompressqueue, u.work); LIST_HEAD(pagepool); - DBG_BUGON(iosb->io.head == Z_EROFS_PCLUSTER_TAIL_CLOSED); - z_erofs_vle_unzip_all(iosb->sb, &iosb->io, &pagepool); + DBG_BUGON(bgq->head == Z_EROFS_PCLUSTER_TAIL_CLOSED); + z_erofs_decompress_queue(bgq, &pagepool); put_pages_list(&pagepool); - kvfree(iosb); + kvfree(bgq); } static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl, @@ -1112,31 +1100,28 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl, return page; } -static struct z_erofs_unzip_io *jobqueue_init(struct super_block *sb, - struct z_erofs_unzip_io *io, - bool foreground) +static struct z_erofs_decompressqueue * +jobqueue_init(struct super_block *sb, + struct z_erofs_decompressqueue *fgq, bool *fg) { - struct z_erofs_unzip_io_sb *iosb; - - if (foreground) { - /* waitqueue available for foreground io */ - DBG_BUGON(!io); + struct z_erofs_decompressqueue *q; - init_waitqueue_head(&io->u.wait); - atomic_set(&io->pending_bios, 0); - goto out; + if (fg && !*fg) { + q = kvzalloc(sizeof(*q), GFP_KERNEL | __GFP_NOWARN); + if (!q) { + *fg = true; + goto fg_out; + } + INIT_WORK(&q->u.work, z_erofs_decompressqueue_work); + } else { +fg_out: + q = fgq; + init_waitqueue_head(&fgq->u.wait); + atomic_set(&fgq->pending_bios, 0); } - - iosb = kvzalloc(sizeof(*iosb), GFP_KERNEL | __GFP_NOFAIL); - DBG_BUGON(!iosb); - - /* initialize fields in the allocated descriptor */ - io = &iosb->io; - iosb->sb = sb; - INIT_WORK(&io->u.work, z_erofs_vle_unzip_wq); -out: - io->head = Z_EROFS_PCLUSTER_TAIL_CLOSED; - return io; + q->sb = sb; + q->head = Z_EROFS_PCLUSTER_TAIL_CLOSED; + return q; } /* define decompression jobqueue types */ @@ -1147,22 +1132,17 @@ enum { }; static void *jobqueueset_init(struct super_block *sb, - z_erofs_next_pcluster_t qtail[], - struct z_erofs_unzip_io *q[], - struct z_erofs_unzip_io *fgq, - bool forcefg) + struct z_erofs_decompressqueue *q[], + struct z_erofs_decompressqueue *fgq, bool *fg) { /* * if managed cache is enabled, bypass jobqueue is needed, * no need to read from device for all pclusters in this queue. */ - q[JQ_BYPASS] = jobqueue_init(sb, fgq + JQ_BYPASS, true); - qtail[JQ_BYPASS] = &q[JQ_BYPASS]->head; + q[JQ_BYPASS] = jobqueue_init(sb, fgq + JQ_BYPASS, NULL); + q[JQ_SUBMIT] = jobqueue_init(sb, fgq + JQ_SUBMIT, fg); - q[JQ_SUBMIT] = jobqueue_init(sb, fgq + JQ_SUBMIT, forcefg); - qtail[JQ_SUBMIT] = &q[JQ_SUBMIT]->head; - - return tagptr_cast_ptr(tagptr_fold(tagptr1_t, q[JQ_SUBMIT], !forcefg)); + return tagptr_cast_ptr(tagptr_fold(tagptr1_t, q[JQ_SUBMIT], *fg)); } static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl, @@ -1184,55 +1164,35 @@ static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl, qtail[JQ_BYPASS] = &pcl->next; } -static bool postsubmit_is_all_bypassed(struct z_erofs_unzip_io *q[], - unsigned int nr_bios, - bool force_fg) -{ - /* - * although background is preferred, no one is pending for submission. - * don't issue workqueue for decompression but drop it directly instead. - */ - if (force_fg || nr_bios) - return false; - - kvfree(container_of(q[JQ_SUBMIT], struct z_erofs_unzip_io_sb, io)); - return true; -} - -static bool z_erofs_vle_submit_all(struct super_block *sb, - z_erofs_next_pcluster_t owned_head, - struct list_head *pagepool, - struct z_erofs_unzip_io *fgq, - bool force_fg) +static void z_erofs_submit_queue(struct super_block *sb, + z_erofs_next_pcluster_t owned_head, + struct list_head *pagepool, + struct z_erofs_decompressqueue *fgq, + bool *force_fg) { struct erofs_sb_info *const sbi __maybe_unused = EROFS_SB(sb); z_erofs_next_pcluster_t qtail[NR_JOBQUEUES]; - struct z_erofs_unzip_io *q[NR_JOBQUEUES]; - struct bio *bio; + struct z_erofs_decompressqueue *q[NR_JOBQUEUES]; void *bi_private; - /* since bio will be NULL, no need to initialize last_index */ - pgoff_t uninitialized_var(last_index); - bool force_submit = false; - unsigned int nr_bios; - - if (owned_head == Z_EROFS_PCLUSTER_TAIL) - return false; + /* bio is NULL initially, so no need to initialize last_{index,bdev} */ + pgoff_t last_index; + struct block_device *last_bdev; + unsigned int nr_bios = 0; + struct bio *bio = NULL; - force_submit = false; - bio = NULL; - nr_bios = 0; - bi_private = jobqueueset_init(sb, qtail, q, fgq, force_fg); + bi_private = jobqueueset_init(sb, q, fgq, force_fg); + qtail[JQ_BYPASS] = &q[JQ_BYPASS]->head; + qtail[JQ_SUBMIT] = &q[JQ_SUBMIT]->head; /* by default, all need io submission */ q[JQ_SUBMIT]->head = owned_head; do { + struct erofs_map_dev mdev; struct z_erofs_pcluster *pcl; - unsigned int clusterpages; - pgoff_t first_index; - struct page *page; - unsigned int i = 0, bypass = 0; - int err; + pgoff_t cur, end; + unsigned int i = 0; + bool bypass = true; /* no possible 'owned_head' equals the following */ DBG_BUGON(owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED); @@ -1240,55 +1200,58 @@ static bool z_erofs_vle_submit_all(struct super_block *sb, pcl = container_of(owned_head, struct z_erofs_pcluster, next); - clusterpages = BIT(pcl->clusterbits); + /* no device id here, thus it will always succeed */ + mdev = (struct erofs_map_dev) { + .m_pa = blknr_to_addr(pcl->obj.index), + }; + (void)erofs_map_dev(sb, &mdev); + + cur = erofs_blknr(mdev.m_pa); + end = cur + BIT(pcl->clusterbits); /* close the main owned chain at first */ owned_head = cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL, Z_EROFS_PCLUSTER_TAIL_CLOSED); - first_index = pcl->obj.index; - force_submit |= (first_index != last_index + 1); + do { + struct page *page; + int err; -repeat: - page = pickup_page_for_submission(pcl, i, pagepool, - MNGD_MAPPING(sbi), - GFP_NOFS); - if (!page) { - force_submit = true; - ++bypass; - goto skippage; - } + page = pickup_page_for_submission(pcl, i++, pagepool, + MNGD_MAPPING(sbi), + GFP_NOFS); + if (!page) + continue; - if (bio && force_submit) { + if (bio && (cur != last_index + 1 || + last_bdev != mdev.m_bdev)) { submit_bio_retry: - submit_bio(bio); - bio = NULL; - } - - if (!bio) { - bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); - - bio->bi_end_io = z_erofs_vle_read_endio; - bio_set_dev(bio, sb->s_bdev); - bio->bi_iter.bi_sector = (sector_t)(first_index + i) << - LOG_SECTORS_PER_BLOCK; - bio->bi_private = bi_private; - bio->bi_opf = REQ_OP_READ; + submit_bio(bio); + bio = NULL; + } - ++nr_bios; - } + if (!bio) { + bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); + bio->bi_end_io = z_erofs_decompressqueue_endio; + + bio_set_dev(bio, mdev.m_bdev); + last_bdev = mdev.m_bdev; + bio->bi_iter.bi_sector = (sector_t)cur << + LOG_SECTORS_PER_BLOCK; + bio->bi_private = bi_private; + bio->bi_opf = REQ_OP_READ; + ++nr_bios; + } - err = bio_add_page(bio, page, PAGE_SIZE, 0); - if (err < PAGE_SIZE) - goto submit_bio_retry; + err = bio_add_page(bio, page, PAGE_SIZE, 0); + if (err < PAGE_SIZE) + goto submit_bio_retry; - force_submit = false; - last_index = first_index + i; -skippage: - if (++i < clusterpages) - goto repeat; + last_index = cur; + bypass = false; + } while (++cur < end); - if (bypass < clusterpages) + if (!bypass) qtail[JQ_SUBMIT] = &pcl->next; else move_to_bypass_jobqueue(pcl, qtail, owned_head); @@ -1297,26 +1260,29 @@ static bool z_erofs_vle_submit_all(struct super_block *sb, if (bio) submit_bio(bio); - if (postsubmit_is_all_bypassed(q, nr_bios, force_fg)) - return true; - - z_erofs_vle_unzip_kickoff(bi_private, nr_bios); - return true; + /* + * although background is preferred, no one is pending for submission. + * don't issue workqueue for decompression but drop it directly instead. + */ + if (!*force_fg && !nr_bios) { + kvfree(q[JQ_SUBMIT]); + return; + } + z_erofs_decompress_kickoff(q[JQ_SUBMIT], *force_fg, nr_bios); } -static void z_erofs_submit_and_unzip(struct super_block *sb, - struct z_erofs_collector *clt, - struct list_head *pagepool, - bool force_fg) +static void z_erofs_runqueue(struct super_block *sb, + struct z_erofs_collector *clt, + struct list_head *pagepool, bool force_fg) { - struct z_erofs_unzip_io io[NR_JOBQUEUES]; + struct z_erofs_decompressqueue io[NR_JOBQUEUES]; - if (!z_erofs_vle_submit_all(sb, clt->owned_head, - pagepool, io, force_fg)) + if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL) return; + z_erofs_submit_queue(sb, clt->owned_head, pagepool, io, &force_fg); - /* decompress no I/O pclusters immediately */ - z_erofs_vle_unzip_all(sb, &io[JQ_BYPASS], pagepool); + /* handle bypass queue (no i/o pclusters) immediately */ + z_erofs_decompress_queue(&io[JQ_BYPASS], pagepool); if (!force_fg) return; @@ -1325,12 +1291,11 @@ static void z_erofs_submit_and_unzip(struct super_block *sb, wait_event(io[JQ_SUBMIT].u.wait, !atomic_read(&io[JQ_SUBMIT].pending_bios)); - /* let's synchronous decompression */ - z_erofs_vle_unzip_all(sb, &io[JQ_SUBMIT], pagepool); + /* handle synchronous decompress queue in the caller context */ + z_erofs_decompress_queue(&io[JQ_SUBMIT], pagepool); } -static int z_erofs_vle_normalaccess_readpage(struct file *file, - struct page *page) +static int z_erofs_readpage(struct file *file, struct page *page) { struct inode *const inode = page->mapping->host; struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode); @@ -1345,14 +1310,12 @@ static int z_erofs_vle_normalaccess_readpage(struct file *file, (void)z_erofs_collector_end(&f.clt); /* if some compressed cluster ready, need submit them anyway */ - z_erofs_submit_and_unzip(inode->i_sb, &f.clt, &pagepool, true); + z_erofs_runqueue(inode->i_sb, &f.clt, &pagepool, true); if (err) erofs_err(inode->i_sb, "failed to read, err [%d]", err); - if (f.map.mpage) - put_page(f.map.mpage); - + erofs_put_metabuf(&f.map.buf); /* clean up the remaining free pages */ put_pages_list(&pagepool); return err; @@ -1361,13 +1324,11 @@ static int z_erofs_vle_normalaccess_readpage(struct file *file, static bool should_decompress_synchronously(struct erofs_sb_info *sbi, unsigned int nr) { - return nr <= sbi->max_sync_decompress_pages; + return nr <= sbi->opt.max_sync_decompress_pages; } -static int z_erofs_vle_normalaccess_readpages(struct file *filp, - struct address_space *mapping, - struct list_head *pages, - unsigned int nr_pages) +static int z_erofs_readpages(struct file *filp, struct address_space *mapping, + struct list_head *pages, unsigned int nr_pages) { struct inode *const inode = mapping->host; struct erofs_sb_info *const sbi = EROFS_I_SB(inode); @@ -1422,18 +1383,14 @@ static int z_erofs_vle_normalaccess_readpages(struct file *filp, (void)z_erofs_collector_end(&f.clt); - z_erofs_submit_and_unzip(inode->i_sb, &f.clt, &pagepool, sync); - - if (f.map.mpage) - put_page(f.map.mpage); - + z_erofs_runqueue(inode->i_sb, &f.clt, &pagepool, sync); + erofs_put_metabuf(&f.map.buf); /* clean up the remaining free pages */ put_pages_list(&pagepool); return 0; } -const struct address_space_operations z_erofs_vle_normalaccess_aops = { - .readpage = z_erofs_vle_normalaccess_readpage, - .readpages = z_erofs_vle_normalaccess_readpages, +const struct address_space_operations z_erofs_aops = { + .readpage = z_erofs_readpage, + .readpages = z_erofs_readpages, }; - diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h index 568d5a493876c..68da309d5ad7d 100644 --- a/fs/erofs/zdata.h +++ b/fs/erofs/zdata.h @@ -1,8 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang + * https://www.huawei.com/ */ #ifndef __EROFS_FS_ZDATA_H #define __EROFS_FS_ZDATA_H @@ -84,7 +83,8 @@ struct z_erofs_pcluster { #define Z_EROFS_WORKGROUP_SIZE sizeof(struct z_erofs_pcluster) -struct z_erofs_unzip_io { +struct z_erofs_decompressqueue { + struct super_block *sb; atomic_t pending_bios; z_erofs_next_pcluster_t head; @@ -94,11 +94,6 @@ struct z_erofs_unzip_io { } u; }; -struct z_erofs_unzip_io_sb { - struct z_erofs_unzip_io io; - struct super_block *sb; -}; - #define MNGD_MAPPING(sbi) ((sbi)->managed_cache->i_mapping) static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi, struct page *page) diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index fff5741007214..68c58a10a4e66 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -1,8 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2018-2019 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang + * https://www.huawei.com/ */ #include "internal.h" #include @@ -22,17 +21,17 @@ int z_erofs_fill_inode(struct inode *inode) set_bit(EROFS_I_Z_INITED_BIT, &vi->flags); } - inode->i_mapping->a_ops = &z_erofs_vle_normalaccess_aops; + inode->i_mapping->a_ops = &z_erofs_aops; return 0; } -static int fill_inode_lazy(struct inode *inode) +static int z_erofs_fill_inode_lazy(struct inode *inode) { struct erofs_inode *const vi = EROFS_I(inode); struct super_block *const sb = inode->i_sb; int err; erofs_off_t pos; - struct page *page; + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; void *kaddr; struct z_erofs_map_header *h; @@ -56,14 +55,13 @@ static int fill_inode_lazy(struct inode *inode) pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize + vi->xattr_isize, 8); - page = erofs_get_meta_page(sb, erofs_blknr(pos)); - if (IS_ERR(page)) { - err = PTR_ERR(page); + kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), + EROFS_KMAP_ATOMIC); + if (IS_ERR(kaddr)) { + err = PTR_ERR(kaddr); goto out_unlock; } - kaddr = kmap_atomic(page); - h = kaddr + erofs_blkoff(pos); vi->z_advise = le16_to_cpu(h->h_advise); vi->z_algorithmtype[0] = h->h_algorithmtype & 15; @@ -93,9 +91,7 @@ static int fill_inode_lazy(struct inode *inode) smp_mb(); set_bit(EROFS_I_Z_INITED_BIT, &vi->flags); unmap_done: - kunmap_atomic(kaddr); - unlock_page(page); - put_page(page); + erofs_put_metabuf(&buf); out_unlock: clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags); return err; @@ -118,36 +114,16 @@ static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m, erofs_blk_t eblk) { struct super_block *const sb = m->inode->i_sb; - struct erofs_map_blocks *const map = m->map; - struct page *mpage = map->mpage; - - if (mpage) { - if (mpage->index == eblk) { - if (!m->kaddr) - m->kaddr = kmap_atomic(mpage); - return 0; - } - if (m->kaddr) { - kunmap_atomic(m->kaddr); - m->kaddr = NULL; - } - put_page(mpage); - } - - mpage = erofs_get_meta_page(sb, eblk); - if (IS_ERR(mpage)) { - map->mpage = NULL; - return PTR_ERR(mpage); - } - m->kaddr = kmap_atomic(mpage); - unlock_page(mpage); - map->mpage = mpage; + m->kaddr = erofs_read_metabuf(&m->map->buf, sb, eblk, + EROFS_KMAP_ATOMIC); + if (IS_ERR(m->kaddr)) + return PTR_ERR(m->kaddr); return 0; } -static int vle_legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m, - unsigned long lcn) +static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m, + unsigned long lcn) { struct inode *const inode = m->inode; struct erofs_inode *const vi = EROFS_I(inode); @@ -319,13 +295,13 @@ static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m, return unpack_compacted_index(m, amortizedshift, erofs_blkoff(pos)); } -static int vle_load_cluster_from_disk(struct z_erofs_maprecorder *m, - unsigned int lcn) +static int z_erofs_load_cluster_from_disk(struct z_erofs_maprecorder *m, + unsigned int lcn) { const unsigned int datamode = EROFS_I(m->inode)->datalayout; if (datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY) - return vle_legacy_load_cluster_from_disk(m, lcn); + return legacy_load_cluster_from_disk(m, lcn); if (datamode == EROFS_INODE_FLAT_COMPRESSION) return compacted_load_cluster_from_disk(m, lcn); @@ -333,8 +309,8 @@ static int vle_load_cluster_from_disk(struct z_erofs_maprecorder *m, return -EINVAL; } -static int vle_extent_lookback(struct z_erofs_maprecorder *m, - unsigned int lookback_distance) +static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m, + unsigned int lookback_distance) { struct erofs_inode *const vi = EROFS_I(m->inode); struct erofs_map_blocks *const map = m->map; @@ -351,7 +327,7 @@ static int vle_extent_lookback(struct z_erofs_maprecorder *m, /* load extent head logical cluster if needed */ lcn -= lookback_distance; - err = vle_load_cluster_from_disk(m, lcn); + err = z_erofs_load_cluster_from_disk(m, lcn); if (err) return err; @@ -364,7 +340,7 @@ static int vle_extent_lookback(struct z_erofs_maprecorder *m, DBG_BUGON(1); return -EFSCORRUPTED; } - return vle_extent_lookback(m, m->delta[0]); + return z_erofs_extent_lookback(m, m->delta[0]); case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: map->m_flags &= ~EROFS_MAP_ZIPPED; /* fallthrough */ @@ -404,7 +380,7 @@ int z_erofs_map_blocks_iter(struct inode *inode, goto out; } - err = fill_inode_lazy(inode); + err = z_erofs_fill_inode_lazy(inode); if (err) goto out; @@ -413,7 +389,7 @@ int z_erofs_map_blocks_iter(struct inode *inode, m.lcn = ofs >> lclusterbits; endoff = ofs & ((1 << lclusterbits) - 1); - err = vle_load_cluster_from_disk(&m, m.lcn); + err = z_erofs_load_cluster_from_disk(&m, m.lcn); if (err) goto unmap_out; @@ -444,7 +420,7 @@ int z_erofs_map_blocks_iter(struct inode *inode, /* fallthrough */ case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: /* get the correspoinding first chunk */ - err = vle_extent_lookback(&m, m.delta[0]); + err = z_erofs_extent_lookback(&m, m.delta[0]); if (err) goto unmap_out; break; @@ -462,8 +438,7 @@ int z_erofs_map_blocks_iter(struct inode *inode, map->m_flags |= EROFS_MAP_MAPPED; unmap_out: - if (m.kaddr) - kunmap_atomic(m.kaddr); + erofs_unmap_metabuf(&m.map->buf); out: erofs_dbg("%s, m_la %llu m_pa %llu m_llen %llu m_plen %llu m_flags 0%o", @@ -476,4 +451,3 @@ int z_erofs_map_blocks_iter(struct inode *inode, DBG_BUGON(err < 0 && err != -ENOMEM); return err; } - diff --git a/fs/erofs/zpvec.h b/fs/erofs/zpvec.h index 58556903aa945..b05464f4a8083 100644 --- a/fs/erofs/zpvec.h +++ b/fs/erofs/zpvec.h @@ -1,8 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang + * https://www.huawei.com/ */ #ifndef __EROFS_FS_ZPVEC_H #define __EROFS_FS_ZPVEC_H @@ -108,12 +107,17 @@ static inline void z_erofs_pagevec_ctor_init(struct z_erofs_pagevec_ctor *ctor, static inline bool z_erofs_pagevec_enqueue(struct z_erofs_pagevec_ctor *ctor, struct page *page, enum z_erofs_page_type type, - bool *occupied) + bool pvec_safereuse) { - *occupied = false; - if (!ctor->next && type) - if (ctor->index + 1 == ctor->nr) + if (!ctor->next) { + /* some pages cannot be reused as pvec safely without I/O */ + if (type == Z_EROFS_PAGE_TYPE_EXCLUSIVE && !pvec_safereuse) + type = Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED; + + if (type != Z_EROFS_PAGE_TYPE_EXCLUSIVE && + ctor->index + 1 == ctor->nr) return false; + } if (ctor->index >= ctor->nr) z_erofs_pagevec_ctor_pagedown(ctor, false); @@ -125,7 +129,6 @@ static inline bool z_erofs_pagevec_enqueue(struct z_erofs_pagevec_ctor *ctor, /* should remind that collector->next never equal to 1, 2 */ if (type == (uintptr_t)ctor->next) { ctor->next = page; - *occupied = true; } ctor->pages[ctor->index++] = tagptr_fold(erofs_vtptr_t, page, type); return true; @@ -154,4 +157,3 @@ z_erofs_pagevec_dequeue(struct z_erofs_pagevec_ctor *ctor, return tagptr_unfold_ptr(t); } #endif - diff --git a/fs/exec.c b/fs/exec.c index 747f5b53f801a..2252c967151d7 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -455,6 +455,9 @@ static int prepare_arg_pages(struct linux_binprm *bprm, unsigned long limit, ptr_size; bprm->argc = count(argv, MAX_ARG_STRINGS); + if (bprm->argc == 0) + pr_warn_once("process '%s' launched '%s' with NULL argv: empty string added\n", + current->comm, bprm->filename); if (bprm->argc < 0) return bprm->argc; @@ -483,8 +486,14 @@ static int prepare_arg_pages(struct linux_binprm *bprm, * the stack. They aren't stored until much later when we can't * signal to the parent that the child has run out of stack space. * Instead, calculate it here so it's possible to fail gracefully. + * + * In the case of argc = 0, make sure there is space for adding a + * empty string (which will bump argc to 1), to ensure confused + * userspace programs don't start processing from argv[1], thinking + * argc can never be 0, to keep them from walking envp by accident. + * See do_execveat_common(). */ - ptr_size = (bprm->argc + bprm->envc) * sizeof(void *); + ptr_size = (max(bprm->argc, 1) + bprm->envc) * sizeof(void *); if (limit <= ptr_size) return -E2BIG; limit -= ptr_size; @@ -1854,6 +1863,20 @@ static int __do_execve_file(int fd, struct filename *filename, if (retval < 0) goto out; + /* + * When argv is empty, add an empty string ("") as argv[0] to + * ensure confused userspace programs that start processing + * from argv[1] won't end up walking envp. See also + * bprm_stack_limits(). + */ + if (bprm->argc == 0) { + const char *argv[] = { "", NULL }; + retval = copy_strings_kernel(1, argv, bprm); + if (retval < 0) + goto out; + bprm->argc = 1; + } + retval = exec_binprm(bprm); if (retval < 0) goto out; diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index e0cc551645059..abac81a2e694c 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -48,10 +48,9 @@ struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb, struct ext2_sb_info *sbi = EXT2_SB(sb); if (block_group >= sbi->s_groups_count) { - ext2_error (sb, "ext2_get_group_desc", - "block_group >= groups_count - " - "block_group = %d, groups_count = %lu", - block_group, sbi->s_groups_count); + WARN(1, "block_group >= groups_count - " + "block_group = %d, groups_count = %lu", + block_group, sbi->s_groups_count); return NULL; } @@ -59,10 +58,9 @@ struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb, group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(sb); offset = block_group & (EXT2_DESC_PER_BLOCK(sb) - 1); if (!sbi->s_group_desc[group_desc]) { - ext2_error (sb, "ext2_get_group_desc", - "Group descriptor not loaded - " - "block_group = %d, group_desc = %lu, desc = %lu", - block_group, group_desc, offset); + WARN(1, "Group descriptor not loaded - " + "block_group = %d, group_desc = %lu, desc = %lu", + block_group, group_desc, offset); return NULL; } diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 065cd2d1bdc68..db403c01d4d5c 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -770,8 +770,12 @@ static loff_t ext2_max_size(int bits) res += 1LL << (bits-2); res += 1LL << (2*(bits-2)); res += 1LL << (3*(bits-2)); + /* Compute how many metadata blocks are needed */ + meta_blocks = 1; + meta_blocks += 1 + ppb; + meta_blocks += 1 + ppb + ppb * ppb; /* Does block tree limit file size? */ - if (res < upper_limit) + if (res + meta_blocks <= upper_limit) goto check_lfs; res = upper_limit; diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 0589e914663fb..e8275b5d27439 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -536,7 +536,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx) struct dir_private_info *info = file->private_data; struct inode *inode = file_inode(file); struct fname *fname; - int ret; + int ret = 0; if (!info) { info = ext4_htree_create_dir_info(file, ctx->pos); @@ -584,7 +584,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx) info->curr_minor_hash, &info->next_hash); if (ret < 0) - return ret; + goto finished; if (ret == 0) { ctx->pos = ext4_get_htree_eof(file); break; @@ -615,7 +615,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx) } finished: info->last_pos = ctx->pos; - return 0; + return ret < 0 ? ret : 0; } static int ext4_dir_open(struct inode * inode, struct file * filp) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index ae2cb15d95407..aa3b2223375fd 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1966,6 +1966,10 @@ static inline int ext4_forced_shutdown(struct ext4_sb_info *sbi) * Structure of a directory entry */ #define EXT4_NAME_LEN 255 +/* + * Base length of the ext4 directory entry excluding the name length + */ +#define EXT4_BASE_DIR_LEN (sizeof(struct ext4_dir_entry_2) - EXT4_NAME_LEN) struct ext4_dir_entry { __le32 inode; /* Inode number */ @@ -3156,9 +3160,6 @@ extern int ext4_da_write_inline_data_begin(struct address_space *mapping, unsigned flags, struct page **pagep, void **fsdata); -extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos, - unsigned len, unsigned copied, - struct page *page); extern int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, struct inode *dir, struct inode *inode); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index ae73e67936832..d5e649e578cb1 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -133,14 +133,25 @@ static int ext4_ext_truncate_extend_restart(handle_t *handle, static int ext4_ext_get_access(handle_t *handle, struct inode *inode, struct ext4_ext_path *path) { + int err = 0; + if (path->p_bh) { /* path points to block */ BUFFER_TRACE(path->p_bh, "get_write_access"); - return ext4_journal_get_write_access(handle, path->p_bh); + err = ext4_journal_get_write_access(handle, path->p_bh); + + /* + * The extent buffer's verified bit will be set again in + * __ext4_ext_dirty(). We could leave an inconsistent + * buffer if the extents updating procudure break off du + * to some error happens, force to check it again. + */ + if (!err) + clear_buffer_verified(path->p_bh); } /* path points to leaf/index in inode body */ /* we use in-core data, no need to protect them */ - return 0; + return err; } /* @@ -160,6 +171,9 @@ int __ext4_ext_dirty(const char *where, unsigned int line, handle_t *handle, /* path points to block */ err = __ext4_handle_dirty_metadata(where, line, handle, inode, path->p_bh); + /* Extents updating done, re-set verified flag */ + if (!err) + set_buffer_verified(path->p_bh); } else { /* path points to leaf/index in inode body */ err = ext4_mark_inode_dirty(handle, inode); @@ -390,9 +404,13 @@ static int ext4_valid_extent_idx(struct inode *inode, static int ext4_valid_extent_entries(struct inode *inode, struct ext4_extent_header *eh, + ext4_lblk_t lblk, ext4_fsblk_t *pblk, int depth) { unsigned short entries; + ext4_lblk_t lblock = 0; + ext4_lblk_t prev = 0; + if (eh->eh_entries == 0) return 1; @@ -403,32 +421,52 @@ static int ext4_valid_extent_entries(struct inode *inode, struct ext4_extent *ext = EXT_FIRST_EXTENT(eh); struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; ext4_fsblk_t pblock = 0; - ext4_lblk_t lblock = 0; - ext4_lblk_t prev = 0; - int len = 0; + + /* + * The logical block in the first entry should equal to + * the number in the index block. + */ + if (depth != ext_depth(inode) && + lblk != le32_to_cpu(ext->ee_block)) + return 0; while (entries) { if (!ext4_valid_extent(inode, ext)) return 0; /* Check for overlapping extents */ lblock = le32_to_cpu(ext->ee_block); - len = ext4_ext_get_actual_len(ext); if ((lblock <= prev) && prev) { pblock = ext4_ext_pblock(ext); es->s_last_error_block = cpu_to_le64(pblock); return 0; } + prev = lblock + ext4_ext_get_actual_len(ext) - 1; ext++; entries--; - prev = lblock + len - 1; } } else { struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh); + + /* + * The logical block in the first entry should equal to + * the number in the parent index block. + */ + if (depth != ext_depth(inode) && + lblk != le32_to_cpu(ext_idx->ei_block)) + return 0; while (entries) { if (!ext4_valid_extent_idx(inode, ext_idx)) return 0; + + /* Check for overlapping index extents */ + lblock = le32_to_cpu(ext_idx->ei_block); + if ((lblock <= prev) && prev) { + *pblk = ext4_idx_pblock(ext_idx); + return 0; + } ext_idx++; entries--; + prev = lblock; } } return 1; @@ -436,7 +474,7 @@ static int ext4_valid_extent_entries(struct inode *inode, static int __ext4_ext_check(const char *function, unsigned int line, struct inode *inode, struct ext4_extent_header *eh, - int depth, ext4_fsblk_t pblk) + int depth, ext4_fsblk_t pblk, ext4_lblk_t lblk) { const char *error_msg; int max = 0, err = -EFSCORRUPTED; @@ -462,7 +500,7 @@ static int __ext4_ext_check(const char *function, unsigned int line, error_msg = "invalid eh_entries"; goto corrupted; } - if (!ext4_valid_extent_entries(inode, eh, depth)) { + if (!ext4_valid_extent_entries(inode, eh, lblk, &pblk, depth)) { error_msg = "invalid extent entries"; goto corrupted; } @@ -491,7 +529,7 @@ static int __ext4_ext_check(const char *function, unsigned int line, } #define ext4_ext_check(inode, eh, depth, pblk) \ - __ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk)) + __ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk), 0) int ext4_ext_check_inode(struct inode *inode) { @@ -524,12 +562,14 @@ static void ext4_cache_extents(struct inode *inode, static struct buffer_head * __read_extent_tree_block(const char *function, unsigned int line, - struct inode *inode, ext4_fsblk_t pblk, int depth, - int flags) + struct inode *inode, struct ext4_extent_idx *idx, + int depth, int flags) { struct buffer_head *bh; int err; + ext4_fsblk_t pblk; + pblk = ext4_idx_pblock(idx); bh = sb_getblk_gfp(inode->i_sb, pblk, __GFP_MOVABLE | GFP_NOFS); if (unlikely(!bh)) return ERR_PTR(-ENOMEM); @@ -545,8 +585,8 @@ __read_extent_tree_block(const char *function, unsigned int line, if (!ext4_has_feature_journal(inode->i_sb) || (inode->i_ino != le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) { - err = __ext4_ext_check(function, line, inode, - ext_block_hdr(bh), depth, pblk); + err = __ext4_ext_check(function, line, inode, ext_block_hdr(bh), + depth, pblk, le32_to_cpu(idx->ei_block)); if (err) goto errout; } @@ -565,8 +605,8 @@ __read_extent_tree_block(const char *function, unsigned int line, } -#define read_extent_tree_block(inode, pblk, depth, flags) \ - __read_extent_tree_block(__func__, __LINE__, (inode), (pblk), \ +#define read_extent_tree_block(inode, idx, depth, flags) \ + __read_extent_tree_block(__func__, __LINE__, (inode), (idx), \ (depth), (flags)) /* @@ -613,8 +653,7 @@ int ext4_ext_precache(struct inode *inode) i--; continue; } - bh = read_extent_tree_block(inode, - ext4_idx_pblock(path[i].p_idx++), + bh = read_extent_tree_block(inode, path[i].p_idx++, depth - i - 1, EXT4_EX_FORCE_CACHE); if (IS_ERR(bh)) { @@ -917,8 +956,7 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, path[ppos].p_depth = i; path[ppos].p_ext = NULL; - bh = read_extent_tree_block(inode, path[ppos].p_block, --i, - flags); + bh = read_extent_tree_block(inode, path[ppos].p_idx, --i, flags); if (IS_ERR(bh)) { ret = PTR_ERR(bh); goto err; @@ -1517,7 +1555,6 @@ static int ext4_ext_search_right(struct inode *inode, struct ext4_extent_header *eh; struct ext4_extent_idx *ix; struct ext4_extent *ex; - ext4_fsblk_t block; int depth; /* Note, NOT eh_depth; depth from top of tree */ int ee_len; @@ -1584,20 +1621,17 @@ static int ext4_ext_search_right(struct inode *inode, * follow it and find the closest allocated * block to the right */ ix++; - block = ext4_idx_pblock(ix); while (++depth < path->p_depth) { /* subtract from p_depth to get proper eh_depth */ - bh = read_extent_tree_block(inode, block, - path->p_depth - depth, 0); + bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0); if (IS_ERR(bh)) return PTR_ERR(bh); eh = ext_block_hdr(bh); ix = EXT_FIRST_INDEX(eh); - block = ext4_idx_pblock(ix); put_bh(bh); } - bh = read_extent_tree_block(inode, block, path->p_depth - depth, 0); + bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0); if (IS_ERR(bh)) return PTR_ERR(bh); eh = ext_block_hdr(bh); @@ -3119,9 +3153,9 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, ext_debug("move to level %d (block %llu)\n", i + 1, ext4_idx_pblock(path[i].p_idx)); memset(path + i + 1, 0, sizeof(*path)); - bh = read_extent_tree_block(inode, - ext4_idx_pblock(path[i].p_idx), depth - i - 1, - EXT4_EX_NOCACHE); + bh = read_extent_tree_block(inode, path[i].p_idx, + depth - i - 1, + EXT4_EX_NOCACHE); if (IS_ERR(bh)) { /* should we reset i_size? */ err = PTR_ERR(bh); @@ -4898,13 +4932,15 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) FALLOC_FL_INSERT_RANGE)) return -EOPNOTSUPP; - if (mode & FALLOC_FL_PUNCH_HOLE) - return ext4_punch_hole(inode, offset, len); - + inode_lock(inode); ret = ext4_convert_inline_data(inode); + inode_unlock(inode); if (ret) return ret; + if (mode & FALLOC_FL_PUNCH_HOLE) + return ext4_punch_hole(inode, offset, len); + if (mode & FALLOC_FL_COLLAPSE_RANGE) return ext4_collapse_range(inode, offset, len); diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 519378a15bc6b..30a75c304aa21 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -729,39 +729,82 @@ int ext4_try_to_write_inline_data(struct address_space *mapping, int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, unsigned copied, struct page *page) { - int ret, no_expand; + handle_t *handle = ext4_journal_current_handle(); + int no_expand; void *kaddr; struct ext4_iloc iloc; + int ret = 0, ret2; + + if (unlikely(copied < len) && !PageUptodate(page)) + copied = 0; - if (unlikely(copied < len)) { - if (!PageUptodate(page)) { - copied = 0; + if (likely(copied)) { + ret = ext4_get_inode_loc(inode, &iloc); + if (ret) { + unlock_page(page); + put_page(page); + ext4_std_error(inode->i_sb, ret); goto out; } - } - - ret = ext4_get_inode_loc(inode, &iloc); - if (ret) { - ext4_std_error(inode->i_sb, ret); - copied = 0; - goto out; - } + ext4_write_lock_xattr(inode, &no_expand); + BUG_ON(!ext4_has_inline_data(inode)); + + /* + * ei->i_inline_off may have changed since ext4_write_begin() + * called ext4_try_to_write_inline_data() + */ + (void) ext4_find_inline_data_nolock(inode); + + kaddr = kmap_atomic(page); + ext4_write_inline_data(inode, &iloc, kaddr, pos, copied); + kunmap_atomic(kaddr); + SetPageUptodate(page); + /* clear page dirty so that writepages wouldn't work for us. */ + ClearPageDirty(page); - ext4_write_lock_xattr(inode, &no_expand); - BUG_ON(!ext4_has_inline_data(inode)); + ext4_write_unlock_xattr(inode, &no_expand); + brelse(iloc.bh); - kaddr = kmap_atomic(page); - ext4_write_inline_data(inode, &iloc, kaddr, pos, len); - kunmap_atomic(kaddr); - SetPageUptodate(page); - /* clear page dirty so that writepages wouldn't work for us. */ - ClearPageDirty(page); + /* + * It's important to update i_size while still holding page + * lock: page writeout could otherwise come in and zero + * beyond i_size. + */ + ext4_update_inode_size(inode, pos + copied); + } + unlock_page(page); + put_page(page); - ext4_write_unlock_xattr(inode, &no_expand); - brelse(iloc.bh); - mark_inode_dirty(inode); + /* + * Don't mark the inode dirty under page lock. First, it unnecessarily + * makes the holding time of page lock longer. Second, it forces lock + * ordering of page lock and transaction start for journaling + * filesystems. + */ + if (likely(copied)) + mark_inode_dirty(inode); out: - return copied; + /* + * If we didn't copy as much data as expected, we need to trim back + * size of xattr containing inline data. + */ + if (pos + len > inode->i_size && ext4_can_truncate(inode)) + ext4_orphan_add(handle, inode); + + ret2 = ext4_journal_stop(handle); + if (!ret) + ret = ret2; + if (pos + len > inode->i_size) { + ext4_truncate_failed_write(inode); + /* + * If truncate failed early the inode might still be + * on the orphan list; we need to make sure the inode + * is removed from the orphan list in that case. + */ + if (inode->i_nlink) + ext4_orphan_del(NULL, inode); + } + return ret ? ret : copied; } struct buffer_head * @@ -942,43 +985,6 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping, return ret; } -int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos, - unsigned len, unsigned copied, - struct page *page) -{ - int ret; - - ret = ext4_write_inline_data_end(inode, pos, len, copied, page); - if (ret < 0) { - unlock_page(page); - put_page(page); - return ret; - } - copied = ret; - - /* - * No need to use i_size_read() here, the i_size - * cannot change under us because we hold i_mutex. - * - * But it's important to update i_size while still holding page lock: - * page writeout could otherwise come in and zero beyond i_size. - */ - if (pos+copied > inode->i_size) - i_size_write(inode, pos+copied); - unlock_page(page); - put_page(page); - - /* - * Don't mark the inode dirty under page lock. First, it unnecessarily - * makes the holding time of page lock longer. Second, it forces lock - * ordering of page lock and transaction start for journaling - * filesystems. - */ - mark_inode_dirty(inode); - - return copied; -} - #ifdef INLINE_DIR_DEBUG void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh, void *inline_start, int inline_size) @@ -1119,7 +1125,15 @@ static void ext4_restore_inline_data(handle_t *handle, struct inode *inode, struct ext4_iloc *iloc, void *buf, int inline_size) { - ext4_create_inline_data(handle, inode, inline_size); + int ret; + + ret = ext4_create_inline_data(handle, inode, inline_size); + if (ret) { + ext4_msg(inode->i_sb, KERN_EMERG, + "error restoring inline_data for inode -- potential data loss! (inode %lu, error %d)", + inode->i_ino, ret); + return; + } ext4_write_inline_data(inode, iloc, buf, 0, inline_size); ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); } @@ -2004,6 +2018,18 @@ int ext4_convert_inline_data(struct inode *inode) if (!ext4_has_inline_data(inode)) { ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); return 0; + } else if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { + /* + * Inode has inline data but EXT4_STATE_MAY_INLINE_DATA is + * cleared. This means we are in the middle of moving of + * inline data to delay allocated block. Just force writeout + * here to finish conversion. + */ + error = filemap_flush(inode->i_mapping); + if (error) + return error; + if (!ext4_has_inline_data(inode)) + return 0; } needed_blocks = ext4_writepage_trans_blocks(inode); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 1429d01d836bb..3258dd17ebf06 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1426,22 +1426,14 @@ static int ext4_write_end(struct file *file, loff_t old_size = inode->i_size; int ret = 0, ret2; int i_size_changed = 0; - int inline_data = ext4_has_inline_data(inode); bool verity = ext4_verity_in_progress(inode); trace_ext4_write_end(inode, pos, len, copied); - if (inline_data) { - ret = ext4_write_inline_data_end(inode, pos, len, - copied, page); - if (ret < 0) { - unlock_page(page); - put_page(page); - goto errout; - } - copied = ret; - } else - copied = block_write_end(file, mapping, pos, - len, copied, page, fsdata); + + if (ext4_has_inline_data(inode)) + return ext4_write_inline_data_end(inode, pos, len, copied, page); + + copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); /* * it's important to update i_size while still holding page lock: * page writeout could otherwise come in and zero beyond i_size. @@ -1462,7 +1454,7 @@ static int ext4_write_end(struct file *file, * ordering of page lock and transaction start for journaling * filesystems. */ - if (i_size_changed || inline_data) + if (i_size_changed) ext4_mark_inode_dirty(handle, inode); if (pos + len > inode->i_size && !verity && ext4_can_truncate(inode)) @@ -1471,7 +1463,7 @@ static int ext4_write_end(struct file *file, * inode->i_size. So truncate them */ ext4_orphan_add(handle, inode); -errout: + ret2 = ext4_journal_stop(handle); if (!ret) ret = ret2; @@ -1536,7 +1528,6 @@ static int ext4_journalled_write_end(struct file *file, int partial = 0; unsigned from, to; int size_changed = 0; - int inline_data = ext4_has_inline_data(inode); bool verity = ext4_verity_in_progress(inode); trace_ext4_journalled_write_end(inode, pos, len, copied); @@ -1545,16 +1536,10 @@ static int ext4_journalled_write_end(struct file *file, BUG_ON(!ext4_handle_valid(handle)); - if (inline_data) { - ret = ext4_write_inline_data_end(inode, pos, len, - copied, page); - if (ret < 0) { - unlock_page(page); - put_page(page); - goto errout; - } - copied = ret; - } else if (unlikely(copied < len) && !PageUptodate(page)) { + if (ext4_has_inline_data(inode)) + return ext4_write_inline_data_end(inode, pos, len, copied, page); + + if (unlikely(copied < len) && !PageUptodate(page)) { copied = 0; ext4_journalled_zero_new_buffers(handle, page, from, to); } else { @@ -1577,7 +1562,7 @@ static int ext4_journalled_write_end(struct file *file, if (old_size < pos && !verity) pagecache_isize_extended(inode, old_size, pos); - if (size_changed || inline_data) { + if (size_changed) { ret2 = ext4_mark_inode_dirty(handle, inode); if (!ret) ret = ret2; @@ -1590,7 +1575,6 @@ static int ext4_journalled_write_end(struct file *file, */ ext4_orphan_add(handle, inode); -errout: ret2 = ext4_journal_stop(handle); if (!ret) ret = ret2; @@ -1782,6 +1766,7 @@ static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk) struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); int ret; bool allocated = false; + bool reserved = false; /* * If the cluster containing lblk is shared with a delayed, @@ -1798,6 +1783,7 @@ static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk) ret = ext4_da_reserve_space(inode); if (ret != 0) /* ENOSPC */ goto errout; + reserved = true; } else { /* bigalloc */ if (!ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk)) { if (!ext4_es_scan_clu(inode, @@ -1810,6 +1796,7 @@ static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk) ret = ext4_da_reserve_space(inode); if (ret != 0) /* ENOSPC */ goto errout; + reserved = true; } else { allocated = true; } @@ -1820,6 +1807,8 @@ static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk) } ret = ext4_es_insert_delayed_block(inode, lblk, allocated); + if (ret && reserved) + ext4_da_release_space(inode, 1); errout: return ret; @@ -2153,6 +2142,15 @@ static int ext4_writepage(struct page *page, else len = PAGE_SIZE; + /* Should never happen but for bugs in other kernel subsystems */ + if (!page_has_buffers(page)) { + ext4_warning_inode(inode, + "page %lu does not have buffers attached", page->index); + ClearPageDirty(page); + unlock_page(page); + return 0; + } + page_bufs = page_buffers(page); /* * We cannot do block allocation or other extent handling in this @@ -2702,6 +2700,22 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) wait_on_page_writeback(page); BUG_ON(PageWriteback(page)); + /* + * Should never happen but for buggy code in + * other subsystems that call + * set_page_dirty() without properly warning + * the file system first. See [1] for more + * information. + * + * [1] https://lore.kernel.org/linux-mm/20180103100430.GE4911@quack2.suse.cz + */ + if (!page_has_buffers(page)) { + ext4_warning_inode(mpd->inode, "page %lu does not have buffers attached", page->index); + ClearPageDirty(page); + unlock_page(page); + continue; + } + if (mpd->map.m_len == 0) mpd->first_page = page->index; mpd->next_page = page->index + 1; @@ -3023,19 +3037,6 @@ static int ext4_nonda_switch(struct super_block *sb) return 0; } -/* We always reserve for an inode update; the superblock could be there too */ -static int ext4_da_write_credits(struct inode *inode, loff_t pos, unsigned len) -{ - if (likely(ext4_has_feature_large_file(inode->i_sb))) - return 1; - - if (pos + len <= 0x7fffffffULL) - return 1; - - /* We might need to update the superblock to set LARGE_FILE */ - return 2; -} - static int ext4_da_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -3044,7 +3045,6 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, struct page *page; pgoff_t index; struct inode *inode = mapping->host; - handle_t *handle; if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) return -EIO; @@ -3070,41 +3070,11 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, return 0; } - /* - * grab_cache_page_write_begin() can take a long time if the - * system is thrashing due to memory pressure, or if the page - * is being written back. So grab it first before we start - * the transaction handle. This also allows us to allocate - * the page (if needed) without using GFP_NOFS. - */ -retry_grab: +retry: page = grab_cache_page_write_begin(mapping, index, flags); if (!page) return -ENOMEM; - unlock_page(page); - - /* - * With delayed allocation, we don't log the i_disksize update - * if there is delayed block allocation. But we still need - * to journalling the i_disksize update if writes to the end - * of file which has an already mapped buffer. - */ -retry_journal: - handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, - ext4_da_write_credits(inode, pos, len)); - if (IS_ERR(handle)) { - put_page(page); - return PTR_ERR(handle); - } - lock_page(page); - if (page->mapping != mapping) { - /* The page got truncated from under us */ - unlock_page(page); - put_page(page); - ext4_journal_stop(handle); - goto retry_grab; - } /* In case writeback began while the page was unlocked */ wait_for_stable_page(page); @@ -3116,20 +3086,18 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, #endif if (ret < 0) { unlock_page(page); - ext4_journal_stop(handle); + put_page(page); /* * block_write_begin may have instantiated a few blocks * outside i_size. Trim these off again. Don't need - * i_size_read because we hold i_mutex. + * i_size_read because we hold inode lock. */ if (pos + len > inode->i_size) ext4_truncate_failed_write(inode); if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) - goto retry_journal; - - put_page(page); + goto retry; return ret; } @@ -3166,8 +3134,6 @@ static int ext4_da_write_end(struct file *file, struct page *page, void *fsdata) { struct inode *inode = mapping->host; - int ret = 0, ret2; - handle_t *handle = ext4_journal_current_handle(); loff_t new_i_size; unsigned long start, end; int write_mode = (int)(unsigned long)fsdata; @@ -3177,44 +3143,36 @@ static int ext4_da_write_end(struct file *file, len, copied, page, fsdata); trace_ext4_da_write_end(inode, pos, len, copied); + + if (write_mode != CONVERT_INLINE_DATA && + ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) && + ext4_has_inline_data(inode)) + return ext4_write_inline_data_end(inode, pos, len, copied, page); + start = pos & (PAGE_SIZE - 1); end = start + copied - 1; /* - * generic_write_end() will run mark_inode_dirty() if i_size - * changes. So let's piggyback the i_disksize mark_inode_dirty - * into that. + * Since we are holding inode lock, we are sure i_disksize <= + * i_size. We also know that if i_disksize < i_size, there are + * delalloc writes pending in the range upto i_size. If the end of + * the current write is <= i_size, there's no need to touch + * i_disksize since writeback will push i_disksize upto i_size + * eventually. If the end of the current write is > i_size and + * inside an allocated block (ext4_da_should_update_i_disksize() + * check), we need to update i_disksize here as neither + * ext4_writepage() nor certain ext4_writepages() paths not + * allocating blocks update i_disksize. + * + * Note that we defer inode dirtying to generic_write_end() / + * ext4_da_write_inline_data_end(). */ new_i_size = pos + copied; - if (copied && new_i_size > EXT4_I(inode)->i_disksize) { - if (ext4_has_inline_data(inode) || - ext4_da_should_update_i_disksize(page, end)) { - ext4_update_i_disksize(inode, new_i_size); - /* We need to mark inode dirty even if - * new_i_size is less that inode->i_size - * bu greater than i_disksize.(hint delalloc) - */ - ext4_mark_inode_dirty(handle, inode); - } - } - - if (write_mode != CONVERT_INLINE_DATA && - ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) && - ext4_has_inline_data(inode)) - ret2 = ext4_da_write_inline_data_end(inode, pos, len, copied, - page); - else - ret2 = generic_write_end(file, mapping, pos, len, copied, - page, fsdata); - - copied = ret2; - if (ret2 < 0) - ret = ret2; - ret2 = ext4_journal_stop(handle); - if (!ret) - ret = ret2; + if (copied && new_i_size > inode->i_size && + ext4_da_should_update_i_disksize(page, end)) + ext4_update_i_disksize(inode, new_i_size); - return ret ? ret : copied; + return generic_write_end(file, mapping, pos, len, copied, page, fsdata); } /* @@ -4278,7 +4236,8 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) struct super_block *sb = inode->i_sb; ext4_lblk_t first_block, stop_block; struct address_space *mapping = inode->i_mapping; - loff_t first_block_offset, last_block_offset; + loff_t first_block_offset, last_block_offset, max_length; + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); handle_t *handle; unsigned int credits; int ret = 0; @@ -4288,15 +4247,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) trace_ext4_punch_hole(inode, offset, length, 0); - ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); - if (ext4_has_inline_data(inode)) { - down_write(&EXT4_I(inode)->i_mmap_sem); - ret = ext4_convert_inline_data(inode); - up_write(&EXT4_I(inode)->i_mmap_sem); - if (ret) - return ret; - } - /* * Write out all dirty pages to avoid race conditions * Then release them. @@ -4324,6 +4274,14 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) offset; } + /* + * For punch hole the length + offset needs to be within one block + * before last range. Adjust the length if it goes beyond that limit. + */ + max_length = sbi->s_bitmap_maxbytes - inode->i_sb->s_blocksize; + if (offset + length > max_length) + length = max_length - offset; + if (offset & (sb->s_blocksize - 1) || (offset + length) & (sb->s_blocksize - 1)) { /* @@ -5626,6 +5584,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_valid & ATTR_SIZE) { handle_t *handle; loff_t oldsize = inode->i_size; + loff_t old_disksize; int shrink = (attr->ia_size < inode->i_size); if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { @@ -5681,6 +5640,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) inode->i_ctime = inode->i_mtime; } down_write(&EXT4_I(inode)->i_data_sem); + old_disksize = EXT4_I(inode)->i_disksize; EXT4_I(inode)->i_disksize = attr->ia_size; rc = ext4_mark_inode_dirty(handle, inode); if (!error) @@ -5692,6 +5652,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) */ if (!error) i_size_write(inode, attr->ia_size); + else + EXT4_I(inode)->i_disksize = old_disksize; up_write(&EXT4_I(inode)->i_data_sem); ext4_journal_stop(handle); if (error) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index ba13fbb443d58..9fa20f9ba52b5 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -1120,8 +1120,6 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) sizeof(range))) return -EFAULT; - range.minlen = max((unsigned int)range.minlen, - q->limits.discard_granularity); ret = ext4_trim_fs(sb, &range); if (ret < 0) return ret; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index b67ea979f0cf7..3c3166ba43649 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3172,6 +3172,15 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, size = size >> bsbits; start = start_off >> bsbits; + /* + * For tiny groups (smaller than 8MB) the chosen allocation + * alignment may be larger than group size. Make sure the + * alignment does not move allocation to a different group which + * makes mballoc fail assertions later. + */ + start = max(start, rounddown(ac->ac_o_ex.fe_logical, + (ext4_lblk_t)EXT4_BLOCKS_PER_GROUP(ac->ac_sb))); + /* don't cover already allocated blocks in selected range */ if (ar->pleft && start <= ar->lleft) { size -= ar->lleft + 1 - start; @@ -5270,6 +5279,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, */ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) { + struct request_queue *q = bdev_get_queue(sb->s_bdev); struct ext4_group_info *grp; ext4_group_t group, first_group, last_group; ext4_grpblk_t cnt = 0, first_cluster, last_cluster; @@ -5288,6 +5298,13 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) start >= max_blks || range->len < sb->s_blocksize) return -EINVAL; + /* No point to try to trim less than discard granularity */ + if (range->minlen < q->limits.discard_granularity) { + minlen = EXT4_NUM_B2C(EXT4_SB(sb), + q->limits.discard_granularity >> sb->s_blocksize_bits); + if (minlen > EXT4_CLUSTERS_PER_GROUP(sb)) + goto out; + } if (end >= max_blks) end = max_blks - 1; if (end <= first_data_blk) diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index be4ee3dcc5cf5..c5b2ea1a93729 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -455,12 +455,12 @@ int ext4_ext_migrate(struct inode *inode) percpu_down_write(&sbi->s_writepages_rwsem); /* - * Worst case we can touch the allocation bitmaps, a bgd - * block, and a block to link in the orphan list. We do need - * need to worry about credits for modifying the quota inode. + * Worst case we can touch the allocation bitmaps and a block + * group descriptor block. We do need need to worry about + * credits for modifying the quota inode. */ handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, - 4 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb)); + 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb)); if (IS_ERR(handle)) { retval = PTR_ERR(handle); @@ -477,6 +477,13 @@ int ext4_ext_migrate(struct inode *inode) ext4_journal_stop(handle); goto out_unlock; } + /* + * Use the correct seed for checksum (i.e. the seed from 'inode'). This + * is so that the metadata blocks will have the correct checksum after + * the migration. + */ + ei = EXT4_I(inode); + EXT4_I(tmp_inode)->i_csum_seed = ei->i_csum_seed; i_size_write(tmp_inode, i_size_read(inode)); /* * Set the i_nlink to zero so it will be deleted later @@ -485,7 +492,6 @@ int ext4_ext_migrate(struct inode *inode) clear_nlink(tmp_inode); ext4_ext_tree_init(handle, tmp_inode); - ext4_orphan_add(handle, tmp_inode); ext4_journal_stop(handle); /* @@ -510,17 +516,10 @@ int ext4_ext_migrate(struct inode *inode) handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1); if (IS_ERR(handle)) { - /* - * It is impossible to update on-disk structures without - * a handle, so just rollback in-core changes and live other - * work to orphan_list_cleanup() - */ - ext4_orphan_del(NULL, tmp_inode); retval = PTR_ERR(handle); goto out_tmp_inode; } - ei = EXT4_I(inode); i_data = ei->i_data; memset(&lb, 0, sizeof(lb)); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 9905720df9248..d151892db8b0c 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -273,9 +273,9 @@ static struct dx_frame *dx_probe(struct ext4_filename *fname, struct dx_hash_info *hinfo, struct dx_frame *frame); static void dx_release(struct dx_frame *frames); -static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de, - unsigned blocksize, struct dx_hash_info *hinfo, - struct dx_map_entry map[]); +static int dx_make_map(struct inode *dir, struct buffer_head *bh, + struct dx_hash_info *hinfo, + struct dx_map_entry *map_tail); static void dx_sort_map(struct dx_map_entry *map, unsigned count); static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to, struct dx_map_entry *offsets, int count, unsigned blocksize); @@ -750,12 +750,14 @@ static struct dx_frame * dx_probe(struct ext4_filename *fname, struct inode *dir, struct dx_hash_info *hinfo, struct dx_frame *frame_in) { - unsigned count, indirect; + unsigned count, indirect, level, i; struct dx_entry *at, *entries, *p, *q, *m; struct dx_root *root; struct dx_frame *frame = frame_in; struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR); u32 hash; + ext4_lblk_t block; + ext4_lblk_t blocks[EXT4_HTREE_LEVEL]; memset(frame_in, 0, EXT4_HTREE_LEVEL * sizeof(frame_in[0])); frame->bh = ext4_read_dirblock(dir, 0, INDEX); @@ -811,6 +813,8 @@ dx_probe(struct ext4_filename *fname, struct inode *dir, } dxtrace(printk("Look up %x", hash)); + level = 0; + blocks[0] = 0; while (1) { count = dx_get_count(entries); if (!count || count > dx_get_limit(entries)) { @@ -852,15 +856,27 @@ dx_probe(struct ext4_filename *fname, struct inode *dir, dx_get_block(at))); frame->entries = entries; frame->at = at; - if (!indirect--) + + block = dx_get_block(at); + for (i = 0; i <= level; i++) { + if (blocks[i] == block) { + ext4_warning_inode(dir, + "dx entry: tree cycle block %u points back to block %u", + blocks[level], block); + goto fail; + } + } + if (++level > indirect) return frame; + blocks[level] = block; frame++; - frame->bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX); + frame->bh = ext4_read_dirblock(dir, block, INDEX); if (IS_ERR(frame->bh)) { ret_err = (struct dx_frame *) frame->bh; frame->bh = NULL; goto fail; } + entries = ((struct dx_node *) frame->bh->b_data)->entries; if (dx_get_limit(entries) != dx_node_limit(dir)) { @@ -1205,15 +1221,23 @@ static inline int search_dirblock(struct buffer_head *bh, * Create map of hash values, offsets, and sizes, stored at end of block. * Returns number of entries mapped. */ -static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de, - unsigned blocksize, struct dx_hash_info *hinfo, +static int dx_make_map(struct inode *dir, struct buffer_head *bh, + struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) { int count = 0; - char *base = (char *) de; + struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)bh->b_data; + unsigned int buflen = bh->b_size; + char *base = bh->b_data; struct dx_hash_info h = *hinfo; - while ((char *) de < base + blocksize) { + if (ext4_has_metadata_csum(dir->i_sb)) + buflen -= sizeof(struct ext4_dir_entry_tail); + + while ((char *) de < base + buflen) { + if (ext4_check_dir_entry(dir, NULL, de, bh, base, buflen, + ((char *)de) - base)) + return -EFSCORRUPTED; if (de->name_len && de->inode) { ext4fs_dirhash(dir, de->name, de->name_len, &h); map_tail--; @@ -1223,8 +1247,7 @@ static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de, count++; cond_resched(); } - /* XXX: do we need to check rec_len == 0 case? -Chris */ - de = ext4_next_entry(de, blocksize); + de = ext4_next_entry(de, dir->i_sb->s_blocksize); } return count; } @@ -1385,10 +1408,10 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size, de = (struct ext4_dir_entry_2 *)search_buf; dlimit = search_buf + buf_size; - while ((char *) de < dlimit) { + while ((char *) de < dlimit - EXT4_BASE_DIR_LEN) { /* this code is executed quadratically often */ /* do minimal checking `by hand' */ - if ((char *) de + de->name_len <= dlimit && + if (de->name + de->name_len <= dlimit && ext4_match(dir, fname, de)) { /* found a match - just to be sure, do * a full check */ @@ -1813,7 +1836,8 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, struct dx_hash_info *hinfo) { unsigned blocksize = dir->i_sb->s_blocksize; - unsigned count, continued; + unsigned continued; + int count; struct buffer_head *bh2; ext4_lblk_t newblock; u32 hash2; @@ -1848,8 +1872,11 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, /* create map in the end of data2 block */ map = (struct dx_map_entry *) (data2 + blocksize); - count = dx_make_map(dir, (struct ext4_dir_entry_2 *) data1, - blocksize, hinfo, map); + count = dx_make_map(dir, *bh, hinfo, map); + if (count < 0) { + err = count; + goto journal_error; + } map -= count; dx_sort_map(map, count); /* Ensure that neither split block is over half full */ @@ -3442,6 +3469,9 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle, struct buffer_head *bh; if (!ext4_has_inline_data(inode)) { + struct ext4_dir_entry_2 *de; + unsigned int offset; + /* The first directory block must not be a hole, so * treat it as DIRENT_HTREE */ @@ -3450,9 +3480,30 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle, *retval = PTR_ERR(bh); return NULL; } - *parent_de = ext4_next_entry( - (struct ext4_dir_entry_2 *)bh->b_data, - inode->i_sb->s_blocksize); + + de = (struct ext4_dir_entry_2 *) bh->b_data; + if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, + bh->b_size, 0) || + le32_to_cpu(de->inode) != inode->i_ino || + strcmp(".", de->name)) { + EXT4_ERROR_INODE(inode, "directory missing '.'"); + brelse(bh); + *retval = -EFSCORRUPTED; + return NULL; + } + offset = ext4_rec_len_from_disk(de->rec_len, + inode->i_sb->s_blocksize); + de = ext4_next_entry(de, inode->i_sb->s_blocksize); + if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, + bh->b_size, offset) || + le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) { + EXT4_ERROR_INODE(inode, "directory missing '..'"); + brelse(bh); + *retval = -EFSCORRUPTED; + return NULL; + } + *parent_de = de; + return bh; } diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 2cc9f2168b9e4..b66b335a0ca6f 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -100,8 +100,10 @@ static void ext4_finish_bio(struct bio *bio) continue; } clear_buffer_async_write(bh); - if (bio->bi_status) + if (bio->bi_status) { + set_buffer_write_io_error(bh); buffer_io_error(bh); + } } while ((bh = bh->b_this_page) != head); bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); local_irq_restore(flags); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index ad1d4c8faf449..6410c1e098d36 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -52,6 +52,16 @@ int ext4_resize_begin(struct super_block *sb) if (!capable(CAP_SYS_RESOURCE)) return -EPERM; + /* + * If the reserved GDT blocks is non-zero, the resize_inode feature + * should always be set. + */ + if (EXT4_SB(sb)->s_es->s_reserved_gdt_blocks && + !ext4_has_feature_resize_inode(sb)) { + ext4_error(sb, "resize_inode disabled but reserved GDT blocks non-zero"); + return -EFSCORRUPTED; + } + /* * If we are not using the primary superblock/GDT copy don't resize, * because the user tools have no way of handling this. Probably a @@ -74,6 +84,11 @@ int ext4_resize_begin(struct super_block *sb) return -EPERM; } + if (ext4_has_feature_sparse_super2(sb)) { + ext4_msg(sb, KERN_ERR, "Online resizing not supported with sparse_super2"); + return -EOPNOTSUPP; + } + if (test_and_set_bit_lock(EXT4_FLAGS_RESIZING, &EXT4_SB(sb)->s_ext4_flags)) ret = -EBUSY; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ce8372ceaa43e..eba2506f43991 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1141,6 +1141,12 @@ static void ext4_destroy_inode(struct inode *inode) true); dump_stack(); } + + if (EXT4_I(inode)->i_reserved_data_blocks) + ext4_msg(inode->i_sb, KERN_ERR, + "Inode %lu (%p): i_reserved_data_blocks (%u) not cleared!", + inode->i_ino, EXT4_I(inode), + EXT4_I(inode)->i_reserved_data_blocks); } static void init_once(void *foo) @@ -1697,6 +1703,7 @@ static const struct mount_opts { MOPT_EXT4_ONLY | MOPT_CLEAR}, {Opt_warn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_SET}, {Opt_nowarn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_CLEAR}, + {Opt_commit, 0, MOPT_NO_EXT2}, {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, MOPT_EXT4_ONLY | MOPT_CLEAR}, {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, @@ -2830,17 +2837,17 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files) */ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) { - loff_t res = EXT4_NDIR_BLOCKS; + unsigned long long upper_limit, res = EXT4_NDIR_BLOCKS; int meta_blocks; - loff_t upper_limit; - /* This is calculated to be the largest file size for a dense, block + + /* + * This is calculated to be the largest file size for a dense, block * mapped file such that the file's total number of 512-byte sectors, * including data and all indirect blocks, does not exceed (2^48 - 1). * * __u32 i_blocks_lo and _u16 i_blocks_high represent the total * number of 512-byte sectors of the file. */ - if (!has_huge_files) { /* * !has_huge_files or implies that the inode i_block field @@ -2883,7 +2890,7 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) if (res > MAX_LFS_FILESIZE) res = MAX_LFS_FILESIZE; - return res; + return (loff_t)res; } static ext4_fsblk_t descriptor_loc(struct super_block *sb, @@ -3065,8 +3072,8 @@ static int ext4_run_li_request(struct ext4_li_request *elr) struct ext4_group_desc *gdp = NULL; ext4_group_t group, ngroups; struct super_block *sb; - unsigned long timeout = 0; int ret = 0; + u64 start_time; sb = elr->lr_super; ngroups = EXT4_SB(sb)->s_groups_count; @@ -3086,13 +3093,12 @@ static int ext4_run_li_request(struct ext4_li_request *elr) ret = 1; if (!ret) { - timeout = jiffies; + start_time = ktime_get_real_ns(); ret = ext4_init_inode_table(sb, group, elr->lr_timeout ? 0 : 1); if (elr->lr_timeout == 0) { - timeout = (jiffies - timeout) * - elr->lr_sbi->s_li_wait_mult; - elr->lr_timeout = timeout; + elr->lr_timeout = nsecs_to_jiffies((ktime_get_real_ns() - start_time) * + elr->lr_sbi->s_li_wait_mult); } elr->lr_next_sched = jiffies + elr->lr_timeout; elr->lr_next_group = group + 1; @@ -3480,9 +3486,11 @@ static int count_overhead(struct super_block *sb, ext4_group_t grp, ext4_fsblk_t first_block, last_block, b; ext4_group_t i, ngroups = ext4_get_groups_count(sb); int s, j, count = 0; + int has_super = ext4_bg_has_super(sb, grp); if (!ext4_has_feature_bigalloc(sb)) - return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) + + return (has_super + ext4_bg_num_gdb(sb, grp) + + (has_super ? le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0) + sbi->s_itb_per_group + 2); first_block = le32_to_cpu(sbi->s_es->s_first_data_block) + @@ -4507,9 +4515,18 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) * Get the # of file system overhead blocks from the * superblock if present. */ - if (es->s_overhead_clusters) - sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters); - else { + sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters); + /* ignore the precalculated value if it is ridiculous */ + if (sbi->s_overhead > ext4_blocks_count(es)) + sbi->s_overhead = 0; + /* + * If the bigalloc feature is not enabled recalculating the + * overhead doesn't take long, so we might as well just redo + * it to make sure we are using the correct value. + */ + if (!ext4_has_feature_bigalloc(sb)) + sbi->s_overhead = 0; + if (sbi->s_overhead == 0) { err = ext4_calculate_overhead(sb); if (err) goto failed_mount_wq; @@ -5907,10 +5924,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA); err = dquot_quota_on(sb, type, format_id, path); - if (err) { - lockdep_set_quota_inode(path->dentry->d_inode, - I_DATA_SEM_NORMAL); - } else { + if (!err) { struct inode *inode = d_inode(path->dentry); handle_t *handle; @@ -5930,7 +5944,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, ext4_journal_stop(handle); unlock_inode: inode_unlock(inode); + if (err) + dquot_quota_off(sb, type); } + if (err) + lockdep_set_quota_inode(path->dentry->d_inode, + I_DATA_SEM_NORMAL); return err; } @@ -5993,8 +6012,19 @@ static int ext4_enable_quotas(struct super_block *sb) "Failed to enable quota tracking " "(type=%d, err=%d). Please run " "e2fsck to fix.", type, err); - for (type--; type >= 0; type--) + for (type--; type >= 0; type--) { + struct inode *inode; + + inode = sb_dqopt(sb)->files[type]; + if (inode) + inode = igrab(inode); dquot_quota_off(sb, type); + if (inode) { + lockdep_set_quota_inode(inode, + I_DATA_SEM_NORMAL); + iput(inode); + } + } return err; } @@ -6096,7 +6126,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, struct buffer_head *bh; handle_t *handle = journal_current_handle(); - if (EXT4_SB(sb)->s_journal && !handle) { + if (!handle) { ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" " cancelled because transaction is not started", (unsigned long long)off, (unsigned long long)len); diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index dd05af983092d..a9457fed351ed 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c @@ -52,10 +52,19 @@ static const char *ext4_encrypted_get_link(struct dentry *dentry, return paddr; } +static int ext4_encrypted_symlink_getattr(const struct path *path, + struct kstat *stat, u32 request_mask, + unsigned int query_flags) +{ + ext4_getattr(path, stat, request_mask, query_flags); + + return fscrypt_symlink_getattr(path, stat); +} + const struct inode_operations ext4_encrypted_symlink_inode_operations = { .get_link = ext4_encrypted_get_link, .setattr = ext4_setattr, - .getattr = ext4_getattr, + .getattr = ext4_encrypted_symlink_getattr, .listxattr = ext4_listxattr, }; diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index a57219c51c01a..44c5110e18f04 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -149,7 +149,7 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr, f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d", blkaddr, exist); set_sbi_flag(sbi, SBI_NEED_FSCK); - WARN_ON(1); + dump_stack(); } return exist; } @@ -187,7 +187,7 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, f2fs_warn(sbi, "access invalid blkaddr:%u", blkaddr); set_sbi_flag(sbi, SBI_NEED_FSCK); - WARN_ON(1); + dump_stack(); return false; } else { return __is_bitmap_valid(sbi, blkaddr, type); @@ -583,7 +583,7 @@ int f2fs_acquire_orphan_inode(struct f2fs_sb_info *sbi) if (time_to_inject(sbi, FAULT_ORPHAN)) { spin_unlock(&im->ino_lock); - f2fs_show_injection_info(FAULT_ORPHAN); + f2fs_show_injection_info(sbi, FAULT_ORPHAN); return -ENOSPC; } @@ -848,6 +848,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, struct page *cp_page_1 = NULL, *cp_page_2 = NULL; struct f2fs_checkpoint *cp_block = NULL; unsigned long long cur_version = 0, pre_version = 0; + unsigned int cp_blocks; int err; err = get_checkpoint_version(sbi, cp_addr, &cp_block, @@ -855,15 +856,16 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, if (err) return NULL; - if (le32_to_cpu(cp_block->cp_pack_total_block_count) > - sbi->blocks_per_seg) { + cp_blocks = le32_to_cpu(cp_block->cp_pack_total_block_count); + + if (cp_blocks > sbi->blocks_per_seg || cp_blocks <= F2FS_CP_PACKS) { f2fs_warn(sbi, "invalid cp_pack_total_block_count:%u", le32_to_cpu(cp_block->cp_pack_total_block_count)); goto invalid_cp; } pre_version = *version; - cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1; + cp_addr += cp_blocks - 1; err = get_checkpoint_version(sbi, cp_addr, &cp_block, &cp_page_2, version); if (err) @@ -1144,7 +1146,8 @@ static bool __need_flush_quota(struct f2fs_sb_info *sbi) if (!is_journalled_quota(sbi)) return false; - down_write(&sbi->quota_sem); + if (!down_write_trylock(&sbi->quota_sem)) + return true; if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) { ret = false; } else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) { diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 64ee2a064e339..773028921c481 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -167,9 +167,10 @@ static bool f2fs_bio_post_read_required(struct bio *bio) static void f2fs_read_end_io(struct bio *bio) { - if (time_to_inject(F2FS_P_SB(bio_first_page_all(bio)), - FAULT_READ_IO)) { - f2fs_show_injection_info(FAULT_READ_IO); + struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio)); + + if (time_to_inject(sbi, FAULT_READ_IO)) { + f2fs_show_injection_info(sbi, FAULT_READ_IO); bio->bi_status = BLK_STS_IOERR; } @@ -191,7 +192,7 @@ static void f2fs_write_end_io(struct bio *bio) struct bvec_iter_all iter_all; if (time_to_inject(sbi, FAULT_WRITE_IO)) { - f2fs_show_injection_info(FAULT_WRITE_IO); + f2fs_show_injection_info(sbi, FAULT_WRITE_IO); bio->bi_status = BLK_STS_IOERR; } @@ -1190,7 +1191,21 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, if (err) { if (flag == F2FS_GET_BLOCK_BMAP) map->m_pblk = 0; + if (err == -ENOENT) { + /* + * There is one exceptional case that read_node_page() + * may return -ENOENT due to filesystem has been + * shutdown or cp_error, so force to convert error + * number to EIO for such case. + */ + if (map->m_may_create && + (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) || + f2fs_cp_error(sbi))) { + err = -EIO; + goto unlock_out; + } + err = 0; if (map->m_next_pgofs) *map->m_next_pgofs = @@ -2452,8 +2467,12 @@ static int __f2fs_write_data_pages(struct address_space *mapping, /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */ if (wbc->sync_mode == WB_SYNC_ALL) atomic_inc(&sbi->wb_sync_req[DATA]); - else if (atomic_read(&sbi->wb_sync_req[DATA])) + else if (atomic_read(&sbi->wb_sync_req[DATA])) { + /* to avoid potential deadlock */ + if (current->plug) + blk_finish_plug(current->plug); goto skip_write; + } if (__should_serialize_io(inode, wbc)) { mutex_lock(&sbi->writepages); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 78d041f9775a4..99c4a868d73b0 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -618,7 +618,7 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, start: if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) { - f2fs_show_injection_info(FAULT_DIR_DEPTH); + f2fs_show_injection_info(F2FS_I_SB(dir), FAULT_DIR_DEPTH); return -ENOSPC; } @@ -892,6 +892,7 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, struct f2fs_sb_info *sbi = F2FS_I_SB(d->inode); struct blk_plug plug; bool readdir_ra = sbi->readdir_ra == 1; + bool found_valid_dirent = false; int err = 0; bit_pos = ((unsigned long)ctx->pos % d->max); @@ -906,12 +907,15 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, de = &d->dentry[bit_pos]; if (de->name_len == 0) { + if (found_valid_dirent || !bit_pos) { + printk_ratelimited( + "%sF2FS-fs (%s): invalid namelen(0), ino:%u, run fsck to fix.", + KERN_WARNING, sbi->sb->s_id, + le32_to_cpu(de->ino)); + set_sbi_flag(sbi, SBI_NEED_FSCK); + } bit_pos++; ctx->pos = start_pos + bit_pos; - printk_ratelimited( - "%s, invalid namelen(0), ino:%u, run fsck to fix.", - KERN_WARNING, le32_to_cpu(de->ino)); - set_sbi_flag(sbi, SBI_NEED_FSCK); continue; } @@ -954,6 +958,7 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, f2fs_ra_node_page(sbi, le32_to_cpu(de->ino)); ctx->pos = start_pos + bit_pos; + found_valid_dirent = true; } out: if (readdir_ra) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4ca3c2a0a0f5b..c73a1638c18b4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -931,6 +931,7 @@ struct f2fs_sm_info { unsigned int segment_count; /* total # of segments */ unsigned int main_segments; /* # of segments in main area */ unsigned int reserved_segments; /* # of reserved segments */ + unsigned int additional_reserved_segments;/* reserved segs for IO align feature */ unsigned int ovp_segments; /* # of overprovision segments */ /* a threshold to reclaim prefree segments */ @@ -1374,9 +1375,10 @@ struct f2fs_private_dio { }; #ifdef CONFIG_F2FS_FAULT_INJECTION -#define f2fs_show_injection_info(type) \ - printk_ratelimited("%sF2FS-fs : inject %s in %s of %pS\n", \ - KERN_INFO, f2fs_fault_name[type], \ +#define f2fs_show_injection_info(sbi, type) \ + printk_ratelimited("%sF2FS-fs (%s) : inject %s in %s of %pS\n", \ + KERN_INFO, sbi->sb->s_id, \ + f2fs_fault_name[type], \ __func__, __builtin_return_address(0)) static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) { @@ -1396,7 +1398,7 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) return false; } #else -#define f2fs_show_injection_info(type) do { } while (0) +#define f2fs_show_injection_info(sbi, type) do { } while (0) static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) { return false; @@ -1781,7 +1783,7 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, return ret; if (time_to_inject(sbi, FAULT_BLOCK)) { - f2fs_show_injection_info(FAULT_BLOCK); + f2fs_show_injection_info(sbi, FAULT_BLOCK); release = *count; goto release_quota; } @@ -1799,6 +1801,11 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, if (!__allow_reserved_blocks(sbi, inode, true)) avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks; + + if (F2FS_IO_ALIGNED(sbi)) + avail_user_block_count -= sbi->blocks_per_seg * + SM_I(sbi)->additional_reserved_segments; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { if (avail_user_block_count > sbi->unusable_block_count) avail_user_block_count -= sbi->unusable_block_count; @@ -2033,7 +2040,7 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, } if (time_to_inject(sbi, FAULT_BLOCK)) { - f2fs_show_injection_info(FAULT_BLOCK); + f2fs_show_injection_info(sbi, FAULT_BLOCK); goto enospc; } @@ -2044,6 +2051,11 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, if (!__allow_reserved_blocks(sbi, inode, false)) valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks; + + if (F2FS_IO_ALIGNED(sbi)) + valid_block_count += sbi->blocks_per_seg * + SM_I(sbi)->additional_reserved_segments; + user_block_count = sbi->user_block_count; if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) user_block_count -= sbi->unusable_block_count; @@ -2088,11 +2100,17 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, { spin_lock(&sbi->stat_lock); - f2fs_bug_on(sbi, !sbi->total_valid_block_count); - f2fs_bug_on(sbi, !sbi->total_valid_node_count); + if (unlikely(!sbi->total_valid_block_count || + !sbi->total_valid_node_count)) { + f2fs_warn(sbi, "dec_valid_node_count: inconsistent block counts, total_valid_block:%u, total_valid_node:%u", + sbi->total_valid_block_count, + sbi->total_valid_node_count); + set_sbi_flag(sbi, SBI_NEED_FSCK); + } else { + sbi->total_valid_block_count--; + sbi->total_valid_node_count--; + } - sbi->total_valid_node_count--; - sbi->total_valid_block_count--; if (sbi->reserved_blocks && sbi->current_reserved_blocks < sbi->reserved_blocks) sbi->current_reserved_blocks++; @@ -2148,7 +2166,8 @@ static inline struct page *f2fs_grab_cache_page(struct address_space *mapping, return page; if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) { - f2fs_show_injection_info(FAULT_PAGE_ALLOC); + f2fs_show_injection_info(F2FS_M_SB(mapping), + FAULT_PAGE_ALLOC); return NULL; } } @@ -2163,7 +2182,7 @@ static inline struct page *f2fs_pagecache_get_page( int fgp_flags, gfp_t gfp_mask) { if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET)) { - f2fs_show_injection_info(FAULT_PAGE_GET); + f2fs_show_injection_info(F2FS_M_SB(mapping), FAULT_PAGE_GET); return NULL; } @@ -2232,7 +2251,7 @@ static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, return bio; } if (time_to_inject(sbi, FAULT_ALLOC_BIO)) { - f2fs_show_injection_info(FAULT_ALLOC_BIO); + f2fs_show_injection_info(sbi, FAULT_ALLOC_BIO); return NULL; } @@ -2797,7 +2816,7 @@ static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, size_t size, gfp_t flags) { if (time_to_inject(sbi, FAULT_KMALLOC)) { - f2fs_show_injection_info(FAULT_KMALLOC); + f2fs_show_injection_info(sbi, FAULT_KMALLOC); return NULL; } @@ -2814,7 +2833,7 @@ static inline void *f2fs_kvmalloc(struct f2fs_sb_info *sbi, size_t size, gfp_t flags) { if (time_to_inject(sbi, FAULT_KVMALLOC)) { - f2fs_show_injection_info(FAULT_KVMALLOC); + f2fs_show_injection_info(sbi, FAULT_KVMALLOC); return NULL; } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6e58b2e62b189..ef08ef0170306 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -682,7 +682,7 @@ int f2fs_truncate(struct inode *inode) trace_f2fs_truncate(inode); if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) { - f2fs_show_injection_info(FAULT_TRUNCATE); + f2fs_show_injection_info(F2FS_I_SB(inode), FAULT_TRUNCATE); return -EIO; } @@ -981,7 +981,6 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) } if (pg_start < pg_end) { - struct address_space *mapping = inode->i_mapping; loff_t blk_start, blk_end; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -993,8 +992,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); down_write(&F2FS_I(inode)->i_mmap_sem); - truncate_inode_pages_range(mapping, blk_start, - blk_end - 1); + truncate_pagecache_range(inode, blk_start, blk_end - 1); f2fs_lock_op(sbi); ret = f2fs_truncate_hole(inode, pg_start, pg_end); @@ -1322,11 +1320,19 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, ret = -ENOSPC; break; } - if (dn->data_blkaddr != NEW_ADDR) { - f2fs_invalidate_blocks(sbi, dn->data_blkaddr); - dn->data_blkaddr = NEW_ADDR; - f2fs_set_data_blkaddr(dn); + + if (dn->data_blkaddr == NEW_ADDR) + continue; + + if (!f2fs_is_valid_blkaddr(sbi, dn->data_blkaddr, + DATA_GENERIC_ENHANCE)) { + ret = -EFSCORRUPTED; + break; } + + f2fs_invalidate_blocks(sbi, dn->data_blkaddr); + dn->data_blkaddr = NEW_ADDR; + f2fs_set_data_blkaddr(dn); } f2fs_update_extent_cache_range(dn, start, 0, index - start); @@ -1602,6 +1608,10 @@ static long f2fs_fallocate(struct file *file, int mode, inode_lock(inode); + ret = file_modified(file); + if (ret) + goto out; + if (mode & FALLOC_FL_PUNCH_HOLE) { if (offset >= inode->i_size) goto out; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index a78aa5480454f..68d5c73c5ed1f 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -54,7 +54,7 @@ static int gc_thread_func(void *data) } if (time_to_inject(sbi, FAULT_CHECKPOINT)) { - f2fs_show_injection_info(FAULT_CHECKPOINT); + f2fs_show_injection_info(sbi, FAULT_CHECKPOINT); f2fs_stop_checkpoint(sbi, false); } @@ -633,6 +633,11 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, set_sbi_flag(sbi, SBI_NEED_FSCK); } + if (f2fs_check_nid_range(sbi, dni->ino)) { + f2fs_put_page(node_page, 1); + return false; + } + *nofs = ofs_of_node(node_page); source_blkaddr = datablock_addr(NULL, node_page, ofs_in_node); f2fs_put_page(node_page, 1); @@ -1095,8 +1100,10 @@ static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, int err; if (S_ISREG(inode->i_mode)) { - if (!down_write_trylock(&fi->i_gc_rwsem[READ])) + if (!down_write_trylock(&fi->i_gc_rwsem[READ])) { + sbi->skipped_gc_rwsem++; continue; + } if (!down_write_trylock( &fi->i_gc_rwsem[WRITE])) { sbi->skipped_gc_rwsem++; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 386ad54c13c3a..b5536570707c2 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -455,7 +455,7 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) inode->i_op = &f2fs_dir_inode_operations; inode->i_fop = &f2fs_dir_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; - inode_nohighmem(inode); + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); } else if (S_ISLNK(inode->i_mode)) { if (file_is_encrypt(inode)) inode->i_op = &f2fs_encrypted_symlink_inode_operations; @@ -681,7 +681,7 @@ void f2fs_evict_inode(struct inode *inode) err = f2fs_truncate(inode); if (time_to_inject(sbi, FAULT_EVICT_INODE)) { - f2fs_show_injection_info(FAULT_EVICT_INODE); + f2fs_show_injection_info(sbi, FAULT_EVICT_INODE); err = -EIO; } @@ -689,8 +689,22 @@ void f2fs_evict_inode(struct inode *inode) f2fs_lock_op(sbi); err = f2fs_remove_inode_page(inode); f2fs_unlock_op(sbi); - if (err == -ENOENT) + if (err == -ENOENT) { err = 0; + + /* + * in fuzzed image, another node may has the same + * block address as inode's, if it was truncated + * previously, truncation of inode node will fail. + */ + if (is_inode_flag_set(inode, FI_DIRTY_INODE)) { + f2fs_warn(F2FS_I_SB(inode), + "f2fs_evict_inode: inconsistent node id, ino:%lu", + inode->i_ino); + f2fs_inode_synced(inode); + set_sbi_flag(sbi, SBI_NEED_FSCK); + } + } } /* give more chances, if ENOMEM case */ @@ -777,6 +791,7 @@ void f2fs_handle_failed_inode(struct inode *inode) err = f2fs_get_node_info(sbi, inode->i_ino, &ni); if (err) { set_sbi_flag(sbi, SBI_NEED_FSCK); + set_inode_flag(inode, FI_FREE_NID); f2fs_warn(sbi, "May loss orphan inode, run fsck to fix."); goto out; } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 3a97ac56821ba..ed95c27e93026 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -679,7 +679,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) inode->i_op = &f2fs_dir_inode_operations; inode->i_fop = &f2fs_dir_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; - inode_nohighmem(inode); + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); set_inode_flag(inode, FI_INC_LINK); f2fs_lock_op(sbi); @@ -1256,9 +1256,18 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry, return target; } +static int f2fs_encrypted_symlink_getattr(const struct path *path, + struct kstat *stat, u32 request_mask, + unsigned int query_flags) +{ + f2fs_getattr(path, stat, request_mask, query_flags); + + return fscrypt_symlink_getattr(path, stat); +} + const struct inode_operations f2fs_encrypted_symlink_inode_operations = { .get_link = f2fs_encrypted_get_link, - .getattr = f2fs_getattr, + .getattr = f2fs_encrypted_symlink_getattr, .setattr = f2fs_setattr, #ifdef CONFIG_F2FS_FS_XATTR .listxattr = f2fs_listxattr, diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 48bb5d3c709db..3dc7cc3d6ac69 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1385,6 +1385,7 @@ static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid, nid, nid_of_node(page), ino_of_node(page), ofs_of_node(page), cpver_of_node(page), next_blkaddr_of_node(page)); + set_sbi_flag(sbi, SBI_NEED_FSCK); err = -EINVAL; out_err: ClearPageUptodate(page); @@ -1994,8 +1995,12 @@ static int f2fs_write_node_pages(struct address_space *mapping, if (wbc->sync_mode == WB_SYNC_ALL) atomic_inc(&sbi->wb_sync_req[NODE]); - else if (atomic_read(&sbi->wb_sync_req[NODE])) + else if (atomic_read(&sbi->wb_sync_req[NODE])) { + /* to avoid potential deadlock */ + if (current->plug) + blk_finish_plug(current->plug); goto skip_write; + } trace_f2fs_writepages(mapping->host, wbc, NODE); @@ -2406,7 +2411,7 @@ bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) struct free_nid *i = NULL; retry: if (time_to_inject(sbi, FAULT_ALLOC_NID)) { - f2fs_show_injection_info(FAULT_ALLOC_NID); + f2fs_show_injection_info(sbi, FAULT_ALLOC_NID); return false; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5ba677f85533c..7759323bd7751 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -352,16 +352,19 @@ void f2fs_drop_inmem_page(struct inode *inode, struct page *page) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct list_head *head = &fi->inmem_pages; struct inmem_pages *cur = NULL; + struct inmem_pages *tmp; f2fs_bug_on(sbi, !IS_ATOMIC_WRITTEN_PAGE(page)); mutex_lock(&fi->inmem_lock); - list_for_each_entry(cur, head, list) { - if (cur->page == page) + list_for_each_entry(tmp, head, list) { + if (tmp->page == page) { + cur = tmp; break; + } } - f2fs_bug_on(sbi, list_empty(head) || cur->page != page); + f2fs_bug_on(sbi, !cur); list_del(&cur->list); mutex_unlock(&fi->inmem_lock); @@ -489,7 +492,7 @@ int f2fs_commit_inmem_pages(struct inode *inode) void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) { if (time_to_inject(sbi, FAULT_CHECKPOINT)) { - f2fs_show_injection_info(FAULT_CHECKPOINT); + f2fs_show_injection_info(sbi, FAULT_CHECKPOINT); f2fs_stop_checkpoint(sbi, false); } @@ -1017,8 +1020,9 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, if (dc->error) printk_ratelimited( - "%sF2FS-fs: Issue discard(%u, %u, %u) failed, ret: %d", - KERN_INFO, dc->lstart, dc->start, dc->len, dc->error); + "%sF2FS-fs (%s): Issue discard(%u, %u, %u) failed, ret: %d", + KERN_INFO, sbi->sb->s_id, + dc->lstart, dc->start, dc->len, dc->error); __detach_discard_cmd(dcc, dc); } @@ -1158,7 +1162,7 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, dc->len += len; if (time_to_inject(sbi, FAULT_DISCARD)) { - f2fs_show_injection_info(FAULT_DISCARD); + f2fs_show_injection_info(sbi, FAULT_DISCARD); err = -EIO; goto submit; } diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 2034b9a07d632..f39620f475425 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -508,7 +508,8 @@ static inline unsigned int free_segments(struct f2fs_sb_info *sbi) static inline int reserved_segments(struct f2fs_sb_info *sbi) { - return SM_I(sbi)->reserved_segments; + return SM_I(sbi)->reserved_segments + + SM_I(sbi)->additional_reserved_segments; } static inline unsigned int free_sections(struct f2fs_sb_info *sbi) @@ -541,11 +542,10 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi) return GET_SEC_FROM_SEG(sbi, (unsigned int)reserved_segments(sbi)); } -static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi) +static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi, + unsigned int node_blocks, unsigned int dent_blocks) { - unsigned int node_blocks = get_pages(sbi, F2FS_DIRTY_NODES) + - get_pages(sbi, F2FS_DIRTY_DENTS); - unsigned int dent_blocks = get_pages(sbi, F2FS_DIRTY_DENTS); + unsigned int segno, left_blocks; int i; @@ -571,19 +571,28 @@ static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi) static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed, int needed) { - int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); - int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); - int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA); + unsigned int total_node_blocks = get_pages(sbi, F2FS_DIRTY_NODES) + + get_pages(sbi, F2FS_DIRTY_DENTS) + + get_pages(sbi, F2FS_DIRTY_IMETA); + unsigned int total_dent_blocks = get_pages(sbi, F2FS_DIRTY_DENTS); + unsigned int node_secs = total_node_blocks / BLKS_PER_SEC(sbi); + unsigned int dent_secs = total_dent_blocks / BLKS_PER_SEC(sbi); + unsigned int node_blocks = total_node_blocks % BLKS_PER_SEC(sbi); + unsigned int dent_blocks = total_dent_blocks % BLKS_PER_SEC(sbi); + unsigned int free, need_lower, need_upper; if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) return false; - if (free_sections(sbi) + freed == reserved_sections(sbi) + needed && - has_curseg_enough_space(sbi)) + free = free_sections(sbi) + freed; + need_lower = node_secs + dent_secs + reserved_sections(sbi) + needed; + need_upper = need_lower + (node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0); + + if (free > need_upper) return false; - return (free_sections(sbi) + freed) <= - (node_secs + 2 * dent_secs + imeta_secs + - reserved_sections(sbi) + needed); + else if (free <= need_lower) + return true; + return !has_curseg_enough_space(sbi, node_blocks, dent_blocks); } static inline bool f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 6d904dc9bd199..232c99e4a1ee9 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -277,6 +277,46 @@ static inline void limit_reserve_root(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).s_resgid)); } +static inline int adjust_reserved_segment(struct f2fs_sb_info *sbi) +{ + unsigned int sec_blks = sbi->blocks_per_seg * sbi->segs_per_sec; + unsigned int avg_vblocks; + unsigned int wanted_reserved_segments; + block_t avail_user_block_count; + + if (!F2FS_IO_ALIGNED(sbi)) + return 0; + + /* average valid block count in section in worst case */ + avg_vblocks = sec_blks / F2FS_IO_SIZE(sbi); + + /* + * we need enough free space when migrating one section in worst case + */ + wanted_reserved_segments = (F2FS_IO_SIZE(sbi) / avg_vblocks) * + reserved_segments(sbi); + wanted_reserved_segments -= reserved_segments(sbi); + + avail_user_block_count = sbi->user_block_count - + sbi->current_reserved_blocks - + F2FS_OPTION(sbi).root_reserved_blocks; + + if (wanted_reserved_segments * sbi->blocks_per_seg > + avail_user_block_count) { + f2fs_err(sbi, "IO align feature can't grab additional reserved segment: %u, available segments: %u", + wanted_reserved_segments, + avail_user_block_count >> sbi->log_blocks_per_seg); + return -ENOSPC; + } + + SM_I(sbi)->additional_reserved_segments = wanted_reserved_segments; + + f2fs_info(sbi, "IO align feature needs additional reserved segment: %u", + wanted_reserved_segments); + + return 0; +} + static inline void adjust_unusable_cap_perc(struct f2fs_sb_info *sbi) { if (!F2FS_OPTION(sbi).unusable_cap_perc) @@ -1994,64 +2034,78 @@ static int f2fs_enable_quotas(struct super_block *sb) return 0; } -int f2fs_quota_sync(struct super_block *sb, int type) +static int f2fs_quota_sync_file(struct f2fs_sb_info *sbi, int type) { - struct f2fs_sb_info *sbi = F2FS_SB(sb); - struct quota_info *dqopt = sb_dqopt(sb); - int cnt; - int ret; + struct quota_info *dqopt = sb_dqopt(sbi->sb); + struct address_space *mapping = dqopt->files[type]->i_mapping; + int ret = 0; - /* - * do_quotactl - * f2fs_quota_sync - * down_read(quota_sem) - * dquot_writeback_dquots() - * f2fs_dquot_commit - * block_operation - * down_read(quota_sem) - */ - f2fs_lock_op(sbi); + ret = dquot_writeback_dquots(sbi->sb, type); + if (ret) + goto out; - down_read(&sbi->quota_sem); - ret = dquot_writeback_dquots(sb, type); + ret = filemap_fdatawrite(mapping); if (ret) goto out; + /* if we are using journalled quota */ + if (is_journalled_quota(sbi)) + goto out; + + ret = filemap_fdatawait(mapping); + + truncate_inode_pages(&dqopt->files[type]->i_data, 0); +out: + if (ret) + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + return ret; +} + +int f2fs_quota_sync(struct super_block *sb, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct quota_info *dqopt = sb_dqopt(sb); + int cnt; + int ret = 0; + /* * Now when everything is written we can discard the pagecache so * that userspace sees the changes. */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - struct address_space *mapping; if (type != -1 && cnt != type) continue; + if (!sb_has_quota_active(sb, cnt)) continue; - mapping = dqopt->files[cnt]->i_mapping; + if (!f2fs_sb_has_quota_ino(sbi)) + inode_lock(dqopt->files[cnt]); - ret = filemap_fdatawrite(mapping); - if (ret) - goto out; + /* + * do_quotactl + * f2fs_quota_sync + * down_read(quota_sem) + * dquot_writeback_dquots() + * f2fs_dquot_commit + * block_operation + * down_read(quota_sem) + */ + f2fs_lock_op(sbi); + down_read(&sbi->quota_sem); - /* if we are using journalled quota */ - if (is_journalled_quota(sbi)) - continue; + ret = f2fs_quota_sync_file(sbi, cnt); - ret = filemap_fdatawait(mapping); - if (ret) - set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); + up_read(&sbi->quota_sem); + f2fs_unlock_op(sbi); - inode_lock(dqopt->files[cnt]); - truncate_inode_pages(&dqopt->files[cnt]->i_data, 0); - inode_unlock(dqopt->files[cnt]); + if (!f2fs_sb_has_quota_ino(sbi)) + inode_unlock(dqopt->files[cnt]); + + if (ret) + break; } -out: - if (ret) - set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); - up_read(&sbi->quota_sem); - f2fs_unlock_op(sbi); return ret; } @@ -3438,6 +3492,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) goto free_nm; } + err = adjust_reserved_segment(sbi); + if (err) + goto free_nm; + /* For write statistics */ if (sb->s_bdev->bd_part) sbi->sectors_written_start = diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 3a5360e045cfd..84bb376650935 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -262,7 +262,9 @@ static ssize_t __sbi_store(struct f2fs_attr *a, if (a->struct_type == RESERVED_BLOCKS) { spin_lock(&sbi->stat_lock); if (t > (unsigned long)(sbi->user_block_count - - F2FS_OPTION(sbi).root_reserved_blocks)) { + F2FS_OPTION(sbi).root_reserved_blocks - + sbi->blocks_per_seg * + SM_I(sbi)->additional_reserved_segments)) { spin_unlock(&sbi->stat_lock); return -EINVAL; } diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 296b3189448a4..cf1bfed1e6621 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -661,8 +661,17 @@ static int __f2fs_setxattr(struct inode *inode, int index, } last = here; - while (!IS_XATTR_LAST_ENTRY(last)) + while (!IS_XATTR_LAST_ENTRY(last)) { + if ((void *)(last) + sizeof(__u32) > last_base_addr || + (void *)XATTR_NEXT_ENTRY(last) > last_base_addr) { + f2fs_err(F2FS_I_SB(inode), "inode (%lu) has invalid last xattr entry, entry_size: %zu", + inode->i_ino, ENTRY_SIZE(last)); + set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); + error = -EFSCORRUPTED; + goto exit; + } last = XATTR_NEXT_ENTRY(last); + } newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + len + size); diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 3647c65a0f482..0191eb1dc7f66 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -93,7 +93,8 @@ static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent, err_brelse: brelse(bhs[0]); err: - fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)", (llu)blocknr); + fat_msg_ratelimit(sb, KERN_ERR, "FAT read failed (blocknr %llu)", + (llu)blocknr); return -EIO; } @@ -106,8 +107,8 @@ static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent, fatent->fat_inode = MSDOS_SB(sb)->fat_inode; fatent->bhs[0] = sb_bread(sb, blocknr); if (!fatent->bhs[0]) { - fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)", - (llu)blocknr); + fat_msg_ratelimit(sb, KERN_ERR, "FAT read failed (blocknr %llu)", + (llu)blocknr); return -EIO; } fatent->nr_bhs = 1; diff --git a/fs/fcntl.c b/fs/fcntl.c index 1b9c6f8944b1f..09e473d11055c 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -993,13 +993,14 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band) { while (fa) { struct fown_struct *fown; + unsigned long flags; if (fa->magic != FASYNC_MAGIC) { printk(KERN_ERR "kill_fasync: bad magic number in " "fasync_struct!\n"); return; } - read_lock(&fa->fa_lock); + read_lock_irqsave(&fa->fa_lock, flags); if (fa->fa_file) { fown = &fa->fa_file->f_owner; /* Don't send SIGURG to processes which have not set a @@ -1008,7 +1009,7 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band) if (!(sig == SIGURG && fown->signum == 0)) send_sigio(fown, fa->fa_fd, band); } - read_unlock(&fa->fa_lock); + read_unlock_irqrestore(&fa->fa_lock, flags); fa = rcu_dereference(fa->fa_next); } } diff --git a/fs/file.c b/fs/file.c index 9d33f98d81bb2..77eace162272e 100644 --- a/fs/file.c +++ b/fs/file.c @@ -727,24 +727,69 @@ void do_close_on_exec(struct files_struct *files) spin_unlock(&files->file_lock); } +static inline struct file *__fget_files_rcu(struct files_struct *files, + unsigned int fd, fmode_t mask, unsigned int refs) +{ + for (;;) { + struct file *file; + struct fdtable *fdt = rcu_dereference_raw(files->fdt); + struct file __rcu **fdentry; + + if (unlikely(fd >= fdt->max_fds)) + return NULL; + + fdentry = fdt->fd + array_index_nospec(fd, fdt->max_fds); + file = rcu_dereference_raw(*fdentry); + if (unlikely(!file)) + return NULL; + + if (unlikely(file->f_mode & mask)) + return NULL; + + /* + * Ok, we have a file pointer. However, because we do + * this all locklessly under RCU, we may be racing with + * that file being closed. + * + * Such a race can take two forms: + * + * (a) the file ref already went down to zero, + * and get_file_rcu_many() fails. Just try + * again: + */ + if (unlikely(!get_file_rcu_many(file, refs))) + continue; + + /* + * (b) the file table entry has changed under us. + * Note that we don't need to re-check the 'fdt->fd' + * pointer having changed, because it always goes + * hand-in-hand with 'fdt'. + * + * If so, we need to put our refs and try again. + */ + if (unlikely(rcu_dereference_raw(files->fdt) != fdt) || + unlikely(rcu_dereference_raw(*fdentry) != file)) { + fput_many(file, refs); + continue; + } + + /* + * Ok, we have a ref to the file, and checked that it + * still exists. + */ + return file; + } +} + + static struct file *__fget(unsigned int fd, fmode_t mask, unsigned int refs) { struct files_struct *files = current->files; struct file *file; rcu_read_lock(); -loop: - file = fcheck_files(files, fd); - if (file) { - /* File object ref couldn't be taken. - * dup2() atomicity guarantee is the reason - * we loop to catch the new file (or NULL pointer) - */ - if (file->f_mode & mask) - file = NULL; - else if (!get_file_rcu_many(file, refs)) - goto loop; - } + file = __fget_files_rcu(files, fd, mask, refs); rcu_read_unlock(); return file; diff --git a/fs/file_table.c b/fs/file_table.c index b2c2d716914ea..56726c5dcc01d 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -198,6 +198,7 @@ static struct file *alloc_file(const struct path *path, int flags, file->f_inode = path->dentry->d_inode; file->f_mapping = path->dentry->d_inode->i_mapping; file->f_wb_err = filemap_sample_wb_err(file->f_mapping); + file->f_sb_err = file_sample_sb_err(file); if ((file->f_mode & FMODE_READ) && likely(fop->read || fop->read_iter)) file->f_mode |= FMODE_CAN_READ; @@ -375,6 +376,7 @@ void __fput_sync(struct file *file) } EXPORT_SYMBOL(fput); +EXPORT_SYMBOL(__fput_sync); void __init files_init(void) { diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 22e9c88f3960a..e70e2d302f7af 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -131,25 +131,6 @@ static bool inode_io_list_move_locked(struct inode *inode, return false; } -/** - * inode_io_list_del_locked - remove an inode from its bdi_writeback IO list - * @inode: inode to be removed - * @wb: bdi_writeback @inode is being removed from - * - * Remove @inode which may be on one of @wb->b_{dirty|io|more_io} lists and - * clear %WB_has_dirty_io if all are empty afterwards. - */ -static void inode_io_list_del_locked(struct inode *inode, - struct bdi_writeback *wb) -{ - assert_spin_locked(&wb->list_lock); - assert_spin_locked(&inode->i_lock); - - inode->i_state &= ~I_SYNC_QUEUED; - list_del_init(&inode->i_io_list); - wb_io_lists_depopulated(wb); -} - static void wb_wakeup(struct bdi_writeback *wb) { spin_lock_bh(&wb->work_lock); @@ -244,6 +225,13 @@ void wb_wait_for_completion(struct wb_completion *done) /* one round can affect upto 5 slots */ #define WB_FRN_MAX_IN_FLIGHT 1024 /* don't queue too many concurrently */ +/* + * Maximum inodes per isw. A specific value has been chosen to make + * struct inode_switch_wbs_context fit into 1024 bytes kmalloc. + */ +#define WB_MAX_INODES_PER_ISW ((1024UL - sizeof(struct inode_switch_wbs_context)) \ + / sizeof(struct inode *)) + static atomic_t isw_nr_in_flight = ATOMIC_INIT(0); static struct workqueue_struct *isw_wq; @@ -278,6 +266,28 @@ void __inode_attach_wb(struct inode *inode, struct page *page) } EXPORT_SYMBOL_GPL(__inode_attach_wb); +/** + * inode_cgwb_move_to_attached - put the inode onto wb->b_attached list + * @inode: inode of interest with i_lock held + * @wb: target bdi_writeback + * + * Remove the inode from wb's io lists and if necessarily put onto b_attached + * list. Only inodes attached to cgwb's are kept on this list. + */ +static void inode_cgwb_move_to_attached(struct inode *inode, + struct bdi_writeback *wb) +{ + assert_spin_locked(&wb->list_lock); + assert_spin_locked(&inode->i_lock); + + inode->i_state &= ~I_SYNC_QUEUED; + if (wb != &wb->bdi->wb) + list_move(&inode->i_io_list, &wb->b_attached); + else + list_del_init(&inode->i_io_list); + wb_io_lists_depopulated(wb); +} + /** * locked_inode_to_wb_and_lock_list - determine a locked inode's wb and lock it * @inode: inode of interest with i_lock held @@ -332,11 +342,18 @@ static struct bdi_writeback *inode_to_wb_and_lock_list(struct inode *inode) } struct inode_switch_wbs_context { - struct inode *inode; - struct bdi_writeback *new_wb; + struct rcu_work work; - struct rcu_head rcu_head; - struct work_struct work; + /* + * Multiple inodes can be switched at once. The switching procedure + * consists of two parts, separated by a RCU grace period. To make + * sure that the second part is executed for each inode gone through + * the first part, all inode pointers are placed into a NULL-terminated + * array embedded into struct inode_switch_wbs_context. Otherwise + * an inode could be left in a non-consistent state. + */ + struct bdi_writeback *new_wb; + struct inode *inodes[]; }; static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) @@ -349,50 +366,23 @@ static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) up_write(&bdi->wb_switch_rwsem); } -static void inode_switch_wbs_work_fn(struct work_struct *work) +static bool inode_do_switch_wbs(struct inode *inode, + struct bdi_writeback *old_wb, + struct bdi_writeback *new_wb) { - struct inode_switch_wbs_context *isw = - container_of(work, struct inode_switch_wbs_context, work); - struct inode *inode = isw->inode; - struct backing_dev_info *bdi = inode_to_bdi(inode); struct address_space *mapping = inode->i_mapping; - struct bdi_writeback *old_wb = inode->i_wb; - struct bdi_writeback *new_wb = isw->new_wb; XA_STATE(xas, &mapping->i_pages, 0); struct page *page; bool switched = false; - /* - * If @inode switches cgwb membership while sync_inodes_sb() is - * being issued, sync_inodes_sb() might miss it. Synchronize. - */ - down_read(&bdi->wb_switch_rwsem); - - /* - * By the time control reaches here, RCU grace period has passed - * since I_WB_SWITCH assertion and all wb stat update transactions - * between unlocked_inode_to_wb_begin/end() are guaranteed to be - * synchronizing against the i_pages lock. - * - * Grabbing old_wb->list_lock, inode->i_lock and the i_pages lock - * gives us exclusion against all wb related operations on @inode - * including IO list manipulations and stat updates. - */ - if (old_wb < new_wb) { - spin_lock(&old_wb->list_lock); - spin_lock_nested(&new_wb->list_lock, SINGLE_DEPTH_NESTING); - } else { - spin_lock(&new_wb->list_lock); - spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING); - } spin_lock(&inode->i_lock); xa_lock_irq(&mapping->i_pages); /* - * Once I_FREEING is visible under i_lock, the eviction path owns - * the inode and we shouldn't modify ->i_io_list. + * Once I_FREEING or I_WILL_FREE are visible under i_lock, the eviction + * path owns the inode and we shouldn't modify ->i_io_list. */ - if (unlikely(inode->i_state & I_FREEING)) + if (unlikely(inode->i_state & (I_FREEING | I_WILL_FREE))) goto skip_switch; trace_inode_switch_wbs(inode, old_wb, new_wb); @@ -419,21 +409,28 @@ static void inode_switch_wbs_work_fn(struct work_struct *work) wb_get(new_wb); /* - * Transfer to @new_wb's IO list if necessary. The specific list - * @inode was on is ignored and the inode is put on ->b_dirty which - * is always correct including from ->b_dirty_time. The transfer - * preserves @inode->dirtied_when ordering. + * Transfer to @new_wb's IO list if necessary. If the @inode is dirty, + * the specific list @inode was on is ignored and the @inode is put on + * ->b_dirty which is always correct including from ->b_dirty_time. + * The transfer preserves @inode->dirtied_when ordering. If the @inode + * was clean, it means it was on the b_attached list, so move it onto + * the b_attached list of @new_wb. */ if (!list_empty(&inode->i_io_list)) { - struct inode *pos; - - inode_io_list_del_locked(inode, old_wb); inode->i_wb = new_wb; - list_for_each_entry(pos, &new_wb->b_dirty, i_io_list) - if (time_after_eq(inode->dirtied_when, - pos->dirtied_when)) - break; - inode_io_list_move_locked(inode, new_wb, pos->i_io_list.prev); + + if (inode->i_state & I_DIRTY_ALL) { + struct inode *pos; + + list_for_each_entry(pos, &new_wb->b_dirty, i_io_list) + if (time_after_eq(inode->dirtied_when, + pos->dirtied_when)) + break; + inode_io_list_move_locked(inode, new_wb, + pos->i_io_list.prev); + } else { + inode_cgwb_move_to_attached(inode, new_wb); + } } else { inode->i_wb = new_wb; } @@ -452,31 +449,94 @@ static void inode_switch_wbs_work_fn(struct work_struct *work) xa_unlock_irq(&mapping->i_pages); spin_unlock(&inode->i_lock); + + return switched; +} + +static void inode_switch_wbs_work_fn(struct work_struct *work) +{ + struct inode_switch_wbs_context *isw = + container_of(to_rcu_work(work), struct inode_switch_wbs_context, work); + struct backing_dev_info *bdi = inode_to_bdi(isw->inodes[0]); + struct bdi_writeback *old_wb = isw->inodes[0]->i_wb; + struct bdi_writeback *new_wb = isw->new_wb; + unsigned long nr_switched = 0; + struct inode **inodep; + + /* + * If @inode switches cgwb membership while sync_inodes_sb() is + * being issued, sync_inodes_sb() might miss it. Synchronize. + */ + down_read(&bdi->wb_switch_rwsem); + + /* + * By the time control reaches here, RCU grace period has passed + * since I_WB_SWITCH assertion and all wb stat update transactions + * between unlocked_inode_to_wb_begin/end() are guaranteed to be + * synchronizing against the i_pages lock. + * + * Grabbing old_wb->list_lock, inode->i_lock and the i_pages lock + * gives us exclusion against all wb related operations on @inode + * including IO list manipulations and stat updates. + */ + if (old_wb < new_wb) { + spin_lock(&old_wb->list_lock); + spin_lock_nested(&new_wb->list_lock, SINGLE_DEPTH_NESTING); + } else { + spin_lock(&new_wb->list_lock); + spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING); + } + + for (inodep = isw->inodes; *inodep; inodep++) { + WARN_ON_ONCE((*inodep)->i_wb != old_wb); + if (inode_do_switch_wbs(*inodep, old_wb, new_wb)) + nr_switched++; + } + spin_unlock(&new_wb->list_lock); spin_unlock(&old_wb->list_lock); up_read(&bdi->wb_switch_rwsem); - if (switched) { + if (nr_switched) { wb_wakeup(new_wb); - wb_put(old_wb); + wb_put_many(old_wb, nr_switched); } - wb_put(new_wb); - iput(inode); + for (inodep = isw->inodes; *inodep; inodep++) + iput(*inodep); + wb_put(new_wb); kfree(isw); - atomic_dec(&isw_nr_in_flight); } -static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head) +static bool inode_prepare_wbs_switch(struct inode *inode, + struct bdi_writeback *new_wb) { - struct inode_switch_wbs_context *isw = container_of(rcu_head, - struct inode_switch_wbs_context, rcu_head); + /* + * Paired with smp_mb() in cgroup_writeback_umount(). + * isw_nr_in_flight must be increased before checking SB_ACTIVE and + * grabbing an inode, otherwise isw_nr_in_flight can be observed as 0 + * in cgroup_writeback_umount() and the isw_wq will be not flushed. + */ + smp_mb(); + + if (IS_DAX(inode)) + return false; + + /* while holding I_WB_SWITCH, no one else can update the association */ + spin_lock(&inode->i_lock); + if (!(inode->i_sb->s_flags & SB_ACTIVE) || + inode->i_state & (I_WB_SWITCH | I_FREEING | I_WILL_FREE) || + inode_to_wb(inode) == new_wb) { + spin_unlock(&inode->i_lock); + return false; + } + inode->i_state |= I_WB_SWITCH; + __iget(inode); + spin_unlock(&inode->i_lock); - /* needs to grab bh-unsafe locks, bounce to work item */ - INIT_WORK(&isw->work, inode_switch_wbs_work_fn); - queue_work(isw_wq, &isw->work); + return true; } /** @@ -501,7 +561,7 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) if (atomic_read(&isw_nr_in_flight) > WB_FRN_MAX_IN_FLIGHT) return; - isw = kzalloc(sizeof(*isw), GFP_ATOMIC); + isw = kzalloc(sizeof(*isw) + 2 * sizeof(struct inode *), GFP_ATOMIC); if (!isw) return; @@ -521,19 +581,10 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) if (!isw->new_wb) goto out_free; - /* while holding I_WB_SWITCH, no one else can update the association */ - spin_lock(&inode->i_lock); - if (!(inode->i_sb->s_flags & SB_ACTIVE) || - inode->i_state & (I_WB_SWITCH | I_FREEING) || - inode_to_wb(inode) == isw->new_wb) { - spin_unlock(&inode->i_lock); + if (!inode_prepare_wbs_switch(inode, isw->new_wb)) goto out_free; - } - inode->i_state |= I_WB_SWITCH; - __iget(inode); - spin_unlock(&inode->i_lock); - isw->inode = inode; + isw->inodes[0] = inode; /* * In addition to synchronizing among switchers, I_WB_SWITCH tells @@ -541,7 +592,8 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) * lock so that stat transfer can synchronize against them. * Let's continue after I_WB_SWITCH is guaranteed to be visible. */ - call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn); + INIT_RCU_WORK(&isw->work, inode_switch_wbs_work_fn); + queue_rcu_work(isw_wq, &isw->work); return; out_free: @@ -551,6 +603,73 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) kfree(isw); } +/** + * cleanup_offline_cgwb - detach associated inodes + * @wb: target wb + * + * Switch all inodes attached to @wb to a nearest living ancestor's wb in order + * to eventually release the dying @wb. Returns %true if not all inodes were + * switched and the function has to be restarted. + */ +bool cleanup_offline_cgwb(struct bdi_writeback *wb) +{ + struct cgroup_subsys_state *memcg_css; + struct inode_switch_wbs_context *isw; + struct inode *inode; + int nr; + bool restart = false; + + isw = kzalloc(sizeof(*isw) + WB_MAX_INODES_PER_ISW * + sizeof(struct inode *), GFP_KERNEL); + if (!isw) + return restart; + + atomic_inc(&isw_nr_in_flight); + + for (memcg_css = wb->memcg_css->parent; memcg_css; + memcg_css = memcg_css->parent) { + isw->new_wb = wb_get_create(wb->bdi, memcg_css, GFP_KERNEL); + if (isw->new_wb) + break; + } + if (unlikely(!isw->new_wb)) + isw->new_wb = &wb->bdi->wb; /* wb_get() is noop for bdi's wb */ + + nr = 0; + spin_lock(&wb->list_lock); + list_for_each_entry(inode, &wb->b_attached, i_io_list) { + if (!inode_prepare_wbs_switch(inode, isw->new_wb)) + continue; + + isw->inodes[nr++] = inode; + + if (nr >= WB_MAX_INODES_PER_ISW - 1) { + restart = true; + break; + } + } + spin_unlock(&wb->list_lock); + + /* no attached inodes? bail out */ + if (nr == 0) { + atomic_dec(&isw_nr_in_flight); + wb_put(isw->new_wb); + kfree(isw); + return restart; + } + + /* + * In addition to synchronizing among switchers, I_WB_SWITCH tells + * the RCU protected stat update paths to grab the i_page + * lock so that stat transfer can synchronize against them. + * Let's continue after I_WB_SWITCH is guaranteed to be visible. + */ + INIT_RCU_WORK(&isw->work, inode_switch_wbs_work_fn); + queue_rcu_work(isw_wq, &isw->work); + + return restart; +} + /** * wbc_attach_and_unlock_inode - associate wbc with target inode and unlock it * @wbc: writeback_control of interest @@ -1006,6 +1125,12 @@ int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr, */ void cgroup_writeback_umount(void) { + /* + * SB_ACTIVE should be reliably cleared before checking + * isw_nr_in_flight, see generic_shutdown_super(). + */ + smp_mb(); + if (atomic_read(&isw_nr_in_flight)) { /* * Use rcu_barrier() to wait for all pending callbacks to @@ -1030,6 +1155,17 @@ fs_initcall(cgroup_writeback_init); static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) { } static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) { } +static void inode_cgwb_move_to_attached(struct inode *inode, + struct bdi_writeback *wb) +{ + assert_spin_locked(&wb->list_lock); + assert_spin_locked(&inode->i_lock); + + inode->i_state &= ~I_SYNC_QUEUED; + list_del_init(&inode->i_io_list); + wb_io_lists_depopulated(wb); +} + static struct bdi_writeback * locked_inode_to_wb_and_lock_list(struct inode *inode) __releases(&inode->i_lock) @@ -1131,7 +1267,11 @@ void inode_io_list_del(struct inode *inode) wb = inode_to_wb_and_lock_list(inode); spin_lock(&inode->i_lock); - inode_io_list_del_locked(inode, wb); + + inode->i_state &= ~I_SYNC_QUEUED; + list_del_init(&inode->i_io_list); + wb_io_lists_depopulated(wb); + spin_unlock(&inode->i_lock); spin_unlock(&wb->list_lock); } @@ -1443,7 +1583,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, inode->i_state &= ~I_SYNC_QUEUED; } else { /* The inode is clean. Remove from writeback lists. */ - inode_io_list_del_locked(inode, wb); + inode_cgwb_move_to_attached(inode, wb); } } @@ -1586,7 +1726,7 @@ static int writeback_single_inode(struct inode *inode, * touch it. See comment above for explanation. */ if (!(inode->i_state & I_DIRTY_ALL)) - inode_io_list_del_locked(inode, wb); + inode_cgwb_move_to_attached(inode, wb); spin_unlock(&wb->list_lock); inode_sync_complete(inode); out: @@ -1650,11 +1790,12 @@ static long writeback_sb_inodes(struct super_block *sb, }; unsigned long start_time = jiffies; long write_chunk; - long wrote = 0; /* count both pages and inodes */ + long total_wrote = 0; /* count both pages and inodes */ while (!list_empty(&wb->b_io)) { struct inode *inode = wb_inode(wb->b_io.prev); struct bdi_writeback *tmp_wb; + long wrote; if (inode->i_sb != sb) { if (work->sb) { @@ -1730,7 +1871,9 @@ static long writeback_sb_inodes(struct super_block *sb, wbc_detach_inode(&wbc); work->nr_pages -= write_chunk - wbc.nr_to_write; - wrote += write_chunk - wbc.nr_to_write; + wrote = write_chunk - wbc.nr_to_write - wbc.pages_skipped; + wrote = wrote < 0 ? 0 : wrote; + total_wrote += wrote; if (need_resched()) { /* @@ -1752,7 +1895,7 @@ static long writeback_sb_inodes(struct super_block *sb, tmp_wb = inode_to_wb_and_lock_list(inode); spin_lock(&inode->i_lock); if (!(inode->i_state & I_DIRTY_ALL)) - wrote++; + total_wrote++; requeue_inode(inode, tmp_wb, &wbc); inode_sync_complete(inode); spin_unlock(&inode->i_lock); @@ -1766,14 +1909,14 @@ static long writeback_sb_inodes(struct super_block *sb, * bail out to wb_writeback() often enough to check * background threshold and other termination conditions. */ - if (wrote) { + if (total_wrote) { if (time_is_before_jiffies(start_time + HZ / 10UL)) break; if (work->nr_pages <= 0) break; } } - return wrote; + return total_wrote; } static long __writeback_inodes_wb(struct bdi_writeback *wb, diff --git a/fs/fs_context.c b/fs/fs_context.c index a2367c7aef5b3..e492a83fa100e 100644 --- a/fs/fs_context.c +++ b/fs/fs_context.c @@ -258,7 +258,7 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type, struct fs_context *fc; int ret = -ENOMEM; - fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL); + fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL_ACCOUNT); if (!fc) return ERR_PTR(-ENOMEM); @@ -686,7 +686,7 @@ const struct fs_context_operations legacy_fs_context_ops = { */ static int legacy_init_fs_context(struct fs_context *fc) { - fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL); + fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL_ACCOUNT); if (!fc->fs_private) return -ENOMEM; fc->ops = &legacy_fs_context_ops; diff --git a/fs/fscache/Kconfig b/fs/fscache/Kconfig index 506c5e643f0d9..1c3981e9a12b7 100644 --- a/fs/fscache/Kconfig +++ b/fs/fscache/Kconfig @@ -2,6 +2,7 @@ config FSCACHE tristate "General filesystem local caching manager" + select NETFS_SUPPORT help This option enables a generic filesystem caching manager that can be used by various network and other filesystems to cache data locally. diff --git a/fs/fscache/Makefile b/fs/fscache/Makefile index 79e08e05ef848..3b2ffa93ac184 100644 --- a/fs/fscache/Makefile +++ b/fs/fscache/Makefile @@ -7,6 +7,7 @@ fscache-y := \ cache.o \ cookie.o \ fsdef.o \ + io.o \ main.o \ netfs.o \ object.o \ diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 0ce39658a6200..44a426c8ea01e 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -74,10 +74,8 @@ void fscache_free_cookie(struct fscache_cookie *cookie) static int fscache_set_key(struct fscache_cookie *cookie, const void *index_key, size_t index_key_len) { - unsigned long long h; u32 *buf; int bufs; - int i; bufs = DIV_ROUND_UP(index_key_len, sizeof(*buf)); @@ -91,17 +89,7 @@ static int fscache_set_key(struct fscache_cookie *cookie, } memcpy(buf, index_key, index_key_len); - - /* Calculate a hash and combine this with the length in the first word - * or first half word - */ - h = (unsigned long)cookie->parent; - h += index_key_len + cookie->type; - - for (i = 0; i < bufs; i++) - h += buf[i]; - - cookie->key_hash = h ^ (h >> 32); + cookie->key_hash = fscache_hash(0, buf, bufs); return 0; } diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 9616af3768e11..24dbbc8c34ff1 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -97,6 +97,8 @@ extern struct workqueue_struct *fscache_object_wq; extern struct workqueue_struct *fscache_op_wq; DECLARE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait); +extern unsigned int fscache_hash(unsigned int salt, unsigned int *data, unsigned int n); + static inline bool fscache_object_congested(void) { return workqueue_congested(WORK_CPU_UNBOUND, fscache_object_wq); @@ -142,6 +144,10 @@ extern int fscache_wait_for_operation_activation(struct fscache_object *, atomic_t *, atomic_t *); extern void fscache_invalidate_writes(struct fscache_cookie *); +struct fscache_retrieval *fscache_alloc_retrieval(struct fscache_cookie *cookie, + struct address_space *mapping, + fscache_rw_complete_t end_io_func, + void *context); /* * proc.c diff --git a/fs/fscache/io.c b/fs/fscache/io.c new file mode 100644 index 0000000000000..8ecc1141802f4 --- /dev/null +++ b/fs/fscache/io.c @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Cache data I/O routines + * + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#define FSCACHE_DEBUG_LEVEL PAGE +#include +#define FSCACHE_USE_NEW_IO_API +#include +#include +#include +#include "internal.h" + +/* + * Start a cache read operation. + * - we return: + * -ENOMEM - out of memory, some pages may be being read + * -ERESTARTSYS - interrupted, some pages may be being read + * -ENOBUFS - no backing object or space available in which to cache any + * pages not being read + * -ENODATA - no data available in the backing object for some or all of + * the pages + * 0 - dispatched a read on all pages + */ +int __fscache_begin_read_operation(struct netfs_read_request *rreq, + struct fscache_cookie *cookie) +{ + struct fscache_retrieval *op; + struct fscache_object *object; + bool wake_cookie = false; + int ret; + + _enter("rr=%08x", rreq->debug_id); + + fscache_stat(&fscache_n_retrievals); + + if (hlist_empty(&cookie->backing_objects)) + goto nobufs; + + if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) { + _leave(" = -ENOBUFS [invalidating]"); + return -ENOBUFS; + } + + ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); + + if (fscache_wait_for_deferred_lookup(cookie) < 0) + return -ERESTARTSYS; + + op = fscache_alloc_retrieval(cookie, NULL, NULL, NULL); + if (!op) + return -ENOMEM; + trace_fscache_page_op(cookie, NULL, &op->op, fscache_page_op_retr_multi); + + spin_lock(&cookie->lock); + + if (!fscache_cookie_enabled(cookie) || + hlist_empty(&cookie->backing_objects)) + goto nobufs_unlock; + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + __fscache_use_cookie(cookie); + atomic_inc(&object->n_reads); + __set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); + + if (fscache_submit_op(object, &op->op) < 0) + goto nobufs_unlock_dec; + spin_unlock(&cookie->lock); + + fscache_stat(&fscache_n_retrieval_ops); + + /* we wait for the operation to become active, and then process it + * *here*, in this thread, and not in the thread pool */ + ret = fscache_wait_for_operation_activation( + object, &op->op, + __fscache_stat(&fscache_n_retrieval_op_waits), + __fscache_stat(&fscache_n_retrievals_object_dead)); + if (ret < 0) + goto error; + + /* ask the cache to honour the operation */ + ret = object->cache->ops->begin_read_operation(rreq, op); + +error: + if (ret == -ENOMEM) + fscache_stat(&fscache_n_retrievals_nomem); + else if (ret == -ERESTARTSYS) + fscache_stat(&fscache_n_retrievals_intr); + else if (ret == -ENODATA) + fscache_stat(&fscache_n_retrievals_nodata); + else if (ret < 0) + fscache_stat(&fscache_n_retrievals_nobufs); + else + fscache_stat(&fscache_n_retrievals_ok); + + fscache_put_retrieval(op); + _leave(" = %d", ret); + return ret; + +nobufs_unlock_dec: + atomic_dec(&object->n_reads); + wake_cookie = __fscache_unuse_cookie(cookie); +nobufs_unlock: + spin_unlock(&cookie->lock); + fscache_put_retrieval(op); + if (wake_cookie) + __fscache_wake_unused_cookie(cookie); +nobufs: + fscache_stat(&fscache_n_retrievals_nobufs); + _leave(" = -ENOBUFS"); + return -ENOBUFS; +} +EXPORT_SYMBOL(__fscache_begin_read_operation); diff --git a/fs/fscache/main.c b/fs/fscache/main.c index 59c2494efda34..cc2fab786db66 100644 --- a/fs/fscache/main.c +++ b/fs/fscache/main.c @@ -39,6 +39,7 @@ MODULE_PARM_DESC(fscache_debug, struct kobject *fscache_root; struct workqueue_struct *fscache_object_wq; +EXPORT_SYMBOL(fscache_object_wq); struct workqueue_struct *fscache_op_wq; DEFINE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait); @@ -94,6 +95,45 @@ static struct ctl_table fscache_sysctls_root[] = { }; #endif +/* + * Mixing scores (in bits) for (7,20): + * Input delta: 1-bit 2-bit + * 1 round: 330.3 9201.6 + * 2 rounds: 1246.4 25475.4 + * 3 rounds: 1907.1 31295.1 + * 4 rounds: 2042.3 31718.6 + * Perfect: 2048 31744 + * (32*64) (32*31/2 * 64) + */ +#define HASH_MIX(x, y, a) \ + ( x ^= (a), \ + y ^= x, x = rol32(x, 7),\ + x += y, y = rol32(y,20),\ + y *= 9 ) + +static inline unsigned int fold_hash(unsigned long x, unsigned long y) +{ + /* Use arch-optimized multiply if one exists */ + return __hash_32(y ^ __hash_32(x)); +} + +/* + * Generate a hash. This is derived from full_name_hash(), but we want to be + * sure it is arch independent and that it doesn't change as bits of the + * computed hash value might appear on disk. The caller also guarantees that + * the hashed data will be a series of aligned 32-bit words. + */ +unsigned int fscache_hash(unsigned int salt, unsigned int *data, unsigned int n) +{ + unsigned int a, x = 0, y = salt; + + for (; n; n--) { + a = *data++; + HASH_MIX(x, y, a); + } + return fold_hash(x, y); +} + /* * initialise the fs caching module */ diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 26af6fdf15387..991b0a871744e 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -299,7 +299,7 @@ static void fscache_release_retrieval_op(struct fscache_operation *_op) /* * allocate a retrieval op */ -static struct fscache_retrieval *fscache_alloc_retrieval( +struct fscache_retrieval *fscache_alloc_retrieval( struct fscache_cookie *cookie, struct address_space *mapping, fscache_rw_complete_t end_io_func, diff --git a/fs/fuse/control.c b/fs/fuse/control.c index c23f6f243ad42..66e291b64096c 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -191,6 +191,45 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file, return ret; } +static ssize_t fuse_conn_invalidate_inode_write(struct file *file, + const char __user *buf, + size_t count, loff_t *ppos) +{ + char tmp[64]; + struct fuse_conn *fc; + uint64_t ino; + int64_t off, len; + int ret; + + if (*ppos != 0) + return -EINVAL; + + if (count >= sizeof(tmp)) + return -ENOSPC; + + ret = simple_write_to_buffer(tmp, sizeof(tmp) - 1, ppos, buf, count); + if (ret != count) + return ret < 0 ? ret : -EIO; + + tmp[ret] = '\0'; + ret = sscanf(tmp, "%llu:%lld:%lld", &ino, &off, &len); + if (ret != 3) + return -EINVAL; + + fc = fuse_ctl_file_conn_get(file); + if (!fc) + return -ENODEV; + + down_read(&fc->killsb); + ret = -ENOENT; + if (fc->sb) + ret = fuse_reverse_inval_inode(fc->sb, ino, off, len); + up_read(&fc->killsb); + fuse_conn_put(fc); + + return ret < 0 ? ret : count; +} + static const struct file_operations fuse_ctl_abort_ops = { .open = nonseekable_open, .write = fuse_conn_abort_write, @@ -217,6 +256,12 @@ static const struct file_operations fuse_conn_congestion_threshold_ops = { .llseek = no_llseek, }; +static const struct file_operations fuse_ctl_invalidate_inode_ops = { + .open = nonseekable_open, + .write = fuse_conn_invalidate_inode_write, + .llseek = no_llseek, +}; + static struct dentry *fuse_ctl_add_dentry(struct dentry *parent, struct fuse_conn *fc, const char *name, @@ -279,13 +324,16 @@ int fuse_ctl_add_conn(struct fuse_conn *fc) if (!fuse_ctl_add_dentry(parent, fc, "waiting", S_IFREG | 0400, 1, NULL, &fuse_ctl_waiting_ops) || - !fuse_ctl_add_dentry(parent, fc, "abort", S_IFREG | 0200, 1, - NULL, &fuse_ctl_abort_ops) || + (!fc->no_abort_control && !fuse_ctl_add_dentry(parent, fc, "abort", + S_IFREG | 0200, 1, NULL, &fuse_ctl_abort_ops)) || !fuse_ctl_add_dentry(parent, fc, "max_background", S_IFREG | 0600, 1, NULL, &fuse_conn_max_background_ops) || !fuse_ctl_add_dentry(parent, fc, "congestion_threshold", S_IFREG | 0600, 1, NULL, - &fuse_conn_congestion_threshold_ops)) + &fuse_conn_congestion_threshold_ops) || + !fuse_ctl_add_dentry(parent, fc, "invalidate_inode", + S_IFREG | 0200, 1, NULL, + &fuse_ctl_invalidate_inode_ops)) goto err; return 0; diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 7205a89fbb5f3..ae01bb0cfbb46 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -844,6 +844,12 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) if (!(buf->flags & PIPE_BUF_FLAG_LRU)) lru_cache_add_file(newpage); + /* + * Release while we have extra ref on stolen page. Otherwise + * anon_pipe_buf_release() might think the page can be reused. + */ + pipe_buf_release(cs->pipe, buf); + err = 0; spin_lock(&cs->req->waitq.lock); if (test_bit(FR_ABORTED, &cs->req->flags)) @@ -927,7 +933,17 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep, while (count) { if (cs->write && cs->pipebufs && page) { - return fuse_ref_page(cs, page, offset, count); + /* + * Can't control lifetime of pipe buffers, so always + * copy user pages. + */ + if (cs->req->args->user_pages) { + err = fuse_copy_fill(cs); + if (err) + return err; + } else { + return fuse_ref_page(cs, page, offset, count); + } } else if (!cs->len) { if (cs->move_pages && page && offset == 0 && count == PAGE_SIZE) { @@ -2027,8 +2043,12 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, pipe_lock(pipe); out_free: - for (idx = 0; idx < nbuf; idx++) - pipe_buf_release(pipe, &bufs[idx]); + for (idx = 0; idx < nbuf; idx++) { + struct pipe_buffer *buf = &bufs[idx]; + + if (buf->ops) + pipe_buf_release(pipe, buf); + } pipe_unlock(pipe); kvfree(bufs); @@ -2232,42 +2252,89 @@ static int fuse_device_clone(struct fuse_conn *fc, struct file *new) return 0; } -static long fuse_dev_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) +static int fuse_dev_ioc_clone(struct file *file, unsigned long arg) { - int err = -ENOTTY; + int err = -EFAULT; + int oldfd; - if (cmd == FUSE_DEV_IOC_CLONE) { - int oldfd; + if (!get_user(oldfd, (__u32 __user *) arg)) { + struct file *old = fget(oldfd); - err = -EFAULT; - if (!get_user(oldfd, (__u32 __user *) arg)) { - struct file *old = fget(oldfd); + err = -EINVAL; + if (old) { + struct fuse_dev *fud = NULL; - err = -EINVAL; - if (old) { - struct fuse_dev *fud = NULL; - - /* - * Check against file->f_op because CUSE - * uses the same ioctl handler. - */ - if (old->f_op == file->f_op && - old->f_cred->user_ns == file->f_cred->user_ns) - fud = fuse_get_dev(old); - - if (fud) { - mutex_lock(&fuse_mutex); - err = fuse_device_clone(fud->fc, file); - mutex_unlock(&fuse_mutex); - } - fput(old); + /* + * Check against file->f_op because CUSE + * uses the same ioctl handler. + */ + if (old->f_op == file->f_op && + old->f_cred->user_ns == file->f_cred->user_ns) + fud = fuse_get_dev(old); + + if (fud) { + mutex_lock(&fuse_mutex); + err = fuse_device_clone(fud->fc, file); + mutex_unlock(&fuse_mutex); } + fput(old); } } return err; } +static int fuse_dev_ioc_restore_req(struct file *file) +{ + unsigned int i; + LIST_HEAD(to_restore); + struct fuse_req *req, *next; + struct fuse_dev *fud = fuse_get_dev(file); + struct fuse_pqueue *fpq = &fud->pq; + struct fuse_conn *fc = fud->fc; + struct fuse_iqueue *fiq = &fc->iq; + + if (fc->sb && !sb_rdonly(fc->sb)) + return -EOPNOTSUPP; + + spin_lock(&fpq->lock); + list_for_each_entry_safe(req, next, &fpq->io, list) { + spin_lock(&req->waitq.lock); + if (!test_bit(FR_LOCKED, &req->flags)) { + set_bit(FR_PRIVATE, &req->flags); + __fuse_get_request(req); + list_move_tail(&req->list, &to_restore); + } + spin_unlock(&req->waitq.lock); + } + for (i = 0; i < FUSE_PQ_HASH_SIZE; i++) + list_splice_init(&fpq->processing[i], &to_restore); + spin_unlock(&fpq->lock); + + list_for_each_entry(req, &to_restore, list) { + clear_bit(FR_SENT, &req->flags); + set_bit(FR_PENDING, &req->flags); + } + + spin_lock(&fiq->lock); + list_splice(&to_restore, &fiq->pending); + spin_unlock(&fiq->lock); + + return 0; +} + +static long fuse_dev_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + switch (cmd) { + case FUSE_DEV_IOC_CLONE: + return fuse_dev_ioc_clone(file, arg); + case FUSE_DEV_IOC_RESTORE_REQ: + return fuse_dev_ioc_restore_req(file); + default: + return -ENOTTY; + } +} + const struct file_operations fuse_dev_operations = { .owner = THIS_MODULE, .open = fuse_dev_open, diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 60378f3baaae1..ee3f579dbc401 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -255,7 +255,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) forget_all_cached_acls(inode); fuse_change_attributes(inode, &outarg.attr, entry_attr_timeout(&outarg), - attr_version); + attr_version, FUSE_INODE_UNLOCKED); fuse_change_entry_timeout(entry, &outarg); } else if (inode) { fi = get_fuse_inode(inode); @@ -836,7 +836,7 @@ static int fuse_rename2(struct inode *olddir, struct dentry *oldent, if (fuse_is_bad(olddir)) return -EIO; - if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) return -EINVAL; if (flags) { @@ -939,7 +939,7 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr, } static int fuse_do_getattr(struct inode *inode, struct kstat *stat, - struct file *file) + struct file *file, enum inode_lock_state lock_state) { int err; struct fuse_getattr_in inarg; @@ -976,7 +976,8 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat, } else { fuse_change_attributes(inode, &outarg.attr, attr_timeout(&outarg), - attr_version); + attr_version, + lock_state); if (stat) fuse_fillattr(inode, &outarg.attr, stat); } @@ -986,7 +987,8 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat, static int fuse_update_get_attr(struct inode *inode, struct file *file, struct kstat *stat, u32 request_mask, - unsigned int flags) + unsigned int flags, + enum inode_lock_state lock_state) { struct fuse_inode *fi = get_fuse_inode(inode); int err = 0; @@ -1003,7 +1005,7 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file, if (sync) { forget_all_cached_acls(inode); - err = fuse_do_getattr(inode, stat, file); + err = fuse_do_getattr(inode, stat, file, lock_state); } else if (stat) { generic_fillattr(inode, stat); stat->mode = fi->orig_i_mode; @@ -1013,11 +1015,13 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file, return err; } -int fuse_update_attributes(struct inode *inode, struct file *file) +int fuse_update_attributes(struct inode *inode, struct file *file, + enum inode_lock_state lock_state) { /* Do *not* need to get atime for internal purposes */ return fuse_update_get_attr(inode, file, NULL, - STATX_BASIC_STATS & ~STATX_ATIME, 0); + STATX_BASIC_STATS & ~STATX_ATIME, 0, + lock_state); } int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, @@ -1032,7 +1036,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, if (!parent) return -ENOENT; - inode_lock(parent); + inode_lock_nested(parent, I_MUTEX_PARENT); if (!S_ISDIR(parent->i_mode)) goto unlock; @@ -1151,7 +1155,7 @@ static int fuse_perm_getattr(struct inode *inode, int mask) return -ECHILD; forget_all_cached_acls(inode); - return fuse_do_getattr(inode, NULL, NULL); + return fuse_do_getattr(inode, NULL, NULL, FUSE_INODE_MAY_LOCKED); } /* @@ -1612,7 +1616,8 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, } fuse_change_attributes_common(inode, &outarg.attr, - attr_timeout(&outarg)); + attr_timeout(&outarg), + !trust_local_cmtime); oldsize = inode->i_size; /* see the comment in fuse_change_attributes() */ if (!is_wb || is_truncate || !S_ISREG(inode->i_mode)) @@ -1673,7 +1678,7 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr) * ia_mode calculation may have used stale i_mode. * Refresh and recalculate. */ - ret = fuse_do_getattr(inode, NULL, file); + ret = fuse_do_getattr(inode, NULL, file, FUSE_INODE_LOCKED); if (ret) return ret; @@ -1719,7 +1724,8 @@ static int fuse_getattr(const struct path *path, struct kstat *stat, if (!fuse_allow_current_process(fc)) return -EACCES; - return fuse_update_get_attr(inode, NULL, stat, request_mask, flags); + return fuse_update_get_attr(inode, NULL, stat, request_mask, flags, + FUSE_INODE_UNLOCKED); } static const struct inode_operations fuse_dir_inode_operations = { diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 0cc44f3ed3475..49f6e9a43c151 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -213,6 +213,9 @@ void fuse_finish_open(struct inode *inode, struct file *file) invalidate_inode_pages2(inode->i_mapping); } + if (ff->open_flags & FOPEN_INVAL_ATTR) + fuse_invalidate_attr(inode); + if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache) fuse_link_write_file(file); } @@ -364,7 +367,7 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id) struct fuse_writepage_args { struct fuse_io_args ia; - struct list_head writepages_entry; + struct rb_node writepages_entry; struct list_head queue_entry; struct fuse_writepage_args *next; struct inode *inode; @@ -373,33 +376,27 @@ struct fuse_writepage_args { static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi, pgoff_t idx_from, pgoff_t idx_to) { - struct fuse_writepage_args *wpa; + struct rb_node *n; + + n = fi->writepages.rb_node; - list_for_each_entry(wpa, &fi->writepages, writepages_entry) { + while (n) { + struct fuse_writepage_args *wpa; pgoff_t curr_index; + wpa = rb_entry(n, struct fuse_writepage_args, writepages_entry); WARN_ON(get_fuse_inode(wpa->inode) != fi); curr_index = wpa->ia.write.in.offset >> PAGE_SHIFT; - if (idx_from < curr_index + wpa->ia.ap.num_pages && - curr_index <= idx_to) { + if (idx_from >= curr_index + wpa->ia.ap.num_pages) + n = n->rb_right; + else if (idx_to < curr_index) + n = n->rb_left; + else return wpa; - } } return NULL; } -/* - * Check if any page of this file is under writeback. - * - * The fuse_inode lock should be held by the caller. - */ -bool fuse_file_is_writeback_locked(struct inode *inode) -{ - struct fuse_inode *fi = get_fuse_inode(inode); - - return fuse_find_writeback(fi, 0, ULONG_MAX); -} - /* * Check if any page in a range is under writeback * @@ -1037,7 +1034,8 @@ static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to) if (fc->auto_inval_data || (iocb->ki_pos + iov_iter_count(to) > i_size_read(inode))) { int err; - err = fuse_update_attributes(inode, iocb->ki_filp); + err = fuse_update_attributes(inode, iocb->ki_filp, + FUSE_INODE_UNLOCKED); if (err) return err; } @@ -1323,7 +1321,8 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from) if (get_fuse_conn(inode)->writeback_cache) { /* Update size (EOF optimization) and mode (SUID clearing) */ - err = fuse_update_attributes(mapping->host, file); + err = fuse_update_attributes(mapping->host, file, + FUSE_INODE_UNLOCKED); if (err) return err; @@ -1454,6 +1453,7 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii, (PAGE_SIZE - ret) & (PAGE_SIZE - 1); } + ap->args.user_pages = true; if (write) ap->args.in_pages = 1; else @@ -1660,7 +1660,7 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct backing_dev_info *bdi = inode_to_bdi(inode); int i; - list_del(&wpa->writepages_entry); + rb_erase(&wpa->writepages_entry, &fi->writepages); for (i = 0; i < ap->num_pages; i++) { dec_wb_stat(&bdi->wb, WB_WRITEBACK); dec_node_page_state(ap->pages[i], NR_WRITEBACK_TEMP); @@ -1748,6 +1748,43 @@ __acquires(fi->lock) } } +static struct fuse_writepage_args *fuse_insert_writeback(struct rb_root *root, + struct fuse_writepage_args *wpa) +{ + pgoff_t idx_from = wpa->ia.write.in.offset >> PAGE_SHIFT; + pgoff_t idx_to = idx_from + wpa->ia.ap.num_pages - 1; + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + + WARN_ON(!wpa->ia.ap.num_pages); + while (*p) { + struct fuse_writepage_args *curr; + pgoff_t curr_index; + + parent = *p; + curr = rb_entry(parent, struct fuse_writepage_args, + writepages_entry); + WARN_ON(curr->inode != wpa->inode); + curr_index = curr->ia.write.in.offset >> PAGE_SHIFT; + + if (idx_from >= curr_index + curr->ia.ap.num_pages) + p = &(*p)->rb_right; + else if (idx_to < curr_index) + p = &(*p)->rb_left; + else + return curr; + } + + rb_link_node(&wpa->writepages_entry, parent, p); + rb_insert_color(&wpa->writepages_entry, root); + return NULL; +} + +static void tree_insert(struct rb_root *root, struct fuse_writepage_args *wpa) +{ + WARN_ON(fuse_insert_writeback(root, wpa)); +} + static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args, int error) { @@ -1774,7 +1811,7 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args, wpa->next = next->next; next->next = NULL; next->ia.ff = fuse_file_get(wpa->ia.ff); - list_add(&next->writepages_entry, &fi->writepages); + tree_insert(&fi->writepages, next); /* * Skip fuse_flush_writepages() to make it easy to crop requests @@ -1801,6 +1838,15 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args, */ fuse_send_writepage(fc, next, inarg->offset + inarg->size); } + + if (fc->wb_trust_server) + fi->attr_version = atomic64_inc_return(&fc->attr_version); + /* + * Ensure attr_version increases before the page is move out of the + * writepages rb-tree. + */ + smp_mb(); + fi->writectr--; fuse_writepage_finish(fc, wpa); spin_unlock(&fi->lock); @@ -1838,6 +1884,9 @@ int fuse_write_inode(struct inode *inode, struct writeback_control *wbc) struct fuse_file *ff; int err; + if (fc->wb_trust_server) + return 0; + ff = __fuse_write_file_get(fc, fi); err = fuse_flush_times(inode, ff); if (ff) @@ -1909,7 +1958,7 @@ static int fuse_writepage_locked(struct page *page) inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP); spin_lock(&fi->lock); - list_add(&wpa->writepages_entry, &fi->writepages); + tree_insert(&fi->writepages, wpa); list_add_tail(&wpa->queue_entry, &fi->queued_writes); fuse_flush_writepages(inode); spin_unlock(&fi->lock); @@ -2004,14 +2053,14 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data) } /* - * First recheck under fi->lock if the offending offset is still under - * writeback. If yes, then iterate auxiliary write requests, to see if there's + * Check under fi->lock if the page is under writeback, and insert it onto the + * rb_tree if not. Otherwise iterate auxiliary write requests, to see if there's * one already added for a page at this offset. If there's none, then insert * this new request onto the auxiliary list, otherwise reuse the existing one by - * copying the new page contents over to the old temporary page. + * swapping the new temp page with the old one. */ -static bool fuse_writepage_in_flight(struct fuse_writepage_args *new_wpa, - struct page *page) +static bool fuse_writepage_add(struct fuse_writepage_args *new_wpa, + struct page *page) { struct fuse_inode *fi = get_fuse_inode(new_wpa->inode); struct fuse_writepage_args *tmp; @@ -2019,17 +2068,15 @@ static bool fuse_writepage_in_flight(struct fuse_writepage_args *new_wpa, struct fuse_args_pages *new_ap = &new_wpa->ia.ap; WARN_ON(new_ap->num_pages != 0); + new_ap->num_pages = 1; spin_lock(&fi->lock); - list_del(&new_wpa->writepages_entry); - old_wpa = fuse_find_writeback(fi, page->index, page->index); + old_wpa = fuse_insert_writeback(&fi->writepages, new_wpa); if (!old_wpa) { - list_add(&new_wpa->writepages_entry, &fi->writepages); spin_unlock(&fi->lock); - return false; + return true; } - new_ap->num_pages = 1; for (tmp = old_wpa->next; tmp; tmp = tmp->next) { pgoff_t curr_index; @@ -2058,7 +2105,7 @@ static bool fuse_writepage_in_flight(struct fuse_writepage_args *new_wpa, fuse_writepage_free(new_wpa); } - return true; + return false; } static int fuse_writepages_fill(struct page *page, @@ -2137,12 +2184,6 @@ static int fuse_writepages_fill(struct page *page, ap->args.end = fuse_writepage_end; ap->num_pages = 0; wpa->inode = inode; - - spin_lock(&fi->lock); - list_add(&wpa->writepages_entry, &fi->writepages); - spin_unlock(&fi->lock); - - data->wpa = wpa; } set_page_writeback(page); @@ -2150,26 +2191,25 @@ static int fuse_writepages_fill(struct page *page, ap->pages[ap->num_pages] = tmp_page; ap->descs[ap->num_pages].offset = 0; ap->descs[ap->num_pages].length = PAGE_SIZE; + data->orig_pages[ap->num_pages] = page; inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK); inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP); err = 0; - if (is_writeback && fuse_writepage_in_flight(wpa, page)) { + if (data->wpa) { + /* + * Protected by fi->lock against concurrent access by + * fuse_page_is_writeback(). + */ + spin_lock(&fi->lock); + ap->num_pages++; + spin_unlock(&fi->lock); + } else if (fuse_writepage_add(wpa, page)) { + data->wpa = wpa; + } else { end_page_writeback(page); - data->wpa = NULL; - goto out_unlock; } - data->orig_pages[ap->num_pages] = page; - - /* - * Protected by fi->lock against concurrent access by - * fuse_page_is_writeback(). - */ - spin_lock(&fi->lock); - ap->num_pages++; - spin_unlock(&fi->lock); - out_unlock: unlock_page(page); @@ -2586,7 +2626,7 @@ static loff_t fuse_lseek(struct file *file, loff_t offset, int whence) return vfs_setpos(file, outarg.offset, inode->i_sb->s_maxbytes); fallback: - err = fuse_update_attributes(inode, file); + err = fuse_update_attributes(inode, file, FUSE_INODE_LOCKED); if (!err) return generic_file_llseek(file, offset, whence); else @@ -2606,7 +2646,7 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int whence) break; case SEEK_END: inode_lock(inode); - retval = fuse_update_attributes(inode, file); + retval = fuse_update_attributes(inode, file, FUSE_INODE_LOCKED); if (!retval) retval = generic_file_llseek(file, offset, whence); inode_unlock(inode); @@ -3476,5 +3516,5 @@ void fuse_init_file_inode(struct inode *inode) INIT_LIST_HEAD(&fi->queued_writes); fi->writectr = 0; init_waitqueue_head(&fi->page_waitq); - INIT_LIST_HEAD(&fi->writepages); + fi->writepages = RB_ROOT; } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 4d64994903775..d2006ae029e0b 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -48,7 +48,7 @@ #define FUSE_NAME_MAX 1024 /** Number of dentries for each connection in the control filesystem */ -#define FUSE_CTL_NUM_DENTRIES 5 +#define FUSE_CTL_NUM_DENTRIES 6 /** List of active connections */ extern struct list_head fuse_conn_list; @@ -114,7 +114,7 @@ struct fuse_inode { wait_queue_head_t page_waitq; /* List of writepage requestst (pending or sent) */ - struct list_head writepages; + struct rb_root writepages; }; /* readdir cache (directory only) */ @@ -251,6 +251,7 @@ struct fuse_args { bool nocreds:1; bool in_pages:1; bool out_pages:1; + bool user_pages:1; bool out_argvar:1; bool page_zeroing:1; bool page_replace:1; @@ -364,6 +365,12 @@ struct fuse_req { #if IS_ENABLED(CONFIG_VIRTIO_FS) /** virtio-fs's physically contiguous buffer for in and out args */ void *argbuf; + /** dma sg list */ + struct scatterlist *sg; + /** number of out sg list */ + unsigned int out_sgs; + /** number of in sg list */ + unsigned int in_sgs; #endif }; @@ -485,7 +492,7 @@ struct fuse_fs_context { bool default_permissions:1; bool allow_other:1; bool destroy:1; - bool no_control:1; + bool no_abort_control:1; bool no_force_umount:1; bool no_mount_options:1; bool delete_stale:1; @@ -724,8 +731,8 @@ struct fuse_conn { /* Delete dentries that have gone stale */ unsigned int delete_stale:1; - /** Do not create entry in fusectl fs */ - unsigned int no_control:1; + /** Do not create abort entry in fusectl fs */ + unsigned int no_abort_control:1; /** Do not allow MNT_FORCE umount */ unsigned int no_force_umount:1; @@ -916,14 +923,21 @@ void fuse_init_dir(struct inode *inode); */ void fuse_init_symlink(struct inode *inode); +enum inode_lock_state { + FUSE_INODE_LOCKED, + FUSE_INODE_MAY_LOCKED, + FUSE_INODE_UNLOCKED, +}; + /** * Change attributes of an inode */ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, - u64 attr_valid, u64 attr_version); + u64 attr_valid, u64 attr_version, + enum inode_lock_state lock_state); void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, - u64 attr_valid); + u64 attr_valid, bool update_cmtime); /** * Initialize the client device @@ -1028,7 +1042,8 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id); void fuse_update_ctime(struct inode *inode); -int fuse_update_attributes(struct inode *inode, struct file *file); +int fuse_update_attributes(struct inode *inode, struct file *file, + enum inode_lock_state lock_state); void fuse_flush_writepages(struct inode *inode); @@ -1117,6 +1132,5 @@ unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args); */ u64 fuse_get_unique(struct fuse_iqueue *fiq); void fuse_free_conn(struct fuse_conn *fc); -bool fuse_file_is_writeback_locked(struct inode *inode); #endif /* _FS_FUSE_I_H */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 9fd6abb11796a..aee1460c9cce0 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -146,7 +146,7 @@ static ino_t fuse_squash_ino(u64 ino64) } void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, - u64 attr_valid) + u64 attr_valid, bool update_cmtime) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); @@ -166,7 +166,7 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, inode->i_atime.tv_sec = attr->atime; inode->i_atime.tv_nsec = attr->atimensec; /* mtime from server may be stale due to local buffered write */ - if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) { + if (update_cmtime || !S_ISREG(inode->i_mode)) { inode->i_mtime.tv_sec = attr->mtime; inode->i_mtime.tv_nsec = attr->mtimensec; inode->i_ctime.tv_sec = attr->ctime; @@ -191,7 +191,8 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, } void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, - u64 attr_valid, u64 attr_version) + u64 attr_valid, u64 attr_version, + enum inode_lock_state lock_state) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); @@ -199,10 +200,36 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, loff_t oldsize; struct timespec64 old_mtime; bool try_wb_update = false; - - if (is_wb && fc->wb_trust_server && S_ISREG(inode->i_mode) && - inode_trylock(inode)) + bool wb_update = false; + bool inval = false; + bool update_cmtime = !is_wb; + int res = 0; + bool attr_is_inval = false; + bool should_unlock_inode = false; + + if (fc->wb_trust_server && S_ISREG(inode->i_mode)) { try_wb_update = true; + switch (lock_state) { + case FUSE_INODE_LOCKED: + break; + case FUSE_INODE_MAY_LOCKED: + if (!inode_trylock(inode)) + try_wb_update = false; + else + should_unlock_inode = true; + break; + case FUSE_INODE_UNLOCKED: + res = down_write_killable(&inode->i_rwsem); + if (res) + try_wb_update = false; + else + should_unlock_inode = true; + break; + default: + WARN_ON(1); + try_wb_update = false; + } + } spin_lock(&fi->lock); /* @@ -210,24 +237,51 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, * may update i_size. In these cases trust the cached value in the * inode. * - * One expection is if the page cache is clean and there is no in-flight - * fuse writeback request. The c/mtime and file size are allowed to be - * updated from server, so clear is_wb in that case. + * But with wb_trust_server, if all the following conditions are met, + * then we allow the attributes to be refreshed: + * + * - inode is not in the process of being written (I_SYNC) + * - inode has no dirty pages (I_DIRTY_PAGES) + * - inode data-related attributes are clean (I_DIRTY_DATASYNC) + * - inode does not have any page writeback in progress + * + * Note: checking PAGECACHE_TAG_WRITEBACK is not sufficient in fuse, + * since inode can appear to have no PageWriteback pages, yet still have + * outstanding write request. */ - if (try_wb_update && !fuse_file_is_writeback_locked(inode) && - !filemap_range_needs_writeback(inode->i_mapping, 0, LLONG_MAX)) - is_wb = 0; + if (try_wb_update && !(inode->i_state & (I_DIRTY_PAGES | I_SYNC | + I_DIRTY_DATASYNC)) && RB_EMPTY_ROOT(&fi->writepages)) + wb_update = true; + /* + * Ensure the ordering of cleanness checking and following attr_version + * comparison. + */ + smp_mb(); if ((attr_version != 0 && fi->attr_version > attr_version) || test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) { spin_unlock(&fi->lock); - if (try_wb_update) - inode_unlock(inode); - return; + goto out_unlock_inode; + } + + if (fc->wb_trust_server && attr->should_inval) { + update_cmtime = true; + if (S_ISREG(inode->i_mode)) + inval = true; } old_mtime = inode->i_mtime; - fuse_change_attributes_common(inode, attr, attr_valid); + attr_is_inval = !!(STATX_BASIC_STATS & ~STATX_ATIME & + READ_ONCE(fi->inval_mask)); + fuse_change_attributes_common(inode, attr, attr_valid, + update_cmtime); + /* + * If we did not get the inode lock in this function, then we should + * re-invalidate the attributes and try it later. + */ + if (fc->wb_trust_server && S_ISREG(inode->i_mode) && !wb_update && + attr_is_inval) + fuse_invalidate_attr(inode); oldsize = inode->i_size; /* @@ -235,18 +289,22 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, * extend local i_size without keeping userspace server in sync. So, * attr->size coming from server can be stale. We cannot trust it. */ - if (!is_wb || !S_ISREG(inode->i_mode)) + if (!is_wb || wb_update || !S_ISREG(inode->i_mode)) i_size_write(inode, attr->size); spin_unlock(&fi->lock); - if (!is_wb && S_ISREG(inode->i_mode)) { - bool inval = false; - + if ((!is_wb || wb_update) && S_ISREG(inode->i_mode)) { if (oldsize != attr->size) { truncate_pagecache(inode, attr->size); if (!fc->explicit_inval_data) inval = true; - } else if (fc->auto_inval_data) { + } else if (!fc->wb_trust_server && fc->auto_inval_data) { + /* + * When fc->wb_trust_server is set, the old_mtime + * can be generated by kernel and must not equal to + * new_mtime generated by server. So skip in such + * case. + */ struct timespec64 new_mtime = { .tv_sec = attr->mtime, .tv_nsec = attr->mtimensec, @@ -259,11 +317,13 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, if (!timespec64_equal(&old_mtime, &new_mtime)) inval = true; } - - if (inval) - invalidate_inode_pages2(inode->i_mapping); } - if (try_wb_update) + + if (inval) + invalidate_inode_pages2(inode->i_mapping); + +out_unlock_inode: + if (should_unlock_inode) inode_unlock(inode); } @@ -338,7 +398,8 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, spin_lock(&fi->lock); fi->nlookup++; spin_unlock(&fi->lock); - fuse_change_attributes(inode, attr, attr_valid, attr_version); + fuse_change_attributes(inode, attr, attr_valid, attr_version, + FUSE_INODE_UNLOCKED); return inode; } @@ -1237,7 +1298,7 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) fc->group_id = ctx->group_id; fc->max_read = max_t(unsigned, 4096, ctx->max_read); fc->destroy = ctx->destroy; - fc->no_control = ctx->no_control; + fc->no_abort_control = ctx->no_abort_control; fc->no_force_umount = ctx->no_force_umount; fc->no_mount_options = ctx->no_mount_options; diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index 9f596a4ec4b8c..6c8c3d4c96881 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -220,7 +220,7 @@ static int fuse_direntplus_link(struct file *file, forget_all_cached_acls(inode); fuse_change_attributes(inode, &o->attr, entry_attr_timeout(o), - attr_version); + attr_version, FUSE_INODE_UNLOCKED); /* * The other branch comes via fuse_iget() * which bumps nlookup inside @@ -468,7 +468,8 @@ static int fuse_readdir_cached(struct file *file, struct dir_context *ctx) * cache; both cases require an up-to-date mtime value. */ if (!ctx->pos && fc->auto_inval_data) { - int err = fuse_update_attributes(inode, file); + int err = fuse_update_attributes(inode, file, + FUSE_INODE_LOCKED); if (err) return err; diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 01d2e67eb163e..2ff315ce30549 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -14,6 +14,18 @@ #include #include "fuse_i.h" +/* Disabled by default */ +#define IO_TIMEOUT_MS_DEFAULT 0 + +static unsigned long io_timeout_ms = IO_TIMEOUT_MS_DEFAULT; +module_param(io_timeout_ms, ulong, 0644); + +static bool irq_use_wq = true; +module_param(irq_use_wq, bool, 0644); + +static bool enable_dma_premap; +module_param(enable_dma_premap, bool, 0444); + /* List of virtio-fs device instances and a lock for the list. Also provides * mutual exclusion in device removal and mounting path */ @@ -167,12 +179,23 @@ static void virtio_fs_fiq_release(struct fuse_iqueue *fiq) mutex_unlock(&virtio_fs_mutex); } +static void virtio_fs_forgot_requests_abort(struct virtio_fs_vq *fsvq); +static void virtio_fs_requests_abort(struct virtio_fs_vq *fsvq); + static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq) { + ktime_t start = ktime_get(); + u64 timeout_ms; + WARN_ON(fsvq->in_flight < 0); /* Wait for in flight requests to finish.*/ while (1) { + timeout_ms = READ_ONCE(io_timeout_ms); + if (virtqueue_is_broken(fsvq->vq) || (timeout_ms && + ktime_after(ktime_sub_ms(ktime_get(), timeout_ms), start))) + break; + spin_lock(&fsvq->lock); if (!fsvq->in_flight) { spin_unlock(&fsvq->lock); @@ -182,7 +205,12 @@ static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq) /* TODO use completion instead of timeout */ usleep_range(1000, 2000); } - + if (fsvq->in_flight) { + if (fsvq->vq->index == VQ_HIPRIO) + virtio_fs_forgot_requests_abort(fsvq); + else + virtio_fs_requests_abort(fsvq); + } flush_work(&fsvq->done_work); flush_delayed_work(&fsvq->dispatch_work); } @@ -503,6 +531,62 @@ static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req) req->argbuf = NULL; } +static int virtio_fs_request_map(struct fuse_req *req, + struct virtio_fs_vq *fsvq, + struct scatterlist *sg, + unsigned int out_sgs, + unsigned int in_sgs) +{ + dma_addr_t addr; + unsigned int i, total_sgs = out_sgs + in_sgs; + + req->sg = kmalloc_array(total_sgs, sizeof(*sg), GFP_ATOMIC); + if (!req->sg) + return -ENOMEM; + + for (i = 0; i < total_sgs; i++) { + addr = virtio_dma_map_sg(fsvq->vq->vdev, &sg[i], + i < out_sgs ? DMA_TO_DEVICE : + DMA_FROM_DEVICE); + if (virtio_dma_mapping_error(fsvq->vq->vdev, addr)) + goto err; + + sg[i].dma_address = addr; + } + req->out_sgs = out_sgs; + req->in_sgs = in_sgs; + memcpy(req->sg, sg, sizeof(*sg) * total_sgs); + + return 0; +err: + while (i--) + virtio_dma_unmap(fsvq->vq->vdev, sg[i].dma_address, + sg[i].length, i < out_sgs ? + DMA_TO_DEVICE : DMA_FROM_DEVICE); + kfree(req->sg); + req->sg = NULL; + return -ENOMEM; +} + +static void virtio_fs_request_unmap(struct fuse_req *req, + struct virtio_fs_vq *fsvq) +{ + unsigned int i, total_sgs = req->out_sgs + req->in_sgs; + + if (!req->sg) + return; + + for (i = 0; i < total_sgs; i++) { + virtio_dma_unmap(fsvq->vq->vdev, + req->sg[i].dma_address, + req->sg[i].length, + i < req->out_sgs ? DMA_TO_DEVICE : + DMA_FROM_DEVICE); + } + kfree(req->sg); + req->sg = NULL; +} + /* Work function for request completion */ static void virtio_fs_request_complete(struct fuse_req *req, struct virtio_fs_vq *fsvq) @@ -514,6 +598,8 @@ static void virtio_fs_request_complete(struct fuse_req *req, unsigned int len, i, thislen; struct page *page; + virtio_fs_request_unmap(req, fsvq); + /* * TODO verify that server properly follows FUSE protocol * (oh.uniq, oh.len) @@ -556,6 +642,19 @@ static void virtio_fs_complete_req_work(struct work_struct *work) kfree(w); } +static struct fuse_req *virtio_fs_get_buf(struct virtqueue *vq) +{ + struct fuse_req *req; + unsigned int len; + + if (enable_dma_premap) + req = virtqueue_get_mapped_buf_split(vq, &len, NULL); + else + req = virtqueue_get_buf(vq, &len); + + return req; +} + static void virtio_fs_requests_done_work(struct work_struct *work) { struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, @@ -564,7 +663,6 @@ static void virtio_fs_requests_done_work(struct work_struct *work) struct virtqueue *vq = fsvq->vq; struct fuse_req *req; struct fuse_req *next; - unsigned int len; LIST_HEAD(reqs); /* Collect completed requests off the virtqueue */ @@ -572,7 +670,7 @@ static void virtio_fs_requests_done_work(struct work_struct *work) do { virtqueue_disable_cb(vq); - while ((req = virtqueue_get_buf(vq, &len)) != NULL) { + while ((req = virtio_fs_get_buf(vq)) != NULL) { spin_lock(&fpq->lock); list_move_tail(&req->list, &reqs); spin_unlock(&fpq->lock); @@ -599,6 +697,78 @@ static void virtio_fs_requests_done_work(struct work_struct *work) } } +static void virtio_fs_forgot_requests_abort(struct virtio_fs_vq *fsvq) +{ + struct virtqueue *vq = fsvq->vq; + void *req; + + /* Free completed FUSE_FORGET requests */ + spin_lock(&fsvq->lock); + while ((req = virtqueue_detach_unused_buf(vq)) != NULL) { + kfree(req); + dec_in_flight_req(fsvq); + } + spin_unlock(&fsvq->lock); +} + +static struct fuse_req *virtio_fs_detach_buf(struct virtqueue *vq) +{ + struct fuse_req *req; + + if (enable_dma_premap) + req = virtqueue_detach_unused_mapped_buf_split(vq); + else + req = virtqueue_detach_unused_buf(vq); + + return req; +} + +static void virtio_fs_requests_abort(struct virtio_fs_vq *fsvq) +{ + struct fuse_pqueue *fpq = &fsvq->fud->pq; + struct virtqueue *vq = fsvq->vq; + struct fuse_req *req; + struct fuse_req *next; + LIST_HEAD(reqs); + + spin_lock(&fsvq->lock); + while ((req = virtio_fs_detach_buf(vq)) != NULL) { + spin_lock(&fpq->lock); + list_move_tail(&req->list, &reqs); + spin_unlock(&fpq->lock); + } + spin_unlock(&fsvq->lock); + + list_for_each_entry_safe(req, next, &reqs, list) { + pr_err("Abort virtiofs request: %u\n", req->in.h.opcode); + list_del_init(&req->list); + virtio_fs_request_unmap(req, fsvq); + req->out.h.error = -ECONNABORTED; + virtio_fs_request_complete(req, fsvq); + } +} + +static void virtio_fs_requests_done(struct virtio_fs_vq *fsvq) +{ + struct fuse_pqueue *fpq = &fsvq->fud->pq; + struct virtqueue *vq = fsvq->vq; + struct fuse_req *req; + + do { + spin_lock(&fsvq->lock); + req = virtio_fs_get_buf(vq); + spin_unlock(&fsvq->lock); + + if (!req) + break; + + spin_lock(&fpq->lock); + list_del_init(&req->list); + spin_unlock(&fpq->lock); + virtio_fs_request_complete(req, fsvq); + } while (likely(!virtqueue_is_broken(vq))); +} + /* Virtqueue interrupt handler */ static void virtio_fs_vq_done(struct virtqueue *vq) { @@ -606,7 +776,10 @@ static void virtio_fs_vq_done(struct virtqueue *vq) dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name); - queue_work(virtiofs_wq, &fsvq->done_work); + if (irq_use_wq || in_interrupt() || vq->index == VQ_HIPRIO) + queue_work(virtiofs_wq, &fsvq->done_work); + else + virtio_fs_requests_done(fsvq); } /* Initialize virtqueues */ @@ -983,6 +1156,7 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, bool notify; struct fuse_pqueue *fpq; + req->sg = NULL; /* Does the sglist fit on the stack? */ total_sgs = sg_count_fuse_req(req); if (total_sgs > ARRAY_SIZE(stack_sgs)) { @@ -1030,7 +1204,18 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, } vq = fsvq->vq; - ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC); + if (enable_dma_premap) { + ret = virtio_fs_request_map(req, fsvq, sg, out_sgs, in_sgs); + if (ret) { + spin_unlock(&fsvq->lock); + goto out; + } + ret = virtqueue_add_mapped_buf_split(vq, sgs, total_sgs, + out_sgs, in_sgs, req, + NULL, GFP_ATOMIC); + } else + ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, + req, GFP_ATOMIC); if (ret < 0) { spin_unlock(&fsvq->lock); goto out; @@ -1055,6 +1240,9 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, virtqueue_notify(vq); out: + if (ret < 0 && req->sg) + virtio_fs_request_unmap(req, fsvq); + if (ret < 0 && req->argbuf) { kfree(req->argbuf); req->argbuf = NULL; @@ -1132,7 +1320,7 @@ static inline void virtio_fs_ctx_set_defaults(struct fuse_fs_context *ctx) ctx->max_read = UINT_MAX; ctx->blksize = 512; ctx->destroy = true; - ctx->no_control = true; + ctx->no_abort_control = true; } static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index aaec3c5b02028..77a497a4b2368 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -940,7 +940,7 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length, else if (height == ip->i_height) ret = gfs2_hole_size(inode, lblock, len, mp, iomap); else - iomap->length = size - pos; + iomap->length = size - iomap->offset; } else if (flags & IOMAP_WRITE) { u64 alloc_size; @@ -1233,13 +1233,12 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length, if (length != written && (iomap->flags & IOMAP_F_NEW)) { /* Deallocate blocks that were just allocated. */ - loff_t blockmask = i_blocksize(inode) - 1; - loff_t end = (pos + length) & ~blockmask; + loff_t hstart = round_up(pos + written, i_blocksize(inode)); + loff_t hend = iomap->offset + iomap->length; - pos = (pos + written + blockmask) & ~blockmask; - if (pos < end) { - truncate_pagecache_range(inode, pos, end - 1); - punch_hole(ip, pos, end - pos); + if (hstart < hend) { + truncate_pagecache_range(inode, hstart, hend - 1); + punch_hole(ip, hstart, hend - hstart); } } diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 72dec177b3494..94c290a333a0a 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -292,6 +292,11 @@ static void gdlm_put_lock(struct gfs2_glock *gl) gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT); gfs2_update_request_times(gl); + /* don't want to call dlm if we've unmounted the lock protocol */ + if (test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) { + gfs2_glock_free(gl); + return; + } /* don't want to skip dlm_unlock writing the lvb when lock has one */ if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) && diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index c056ed5c6df30..8153a3eac540a 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -925,15 +925,15 @@ static int read_rindex_entry(struct gfs2_inode *ip) rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes); spin_lock_init(&rgd->rd_rsspin); - error = compute_bitstructs(rgd); - if (error) - goto fail; - error = gfs2_glock_get(sdp, rgd->rd_addr, &gfs2_rgrp_glops, CREATE, &rgd->rd_gl); if (error) goto fail; + error = compute_bitstructs(rgd); + if (error) + goto fail_glock; + rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr; rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED); if (rgd->rd_data > sdp->sd_max_rg_data) @@ -950,6 +950,7 @@ static int read_rindex_entry(struct gfs2_inode *ip) } error = 0; /* someone else read in the rgrp; free it and ignore it */ +fail_glock: gfs2_glock_put(rgd->rd_gl); fail: @@ -1429,7 +1430,8 @@ int gfs2_fitrim(struct file *filp, void __user *argp) start = r.start >> bs_shift; end = start + (r.len >> bs_shift); - minlen = max_t(u64, r.minlen, + minlen = max_t(u64, r.minlen, sdp->sd_sb.sb_bsize); + minlen = max_t(u64, minlen, q->limits.discard_granularity) >> bs_shift; if (end <= start || minlen > sdp->sd_max_rg_data) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 358398b1fe0c9..7d039ba5ae28b 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -38,6 +38,7 @@ #include #include +#include static const struct super_operations hugetlbfs_ops; static const struct address_space_operations hugetlbfs_aops; @@ -200,6 +201,54 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) */ #ifndef HAVE_ARCH_HUGETLB_UNMAPPED_AREA +static unsigned long +hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct hstate *h = hstate_file(file); + struct vm_unmapped_area_info info; + + info.flags = 0; + info.length = len; + info.low_limit = current->mm->mmap_base; + info.high_limit = arch_get_mmap_end(addr); + info.align_mask = PAGE_MASK & ~huge_page_mask(h); + info.align_offset = 0; + return vm_unmapped_area(&info); +} + +static unsigned long +hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct hstate *h = hstate_file(file); + struct vm_unmapped_area_info info; + + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; + info.low_limit = max(PAGE_SIZE, mmap_min_addr); + info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base); + info.align_mask = PAGE_MASK & ~huge_page_mask(h); + info.align_offset = 0; + addr = vm_unmapped_area(&info); + + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + if (unlikely(offset_in_page(addr))) { + VM_BUG_ON(addr != -ENOMEM); + info.flags = 0; + info.low_limit = current->mm->mmap_base; + info.high_limit = arch_get_mmap_end(addr); + addr = vm_unmapped_area(&info); + } + + return addr; +} + static unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) @@ -207,7 +256,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, struct mm_struct *mm = current->mm; struct vm_area_struct *vma; struct hstate *h = hstate_file(file); - struct vm_unmapped_area_info info; + const unsigned long mmap_end = arch_get_mmap_end(addr); if (len & ~huge_page_mask(h)) return -EINVAL; @@ -223,18 +272,21 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, if (addr) { addr = ALIGN(addr, huge_page_size(h)); vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && + if (mmap_end - len >= addr && (!vma || addr + len <= vm_start_gap(vma))) return addr; } - info.flags = 0; - info.length = len; - info.low_limit = TASK_UNMAPPED_BASE; - info.high_limit = TASK_SIZE; - info.align_mask = PAGE_MASK & ~huge_page_mask(h); - info.align_offset = 0; - return vm_unmapped_area(&info); + /* + * Use mm->get_unmapped_area value as a hint to use topdown routine. + * If architectures have special needs, they should define their own + * version of hugetlb_get_unmapped_area. + */ + if (mm->get_unmapped_area == arch_get_unmapped_area_topdown) + return hugetlb_get_unmapped_area_topdown(file, addr, len, + pgoff, flags); + return hugetlb_get_unmapped_area_bottomup(file, addr, len, + pgoff, flags); } #endif diff --git a/fs/io_uring.c b/fs/io_uring.c index 682a6e5cbb7dd..5a4517890d10b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2914,9 +2914,13 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) req->flags |= REQ_F_ISREG; kiocb->ki_pos = READ_ONCE(sqe->off); - if (kiocb->ki_pos == -1 && !(req->file->f_mode & FMODE_STREAM)) { - req->flags |= REQ_F_CUR_POS; - kiocb->ki_pos = req->file->f_pos; + if (kiocb->ki_pos == -1) { + if (!(req->file->f_mode & FMODE_STREAM)) { + req->flags |= REQ_F_CUR_POS; + kiocb->ki_pos = req->file->f_pos; + } else { + kiocb->ki_pos = 0; + } } kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp)); kiocb->ki_flags = iocb_flags(kiocb->ki_filp); @@ -3307,13 +3311,15 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter) ret = nr; break; } + ret += nr; if (!iov_iter_is_bvec(iter)) { iov_iter_advance(iter, nr); } else { - req->rw.len -= nr; req->rw.addr += nr; + req->rw.len -= nr; + if (!req->rw.len) + break; } - ret += nr; if (nr != iovec.iov_len) break; } @@ -3503,6 +3509,12 @@ static int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter) return -EINVAL; } +static bool need_read_all(struct io_kiocb *req) +{ + return req->flags & REQ_F_ISREG || + S_ISBLK(file_inode(req->file)->i_mode); +} + static int io_read(struct io_kiocb *req, bool force_nonblock, struct io_comp_state *cs) { @@ -3563,7 +3575,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, /* read it all, or we did blocking attempt. no retry. */ if (!iov_iter_count(iter) || !force_nonblock || - (req->file->f_flags & O_NONBLOCK) || !(req->flags & REQ_F_ISREG)) + (req->file->f_flags & O_NONBLOCK) || !need_read_all(req)) goto done; io_size -= ret; @@ -4288,6 +4300,7 @@ static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head) } else { list_add_tail(&buf->list, &(*head)->list); } + cond_resched(); } return i ? i : -ENOMEM; @@ -6037,15 +6050,12 @@ static int io_files_update(struct io_kiocb *req, bool force_nonblock, struct io_uring_files_update up; int ret; - if (force_nonblock) - return -EAGAIN; - up.offset = req->files_update.offset; up.fds = req->files_update.arg; - mutex_lock(&ctx->uring_lock); + io_ring_submit_lock(ctx, !force_nonblock); ret = __io_sqe_files_update(ctx, &up, req->files_update.nr_args); - mutex_unlock(&ctx->uring_lock); + io_ring_submit_unlock(ctx, !force_nonblock); if (ret < 0) req_set_fail_links(req); @@ -6520,10 +6530,11 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer) * We don't expect the list to be empty, that will only happen if we * race with the completion of the linked work. */ - if (prev && refcount_inc_not_zero(&prev->refs)) + if (prev) { io_remove_next_linked(prev); - else - prev = NULL; + if (!refcount_inc_not_zero(&prev->refs)) + prev = NULL; + } spin_unlock_irqrestore(&ctx->completion_lock, flags); if (prev) { @@ -9003,7 +9014,7 @@ static void io_disable_sqo_submit(struct io_ring_ctx *ctx) { mutex_lock(&ctx->uring_lock); ctx->sqo_dead = 1; - if (ctx->flags & IORING_SETUP_R_DISABLED) + if (ctx->flags & IORING_SETUP_R_DISABLED && ctx->sq_data) io_sq_offload_start(ctx); mutex_unlock(&ctx->uring_lock); @@ -9392,9 +9403,10 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, if (ctx->flags & IORING_SETUP_SQPOLL) { io_cqring_overflow_flush(ctx, false, NULL, NULL); - ret = -EOWNERDEAD; - if (unlikely(ctx->sqo_dead)) + if (unlikely(ctx->sqo_dead)) { + ret = -EOWNERDEAD; goto out; + } if (flags & IORING_ENTER_SQ_WAKEUP) wake_up(&ctx->sq_data->wait); if (flags & IORING_ENTER_SQ_WAIT) { diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 53a625ed6ccac..a28f966a376c8 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -535,7 +535,8 @@ iomap_write_failed(struct inode *inode, loff_t pos, unsigned len) * write started inside the existing inode size. */ if (pos + len > i_size) - truncate_pagecache_range(inode, max(pos, i_size), pos + len); + truncate_pagecache_range(inode, max(pos, i_size), + pos + len - 1); } static int diff --git a/fs/iomap/swapfile.c b/fs/iomap/swapfile.c index bd0cc3dcc9807..2d18246f67266 100644 --- a/fs/iomap/swapfile.c +++ b/fs/iomap/swapfile.c @@ -30,11 +30,16 @@ static int iomap_swapfile_add_extent(struct iomap_swapfile_info *isi) { struct iomap *iomap = &isi->iomap; unsigned long nr_pages; + unsigned long max_pages; uint64_t first_ppage; uint64_t first_ppage_reported; uint64_t next_ppage; int error; + if (unlikely(isi->nr_pages >= isi->sis->max)) + return 0; + max_pages = isi->sis->max - isi->nr_pages; + /* * Round the start up and the end down so that the physical * extent aligns to a page boundary. @@ -47,6 +52,7 @@ static int iomap_swapfile_add_extent(struct iomap_swapfile_info *isi) if (first_ppage >= next_ppage) return 0; nr_pages = next_ppage - first_ppage; + nr_pages = min(nr_pages, max_pages); /* * Calculate how much swap space we're adding; the first page contains diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 62c0462dc89f3..74e487d63c62c 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -155,7 +155,6 @@ struct iso9660_options{ unsigned int overriderockperm:1; unsigned int uid_set:1; unsigned int gid_set:1; - unsigned int utf8:1; unsigned char map; unsigned char check; unsigned int blocksize; @@ -355,7 +354,6 @@ static int parse_options(char *options, struct iso9660_options *popt) popt->gid = GLOBAL_ROOT_GID; popt->uid = GLOBAL_ROOT_UID; popt->iocharset = NULL; - popt->utf8 = 0; popt->overriderockperm = 0; popt->session=-1; popt->sbsector=-1; @@ -388,10 +386,13 @@ static int parse_options(char *options, struct iso9660_options *popt) case Opt_cruft: popt->cruft = 1; break; +#ifdef CONFIG_JOLIET case Opt_utf8: - popt->utf8 = 1; + kfree(popt->iocharset); + popt->iocharset = kstrdup("utf8", GFP_KERNEL); + if (!popt->iocharset) + return 0; break; -#ifdef CONFIG_JOLIET case Opt_iocharset: kfree(popt->iocharset); popt->iocharset = match_strdup(&args[0]); @@ -494,7 +495,6 @@ static int isofs_show_options(struct seq_file *m, struct dentry *root) if (sbi->s_nocompress) seq_puts(m, ",nocompress"); if (sbi->s_overriderockperm) seq_puts(m, ",overriderockperm"); if (sbi->s_showassoc) seq_puts(m, ",showassoc"); - if (sbi->s_utf8) seq_puts(m, ",utf8"); if (sbi->s_check) seq_printf(m, ",check=%c", sbi->s_check); if (sbi->s_mapping) seq_printf(m, ",map=%c", sbi->s_mapping); @@ -517,9 +517,10 @@ static int isofs_show_options(struct seq_file *m, struct dentry *root) seq_printf(m, ",fmode=%o", sbi->s_fmode); #ifdef CONFIG_JOLIET - if (sbi->s_nls_iocharset && - strcmp(sbi->s_nls_iocharset->charset, CONFIG_NLS_DEFAULT) != 0) + if (sbi->s_nls_iocharset) seq_printf(m, ",iocharset=%s", sbi->s_nls_iocharset->charset); + else + seq_puts(m, ",iocharset=utf8"); #endif return 0; } @@ -867,14 +868,13 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent) sbi->s_nls_iocharset = NULL; #ifdef CONFIG_JOLIET - if (joliet_level && opt.utf8 == 0) { + if (joliet_level) { char *p = opt.iocharset ? opt.iocharset : CONFIG_NLS_DEFAULT; - sbi->s_nls_iocharset = load_nls(p); - if (! sbi->s_nls_iocharset) { - /* Fail only if explicit charset specified */ - if (opt.iocharset) + if (strcmp(p, "utf8") != 0) { + sbi->s_nls_iocharset = opt.iocharset ? + load_nls(opt.iocharset) : load_nls_default(); + if (!sbi->s_nls_iocharset) goto out_freesbi; - sbi->s_nls_iocharset = load_nls_default(); } } #endif @@ -890,7 +890,6 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent) sbi->s_gid = opt.gid; sbi->s_uid_set = opt.uid_set; sbi->s_gid_set = opt.gid_set; - sbi->s_utf8 = opt.utf8; sbi->s_nocompress = opt.nocompress; sbi->s_overriderockperm = opt.overriderockperm; /* @@ -1329,6 +1328,8 @@ static int isofs_read_inode(struct inode *inode, int relocated) de = (struct iso_directory_record *) (bh->b_data + offset); de_len = *(unsigned char *) de; + if (de_len < sizeof(struct iso_directory_record)) + goto fail; if (offset + de_len > bufsize) { int frag1 = bufsize - offset; diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h index 055ec6c586f7f..dcdc191ed1834 100644 --- a/fs/isofs/isofs.h +++ b/fs/isofs/isofs.h @@ -44,7 +44,6 @@ struct isofs_sb_info { unsigned char s_session; unsigned int s_high_sierra:1; unsigned int s_rock:2; - unsigned int s_utf8:1; unsigned int s_cruft:1; /* Broken disks with high byte of length * containing junk */ unsigned int s_nocompress:1; diff --git a/fs/isofs/joliet.c b/fs/isofs/joliet.c index be8b6a9d0b926..c0f04a1e7f695 100644 --- a/fs/isofs/joliet.c +++ b/fs/isofs/joliet.c @@ -41,14 +41,12 @@ uni16_to_x8(unsigned char *ascii, __be16 *uni, int len, struct nls_table *nls) int get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, struct inode * inode) { - unsigned char utf8; struct nls_table *nls; unsigned char len = 0; - utf8 = ISOFS_SB(inode->i_sb)->s_utf8; nls = ISOFS_SB(inode->i_sb)->s_nls_iocharset; - if (utf8) { + if (!nls) { len = utf16s_to_utf8s((const wchar_t *) de->name, de->name_len[0] >> 1, UTF16_BIG_ENDIAN, outname, PAGE_SIZE); diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 88146008b3e36..d45ceb2e21492 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -451,7 +451,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) } spin_unlock(&commit_transaction->t_handle_lock); commit_transaction->t_state = T_SWITCH; - write_unlock(&journal->j_state_lock); J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <= journal->j_max_transaction_buffers); @@ -471,6 +470,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) * has reserved. This is consistent with the existing behaviour * that multiple jbd2_journal_get_write_access() calls to the same * buffer are perfectly permissible. + * We use journal->j_state_lock here to serialize processing of + * t_reserved_list with eviction of buffers from journal_unmap_buffer(). */ while (commit_transaction->t_reserved_list) { jh = commit_transaction->t_reserved_list; @@ -490,6 +491,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) jbd2_journal_refile_buffer(journal, jh); } + write_unlock(&journal->j_state_lock); /* * Now try to drop any written-back buffers from the journal's * checkpoint lists. We do this *before* commit because it potentially diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c index b288c8ae1236b..837cd55fd4c5e 100644 --- a/fs/jffs2/build.c +++ b/fs/jffs2/build.c @@ -415,13 +415,15 @@ int jffs2_do_mount_fs(struct jffs2_sb_info *c) jffs2_free_ino_caches(c); jffs2_free_raw_node_refs(c); ret = -EIO; - goto out_free; + goto out_sum_exit; } jffs2_calc_trigger_levels(c); return 0; + out_sum_exit: + jffs2_sum_exit(c); out_free: kvfree(c->blocks); diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index f8fb89b10227c..34880a4c21732 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -135,20 +135,15 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, struct page *pg; struct inode *inode = mapping->host; struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); + struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); pgoff_t index = pos >> PAGE_SHIFT; uint32_t pageofs = index << PAGE_SHIFT; int ret = 0; - pg = grab_cache_page_write_begin(mapping, index, flags); - if (!pg) - return -ENOMEM; - *pagep = pg; - jffs2_dbg(1, "%s()\n", __func__); if (pageofs > inode->i_size) { /* Make new hole frag from old EOF to new page */ - struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); struct jffs2_raw_inode ri; struct jffs2_full_dnode *fn; uint32_t alloc_len; @@ -159,7 +154,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); if (ret) - goto out_page; + goto out_err; mutex_lock(&f->sem); memset(&ri, 0, sizeof(ri)); @@ -189,7 +184,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, ret = PTR_ERR(fn); jffs2_complete_reservation(c); mutex_unlock(&f->sem); - goto out_page; + goto out_err; } ret = jffs2_add_full_dnode_to_inode(c, f, fn); if (f->metadata) { @@ -204,13 +199,26 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, jffs2_free_full_dnode(fn); jffs2_complete_reservation(c); mutex_unlock(&f->sem); - goto out_page; + goto out_err; } jffs2_complete_reservation(c); inode->i_size = pageofs; mutex_unlock(&f->sem); } + /* + * While getting a page and reading data in, lock c->alloc_sem until + * the page is Uptodate. Otherwise GC task may attempt to read the same + * page in read_cache_page(), which causes a deadlock. + */ + mutex_lock(&c->alloc_sem); + pg = grab_cache_page_write_begin(mapping, index, flags); + if (!pg) { + ret = -ENOMEM; + goto release_sem; + } + *pagep = pg; + /* * Read in the page if it wasn't already present. Cannot optimize away * the whole page write case until jffs2_write_end can handle the @@ -220,15 +228,17 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, mutex_lock(&f->sem); ret = jffs2_do_readpage_nolock(inode, pg); mutex_unlock(&f->sem); - if (ret) - goto out_page; + if (ret) { + unlock_page(pg); + put_page(pg); + goto release_sem; + } } jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags); - return ret; -out_page: - unlock_page(pg); - put_page(pg); +release_sem: + mutex_unlock(&c->alloc_sem); +out_err: return ret; } diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index ab8cdd9e93259..ee2282b8c7a73 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -602,8 +602,9 @@ int jffs2_do_fill_super(struct super_block *sb, struct fs_context *fc) jffs2_free_ino_caches(c); jffs2_free_raw_node_refs(c); kvfree(c->blocks); - out_inohash: jffs2_clear_xattr_subsystem(c); + jffs2_sum_exit(c); + out_inohash: kfree(c->inocache_list); out_wbuf: jffs2_flash_cleanup(c); diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c index 0b1a7f68b7122..f73904c08b39c 100644 --- a/fs/jffs2/scan.c +++ b/fs/jffs2/scan.c @@ -136,7 +136,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) if (!s) { JFFS2_WARNING("Can't allocate memory for summary\n"); ret = -ENOMEM; - goto out; + goto out_buf; } } @@ -274,13 +274,15 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) } ret = 0; out: + jffs2_sum_reset_collected(s); + kfree(s); + out_buf: if (buf_size) kfree(flashbuf); #ifndef __ECOS else mtd_unpoint(c->mtd, 0, c->mtd->size); #endif - kfree(s); return ret; } diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index d862cfc3d3a83..62c4a5450cda2 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -146,12 +146,13 @@ void jfs_evict_inode(struct inode *inode) dquot_initialize(inode); if (JFS_IP(inode)->fileset == FILESYSTEM_I) { + struct inode *ipimap = JFS_SBI(inode->i_sb)->ipimap; truncate_inode_pages_final(&inode->i_data); if (test_cflag(COMMIT_Freewmap, inode)) jfs_free_zero_link(inode); - if (JFS_SBI(inode->i_sb)->ipimap) + if (ipimap && JFS_IP(ipimap)->i_imap) diFree(inode); /* diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 6fe82ce8663ef..d3cb27487c706 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -148,6 +148,7 @@ static const s8 budtab[256] = { * 0 - success * -ENOMEM - insufficient memory * -EIO - i/o error + * -EINVAL - wrong bmap data */ int dbMount(struct inode *ipbmap) { @@ -179,6 +180,12 @@ int dbMount(struct inode *ipbmap) bmp->db_nfree = le64_to_cpu(dbmp_le->dn_nfree); bmp->db_l2nbperpage = le32_to_cpu(dbmp_le->dn_l2nbperpage); bmp->db_numag = le32_to_cpu(dbmp_le->dn_numag); + if (!bmp->db_numag) { + release_metapage(mp); + kfree(bmp); + return -EINVAL; + } + bmp->db_maxlevel = le32_to_cpu(dbmp_le->dn_maxlevel); bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag); bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref); @@ -378,7 +385,8 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks) } /* write the last buffer. */ - write_metapage(mp); + if (mp) + write_metapage(mp); IREAD_UNLOCK(ipbmap); diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c index 616de103dccc5..d41733540df91 100644 --- a/fs/jfs/jfs_mount.c +++ b/fs/jfs/jfs_mount.c @@ -80,14 +80,14 @@ int jfs_mount(struct super_block *sb) * (initialize mount inode from the superblock) */ if ((rc = chkSuper(sb))) { - goto errout20; + goto out; } ipaimap = diReadSpecial(sb, AGGREGATE_I, 0); if (ipaimap == NULL) { jfs_err("jfs_mount: Failed to read AGGREGATE_I"); rc = -EIO; - goto errout20; + goto out; } sbi->ipaimap = ipaimap; @@ -98,7 +98,7 @@ int jfs_mount(struct super_block *sb) */ if ((rc = diMount(ipaimap))) { jfs_err("jfs_mount: diMount(ipaimap) failed w/rc = %d", rc); - goto errout21; + goto err_ipaimap; } /* @@ -107,7 +107,7 @@ int jfs_mount(struct super_block *sb) ipbmap = diReadSpecial(sb, BMAP_I, 0); if (ipbmap == NULL) { rc = -EIO; - goto errout22; + goto err_umount_ipaimap; } jfs_info("jfs_mount: ipbmap:0x%p", ipbmap); @@ -119,7 +119,7 @@ int jfs_mount(struct super_block *sb) */ if ((rc = dbMount(ipbmap))) { jfs_err("jfs_mount: dbMount failed w/rc = %d", rc); - goto errout22; + goto err_ipbmap; } /* @@ -138,7 +138,7 @@ int jfs_mount(struct super_block *sb) if (!ipaimap2) { jfs_err("jfs_mount: Failed to read AGGREGATE_I"); rc = -EIO; - goto errout35; + goto err_umount_ipbmap; } sbi->ipaimap2 = ipaimap2; @@ -150,7 +150,7 @@ int jfs_mount(struct super_block *sb) if ((rc = diMount(ipaimap2))) { jfs_err("jfs_mount: diMount(ipaimap2) failed, rc = %d", rc); - goto errout35; + goto err_ipaimap2; } } else /* Secondary aggregate inode table is not valid */ @@ -167,7 +167,7 @@ int jfs_mount(struct super_block *sb) jfs_err("jfs_mount: Failed to read FILESYSTEM_I"); /* open fileset secondary inode allocation map */ rc = -EIO; - goto errout40; + goto err_umount_ipaimap2; } jfs_info("jfs_mount: ipimap:0x%p", ipimap); @@ -177,41 +177,34 @@ int jfs_mount(struct super_block *sb) /* initialize fileset inode allocation map */ if ((rc = diMount(ipimap))) { jfs_err("jfs_mount: diMount failed w/rc = %d", rc); - goto errout41; + goto err_ipimap; } - goto out; + return rc; /* * unwind on error */ - errout41: /* close fileset inode allocation map inode */ +err_ipimap: + /* close fileset inode allocation map inode */ diFreeSpecial(ipimap); - - errout40: /* fileset closed */ - +err_umount_ipaimap2: /* close secondary aggregate inode allocation map */ - if (ipaimap2) { + if (ipaimap2) diUnmount(ipaimap2, 1); +err_ipaimap2: + /* close aggregate inodes */ + if (ipaimap2) diFreeSpecial(ipaimap2); - } - - errout35: - - /* close aggregate block allocation map */ +err_umount_ipbmap: /* close aggregate block allocation map */ dbUnmount(ipbmap, 1); +err_ipbmap: /* close aggregate inodes */ diFreeSpecial(ipbmap); - - errout22: /* close aggregate inode allocation map */ - +err_umount_ipaimap: /* close aggregate inode allocation map */ diUnmount(ipaimap, 1); - - errout21: /* close aggregate inodes */ +err_ipaimap: /* close aggregate inodes */ diFreeSpecial(ipaimap); - errout20: /* aggregate closed */ - - out: - +out: if (rc) jfs_err("Mount JFS Failure: %d", rc); diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 7d4af6cea2a63..99ee657596b5f 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -19,7 +19,15 @@ DEFINE_MUTEX(kernfs_mutex); static DEFINE_SPINLOCK(kernfs_rename_lock); /* kn->parent and ->name */ -static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by rename_lock */ +/* + * Don't use rename_lock to piggy back on pr_cont_buf. We don't want to + * call pr_cont() while holding rename_lock. Because sometimes pr_cont() + * will perform wakeups when releasing console_sem. Holding rename_lock + * will introduce deadlock if the scheduler reads the kernfs_name in the + * wakeup path. + */ +static DEFINE_SPINLOCK(kernfs_pr_cont_lock); +static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by pr_cont_lock */ static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */ #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb) @@ -230,12 +238,12 @@ void pr_cont_kernfs_name(struct kernfs_node *kn) { unsigned long flags; - spin_lock_irqsave(&kernfs_rename_lock, flags); + spin_lock_irqsave(&kernfs_pr_cont_lock, flags); - kernfs_name_locked(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf)); + kernfs_name(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf)); pr_cont("%s", kernfs_pr_cont_buf); - spin_unlock_irqrestore(&kernfs_rename_lock, flags); + spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags); } /** @@ -249,10 +257,10 @@ void pr_cont_kernfs_path(struct kernfs_node *kn) unsigned long flags; int sz; - spin_lock_irqsave(&kernfs_rename_lock, flags); + spin_lock_irqsave(&kernfs_pr_cont_lock, flags); - sz = kernfs_path_from_node_locked(kn, NULL, kernfs_pr_cont_buf, - sizeof(kernfs_pr_cont_buf)); + sz = kernfs_path_from_node(kn, NULL, kernfs_pr_cont_buf, + sizeof(kernfs_pr_cont_buf)); if (sz < 0) { pr_cont("(error)"); goto out; @@ -266,7 +274,7 @@ void pr_cont_kernfs_path(struct kernfs_node *kn) pr_cont("%s", kernfs_pr_cont_buf); out: - spin_unlock_irqrestore(&kernfs_rename_lock, flags); + spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags); } /** @@ -870,13 +878,12 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent, lockdep_assert_held(&kernfs_mutex); - /* grab kernfs_rename_lock to piggy back on kernfs_pr_cont_buf */ - spin_lock_irq(&kernfs_rename_lock); + spin_lock_irq(&kernfs_pr_cont_lock); len = strlcpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf)); if (len >= sizeof(kernfs_pr_cont_buf)) { - spin_unlock_irq(&kernfs_rename_lock); + spin_unlock_irq(&kernfs_pr_cont_lock); return NULL; } @@ -888,7 +895,7 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent, parent = kernfs_find_ns(parent, name, ns); } - spin_unlock_irq(&kernfs_rename_lock); + spin_unlock_irq(&kernfs_pr_cont_lock); return parent; } diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 61d3cc2283dc8..273a81971ed57 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -395,28 +395,10 @@ nlmsvc_release_lockowner(struct nlm_lock *lock) nlmsvc_put_lockowner(lock->fl.fl_owner); } -static void nlmsvc_locks_copy_lock(struct file_lock *new, struct file_lock *fl) -{ - struct nlm_lockowner *nlm_lo = (struct nlm_lockowner *)fl->fl_owner; - new->fl_owner = nlmsvc_get_lockowner(nlm_lo); -} - -static void nlmsvc_locks_release_private(struct file_lock *fl) -{ - nlmsvc_put_lockowner((struct nlm_lockowner *)fl->fl_owner); -} - -static const struct file_lock_operations nlmsvc_lock_ops = { - .fl_copy_lock = nlmsvc_locks_copy_lock, - .fl_release_private = nlmsvc_locks_release_private, -}; - void nlmsvc_locks_init_private(struct file_lock *fl, struct nlm_host *host, pid_t pid) { fl->fl_owner = nlmsvc_find_lockowner(host, pid); - if (fl->fl_owner != NULL) - fl->fl_ops = &nlmsvc_lock_ops; } /* @@ -634,7 +616,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, conflock->caller = "somehost"; /* FIXME */ conflock->len = strlen(conflock->caller); conflock->oh.len = 0; /* don't return OH info */ - conflock->svid = ((struct nlm_lockowner *)lock->fl.fl_owner)->pid; + conflock->svid = lock->fl.fl_pid; conflock->fl.fl_type = lock->fl.fl_type; conflock->fl.fl_start = lock->fl.fl_start; conflock->fl.fl_end = lock->fl.fl_end; @@ -788,9 +770,21 @@ nlmsvc_notify_blocked(struct file_lock *fl) printk(KERN_WARNING "lockd: notification for unknown block!\n"); } +static fl_owner_t nlmsvc_get_owner(fl_owner_t owner) +{ + return nlmsvc_get_lockowner(owner); +} + +static void nlmsvc_put_owner(fl_owner_t owner) +{ + nlmsvc_put_lockowner(owner); +} + const struct lock_manager_operations nlmsvc_lock_operations = { .lm_notify = nlmsvc_notify_blocked, .lm_grant = nlmsvc_grant_deferred, + .lm_get_owner = nlmsvc_get_owner, + .lm_put_owner = nlmsvc_put_owner, }; /* diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 7b09a9158e401..3fffc709afd43 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -447,7 +447,8 @@ static const struct address_space_operations minix_aops = { .writepage = minix_writepage, .write_begin = minix_write_begin, .write_end = generic_write_end, - .bmap = minix_bmap + .bmap = minix_bmap, + .direct_IO = noop_direct_IO }; static const struct inode_operations minix_symlink_inode_operations = { diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 8f34daf85f703..5d5227ce4d91e 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -168,7 +168,7 @@ struct cb_devicenotifyitem { }; struct cb_devicenotifyargs { - int ndevs; + uint32_t ndevs; struct cb_devicenotifyitem *devs; }; diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index fc775b0b5194f..31922657e836e 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -364,12 +364,11 @@ __be32 nfs4_callback_devicenotify(void *argp, void *resp, struct cb_process_state *cps) { struct cb_devicenotifyargs *args = argp; - int i; + const struct pnfs_layoutdriver_type *ld = NULL; + uint32_t i; __be32 res = 0; - struct nfs_client *clp = cps->clp; - struct nfs_server *server = NULL; - if (!clp) { + if (!cps->clp) { res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION); goto out; } @@ -377,23 +376,15 @@ __be32 nfs4_callback_devicenotify(void *argp, void *resp, for (i = 0; i < args->ndevs; i++) { struct cb_devicenotifyitem *dev = &args->devs[i]; - if (!server || - server->pnfs_curr_ld->id != dev->cbd_layout_type) { - rcu_read_lock(); - list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) - if (server->pnfs_curr_ld && - server->pnfs_curr_ld->id == dev->cbd_layout_type) { - rcu_read_unlock(); - goto found; - } - rcu_read_unlock(); - continue; + if (!ld || ld->id != dev->cbd_layout_type) { + pnfs_put_layoutdriver(ld); + ld = pnfs_find_layoutdriver(dev->cbd_layout_type); + if (!ld) + continue; } - - found: - nfs4_delete_deviceid(server->pnfs_curr_ld, clp, &dev->cbd_dev_id); + nfs4_delete_deviceid(ld, cps->clp, &dev->cbd_dev_id); } - + pnfs_put_layoutdriver(ld); out: kfree(args->devs); return res; diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 73a5a5ea29766..04d27f0ed39ac 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -258,11 +258,9 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp, void *argp) { struct cb_devicenotifyargs *args = argp; + uint32_t tmp, n, i; __be32 *p; __be32 status = 0; - u32 tmp; - int n, i; - args->ndevs = 0; /* Num of device notifications */ p = xdr_inline_decode(xdr, sizeof(uint32_t)); @@ -271,12 +269,8 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp, goto out; } n = ntohl(*p++); - if (n <= 0) - goto out; - if (n > ULONG_MAX / sizeof(*args->devs)) { - status = htonl(NFS4ERR_BADXDR); + if (n == 0) goto out; - } args->devs = kmalloc_array(n, sizeof(*args->devs), GFP_KERNEL); if (!args->devs) { @@ -330,19 +324,21 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp, dev->cbd_immediate = 0; } - args->ndevs++; - dprintk("%s: type %d layout 0x%x immediate %d\n", __func__, dev->cbd_notify_type, dev->cbd_layout_type, dev->cbd_immediate); } + args->ndevs = n; + dprintk("%s: ndevs %d\n", __func__, args->ndevs); + return 0; +err: + kfree(args->devs); out: + args->devs = NULL; + args->ndevs = 0; dprintk("%s: status %d ndevs %d\n", __func__, ntohl(status), args->ndevs); return status; -err: - kfree(args->devs); - goto out; } static __be32 decode_sessionid(struct xdr_stream *xdr, diff --git a/fs/nfs/client.c b/fs/nfs/client.c index af838d1ed281c..35abe63655a9d 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -176,6 +176,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) INIT_LIST_HEAD(&clp->cl_superblocks); clp->cl_rpcclient = ERR_PTR(-EINVAL); + clp->cl_flags = cl_init->init_flags; clp->cl_proto = cl_init->proto; clp->cl_nconnect = cl_init->nconnect; clp->cl_net = get_net(cl_init->net); @@ -419,7 +420,6 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init) list_add_tail(&new->cl_share_link, &nn->nfs_client_list); spin_unlock(&nn->nfs_client_lock); - new->cl_flags = cl_init->init_flags; return rpc_ops->init_client(new, cl_init); } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e7c0790308fe0..28ceee102d0b3 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1638,6 +1638,24 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, no_open: res = nfs_lookup(dir, dentry, lookup_flags); + if (!res) { + inode = d_inode(dentry); + if ((lookup_flags & LOOKUP_DIRECTORY) && inode && + !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) + res = ERR_PTR(-ENOTDIR); + else if (inode && S_ISREG(inode->i_mode)) + res = ERR_PTR(-EOPENSTALE); + } else if (!IS_ERR(res)) { + inode = d_inode(res); + if ((lookup_flags & LOOKUP_DIRECTORY) && inode && + !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) { + dput(res); + res = ERR_PTR(-ENOTDIR); + } else if (inode && S_ISREG(inode->i_mode)) { + dput(res); + res = ERR_PTR(-EOPENSTALE); + } + } if (switched) { d_lookup_done(dentry); if (!res) @@ -2035,6 +2053,8 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) trace_nfs_link_enter(inode, dir, dentry); d_drop(dentry); + if (S_ISREG(inode->i_mode)) + nfs_sync_inode(inode); error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); if (error == 0) { ihold(inode); @@ -2123,6 +2143,8 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, } } + if (S_ISREG(old_inode->i_mode)) + nfs_sync_inode(old_inode); task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL); if (IS_ERR(task)) { error = PTR_ERR(task); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 6b0bf4ebd8124..0682037f972be 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -272,8 +272,8 @@ ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE); if (iov_iter_rw(iter) == READ) - return nfs_file_direct_read(iocb, iter); - return nfs_file_direct_write(iocb, iter); + return nfs_file_direct_read(iocb, iter, true); + return nfs_file_direct_write(iocb, iter, true); } static void nfs_direct_release_pages(struct page **pages, unsigned int npages) @@ -524,6 +524,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, * nfs_file_direct_read - file direct read operation for NFS files * @iocb: target I/O control block * @iter: vector of user buffers into which to read data + * @swap: flag indicating this is swap IO, not O_DIRECT IO * * We use this function for direct reads instead of calling * generic_file_aio_read() in order to avoid gfar's check to see if @@ -539,7 +540,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, * client must read the updated atime from the server back into its * cache. */ -ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) +ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, + bool swap) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -581,12 +583,14 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) if (iter_is_iovec(iter)) dreq->flags = NFS_ODIRECT_SHOULD_DIRTY; - nfs_start_io_direct(inode); + if (!swap) + nfs_start_io_direct(inode); NFS_I(inode)->read_io += count; requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos); - nfs_end_io_direct(inode); + if (!swap) + nfs_end_io_direct(inode); if (requested > 0) { result = nfs_direct_wait(dreq); @@ -851,7 +855,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { */ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, struct iov_iter *iter, - loff_t pos) + loff_t pos, int ioflags) { struct nfs_pageio_descriptor desc; struct inode *inode = dreq->inode; @@ -859,7 +863,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, size_t requested_bytes = 0; size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE); - nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false, + nfs_pageio_init_write(&desc, inode, ioflags, false, &nfs_direct_write_completion_ops); desc.pg_dreq = dreq; get_dreq(dreq); @@ -937,6 +941,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, * nfs_file_direct_write - file direct write operation for NFS files * @iocb: target I/O control block * @iter: vector of user buffers from which to write data + * @swap: flag indicating this is swap IO, not O_DIRECT IO * * We use this function for direct writes instead of calling * generic_file_aio_write() in order to avoid taking the inode @@ -953,7 +958,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, * Note that O_APPEND is not supported for NFS direct writes, as there * is no atomic O_APPEND write facility in the NFS protocol. */ -ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) +ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, + bool swap) { ssize_t result = -EINVAL, requested; size_t count; @@ -967,7 +973,11 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n", file, iov_iter_count(iter), (long long) iocb->ki_pos); - result = generic_write_checks(iocb, iter); + if (swap) + /* bypass generic checks */ + result = iov_iter_count(iter); + else + result = generic_write_checks(iocb, iter); if (result <= 0) return result; count = result; @@ -997,16 +1007,22 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; - nfs_start_io_direct(inode); + if (swap) { + requested = nfs_direct_write_schedule_iovec(dreq, iter, pos, + FLUSH_STABLE); + } else { + nfs_start_io_direct(inode); - requested = nfs_direct_write_schedule_iovec(dreq, iter, pos); + requested = nfs_direct_write_schedule_iovec(dreq, iter, pos, + FLUSH_COND_STABLE); - if (mapping->nrpages) { - invalidate_inode_pages2_range(mapping, - pos >> PAGE_SHIFT, end); - } + if (mapping->nrpages) { + invalidate_inode_pages2_range(mapping, + pos >> PAGE_SHIFT, end); + } - nfs_end_io_direct(inode); + nfs_end_io_direct(inode); + } if (requested > 0) { result = nfs_direct_wait(dreq); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 387a2cfa7e172..3233da79d49a4 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -161,7 +161,7 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to) ssize_t result; if (iocb->ki_flags & IOCB_DIRECT) - return nfs_file_direct_read(iocb, to); + return nfs_file_direct_read(iocb, to, false); dprintk("NFS: read(%pD2, %zu@%lu)\n", iocb->ki_filp, @@ -394,11 +394,8 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, return status; NFS_I(mapping->host)->write_io += copied; - if (nfs_ctx_key_to_expire(ctx, mapping->host)) { - status = nfs_wb_all(mapping->host); - if (status < 0) - return status; - } + if (nfs_ctx_key_to_expire(ctx, mapping->host)) + nfs_wb_all(mapping->host); return copied; } @@ -609,7 +606,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) return result; if (iocb->ki_flags & IOCB_DIRECT) - return nfs_file_direct_write(iocb, from); + return nfs_file_direct_write(iocb, from, false); dprintk("NFS: write(%pD2, %zu@%Ld)\n", file, iov_iter_count(from), (long long) iocb->ki_pos); diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 3eda40a320a53..1f12297109b41 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -378,10 +378,10 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, goto noconnect; ds = mirror->mirror_ds->ds; + if (READ_ONCE(ds->ds_clp)) + goto out; /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */ smp_rmb(); - if (ds->ds_clp) - goto out; /* FIXME: For now we assume the server sent only one version of NFS * to use for the DS. diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 209263c0c5377..3bddf5332b6d6 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -791,12 +791,9 @@ int nfs_getattr(const struct path *path, struct kstat *stat, goto out_no_update; /* Flush out writes to the server in order to update c/mtime. */ - if ((request_mask & (STATX_CTIME|STATX_MTIME)) && - S_ISREG(inode->i_mode)) { - err = filemap_write_and_wait(inode->i_mapping); - if (err) - goto out; - } + if ((request_mask & (STATX_CTIME | STATX_MTIME)) && + S_ISREG(inode->i_mode)) + filemap_write_and_wait(inode->i_mapping); /* * We may force a getattr if the user cares about atime. diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 9e717796e57b7..a4dc182e8989b 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -775,6 +775,7 @@ static inline bool nfs_error_is_fatal_on_server(int err) case 0: case -ERESTARTSYS: case -EINTR: + case -ENOMEM: return false; } return nfs_error_is_fatal(err); diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 887f9136a9db3..af557dc2cfe1d 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -953,7 +953,7 @@ int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, error = decode_filename_inline(xdr, &entry->name, &entry->len); if (unlikely(error)) - return error; + return -EAGAIN; /* * The type (size and byte order) of nfscookie isn't defined in diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 23d75cddbb2ee..84369d51353a5 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1968,7 +1968,6 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, bool plus) { struct user_namespace *userns = rpc_userns(entry->server->client); - struct nfs_entry old = *entry; __be32 *p; int error; u64 new_cookie; @@ -1988,15 +1987,15 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, error = decode_fileid3(xdr, &entry->ino); if (unlikely(error)) - return error; + return -EAGAIN; error = decode_inline_filename3(xdr, &entry->name, &entry->len); if (unlikely(error)) - return error; + return -EAGAIN; error = decode_cookie3(xdr, &new_cookie); if (unlikely(error)) - return error; + return -EAGAIN; entry->d_type = DT_UNKNOWN; @@ -2004,7 +2003,7 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, entry->fattr->valid = 0; error = decode_post_op_attr(xdr, entry->fattr, userns); if (unlikely(error)) - return error; + return -EAGAIN; if (entry->fattr->valid & NFS_ATTR_FATTR_V3) entry->d_type = nfs_umode_to_dtype(entry->fattr->mode); @@ -2019,11 +2018,8 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, return -EAGAIN; if (*p != xdr_zero) { error = decode_nfs_fh3(xdr, entry->fh); - if (unlikely(error)) { - if (error == -E2BIG) - goto out_truncated; - return error; - } + if (unlikely(error)) + return -EAGAIN; } else zero_nfs_fh3(entry->fh); } @@ -2032,11 +2028,6 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, entry->cookie = new_cookie; return 0; - -out_truncated: - dprintk("NFS: directory entry contains invalid file handle\n"); - *entry = old; - return -EAGAIN; } /* diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 6b7c926824ae0..504812ea4bc29 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -295,8 +295,9 @@ static ssize_t _nfs42_proc_copy(struct file *src, goto out; } - truncate_pagecache_range(dst_inode, pos_dst, - pos_dst + res->write_res.count); + WARN_ON_ONCE(invalidate_inode_pages2_range(dst_inode->i_mapping, + pos_dst >> PAGE_SHIFT, + (pos_dst + res->write_res.count - 1) >> PAGE_SHIFT)); status = res->write_res.count; out: diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index aed865a846296..2b78f7b8d5467 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -769,8 +769,7 @@ static int nfs4_xdr_dec_clone(struct rpc_rqst *rqstp, status = decode_clone(xdr); if (status) goto out; - status = decode_getfattr(xdr, res->dst_fattr, res->server); - + decode_getfattr(xdr, res->dst_fattr, res->server); out: res->rpc_status = status; return status; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 5708b5a636f19..ebd77a301057e 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -279,7 +279,8 @@ struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *, struct nfs_fh *, struct nfs_fattr *); int nfs4_replace_transport(struct nfs_server *server, const struct nfs4_fs_locations *locations); - +size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa, + size_t salen, struct net *net); /* nfs4proc.c */ extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception *); extern int nfs4_async_handle_error(struct rpc_task *task, diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 8cace8350fa3d..3671a51fe5ebc 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1293,8 +1293,11 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname, } nfs_put_client(clp); - if (server->nfs_client->cl_hostname == NULL) + if (server->nfs_client->cl_hostname == NULL) { server->nfs_client->cl_hostname = kstrdup(hostname, GFP_KERNEL); + if (server->nfs_client->cl_hostname == NULL) + return -ENOMEM; + } nfs_server_insert_lists(server); return nfs_probe_destination(server); diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 2e460c33ae487..768258848a684 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -121,8 +121,8 @@ static int nfs4_validate_fspath(struct dentry *dentry, return 0; } -static size_t nfs_parse_server_name(char *string, size_t len, - struct sockaddr *sa, size_t salen, struct net *net) +size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa, + size_t salen, struct net *net) { ssize_t ret; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5ecaf7b6b0fa1..ba4a03a69fbf0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -359,6 +359,14 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent kunmap_atomic(start); } +static void nfs4_fattr_set_prechange(struct nfs_fattr *fattr, u64 version) +{ + if (!(fattr->valid & NFS_ATTR_FATTR_PRECHANGE)) { + fattr->pre_change_attr = version; + fattr->valid |= NFS_ATTR_FATTR_PRECHANGE; + } +} + static void nfs4_test_and_free_stateid(struct nfs_server *server, nfs4_stateid *stateid, const struct cred *cred) @@ -1549,15 +1557,16 @@ static bool nfs_stateid_is_sequential(struct nfs4_state *state, { if (test_bit(NFS_OPEN_STATE, &state->flags)) { /* The common case - we're updating to a new sequence number */ - if (nfs4_stateid_match_other(stateid, &state->open_stateid) && - nfs4_stateid_is_next(&state->open_stateid, stateid)) { - return true; + if (nfs4_stateid_match_other(stateid, &state->open_stateid)) { + if (nfs4_stateid_is_next(&state->open_stateid, stateid)) + return true; + return false; } - } else { - /* This is the first OPEN in this generation */ - if (stateid->seqid == cpu_to_be32(1)) - return true; + /* The server returned a new stateid */ } + /* This is the first OPEN in this generation */ + if (stateid->seqid == cpu_to_be32(1)) + return true; return false; } @@ -3032,6 +3041,10 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, } out: + if (opendata->lgp) { + nfs4_lgopen_release(opendata->lgp); + opendata->lgp = NULL; + } if (!opendata->cancelled) nfs4_sequence_free_slot(&opendata->o_res.seq_res); return ret; @@ -6306,7 +6319,9 @@ static void nfs4_delegreturn_release(void *calldata) pnfs_roc_release(&data->lr.arg, &data->lr.res, data->res.lr_ret); if (inode) { - nfs_post_op_update_inode_force_wcc(inode, &data->fattr); + nfs4_fattr_set_prechange(&data->fattr, + inode_peek_iversion_raw(inode)); + nfs_refresh_inode(inode, &data->fattr); nfs_iput_and_deactive(inode); } kfree(calldata); @@ -7917,6 +7932,7 @@ nfs4_bind_one_conn_to_session_done(struct rpc_task *task, void *calldata) case -NFS4ERR_DEADSESSION: nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); + return; } if (args->dir == NFS4_CDFC4_FORE_OR_BOTH && res->dir != NFS4_CDFS4_BOTH) { diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index ea680f619438b..1d2b81a233bbb 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -49,6 +49,7 @@ #include #include #include +#include #include @@ -2070,6 +2071,9 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred } result = -NFS4ERR_NXIO; + if (!locations->nlocations) + goto out; + if (!(locations->fattr.valid & NFS_ATTR_FATTR_V4_LOCATIONS)) { dprintk("<-- %s: No fs_locations data, migration skipped\n", __func__); @@ -2501,9 +2505,17 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp) static void nfs4_state_manager(struct nfs_client *clp) { + unsigned int memflags; int status = 0; const char *section = "", *section_sep = ""; + /* + * State recovery can deadlock if the direct reclaim code tries + * start NFS writeback. So ensure memory allocations are all + * GFP_NOFS. + */ + memflags = memalloc_nofs_save(); + /* Ensure exclusive access to NFSv4 state */ do { clear_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); @@ -2597,6 +2609,7 @@ static void nfs4_state_manager(struct nfs_client *clp) clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state); } + memalloc_nofs_restore(memflags); nfs4_end_drain_session(clp); nfs4_clear_state_manager_bit(clp); @@ -2613,6 +2626,7 @@ static void nfs4_state_manager(struct nfs_client *clp) return; if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) return; + memflags = memalloc_nofs_save(); } while (refcount_read(&clp->cl_count) > 1 && !signalled()); goto out_drain; @@ -2624,6 +2638,7 @@ static void nfs4_state_manager(struct nfs_client *clp) clp->cl_hostname, -status); ssleep(1); out_drain: + memalloc_nofs_restore(memflags); nfs4_end_drain_session(clp); nfs4_clear_state_manager_bit(clp); } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 9a022a4fb9643..2b7741fe42ead 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3683,8 +3683,6 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st if (unlikely(!p)) goto out_eio; n = be32_to_cpup(p); - if (n <= 0) - goto out_eio; for (res->nlocations = 0; res->nlocations < n; res->nlocations++) { u32 m; struct nfs4_fs_location *loc; @@ -4187,10 +4185,11 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap, } else printk(KERN_WARNING "%s: label too long (%u)!\n", __func__, len); + if (label && label->label) + dprintk("%s: label=%.*s, len=%d, PI=%d, LFS=%d\n", + __func__, label->len, (char *)label->label, + label->len, label->pi, label->lfs); } - if (label && label->label) - dprintk("%s: label=%s, len=%d, PI=%d, LFS=%d\n", __func__, - (char *)label->label, label->len, label->pi, label->lfs); return status; } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 1b512df1003f9..3b19fa74b0620 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -92,6 +92,17 @@ find_pnfs_driver(u32 id) return local; } +const struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id) +{ + return find_pnfs_driver(id); +} + +void pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld) +{ + if (ld) + module_put(ld->owner); +} + void unset_pnfs_layoutdriver(struct nfs_server *nfss) { @@ -1950,6 +1961,7 @@ pnfs_update_layout(struct inode *ino, lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); if (lo == NULL) { spin_unlock(&ino->i_lock); + lseg = ERR_PTR(-ENOMEM); trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_NOMEM); goto out; @@ -2079,6 +2091,7 @@ pnfs_update_layout(struct inode *ino, lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &stateid, &arg, gfp_flags); if (!lgp) { + lseg = ERR_PTR(-ENOMEM); trace_pnfs_update_layout(ino, pos, count, iomode, lo, NULL, PNFS_UPDATE_LAYOUT_NOMEM); nfs_layoutget_end(lo); @@ -2098,6 +2111,12 @@ pnfs_update_layout(struct inode *ino, case -ERECALLCONFLICT: case -EAGAIN: break; + case -ENODATA: + /* The server returned NFS4ERR_LAYOUTUNAVAILABLE */ + pnfs_layout_set_fail_bit( + lo, pnfs_iomode_to_fail_bit(iomode)); + lseg = NULL; + goto out_put_layout_hdr; default: if (!nfs_error_is_fatal(PTR_ERR(lseg))) { pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 3d55edd6b25ad..68339680bb7d1 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -226,6 +226,8 @@ struct pnfs_devicelist { extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); +extern const struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id); +extern void pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld); /* nfs4proc.c */ extern size_t max_response_pages(struct nfs_server *server); diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 249cf9037dbd7..aff44a7b98f86 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -641,7 +641,7 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, } smp_wmb(); - ds->ds_clp = clp; + WRITE_ONCE(ds->ds_clp, clp); dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); out: return status; @@ -714,7 +714,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, } smp_wmb(); - ds->ds_clp = clp; + WRITE_ONCE(ds->ds_clp, clp); dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); out: return status; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 613c3ef23e07b..10ce264a64567 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -692,11 +692,7 @@ static int nfs_writepage_locked(struct page *page, err = nfs_do_writepage(page, wbc, &pgio); pgio.pg_error = 0; nfs_pageio_complete(&pgio); - if (err < 0) - return err; - if (nfs_error_is_fatal(pgio.pg_error)) - return pgio.pg_error; - return 0; + return err; } int nfs_writepage(struct page *page, struct writeback_control *wbc) @@ -747,9 +743,6 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) if (err < 0) goto out_err; - err = pgio.pg_error; - if (nfs_error_is_fatal(err)) - goto out_err; return 0; out_err: return err; @@ -1050,25 +1043,11 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, struct nfs_page *req, *tmp; int ret = 0; -restart: list_for_each_entry_safe(req, tmp, src, wb_list) { kref_get(&req->wb_kref); if (!nfs_lock_request(req)) { - int status; - - /* Prevent deadlock with nfs_lock_and_join_requests */ - if (!list_empty(dst)) { - nfs_release_request(req); - continue; - } - /* Ensure we make progress to prevent livelock */ - mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); - status = nfs_wait_on_request(req); nfs_release_request(req); - mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); - if (status < 0) - break; - goto restart; + continue; } nfs_request_remove_commit_list(req, cinfo); clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); @@ -1443,7 +1422,7 @@ static void nfs_async_write_error(struct list_head *head, int error) while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); - if (nfs_error_is_fatal(error)) + if (nfs_error_is_fatal_on_server(error)) nfs_write_error(req, error); else nfs_redirty_request(req); @@ -1935,6 +1914,7 @@ static int __nfs_commit_inode(struct inode *inode, int how, int may_wait = how & FLUSH_SYNC; int ret, nscan; + how &= ~FLUSH_SYNC; nfs_init_cinfo_from_inode(&cinfo, inode); nfs_commit_begin(cinfo.mds); for (;;) { diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 662937472e9bd..79ad6b2c96082 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -44,6 +44,17 @@ struct nfsd_fcache_bucket { static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits); +struct nfsd_fcache_disposal { + struct list_head list; + struct work_struct work; + struct net *net; + spinlock_t lock; + struct list_head freeme; + struct rcu_head rcu; +}; + +struct workqueue_struct *nfsd_filecache_wq __read_mostly; + static struct kmem_cache *nfsd_file_slab; static struct kmem_cache *nfsd_file_mark_slab; static struct nfsd_fcache_bucket *nfsd_file_hashtbl; @@ -52,32 +63,21 @@ static long nfsd_file_lru_flags; static struct fsnotify_group *nfsd_file_fsnotify_group; static atomic_long_t nfsd_filecache_count; static struct delayed_work nfsd_filecache_laundrette; +static DEFINE_SPINLOCK(laundrette_lock); +static LIST_HEAD(laundrettes); -enum nfsd_file_laundrette_ctl { - NFSD_FILE_LAUNDRETTE_NOFLUSH = 0, - NFSD_FILE_LAUNDRETTE_MAY_FLUSH -}; +static void nfsd_file_gc(void); static void -nfsd_file_schedule_laundrette(enum nfsd_file_laundrette_ctl ctl) +nfsd_file_schedule_laundrette(void) { long count = atomic_long_read(&nfsd_filecache_count); if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags)) return; - /* Be more aggressive about scanning if over the threshold */ - if (count > NFSD_FILE_LRU_THRESHOLD) - mod_delayed_work(system_wq, &nfsd_filecache_laundrette, 0); - else - schedule_delayed_work(&nfsd_filecache_laundrette, NFSD_LAUNDRETTE_DELAY); - - if (ctl == NFSD_FILE_LAUNDRETTE_NOFLUSH) - return; - - /* ...and don't delay flushing if we're out of control */ - if (count >= NFSD_FILE_LRU_LIMIT) - flush_delayed_work(&nfsd_filecache_laundrette); + queue_delayed_work(system_wq, &nfsd_filecache_laundrette, + NFSD_LAUNDRETTE_DELAY); } static void @@ -260,8 +260,6 @@ nfsd_file_do_unhash(struct nfsd_file *nf) nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id)); --nfsd_file_hashtbl[nf->nf_hashval].nfb_count; hlist_del_rcu(&nf->nf_node); - if (!list_empty(&nf->nf_lru)) - list_lru_del(&nfsd_file_lru, &nf->nf_lru); atomic_long_dec(&nfsd_filecache_count); } @@ -270,6 +268,8 @@ nfsd_file_unhash(struct nfsd_file *nf) { if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { nfsd_file_do_unhash(nf); + if (!list_empty(&nf->nf_lru)) + list_lru_del(&nfsd_file_lru, &nf->nf_lru); return true; } return false; @@ -316,7 +316,9 @@ nfsd_file_put(struct nfsd_file *nf) set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); if (nfsd_file_put_noref(nf) == 1 && is_hashed && unused) - nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_MAY_FLUSH); + nfsd_file_schedule_laundrette(); + if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT) + nfsd_file_gc(); } struct nfsd_file * @@ -357,6 +359,58 @@ nfsd_file_dispose_list_sync(struct list_head *dispose) flush_delayed_fput(); } +static void +nfsd_file_list_remove_disposal(struct list_head *dst, + struct nfsd_fcache_disposal *l) +{ + spin_lock(&l->lock); + list_splice_init(&l->freeme, dst); + spin_unlock(&l->lock); +} + +static void +nfsd_file_list_add_disposal(struct list_head *files, struct net *net) +{ + struct nfsd_fcache_disposal *l; + + rcu_read_lock(); + list_for_each_entry_rcu(l, &laundrettes, list) { + if (l->net == net) { + spin_lock(&l->lock); + list_splice_tail_init(files, &l->freeme); + spin_unlock(&l->lock); + queue_work(nfsd_filecache_wq, &l->work); + break; + } + } + rcu_read_unlock(); +} + +static void +nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src, + struct net *net) +{ + struct nfsd_file *nf, *tmp; + + list_for_each_entry_safe(nf, tmp, src, nf_lru) { + if (nf->nf_net == net) + list_move_tail(&nf->nf_lru, dst); + } +} + +static void +nfsd_file_dispose_list_delayed(struct list_head *dispose) +{ + LIST_HEAD(list); + struct nfsd_file *nf; + + while(!list_empty(dispose)) { + nf = list_first_entry(dispose, struct nfsd_file, nf_lru); + nfsd_file_list_add_pernet(&list, dispose, nf->nf_net); + nfsd_file_list_add_disposal(&list, nf->nf_net); + } +} + /* * Note this can deadlock with nfsd_file_cache_purge. */ @@ -403,18 +457,40 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, return LRU_SKIP; } -static void -nfsd_file_lru_dispose(struct list_head *head) +static unsigned long +nfsd_file_lru_walk_list(struct shrink_control *sc) { - while(!list_empty(head)) { - struct nfsd_file *nf = list_first_entry(head, - struct nfsd_file, nf_lru); - list_del_init(&nf->nf_lru); + LIST_HEAD(head); + struct nfsd_file *nf; + unsigned long ret; + + if (sc) + ret = list_lru_shrink_walk(&nfsd_file_lru, sc, + nfsd_file_lru_cb, &head); + else + ret = list_lru_walk(&nfsd_file_lru, + nfsd_file_lru_cb, + &head, LONG_MAX); + list_for_each_entry(nf, &head, nf_lru) { spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); nfsd_file_do_unhash(nf); spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); - nfsd_file_put_noref(nf); } + nfsd_file_dispose_list_delayed(&head); + return ret; +} + +static void +nfsd_file_gc(void) +{ + nfsd_file_lru_walk_list(NULL); +} + +static void +nfsd_file_gc_worker(struct work_struct *work) +{ + nfsd_file_gc(); + nfsd_file_schedule_laundrette(); } static unsigned long @@ -426,12 +502,7 @@ nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc) static unsigned long nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) { - LIST_HEAD(head); - unsigned long ret; - - ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &head); - nfsd_file_lru_dispose(&head); - return ret; + return nfsd_file_lru_walk_list(sc); } static struct shrinker nfsd_file_shrinker = { @@ -493,7 +564,7 @@ nfsd_file_close_inode(struct inode *inode) __nfsd_file_close_inode(inode, hashval, &dispose); trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose)); - nfsd_file_dispose_list(&dispose); + nfsd_file_dispose_list_delayed(&dispose); } /** @@ -509,16 +580,11 @@ static void nfsd_file_delayed_close(struct work_struct *work) { LIST_HEAD(head); + struct nfsd_fcache_disposal *l = container_of(work, + struct nfsd_fcache_disposal, work); - list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, &head, LONG_MAX); - - if (test_and_clear_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags)) - nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_NOFLUSH); - - if (!list_empty(&head)) { - nfsd_file_lru_dispose(&head); - flush_delayed_fput(); - } + nfsd_file_list_remove_disposal(&head, l); + nfsd_file_dispose_list(&head); } static int @@ -579,6 +645,10 @@ nfsd_file_cache_init(void) if (nfsd_file_hashtbl) return 0; + nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0); + if (!nfsd_filecache_wq) + goto out; + nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE, sizeof(*nfsd_file_hashtbl), GFP_KERNEL); if (!nfsd_file_hashtbl) { @@ -632,7 +702,7 @@ nfsd_file_cache_init(void) spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock); } - INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_delayed_close); + INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker); out: return ret; out_notifier: @@ -648,6 +718,8 @@ nfsd_file_cache_init(void) nfsd_file_mark_slab = NULL; kfree(nfsd_file_hashtbl); nfsd_file_hashtbl = NULL; + destroy_workqueue(nfsd_filecache_wq); + nfsd_filecache_wq = NULL; goto out; } @@ -686,6 +758,88 @@ nfsd_file_cache_purge(struct net *net) } } +static struct nfsd_fcache_disposal * +nfsd_alloc_fcache_disposal(struct net *net) +{ + struct nfsd_fcache_disposal *l; + + l = kmalloc(sizeof(*l), GFP_KERNEL); + if (!l) + return NULL; + INIT_WORK(&l->work, nfsd_file_delayed_close); + l->net = net; + spin_lock_init(&l->lock); + INIT_LIST_HEAD(&l->freeme); + return l; +} + +static void +nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l) +{ + rcu_assign_pointer(l->net, NULL); + cancel_work_sync(&l->work); + nfsd_file_dispose_list(&l->freeme); + kfree_rcu(l, rcu); +} + +static void +nfsd_add_fcache_disposal(struct nfsd_fcache_disposal *l) +{ + spin_lock(&laundrette_lock); + list_add_tail_rcu(&l->list, &laundrettes); + spin_unlock(&laundrette_lock); +} + +static void +nfsd_del_fcache_disposal(struct nfsd_fcache_disposal *l) +{ + spin_lock(&laundrette_lock); + list_del_rcu(&l->list); + spin_unlock(&laundrette_lock); +} + +static int +nfsd_alloc_fcache_disposal_net(struct net *net) +{ + struct nfsd_fcache_disposal *l; + + l = nfsd_alloc_fcache_disposal(net); + if (!l) + return -ENOMEM; + nfsd_add_fcache_disposal(l); + return 0; +} + +static void +nfsd_free_fcache_disposal_net(struct net *net) +{ + struct nfsd_fcache_disposal *l; + + rcu_read_lock(); + list_for_each_entry_rcu(l, &laundrettes, list) { + if (l->net != net) + continue; + nfsd_del_fcache_disposal(l); + rcu_read_unlock(); + nfsd_free_fcache_disposal(l); + return; + } + rcu_read_unlock(); +} + +int +nfsd_file_cache_start_net(struct net *net) +{ + return nfsd_alloc_fcache_disposal_net(net); +} + +void +nfsd_file_cache_shutdown_net(struct net *net) +{ + nfsd_file_cache_purge(net); + nfsd_free_fcache_disposal_net(net); +} + void nfsd_file_cache_shutdown(void) { @@ -712,6 +866,8 @@ nfsd_file_cache_shutdown(void) nfsd_file_mark_slab = NULL; kfree(nfsd_file_hashtbl); nfsd_file_hashtbl = NULL; + destroy_workqueue(nfsd_filecache_wq); + nfsd_filecache_wq = NULL; } static bool @@ -881,7 +1037,8 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount, nfsd_file_hashtbl[hashval].nfb_count); spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); - atomic_long_inc(&nfsd_filecache_count); + if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD) + nfsd_file_gc(); nf->nf_mark = nfsd_file_mark_find_or_create(nf); if (nf->nf_mark) diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index 851d9abf54c25..79a7d6808d979 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -51,6 +51,8 @@ struct nfsd_file { int nfsd_file_cache_init(void); void nfsd_file_cache_purge(struct net *); void nfsd_file_cache_shutdown(void); +int nfsd_file_cache_start_net(struct net *net); +void nfsd_file_cache_shutdown_net(struct net *net); void nfsd_file_put(struct nfsd_file *nf); struct nfsd_file *nfsd_file_get(struct nfsd_file *nf); void nfsd_file_close_inode_sync(struct inode *inode); diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index cea68d8411ac5..bb97c020c96b1 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -195,6 +195,11 @@ nfsd3_proc_write(struct svc_rqst *rqstp) (unsigned long long) argp->offset, argp->stable? " stable" : ""); + resp->status = nfserr_fbig; + if (argp->offset > (u64)OFFSET_MAX || + argp->offset + argp->len > (u64)OFFSET_MAX) + return rpc_success; + fh_copy(&resp->fh, &argp->fh); resp->committed = argp->stable; nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages, diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 4798667af647c..452ed633a2c76 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -992,8 +992,9 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, unsigned long cnt; int nvecs; - if (write->wr_offset >= OFFSET_MAX) - return nfserr_inval; + if (write->wr_offset > (u64)OFFSET_MAX || + write->wr_offset + write->wr_buflen > (u64)OFFSET_MAX) + return nfserr_fbig; cnt = write->wr_buflen; trace_nfsd_write_start(rqstp, &cstate->current_fh, diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index c35c0ebaf722c..7d408957ed623 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -2177,6 +2177,7 @@ static struct notifier_block nfsd4_cld_block = { int register_cld_notifier(void) { + WARN_ON(!nfsd_net_id); return rpc_pipefs_notifier_register(&nfsd4_cld_block); } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 56b33065f5ab6..228c2b0753dcf 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2577,9 +2577,9 @@ static void force_expire_client(struct nfs4_client *clp) struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); bool already_expired; - spin_lock(&clp->cl_lock); + spin_lock(&nn->client_lock); clp->cl_time = 0; - spin_unlock(&clp->cl_lock); + spin_unlock(&nn->client_lock); wait_event(expiry_wq, atomic_read(&clp->cl_rpc_users) == 0); spin_lock(&nn->client_lock); @@ -3941,8 +3941,10 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, status = nfserr_clid_inuse; if (client_has_state(old) && !same_creds(&unconf->cl_cred, - &old->cl_cred)) + &old->cl_cred)) { + old = NULL; goto out; + } status = mark_client_expired_locked(old); if (status) { old = NULL; @@ -6892,16 +6894,12 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, if (sop->so_is_open_owner || !same_owner_str(sop, owner)) continue; - /* see if there are still any locks associated with it */ - lo = lockowner(sop); - list_for_each_entry(stp, &sop->so_stateids, st_perstateowner) { - if (check_for_locks(stp->st_stid.sc_file, lo)) { - status = nfserr_locks_held; - spin_unlock(&clp->cl_lock); - return status; - } + if (atomic_read(&sop->so_count) != 1) { + spin_unlock(&clp->cl_lock); + return nfserr_locks_held; } + lo = lockowner(sop); nfs4_get_stateowner(sop); break; } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index d6f244559e759..e61d9c4359573 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3131,15 +3131,18 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, goto fail; cd->rd_maxcount -= entry_bytes; /* - * RFC 3530 14.2.24 describes rd_dircount as only a "hint", so - * let's always let through the first entry, at least: + * RFC 3530 14.2.24 describes rd_dircount as only a "hint", and + * notes that it could be zero. If it is zero, then the server + * should enforce only the rd_maxcount value. */ - if (!cd->rd_dircount) - goto fail; - name_and_cookie = 4 + 4 * XDR_QUADLEN(namlen) + 8; - if (name_and_cookie > cd->rd_dircount && cd->cookie_offset) - goto fail; - cd->rd_dircount -= min(cd->rd_dircount, name_and_cookie); + if (cd->rd_dircount) { + name_and_cookie = 4 + 4 * XDR_QUADLEN(namlen) + 8; + if (name_and_cookie > cd->rd_dircount && cd->cookie_offset) + goto fail; + cd->rd_dircount -= min(cd->rd_dircount, name_and_cookie); + if (!cd->rd_dircount) + cd->rd_maxcount = 0; + } cd->cookie_offset = cookie_offset; skip_entry: diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 4a258065188e1..670e97dd67f06 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -173,14 +173,10 @@ int nfsd_reply_cache_init(struct nfsd_net *nn) if (status) goto out_nomem; - nn->drc_hashtbl = kcalloc(hashsize, - sizeof(*nn->drc_hashtbl), GFP_KERNEL); - if (!nn->drc_hashtbl) { - nn->drc_hashtbl = vzalloc(array_size(hashsize, - sizeof(*nn->drc_hashtbl))); - if (!nn->drc_hashtbl) - goto out_shrinker; - } + nn->drc_hashtbl = kvzalloc(array_size(hashsize, + sizeof(*nn->drc_hashtbl)), GFP_KERNEL); + if (!nn->drc_hashtbl) + goto out_shrinker; for (i = 0; i < hashsize; i++) { INIT_LIST_HEAD(&nn->drc_hashtbl[i].lru_head); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 075ef83ae53c2..055cc0458f270 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -792,7 +792,10 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr svc_xprt_put(xprt); } out_err: - nfsd_destroy(net); + if (!list_empty(&nn->nfsd_serv->sv_permsocks)) + nn->nfsd_serv->sv_nrthreads--; + else + nfsd_destroy(net); return err; } @@ -1524,12 +1527,9 @@ static int __init init_nfsd(void) int retval; printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n"); - retval = register_cld_notifier(); - if (retval) - return retval; retval = nfsd4_init_slabs(); if (retval) - goto out_unregister_notifier; + return retval; retval = nfsd4_init_pnfs(); if (retval) goto out_free_slabs; @@ -1547,10 +1547,15 @@ static int __init init_nfsd(void) goto out_free_exports; retval = register_pernet_subsys(&nfsd_net_ops); if (retval < 0) + goto out_free_filesystem; + retval = register_cld_notifier(); + if (retval) goto out_free_all; return 0; out_free_all: unregister_pernet_subsys(&nfsd_net_ops); +out_free_filesystem: + unregister_filesystem(&nfsd_fs_type); out_free_exports: remove_proc_entry("fs/nfs/exports", NULL); remove_proc_entry("fs/nfs", NULL); @@ -1563,13 +1568,12 @@ static int __init init_nfsd(void) nfsd4_exit_pnfs(); out_free_slabs: nfsd4_free_slabs(); -out_unregister_notifier: - unregister_cld_notifier(); return retval; } static void __exit exit_nfsd(void) { + unregister_cld_notifier(); unregister_pernet_subsys(&nfsd_net_ops); nfsd_drc_slab_free(); remove_proc_entry("fs/nfs/exports", NULL); @@ -1580,7 +1584,6 @@ static void __exit exit_nfsd(void) nfsd4_exit_pnfs(); nfsd_fault_inject_cleanup(); unregister_filesystem(&nfsd_fs_type); - unregister_cld_notifier(); } MODULE_AUTHOR("Olaf Kirch "); diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 754c763374dd5..4aca93e11af77 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -230,7 +230,7 @@ nfsd_proc_write(struct svc_rqst *rqstp) unsigned long cnt = argp->len; unsigned int nvecs; - dprintk("nfsd: WRITE %s %d bytes at %d\n", + dprintk("nfsd: WRITE %s %u bytes at %d\n", SVCFH_fmt(&argp->fh), argp->len, argp->offset); diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 155a4e43b24ee..d63cdda1782d4 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -394,13 +394,18 @@ static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cre nn->lockd_up = 1; } - ret = nfs4_state_start_net(net); + ret = nfsd_file_cache_start_net(net); if (ret) goto out_lockd; + ret = nfs4_state_start_net(net); + if (ret) + goto out_filecache; nn->nfsd_net_up = true; return 0; +out_filecache: + nfsd_file_cache_shutdown_net(net); out_lockd: if (nn->lockd_up) { lockd_down(net); @@ -415,7 +420,7 @@ static void nfsd_shutdown_net(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - nfsd_file_cache_purge(net); + nfsd_file_cache_shutdown_net(net); nfs4_state_shutdown_net(net); if (nn->lockd_up) { lockd_down(net); diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index b073bdc2e6e89..127db5351d016 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -53,14 +53,14 @@ TRACE_EVENT(nfsd_compound_status, DECLARE_EVENT_CLASS(nfsd_io_class, TP_PROTO(struct svc_rqst *rqstp, struct svc_fh *fhp, - loff_t offset, - unsigned long len), + u64 offset, + u32 len), TP_ARGS(rqstp, fhp, offset, len), TP_STRUCT__entry( __field(u32, xid) __field(u32, fh_hash) - __field(loff_t, offset) - __field(unsigned long, len) + __field(u64, offset) + __field(u32, len) ), TP_fast_assign( __entry->xid = be32_to_cpu(rqstp->rq_xid); @@ -68,7 +68,7 @@ DECLARE_EVENT_CLASS(nfsd_io_class, __entry->offset = offset; __entry->len = len; ), - TP_printk("xid=0x%08x fh_hash=0x%08x offset=%lld len=%lu", + TP_printk("xid=0x%08x fh_hash=0x%08x offset=%llu len=%u", __entry->xid, __entry->fh_hash, __entry->offset, __entry->len) ) @@ -77,8 +77,8 @@ DECLARE_EVENT_CLASS(nfsd_io_class, DEFINE_EVENT(nfsd_io_class, nfsd_##name, \ TP_PROTO(struct svc_rqst *rqstp, \ struct svc_fh *fhp, \ - loff_t offset, \ - unsigned long len), \ + u64 offset, \ + u32 len), \ TP_ARGS(rqstp, fhp, offset, len)) DEFINE_NFSD_IO_EVENT(read_start); diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h index ea7cca3a64b76..6251d8754c82b 100644 --- a/fs/nfsd/xdr.h +++ b/fs/nfsd/xdr.h @@ -33,7 +33,7 @@ struct nfsd_readargs { struct nfsd_writeargs { svc_fh fh; __u32 offset; - int len; + __u32 len; struct kvec first; }; diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index 4391fd3abd8f8..e00e184b12615 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -20,6 +20,23 @@ #include "page.h" #include "btnode.h" + +/** + * nilfs_init_btnc_inode - initialize B-tree node cache inode + * @btnc_inode: inode to be initialized + * + * nilfs_init_btnc_inode() sets up an inode for B-tree node cache. + */ +void nilfs_init_btnc_inode(struct inode *btnc_inode) +{ + struct nilfs_inode_info *ii = NILFS_I(btnc_inode); + + btnc_inode->i_mode = S_IFREG; + ii->i_flags = 0; + memset(&ii->i_bmap_data, 0, sizeof(struct nilfs_bmap)); + mapping_set_gfp_mask(btnc_inode->i_mapping, GFP_NOFS); +} + void nilfs_btnode_cache_clear(struct address_space *btnc) { invalidate_mapping_pages(btnc, 0, -1); @@ -29,7 +46,7 @@ void nilfs_btnode_cache_clear(struct address_space *btnc) struct buffer_head * nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) { - struct inode *inode = NILFS_BTNC_I(btnc); + struct inode *inode = btnc->host; struct buffer_head *bh; bh = nilfs_grab_buffer(inode, btnc, blocknr, BIT(BH_NILFS_Node)); @@ -57,7 +74,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, struct buffer_head **pbh, sector_t *submit_ptr) { struct buffer_head *bh; - struct inode *inode = NILFS_BTNC_I(btnc); + struct inode *inode = btnc->host; struct page *page; int err; @@ -157,7 +174,7 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc, struct nilfs_btnode_chkey_ctxt *ctxt) { struct buffer_head *obh, *nbh; - struct inode *inode = NILFS_BTNC_I(btnc); + struct inode *inode = btnc->host; __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey; int err; diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h index 0f88dbc9bcb3e..05ab64d354dc9 100644 --- a/fs/nilfs2/btnode.h +++ b/fs/nilfs2/btnode.h @@ -30,6 +30,7 @@ struct nilfs_btnode_chkey_ctxt { struct buffer_head *newbh; }; +void nilfs_init_btnc_inode(struct inode *btnc_inode); void nilfs_btnode_cache_clear(struct address_space *); struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr); diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 23e043eca237b..919d1238ce45f 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -58,7 +58,8 @@ static void nilfs_btree_free_path(struct nilfs_btree_path *path) static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree, __u64 ptr, struct buffer_head **bhp) { - struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; + struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; + struct address_space *btnc = btnc_inode->i_mapping; struct buffer_head *bh; bh = nilfs_btnode_create_block(btnc, ptr); @@ -470,7 +471,8 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, struct buffer_head **bhp, const struct nilfs_btree_readahead_info *ra) { - struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; + struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; + struct address_space *btnc = btnc_inode->i_mapping; struct buffer_head *bh, *ra_bh; sector_t submit_ptr = 0; int ret; @@ -1742,6 +1744,10 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key, dat = nilfs_bmap_get_dat(btree); } + ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode); + if (ret < 0) + return ret; + ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat); if (ret < 0) return ret; @@ -1914,7 +1920,7 @@ static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree, path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr; path[level].bp_ctxt.bh = path[level].bp_bh; ret = nilfs_btnode_prepare_change_key( - &NILFS_BMAP_I(btree)->i_btnode_cache, + NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); if (ret < 0) { nilfs_dat_abort_update(dat, @@ -1940,7 +1946,7 @@ static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree, if (buffer_nilfs_node(path[level].bp_bh)) { nilfs_btnode_commit_change_key( - &NILFS_BMAP_I(btree)->i_btnode_cache, + NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); path[level].bp_bh = path[level].bp_ctxt.bh; } @@ -1959,7 +1965,7 @@ static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree, &path[level].bp_newreq.bpr_req); if (buffer_nilfs_node(path[level].bp_bh)) nilfs_btnode_abort_change_key( - &NILFS_BMAP_I(btree)->i_btnode_cache, + NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); } @@ -2135,7 +2141,8 @@ static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree, static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree, struct list_head *listp) { - struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache; + struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; + struct address_space *btcache = btnc_inode->i_mapping; struct list_head lists[NILFS_BTREE_LEVEL_MAX]; struct pagevec pvec; struct buffer_head *bh, *head; @@ -2189,12 +2196,12 @@ static int nilfs_btree_assign_p(struct nilfs_bmap *btree, path[level].bp_ctxt.newkey = blocknr; path[level].bp_ctxt.bh = *bh; ret = nilfs_btnode_prepare_change_key( - &NILFS_BMAP_I(btree)->i_btnode_cache, + NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); if (ret < 0) return ret; nilfs_btnode_commit_change_key( - &NILFS_BMAP_I(btree)->i_btnode_cache, + NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); *bh = path[level].bp_ctxt.bh; } @@ -2399,6 +2406,10 @@ int nilfs_btree_init(struct nilfs_bmap *bmap) if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode)) ret = -EIO; + else + ret = nilfs_attach_btree_node_cache( + &NILFS_BMAP_I(bmap)->vfs_inode); + return ret; } diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 6f4066636be9a..a3523a243e113 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -497,7 +497,9 @@ int nilfs_dat_read(struct super_block *sb, size_t entry_size, di = NILFS_DAT_I(dat); lockdep_set_class(&di->mi.mi_sem, &dat_lock_key); nilfs_palloc_setup_cache(dat, &di->palloc_cache); - nilfs_mdt_setup_shadow_map(dat, &di->shadow); + err = nilfs_mdt_setup_shadow_map(dat, &di->shadow); + if (err) + goto failed; err = nilfs_read_inode_common(dat, raw_inode); if (err) diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index aa3c328ee189c..114774ac2185a 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -126,9 +126,10 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn, __u64 vbn, struct buffer_head **out_bh) { + struct inode *btnc_inode = NILFS_I(inode)->i_assoc_inode; int ret; - ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache, + ret = nilfs_btnode_submit_block(btnc_inode->i_mapping, vbn ? : pbn, pbn, REQ_OP_READ, 0, out_bh, &pbn); if (ret == -EEXIST) /* internal code (cache hit) */ @@ -170,7 +171,7 @@ int nilfs_init_gcinode(struct inode *inode) ii->i_flags = 0; nilfs_bmap_init_gc(ii->i_bmap); - return 0; + return nilfs_attach_btree_node_cache(inode); } /** @@ -185,7 +186,7 @@ void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs) ii = list_first_entry(head, struct nilfs_inode_info, i_dirty); list_del_init(&ii->i_dirty); truncate_inode_pages(&ii->vfs_inode.i_data, 0); - nilfs_btnode_cache_clear(&ii->i_btnode_cache); + nilfs_btnode_cache_clear(ii->i_assoc_inode->i_mapping); iput(&ii->vfs_inode); } } diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 671085512e0fd..35b0bfe9324f2 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -28,12 +28,16 @@ * @cno: checkpoint number * @root: pointer on NILFS root object (mounted checkpoint) * @for_gc: inode for GC flag + * @for_btnc: inode for B-tree node cache flag + * @for_shadow: inode for shadowed page cache flag */ struct nilfs_iget_args { u64 ino; __u64 cno; struct nilfs_root *root; - int for_gc; + bool for_gc; + bool for_btnc; + bool for_shadow; }; static int nilfs_iget_test(struct inode *inode, void *opaque); @@ -322,7 +326,8 @@ static int nilfs_insert_inode_locked(struct inode *inode, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = 0 + .ino = ino, .root = root, .cno = 0, .for_gc = false, + .for_btnc = false, .for_shadow = false }; return insert_inode_locked4(inode, ino, nilfs_iget_test, &args); @@ -534,6 +539,19 @@ static int nilfs_iget_test(struct inode *inode, void *opaque) return 0; ii = NILFS_I(inode); + if (test_bit(NILFS_I_BTNC, &ii->i_state)) { + if (!args->for_btnc) + return 0; + } else if (args->for_btnc) { + return 0; + } + if (test_bit(NILFS_I_SHADOW, &ii->i_state)) { + if (!args->for_shadow) + return 0; + } else if (args->for_shadow) { + return 0; + } + if (!test_bit(NILFS_I_GCINODE, &ii->i_state)) return !args->for_gc; @@ -545,15 +563,17 @@ static int nilfs_iget_set(struct inode *inode, void *opaque) struct nilfs_iget_args *args = opaque; inode->i_ino = args->ino; - if (args->for_gc) { + NILFS_I(inode)->i_cno = args->cno; + NILFS_I(inode)->i_root = args->root; + if (args->root && args->ino == NILFS_ROOT_INO) + nilfs_get_root(args->root); + + if (args->for_gc) NILFS_I(inode)->i_state = BIT(NILFS_I_GCINODE); - NILFS_I(inode)->i_cno = args->cno; - NILFS_I(inode)->i_root = NULL; - } else { - if (args->root && args->ino == NILFS_ROOT_INO) - nilfs_get_root(args->root); - NILFS_I(inode)->i_root = args->root; - } + if (args->for_btnc) + NILFS_I(inode)->i_state |= BIT(NILFS_I_BTNC); + if (args->for_shadow) + NILFS_I(inode)->i_state |= BIT(NILFS_I_SHADOW); return 0; } @@ -561,7 +581,8 @@ struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = 0 + .ino = ino, .root = root, .cno = 0, .for_gc = false, + .for_btnc = false, .for_shadow = false }; return ilookup5(sb, ino, nilfs_iget_test, &args); @@ -571,7 +592,8 @@ struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = 0 + .ino = ino, .root = root, .cno = 0, .for_gc = false, + .for_btnc = false, .for_shadow = false }; return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); @@ -602,7 +624,8 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, __u64 cno) { struct nilfs_iget_args args = { - .ino = ino, .root = NULL, .cno = cno, .for_gc = 1 + .ino = ino, .root = NULL, .cno = cno, .for_gc = true, + .for_btnc = false, .for_shadow = false }; struct inode *inode; int err; @@ -622,6 +645,113 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, return inode; } +/** + * nilfs_attach_btree_node_cache - attach a B-tree node cache to the inode + * @inode: inode object + * + * nilfs_attach_btree_node_cache() attaches a B-tree node cache to @inode, + * or does nothing if the inode already has it. This function allocates + * an additional inode to maintain page cache of B-tree nodes one-on-one. + * + * Return Value: On success, 0 is returned. On errors, one of the following + * negative error code is returned. + * + * %-ENOMEM - Insufficient memory available. + */ +int nilfs_attach_btree_node_cache(struct inode *inode) +{ + struct nilfs_inode_info *ii = NILFS_I(inode); + struct inode *btnc_inode; + struct nilfs_iget_args args; + + if (ii->i_assoc_inode) + return 0; + + args.ino = inode->i_ino; + args.root = ii->i_root; + args.cno = ii->i_cno; + args.for_gc = test_bit(NILFS_I_GCINODE, &ii->i_state) != 0; + args.for_btnc = true; + args.for_shadow = test_bit(NILFS_I_SHADOW, &ii->i_state) != 0; + + btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test, + nilfs_iget_set, &args); + if (unlikely(!btnc_inode)) + return -ENOMEM; + if (btnc_inode->i_state & I_NEW) { + nilfs_init_btnc_inode(btnc_inode); + unlock_new_inode(btnc_inode); + } + NILFS_I(btnc_inode)->i_assoc_inode = inode; + NILFS_I(btnc_inode)->i_bmap = ii->i_bmap; + ii->i_assoc_inode = btnc_inode; + + return 0; +} + +/** + * nilfs_detach_btree_node_cache - detach the B-tree node cache from the inode + * @inode: inode object + * + * nilfs_detach_btree_node_cache() detaches the B-tree node cache and its + * holder inode bound to @inode, or does nothing if @inode doesn't have it. + */ +void nilfs_detach_btree_node_cache(struct inode *inode) +{ + struct nilfs_inode_info *ii = NILFS_I(inode); + struct inode *btnc_inode = ii->i_assoc_inode; + + if (btnc_inode) { + NILFS_I(btnc_inode)->i_assoc_inode = NULL; + ii->i_assoc_inode = NULL; + iput(btnc_inode); + } +} + +/** + * nilfs_iget_for_shadow - obtain inode for shadow mapping + * @inode: inode object that uses shadow mapping + * + * nilfs_iget_for_shadow() allocates a pair of inodes that holds page + * caches for shadow mapping. The page cache for data pages is set up + * in one inode and the one for b-tree node pages is set up in the + * other inode, which is attached to the former inode. + * + * Return Value: On success, a pointer to the inode for data pages is + * returned. On errors, one of the following negative error code is returned + * in a pointer type. + * + * %-ENOMEM - Insufficient memory available. + */ +struct inode *nilfs_iget_for_shadow(struct inode *inode) +{ + struct nilfs_iget_args args = { + .ino = inode->i_ino, .root = NULL, .cno = 0, .for_gc = false, + .for_btnc = false, .for_shadow = true + }; + struct inode *s_inode; + int err; + + s_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test, + nilfs_iget_set, &args); + if (unlikely(!s_inode)) + return ERR_PTR(-ENOMEM); + if (!(s_inode->i_state & I_NEW)) + return inode; + + NILFS_I(s_inode)->i_flags = 0; + memset(NILFS_I(s_inode)->i_bmap, 0, sizeof(struct nilfs_bmap)); + mapping_set_gfp_mask(s_inode->i_mapping, GFP_NOFS); + + err = nilfs_attach_btree_node_cache(s_inode); + if (unlikely(err)) { + iget_failed(s_inode); + return ERR_PTR(err); + } + unlock_new_inode(s_inode); + return s_inode; +} + void nilfs_write_inode_common(struct inode *inode, struct nilfs_inode *raw_inode, int has_bmap) { @@ -770,7 +900,8 @@ static void nilfs_clear_inode(struct inode *inode) if (test_bit(NILFS_I_BMAP, &ii->i_state)) nilfs_bmap_clear(ii->i_bmap); - nilfs_btnode_cache_clear(&ii->i_btnode_cache); + if (!test_bit(NILFS_I_BTNC, &ii->i_state)) + nilfs_detach_btree_node_cache(inode); if (ii->i_root && inode->i_ino == NILFS_ROOT_INO) nilfs_put_root(ii->i_root); diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 700870a92bc4a..7c9055d767d16 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -469,9 +469,18 @@ int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz) void nilfs_mdt_clear(struct inode *inode) { struct nilfs_mdt_info *mdi = NILFS_MDT(inode); + struct nilfs_shadow_map *shadow = mdi->mi_shadow; if (mdi->mi_palloc_cache) nilfs_palloc_destroy_cache(inode); + + if (shadow) { + struct inode *s_inode = shadow->inode; + + shadow->inode = NULL; + iput(s_inode); + mdi->mi_shadow = NULL; + } } /** @@ -505,12 +514,15 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode, struct nilfs_shadow_map *shadow) { struct nilfs_mdt_info *mi = NILFS_MDT(inode); + struct inode *s_inode; INIT_LIST_HEAD(&shadow->frozen_buffers); - address_space_init_once(&shadow->frozen_data); - nilfs_mapping_init(&shadow->frozen_data, inode); - address_space_init_once(&shadow->frozen_btnodes); - nilfs_mapping_init(&shadow->frozen_btnodes, inode); + + s_inode = nilfs_iget_for_shadow(inode); + if (IS_ERR(s_inode)) + return PTR_ERR(s_inode); + + shadow->inode = s_inode; mi->mi_shadow = shadow; return 0; } @@ -524,14 +536,15 @@ int nilfs_mdt_save_to_shadow_map(struct inode *inode) struct nilfs_mdt_info *mi = NILFS_MDT(inode); struct nilfs_inode_info *ii = NILFS_I(inode); struct nilfs_shadow_map *shadow = mi->mi_shadow; + struct inode *s_inode = shadow->inode; int ret; - ret = nilfs_copy_dirty_pages(&shadow->frozen_data, inode->i_mapping); + ret = nilfs_copy_dirty_pages(s_inode->i_mapping, inode->i_mapping); if (ret) goto out; - ret = nilfs_copy_dirty_pages(&shadow->frozen_btnodes, - &ii->i_btnode_cache); + ret = nilfs_copy_dirty_pages(NILFS_I(s_inode)->i_assoc_inode->i_mapping, + ii->i_assoc_inode->i_mapping); if (ret) goto out; @@ -547,7 +560,7 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh) struct page *page; int blkbits = inode->i_blkbits; - page = grab_cache_page(&shadow->frozen_data, bh->b_page->index); + page = grab_cache_page(shadow->inode->i_mapping, bh->b_page->index); if (!page) return -ENOMEM; @@ -579,7 +592,7 @@ nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh) struct page *page; int n; - page = find_lock_page(&shadow->frozen_data, bh->b_page->index); + page = find_lock_page(shadow->inode->i_mapping, bh->b_page->index); if (page) { if (page_has_buffers(page)) { n = bh_offset(bh) >> inode->i_blkbits; @@ -620,10 +633,11 @@ void nilfs_mdt_restore_from_shadow_map(struct inode *inode) nilfs_palloc_clear_cache(inode); nilfs_clear_dirty_pages(inode->i_mapping, true); - nilfs_copy_back_pages(inode->i_mapping, &shadow->frozen_data); + nilfs_copy_back_pages(inode->i_mapping, shadow->inode->i_mapping); - nilfs_clear_dirty_pages(&ii->i_btnode_cache, true); - nilfs_copy_back_pages(&ii->i_btnode_cache, &shadow->frozen_btnodes); + nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping, true); + nilfs_copy_back_pages(ii->i_assoc_inode->i_mapping, + NILFS_I(shadow->inode)->i_assoc_inode->i_mapping); nilfs_bmap_restore(ii->i_bmap, &shadow->bmap_store); @@ -638,10 +652,11 @@ void nilfs_mdt_clear_shadow_map(struct inode *inode) { struct nilfs_mdt_info *mi = NILFS_MDT(inode); struct nilfs_shadow_map *shadow = mi->mi_shadow; + struct inode *shadow_btnc_inode = NILFS_I(shadow->inode)->i_assoc_inode; down_write(&mi->mi_sem); nilfs_release_frozen_buffers(shadow); - truncate_inode_pages(&shadow->frozen_data, 0); - truncate_inode_pages(&shadow->frozen_btnodes, 0); + truncate_inode_pages(shadow->inode->i_mapping, 0); + truncate_inode_pages(shadow_btnc_inode->i_mapping, 0); up_write(&mi->mi_sem); } diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h index e77aea4bb921c..9d8ac0d27c16e 100644 --- a/fs/nilfs2/mdt.h +++ b/fs/nilfs2/mdt.h @@ -18,14 +18,12 @@ /** * struct nilfs_shadow_map - shadow mapping of meta data file * @bmap_store: shadow copy of bmap state - * @frozen_data: shadowed dirty data pages - * @frozen_btnodes: shadowed dirty b-tree nodes' pages + * @inode: holder of page caches used in shadow mapping * @frozen_buffers: list of frozen buffers */ struct nilfs_shadow_map { struct nilfs_bmap_store bmap_store; - struct address_space frozen_data; - struct address_space frozen_btnodes; + struct inode *inode; struct list_head frozen_buffers; }; diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 42395ba52da62..dc772eaa13cf1 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -28,7 +28,7 @@ * @i_xattr: * @i_dir_start_lookup: page index of last successful search * @i_cno: checkpoint number for GC inode - * @i_btnode_cache: cached pages of b-tree nodes + * @i_assoc_inode: associated inode (B-tree node cache holder or back pointer) * @i_dirty: list for connecting dirty files * @xattr_sem: semaphore for extended attributes processing * @i_bh: buffer contains disk inode @@ -43,7 +43,7 @@ struct nilfs_inode_info { __u64 i_xattr; /* sector_t ??? */ __u32 i_dir_start_lookup; __u64 i_cno; /* check point number for GC inode */ - struct address_space i_btnode_cache; + struct inode *i_assoc_inode; struct list_head i_dirty; /* List for connecting dirty files */ #ifdef CONFIG_NILFS_XATTR @@ -75,13 +75,6 @@ NILFS_BMAP_I(const struct nilfs_bmap *bmap) return container_of(bmap, struct nilfs_inode_info, i_bmap_data); } -static inline struct inode *NILFS_BTNC_I(struct address_space *btnc) -{ - struct nilfs_inode_info *ii = - container_of(btnc, struct nilfs_inode_info, i_btnode_cache); - return &ii->vfs_inode; -} - /* * Dynamic state flags of NILFS on-memory inode (i_state) */ @@ -98,6 +91,8 @@ enum { NILFS_I_INODE_SYNC, /* dsync is not allowed for inode */ NILFS_I_BMAP, /* has bmap and btnode_cache */ NILFS_I_GCINODE, /* inode for GC, on memory only */ + NILFS_I_BTNC, /* inode for btree node cache */ + NILFS_I_SHADOW, /* inode for shadowed page cache */ }; /* @@ -203,6 +198,9 @@ static inline int nilfs_acl_chmod(struct inode *inode) static inline int nilfs_init_acl(struct inode *inode, struct inode *dir) { + if (S_ISLNK(inode->i_mode)) + return 0; + inode->i_mode &= ~current_umask(); return 0; } @@ -264,6 +262,9 @@ struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root, unsigned long ino); extern struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, __u64 cno); +int nilfs_attach_btree_node_cache(struct inode *inode); +void nilfs_detach_btree_node_cache(struct inode *inode); +struct inode *nilfs_iget_for_shadow(struct inode *inode); extern void nilfs_update_inode(struct inode *, struct buffer_head *, int); extern void nilfs_truncate(struct inode *); extern void nilfs_evict_inode(struct inode *); diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index d7fc8d369d89e..98d21ad9a073f 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -450,10 +450,9 @@ void nilfs_mapping_init(struct address_space *mapping, struct inode *inode) /* * NILFS2 needs clear_page_dirty() in the following two cases: * - * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears - * page dirty flags when it copies back pages from the shadow cache - * (gcdat->{i_mapping,i_btnode_cache}) to its original cache - * (dat->{i_mapping,i_btnode_cache}). + * 1) For B-tree node pages and data pages of DAT file, NILFS2 clears dirty + * flag of pages when it copies back pages from shadow cache to the + * original cache. * * 2) Some B-tree operations like insertion or deletion may dispose buffers * in dirty state, and this needs to cancel the dirty state of their pages. diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 91b58c897f92d..eb3ac76190887 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -738,15 +738,18 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode, struct list_head *listp) { struct nilfs_inode_info *ii = NILFS_I(inode); - struct address_space *mapping = &ii->i_btnode_cache; + struct inode *btnc_inode = ii->i_assoc_inode; struct pagevec pvec; struct buffer_head *bh, *head; unsigned int i; pgoff_t index = 0; + if (!btnc_inode) + return; + pagevec_init(&pvec); - while (pagevec_lookup_tag(&pvec, mapping, &index, + while (pagevec_lookup_tag(&pvec, btnc_inode->i_mapping, &index, PAGECACHE_TAG_DIRTY)) { for (i = 0; i < pagevec_count(&pvec); i++) { bh = head = page_buffers(pvec.pages[i]); @@ -2410,7 +2413,7 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head) continue; list_del_init(&ii->i_dirty); truncate_inode_pages(&ii->vfs_inode.i_data, 0); - nilfs_btnode_cache_clear(&ii->i_btnode_cache); + nilfs_btnode_cache_clear(ii->i_assoc_inode->i_mapping); iput(&ii->vfs_inode); } } diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 5729ee86da9ae..b5bc9f0c6a406 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -151,7 +151,8 @@ struct inode *nilfs_alloc_inode(struct super_block *sb) ii->i_bh = NULL; ii->i_state = 0; ii->i_cno = 0; - nilfs_mapping_init(&ii->i_btnode_cache, &ii->vfs_inode); + ii->i_assoc_inode = NULL; + ii->i_bmap = &ii->i_bmap_data; return &ii->vfs_inode; } @@ -1375,8 +1376,6 @@ static void nilfs_inode_init_once(void *obj) #ifdef CONFIG_NILFS_XATTR init_rwsem(&ii->xattr_sem); #endif - address_space_init_once(&ii->i_btnode_cache); - ii->i_bmap = &ii->i_bmap_data; inode_init_once(&ii->vfs_inode); } diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index c6c8a33c81d5e..28a2db3b1787f 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -64,11 +64,9 @@ static const struct sysfs_ops nilfs_##name##_attr_ops = { \ #define NILFS_DEV_INT_GROUP_TYPE(name, parent_name) \ static void nilfs_##name##_attr_release(struct kobject *kobj) \ { \ - struct nilfs_sysfs_##parent_name##_subgroups *subgroups; \ - struct the_nilfs *nilfs = container_of(kobj->parent, \ - struct the_nilfs, \ - ns_##parent_name##_kobj); \ - subgroups = nilfs->ns_##parent_name##_subgroups; \ + struct nilfs_sysfs_##parent_name##_subgroups *subgroups = container_of(kobj, \ + struct nilfs_sysfs_##parent_name##_subgroups, \ + sg_##name##_kobj); \ complete(&subgroups->sg_##name##_kobj_unregister); \ } \ static struct kobj_type nilfs_##name##_ktype = { \ @@ -94,12 +92,12 @@ static int nilfs_sysfs_create_##name##_group(struct the_nilfs *nilfs) \ err = kobject_init_and_add(kobj, &nilfs_##name##_ktype, parent, \ #name); \ if (err) \ - return err; \ - return 0; \ + kobject_put(kobj); \ + return err; \ } \ static void nilfs_sysfs_delete_##name##_group(struct the_nilfs *nilfs) \ { \ - kobject_del(&nilfs->ns_##parent_name##_subgroups->sg_##name##_kobj); \ + kobject_put(&nilfs->ns_##parent_name##_subgroups->sg_##name##_kobj); \ } /************************************************************************ @@ -210,14 +208,14 @@ int nilfs_sysfs_create_snapshot_group(struct nilfs_root *root) } if (err) - return err; + kobject_put(&root->snapshot_kobj); - return 0; + return err; } void nilfs_sysfs_delete_snapshot_group(struct nilfs_root *root) { - kobject_del(&root->snapshot_kobj); + kobject_put(&root->snapshot_kobj); } /************************************************************************ @@ -1000,7 +998,7 @@ int nilfs_sysfs_create_device_group(struct super_block *sb) err = kobject_init_and_add(&nilfs->ns_dev_kobj, &nilfs_dev_ktype, NULL, "%s", sb->s_id); if (err) - goto free_dev_subgroups; + goto cleanup_dev_kobject; err = nilfs_sysfs_create_mounted_snapshots_group(nilfs); if (err) @@ -1037,9 +1035,7 @@ int nilfs_sysfs_create_device_group(struct super_block *sb) nilfs_sysfs_delete_mounted_snapshots_group(nilfs); cleanup_dev_kobject: - kobject_del(&nilfs->ns_dev_kobj); - -free_dev_subgroups: + kobject_put(&nilfs->ns_dev_kobj); kfree(nilfs->ns_dev_subgroups); failed_create_device_group: diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 484785cdf96e2..931870768556c 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -797,14 +797,13 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno) void nilfs_put_root(struct nilfs_root *root) { - if (refcount_dec_and_test(&root->count)) { - struct the_nilfs *nilfs = root->nilfs; + struct the_nilfs *nilfs = root->nilfs; - nilfs_sysfs_delete_snapshot_group(root); - - spin_lock(&nilfs->ns_cptree_lock); + if (refcount_dec_and_lock(&root->count, &nilfs->ns_cptree_lock)) { rb_erase(&root->rb_node, &nilfs->ns_cptree); spin_unlock(&nilfs->ns_cptree_lock); + + nilfs_sysfs_delete_snapshot_group(root); iput(root->ifile); kfree(root); diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index 1e2bfd26b3521..7df9ad4d84338 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -84,16 +84,9 @@ static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) inode_mark = container_of(mark, struct inotify_inode_mark, fsn_mark); inode = igrab(fsnotify_conn_inode(mark->connector)); if (inode) { - /* - * IN_ALL_EVENTS represents all of the mask bits - * that we expose to userspace. There is at - * least one bit (FS_EVENT_ON_CHILD) which is - * used only internally to the kernel. - */ - u32 mask = mark->mask & IN_ALL_EVENTS; - seq_printf(m, "inotify wd:%x ino:%lx sdev:%x mask:%x ignored_mask:%x ", + seq_printf(m, "inotify wd:%x ino:%lx sdev:%x mask:%x ignored_mask:0 ", inode_mark->wd, inode->i_ino, inode->i_sb->s_dev, - mask, mark->ignored_mask); + inotify_mark_user_mask(mark)); show_mark_fhandle(m, inode); seq_putc(m, '\n'); iput(inode); diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h index 3f246f7b8a92b..8b8bf52dd08b0 100644 --- a/fs/notify/inotify/inotify.h +++ b/fs/notify/inotify/inotify.h @@ -22,6 +22,18 @@ static inline struct inotify_event_info *INOTIFY_E(struct fsnotify_event *fse) return container_of(fse, struct inotify_event_info, fse); } +/* + * INOTIFY_USER_FLAGS represents all of the mask bits that we expose to + * userspace. There is at least one bit (FS_EVENT_ON_CHILD) which is + * used only internally to the kernel. + */ +#define INOTIFY_USER_MASK (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK) + +static inline __u32 inotify_mark_user_mask(struct fsnotify_mark *fsn_mark) +{ + return fsn_mark->mask & INOTIFY_USER_MASK; +} + extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group); extern int inotify_handle_event(struct fsnotify_group *group, diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 81ffc8629fc4b..b949b2c02f4be 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -86,7 +86,7 @@ static inline __u32 inotify_arg_to_mask(u32 arg) mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD | FS_UNMOUNT); /* mask off the flags used to open the fd */ - mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK)); + mask |= (arg & INOTIFY_USER_MASK); return mask; } diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 1d96216dffd19..fdf8e03bf3df7 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -426,7 +426,7 @@ void fsnotify_free_mark(struct fsnotify_mark *mark) void fsnotify_destroy_mark(struct fsnotify_mark *mark, struct fsnotify_group *group) { - mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); + mutex_lock(&group->mark_mutex); fsnotify_detach_mark(mark); mutex_unlock(&group->mark_mutex); fsnotify_free_mark(mark); @@ -738,7 +738,7 @@ void fsnotify_clear_marks_by_group(struct fsnotify_group *group, * move marks to free to to_free list in one go and then free marks in * to_free list one by one. */ - mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); + mutex_lock(&group->mark_mutex); list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) { if ((1U << mark->connector->type) & type_mask) list_move(&mark->g_list, &to_free); @@ -747,7 +747,7 @@ void fsnotify_clear_marks_by_group(struct fsnotify_group *group, clear: while (1) { - mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); + mutex_lock(&group->mark_mutex); if (list_empty(head)) { mutex_unlock(&group->mark_mutex); break; diff --git a/fs/ntfs/Kconfig b/fs/ntfs/Kconfig index de9fb5cff2269..026ec41418049 100644 --- a/fs/ntfs/Kconfig +++ b/fs/ntfs/Kconfig @@ -52,6 +52,7 @@ config NTFS_DEBUG config NTFS_RW bool "NTFS write support" depends on NTFS_FS + depends on PAGE_SIZE_LESS_THAN_64KB help This enables the partial, but safe, write support in the NTFS driver. diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index d563abc3e1364..f171563e51065 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -592,9 +592,25 @@ static int ntfs_attr_find(const ATTR_TYPE type, const ntfschar *name, a = (ATTR_RECORD*)((u8*)ctx->attr + le32_to_cpu(ctx->attr->length)); for (;; a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) { - if ((u8*)a < (u8*)ctx->mrec || (u8*)a > (u8*)ctx->mrec + - le32_to_cpu(ctx->mrec->bytes_allocated)) + u8 *mrec_end = (u8 *)ctx->mrec + + le32_to_cpu(ctx->mrec->bytes_allocated); + u8 *name_end; + + /* check whether ATTR_RECORD wrap */ + if ((u8 *)a < (u8 *)ctx->mrec) + break; + + /* check whether Attribute Record Header is within bounds */ + if ((u8 *)a > mrec_end || + (u8 *)a + sizeof(ATTR_RECORD) > mrec_end) + break; + + /* check whether ATTR_RECORD's name is within bounds */ + name_end = (u8 *)a + le16_to_cpu(a->name_offset) + + a->name_length * sizeof(ntfschar); + if (name_end > mrec_end) break; + ctx->attr = a; if (unlikely(le32_to_cpu(a->type) > le32_to_cpu(type) || a->type == AT_END)) diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index 3c4811469ae86..e278bfc5ee7ff 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c @@ -1503,7 +1503,7 @@ static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end, na.type = AT_BITMAP; na.name = I30; na.name_len = 4; - bmp_vi = ilookup5(vi->i_sb, vi->i_ino, (test_t)ntfs_test_inode, &na); + bmp_vi = ilookup5(vi->i_sb, vi->i_ino, ntfs_test_inode, &na); if (bmp_vi) { write_inode_now(bmp_vi, !datasync); iput(bmp_vi); diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 46dc16e01fe20..cf222c9225d6d 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -30,10 +30,10 @@ /** * ntfs_test_inode - compare two (possibly fake) inodes for equality * @vi: vfs inode which to test - * @na: ntfs attribute which is being tested with + * @data: data which is being tested with * * Compare the ntfs attribute embedded in the ntfs specific part of the vfs - * inode @vi for equality with the ntfs attribute @na. + * inode @vi for equality with the ntfs attribute @data. * * If searching for the normal file/directory inode, set @na->type to AT_UNUSED. * @na->name and @na->name_len are then ignored. @@ -43,8 +43,9 @@ * NOTE: This function runs with the inode_hash_lock spin lock held so it is not * allowed to sleep. */ -int ntfs_test_inode(struct inode *vi, ntfs_attr *na) +int ntfs_test_inode(struct inode *vi, void *data) { + ntfs_attr *na = (ntfs_attr *)data; ntfs_inode *ni; if (vi->i_ino != na->mft_no) @@ -72,9 +73,9 @@ int ntfs_test_inode(struct inode *vi, ntfs_attr *na) /** * ntfs_init_locked_inode - initialize an inode * @vi: vfs inode to initialize - * @na: ntfs attribute which to initialize @vi to + * @data: data which to initialize @vi to * - * Initialize the vfs inode @vi with the values from the ntfs attribute @na in + * Initialize the vfs inode @vi with the values from the ntfs attribute @data in * order to enable ntfs_test_inode() to do its work. * * If initializing the normal file/directory inode, set @na->type to AT_UNUSED. @@ -87,8 +88,9 @@ int ntfs_test_inode(struct inode *vi, ntfs_attr *na) * NOTE: This function runs with the inode->i_lock spin lock held so it is not * allowed to sleep. (Hence the GFP_ATOMIC allocation.) */ -static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na) +static int ntfs_init_locked_inode(struct inode *vi, void *data) { + ntfs_attr *na = (ntfs_attr *)data; ntfs_inode *ni = NTFS_I(vi); vi->i_ino = na->mft_no; @@ -131,7 +133,6 @@ static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na) return 0; } -typedef int (*set_t)(struct inode *, void *); static int ntfs_read_locked_inode(struct inode *vi); static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi); static int ntfs_read_locked_index_inode(struct inode *base_vi, @@ -164,8 +165,8 @@ struct inode *ntfs_iget(struct super_block *sb, unsigned long mft_no) na.name = NULL; na.name_len = 0; - vi = iget5_locked(sb, mft_no, (test_t)ntfs_test_inode, - (set_t)ntfs_init_locked_inode, &na); + vi = iget5_locked(sb, mft_no, ntfs_test_inode, + ntfs_init_locked_inode, &na); if (unlikely(!vi)) return ERR_PTR(-ENOMEM); @@ -225,8 +226,8 @@ struct inode *ntfs_attr_iget(struct inode *base_vi, ATTR_TYPE type, na.name = name; na.name_len = name_len; - vi = iget5_locked(base_vi->i_sb, na.mft_no, (test_t)ntfs_test_inode, - (set_t)ntfs_init_locked_inode, &na); + vi = iget5_locked(base_vi->i_sb, na.mft_no, ntfs_test_inode, + ntfs_init_locked_inode, &na); if (unlikely(!vi)) return ERR_PTR(-ENOMEM); @@ -280,8 +281,8 @@ struct inode *ntfs_index_iget(struct inode *base_vi, ntfschar *name, na.name = name; na.name_len = name_len; - vi = iget5_locked(base_vi->i_sb, na.mft_no, (test_t)ntfs_test_inode, - (set_t)ntfs_init_locked_inode, &na); + vi = iget5_locked(base_vi->i_sb, na.mft_no, ntfs_test_inode, + ntfs_init_locked_inode, &na); if (unlikely(!vi)) return ERR_PTR(-ENOMEM); @@ -1880,6 +1881,10 @@ int ntfs_read_inode_mount(struct inode *vi) } /* Now allocate memory for the attribute list. */ ni->attr_list_size = (u32)ntfs_attr_size(a); + if (!ni->attr_list_size) { + ntfs_error(sb, "Attr_list_size is zero"); + goto put_err_out; + } ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size); if (!ni->attr_list) { ntfs_error(sb, "Not enough memory to allocate buffer " diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h index 98e670fbdd31d..363e4e8206738 100644 --- a/fs/ntfs/inode.h +++ b/fs/ntfs/inode.h @@ -253,9 +253,7 @@ typedef struct { ATTR_TYPE type; } ntfs_attr; -typedef int (*test_t)(struct inode *, void *); - -extern int ntfs_test_inode(struct inode *vi, ntfs_attr *na); +extern int ntfs_test_inode(struct inode *vi, void *data); extern struct inode *ntfs_iget(struct super_block *sb, unsigned long mft_no); extern struct inode *ntfs_attr_iget(struct inode *base_vi, ATTR_TYPE type, diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 3aac5c917afe7..58234b42d68f1 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -958,7 +958,7 @@ bool ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no, * dirty code path of the inode dirty code path when writing * $MFT occurs. */ - vi = ilookup5_nowait(sb, mft_no, (test_t)ntfs_test_inode, &na); + vi = ilookup5_nowait(sb, mft_no, ntfs_test_inode, &na); } if (vi) { ntfs_debug("Base inode 0x%lx is in icache.", mft_no); @@ -1019,7 +1019,7 @@ bool ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no, vi = igrab(mft_vi); BUG_ON(vi != mft_vi); } else - vi = ilookup5_nowait(sb, na.mft_no, (test_t)ntfs_test_inode, + vi = ilookup5_nowait(sb, na.mft_no, ntfs_test_inode, &na); if (!vi) { /* diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 0a6fe7d5aba74..4db87b26cf7b2 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -7048,7 +7048,7 @@ void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di) int ocfs2_convert_inline_data_to_extents(struct inode *inode, struct buffer_head *di_bh) { - int ret, i, has_data, num_pages = 0; + int ret, has_data, num_pages = 0; int need_free = 0; u32 bit_off, num; handle_t *handle; @@ -7057,26 +7057,17 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; struct ocfs2_alloc_context *data_ac = NULL; - struct page **pages = NULL; - loff_t end = osb->s_clustersize; + struct page *page = NULL; struct ocfs2_extent_tree et; int did_quota = 0; has_data = i_size_read(inode) ? 1 : 0; if (has_data) { - pages = kcalloc(ocfs2_pages_per_cluster(osb->sb), - sizeof(struct page *), GFP_NOFS); - if (pages == NULL) { - ret = -ENOMEM; - mlog_errno(ret); - return ret; - } - ret = ocfs2_reserve_clusters(osb, 1, &data_ac); if (ret) { mlog_errno(ret); - goto free_pages; + goto out; } } @@ -7096,7 +7087,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, } if (has_data) { - unsigned int page_end; + unsigned int page_end = min_t(unsigned, PAGE_SIZE, + osb->s_clustersize); u64 phys; ret = dquot_alloc_space_nodirty(inode, @@ -7120,15 +7112,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, */ block = phys = ocfs2_clusters_to_blocks(inode->i_sb, bit_off); - /* - * Non sparse file systems zero on extend, so no need - * to do that now. - */ - if (!ocfs2_sparse_alloc(osb) && - PAGE_SIZE < osb->s_clustersize) - end = PAGE_SIZE; - - ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages); + ret = ocfs2_grab_eof_pages(inode, 0, page_end, &page, + &num_pages); if (ret) { mlog_errno(ret); need_free = 1; @@ -7139,20 +7124,15 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, * This should populate the 1st page for us and mark * it up to date. */ - ret = ocfs2_read_inline_data(inode, pages[0], di_bh); + ret = ocfs2_read_inline_data(inode, page, di_bh); if (ret) { mlog_errno(ret); need_free = 1; goto out_unlock; } - page_end = PAGE_SIZE; - if (PAGE_SIZE > osb->s_clustersize) - page_end = osb->s_clustersize; - - for (i = 0; i < num_pages; i++) - ocfs2_map_and_dirty_page(inode, handle, 0, page_end, - pages[i], i > 0, &phys); + ocfs2_map_and_dirty_page(inode, handle, 0, page_end, page, 0, + &phys); } spin_lock(&oi->ip_lock); @@ -7183,8 +7163,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, } out_unlock: - if (pages) - ocfs2_unlock_and_free_pages(pages, num_pages); + if (page) + ocfs2_unlock_and_free_pages(&page, num_pages); out_commit: if (ret < 0 && did_quota) @@ -7208,8 +7188,6 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, out: if (data_ac) ocfs2_free_alloc_context(data_ac); -free_pages: - kfree(pages); return ret; } diff --git a/fs/ocfs2/dlmfs/userdlm.c b/fs/ocfs2/dlmfs/userdlm.c index 3df5be25bfb1f..d23bc720753ed 100644 --- a/fs/ocfs2/dlmfs/userdlm.c +++ b/fs/ocfs2/dlmfs/userdlm.c @@ -435,6 +435,11 @@ int user_dlm_cluster_lock(struct user_lock_res *lockres, } spin_lock(&lockres->l_lock); + if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { + spin_unlock(&lockres->l_lock); + status = -EAGAIN; + goto bail; + } /* We only compare against the currently granted level * here. If the lock is blocked waiting on a downconvert, @@ -601,7 +606,7 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres) spin_lock(&lockres->l_lock); if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { spin_unlock(&lockres->l_lock); - return 0; + goto bail; } lockres->l_flags |= USER_LOCK_IN_TEARDOWN; @@ -615,12 +620,17 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres) } if (lockres->l_ro_holders || lockres->l_ex_holders) { + lockres->l_flags &= ~USER_LOCK_IN_TEARDOWN; spin_unlock(&lockres->l_lock); goto bail; } status = 0; if (!(lockres->l_flags & USER_LOCK_ATTACHED)) { + /* + * lock is never requested, leave USER_LOCK_IN_TEARDOWN set + * to avoid new lock request coming in. + */ spin_unlock(&lockres->l_lock); goto bail; } @@ -631,6 +641,10 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres) status = ocfs2_dlm_unlock(conn, &lockres->l_lksb, DLM_LKF_VALBLK); if (status) { + spin_lock(&lockres->l_lock); + lockres->l_flags &= ~USER_LOCK_IN_TEARDOWN; + lockres->l_flags &= ~USER_LOCK_BUSY; + spin_unlock(&lockres->l_lock); user_log_dlm_error("ocfs2_dlm_unlock", status, lockres); goto bail; } diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 50a863fc17792..207ec61569ea4 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -3933,7 +3933,7 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, oi = OCFS2_I(inode); oi->ip_dir_lock_gen++; mlog(0, "generation: %u\n", oi->ip_dir_lock_gen); - goto out; + goto out_forget; } if (!S_ISREG(inode->i_mode)) @@ -3964,6 +3964,7 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, filemap_fdatawait(mapping); } +out_forget: forget_all_cached_acls(inode); out: diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 58d4546a208e6..c30747bdfb127 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -478,10 +478,11 @@ int ocfs2_truncate_file(struct inode *inode, * greater than page size, so we have to truncate them * anyway. */ - unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1); - truncate_inode_pages(inode->i_mapping, new_i_size); if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { + unmap_mapping_range(inode->i_mapping, + new_i_size + PAGE_SIZE - 1, 0, 1); + truncate_inode_pages(inode->i_mapping, new_i_size); status = ocfs2_truncate_inline(inode, di_bh, new_i_size, i_size_read(inode), 1); if (status) @@ -500,6 +501,9 @@ int ocfs2_truncate_file(struct inode *inode, goto bail_unlock_sem; } + unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1); + truncate_inode_pages(inode->i_mapping, new_i_size); + status = ocfs2_commit_truncate(osb, inode, di_bh); if (status < 0) { mlog_errno(status); diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 60b4b6df1ed36..c1cf67b24c19b 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1100,17 +1100,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) goto read_super_error; } - root = d_make_root(inode); - if (!root) { - status = -ENOMEM; - mlog_errno(status); - goto read_super_error; - } - - sb->s_root = root; - - ocfs2_complete_mount_recovery(osb); - osb->osb_dev_kset = kset_create_and_add(sb->s_id, NULL, &ocfs2_kset->kobj); if (!osb->osb_dev_kset) { @@ -1128,6 +1117,17 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) goto read_super_error; } + root = d_make_root(inode); + if (!root) { + status = -ENOMEM; + mlog_errno(status); + goto read_super_error; + } + + sb->s_root = root; + + ocfs2_complete_mount_recovery(osb); + if (ocfs2_mount_local(osb)) snprintf(nodestr, sizeof(nodestr), "local"); else @@ -2150,11 +2150,17 @@ static int ocfs2_initialize_super(struct super_block *sb, } if (ocfs2_clusterinfo_valid(osb)) { + /* + * ci_stack and ci_cluster in ocfs2_cluster_info may not be null + * terminated, so make sure no overflow happens here by using + * memcpy. Destination strings will always be null terminated + * because osb is allocated using kzalloc. + */ osb->osb_stackflags = OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags; - strlcpy(osb->osb_cluster_stack, + memcpy(osb->osb_cluster_stack, OCFS2_RAW_SB(di)->s_cluster_info.ci_stack, - OCFS2_STACK_LABEL_LEN + 1); + OCFS2_STACK_LABEL_LEN); if (strlen(osb->osb_cluster_stack) != OCFS2_STACK_LABEL_LEN) { mlog(ML_ERROR, "couldn't mount because of an invalid " @@ -2163,9 +2169,9 @@ static int ocfs2_initialize_super(struct super_block *sb, status = -EINVAL; goto bail; } - strlcpy(osb->osb_cluster_name, + memcpy(osb->osb_cluster_name, OCFS2_RAW_SB(di)->s_cluster_info.ci_cluster, - OCFS2_CLUSTER_NAME_LEN + 1); + OCFS2_CLUSTER_NAME_LEN); } else { /* The empty string is identical with classic tools that * don't know about s_cluster_info. */ diff --git a/fs/open.c b/fs/open.c index 0c976e884b027..828eb7b78f198 100644 --- a/fs/open.c +++ b/fs/open.c @@ -743,9 +743,8 @@ static int do_dentry_open(struct file *f, path_get(&f->f_path); f->f_inode = inode; f->f_mapping = inode->i_mapping; - - /* Ensure that we skip any errors that predate opening of the file */ f->f_wb_err = filemap_sample_wb_err(f->f_mapping); + f->f_sb_err = file_sample_sb_err(f); if (unlikely(f->f_flags & O_PATH)) { f->f_mode = FMODE_PATH | FMODE_OPENED; diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c index fe484cf93e5cd..8bbe9486e3a62 100644 --- a/fs/orangefs/dcache.c +++ b/fs/orangefs/dcache.c @@ -26,8 +26,10 @@ static int orangefs_revalidate_lookup(struct dentry *dentry) gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: attempting lookup.\n", __func__); new_op = op_alloc(ORANGEFS_VFS_OP_LOOKUP); - if (!new_op) + if (!new_op) { + ret = -ENOMEM; goto out_put_parent; + } new_op->upcall.req.lookup.sym_follow = ORANGEFS_LOOKUP_LINK_NO_FOLLOW; new_op->upcall.req.lookup.parent_refn = parent->refn; diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index 2bb916d68576f..023b9bc54b7ce 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -179,7 +179,7 @@ orangefs_bufmap_free(struct orangefs_bufmap *bufmap) { kfree(bufmap->page_array); kfree(bufmap->desc_array); - kfree(bufmap->buffer_index_array); + bitmap_free(bufmap->buffer_index_array); kfree(bufmap); } @@ -229,8 +229,7 @@ orangefs_bufmap_alloc(struct ORANGEFS_dev_map_desc *user_desc) bufmap->desc_size = user_desc->size; bufmap->desc_shift = ilog2(bufmap->desc_size); - bufmap->buffer_index_array = - kzalloc(DIV_ROUND_UP(bufmap->desc_count, BITS_PER_LONG), GFP_KERNEL); + bufmap->buffer_index_array = bitmap_zalloc(bufmap->desc_count, GFP_KERNEL); if (!bufmap->buffer_index_array) goto out_free_bufmap; @@ -253,7 +252,7 @@ orangefs_bufmap_alloc(struct ORANGEFS_dev_map_desc *user_desc) out_free_desc_array: kfree(bufmap->desc_array); out_free_index_array: - kfree(bufmap->buffer_index_array); + bitmap_free(bufmap->buffer_index_array); out_free_bufmap: kfree(bufmap); out: diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 143c52de97ecc..3e5f412471dbf 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -117,7 +117,8 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new) return error; } -static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) +static int ovl_copy_up_data(struct ovl_fs *ofs, struct path *old, + struct path *new, loff_t len) { struct file *old_file; struct file *new_file; @@ -170,7 +171,7 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) len -= bytes; } out: - if (!error) + if (!error && ovl_should_sync(ofs)) error = vfs_fsync(new_file, 0); fput(new_file); out_fput: @@ -439,6 +440,7 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c) static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp) { + struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info; int err; /* @@ -454,7 +456,8 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp) upperpath.dentry = temp; ovl_path_lowerdata(c->dentry, &datapath); - err = ovl_copy_up_data(&datapath, &upperpath, c->stat.size); + err = ovl_copy_up_data(ofs, &datapath, &upperpath, + c->stat.size); if (err) return err; } @@ -738,6 +741,7 @@ static bool ovl_need_meta_copy_up(struct dentry *dentry, umode_t mode, /* Copy up data of an inode which was copied up metadata only in the past. */ static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c) { + struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info; struct path upperpath, datapath; int err; char *capability = NULL; @@ -758,7 +762,7 @@ static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c) goto out; } - err = ovl_copy_up_data(&datapath, &upperpath, c->stat.size); + err = ovl_copy_up_data(ofs, &datapath, &upperpath, c->stat.size); if (err) goto out_free; diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 6509ec3cb3730..a07c128a2be5b 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -42,7 +42,7 @@ int ovl_cleanup(struct inode *wdir, struct dentry *wdentry) return err; } -static struct dentry *ovl_lookup_temp(struct dentry *workdir) +struct dentry *ovl_lookup_temp(struct dentry *workdir) { struct dentry *temp; char name[20]; @@ -113,8 +113,7 @@ int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir, goto out; } -static int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry, - umode_t mode) +int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry, umode_t mode) { int err; struct dentry *d, *dentry = *newdentry; @@ -243,6 +242,9 @@ static int ovl_instantiate(struct dentry *dentry, struct inode *inode, ovl_dir_modified(dentry->d_parent, false); ovl_dentry_set_upper_alias(dentry); + ovl_dentry_update_reval(dentry, newdentry, + DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE); + if (!hardlink) { /* * ovl_obtain_alias() can be called after ovl_create_real() @@ -513,8 +515,10 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, goto out_cleanup; } err = ovl_instantiate(dentry, inode, newdentry, hardlink); - if (err) - goto out_cleanup; + if (err) { + ovl_cleanup(udir, newdentry); + dput(newdentry); + } out_dput: dput(upper); out_unlock: @@ -556,28 +560,42 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode, goto out_revert_creds; } - err = -ENOMEM; - override_cred = prepare_creds(); - if (override_cred) { + if (!attr->hardlink) { + err = -ENOMEM; + override_cred = prepare_creds(); + if (!override_cred) + goto out_revert_creds; + /* + * In the creation cases(create, mkdir, mknod, symlink), + * ovl should transfer current's fs{u,g}id to underlying + * fs. Because underlying fs want to initialize its new + * inode owner using current's fs{u,g}id. And in this + * case, the @inode is a new inode that is initialized + * in inode_init_owner() to current's fs{u,g}id. So use + * the inode's i_{u,g}id to override the cred's fs{u,g}id. + * + * But in the other hardlink case, ovl_link() does not + * create a new inode, so just use the ovl mounter's + * fs{u,g}id. + */ override_cred->fsuid = inode->i_uid; override_cred->fsgid = inode->i_gid; - if (!attr->hardlink) { - err = security_dentry_create_files_as(dentry, - attr->mode, &dentry->d_name, old_cred, - override_cred); - if (err) { - put_cred(override_cred); - goto out_revert_creds; - } + err = security_dentry_create_files_as(dentry, + attr->mode, &dentry->d_name, old_cred, + override_cred); + if (err) { + put_cred(override_cred); + goto out_revert_creds; } put_cred(override_creds(override_cred)); put_cred(override_cred); - - if (!ovl_dentry_is_whiteout(dentry)) - err = ovl_create_upper(dentry, inode, attr); - else - err = ovl_create_over_whiteout(dentry, inode, attr); } + + if (!ovl_dentry_is_whiteout(dentry)) + err = ovl_create_upper(dentry, inode, attr); + else + err = ovl_create_over_whiteout(dentry, inode, attr); + out_revert_creds: revert_creds(old_cred); return err; @@ -1160,9 +1178,13 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, goto out_dput; } } else { - if (!d_is_negative(newdentry) && - (!new_opaque || !ovl_is_whiteout(newdentry))) - goto out_dput; + if (!d_is_negative(newdentry)) { + if (!new_opaque || !ovl_is_whiteout(newdentry)) + goto out_dput; + } else { + if (flags & RENAME_EXCHANGE) + goto out_dput; + } } if (olddentry == trap) diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 11dd8177770df..7e67b8c6452c7 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -274,7 +274,7 @@ static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len, return FILEID_INVALID; dentry = d_find_any_alias(inode); - if (WARN_ON(!dentry)) + if (!dentry) return FILEID_INVALID; type = ovl_dentry_to_fh(dentry, fid, max_len); @@ -333,6 +333,8 @@ static struct dentry *ovl_obtain_alias(struct super_block *sb, if (upper_alias) ovl_dentry_set_upper_alias(dentry); } + ovl_dentry_update_reval(dentry, upper, + DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE); return d_instantiate_anon(dentry, inode); @@ -395,6 +397,7 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected, */ take_dentry_name_snapshot(&name, real); this = lookup_one_len(name.name.name, connected, name.name.len); + release_dentry_name_snapshot(&name); err = PTR_ERR(this); if (IS_ERR(this)) { goto fail; @@ -409,7 +412,6 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected, } out: - release_dentry_name_snapshot(&name); dput(parent); inode_unlock(dir); return this; diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index ab5e92897270a..af733eec4e424 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -213,9 +213,8 @@ static void ovl_file_accessed(struct file *file) touch_atime(&file->f_path); } -static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb) +static rwf_t ovl_iocb_to_rwf(int ifl) { - int ifl = iocb->ki_flags; rwf_t flags = 0; if (ifl & IOCB_NOWAIT) @@ -246,7 +245,7 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) old_cred = ovl_override_creds(file_inode(file)->i_sb); ret = vfs_iter_read(real.file, iter, &iocb->ki_pos, - ovl_iocb_to_rwf(iocb)); + ovl_iocb_to_rwf(iocb->ki_flags)); revert_creds(old_cred); ovl_file_accessed(file); @@ -263,6 +262,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) struct fd real; const struct cred *old_cred; ssize_t ret; + int ifl = iocb->ki_flags; if (!iov_iter_count(iter)) return 0; @@ -278,10 +278,13 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) if (ret) goto out_unlock; + if (!ovl_should_sync(inode->i_sb->s_fs_info)) + ifl &= ~(IOCB_DSYNC | IOCB_SYNC); + old_cred = ovl_override_creds(file_inode(file)->i_sb); file_start_write(real.file); ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, - ovl_iocb_to_rwf(iocb)); + ovl_iocb_to_rwf(ifl)); file_end_write(real.file); revert_creds(old_cred); @@ -296,45 +299,48 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) return ret; } -static ssize_t ovl_splice_read(struct file *in, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, - unsigned int flags) +/* + * Calling iter_file_splice_write() directly from overlay's f_op may deadlock + * due to lock order inversion between pipe->mutex in iter_file_splice_write() + * and file_start_write(real.file) in ovl_write_iter(). + * + * So do everything ovl_write_iter() does and call iter_file_splice_write() on + * the real file. + */ +static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags) { - ssize_t ret; struct fd real; const struct cred *old_cred; + struct inode *inode = file_inode(out); + struct inode *realinode = ovl_inode_real(inode); + ssize_t ret; - ret = ovl_real_fdget(in, &real); + inode_lock(inode); + /* Update mode */ + ovl_copyattr(realinode, inode); + ret = file_remove_privs(out); if (ret) - return ret; - - old_cred = ovl_override_creds(file_inode(in)->i_sb); - ret = generic_file_splice_read(real.file, ppos, pipe, len, flags); - revert_creds(old_cred); - - ovl_file_accessed(in); - fdput(real); - return ret; -} - -static ssize_t -ovl_splice_write(struct pipe_inode_info *pipe, struct file *out, - loff_t *ppos, size_t len, unsigned int flags) -{ - struct fd real; - const struct cred *old_cred; - ssize_t ret; + goto out_unlock; ret = ovl_real_fdget(out, &real); if (ret) - return ret; + goto out_unlock; + + old_cred = ovl_override_creds(inode->i_sb); + file_start_write(real.file); - old_cred = ovl_override_creds(file_inode(out)->i_sb); ret = iter_file_splice_write(pipe, real.file, ppos, len, flags); - revert_creds(old_cred); - ovl_file_accessed(out); + file_end_write(real.file); + /* Update size */ + ovl_copyattr(realinode, inode); + revert_creds(old_cred); fdput(real); + +out_unlock: + inode_unlock(inode); + return ret; } @@ -344,6 +350,10 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync) const struct cred *old_cred; int ret; + ret = ovl_sync_status(file_inode(file)->i_sb->s_fs_info); + if (ret <= 0) + return ret; + ret = ovl_real_fdget_meta(file, &real, !datasync); if (ret) return ret; @@ -694,7 +704,7 @@ const struct file_operations ovl_file_operations = { .fadvise = ovl_fadvise, .unlocked_ioctl = ovl_ioctl, .compat_ioctl = ovl_compat_ioctl, - .splice_read = ovl_splice_read, + .splice_read = generic_file_splice_read, .splice_write = ovl_splice_write, .copy_file_range = ovl_copy_file_range, diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 56b55397a7a00..84167332b92e0 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -58,6 +58,24 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) attr->ia_valid &= ~ATTR_MODE; + /* + * We might have to translate ovl file into real file object + * once use cases emerge. For now, simply don't let underlying + * filesystem rely on attr->ia_file + */ + attr->ia_valid &= ~ATTR_FILE; + + /* + * If open(O_TRUNC) is done, VFS calls ->setattr with ATTR_OPEN + * set. Overlayfs does not pass O_TRUNC flag to underlying + * filesystem during open -> do not pass ATTR_OPEN. This + * disables optimization in fuse which assumes open(O_TRUNC) + * already set file size to 0. But we never passed O_TRUNC to + * fuse. So by clearing ATTR_OPEN, fuse will be forced to send + * setattr request to server. + */ + attr->ia_valid &= ~ATTR_OPEN; + inode_lock(upperdentry->d_inode); old_cred = ovl_override_creds(dentry->d_sb); err = notify_change(upperdentry, attr, NULL); diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index f47c591402d76..1c1e1c534a5b0 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -851,7 +851,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, if (err) goto out; - if (upperdentry && unlikely(ovl_dentry_remote(upperdentry))) { + if (upperdentry && upperdentry->d_flags & DCACHE_OP_REAL) { dput(upperdentry); err = -EREMOTE; goto out; @@ -1082,6 +1082,9 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, goto out_free_oe; } + ovl_dentry_update_reval(dentry, upperdentry, + DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE); + revert_creds(old_cred); if (origin_path) { dput(origin_path->dentry); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 6934bcf030f0b..e546a58dd07ce 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -212,6 +212,8 @@ bool ovl_index_all(struct super_block *sb); bool ovl_verify_lower(struct super_block *sb); struct ovl_entry *ovl_alloc_entry(unsigned int numlower); bool ovl_dentry_remote(struct dentry *dentry); +void ovl_dentry_update_reval(struct dentry *dentry, struct dentry *upperdentry, + unsigned int mask); bool ovl_dentry_weird(struct dentry *dentry); enum ovl_path_type ovl_path_type(struct dentry *dentry); void ovl_path_upper(struct dentry *dentry, struct path *path); @@ -277,6 +279,7 @@ bool ovl_is_metacopy_dentry(struct dentry *dentry); char *ovl_get_redirect_xattr(struct dentry *dentry, int padding); ssize_t ovl_getxattr(struct dentry *dentry, char *name, char **value, size_t padding); +int ovl_sync_status(struct ovl_fs *ofs); static inline bool ovl_is_impuredir(struct dentry *dentry) { @@ -409,9 +412,11 @@ struct ovl_cattr { #define OVL_CATTR(m) (&(struct ovl_cattr) { .mode = (m) }) +int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry, umode_t mode); struct dentry *ovl_create_real(struct inode *dir, struct dentry *newdentry, struct ovl_cattr *attr); int ovl_cleanup(struct inode *dir, struct dentry *dentry); +struct dentry *ovl_lookup_temp(struct dentry *workdir); struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr); /* file.c */ diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 28348c44ea5b2..547f2925ba307 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -17,6 +17,7 @@ struct ovl_config { bool nfs_export; int xino; bool metacopy; + bool ovl_volatile; }; struct ovl_sb { @@ -73,8 +74,15 @@ struct ovl_fs { struct inode *indexdir_trap; /* Inode numbers in all layers do not use the high xino_bits */ unsigned int xino_bits; + /* r/o snapshot of upperdir sb's only taken on volatile mounts */ + errseq_t errseq; }; +static inline bool ovl_should_sync(struct ovl_fs *ofs) +{ + return !ofs->config.ovl_volatile; +} + /* private information held for every overlayfs dentry */ struct ovl_entry { union { diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 7255e6a5838f8..1d30428879a7f 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -826,11 +826,16 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, struct ovl_dir_file *od = file->private_data; struct dentry *dentry = file->f_path.dentry; struct file *realfile = od->realfile; + int err; /* Nothing to sync for lower */ if (!OVL_TYPE_UPPER(ovl_path_type(dentry))) return 0; + err = ovl_sync_status(dentry->d_sb->s_fs_info); + if (err <= 0) + return err; + /* * Need to check if we started out being a lower dir, but got copied up */ diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index f036d7544d4a6..ef7ca491e9761 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -116,53 +116,54 @@ static struct dentry *ovl_d_real(struct dentry *dentry, return dentry; } -static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags) +static int ovl_revalidate_real(struct dentry *d, unsigned int flags, bool weak) { - struct ovl_entry *oe = dentry->d_fsdata; - unsigned int i; int ret = 1; - for (i = 0; i < oe->numlower; i++) { - struct dentry *d = oe->lowerstack[i].dentry; - - if (d->d_flags & DCACHE_OP_REVALIDATE) { - ret = d->d_op->d_revalidate(d, flags); - if (ret < 0) - return ret; - if (!ret) { - if (!(flags & LOOKUP_RCU)) - d_invalidate(d); - return -ESTALE; - } + if (weak) { + if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) + ret = d->d_op->d_weak_revalidate(d, flags); + } else if (d->d_flags & DCACHE_OP_REVALIDATE) { + ret = d->d_op->d_revalidate(d, flags); + if (!ret) { + if (!(flags & LOOKUP_RCU)) + d_invalidate(d); + ret = -ESTALE; } } - return 1; + return ret; } -static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags) +static int ovl_dentry_revalidate_common(struct dentry *dentry, + unsigned int flags, bool weak) { struct ovl_entry *oe = dentry->d_fsdata; + struct dentry *upper; unsigned int i; int ret = 1; - for (i = 0; i < oe->numlower; i++) { - struct dentry *d = oe->lowerstack[i].dentry; + upper = ovl_dentry_upper(dentry); + if (upper) + ret = ovl_revalidate_real(upper, flags, weak); - if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) { - ret = d->d_op->d_weak_revalidate(d, flags); - if (ret <= 0) - break; - } + for (i = 0; ret > 0 && i < oe->numlower; i++) { + ret = ovl_revalidate_real(oe->lowerstack[i].dentry, flags, + weak); } return ret; } -static const struct dentry_operations ovl_dentry_operations = { - .d_release = ovl_dentry_release, - .d_real = ovl_d_real, -}; +static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags) +{ + return ovl_dentry_revalidate_common(dentry, flags, false); +} -static const struct dentry_operations ovl_reval_dentry_operations = { +static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags) +{ + return ovl_dentry_revalidate_common(dentry, flags, true); +} + +static const struct dentry_operations ovl_dentry_operations = { .d_release = ovl_dentry_release, .d_real = ovl_d_real, .d_revalidate = ovl_dentry_revalidate, @@ -259,8 +260,19 @@ static int ovl_sync_fs(struct super_block *sb, int wait) struct super_block *upper_sb; int ret; - if (!ofs->upper_mnt) - return 0; + ret = ovl_sync_status(ofs); + /* + * We have to always set the err, because the return value isn't + * checked in syncfs, and instead indirectly return an error via + * the sb's writeback errseq, which VFS inspects after this call. + */ + if (ret < 0) { + errseq_set(&sb->s_wb_err, -EIO); + return -EIO; + } + + if (!ret) + return ret; /* * If this is a sync(2) call or an emergency sync, all the super blocks @@ -366,6 +378,8 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry) if (ofs->config.metacopy != ovl_metacopy_def) seq_printf(m, ",metacopy=%s", ofs->config.metacopy ? "on" : "off"); + if (ofs->config.ovl_volatile) + seq_puts(m, ",volatile"); return 0; } @@ -406,6 +420,7 @@ enum { OPT_XINO_AUTO, OPT_METACOPY_ON, OPT_METACOPY_OFF, + OPT_VOLATILE, OPT_ERR, }; @@ -424,6 +439,7 @@ static const match_table_t ovl_tokens = { {OPT_XINO_AUTO, "xino=auto"}, {OPT_METACOPY_ON, "metacopy=on"}, {OPT_METACOPY_OFF, "metacopy=off"}, + {OPT_VOLATILE, "volatile"}, {OPT_ERR, NULL} }; @@ -562,6 +578,10 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) config->metacopy = false; break; + case OPT_VOLATILE: + config->ovl_volatile = true; + break; + default: pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); return -EINVAL; @@ -576,6 +596,11 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) config->workdir = NULL; } + if (!config->upperdir && config->ovl_volatile) { + pr_info("option \"volatile\" is meaningless in a non-upper mount, ignoring it.\n"); + config->ovl_volatile = false; + } + err = ovl_parse_redirect_mode(config, config->redirect_mode); if (err) return err; @@ -650,10 +675,14 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, goto retry; } - work = ovl_create_real(dir, work, OVL_CATTR(attr.ia_mode)); - err = PTR_ERR(work); - if (IS_ERR(work)) - goto out_err; + err = ovl_mkdir_real(dir, &work, attr.ia_mode); + if (err) + goto out_dput; + + /* Weird filesystem returning with hashed negative (kernfs)? */ + err = -EINVAL; + if (d_really_is_negative(work)) + goto out_dput; /* * Try to remove POSIX ACL xattrs from workdir. We are good if: @@ -753,13 +782,12 @@ static int ovl_mount_dir(const char *name, struct path *path) ovl_unescape(tmp); err = ovl_mount_dir_noesc(tmp, path); - if (!err) - if (ovl_dentry_remote(path->dentry)) { - pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n", - tmp); - path_put_init(path); - err = -EINVAL; - } + if (!err && path->dentry->d_flags & DCACHE_OP_REAL) { + pr_err("filesystem on '%s' not supported as upperdir\n", + tmp); + path_put_init(path); + err = -EINVAL; + } kfree(tmp); } return err; @@ -780,7 +808,7 @@ static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs, } static int ovl_lower_dir(const char *name, struct path *path, - struct ovl_fs *ofs, int *stack_depth, bool *remote) + struct ovl_fs *ofs, int *stack_depth) { int fh_type; int err; @@ -795,9 +823,6 @@ static int ovl_lower_dir(const char *name, struct path *path, *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth); - if (ovl_dentry_remote(path->dentry)) - *remote = true; - /* * The inodes index feature and NFS export need to encode and decode * file handles, so they require that all layers support them. @@ -1076,11 +1101,112 @@ static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs, return err; } +static struct dentry *ovl_lookup_or_create(struct dentry *parent, + const char *name, umode_t mode) +{ + size_t len = strlen(name); + struct dentry *child; + + inode_lock_nested(parent->d_inode, I_MUTEX_PARENT); + child = lookup_one_len(name, parent, len); + if (!IS_ERR(child) && !child->d_inode) + child = ovl_create_real(parent->d_inode, child, + OVL_CATTR(mode)); + inode_unlock(parent->d_inode); + dput(parent); + + return child; +} + +/* + * Creates $workdir/work/incompat/volatile/dirty file if it is not already + * present. + */ +static int ovl_create_volatile_dirty(struct ovl_fs *ofs) +{ + unsigned int ctr; + struct dentry *d = dget(ofs->workbasedir); + static const char *const volatile_path[] = { + OVL_WORKDIR_NAME, "incompat", "volatile", "dirty" + }; + const char *const *name = volatile_path; + + for (ctr = ARRAY_SIZE(volatile_path); ctr; ctr--, name++) { + d = ovl_lookup_or_create(d, *name, ctr > 1 ? S_IFDIR : S_IFREG); + if (IS_ERR(d)) + return PTR_ERR(d); + } + dput(d); + return 0; +} + +/* + * Returns 1 if RENAME_WHITEOUT is supported, 0 if not supported and + * negative values if error is encountered. + */ +static int ovl_check_rename_whiteout(struct dentry *workdir) +{ + struct inode *dir = d_inode(workdir); + struct dentry *temp; + struct dentry *dest; + struct dentry *whiteout; + struct name_snapshot name; + int err; + + inode_lock_nested(dir, I_MUTEX_PARENT); + + temp = ovl_create_temp(workdir, OVL_CATTR(S_IFREG | 0)); + err = PTR_ERR(temp); + if (IS_ERR(temp)) + goto out_unlock; + + dest = ovl_lookup_temp(workdir); + err = PTR_ERR(dest); + if (IS_ERR(dest)) { + dput(temp); + goto out_unlock; + } + + /* Name is inline and stable - using snapshot as a copy helper */ + take_dentry_name_snapshot(&name, temp); + err = ovl_do_rename(dir, temp, dir, dest, RENAME_WHITEOUT); + if (err) { + if (err == -EINVAL) + err = 0; + goto cleanup_temp; + } + + whiteout = lookup_one_len(name.name.name, workdir, name.name.len); + err = PTR_ERR(whiteout); + if (IS_ERR(whiteout)) + goto cleanup_temp; + + err = ovl_is_whiteout(whiteout); + + /* Best effort cleanup of whiteout and temp file */ + if (err) + ovl_cleanup(dir, whiteout); + dput(whiteout); + +cleanup_temp: + ovl_cleanup(dir, temp); + release_dentry_name_snapshot(&name); + dput(temp); + dput(dest); + +out_unlock: + inode_unlock(dir); + + return err; +} + static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, struct path *workpath) { struct vfsmount *mnt = ofs->upper_mnt; struct dentry *temp; + bool rename_whiteout; + bool d_type; int fh_type; int err; @@ -1106,11 +1232,8 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, if (err < 0) goto out; - /* - * We allowed this configuration and don't want to break users over - * kernel upgrade. So warn instead of erroring out. - */ - if (!err) + d_type = err; + if (!d_type) pr_warn("overlayfs: upper fs needs to support d_type.\n"); /* Check if upper/work fs supports O_TMPFILE */ @@ -1121,6 +1244,16 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, else pr_warn("overlayfs: upper fs does not support tmpfile.\n"); + + /* Check if upper/work fs supports RENAME_WHITEOUT */ + err = ovl_check_rename_whiteout(ofs->workdir); + if (err < 0) + goto out; + + rename_whiteout = err; + if (!rename_whiteout) + pr_warn("upper fs does not support RENAME_WHITEOUT.\n"); + /* * Check if upper/work fs supports trusted.overlay.* xattr */ @@ -1135,6 +1268,30 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE); } + /* + * We allowed sub-optimal upper fs configuration and don't want to break + * users over kernel upgrade, but we never allowed remote upper fs, so + * we can enforce strict requirements for remote upper fs. + */ + if (ovl_dentry_remote(ofs->workdir) && + (!d_type || !rename_whiteout || ofs->noxattr)) { + pr_err("upper fs missing required features.\n"); + err = -EINVAL; + goto out; + } + + /* + * For volatile mount, create a incompat/volatile/dirty file to keep + * track of it. + */ + if (ofs->config.ovl_volatile) { + err = ovl_create_volatile_dirty(ofs); + if (err < 0) { + pr_err("Failed to create volatile/dirty file.\n"); + goto out; + } + } + /* Check if upper/work fs supports file handles */ fh_type = ovl_can_decode_fh(ofs->workdir->d_sb); if (ofs->config.index && !fh_type) { @@ -1441,7 +1598,6 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, char *lowertmp, *lower; struct path *stack = NULL; unsigned int stacklen, numlower = 0, i; - bool remote = false; struct ovl_entry *oe; err = -ENOMEM; @@ -1473,7 +1629,7 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, lower = lowertmp; for (numlower = 0; numlower < stacklen; numlower++) { err = ovl_lower_dir(lower, &stack[numlower], ofs, - &sb->s_stack_depth, &remote); + &sb->s_stack_depth); if (err) goto out_err; @@ -1501,11 +1657,6 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, oe->lowerstack[i].layer = &ofs->lower_layers[i]; } - if (remote) - sb->s_d_op = &ovl_reval_dentry_operations; - else - sb->s_d_op = &ovl_dentry_operations; - out: for (i = 0; i < numlower; i++) path_put(&stack[i]); @@ -1601,6 +1752,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) struct cred *cred; int err; + sb->s_d_op = &ovl_dentry_operations; + err = -ENOMEM; ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL); if (!ofs) @@ -1635,6 +1788,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) sb->s_op = &ovl_super_operations; if (ofs->config.upperdir) { + struct super_block *upper_sb; + if (!ofs->config.workdir) { pr_err("overlayfs: missing 'workdir'\n"); goto out_err; @@ -1644,6 +1799,16 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (err) goto out_err; + upper_sb = ofs->upper_mnt->mnt_sb; + if (!ovl_should_sync(ofs)) { + ofs->errseq = errseq_sample(&upper_sb->s_wb_err); + if (errseq_check(&upper_sb->s_wb_err, ofs->errseq)) { + err = -EIO; + pr_err("Cannot mount volatile when upperdir has an unseen error. Sync upperdir fs to clear state.\n"); + goto out_err; + } + } + err = ovl_get_workdir(sb, ofs, &upperpath); if (err) goto out_err; @@ -1651,9 +1816,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (!ofs->workdir) sb->s_flags |= SB_RDONLY; - sb->s_stack_depth = ofs->upper_mnt->mnt_sb->s_stack_depth; - sb->s_time_gran = ofs->upper_mnt->mnt_sb->s_time_gran; - + sb->s_stack_depth = upper_sb->s_stack_depth; + sb->s_time_gran = upper_sb->s_time_gran; } oe = ovl_get_lowerstack(sb, ofs); err = PTR_ERR(oe); @@ -1727,6 +1891,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) ovl_set_upperdata(d_inode(root_dentry)); ovl_inode_init(d_inode(root_dentry), upperpath.dentry, ovl_dentry_lower(root_dentry), NULL); + ovl_dentry_update_reval(root_dentry, upperpath.dentry, + DCACHE_OP_WEAK_REVALIDATE); sb->s_root = root_dentry; diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index f5678a3f83508..c938ac37fc8ac 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -105,8 +105,24 @@ struct ovl_entry *ovl_alloc_entry(unsigned int numlower) bool ovl_dentry_remote(struct dentry *dentry) { return dentry->d_flags & - (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE | - DCACHE_OP_REAL); + (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE); +} + +void ovl_dentry_update_reval(struct dentry *dentry, struct dentry *upperdentry, + unsigned int mask) +{ + struct ovl_entry *oe = OVL_E(dentry); + unsigned int i, flags = 0; + + if (upperdentry) + flags |= upperdentry->d_flags; + for (i = 0; i < oe->numlower; i++) + flags |= oe->lowerstack[i].dentry->d_flags; + + spin_lock(&dentry->d_lock); + dentry->d_flags &= ~mask; + dentry->d_flags |= flags & mask; + spin_unlock(&dentry->d_lock); } bool ovl_dentry_weird(struct dentry *dentry) @@ -936,3 +952,30 @@ char *ovl_get_redirect_xattr(struct dentry *dentry, int padding) kfree(buf); return ERR_PTR(res); } + +/* + * ovl_sync_status() - Check fs sync status for volatile mounts + * + * Returns 1 if this is not a volatile mount and a real sync is required. + * + * Returns 0 if syncing can be skipped because mount is volatile, and no errors + * have occurred on the upperdir since the mount. + * + * Returns -errno if it is a volatile mount, and the error that occurred since + * the last mount. If the error code changes, it'll return the latest error + * code. + */ + +int ovl_sync_status(struct ovl_fs *ofs) +{ + struct vfsmount *mnt; + + if (ovl_should_sync(ofs)) + return 1; + + mnt = ofs->upper_mnt; + if (!mnt) + return 0; + + return errseq_check(&mnt->mnt_sb->s_wb_err, ofs->errseq); +} diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 8c3dbe13e647c..372b4dad4863e 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -446,6 +446,9 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, proc_set_user(ent, (*parent)->uid, (*parent)->gid); ent->proc_dops = &proc_misc_dentry_ops; + /* Revalidate everything under /proc/${pid}/net */ + if ((*parent)->proc_dops == &proc_net_dentry_ops) + pde_force_lookup(ent); out: return ent; diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index e2ed8e08cc7ad..d1cabccc02b7a 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -193,8 +193,6 @@ kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg) return 1; p = pfn_to_page(pfn); - if (!memmap_valid_within(pfn, p, page_zone(p))) - return 1; ent = kmalloc(sizeof(*ent), GFP_KERNEL); if (!ent) diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 313b7c751867f..9cd5b47199cba 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -343,6 +343,9 @@ static __net_init int proc_net_ns_init(struct net *net) proc_set_user(netd, uid, gid); + /* Seed dentry revalidation for /proc/${pid}/net */ + pde_force_lookup(netd); + err = -EEXIST; net_statd = proc_net_mkdir(net, "stat", netd); if (!net_statd) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index d80989b6c3448..f4264dd4ea31b 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "internal.h" static const struct dentry_operations proc_sys_dentry_operations; @@ -1397,6 +1398,38 @@ struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *tab } EXPORT_SYMBOL(register_sysctl); +/** + * __register_sysctl_init() - register sysctl table to path + * @path: path name for sysctl base + * @table: This is the sysctl table that needs to be registered to the path + * @table_name: The name of sysctl table, only used for log printing when + * registration fails + * + * The sysctl interface is used by userspace to query or modify at runtime + * a predefined value set on a variable. These variables however have default + * values pre-set. Code which depends on these variables will always work even + * if register_sysctl() fails. If register_sysctl() fails you'd just loose the + * ability to query or modify the sysctls dynamically at run time. Chances of + * register_sysctl() failing on init are extremely low, and so for both reasons + * this function does not return any error as it is used by initialization code. + * + * Context: Can only be called after your respective sysctl base path has been + * registered. So for instance, most base directories are registered early on + * init before init levels are processed through proc_sys_init() and + * sysctl_init(). + */ +void __init __register_sysctl_init(const char *path, struct ctl_table *table, + const char *table_name) +{ + struct ctl_table_header *hdr = register_sysctl(path, table); + + if (unlikely(!hdr)) { + pr_err("failed when register_sysctl %s to %s\n", table_name, path); + return; + } + kmemleak_not_leak(hdr); +} + static char *append_path(const char *path, char *pos, const char *name) { int namelen; diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 080ca9d5eccbb..b1102a31a1085 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -125,9 +125,13 @@ ssize_t read_from_oldmem(char *buf, size_t count, nr_bytes = count; /* If pfn is not ram, return zeros for sparse dump files */ - if (pfn_is_ram(pfn) == 0) - memset(buf, 0, nr_bytes); - else { + if (pfn_is_ram(pfn) == 0) { + tmp = 0; + if (!userbuf) + memset(buf, 0, nr_bytes); + else if (clear_user(buf, nr_bytes)) + tmp = -EFAULT; + } else { if (encrypted) tmp = copy_oldmem_page_encrypted(pfn, buf, nr_bytes, @@ -136,10 +140,10 @@ ssize_t read_from_oldmem(char *buf, size_t count, else tmp = copy_oldmem_page(pfn, buf, nr_bytes, offset, userbuf); - - if (tmp < 0) - return tmp; } + if (tmp < 0) + return tmp; + *ppos += nr_bytes; count -= nr_bytes; buf += nr_bytes; diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c index a6ee23aadd283..66645a5a35f30 100644 --- a/fs/qnx4/dir.c +++ b/fs/qnx4/dir.c @@ -15,13 +15,48 @@ #include #include "qnx4.h" +/* + * A qnx4 directory entry is an inode entry or link info + * depending on the status field in the last byte. The + * first byte is where the name start either way, and a + * zero means it's empty. + * + * Also, due to a bug in gcc, we don't want to use the + * real (differently sized) name arrays in the inode and + * link entries, but always the 'de_name[]' one in the + * fake struct entry. + * + * See + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99578#c6 + * + * for details, but basically gcc will take the size of the + * 'name' array from one of the used union entries randomly. + * + * This use of 'de_name[]' (48 bytes) avoids the false positive + * warnings that would happen if gcc decides to use 'inode.di_name' + * (16 bytes) even when the pointer and size were to come from + * 'link.dl_name' (48 bytes). + * + * In all cases the actual name pointer itself is the same, it's + * only the gcc internal 'what is the size of this field' logic + * that can get confused. + */ +union qnx4_directory_entry { + struct { + const char de_name[48]; + u8 de_pad[15]; + u8 de_status; + }; + struct qnx4_inode_entry inode; + struct qnx4_link_info link; +}; + static int qnx4_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); unsigned int offset; struct buffer_head *bh; - struct qnx4_inode_entry *de; - struct qnx4_link_info *le; unsigned long blknum; int ix, ino; int size; @@ -38,27 +73,27 @@ static int qnx4_readdir(struct file *file, struct dir_context *ctx) } ix = (ctx->pos >> QNX4_DIR_ENTRY_SIZE_BITS) % QNX4_INODES_PER_BLOCK; for (; ix < QNX4_INODES_PER_BLOCK; ix++, ctx->pos += QNX4_DIR_ENTRY_SIZE) { + union qnx4_directory_entry *de; + offset = ix * QNX4_DIR_ENTRY_SIZE; - de = (struct qnx4_inode_entry *) (bh->b_data + offset); - if (!de->di_fname[0]) + de = (union qnx4_directory_entry *) (bh->b_data + offset); + + if (!de->de_name[0]) continue; - if (!(de->di_status & (QNX4_FILE_USED|QNX4_FILE_LINK))) + if (!(de->de_status & (QNX4_FILE_USED|QNX4_FILE_LINK))) continue; - if (!(de->di_status & QNX4_FILE_LINK)) - size = QNX4_SHORT_NAME_MAX; - else - size = QNX4_NAME_MAX; - size = strnlen(de->di_fname, size); - QNX4DEBUG((KERN_INFO "qnx4_readdir:%.*s\n", size, de->di_fname)); - if (!(de->di_status & QNX4_FILE_LINK)) + if (!(de->de_status & QNX4_FILE_LINK)) { + size = sizeof(de->inode.di_fname); ino = blknum * QNX4_INODES_PER_BLOCK + ix - 1; - else { - le = (struct qnx4_link_info*)de; - ino = ( le32_to_cpu(le->dl_inode_blk) - 1 ) * + } else { + size = sizeof(de->link.dl_fname); + ino = ( le32_to_cpu(de->link.dl_inode_blk) - 1 ) * QNX4_INODES_PER_BLOCK + - le->dl_inode_ndx; + de->link.dl_inode_ndx; } - if (!dir_emit(ctx, de->di_fname, size, ino, DT_UNKNOWN)) { + size = strnlen(de->de_name, size); + QNX4DEBUG((KERN_INFO "qnx4_readdir:%.*s\n", size, name)); + if (!dir_emit(ctx, de->de_name, size, ino, DT_UNKNOWN)) { brelse(bh); return 0; } diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 7abc3230c21a4..dc5f8654b277d 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -693,9 +693,14 @@ int dquot_quota_sync(struct super_block *sb, int type) /* This is not very clever (and fast) but currently I don't know about * any other simple way of getting quota data to disk and we must get * them there for userspace to be visible... */ - if (sb->s_op->sync_fs) - sb->s_op->sync_fs(sb, 1); - sync_blockdev(sb->s_bdev); + if (sb->s_op->sync_fs) { + ret = sb->s_op->sync_fs(sb, 1); + if (ret) + return ret; + } + ret = sync_blockdev(sb->s_bdev); + if (ret) + return ret; /* * Now when everything is written we can discard the pagecache so diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c index c5562c871c8be..1a188fbdf34e5 100644 --- a/fs/quota/quota_tree.c +++ b/fs/quota/quota_tree.c @@ -423,6 +423,7 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot, quota_error(dquot->dq_sb, "Quota structure has offset to " "other block (%u) than it should (%u)", blk, (uint)(dquot->dq_off >> info->dqi_blocksize_bits)); + ret = -EIO; goto out_buf; } ret = read_blk(info, blk, buf); @@ -488,6 +489,13 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot, goto out_buf; } newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]); + if (newblk < QT_TREEOFF || newblk >= info->dqi_blocks) { + quota_error(dquot->dq_sb, "Getting block too big (%u >= %u)", + newblk, info->dqi_blocks); + ret = -EUCLEAN; + goto out_buf; + } + if (depth == info->dqi_qtree_depth - 1) { ret = free_dqentry(info, dquot, newblk); newblk = 0; @@ -587,6 +595,13 @@ static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info, blk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]); if (!blk) /* No reference? */ goto out_buf; + if (blk < QT_TREEOFF || blk >= info->dqi_blocks) { + quota_error(dquot->dq_sb, "Getting block too big (%u >= %u)", + blk, info->dqi_blocks); + ret = -EUCLEAN; + goto out_buf; + } + if (depth < info->dqi_qtree_depth - 1) ret = find_tree_dqentry(info, dquot, blk, depth+1); else diff --git a/fs/read_write.c b/fs/read_write.c index 7458fccc59e18..59d819c5b92e6 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -939,6 +939,34 @@ static ssize_t do_iter_read(struct file *file, struct iov_iter *iter, return ret; } +ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb, + struct iov_iter *iter) +{ + size_t tot_len; + ssize_t ret = 0; + + if (!file->f_op->read_iter) + return -EINVAL; + if (!(file->f_mode & FMODE_READ)) + return -EBADF; + if (!(file->f_mode & FMODE_CAN_READ)) + return -EINVAL; + + tot_len = iov_iter_count(iter); + if (!tot_len) + goto out; + ret = rw_verify_area(READ, file, &iocb->ki_pos, tot_len); + if (ret < 0) + return ret; + + ret = call_read_iter(file, iocb, iter); +out: + if (ret >= 0) + fsnotify_access(file); + return ret; +} +EXPORT_SYMBOL(vfs_iocb_iter_read); + ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos, rwf_t flags) { @@ -975,6 +1003,34 @@ static ssize_t do_iter_write(struct file *file, struct iov_iter *iter, return ret; } +ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb, + struct iov_iter *iter) +{ + size_t tot_len; + ssize_t ret = 0; + + if (!file->f_op->write_iter) + return -EINVAL; + if (!(file->f_mode & FMODE_WRITE)) + return -EBADF; + if (!(file->f_mode & FMODE_CAN_WRITE)) + return -EINVAL; + + tot_len = iov_iter_count(iter); + if (!tot_len) + return 0; + ret = rw_verify_area(WRITE, file, &iocb->ki_pos, tot_len); + if (ret < 0) + return ret; + + ret = call_write_iter(file, iocb, iter); + if (ret > 0) + fsnotify_modify(file); + + return ret; +} +EXPORT_SYMBOL(vfs_iocb_iter_write); + ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos, rwf_t flags) { diff --git a/fs/select.c b/fs/select.c index e51796063cb6e..7716d9d5be1e8 100644 --- a/fs/select.c +++ b/fs/select.c @@ -458,9 +458,11 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds) return max; } -#define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR) -#define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR) -#define POLLEX_SET (EPOLLPRI) +#define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR |\ + EPOLLNVAL) +#define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR |\ + EPOLLNVAL) +#define POLLEX_SET (EPOLLPRI | EPOLLNVAL) static inline void wait_key_set(poll_table *wait, unsigned long in, unsigned long out, unsigned long bit, @@ -527,6 +529,7 @@ static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time) break; if (!(bit & all_bits)) continue; + mask = EPOLLNVAL; f = fdget(i); if (f.file) { wait_key_set(wait, in, out, bit, @@ -534,34 +537,34 @@ static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time) mask = vfs_poll(f.file, wait); fdput(f); - if ((mask & POLLIN_SET) && (in & bit)) { - res_in |= bit; - retval++; - wait->_qproc = NULL; - } - if ((mask & POLLOUT_SET) && (out & bit)) { - res_out |= bit; - retval++; - wait->_qproc = NULL; - } - if ((mask & POLLEX_SET) && (ex & bit)) { - res_ex |= bit; - retval++; - wait->_qproc = NULL; - } - /* got something, stop busy polling */ - if (retval) { - can_busy_loop = false; - busy_flag = 0; - - /* - * only remember a returned - * POLL_BUSY_LOOP if we asked for it - */ - } else if (busy_flag & mask) - can_busy_loop = true; - } + if ((mask & POLLIN_SET) && (in & bit)) { + res_in |= bit; + retval++; + wait->_qproc = NULL; + } + if ((mask & POLLOUT_SET) && (out & bit)) { + res_out |= bit; + retval++; + wait->_qproc = NULL; + } + if ((mask & POLLEX_SET) && (ex & bit)) { + res_ex |= bit; + retval++; + wait->_qproc = NULL; + } + /* got something, stop busy polling */ + if (retval) { + can_busy_loop = false; + busy_flag = 0; + + /* + * only remember a returned + * POLL_BUSY_LOOP if we asked for it + */ + } else if (busy_flag & mask) + can_busy_loop = true; + } if (res_in) *rinp = res_in; diff --git a/fs/signalfd.c b/fs/signalfd.c index 5b78719be4455..3e94d181930fd 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -35,17 +35,7 @@ void signalfd_cleanup(struct sighand_struct *sighand) { - wait_queue_head_t *wqh = &sighand->signalfd_wqh; - /* - * The lockless check can race with remove_wait_queue() in progress, - * but in this case its caller should run under rcu_read_lock() and - * sighand_cachep is SLAB_TYPESAFE_BY_RCU, we can safely return. - */ - if (likely(!waitqueue_active(wqh))) - return; - - /* wait_queue_entry_t->func(POLLFREE) should do remove_wait_queue() */ - wake_up_poll(wqh, EPOLLHUP | POLLFREE); + wake_up_pollfree(&sighand->signalfd_wqh); } struct signalfd_ctx { diff --git a/fs/stat.c b/fs/stat.c index 65cf51fda2aa4..0d96602bbe9e1 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -300,9 +300,6 @@ SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, stat # define choose_32_64(a,b) b #endif -#define valid_dev(x) choose_32_64(old_valid_dev(x),true) -#define encode_dev(x) choose_32_64(old_encode_dev,new_encode_dev)(x) - #ifndef INIT_STRUCT_STAT_PADDING # define INIT_STRUCT_STAT_PADDING(st) memset(&st, 0, sizeof(st)) #endif @@ -311,7 +308,9 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) { struct stat tmp; - if (!valid_dev(stat->dev) || !valid_dev(stat->rdev)) + if (sizeof(tmp.st_dev) < 4 && !old_valid_dev(stat->dev)) + return -EOVERFLOW; + if (sizeof(tmp.st_rdev) < 4 && !old_valid_dev(stat->rdev)) return -EOVERFLOW; #if BITS_PER_LONG == 32 if (stat->size > MAX_NON_LFS) @@ -319,7 +318,7 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) #endif INIT_STRUCT_STAT_PADDING(tmp); - tmp.st_dev = encode_dev(stat->dev); + tmp.st_dev = new_encode_dev(stat->dev); tmp.st_ino = stat->ino; if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) return -EOVERFLOW; @@ -329,7 +328,7 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) return -EOVERFLOW; SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); - tmp.st_rdev = encode_dev(stat->rdev); + tmp.st_rdev = new_encode_dev(stat->rdev); tmp.st_size = stat->size; tmp.st_atime = stat->atime.tv_sec; tmp.st_mtime = stat->mtime.tv_sec; @@ -609,11 +608,13 @@ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) { struct compat_stat tmp; - if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev)) + if (sizeof(tmp.st_dev) < 4 && !old_valid_dev(stat->dev)) + return -EOVERFLOW; + if (sizeof(tmp.st_rdev) < 4 && !old_valid_dev(stat->rdev)) return -EOVERFLOW; memset(&tmp, 0, sizeof(tmp)); - tmp.st_dev = old_encode_dev(stat->dev); + tmp.st_dev = new_encode_dev(stat->dev); tmp.st_ino = stat->ino; if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) return -EOVERFLOW; @@ -623,7 +624,7 @@ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) return -EOVERFLOW; SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); - tmp.st_rdev = old_encode_dev(stat->rdev); + tmp.st_rdev = new_encode_dev(stat->rdev); if ((u64) stat->size > MAX_NON_LFS) return -EOVERFLOW; tmp.st_size = stat->size; diff --git a/fs/super.c b/fs/super.c index 60049ca8cb198..8fc5c416e5fc5 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1470,8 +1470,8 @@ struct dentry *mount_nodev(struct file_system_type *fs_type, } EXPORT_SYMBOL(mount_nodev); -static int reconfigure_single(struct super_block *s, - int flags, void *data) +int reconfigure_single(struct super_block *s, + int flags, void *data) { struct fs_context *fc; int ret; @@ -1697,11 +1697,9 @@ static void lockdep_sb_freeze_acquire(struct super_block *sb) percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_); } -static void sb_freeze_unlock(struct super_block *sb) +static void sb_freeze_unlock(struct super_block *sb, int level) { - int level; - - for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--) + for (level--; level >= 0; level--) percpu_up_write(sb->s_writers.rw_sem + level); } @@ -1772,7 +1770,14 @@ int freeze_super(struct super_block *sb) sb_wait_write(sb, SB_FREEZE_PAGEFAULT); /* All writers are done so after syncing there won't be dirty data */ - sync_filesystem(sb); + ret = sync_filesystem(sb); + if (ret) { + sb->s_writers.frozen = SB_UNFROZEN; + sb_freeze_unlock(sb, SB_FREEZE_PAGEFAULT); + wake_up(&sb->s_writers.wait_unfrozen); + deactivate_locked_super(sb); + return ret; + } /* Now wait for internal filesystem counter */ sb->s_writers.frozen = SB_FREEZE_FS; @@ -1784,7 +1789,7 @@ int freeze_super(struct super_block *sb) printk(KERN_ERR "VFS:Filesystem freeze failed\n"); sb->s_writers.frozen = SB_UNFROZEN; - sb_freeze_unlock(sb); + sb_freeze_unlock(sb, SB_FREEZE_FS); wake_up(&sb->s_writers.wait_unfrozen); deactivate_locked_super(sb); return ret; @@ -1835,7 +1840,7 @@ static int thaw_super_locked(struct super_block *sb) } sb->s_writers.frozen = SB_UNFROZEN; - sb_freeze_unlock(sb); + sb_freeze_unlock(sb, SB_FREEZE_FS); out: wake_up(&sb->s_writers.wait_unfrozen); deactivate_locked_super(sb); diff --git a/fs/sync.c b/fs/sync.c index 4d1ff010bc5af..c6f6f5be5682a 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -161,7 +161,7 @@ SYSCALL_DEFINE1(syncfs, int, fd) { struct fd f = fdget(fd); struct super_block *sb; - int ret; + int ret, ret2; if (!f.file) return -EBADF; @@ -171,8 +171,10 @@ SYSCALL_DEFINE1(syncfs, int, fd) ret = sync_filesystem(sb); up_read(&sb->s_umount); + ret2 = errseq_check_and_advance(&sb->s_wb_err, &f.file->f_sb_err); + fdput(f); - return ret; + return ret ? ret : ret2; } /** diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 0caa151cae4ee..7878f145bf1bf 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -159,6 +159,77 @@ struct tracefs_fs_info { struct tracefs_mount_opts mount_opts; }; +static void change_gid(struct dentry *dentry, kgid_t gid) +{ + if (!dentry->d_inode) + return; + dentry->d_inode->i_gid = gid; +} + +/* + * Taken from d_walk, but without he need for handling renames. + * Nothing can be renamed while walking the list, as tracefs + * does not support renames. This is only called when mounting + * or remounting the file system, to set all the files to + * the given gid. + */ +static void set_gid(struct dentry *parent, kgid_t gid) +{ + struct dentry *this_parent; + struct list_head *next; + + this_parent = parent; + spin_lock(&this_parent->d_lock); + + change_gid(this_parent, gid); +repeat: + next = this_parent->d_subdirs.next; +resume: + while (next != &this_parent->d_subdirs) { + struct list_head *tmp = next; + struct dentry *dentry = list_entry(tmp, struct dentry, d_child); + next = tmp->next; + + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + + change_gid(dentry, gid); + + if (!list_empty(&dentry->d_subdirs)) { + spin_unlock(&this_parent->d_lock); + spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_); + this_parent = dentry; + spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); + goto repeat; + } + spin_unlock(&dentry->d_lock); + } + /* + * All done at this level ... ascend and resume the search. + */ + rcu_read_lock(); +ascend: + if (this_parent != parent) { + struct dentry *child = this_parent; + this_parent = child->d_parent; + + spin_unlock(&child->d_lock); + spin_lock(&this_parent->d_lock); + + /* go into the first sibling still alive */ + do { + next = child->d_child.next; + if (next == &this_parent->d_subdirs) + goto ascend; + child = list_entry(next, struct dentry, d_child); + } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)); + rcu_read_unlock(); + goto resume; + } + rcu_read_unlock(); + spin_unlock(&this_parent->d_lock); + return; +} + static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts) { substring_t args[MAX_OPT_ARGS]; @@ -217,7 +288,9 @@ static int tracefs_apply_options(struct super_block *sb) inode->i_mode |= opts->mode; inode->i_uid = opts->uid; - inode->i_gid = opts->gid; + + /* Set all the group ids to the mount option */ + set_gid(sb->s_root, opts->gid); return 0; } @@ -409,6 +482,8 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode, inode->i_mode = mode; inode->i_fop = fops ? fops : &tracefs_file_operations; inode->i_private = data; + inode->i_uid = d_inode(dentry->d_parent)->i_uid; + inode->i_gid = d_inode(dentry->d_parent)->i_gid; d_instantiate(dentry, inode); fsnotify_create(dentry->d_parent->d_inode, dentry); return end_creating(dentry); @@ -427,9 +502,12 @@ static struct dentry *__create_dir(const char *name, struct dentry *parent, if (unlikely(!inode)) return failed_creating(dentry); - inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; + /* Do not set bits for OTH */ + inode->i_mode = S_IFDIR | S_IRWXU | S_IRUSR| S_IRGRP | S_IXUSR | S_IXGRP; inode->i_op = ops; inode->i_fop = &simple_dir_operations; + inode->i_uid = d_inode(dentry->d_parent)->i_uid; + inode->i_gid = d_inode(dentry->d_parent)->i_gid; /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index eeb93f009b282..83a173feb6989 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -361,15 +361,18 @@ static int do_tmpfile(struct inode *dir, struct dentry *dentry, { struct inode *inode; struct ubifs_info *c = dir->i_sb->s_fs_info; - struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1}; + struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, + .dirtied_ino = 1}; struct ubifs_budget_req ino_req = { .dirtied_ino = 1 }; struct ubifs_inode *ui, *dir_ui = ubifs_inode(dir); int err, instantiated = 0; struct fscrypt_name nm; /* - * Budget request settings: new dirty inode, new direntry, - * budget for dirtied inode will be released via writeback. + * Budget request settings: new inode, new direntry, changing the + * parent directory inode. + * Allocate budget separately for new dirtied inode, the budget will + * be released via writeback. */ dbg_gen("dent '%pd', mode %#hx in dir ino %lu", @@ -439,6 +442,8 @@ static int do_tmpfile(struct inode *dir, struct dentry *dentry, make_bad_inode(inode); if (!instantiated) iput(inode); + else if (whiteout) + iput(*whiteout); out_budg: ubifs_release_budget(c, &req); if (!instantiated) @@ -955,7 +960,8 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct ubifs_inode *dir_ui = ubifs_inode(dir); struct ubifs_info *c = dir->i_sb->s_fs_info; int err, sz_change; - struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1 }; + struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, + .dirtied_ino = 1}; struct fscrypt_name nm; /* @@ -1330,6 +1336,7 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, if (flags & RENAME_WHITEOUT) { union ubifs_dev_desc *dev = NULL; + struct ubifs_budget_req wht_req; dev = kmalloc(sizeof(union ubifs_dev_desc), GFP_NOFS); if (!dev) { @@ -1351,6 +1358,23 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, whiteout_ui->data = dev; whiteout_ui->data_len = ubifs_encode_dev(dev, MKDEV(0, 0)); ubifs_assert(c, !whiteout_ui->dirty); + + memset(&wht_req, 0, sizeof(struct ubifs_budget_req)); + wht_req.dirtied_ino = 1; + wht_req.dirtied_ino_d = ALIGN(whiteout_ui->data_len, 8); + /* + * To avoid deadlock between space budget (holds ui_mutex and + * waits wb work) and writeback work(waits ui_mutex), do space + * budget before ubifs inodes locked. + */ + err = ubifs_budget_space(c, &wht_req); + if (err) { + iput(whiteout); + goto out_release; + } + + /* Add the old_dentry size to the old_dir size. */ + old_sz -= CALC_DENT_SIZE(fname_len(&old_nm)); } lock_4_inodes(old_dir, new_dir, new_inode, whiteout); @@ -1425,18 +1449,6 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, } if (whiteout) { - struct ubifs_budget_req wht_req = { .dirtied_ino = 1, - .dirtied_ino_d = \ - ALIGN(ubifs_inode(whiteout)->data_len, 8) }; - - err = ubifs_budget_space(c, &wht_req); - if (err) { - kfree(whiteout_ui->data); - whiteout_ui->data_len = 0; - iput(whiteout); - goto out_release; - } - inc_nlink(whiteout); mark_inode_dirty(whiteout); diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 8dada89bbe4da..6069c63d833ae 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1629,6 +1629,16 @@ static const char *ubifs_get_link(struct dentry *dentry, return fscrypt_get_symlink(inode, ui->data, ui->data_len, done); } +static int ubifs_symlink_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) +{ + ubifs_getattr(path, stat, request_mask, query_flags); + + if (IS_ENCRYPTED(d_inode(path->dentry))) + return fscrypt_symlink_getattr(path, stat); + return 0; +} + const struct address_space_operations ubifs_file_address_operations = { .readpage = ubifs_readpage, .writepage = ubifs_writepage, @@ -1654,7 +1664,7 @@ const struct inode_operations ubifs_file_inode_operations = { const struct inode_operations ubifs_symlink_inode_operations = { .get_link = ubifs_get_link, .setattr = ubifs_setattr, - .getattr = ubifs_getattr, + .getattr = ubifs_symlink_getattr, #ifdef CONFIG_UBIFS_FS_XATTR .listxattr = ubifs_listxattr, #endif diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index eae9cf5a57b05..89b671ad0f9aa 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -846,16 +846,42 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) */ n = aligned_len >> c->max_write_shift; if (n) { - n <<= c->max_write_shift; + int m = n - 1; + dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, wbuf->offs); - err = ubifs_leb_write(c, wbuf->lnum, buf + written, - wbuf->offs, n); + + if (m) { + /* '(n-1)<max_write_shift < len' is always true. */ + m <<= c->max_write_shift; + err = ubifs_leb_write(c, wbuf->lnum, buf + written, + wbuf->offs, m); + if (err) + goto out; + wbuf->offs += m; + aligned_len -= m; + len -= m; + written += m; + } + + /* + * The non-written len of buf may be less than 'n' because + * parameter 'len' is not 8 bytes aligned, so here we read + * min(len, n) bytes from buf. + */ + n = 1 << c->max_write_shift; + memcpy(wbuf->buf, buf + written, min(len, n)); + if (n > len) { + ubifs_assert(c, n - len < 8); + ubifs_pad(c, wbuf->buf + len, n - len); + } + + err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, n); if (err) goto out; wbuf->offs += n; aligned_len -= n; - len -= n; + len -= min(len, n); written += n; } diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index eeb1be2598881..2923d5a6a7c0b 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -101,7 +101,7 @@ static int setflags(struct inode *inode, int flags) struct ubifs_inode *ui = ubifs_inode(inode); struct ubifs_info *c = inode->i_sb->s_fs_info; struct ubifs_budget_req req = { .dirtied_ino = 1, - .dirtied_ino_d = ui->data_len }; + .dirtied_ino_d = ALIGN(ui->data_len, 8) }; err = ubifs_budget_space(c, &req); if (err) diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 2cbc3c36f3a8c..b37c6b1e33253 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1835,7 +1835,6 @@ static int ubifs_remount_rw(struct ubifs_info *c) kthread_stop(c->bgt); c->bgt = NULL; } - free_wbufs(c); kfree(c->write_reserve_buf); c->write_reserve_buf = NULL; vfree(c->ileb_buf); diff --git a/fs/udf/dir.c b/fs/udf/dir.c index c19dba45aa209..d0f92a52e3bab 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "udf_i.h" #include "udf_sb.h" @@ -44,7 +45,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx) struct fileIdentDesc *fi = NULL; struct fileIdentDesc cfi; udf_pblk_t block, iblock; - loff_t nf_pos; + loff_t nf_pos, emit_pos = 0; int flen; unsigned char *fname = NULL, *copy_name = NULL; unsigned char *nameptr; @@ -58,6 +59,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx) int i, num, ret = 0; struct extent_position epos = { NULL, 0, {0, 0} }; struct super_block *sb = dir->i_sb; + bool pos_valid = false; if (ctx->pos == 0) { if (!dir_emit_dot(file, ctx)) @@ -68,6 +70,21 @@ static int udf_readdir(struct file *file, struct dir_context *ctx) if (nf_pos >= size) goto out; + /* + * Something changed since last readdir (either lseek was called or dir + * changed)? We need to verify the position correctly points at the + * beginning of some dir entry so that the directory parsing code does + * not get confused. Since UDF does not have any reliable way of + * identifying beginning of dir entry (names are under user control), + * we need to scan the directory from the beginning. + */ + if (!inode_eq_iversion(dir, file->f_version)) { + emit_pos = nf_pos; + nf_pos = 0; + } else { + pos_valid = true; + } + fname = kmalloc(UDF_NAME_LEN, GFP_NOFS); if (!fname) { ret = -ENOMEM; @@ -123,13 +140,21 @@ static int udf_readdir(struct file *file, struct dir_context *ctx) while (nf_pos < size) { struct kernel_lb_addr tloc; + loff_t cur_pos = nf_pos; - ctx->pos = (nf_pos >> 2) + 1; + /* Update file position only if we got past the current one */ + if (nf_pos >= emit_pos) { + ctx->pos = (nf_pos >> 2) + 1; + pos_valid = true; + } fi = udf_fileident_read(dir, &nf_pos, &fibh, &cfi, &epos, &eloc, &elen, &offset); if (!fi) goto out; + /* Still not at offset where user asked us to read from? */ + if (cur_pos < emit_pos) + continue; liu = le16_to_cpu(cfi.lengthOfImpUse); lfi = cfi.lengthFileIdent; @@ -187,8 +212,11 @@ static int udf_readdir(struct file *file, struct dir_context *ctx) } /* end while */ ctx->pos = (nf_pos >> 2) + 1; + pos_valid = true; out: + if (pos_valid) + file->f_version = inode_query_iversion(dir); if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); brelse(fibh.sbh); diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 507f8f9103270..639aabf30eaf0 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -258,10 +258,6 @@ int udf_expand_file_adinicb(struct inode *inode) char *kaddr; struct udf_inode_info *iinfo = UDF_I(inode); int err; - struct writeback_control udf_wbc = { - .sync_mode = WB_SYNC_NONE, - .nr_to_write = 1, - }; WARN_ON_ONCE(!inode_is_locked(inode)); if (!iinfo->i_lenAlloc) { @@ -305,8 +301,10 @@ int udf_expand_file_adinicb(struct inode *inode) iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; /* from now on we have normal address_space methods */ inode->i_data.a_ops = &udf_aops; + set_page_dirty(page); + unlock_page(page); up_write(&iinfo->i_data_sem); - err = inode->i_data.a_ops->writepage(page, &udf_wbc); + err = filemap_fdatawrite(inode->i_mapping); if (err) { /* Restore everything back so that we don't lose data... */ lock_page(page); @@ -318,6 +316,7 @@ int udf_expand_file_adinicb(struct inode *inode) unlock_page(page); iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; inode->i_data.a_ops = &udf_adinicb_aops; + iinfo->i_lenAlloc = inode->i_size; up_write(&iinfo->i_data_sem); } put_page(page); diff --git a/fs/udf/misc.c b/fs/udf/misc.c index 401e64cde1be0..853bcff51043f 100644 --- a/fs/udf/misc.c +++ b/fs/udf/misc.c @@ -173,13 +173,22 @@ struct genericFormat *udf_get_extendedattr(struct inode *inode, uint32_t type, else offset = le32_to_cpu(eahd->appAttrLocation); - while (offset < iinfo->i_lenEAttr) { + while (offset + sizeof(*gaf) < iinfo->i_lenEAttr) { + uint32_t attrLength; + gaf = (struct genericFormat *)&ea[offset]; + attrLength = le32_to_cpu(gaf->attrLength); + + /* Detect undersized elements and buffer overflows */ + if ((attrLength < sizeof(*gaf)) || + (attrLength > (iinfo->i_lenEAttr - offset))) + break; + if (le32_to_cpu(gaf->attrType) == type && gaf->attrSubtype == subtype) return gaf; else - offset += le32_to_cpu(gaf->attrLength); + offset += attrLength; } } diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 3c3d3b20889c8..3c009562375d3 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -30,6 +30,7 @@ #include #include #include +#include static inline int udf_match(int len1, const unsigned char *name1, int len2, const unsigned char *name2) @@ -135,6 +136,8 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi, mark_buffer_dirty_inode(fibh->ebh, inode); mark_buffer_dirty_inode(fibh->sbh, inode); } + inode_inc_iversion(inode); + return 0; } diff --git a/fs/udf/super.c b/fs/udf/super.c index 8bb001c7927f0..5193b94c06834 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -57,6 +57,7 @@ #include #include #include +#include #include "udf_sb.h" #include "udf_i.h" @@ -108,16 +109,10 @@ struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct super_block *sb) return NULL; lvid = (struct logicalVolIntegrityDesc *)UDF_SB(sb)->s_lvid_bh->b_data; partnum = le32_to_cpu(lvid->numOfPartitions); - if ((sb->s_blocksize - sizeof(struct logicalVolIntegrityDescImpUse) - - offsetof(struct logicalVolIntegrityDesc, impUse)) / - (2 * sizeof(uint32_t)) < partnum) { - udf_err(sb, "Logical volume integrity descriptor corrupted " - "(numOfPartitions = %u)!\n", partnum); - return NULL; - } /* The offset is to skip freeSpaceTable and sizeTable arrays */ offset = partnum * 2 * sizeof(uint32_t); - return (struct logicalVolIntegrityDescImpUse *)&(lvid->impUse[offset]); + return (struct logicalVolIntegrityDescImpUse *) + (((uint8_t *)(lvid + 1)) + offset); } /* UDF filesystem type */ @@ -155,6 +150,7 @@ static struct inode *udf_alloc_inode(struct super_block *sb) init_rwsem(&ei->i_data_sem); ei->cached_extent.lstart = -1; spin_lock_init(&ei->i_extent_cache_lock); + inode_set_iversion(&ei->vfs_inode, 1); return &ei->vfs_inode; } @@ -349,10 +345,10 @@ static int udf_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",lastblock=%u", sbi->s_last_block); if (sbi->s_anchor != 0) seq_printf(seq, ",anchor=%u", sbi->s_anchor); - if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) - seq_puts(seq, ",utf8"); - if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP) && sbi->s_nls_map) + if (sbi->s_nls_map) seq_printf(seq, ",iocharset=%s", sbi->s_nls_map->charset); + else + seq_puts(seq, ",iocharset=utf8"); return 0; } @@ -557,19 +553,24 @@ static int udf_parse_options(char *options, struct udf_options *uopt, /* Ignored (never implemented properly) */ break; case Opt_utf8: - uopt->flags |= (1 << UDF_FLAG_UTF8); + if (!remount) { + unload_nls(uopt->nls_map); + uopt->nls_map = NULL; + } break; case Opt_iocharset: if (!remount) { - if (uopt->nls_map) - unload_nls(uopt->nls_map); - /* - * load_nls() failure is handled later in - * udf_fill_super() after all options are - * parsed. - */ + unload_nls(uopt->nls_map); + uopt->nls_map = NULL; + } + /* When nls_map is not loaded then UTF-8 is used */ + if (!remount && strcmp(args[0].from, "utf8") != 0) { uopt->nls_map = load_nls(args[0].from); - uopt->flags |= (1 << UDF_FLAG_NLS_MAP); + if (!uopt->nls_map) { + pr_err("iocharset %s not found\n", + args[0].from); + return 0; + } } break; case Opt_uforget: @@ -1548,6 +1549,7 @@ static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_ struct udf_sb_info *sbi = UDF_SB(sb); struct logicalVolIntegrityDesc *lvid; int indirections = 0; + u32 parts, impuselen; while (++indirections <= UDF_MAX_LVID_NESTING) { final_bh = NULL; @@ -1574,15 +1576,27 @@ static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_ lvid = (struct logicalVolIntegrityDesc *)final_bh->b_data; if (lvid->nextIntegrityExt.extLength == 0) - return; + goto check; loc = leea_to_cpu(lvid->nextIntegrityExt); } udf_warn(sb, "Too many LVID indirections (max %u), ignoring.\n", UDF_MAX_LVID_NESTING); +out_err: brelse(sbi->s_lvid_bh); sbi->s_lvid_bh = NULL; + return; +check: + parts = le32_to_cpu(lvid->numOfPartitions); + impuselen = le32_to_cpu(lvid->lengthOfImpUse); + if (parts >= sb->s_blocksize || impuselen >= sb->s_blocksize || + sizeof(struct logicalVolIntegrityDesc) + impuselen + + 2 * parts * sizeof(u32) > sb->s_blocksize) { + udf_warn(sb, "Corrupted LVID (parts=%u, impuselen=%u), " + "ignoring.\n", parts, impuselen); + goto out_err; + } } /* @@ -2145,21 +2159,6 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) if (!udf_parse_options((char *)options, &uopt, false)) goto parse_options_failure; - if (uopt.flags & (1 << UDF_FLAG_UTF8) && - uopt.flags & (1 << UDF_FLAG_NLS_MAP)) { - udf_err(sb, "utf8 cannot be combined with iocharset\n"); - goto parse_options_failure; - } - if ((uopt.flags & (1 << UDF_FLAG_NLS_MAP)) && !uopt.nls_map) { - uopt.nls_map = load_nls_default(); - if (!uopt.nls_map) - uopt.flags &= ~(1 << UDF_FLAG_NLS_MAP); - else - udf_debug("Using default NLS map\n"); - } - if (!(uopt.flags & (1 << UDF_FLAG_NLS_MAP))) - uopt.flags |= (1 << UDF_FLAG_UTF8); - fileset.logicalBlockNum = 0xFFFFFFFF; fileset.partitionReferenceNum = 0xFFFF; @@ -2314,8 +2313,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) error_out: iput(sbi->s_vat_inode); parse_options_failure: - if (uopt.nls_map) - unload_nls(uopt.nls_map); + unload_nls(uopt.nls_map); if (lvid_open) udf_close_lvid(sb); brelse(sbi->s_lvid_bh); @@ -2365,8 +2363,7 @@ static void udf_put_super(struct super_block *sb) sbi = UDF_SB(sb); iput(sbi->s_vat_inode); - if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) - unload_nls(sbi->s_nls_map); + unload_nls(sbi->s_nls_map); if (!sb_rdonly(sb)) udf_close_lvid(sb); brelse(sbi->s_lvid_bh); diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h index 3d83be54c4748..8eace7a633d38 100644 --- a/fs/udf/udf_sb.h +++ b/fs/udf/udf_sb.h @@ -20,8 +20,6 @@ #define UDF_FLAG_UNDELETE 6 #define UDF_FLAG_UNHIDE 7 #define UDF_FLAG_VARCONV 8 -#define UDF_FLAG_NLS_MAP 9 -#define UDF_FLAG_UTF8 10 #define UDF_FLAG_UID_FORGET 11 /* save -1 for uid to disk */ #define UDF_FLAG_GID_FORGET 12 #define UDF_FLAG_UID_SET 13 diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index 5fcfa96463ebb..622569007b530 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c @@ -177,7 +177,7 @@ static int udf_name_from_CS0(struct super_block *sb, return 0; } - if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) + if (UDF_SB(sb)->s_nls_map) conv_f = UDF_SB(sb)->s_nls_map->uni2char; else conv_f = NULL; @@ -285,7 +285,7 @@ static int udf_name_to_CS0(struct super_block *sb, if (ocu_max_len <= 0) return 0; - if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) + if (UDF_SB(sb)->s_nls_map) conv_f = UDF_SB(sb)->s_nls_map->char2uni; else conv_f = NULL; diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 911025f3ac147..85d0db9358140 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -32,11 +32,6 @@ int sysctl_unprivileged_userfaultfd __read_mostly = 1; static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly; -enum userfaultfd_state { - UFFD_STATE_WAIT_API, - UFFD_STATE_RUNNING, -}; - /* * Start with fault_pending_wqh and fault_wqh so they're more likely * to be in the same cacheline. @@ -68,8 +63,6 @@ struct userfaultfd_ctx { unsigned int flags; /* features requested from the userspace */ unsigned int features; - /* state machine */ - enum userfaultfd_state state; /* released */ bool released; /* memory mappings are changing because of non-cooperative event */ @@ -103,6 +96,14 @@ struct userfaultfd_wake_range { unsigned long len; }; +/* internal indication that UFFD_API ioctl was successfully executed */ +#define UFFD_FEATURE_INITIALIZED (1u << 31) + +static bool userfaultfd_is_initialized(struct userfaultfd_ctx *ctx) +{ + return ctx->features & UFFD_FEATURE_INITIALIZED; +} + static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode, int wake_flags, void *key) { @@ -699,7 +700,6 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs) refcount_set(&ctx->refcount, 1); ctx->flags = octx->flags; - ctx->state = UFFD_STATE_RUNNING; ctx->features = octx->features; ctx->released = false; ctx->mmap_changing = false; @@ -980,38 +980,33 @@ static __poll_t userfaultfd_poll(struct file *file, poll_table *wait) poll_wait(file, &ctx->fd_wqh, wait); - switch (ctx->state) { - case UFFD_STATE_WAIT_API: + if (!userfaultfd_is_initialized(ctx)) return EPOLLERR; - case UFFD_STATE_RUNNING: - /* - * poll() never guarantees that read won't block. - * userfaults can be waken before they're read(). - */ - if (unlikely(!(file->f_flags & O_NONBLOCK))) - return EPOLLERR; - /* - * lockless access to see if there are pending faults - * __pollwait last action is the add_wait_queue but - * the spin_unlock would allow the waitqueue_active to - * pass above the actual list_add inside - * add_wait_queue critical section. So use a full - * memory barrier to serialize the list_add write of - * add_wait_queue() with the waitqueue_active read - * below. - */ - ret = 0; - smp_mb(); - if (waitqueue_active(&ctx->fault_pending_wqh)) - ret = EPOLLIN; - else if (waitqueue_active(&ctx->event_wqh)) - ret = EPOLLIN; - - return ret; - default: - WARN_ON_ONCE(1); + + /* + * poll() never guarantees that read won't block. + * userfaults can be waken before they're read(). + */ + if (unlikely(!(file->f_flags & O_NONBLOCK))) return EPOLLERR; - } + /* + * lockless access to see if there are pending faults + * __pollwait last action is the add_wait_queue but + * the spin_unlock would allow the waitqueue_active to + * pass above the actual list_add inside + * add_wait_queue critical section. So use a full + * memory barrier to serialize the list_add write of + * add_wait_queue() with the waitqueue_active read + * below. + */ + ret = 0; + smp_mb(); + if (waitqueue_active(&ctx->fault_pending_wqh)) + ret = EPOLLIN; + else if (waitqueue_active(&ctx->event_wqh)) + ret = EPOLLIN; + + return ret; } static const struct file_operations userfaultfd_fops; @@ -1205,7 +1200,7 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf, struct uffd_msg msg; int no_wait = file->f_flags & O_NONBLOCK; - if (ctx->state == UFFD_STATE_WAIT_API) + if (!userfaultfd_is_initialized(ctx)) return -EINVAL; for (;;) { @@ -1807,9 +1802,10 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx, static inline unsigned int uffd_ctx_features(__u64 user_features) { /* - * For the current set of features the bits just coincide + * For the current set of features the bits just coincide. Set + * UFFD_FEATURE_INITIALIZED to mark the features as enabled. */ - return (unsigned int)user_features; + return (unsigned int)user_features | UFFD_FEATURE_INITIALIZED; } /* @@ -1822,12 +1818,10 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx, { struct uffdio_api uffdio_api; void __user *buf = (void __user *)arg; + unsigned int ctx_features; int ret; __u64 features; - ret = -EINVAL; - if (ctx->state != UFFD_STATE_WAIT_API) - goto out; ret = -EFAULT; if (copy_from_user(&uffdio_api, buf, sizeof(uffdio_api))) goto out; @@ -1844,9 +1838,13 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx, ret = -EFAULT; if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api))) goto out; - ctx->state = UFFD_STATE_RUNNING; + /* only enable the requested features for this uffd context */ - ctx->features = uffd_ctx_features(features); + ctx_features = uffd_ctx_features(features); + ret = -EINVAL; + if (cmpxchg(&ctx->features, 0, ctx_features) != 0) + goto err_out; + ret = 0; out: return ret; @@ -1863,7 +1861,7 @@ static long userfaultfd_ioctl(struct file *file, unsigned cmd, int ret = -EINVAL; struct userfaultfd_ctx *ctx = file->private_data; - if (cmd != UFFDIO_API && ctx->state == UFFD_STATE_WAIT_API) + if (cmd != UFFDIO_API && !userfaultfd_is_initialized(ctx)) return -EINVAL; switch(cmd) { @@ -1964,7 +1962,6 @@ SYSCALL_DEFINE1(userfaultfd, int, flags) refcount_set(&ctx->refcount, 1); ctx->flags = flags; ctx->features = 0; - ctx->state = UFFD_STATE_WAIT_API; ctx->released = false; ctx->mmap_changing = false; ctx->mm = current->mm; diff --git a/fs/verity/enable.c b/fs/verity/enable.c index eabc6ac199064..1370bfd17e870 100644 --- a/fs/verity/enable.c +++ b/fs/verity/enable.c @@ -136,7 +136,7 @@ static int build_merkle_tree(struct inode *inode, * (level 0) and ascending to the root node (level 'num_levels - 1'). * Then at the end (level 'num_levels'), calculate the root hash. */ - blocks = (inode->i_size + params->block_size - 1) >> + blocks = ((u64)inode->i_size + params->block_size - 1) >> params->log_blocksize; for (level = 0; level <= params->num_levels; level++) { err = build_merkle_tree_level(inode, level, blocks, params, diff --git a/fs/verity/open.c b/fs/verity/open.c index 63d1004b688cb..6200826107c24 100644 --- a/fs/verity/open.c +++ b/fs/verity/open.c @@ -89,7 +89,7 @@ int fsverity_init_merkle_tree_params(struct merkle_tree_params *params, */ /* Compute number of levels and the number of blocks in each level */ - blocks = (inode->i_size + params->block_size - 1) >> log_blocksize; + blocks = ((u64)inode->i_size + params->block_size - 1) >> log_blocksize; pr_debug("Data is %lld bytes (%llu blocks)\n", inode->i_size, blocks); while (blocks > 1) { if (params->num_levels >= FS_VERITY_MAX_LEVELS) { diff --git a/fs/xattr.c b/fs/xattr.c index f2854570d4119..ad59c9e2d0cf8 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -205,15 +205,15 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, } /** - * __vfs_setxattr_locked: set an extended attribute while holding the inode + * __vfs_setxattr_locked - set an extended attribute while holding the inode * lock * - * @dentry - object to perform setxattr on - * @name - xattr name to set - * @value - value to set @name to - * @size - size of @value - * @flags - flags to pass into filesystem operations - * @delegated_inode - on return, will contain an inode pointer that + * @dentry: object to perform setxattr on + * @name: xattr name to set + * @value: value to set @name to + * @size: size of @value + * @flags: flags to pass into filesystem operations + * @delegated_inode: on return, will contain an inode pointer that * a delegation was broken on, NULL if none. */ int @@ -416,12 +416,12 @@ __vfs_removexattr(struct dentry *dentry, const char *name) EXPORT_SYMBOL(__vfs_removexattr); /** - * __vfs_removexattr_locked: set an extended attribute while holding the inode + * __vfs_removexattr_locked - set an extended attribute while holding the inode * lock * - * @dentry - object to perform setxattr on - * @name - name of xattr to remove - * @delegated_inode - on return, will contain an inode pointer that + * @dentry: object to perform setxattr on + * @name: name of xattr to remove + * @delegated_inode: on return, will contain an inode pointer that * a delegation was broken on, NULL if none. */ int diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index b3584cd2cc164..a9b32879a9d15 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -202,7 +202,7 @@ xfs_calc_inode_chunk_res( * blocks as needed to mark inuse MAXEXTLEN blocks' worth of realtime extents, * as well as the realtime summary block. */ -unsigned int +static unsigned int xfs_rtalloc_log_count( struct xfs_mount *mp, unsigned int num_ops) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index b339ff93df997..7a9048c4c2f95 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3232,7 +3232,6 @@ xfs_rename( * appropriately. */ if (flags & RENAME_WHITEOUT) { - ASSERT(!(flags & (RENAME_NOREPLACE | RENAME_EXCHANGE))); error = xfs_rename_alloc_whiteout(target_dp, &wip); if (error) return error; diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index b3021d9b34a5e..7b7a009425e21 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -714,7 +714,8 @@ xfs_ioc_space( flags |= XFS_PREALLOC_CLEAR; if (bf->l_start > XFS_ISIZE(ip)) { error = xfs_alloc_file_space(ip, XFS_ISIZE(ip), - bf->l_start - XFS_ISIZE(ip), 0); + bf->l_start - XFS_ISIZE(ip), + XFS_BMAPI_PREALLOC); if (error) goto out_unlock; } diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 9373662cdb44f..ff5fecff51167 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -536,8 +536,14 @@ typedef u64 acpi_integer; * Can be used with access_width of struct acpi_generic_address and access_size of * struct acpi_resource_generic_register. */ -#define ACPI_ACCESS_BIT_WIDTH(size) (1 << ((size) + 2)) -#define ACPI_ACCESS_BYTE_WIDTH(size) (1 << ((size) - 1)) +#define ACPI_ACCESS_BIT_SHIFT 2 +#define ACPI_ACCESS_BYTE_SHIFT -1 +#define ACPI_ACCESS_BIT_MAX (31 - ACPI_ACCESS_BIT_SHIFT) +#define ACPI_ACCESS_BYTE_MAX (31 - ACPI_ACCESS_BYTE_SHIFT) +#define ACPI_ACCESS_BIT_DEFAULT (8 - ACPI_ACCESS_BIT_SHIFT) +#define ACPI_ACCESS_BYTE_DEFAULT (8 - ACPI_ACCESS_BYTE_SHIFT) +#define ACPI_ACCESS_BIT_WIDTH(size) (1 << ((size) + ACPI_ACCESS_BIT_SHIFT)) +#define ACPI_ACCESS_BYTE_WIDTH(size) (1 << ((size) + ACPI_ACCESS_BYTE_SHIFT)) /******************************************************************************* * diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index c716ea81e6531..268674c1d5685 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -495,6 +495,38 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm } #endif +/* + * tlb_flush_{pte|pmd|pud|p4d}_range() adjust the tlb->start and tlb->end, + * and set corresponding cleared_*. + */ +static inline void tlb_flush_pte_range(struct mmu_gather *tlb, + unsigned long address, unsigned long size) +{ + __tlb_adjust_range(tlb, address, size); + tlb->cleared_ptes = 1; +} + +static inline void tlb_flush_pmd_range(struct mmu_gather *tlb, + unsigned long address, unsigned long size) +{ + __tlb_adjust_range(tlb, address, size); + tlb->cleared_pmds = 1; +} + +static inline void tlb_flush_pud_range(struct mmu_gather *tlb, + unsigned long address, unsigned long size) +{ + __tlb_adjust_range(tlb, address, size); + tlb->cleared_puds = 1; +} + +static inline void tlb_flush_p4d_range(struct mmu_gather *tlb, + unsigned long address, unsigned long size) +{ + __tlb_adjust_range(tlb, address, size); + tlb->cleared_p4ds = 1; +} + #ifndef __tlb_remove_tlb_entry #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) #endif @@ -508,19 +540,21 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm */ #define tlb_remove_tlb_entry(tlb, ptep, address) \ do { \ - __tlb_adjust_range(tlb, address, PAGE_SIZE); \ - tlb->cleared_ptes = 1; \ + tlb_flush_pte_range(tlb, address, PAGE_SIZE); \ __tlb_remove_tlb_entry(tlb, ptep, address); \ } while (0) #define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \ do { \ unsigned long _sz = huge_page_size(h); \ - __tlb_adjust_range(tlb, address, _sz); \ - if (_sz == PMD_SIZE) \ - tlb->cleared_pmds = 1; \ - else if (_sz == PUD_SIZE) \ - tlb->cleared_puds = 1; \ + if (_sz >= P4D_SIZE) \ + tlb_flush_p4d_range(tlb, address, _sz); \ + else if (_sz >= PUD_SIZE) \ + tlb_flush_pud_range(tlb, address, _sz); \ + else if (_sz >= PMD_SIZE) \ + tlb_flush_pmd_range(tlb, address, _sz); \ + else \ + tlb_flush_pte_range(tlb, address, _sz); \ __tlb_remove_tlb_entry(tlb, ptep, address); \ } while (0) @@ -534,8 +568,7 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm #define tlb_remove_pmd_tlb_entry(tlb, pmdp, address) \ do { \ - __tlb_adjust_range(tlb, address, HPAGE_PMD_SIZE); \ - tlb->cleared_pmds = 1; \ + tlb_flush_pmd_range(tlb, address, HPAGE_PMD_SIZE); \ __tlb_remove_pmd_tlb_entry(tlb, pmdp, address); \ } while (0) @@ -549,8 +582,7 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm #define tlb_remove_pud_tlb_entry(tlb, pudp, address) \ do { \ - __tlb_adjust_range(tlb, address, HPAGE_PUD_SIZE); \ - tlb->cleared_puds = 1; \ + tlb_flush_pud_range(tlb, address, HPAGE_PUD_SIZE); \ __tlb_remove_pud_tlb_entry(tlb, pudp, address); \ } while (0) @@ -575,9 +607,8 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm #ifndef pte_free_tlb #define pte_free_tlb(tlb, ptep, address) \ do { \ - __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + tlb_flush_pmd_range(tlb, address, PAGE_SIZE); \ tlb->freed_tables = 1; \ - tlb->cleared_pmds = 1; \ __pte_free_tlb(tlb, ptep, address); \ } while (0) #endif @@ -585,9 +616,8 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm #ifndef pmd_free_tlb #define pmd_free_tlb(tlb, pmdp, address) \ do { \ - __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + tlb_flush_pud_range(tlb, address, PAGE_SIZE); \ tlb->freed_tables = 1; \ - tlb->cleared_puds = 1; \ __pmd_free_tlb(tlb, pmdp, address); \ } while (0) #endif @@ -596,9 +626,8 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm #ifndef pud_free_tlb #define pud_free_tlb(tlb, pudp, address) \ do { \ - __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + tlb_flush_p4d_range(tlb, address, PAGE_SIZE); \ tlb->freed_tables = 1; \ - tlb->cleared_p4ds = 1; \ __pud_free_tlb(tlb, pudp, address); \ } while (0) #endif diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 8a5ced9cf5273..77d4281815579 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -110,17 +110,17 @@ #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD -#ifdef CC_USING_PATCHABLE_FUNCTION_ENTRY -#define MCOUNT_REC() . = ALIGN(8); \ - __start_mcount_loc = .; \ - KEEP(*(__patchable_function_entries)) \ - __stop_mcount_loc = .; -#else +/* + * The ftrace call sites are logged to a section whose name depends on the + * compiler option used. A given kernel image will only use one, AKA + * FTRACE_CALLSITE_SECTION. We capture all of them here to avoid header + * dependencies for FTRACE_CALLSITE_SECTION's definition. + */ #define MCOUNT_REC() . = ALIGN(8); \ __start_mcount_loc = .; \ KEEP(*(__mcount_loc)) \ + KEEP(*(__patchable_function_entries)) \ __stop_mcount_loc = .; -#endif #else #define MCOUNT_REC() #endif diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index e5bd302f2c49e..657ff6c5dc261 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -188,6 +188,8 @@ void *crypto_alloc_instance(const char *name, struct crypto_alg *alg, void crypto_init_queue(struct crypto_queue *queue, unsigned int max_qlen); int crypto_enqueue_request(struct crypto_queue *queue, struct crypto_async_request *request); +void crypto_enqueue_request_head(struct crypto_queue *queue, + struct crypto_async_request *request); struct crypto_async_request *crypto_dequeue_request(struct crypto_queue *queue); static inline unsigned int crypto_queue_len(struct crypto_queue *queue) { diff --git a/include/crypto/blake2s.h b/include/crypto/blake2s.h new file mode 100644 index 0000000000000..d439496fa6baa --- /dev/null +++ b/include/crypto/blake2s.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifndef _CRYPTO_BLAKE2S_H +#define _CRYPTO_BLAKE2S_H + +#include +#include +#include +#include + +enum blake2s_lengths { + BLAKE2S_BLOCK_SIZE = 64, + BLAKE2S_HASH_SIZE = 32, + BLAKE2S_KEY_SIZE = 32, + + BLAKE2S_128_HASH_SIZE = 16, + BLAKE2S_160_HASH_SIZE = 20, + BLAKE2S_224_HASH_SIZE = 28, + BLAKE2S_256_HASH_SIZE = 32, +}; + +struct blake2s_state { + u32 h[8]; + u32 t[2]; + u32 f[2]; + u8 buf[BLAKE2S_BLOCK_SIZE]; + unsigned int buflen; + unsigned int outlen; +}; + +enum blake2s_iv { + BLAKE2S_IV0 = 0x6A09E667UL, + BLAKE2S_IV1 = 0xBB67AE85UL, + BLAKE2S_IV2 = 0x3C6EF372UL, + BLAKE2S_IV3 = 0xA54FF53AUL, + BLAKE2S_IV4 = 0x510E527FUL, + BLAKE2S_IV5 = 0x9B05688CUL, + BLAKE2S_IV6 = 0x1F83D9ABUL, + BLAKE2S_IV7 = 0x5BE0CD19UL, +}; + +void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen); +void blake2s_final(struct blake2s_state *state, u8 *out); + +static inline void blake2s_init_param(struct blake2s_state *state, + const u32 param) +{ + *state = (struct blake2s_state){{ + BLAKE2S_IV0 ^ param, + BLAKE2S_IV1, + BLAKE2S_IV2, + BLAKE2S_IV3, + BLAKE2S_IV4, + BLAKE2S_IV5, + BLAKE2S_IV6, + BLAKE2S_IV7, + }}; +} + +static inline void blake2s_init(struct blake2s_state *state, + const size_t outlen) +{ + blake2s_init_param(state, 0x01010000 | outlen); + state->outlen = outlen; +} + +static inline void blake2s_init_key(struct blake2s_state *state, + const size_t outlen, const void *key, + const size_t keylen) +{ + WARN_ON(IS_ENABLED(DEBUG) && (!outlen || outlen > BLAKE2S_HASH_SIZE || + !key || !keylen || keylen > BLAKE2S_KEY_SIZE)); + + blake2s_init_param(state, 0x01010000 | keylen << 8 | outlen); + memcpy(state->buf, key, keylen); + state->buflen = BLAKE2S_BLOCK_SIZE; + state->outlen = outlen; +} + +static inline void blake2s(u8 *out, const u8 *in, const u8 *key, + const size_t outlen, const size_t inlen, + const size_t keylen) +{ + struct blake2s_state state; + + WARN_ON(IS_ENABLED(DEBUG) && ((!in && inlen > 0) || !out || !outlen || + outlen > BLAKE2S_HASH_SIZE || keylen > BLAKE2S_KEY_SIZE || + (!key && keylen))); + + if (keylen) + blake2s_init_key(&state, outlen, key, keylen); + else + blake2s_init(&state, outlen); + + blake2s_update(&state, in, inlen); + blake2s_final(&state, out); +} + +#endif /* _CRYPTO_BLAKE2S_H */ diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h index d1e723c6a37dd..542f896115f45 100644 --- a/include/crypto/chacha.h +++ b/include/crypto/chacha.h @@ -51,4 +51,19 @@ int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, int crypto_chacha_crypt(struct skcipher_request *req); int crypto_xchacha_crypt(struct skcipher_request *req); +enum chacha_constants { /* expand 32-byte k */ + CHACHA_CONSTANT_EXPA = 0x61707865U, + CHACHA_CONSTANT_ND_3 = 0x3320646eU, + CHACHA_CONSTANT_2_BY = 0x79622d32U, + CHACHA_CONSTANT_TE_K = 0x6b206574U +}; + +static inline void chacha_init_consts(u32 *state) +{ + state[0] = CHACHA_CONSTANT_EXPA; + state[1] = CHACHA_CONSTANT_ND_3; + state[2] = CHACHA_CONSTANT_2_BY; + state[3] = CHACHA_CONSTANT_TE_K; +} + #endif /* _CRYPTO_CHACHA_H */ diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h index 8c9af21efce1d..a6c3b8e7deb64 100644 --- a/include/crypto/drbg.h +++ b/include/crypto/drbg.h @@ -105,6 +105,12 @@ struct drbg_test_data { struct drbg_string *testentropy; /* TEST PARAMETER: test entropy */ }; +enum drbg_seed_state { + DRBG_SEED_STATE_UNSEEDED, + DRBG_SEED_STATE_PARTIAL, /* Seeded with !rng_is_initialized() */ + DRBG_SEED_STATE_FULL, +}; + struct drbg_state { struct mutex drbg_mutex; /* lock around DRBG */ unsigned char *V; /* internal state 10.1.1.1 1a) */ @@ -127,16 +133,14 @@ struct drbg_state { struct crypto_wait ctr_wait; /* CTR mode async wait obj */ struct scatterlist sg_in, sg_out; /* CTR mode SGLs */ - bool seeded; /* DRBG fully seeded? */ + enum drbg_seed_state seeded; /* DRBG fully seeded? */ bool pr; /* Prediction resistance enabled? */ bool fips_primed; /* Continuous test primed? */ unsigned char *prev; /* FIPS 140-2 continuous test value */ - struct work_struct seed_work; /* asynchronous seeding support */ struct crypto_rng *jent; const struct drbg_state_ops *d_ops; const struct drbg_core *core; struct drbg_string test_data; - struct random_ready_callback random_ready; }; static inline __u8 drbg_statelen(struct drbg_state *drbg) @@ -184,11 +188,7 @@ static inline size_t drbg_max_addtl(struct drbg_state *drbg) static inline size_t drbg_max_requests(struct drbg_state *drbg) { /* SP800-90A requires 2**48 maximum requests before reseeding */ -#if (__BITS_PER_LONG == 32) - return SIZE_MAX; -#else - return (1UL<<48); -#endif + return (1<<20); } /* diff --git a/include/crypto/engine.h b/include/crypto/engine.h index 84c708bba00b9..a004188faffcd 100644 --- a/include/crypto/engine.h +++ b/include/crypto/engine.h @@ -24,7 +24,9 @@ * @idling: the engine is entering idle state * @busy: request pump is busy * @running: the engine is on working - * @cur_req_prepared: current request is prepared + * @retry_support: indication that the hardware allows re-execution + * of a failed backlog request + * crypto-engine, in head position to keep order * @list: link with the global crypto engine list * @queue_lock: spinlock to syncronise access to request queue * @queue: the crypto queue of the engine @@ -35,6 +37,8 @@ * @unprepare_crypt_hardware: there are currently no more requests on the * queue so the subsystem notifies the driver that it may relax the * hardware by issuing this call + * @do_batch_requests: execute a batch of requests. Depends on multiple + * requests support. * @kworker: kthread worker struct for request pump * @pump_requests: work struct for scheduling work to the request pump * @priv_data: the engine private data @@ -45,7 +49,8 @@ struct crypto_engine { bool idling; bool busy; bool running; - bool cur_req_prepared; + + bool retry_support; struct list_head list; spinlock_t queue_lock; @@ -56,6 +61,8 @@ struct crypto_engine { int (*prepare_crypt_hardware)(struct crypto_engine *engine); int (*unprepare_crypt_hardware)(struct crypto_engine *engine); + int (*do_batch_requests)(struct crypto_engine *engine); + struct kthread_worker *kworker; struct kthread_work pump_requests; @@ -106,6 +113,10 @@ void crypto_finalize_skcipher_request(struct crypto_engine *engine, int crypto_engine_start(struct crypto_engine *engine); int crypto_engine_stop(struct crypto_engine *engine); struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt); +struct crypto_engine *crypto_engine_alloc_init_and_set(struct device *dev, + bool retry_support, + int (*cbk_do_batch)(struct crypto_engine *engine), + bool rt, int qlen); int crypto_engine_exit(struct crypto_engine *engine); #endif /* _CRYPTO_ENGINE_H */ diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h new file mode 100644 index 0000000000000..3ba066845b699 --- /dev/null +++ b/include/crypto/internal/blake2s.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ + +#ifndef _CRYPTO_INTERNAL_BLAKE2S_H +#define _CRYPTO_INTERNAL_BLAKE2S_H + +#include + +void blake2s_compress_generic(struct blake2s_state *state,const u8 *block, + size_t nblocks, const u32 inc); + +void blake2s_compress_arch(struct blake2s_state *state,const u8 *block, + size_t nblocks, const u32 inc); + +static inline void blake2s_set_lastblock(struct blake2s_state *state) +{ + state->f[0] = -1; +} + +#endif /* _CRYPTO_INTERNAL_BLAKE2S_H */ diff --git a/include/crypto/public_key.h b/include/crypto/public_key.h index 0588ef3bc6ff6..48722f2b8543b 100644 --- a/include/crypto/public_key.h +++ b/include/crypto/public_key.h @@ -38,9 +38,9 @@ extern void public_key_free(struct public_key *key); struct public_key_signature { struct asymmetric_key_id *auth_ids[2]; u8 *s; /* Signature */ - u32 s_size; /* Number of bytes in signature */ u8 *digest; - u8 digest_size; /* Number of bytes in digest */ + u32 s_size; /* Number of bytes in signature */ + u32 digest_size; /* Number of bytes in digest */ const char *pkey_algo; const char *hash_algo; const char *encoding; diff --git a/include/crypto/sm3.h b/include/crypto/sm3.h index 1438942dc7737..1f021ad0533ff 100644 --- a/include/crypto/sm3.h +++ b/include/crypto/sm3.h @@ -1,5 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Common values for SM3 algorithm + * + * Copyright (C) 2017 ARM Limited or its affiliates. + * Copyright (C) 2017 Gilad Ben-Yossef + * Copyright (C) 2021 Tianjia Zhang */ #ifndef _CRYPTO_SM3_H @@ -30,11 +35,30 @@ struct sm3_state { u8 buffer[SM3_BLOCK_SIZE]; }; -struct shash_desc; +/* + * Stand-alone implementation of the SM3 algorithm. It is designed to + * have as little dependencies as possible so it can be used in the + * kexec_file purgatory. In other cases you should generally use the + * hash APIs from include/crypto/hash.h. Especially when hashing large + * amounts of data as those APIs may be hw-accelerated. + * + * For details see lib/crypto/sm3.c + */ + +static inline void sm3_init(struct sm3_state *sctx) +{ + sctx->state[0] = SM3_IVA; + sctx->state[1] = SM3_IVB; + sctx->state[2] = SM3_IVC; + sctx->state[3] = SM3_IVD; + sctx->state[4] = SM3_IVE; + sctx->state[5] = SM3_IVF; + sctx->state[6] = SM3_IVG; + sctx->state[7] = SM3_IVH; + sctx->count = 0; +} -extern int crypto_sm3_update(struct shash_desc *desc, const u8 *data, - unsigned int len); +void sm3_update(struct sm3_state *sctx, const u8 *data, unsigned int len); +void sm3_final(struct sm3_state *sctx, u8 *out); -extern int crypto_sm3_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *hash); #endif diff --git a/include/crypto/sm4.h b/include/crypto/sm4.h index 7afd730d16ffb..709f286e7b253 100644 --- a/include/crypto/sm4.h +++ b/include/crypto/sm4.h @@ -3,6 +3,7 @@ /* * Common values for the SM4 algorithm * Copyright (C) 2018 ARM Limited or its affiliates. + * Copyright (c) 2021 Tianjia Zhang */ #ifndef _CRYPTO_SM4_H @@ -15,17 +16,29 @@ #define SM4_BLOCK_SIZE 16 #define SM4_RKEY_WORDS 32 -struct crypto_sm4_ctx { +struct sm4_ctx { u32 rkey_enc[SM4_RKEY_WORDS]; u32 rkey_dec[SM4_RKEY_WORDS]; }; -int crypto_sm4_set_key(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len); -int crypto_sm4_expand_key(struct crypto_sm4_ctx *ctx, const u8 *in_key, +/** + * sm4_expandkey - Expands the SM4 key as described in GB/T 32907-2016 + * @ctx: The location where the computed key will be stored. + * @in_key: The supplied key. + * @key_len: The length of the supplied key. + * + * Returns 0 on success. The function fails only if an invalid key size (or + * pointer) is supplied. + */ +int sm4_expandkey(struct sm4_ctx *ctx, const u8 *in_key, unsigned int key_len); -void crypto_sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in); -void crypto_sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in); +/** + * sm4_crypt_block - Encrypt or decrypt a single SM4 block + * @rk: The rkey_enc for encrypt or rkey_dec for decrypt + * @out: Buffer to store output data + * @in: Buffer containing the input data + */ +void sm4_crypt_block(const u32 *rk, u8 *out, const u8 *in); #endif diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index b9719418c3d26..f40a97417b682 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -116,7 +116,7 @@ struct detailed_data_monitor_range { u8 supported_scalings; u8 preferred_refresh; } __attribute__((packed)) cvt; - } formula; + } __attribute__((packed)) formula; } __attribute__((packed)); struct detailed_data_wpindex { @@ -149,7 +149,7 @@ struct detailed_non_pixel { struct detailed_data_wpindex color; struct std_timing timings[6]; struct cvt_timing cvt[4]; - } data; + } __attribute__((packed)) data; } __attribute__((packed)); #define EDID_DETAIL_EST_TIMINGS 0xf7 @@ -167,7 +167,7 @@ struct detailed_timing { union { struct detailed_pixel_timing pixel_data; struct detailed_non_pixel other_data; - } data; + } __attribute__((packed)) data; } __attribute__((packed)); #define DRM_EDID_INPUT_SERRATION_VSYNC (1 << 0) diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 6db030439e29c..dbf4f08d42e52 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -27,6 +27,7 @@ struct kvm_pmu { bool ready; bool created; bool irq_level; + struct irq_work overflow_work; }; #define kvm_arm_pmu_v3_ready(v) ((v)->arch.pmu.ready) diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 4e97ba64dbb42..3e6ef64e74d3d 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -76,6 +76,11 @@ ARM_SMCCC_SMC_32, \ 0, 0x7fff) +#define ARM_SMCCC_ARCH_WORKAROUND_3 \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + 0, 0x3fff) + #define SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED 1 #ifndef __ASSEMBLY__ diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 2849bdbb3acbe..edbbca35f4c3d 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -177,6 +177,8 @@ struct bdi_writeback { struct cgroup_subsys_state *blkcg_css; /* and blkcg */ struct list_head memcg_node; /* anchored at memcg->cgwb_list */ struct list_head blkcg_node; /* anchored at blkcg->cgwb_list */ + struct list_head b_attached; /* attached inodes, protected by list_lock */ + struct list_head offline_node; /* anchored at offline_cgwbs */ union { struct work_struct release_work; @@ -279,8 +281,9 @@ static inline void wb_get(struct bdi_writeback *wb) /** * wb_put - decrement a wb's refcount * @wb: bdi_writeback to put + * @nr: number of references to put */ -static inline void wb_put(struct bdi_writeback *wb) +static inline void wb_put_many(struct bdi_writeback *wb, unsigned long nr) { if (WARN_ON_ONCE(!wb->bdi)) { /* @@ -291,7 +294,16 @@ static inline void wb_put(struct bdi_writeback *wb) } if (wb != &wb->bdi->wb) - percpu_ref_put(&wb->refcnt); + percpu_ref_put_many(&wb->refcnt, nr); +} + +/** + * wb_put - decrement a wb's refcount + * @wb: bdi_writeback to put + */ +static inline void wb_put(struct bdi_writeback *wb) +{ + wb_put_many(wb, 1); } /** @@ -320,6 +332,10 @@ static inline void wb_put(struct bdi_writeback *wb) { } +static inline void wb_put_many(struct bdi_writeback *wb, unsigned long nr) +{ +} + static inline bool wb_dying(struct bdi_writeback *wb) { return false; diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index 4c0224ff0a14b..4f1c0f8e1bb0b 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -41,6 +41,22 @@ #define __bf_shf(x) (__builtin_ffsll(x) - 1) +#define __scalar_type_to_unsigned_cases(type) \ + unsigned type: (unsigned type)0, \ + signed type: (unsigned type)0 + +#define __unsigned_scalar_typeof(x) typeof( \ + _Generic((x), \ + char: (unsigned char)0, \ + __scalar_type_to_unsigned_cases(char), \ + __scalar_type_to_unsigned_cases(short), \ + __scalar_type_to_unsigned_cases(int), \ + __scalar_type_to_unsigned_cases(long), \ + __scalar_type_to_unsigned_cases(long long), \ + default: (x))) + +#define __bf_cast_unsigned(type, x) ((__unsigned_scalar_typeof(type))(x)) + #define __BF_FIELD_CHECK(_mask, _reg, _val, _pfx) \ ({ \ BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask), \ @@ -49,7 +65,8 @@ BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \ ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \ _pfx "value too large for the field"); \ - BUILD_BUG_ON_MSG((_mask) > (typeof(_reg))~0ull, \ + BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) > \ + __bf_cast_unsigned(_reg, ~0ull), \ _pfx "type of reg too small for mask"); \ __BUILD_BUG_ON_NOT_POWER_OF_2((_mask) + \ (1ULL << __bf_shf(_mask))); \ diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index bed9e43f94260..75ba2bc1ffbea 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -15,13 +15,16 @@ */ #include +#include #include +#include #include #include #include #include #include #include +#include /* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */ #define BLKG_STAT_CPU_BATCH (INT_MAX / 2) @@ -31,15 +34,12 @@ #ifdef CONFIG_BLK_CGROUP -enum blkg_rwstat_type { - BLKG_RWSTAT_READ, - BLKG_RWSTAT_WRITE, - BLKG_RWSTAT_SYNC, - BLKG_RWSTAT_ASYNC, - BLKG_RWSTAT_DISCARD, +enum blkg_iostat_type { + BLKG_IOSTAT_READ, + BLKG_IOSTAT_WRITE, + BLKG_IOSTAT_DISCARD, - BLKG_RWSTAT_NR, - BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR, + BLKG_IOSTAT_NR, }; struct blkcg_gq; @@ -47,6 +47,7 @@ struct blkcg_gq; struct blkcg { struct cgroup_subsys_state css; spinlock_t lock; + refcount_t online_pin; struct radix_tree_root blkg_tree; struct blkcg_gq __rcu *blkg_hint; @@ -57,21 +58,18 @@ struct blkcg { struct list_head all_blkcgs_node; #ifdef CONFIG_CGROUP_WRITEBACK struct list_head cgwb_list; - refcount_t cgwb_refcnt; #endif }; -/* - * blkg_[rw]stat->aux_cnt is excluded for local stats but included for - * recursive. Used to carry stats of dead children. - */ -struct blkg_rwstat { - struct percpu_counter cpu_cnt[BLKG_RWSTAT_NR]; - atomic64_t aux_cnt[BLKG_RWSTAT_NR]; +struct blkg_iostat { + u64 bytes[BLKG_IOSTAT_NR]; + u64 ios[BLKG_IOSTAT_NR]; }; -struct blkg_rwstat_sample { - u64 cnt[BLKG_RWSTAT_NR]; +struct blkg_iostat_set { + struct u64_stats_sync sync; + struct blkg_iostat cur; + struct blkg_iostat last; }; /* @@ -127,8 +125,8 @@ struct blkcg_gq { /* is this blkg online? protected by both blkcg and q locks */ bool online; - struct blkg_rwstat stat_bytes; - struct blkg_rwstat stat_ios; + struct blkg_iostat_set __percpu *iostat_cpu; + struct blkg_iostat_set iostat; struct blkg_policy_data *pd[BLKCG_MAX_POLS]; @@ -202,13 +200,6 @@ int blkcg_activate_policy(struct request_queue *q, void blkcg_deactivate_policy(struct request_queue *q, const struct blkcg_policy *pol); -static inline u64 blkg_rwstat_read_counter(struct blkg_rwstat *rwstat, - unsigned int idx) -{ - return atomic64_read(&rwstat->aux_cnt[idx]) + - percpu_counter_sum_positive(&rwstat->cpu_cnt[idx]); -} - const char *blkg_dev_name(struct blkcg_gq *blkg); void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, u64 (*prfill)(struct seq_file *, @@ -216,17 +207,6 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, const struct blkcg_policy *pol, int data, bool show_total); u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); -u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, - const struct blkg_rwstat_sample *rwstat); -u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, - int off); -int blkg_print_stat_bytes(struct seq_file *sf, void *v); -int blkg_print_stat_ios(struct seq_file *sf, void *v); -int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v); -int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v); - -void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, - int off, struct blkg_rwstat_sample *sum); struct blkg_conf_ctx { struct gendisk *disk; @@ -434,47 +414,38 @@ static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd) extern void blkcg_destroy_blkgs(struct blkcg *blkcg); -#ifdef CONFIG_CGROUP_WRITEBACK - /** - * blkcg_cgwb_get - get a reference for blkcg->cgwb_list + * blkcg_pin_online - pin online state * @blkcg: blkcg of interest * - * This is used to track the number of active wb's related to a blkcg. + * While pinned, a blkcg is kept online. This is primarily used to + * impedance-match blkg and cgwb lifetimes so that blkg doesn't go offline + * while an associated cgwb is still active. */ -static inline void blkcg_cgwb_get(struct blkcg *blkcg) +static inline void blkcg_pin_online(struct blkcg *blkcg) { - refcount_inc(&blkcg->cgwb_refcnt); + refcount_inc(&blkcg->online_pin); } /** - * blkcg_cgwb_put - put a reference for @blkcg->cgwb_list + * blkcg_unpin_online - unpin online state * @blkcg: blkcg of interest * - * This is used to track the number of active wb's related to a blkcg. - * When this count goes to zero, all active wb has finished so the + * This is primarily used to impedance-match blkg and cgwb lifetimes so + * that blkg doesn't go offline while an associated cgwb is still active. + * When this count goes to zero, all active cgwbs have finished so the * blkcg can continue destruction by calling blkcg_destroy_blkgs(). - * This work may occur in cgwb_release_workfn() on the cgwb_release - * workqueue. */ -static inline void blkcg_cgwb_put(struct blkcg *blkcg) +static inline void blkcg_unpin_online(struct blkcg *blkcg) { - if (refcount_dec_and_test(&blkcg->cgwb_refcnt)) + do { + if (!refcount_dec_and_test(&blkcg->online_pin)) + break; blkcg_destroy_blkgs(blkcg); + blkcg = blkcg_parent(blkcg); + } while (blkcg); } -#else - -static inline void blkcg_cgwb_get(struct blkcg *blkcg) { } - -static inline void blkcg_cgwb_put(struct blkcg *blkcg) -{ - /* wb isn't being accounted, so trigger destruction right away */ - blkcg_destroy_blkgs(blkcg); -} - -#endif - /** * blkg_path - format cgroup path of blkg * @blkg: blkg of interest @@ -578,128 +549,6 @@ static inline void blkg_put(struct blkcg_gq *blkg) if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \ (p_blkg)->q, false))) -static inline int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp) -{ - int i, ret; - - for (i = 0; i < BLKG_RWSTAT_NR; i++) { - ret = percpu_counter_init(&rwstat->cpu_cnt[i], 0, gfp); - if (ret) { - while (--i >= 0) - percpu_counter_destroy(&rwstat->cpu_cnt[i]); - return ret; - } - atomic64_set(&rwstat->aux_cnt[i], 0); - } - return 0; -} - -static inline void blkg_rwstat_exit(struct blkg_rwstat *rwstat) -{ - int i; - - for (i = 0; i < BLKG_RWSTAT_NR; i++) - percpu_counter_destroy(&rwstat->cpu_cnt[i]); -} - -/** - * blkg_rwstat_add - add a value to a blkg_rwstat - * @rwstat: target blkg_rwstat - * @op: REQ_OP and flags - * @val: value to add - * - * Add @val to @rwstat. The counters are chosen according to @rw. The - * caller is responsible for synchronizing calls to this function. - */ -static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, - unsigned int op, uint64_t val) -{ - struct percpu_counter *cnt; - - if (op_is_discard(op)) - cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD]; - else if (op_is_write(op)) - cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE]; - else - cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ]; - - percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH); - - if (op_is_sync(op)) - cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC]; - else - cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC]; - - percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH); -} - -/** - * blkg_rwstat_read - read the current values of a blkg_rwstat - * @rwstat: blkg_rwstat to read - * - * Read the current snapshot of @rwstat and return it in the aux counts. - */ -static inline void blkg_rwstat_read(struct blkg_rwstat *rwstat, - struct blkg_rwstat_sample *result) -{ - int i; - - for (i = 0; i < BLKG_RWSTAT_NR; i++) - result->cnt[i] = - percpu_counter_sum_positive(&rwstat->cpu_cnt[i]); -} - -/** - * blkg_rwstat_total - read the total count of a blkg_rwstat - * @rwstat: blkg_rwstat to read - * - * Return the total count of @rwstat regardless of the IO direction. This - * function can be called without synchronization and takes care of u64 - * atomicity. - */ -static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat) -{ - struct blkg_rwstat_sample tmp = { }; - - blkg_rwstat_read(rwstat, &tmp); - return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]; -} - -/** - * blkg_rwstat_reset - reset a blkg_rwstat - * @rwstat: blkg_rwstat to reset - */ -static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat) -{ - int i; - - for (i = 0; i < BLKG_RWSTAT_NR; i++) { - percpu_counter_set(&rwstat->cpu_cnt[i], 0); - atomic64_set(&rwstat->aux_cnt[i], 0); - } -} - -/** - * blkg_rwstat_add_aux - add a blkg_rwstat into another's aux count - * @to: the destination blkg_rwstat - * @from: the source - * - * Add @from's count including the aux one to @to's aux count. - */ -static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to, - struct blkg_rwstat *from) -{ - u64 sum[BLKG_RWSTAT_NR]; - int i; - - for (i = 0; i < BLKG_RWSTAT_NR; i++) - sum[i] = percpu_counter_sum_positive(&from->cpu_cnt[i]); - - for (i = 0; i < BLKG_RWSTAT_NR; i++) - atomic64_add(sum[i] + atomic64_read(&from->aux_cnt[i]), - &to->aux_cnt[i]); -} - #ifdef CONFIG_BLK_DEV_THROTTLING extern bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, struct bio *bio); @@ -745,15 +594,33 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, throtl = blk_throtl_bio(q, blkg, bio); if (!throtl) { + struct blkg_iostat_set *bis; + int rwd, cpu; + + if (op_is_discard(bio->bi_opf)) + rwd = BLKG_IOSTAT_DISCARD; + else if (op_is_write(bio->bi_opf)) + rwd = BLKG_IOSTAT_WRITE; + else + rwd = BLKG_IOSTAT_READ; + + cpu = get_cpu(); + bis = per_cpu_ptr(blkg->iostat_cpu, cpu); + u64_stats_update_begin(&bis->sync); + /* * If the bio is flagged with BIO_QUEUE_ENTERED it means this * is a split bio and we would have already accounted for the * size of the bio. */ if (!bio_flagged(bio, BIO_QUEUE_ENTERED)) - blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf, - bio->bi_iter.bi_size); - blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1); + bis->cur.bytes[rwd] += bio->bi_iter.bi_size; + bis->cur.ios[rwd]++; + + u64_stats_update_end(&bis->sync); + if (cgroup_subsys_on_dfl(io_cgrp_subsys)) + cgroup_rstat_updated(blkg->blkcg->css.cgroup, cpu); + put_cpu(); } blkcg_bio_issue_init(bio); @@ -764,14 +631,33 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, static inline void blkcg_use_delay(struct blkcg_gq *blkg) { + if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0)) + return; if (atomic_add_return(1, &blkg->use_delay) == 1) atomic_inc(&blkg->blkcg->css.cgroup->congestion_count); } +/** + * blk_cgroup_mergeable - Determine whether to allow or disallow merges + * @rq: request to merge into + * @bio: bio to merge + * + * @bio and @rq should belong to the same cgroup and their issue_as_root should + * match. The latter is necessary as we don't want to throttle e.g. a metadata + * update because it happens to be next to a regular IO. + */ +static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) +{ + return rq->bio->bi_blkg == bio->bi_blkg && + bio_issue_as_root_blkg(rq->bio) == bio_issue_as_root_blkg(bio); +} + static inline int blkcg_unuse_delay(struct blkcg_gq *blkg) { int old = atomic_read(&blkg->use_delay); + if (WARN_ON_ONCE(old < 0)) + return 0; if (old == 0) return 0; @@ -796,20 +682,39 @@ static inline int blkcg_unuse_delay(struct blkcg_gq *blkg) return 1; } +/** + * blkcg_set_delay - Enable allocator delay mechanism with the specified delay amount + * @blkg: target blkg + * @delay: delay duration in nsecs + * + * When enabled with this function, the delay is not decayed and must be + * explicitly cleared with blkcg_clear_delay(). Must not be mixed with + * blkcg_[un]use_delay() and blkcg_add_delay() usages. + */ +static inline void blkcg_set_delay(struct blkcg_gq *blkg, u64 delay) +{ + int old = atomic_read(&blkg->use_delay); + + /* We only want 1 person setting the congestion count for this blkg. */ + if (!old && atomic_cmpxchg(&blkg->use_delay, old, -1) == old) + atomic_inc(&blkg->blkcg->css.cgroup->congestion_count); + + atomic64_set(&blkg->delay_nsec, delay); +} + +/** + * blkcg_clear_delay - Disable allocator delay mechanism + * @blkg: target blkg + * + * Disable use_delay mechanism. See blkcg_set_delay(). + */ static inline void blkcg_clear_delay(struct blkcg_gq *blkg) { int old = atomic_read(&blkg->use_delay); - if (!old) - return; + /* We only want 1 person clearing the congestion count for this blkg. */ - while (old) { - int cur = atomic_cmpxchg(&blkg->use_delay, old, 0); - if (cur == old) { - atomic_dec(&blkg->blkcg->css.cgroup->congestion_count); - break; - } - old = cur; - } + if (old && atomic_cmpxchg(&blkg->use_delay, old, 0) == old) + atomic_dec(&blkg->blkcg->css.cgroup->congestion_count); } void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta); @@ -868,6 +773,7 @@ static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; } static inline void blkcg_bio_issue_init(struct bio *bio) { } static inline bool blkcg_bio_issue_check(struct request_queue *q, struct bio *bio) { return true; } +static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { return true; } #define blk_queue_for_each_rl(rl, q) \ for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index d688b96d1d633..8fdb38830f331 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -136,7 +136,22 @@ static inline void bio_issue_init(struct bio_issue *issue, (ktime_get_ns() & BIO_ISSUE_TIME_MASK) | ((u64)size << BIO_ISSUE_SIZE_SHIFT)); } +#ifdef CONFIG_BYTEDANCE_BLK_CGROUP_IOTRACE +static inline void bio_issue_init_sector(struct bio_issue *issue, + sector_t size) +{ + size &= (1UL << BIO_ISSUE_SIZE_BITS) - 1; + issue->value = ((issue->value & BIO_ISSUE_RES_MASK) | + ((u64)size << BIO_ISSUE_SIZE_SHIFT) | + (issue->value & BIO_ISSUE_TIME_MASK)); +} +static inline void bio_issue_init_time(struct bio_issue *issue, + u64 time) +{ + issue->value = time & BIO_ISSUE_TIME_MASK; +} +#endif /* * main unit of I/O for the block layer and lower layers (ie drivers and * stacking drivers) @@ -169,6 +184,9 @@ struct bio { */ struct blkcg_gq *bi_blkg; struct bio_issue bi_issue; +#ifdef CONFIG_BYTEDANCE_BLK_CGROUP_IOTRACE + struct bio_issue bi_start; +#endif #ifdef CONFIG_BLK_CGROUP_IOCOST u64 bi_iocost_cost; #endif diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 497ccceee03f6..9912317c0f33c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -59,6 +59,14 @@ struct blk_stat_callback; */ #define BLKCG_MAX_POLS 5 +static inline int blk_validate_block_size(unsigned int bsize) +{ + if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize)) + return -EINVAL; + + return 0; +} + typedef void (rq_end_io_fn)(struct request *, blk_status_t); /* diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 7f0876bd7ddf6..fca56fbd0eb5f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -958,6 +958,8 @@ void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev, struct net_device *netdev); bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev); +void unpriv_ebpf_notify(int new_state); + #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 22f070085971b..ee10a9f06b97c 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -194,6 +194,13 @@ struct bpf_idx_pair { u32 idx; }; +struct bpf_id_pair { + u32 old; + u32 cur; +}; + +/* Maximum number of register states that can exist at once */ +#define BPF_ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) #define MAX_CALL_FRAMES 8 struct bpf_verifier_state { /* call stack tracking */ @@ -301,8 +308,8 @@ struct bpf_insn_aux_data { }; }; int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ - int sanitize_stack_off; /* stack slot to be cleared */ bool seen; /* this insn was processed by the verifier */ + bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */ bool zext_dst; /* this insn zero extends dst reg */ u8 alu_state; /* used in combination with alu_limit */ bool prune_point; @@ -364,12 +371,14 @@ struct bpf_verifier_env { struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ u32 used_map_cnt; /* number of used maps */ u32 id_gen; /* used to generate unique reg IDs */ + bool explore_alu_limits; bool allow_ptr_leaks; bool seen_direct_write; struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ const struct bpf_line_info *prev_linfo; struct bpf_verifier_log log; struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1]; + struct bpf_id_pair idmap_scratch[BPF_ID_MAP_SIZE]; struct { int *insn_state; int *insn_stack; diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 46b92cd61d0c8..c8c71eea237d6 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -78,24 +78,6 @@ struct cpu_cacheinfo { bool cpu_map_populated; }; -/* - * Helpers to make sure "func" is executed on the cpu whose cache - * attributes are being detected - */ -#define DEFINE_SMP_CALL_CACHE_FUNCTION(func) \ -static inline void _##func(void *ret) \ -{ \ - int cpu = smp_processor_id(); \ - *(int *)ret = __##func(cpu); \ -} \ - \ -int func(unsigned int cpu) \ -{ \ - int ret; \ - smp_call_function_single(cpu, _##func, &ret, true); \ - return ret; \ -} - struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu); int init_cache_level(unsigned int cpu); int populate_cache_leaves(unsigned int cpu); diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 1ccfa3779e18d..4ca43cbaf9135 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -105,6 +105,7 @@ enum { CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ CFTYPE_WORLD_WRITABLE = (1 << 4), /* (DON'T USE FOR NEW FILES) S_IWUGO */ CFTYPE_DEBUG = (1 << 5), /* create when cgroup_debug */ + CFTYPE_PRESSURE = (1 << 6), /* only if pressure feature is enabled */ /* internal flags, do not use outside cgroup core proper */ __CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */ @@ -255,7 +256,8 @@ struct css_set { * List of csets participating in the on-going migration either as * source or destination. Protected by cgroup_mutex. */ - struct list_head mg_preload_node; + struct list_head mg_src_preload_node; + struct list_head mg_dst_preload_node; struct list_head mg_node; /* @@ -458,7 +460,7 @@ struct cgroup { struct list_head rstat_css_list; /* cgroup basic resource statistics */ - struct cgroup_base_stat pending_bstat; /* pending from children */ + struct cgroup_base_stat last_bstat; struct cgroup_base_stat bstat; struct prev_cputime prev_cputime; /* for printing out cputime */ diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 202852383ae95..8a0e1bd77a4f1 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -670,6 +670,8 @@ static inline struct psi_group *cgroup_psi(struct cgroup *cgrp) return &cgrp->psi; } +bool cgroup_psi_enabled(void); + static inline void cgroup_init_kthreadd(void) { /* @@ -735,6 +737,11 @@ static inline struct psi_group *cgroup_psi(struct cgroup *cgrp) return NULL; } +static inline bool cgroup_psi_enabled(void) +{ + return false; +} + static inline bool task_under_cgroup_hierarchy(struct task_struct *task, struct cgroup *ancestor) { diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 9446e8fbe55c5..bce983406aaf3 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -233,6 +233,8 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, (typeof(ptr)) (__ptr + (off)); }) #endif +#define absolute_pointer(val) RELOC_HIDE((void *)(val), 0) + #ifndef OPTIMIZER_HIDE_VAR /* Make the optimizer believe the variable can be manipulated arbitrarily. */ #define OPTIMIZER_HIDE_VAR(var) \ diff --git a/include/linux/console.h b/include/linux/console.h index d09951d5a94e4..d1d03c9c7a512 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -153,6 +153,8 @@ struct console { short flags; short index; int cflag; + uint ispeed; + uint ospeed; void *data; struct console *next; }; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 4e9822cb11f38..29a6fa2f518db 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -64,6 +64,10 @@ extern ssize_t cpu_show_tsx_async_abort(struct device *dev, char *buf); extern ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_mmio_stale_data(struct device *dev, + struct device_attribute *attr, + char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index bd1f23536b1b6..15835f37bd5f2 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -60,6 +60,7 @@ enum cpuhp_state { CPUHP_LUSTRE_CFS_DEAD, CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, CPUHP_PADATA_DEAD, + CPUHP_RANDOM_PREPARE, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, @@ -177,6 +178,7 @@ enum cpuhp_state { CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE, CPUHP_AP_WATCHDOG_ONLINE, CPUHP_AP_WORKQUEUE_ONLINE, + CPUHP_AP_RANDOM_ONLINE, CPUHP_AP_RCUTREE_ONLINE, CPUHP_AP_BASE_CACHEINFO_ONLINE, CPUHP_AP_ONLINE_DYN, diff --git a/include/linux/delay.h b/include/linux/delay.h index 8e6828094c1ee..4e684562036d6 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -20,6 +20,7 @@ */ #include +#include extern unsigned long loops_per_jiffy; @@ -58,7 +59,18 @@ void calibrate_delay(void); void __attribute__((weak)) calibration_delay_done(void); void msleep(unsigned int msecs); unsigned long msleep_interruptible(unsigned int msecs); -void usleep_range(unsigned long min, unsigned long max); +void usleep_range_state(unsigned long min, unsigned long max, + unsigned int state); + +static inline void usleep_range(unsigned long min, unsigned long max) +{ + usleep_range_state(min, max, TASK_UNINTERRUPTIBLE); +} + +static inline void usleep_idle_range(unsigned long min, unsigned long max) +{ + usleep_range_state(min, max, TASK_IDLE); +} static inline void ssleep(unsigned int seconds) { diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 655f813832587..c0406f6e1c5aa 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -28,7 +28,6 @@ enum dm_queue_mode { DM_TYPE_BIO_BASED = 1, DM_TYPE_REQUEST_BASED = 2, DM_TYPE_DAX_BIO_BASED = 3, - DM_TYPE_NVME_BIO_BASED = 4, }; typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t; diff --git a/include/linux/dim.h b/include/linux/dim.h index 9fa4b3f88c397..2571da63877c5 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -17,7 +17,7 @@ * We consider 10% difference as significant. */ #define IS_SIGNIFICANT_DIFF(val, ref) \ - (((100UL * abs((val) - (ref))) / (ref)) > 10) + ((ref) && (((100UL * abs((val) - (ref))) / (ref)) > 10)) /** * Calculate the gap between two values. diff --git a/include/linux/edac.h b/include/linux/edac.h index ad56bfa55a0bd..4bdde1a999ce0 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -188,6 +188,8 @@ static inline char *mc_event_error_type(const unsigned int err_type) * This is a variant of the DDR4 memories. * @MEM_LRDDR4: Load-Reduced DDR4 memory. * @MEM_DDR5: Unbuffered DDR5 RAM + * @MEM_RDDR5: Registered DDR5 RAM + * @MEM_LRDDR5: Load-Reduced DDR5 memory. * @MEM_NVDIMM: Non-volatile RAM */ enum mem_type { @@ -213,6 +215,8 @@ enum mem_type { MEM_RDDR4, MEM_LRDDR4, MEM_DDR5, + MEM_RDDR5, + MEM_LRDDR5, MEM_NVDIMM, }; @@ -237,6 +241,8 @@ enum mem_type { #define MEM_FLAG_RDDR4 BIT(MEM_RDDR4) #define MEM_FLAG_LRDDR4 BIT(MEM_LRDDR4) #define MEM_FLAG_DDR5 BIT(MEM_DDR5) +#define MEM_FLAG_RDDR5 BIT(MEM_RDDR5) +#define MEM_FLAG_LRDDR5 BIT(MEM_LRDDR5) #define MEM_FLAG_NVDIMM BIT(MEM_NVDIMM) /** diff --git a/include/linux/efi.h b/include/linux/efi.h index c82ef0eba4f84..f9b9f9a2fd4a5 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -165,6 +165,8 @@ struct capsule_info { size_t page_bytes_remain; }; +int efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff, + size_t hdr_bytes); int __efi_capsule_setup_info(struct capsule_info *cap_info); /* diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index b81f9e1d74b0a..9d249dfbab726 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -58,7 +58,7 @@ static inline int elf_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregse } #endif -#if defined(CONFIG_UM) || defined(CONFIG_IA64) +#if (defined(CONFIG_UML) && defined(CONFIG_X86_32)) || defined(CONFIG_IA64) /* * These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out * extra segments containing the gate DSO contents. Dumping its diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 73f8c3cb95888..9ee6ccc18424f 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -42,6 +42,22 @@ struct em_perf_domain { #define EM_CPU_MAX_POWER 0xFFFF +/* + * Increase resolution of energy estimation calculations for 64-bit + * architectures. The extra resolution improves decision made by EAS for the + * task placement when two Performance Domains might provide similar energy + * estimation values (w/o better resolution the values could be equal). + * + * We increase resolution only if we have enough bits to allow this increased + * resolution (i.e. 64-bit). The costs for increasing resolution when 32-bit + * are pretty high and the returns do not justify the increased costs. + */ +#ifdef CONFIG_64BIT +#define em_scale_power(p) ((p) * 1000) +#else +#define em_scale_power(p) (p) +#endif + struct em_data_callback { /** * active_power() - Provide power at the next capacity state of a CPU diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index f6564b572d779..0f1e95240c0c0 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -127,7 +127,7 @@ static inline bool is_multicast_ether_addr(const u8 *addr) #endif } -static inline bool is_multicast_ether_addr_64bits(const u8 addr[6+2]) +static inline bool is_multicast_ether_addr_64bits(const u8 *addr) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 #ifdef __BIG_ENDIAN @@ -341,8 +341,7 @@ static inline bool ether_addr_equal(const u8 *addr1, const u8 *addr2) * Please note that alignment of addr1 & addr2 are only guaranteed to be 16 bits. */ -static inline bool ether_addr_equal_64bits(const u8 addr1[6+2], - const u8 addr2[6+2]) +static inline bool ether_addr_equal_64bits(const u8 *addr1, const u8 *addr2) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 u64 fold = (*(const u64 *)addr1) ^ (*(const u64 *)addr2); diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 95991e4300bf5..47c1a359107ee 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -81,6 +81,7 @@ enum { #define ETH_RSS_HASH_NO_CHANGE 0 struct net_device; +struct netlink_ext_ack; /* Some generic methods drivers may use in their ethtool_ops */ u32 ethtool_op_get_link(struct net_device *dev); @@ -177,6 +178,31 @@ void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst, bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, const unsigned long *src); +#define ETH_MODULE_EEPROM_PAGE_LEN 128 +#define ETH_MODULE_MAX_I2C_ADDRESS 0x7f + +/** + * struct ethtool_module_eeprom - EEPROM dump from specified page + * @offset: Offset within the specified EEPROM page to begin read, in bytes. + * @length: Number of bytes to read. + * @page: Page number to read from. + * @bank: Page bank number to read from, if applicable by EEPROM spec. + * @i2c_address: I2C address of a page. Value less than 0x7f expected. Most + * EEPROMs use 0x50 or 0x51. + * @data: Pointer to buffer with EEPROM data of @length size. + * + * This can be used to manage pages during EEPROM dump in ethtool and pass + * required information to the driver. + */ +struct ethtool_module_eeprom { + __u32 offset; + __u32 length; + __u8 page; + __u8 bank; + __u8 i2c_address; + __u8 *data; +}; + /** * struct ethtool_ops - optional netdev operations * @get_drvinfo: Report driver/device information. Should only set the @@ -309,6 +335,9 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, * @get_ethtool_phy_stats: Return extended statistics about the PHY device. * This is only useful if the device maintains PHY statistics and * cannot use the standard PHY library helpers. + * @get_module_eeprom_by_page: Get a region of plug-in module EEPROM data from + * specified page. Returns a negative error code or the amount of bytes + * read. * * All operations are optional (i.e. the function pointer may be set * to %NULL) and callers must take this into account. Callers must @@ -404,6 +433,9 @@ struct ethtool_ops { struct ethtool_fecparam *); void (*get_ethtool_phy_stats)(struct net_device *, struct ethtool_stats *, u64 *); + int (*get_module_eeprom_by_page)(struct net_device *dev, + const struct ethtool_module_eeprom *page, + struct netlink_ext_ack *extack); }; struct ethtool_rx_flow_rule { diff --git a/include/linux/ethtool_netlink.h b/include/linux/ethtool_netlink.h new file mode 100644 index 0000000000000..af5e702a27117 --- /dev/null +++ b/include/linux/ethtool_netlink.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _LINUX_ETHTOOL_NETLINK_H_ +#define _LINUX_ETHTOOL_NETLINK_H_ + +#include +#include +#include + +#define __ETHTOOL_LINK_MODE_MASK_NWORDS \ + DIV_ROUND_UP(__ETHTOOL_LINK_MODE_MASK_NBITS, 32) + +enum ethtool_multicast_groups { + ETHNL_MCGRP_MONITOR, +}; + +struct phy_device; + +#endif /* _LINUX_ETHTOOL_NETLINK_H_ */ diff --git a/include/linux/fbcon.h b/include/linux/fbcon.h index ff5596dd30f85..2382dec6d6ab8 100644 --- a/include/linux/fbcon.h +++ b/include/linux/fbcon.h @@ -15,6 +15,8 @@ void fbcon_new_modelist(struct fb_info *info); void fbcon_get_requirement(struct fb_info *info, struct fb_blit_caps *caps); void fbcon_fb_blanked(struct fb_info *info, int blank); +int fbcon_modechange_possible(struct fb_info *info, + struct fb_var_screeninfo *var); void fbcon_update_vcs(struct fb_info *info, bool all); void fbcon_remap_all(struct fb_info *info); int fbcon_set_con2fb_map_ioctl(void __user *argp); @@ -33,6 +35,8 @@ static inline void fbcon_new_modelist(struct fb_info *info) {} static inline void fbcon_get_requirement(struct fb_info *info, struct fb_blit_caps *caps) {} static inline void fbcon_fb_blanked(struct fb_info *info, int blank) {} +static inline int fbcon_modechange_possible(struct fb_info *info, + struct fb_var_screeninfo *var) { return 0; } static inline void fbcon_update_vcs(struct fb_info *info, bool all) {} static inline void fbcon_remap_all(struct fb_info *info) {} static inline int fbcon_set_con2fb_map_ioctl(void __user *argp) { return 0; } diff --git a/include/linux/filter.h b/include/linux/filter.h index a68a7db6eab93..f7109bff712c0 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -68,6 +68,11 @@ struct ctl_table_header; /* unused opcode to mark call to interpreter with arguments */ #define BPF_CALL_ARGS 0xe0 +/* unused opcode to mark speculation barrier for mitigating + * Speculative Store Bypass + */ +#define BPF_NOSPEC 0xc0 + /* As per nm, we expose JITed images as text (code) section for * kallsyms. That way, tools like perf can find it to match * addresses. @@ -368,6 +373,16 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) .off = 0, \ .imm = 0 }) +/* Speculation barrier */ + +#define BPF_ST_NOSPEC() \ + ((struct bpf_insn) { \ + .code = BPF_ST | BPF_NOSPEC, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = 0 }) + /* Internal classic blocks for direct assignment */ #define __BPF_STMT(CODE, K) \ @@ -946,6 +961,7 @@ extern int bpf_jit_enable; extern int bpf_jit_harden; extern int bpf_jit_kallsyms; extern long bpf_jit_limit; +extern long bpf_jit_limit_max; typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); diff --git a/include/linux/fs.h b/include/linux/fs.h index b3b357efc731b..ce7667529c248 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -983,6 +983,7 @@ struct file { #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; errseq_t f_wb_err; + errseq_t f_sb_err; /* for syncfs */ } __randomize_layout __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ @@ -1527,6 +1528,9 @@ struct super_block { /* Being remounted read-only */ int s_readonly_remount; + /* per-sb errseq_t for reporting writeback errors via syncfs */ + errseq_t s_wb_err; + /* AIO completions deferred from interrupt context */ struct workqueue_struct *s_dio_done_wq; struct hlist_head s_pins; @@ -2092,6 +2096,18 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) }; } +static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, + struct file *filp) +{ + *kiocb = (struct kiocb) { + .ki_filp = filp, + .ki_flags = kiocb_src->ki_flags, + .ki_hint = kiocb_src->ki_hint, + .ki_ioprio = kiocb_src->ki_ioprio, + .ki_pos = kiocb_src->ki_pos, + }; +} + /* * Inode state bits. Protected by inode->i_lock * @@ -2763,8 +2779,6 @@ static inline int filemap_fdatawait(struct address_space *mapping) extern bool filemap_range_has_page(struct address_space *, loff_t lstart, loff_t lend); extern int filemap_write_and_wait(struct address_space *mapping); -extern bool filemap_range_needs_writeback(struct address_space *, - loff_t lstart, loff_t lend); extern int filemap_write_and_wait_range(struct address_space *mapping, loff_t lstart, loff_t lend); extern int __filemap_fdatawrite_range(struct address_space *mapping, @@ -2834,6 +2848,18 @@ static inline errseq_t filemap_sample_wb_err(struct address_space *mapping) return errseq_sample(&mapping->wb_err); } +/** + * file_sample_sb_err - sample the current errseq_t to test for later errors + * @mapping: mapping to be sampled + * + * Grab the most current superblock-level errseq_t value for the given + * struct file. + */ +static inline errseq_t file_sample_sb_err(struct file *file) +{ + return errseq_sample(&file->f_path.dentry->d_sb->s_wb_err); +} + static inline int filemap_nr_thps(struct address_space *mapping) { #ifdef CONFIG_READ_ONLY_THP_FOR_FS @@ -3131,6 +3157,10 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos, rwf_t flags); ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos, rwf_t flags); +ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb, + struct iov_iter *iter); +ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb, + struct iov_iter *iter); /* fs/block_dev.c */ extern ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to); diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index ba8a58754340d..ebcb91a57e865 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -135,6 +135,8 @@ extern int generic_parse_monolithic(struct fs_context *fc, void *data); extern int vfs_get_tree(struct fs_context *fc); extern void put_fs_context(struct fs_context *fc); extern void fc_drop_locked(struct fs_context *fc); +int reconfigure_single(struct super_block *s, + int flags, void *data); /* * sget() wrappers to be called from the ->get_tree() op. diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index d5ba431b5d63c..8b648f083ec7d 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -74,6 +74,7 @@ struct fscache_cache { }; extern wait_queue_head_t fscache_cache_cleared_wq; +extern struct workqueue_struct *fscache_object_wq; /* * operation to be applied to a cache object @@ -304,6 +305,10 @@ struct fscache_cache_ops { /* dissociate a cache from all the pages it was backing */ void (*dissociate_pages)(struct fscache_cache *cache); + + /* Begin a read operation for the netfs lib */ + int (*begin_read_operation)(struct netfs_read_request *rreq, + struct fscache_retrieval *op); }; extern struct fscache_cookie fscache_fsdef_index; diff --git a/include/linux/fscache.h b/include/linux/fscache.h index ad044c0cb1f3b..cde7de4a73d13 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -46,6 +46,7 @@ struct pagevec; struct fscache_cache_tag; struct fscache_cookie; struct fscache_netfs; +struct netfs_read_request; typedef void (*fscache_rw_complete_t)(struct page *page, void *context, @@ -200,6 +201,10 @@ extern void __fscache_update_cookie(struct fscache_cookie *, const void *); extern int __fscache_attr_changed(struct fscache_cookie *); extern void __fscache_invalidate(struct fscache_cookie *); extern void __fscache_wait_on_invalidate(struct fscache_cookie *); + +#ifdef FSCACHE_USE_NEW_IO_API +extern int __fscache_begin_read_operation(struct netfs_read_request *, struct fscache_cookie *); +#else extern int __fscache_read_or_alloc_page(struct fscache_cookie *, struct page *, fscache_rw_complete_t, @@ -223,6 +228,8 @@ extern void __fscache_uncache_all_inode_pages(struct fscache_cookie *, struct inode *); extern void __fscache_readpages_cancel(struct fscache_cookie *cookie, struct list_head *pages); +#endif /* FSCACHE_USE_NEW_IO_API */ + extern void __fscache_disable_cookie(struct fscache_cookie *, const void *, bool); extern void __fscache_enable_cookie(struct fscache_cookie *, const void *, loff_t, bool (*)(void *), void *); @@ -507,6 +514,36 @@ int fscache_reserve_space(struct fscache_cookie *cookie, loff_t size) return -ENOBUFS; } +#ifdef FSCACHE_USE_NEW_IO_API + +/** + * fscache_begin_read_operation - Begin a read operation for the netfs lib + * @rreq: The read request being undertaken + * @cookie: The cookie representing the cache object + * + * Begin a read operation on behalf of the netfs helper library. @rreq + * indicates the read request to which the operation state should be attached; + * @cookie indicates the cache object that will be accessed. + * + * This is intended to be called from the ->begin_cache_operation() netfs lib + * operation as implemented by the network filesystem. + * + * Returns: + * * 0 - Success + * * -ENOBUFS - No caching available + * * Other error code from the cache, such as -ENOMEM. + */ +static inline +int fscache_begin_read_operation(struct netfs_read_request *rreq, + struct fscache_cookie *cookie) +{ + if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie)) + return __fscache_begin_read_operation(rreq, cookie); + return -ENOBUFS; +} + +#else /* FSCACHE_USE_NEW_IO_API */ + /** * fscache_read_or_alloc_page - Read a page from the cache or allocate a block * in which to store it @@ -786,6 +823,8 @@ void fscache_uncache_all_inode_pages(struct fscache_cookie *cookie, __fscache_uncache_all_inode_pages(cookie, inode); } +#endif /* FSCACHE_USE_NEW_IO_API */ + /** * fscache_disable_cookie - Disable a cookie * @cookie: The cookie representing the cache object diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 032e5bcf97012..0d1a53d6c52dc 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -298,6 +298,7 @@ extern int __fscrypt_encrypt_symlink(struct inode *inode, const char *target, extern const char *fscrypt_get_symlink(struct inode *inode, const void *caddr, unsigned int max_size, struct delayed_call *done); +int fscrypt_symlink_getattr(const struct path *path, struct kstat *stat); static inline void fscrypt_set_ops(struct super_block *sb, const struct fscrypt_operations *s_cop) { @@ -585,6 +586,12 @@ static inline const char *fscrypt_get_symlink(struct inode *inode, return ERR_PTR(-EOPNOTSUPP); } +static inline int fscrypt_symlink_getattr(const struct path *path, + struct kstat *stat) +{ + return -EOPNOTSUPP; +} + static inline void fscrypt_set_ops(struct super_block *sb, const struct fscrypt_operations *s_cop) { diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 8385cafe4f9fc..4618a26da819b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -502,7 +502,7 @@ static inline int ftrace_disable_ftrace_graph_caller(void) { return 0; } /** * ftrace_make_nop - convert code into nop * @mod: module structure if called by module load initialization - * @rec: the mcount call site record + * @rec: the call site record (e.g. mcount/fentry) * @addr: the address that the call site should be calling * * This is a very sensitive operation and great care needs @@ -523,9 +523,38 @@ static inline int ftrace_disable_ftrace_graph_caller(void) { return 0; } extern int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr); + +/** + * ftrace_init_nop - initialize a nop call site + * @mod: module structure if called by module load initialization + * @rec: the call site record (e.g. mcount/fentry) + * + * This is a very sensitive operation and great care needs + * to be taken by the arch. The operation should carefully + * read the location, check to see if what is read is indeed + * what we expect it to be, and then on success of the compare, + * it should write to the location. + * + * The code segment at @rec->ip should contain the contents created by + * the compiler + * + * Return must be: + * 0 on success + * -EFAULT on error reading the location + * -EINVAL on a failed compare of the contents + * -EPERM on error writing to the location + * Any other value will be considered a failure. + */ +#ifndef ftrace_init_nop +static inline int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) +{ + return ftrace_make_nop(mod, rec, MCOUNT_ADDR); +} +#endif + /** * ftrace_make_call - convert a nop call site into a call to addr - * @rec: the mcount call site record + * @rec: the call site record (e.g. mcount/fentry) * @addr: the address that the call site should call * * This is a very sensitive operation and great care needs @@ -548,7 +577,7 @@ extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS /** * ftrace_modify_call - convert from one addr to another (no nop) - * @rec: the mcount call site record + * @rec: the call site record (e.g. mcount/fentry) * @old_addr: the address expected to be currently called to * @addr: the address to change to * @@ -712,6 +741,11 @@ static inline unsigned long get_lock_parent_ip(void) #ifdef CONFIG_FTRACE_MCOUNT_RECORD extern void ftrace_init(void); +#ifdef CC_USING_PATCHABLE_FUNCTION_ENTRY +#define FTRACE_CALLSITE_SECTION "__patchable_function_entries" +#else +#define FTRACE_CALLSITE_SECTION "__mcount_loc" +#endif #else static inline void ftrace_init(void) { } #endif diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 23b845820e1dc..621c14a217217 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -26,6 +26,7 @@ #define disk_to_dev(disk) (&(disk)->part0.__dev) #define part_to_dev(part) (&((part)->__dev)) +extern const struct device_type disk_type; extern struct device_type part_type; extern struct kobject *block_depr; extern struct class block_class; diff --git a/include/linux/hid.h b/include/linux/hid.h index 85bedeb9ca9f2..d5f9bbf8afa51 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -344,6 +344,8 @@ struct hid_item { /* BIT(9) reserved for backward compatibility, was NO_INIT_INPUT_REPORTS */ #define HID_QUIRK_ALWAYS_POLL BIT(10) #define HID_QUIRK_INPUT_PER_APP BIT(11) +#define HID_QUIRK_X_INVERT BIT(12) +#define HID_QUIRK_Y_INVERT BIT(13) #define HID_QUIRK_SKIP_OUTPUT_REPORTS BIT(16) #define HID_QUIRK_SKIP_OUTPUT_REPORT_ID BIT(17) #define HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP BIT(18) @@ -831,6 +833,11 @@ static inline bool hid_is_using_ll_driver(struct hid_device *hdev, return hdev->ll_driver == driver; } +static inline bool hid_is_usb(struct hid_device *hdev) +{ + return hid_is_using_ll_driver(hdev, &usb_hid_driver); +} + #define PM_HINT_FULLON 1<<5 #define PM_HINT_NORMAL 1<<1 diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 1f98b52118f0a..48be92aded5ee 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -317,16 +317,12 @@ struct clock_event_device; extern void hrtimer_interrupt(struct clock_event_device *dev); -extern void clock_was_set_delayed(void); - extern unsigned int hrtimer_resolution; #else #define hrtimer_resolution (unsigned int)LOW_RES_NSEC -static inline void clock_was_set_delayed(void) { } - #endif static inline ktime_t @@ -350,7 +346,6 @@ hrtimer_expires_remaining_adjusted(const struct hrtimer *timer) timer->base->get_time()); } -extern void clock_was_set(void); #ifdef CONFIG_TIMERFD extern void timerfd_clock_was_set(void); #else diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index af5e18ac543d6..87f3d3293a9db 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -381,15 +381,14 @@ struct hstate { unsigned int nr_huge_pages_node[MAX_NUMNODES]; unsigned int free_huge_pages_node[MAX_NUMNODES]; unsigned int surplus_huge_pages_node[MAX_NUMNODES]; -#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP - unsigned int nr_free_vmemmap_pages; +#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP + unsigned int optimize_vmemmap_pages; #endif #ifdef CONFIG_CGROUP_HUGETLB /* cgroup control files */ struct cftype cgroup_files[5]; #endif char name[HSTATE_NAME_LEN]; - struct delayed_work clear_work; }; struct huge_bootmem_page { @@ -774,11 +773,6 @@ static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr } #endif /* CONFIG_HUGETLB_PAGE */ -#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP -int hugetlb_vmemmap_sysctl_handler(struct ctl_table *table, int write, - void *buffer, size_t *length, loff_t *ppos); -#endif - static inline spinlock_t *huge_pte_lock(struct hstate *h, struct mm_struct *mm, pte_t *pte) { diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index 8e6dd908da216..aa1d4da03538b 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -60,7 +60,5 @@ extern int devm_hwrng_register(struct device *dev, struct hwrng *rng); /** Unregister a Hardware Random Number Generator driver. */ extern void hwrng_unregister(struct hwrng *rng); extern void devm_hwrng_unregister(struct device *dve, struct hwrng *rng); -/** Feed random bits into the pool. */ -extern void add_hwgenerator_randomness(const char *buffer, size_t count, size_t entropy); #endif /* LINUX_HWRANDOM_H_ */ diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index bf5c5f32c65e4..e147ea6794670 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -51,6 +51,7 @@ static inline bool dev_is_mac_header_xmit(const struct net_device *dev) case ARPHRD_VOID: case ARPHRD_NONE: case ARPHRD_RAWIP: + case ARPHRD_PIMREG: return false; default: return true; diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index 686be532f4cb7..7816bf070f835 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -228,6 +228,7 @@ struct st_sensor_settings { * @hw_irq_trigger: if we're using the hardware interrupt on the sensor. * @hw_timestamp: Latest timestamp from the interrupt handler, when in use. * @buffer_data: Data used by buffer part. + * @odr_lock: Local lock for preventing concurrent ODR accesses/changes */ struct st_sensor_data { struct device *dev; @@ -253,6 +254,8 @@ struct st_sensor_data { s64 hw_timestamp; char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] ____cacheline_aligned; + + struct mutex odr_lock; }; #ifdef CONFIG_IIO_BUFFER diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index c309f43bde45e..f8c4d9f97819f 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -130,6 +130,16 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) return ns; } +static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns) +{ + if (ns) { + if (refcount_inc_not_zero(&ns->count)) + return ns; + } + + return NULL; +} + extern void put_ipc_ns(struct ipc_namespace *ns); #else static inline struct ipc_namespace *copy_ipcs(unsigned long flags, @@ -146,6 +156,11 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) return ns; } +static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns) +{ + return ns; +} + static inline void put_ipc_ns(struct ipc_namespace *ns) { } diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index bbe297bbbca52..d5c507311efb1 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -50,7 +50,7 @@ struct ipv6_devconf { __s32 use_optimistic; #endif #ifdef CONFIG_IPV6_MROUTE - __s32 mc_forwarding; + atomic_t mc_forwarding; #endif __s32 disable_ipv6; __s32 drop_unicast_in_l2_multicast; diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 5cc10cf7cb3e6..a0bde9e12efa3 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -487,6 +487,8 @@ #define ICC_CTLR_EL1_EOImode_MASK (1 << ICC_CTLR_EL1_EOImode_SHIFT) #define ICC_CTLR_EL1_CBPR_SHIFT 0 #define ICC_CTLR_EL1_CBPR_MASK (1 << ICC_CTLR_EL1_CBPR_SHIFT) +#define ICC_CTLR_EL1_PMHE_SHIFT 6 +#define ICC_CTLR_EL1_PMHE_MASK (1 << ICC_CTLR_EL1_PMHE_SHIFT) #define ICC_CTLR_EL1_PRI_BITS_SHIFT 8 #define ICC_CTLR_EL1_PRI_BITS_MASK (0x7 << ICC_CTLR_EL1_PRI_BITS_SHIFT) #define ICC_CTLR_EL1_ID_BITS_SHIFT 11 diff --git a/include/linux/kd.h b/include/linux/kd.h deleted file mode 100644 index b130a18f860f0..0000000000000 --- a/include/linux/kd.h +++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_KD_H -#define _LINUX_KD_H - -#include - -#define KD_FONT_FLAG_OLD 0x80000000 /* Invoked via old interface [compat] */ -#endif /* _LINUX_KD_H */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index d83d403dac2ea..77c86a2236daf 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -627,7 +627,7 @@ static inline char *hex_byte_pack_upper(char *buf, u8 byte) return buf; } -extern int hex_to_bin(char ch); +extern int hex_to_bin(unsigned char ch); extern int __must_check hex2bin(u8 *dst, const char *src, size_t count); extern char *bin2hex(char *dst, const void *src, size_t count); diff --git a/include/linux/kexec.h b/include/linux/kexec.h index a1cffce3de8cd..64f23bab6255d 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -186,14 +186,6 @@ void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name); int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf, unsigned long buf_len); void * __weak arch_kexec_kernel_image_load(struct kimage *image); -int __weak arch_kexec_apply_relocations_add(struct purgatory_info *pi, - Elf_Shdr *section, - const Elf_Shdr *relsec, - const Elf_Shdr *symtab); -int __weak arch_kexec_apply_relocations(struct purgatory_info *pi, - Elf_Shdr *section, - const Elf_Shdr *relsec, - const Elf_Shdr *symtab); extern int kexec_add_buffer(struct kexec_buf *kbuf); int kexec_locate_mem_hole(struct kexec_buf *kbuf); @@ -216,6 +208,44 @@ extern int crash_exclude_mem_range(struct crash_mem *mem, unsigned long long mend); extern int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map, void **addr, unsigned long *sz); + +#ifndef arch_kexec_apply_relocations_add +/* + * arch_kexec_apply_relocations_add - apply relocations of type RELA + * @pi: Purgatory to be relocated. + * @section: Section relocations applying to. + * @relsec: Section containing RELAs. + * @symtab: Corresponding symtab. + * + * Return: 0 on success, negative errno on error. + */ +static inline int +arch_kexec_apply_relocations_add(struct purgatory_info *pi, Elf_Shdr *section, + const Elf_Shdr *relsec, const Elf_Shdr *symtab) +{ + pr_err("RELA relocation unsupported.\n"); + return -ENOEXEC; +} +#endif + +#ifndef arch_kexec_apply_relocations +/* + * arch_kexec_apply_relocations - apply relocations of type REL + * @pi: Purgatory to be relocated. + * @section: Section relocations applying to. + * @relsec: Section containing RELs. + * @symtab: Corresponding symtab. + * + * Return: 0 on success, negative errno on error. + */ +static inline int +arch_kexec_apply_relocations(struct purgatory_info *pi, Elf_Shdr *section, + const Elf_Shdr *relsec, const Elf_Shdr *symtab) +{ + pr_err("REL relocation unsupported.\n"); + return -ENOEXEC; +} +#endif #endif /* CONFIG_KEXEC_FILE */ #ifdef CONFIG_KEXEC_ELF diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index a121fd8e7c3a0..c7764d9e6f397 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -155,6 +155,8 @@ struct kretprobe { raw_spinlock_t lock; }; +#define KRETPROBE_MAX_DATA_SIZE 4096 + struct kretprobe_instance { struct hlist_node hlist; struct kretprobe *rp; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c3402920fd2f6..4168109bfa855 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -266,7 +266,8 @@ struct kvm_vcpu { struct preempt_notifier preempt_notifier; #endif int cpu; - int vcpu_id; + int vcpu_id; /* id given by userspace at creation */ + int vcpu_idx; /* index in kvm->vcpus array */ int srcu_idx; int mode; u64 requests; @@ -571,13 +572,7 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *tmp; - int idx; - - kvm_for_each_vcpu(idx, tmp, vcpu->kvm) - if (tmp == vcpu) - return idx; - BUG(); + return vcpu->vcpu_idx; } #define kvm_for_each_memslot(memslot, slots) \ diff --git a/include/linux/libata.h b/include/linux/libata.h index 3c3d8d6b16183..2e448d65a04c7 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -391,7 +391,7 @@ enum { /* This should match the actual table size of * ata_eh_cmd_timeout_table in libata-eh.c. */ - ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 6, + ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 7, /* Horkage types. May be set by libata or controller on drives (some horkage may be drive/controller pair dependent */ @@ -423,6 +423,7 @@ enum { ATA_HORKAGE_NOTRIM = (1 << 24), /* don't use TRIM */ ATA_HORKAGE_MAX_SEC_1024 = (1 << 25), /* Limit max sects to 1024 */ ATA_HORKAGE_MAX_TRIM_128M = (1 << 26), /* Limit max trim size to 128M */ + ATA_HORKAGE_NO_NCQ_ON_ATI = (1 << 27), /* Disable NCQ on ATI chipset */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ diff --git a/include/linux/list.h b/include/linux/list.h index 85c92555e31f8..231ff089f7d1c 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -268,6 +268,24 @@ static inline int list_empty(const struct list_head *head) return READ_ONCE(head->next) == head; } +/** + * list_del_init_careful - deletes entry from list and reinitialize it. + * @entry: the element to delete from the list. + * + * This is the same as list_del_init(), except designed to be used + * together with list_empty_careful() in a way to guarantee ordering + * of other memory operations. + * + * Any memory operations done before a list_del_init_careful() are + * guaranteed to be visible after a list_empty_careful() test. + */ +static inline void list_del_init_careful(struct list_head *entry) +{ + __list_del_entry(entry); + entry->prev = entry; + smp_store_release(&entry->next, entry); +} + /** * list_empty_careful - tests whether a list is empty and not being modified * @head: the list to test @@ -283,7 +301,7 @@ static inline int list_empty(const struct list_head *head) */ static inline int list_empty_careful(const struct list_head *head) { - struct list_head *next = head->next; + struct list_head *next = smp_load_acquire(&head->next); return (next == head) && (next == head->prev); } @@ -567,6 +585,15 @@ static inline void list_splice_tail_init(struct list_head *list, pos != (head); \ pos = n, n = pos->prev) +/** + * list_entry_is_head - test if the entry points to the head of the list + * @pos: the type * to cursor + * @head: the head for your list. + * @member: the name of the list_head within the struct. + */ +#define list_entry_is_head(pos, head, member) \ + (&pos->member == (head)) + /** * list_for_each_entry - iterate over list of given type * @pos: the type * to use as a loop cursor. @@ -575,7 +602,7 @@ static inline void list_splice_tail_init(struct list_head *list, */ #define list_for_each_entry(pos, head, member) \ for (pos = list_first_entry(head, typeof(*pos), member); \ - &pos->member != (head); \ + !list_entry_is_head(pos, head, member); \ pos = list_next_entry(pos, member)) /** @@ -586,7 +613,7 @@ static inline void list_splice_tail_init(struct list_head *list, */ #define list_for_each_entry_reverse(pos, head, member) \ for (pos = list_last_entry(head, typeof(*pos), member); \ - &pos->member != (head); \ + !list_entry_is_head(pos, head, member); \ pos = list_prev_entry(pos, member)) /** @@ -611,7 +638,7 @@ static inline void list_splice_tail_init(struct list_head *list, */ #define list_for_each_entry_continue(pos, head, member) \ for (pos = list_next_entry(pos, member); \ - &pos->member != (head); \ + !list_entry_is_head(pos, head, member); \ pos = list_next_entry(pos, member)) /** @@ -625,7 +652,7 @@ static inline void list_splice_tail_init(struct list_head *list, */ #define list_for_each_entry_continue_reverse(pos, head, member) \ for (pos = list_prev_entry(pos, member); \ - &pos->member != (head); \ + !list_entry_is_head(pos, head, member); \ pos = list_prev_entry(pos, member)) /** @@ -637,7 +664,7 @@ static inline void list_splice_tail_init(struct list_head *list, * Iterate over list of given type, continuing from current position. */ #define list_for_each_entry_from(pos, head, member) \ - for (; &pos->member != (head); \ + for (; !list_entry_is_head(pos, head, member); \ pos = list_next_entry(pos, member)) /** @@ -650,7 +677,7 @@ static inline void list_splice_tail_init(struct list_head *list, * Iterate backwards over list of given type, continuing from current position. */ #define list_for_each_entry_from_reverse(pos, head, member) \ - for (; &pos->member != (head); \ + for (; !list_entry_is_head(pos, head, member); \ pos = list_prev_entry(pos, member)) /** @@ -663,7 +690,7 @@ static inline void list_splice_tail_init(struct list_head *list, #define list_for_each_entry_safe(pos, n, head, member) \ for (pos = list_first_entry(head, typeof(*pos), member), \ n = list_next_entry(pos, member); \ - &pos->member != (head); \ + !list_entry_is_head(pos, head, member); \ pos = n, n = list_next_entry(n, member)) /** @@ -679,7 +706,7 @@ static inline void list_splice_tail_init(struct list_head *list, #define list_for_each_entry_safe_continue(pos, n, head, member) \ for (pos = list_next_entry(pos, member), \ n = list_next_entry(pos, member); \ - &pos->member != (head); \ + !list_entry_is_head(pos, head, member); \ pos = n, n = list_next_entry(n, member)) /** @@ -694,7 +721,7 @@ static inline void list_splice_tail_init(struct list_head *list, */ #define list_for_each_entry_safe_from(pos, n, head, member) \ for (n = list_next_entry(pos, member); \ - &pos->member != (head); \ + !list_entry_is_head(pos, head, member); \ pos = n, n = list_next_entry(n, member)) /** @@ -710,7 +737,7 @@ static inline void list_splice_tail_init(struct list_head *list, #define list_for_each_entry_safe_reverse(pos, n, head, member) \ for (pos = list_last_entry(head, typeof(*pos), member), \ n = list_prev_entry(pos, member); \ - &pos->member != (head); \ + !list_entry_is_head(pos, head, member); \ pos = n, n = list_prev_entry(n, member)) /** diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index 4a8795b21d774..4d11e3d5e76c8 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -14,26 +14,13 @@ typedef struct { } local_lock_t; #ifdef CONFIG_DEBUG_LOCK_ALLOC -# define LL_DEP_MAP_INIT(lockname) \ +# define LOCAL_LOCK_DEBUG_INIT(lockname) \ .dep_map = { \ .name = #lockname, \ .wait_type_inner = LD_WAIT_CONFIG, \ - } -#else -# define LL_DEP_MAP_INIT(lockname) -#endif - -#define INIT_LOCAL_LOCK(lockname) { LL_DEP_MAP_INIT(lockname) } + }, \ + .owner = NULL, -#define __local_lock_init(lock) \ -do { \ - static struct lock_class_key __key; \ - \ - debug_check_no_locks_freed((void *)lock, sizeof(*lock));\ - lockdep_init_map_wait(&(lock)->dep_map, #lock, &__key, 0, LD_WAIT_CONFIG);\ -} while (0) - -#ifdef CONFIG_DEBUG_LOCK_ALLOC static inline void local_lock_acquire(local_lock_t *l) { lock_map_acquire(&l->dep_map); @@ -48,11 +35,30 @@ static inline void local_lock_release(local_lock_t *l) lock_map_release(&l->dep_map); } +static inline void local_lock_debug_init(local_lock_t *l) +{ + l->owner = NULL; +} #else /* CONFIG_DEBUG_LOCK_ALLOC */ +# define LOCAL_LOCK_DEBUG_INIT(lockname) static inline void local_lock_acquire(local_lock_t *l) { } static inline void local_lock_release(local_lock_t *l) { } +static inline void local_lock_debug_init(local_lock_t *l) { } #endif /* !CONFIG_DEBUG_LOCK_ALLOC */ +#define INIT_LOCAL_LOCK(lockname) { LOCAL_LOCK_DEBUG_INIT(lockname) } + +#define __local_lock_init(lock) \ +do { \ + static struct lock_class_key __key; \ + \ + debug_check_no_locks_freed((void *)lock, sizeof(*lock));\ + lockdep_init_map_type(&(lock)->dep_map, #lock, &__key, \ + 0, LD_WAIT_CONFIG, LD_WAIT_INV, \ + LD_LOCK_PERCPU); \ + local_lock_debug_init(lock); \ +} while (0) + #define __local_lock(lock) \ do { \ preempt_disable(); \ diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index a3763247547c3..a21dc5413653e 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1241,22 +1241,22 @@ * * @binder_set_context_mgr: * Check whether @mgr is allowed to be the binder context manager. - * @mgr contains the task_struct for the task being registered. + * @mgr contains the struct cred for the current binder process. * Return 0 if permission is granted. * @binder_transaction: * Check whether @from is allowed to invoke a binder transaction call * to @to. - * @from contains the task_struct for the sending task. - * @to contains the task_struct for the receiving task. + * @from contains the struct cred for the sending process. + * @to contains the struct cred for the receiving process. * @binder_transfer_binder: * Check whether @from is allowed to transfer a binder reference to @to. - * @from contains the task_struct for the sending task. - * @to contains the task_struct for the receiving task. + * @from contains the struct cred for the sending process. + * @to contains the struct cred for the receiving process. * @binder_transfer_file: * Check whether @from is allowed to transfer @file to @to. - * @from contains the task_struct for the sending task. + * @from contains the struct cred for the sending process. * @file contains the struct file being transferred. - * @to contains the task_struct for the receiving task. + * @to contains the struct cred for the receiving process. * * @ptrace_access_check: * Check permission before allowing the current process to trace the @@ -1456,13 +1456,13 @@ * @what: kernel feature being accessed */ union security_list_options { - int (*binder_set_context_mgr)(struct task_struct *mgr); - int (*binder_transaction)(struct task_struct *from, - struct task_struct *to); - int (*binder_transfer_binder)(struct task_struct *from, - struct task_struct *to); - int (*binder_transfer_file)(struct task_struct *from, - struct task_struct *to, + int (*binder_set_context_mgr)(const struct cred *mgr); + int (*binder_transaction)(const struct cred *from, + const struct cred *to); + int (*binder_transfer_binder)(const struct cred *from, + const struct cred *to); + int (*binder_transfer_file)(const struct cred *from, + const struct cred *to, struct file *file); int (*ptrace_access_check)(struct task_struct *child, diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h index 36d6ce673503c..6fee33cb52f58 100644 --- a/include/linux/mailbox_controller.h +++ b/include/linux/mailbox_controller.h @@ -83,6 +83,7 @@ struct mbox_controller { const struct of_phandle_args *sp); /* Internal to API */ struct hrtimer poll_hrt; + spinlock_t poll_hrt_lock; struct list_head node; }; diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h index 0661af17a7584..1e02058113944 100644 --- a/include/linux/mc146818rtc.h +++ b/include/linux/mc146818rtc.h @@ -86,6 +86,8 @@ struct cmos_rtc_board_info { /* 2 values for divider stage reset, others for "testing purposes only" */ # define RTC_DIV_RESET1 0x60 # define RTC_DIV_RESET2 0x70 + /* In AMD BKDG bit 5 and 6 are reserved, bit 4 is for select dv0 bank */ +# define RTC_AMD_BANK_SELECT 0x10 /* Periodic intr. / Square wave rate select. 0=none, 1=32.8kHz,... 15=2Hz */ # define RTC_RATE_SELECT 0x0F diff --git a/include/linux/mdio.h b/include/linux/mdio.h index a7604248777b7..0f1f784de80e9 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -64,6 +64,9 @@ struct mdio_driver { /* Clears up any memory if needed */ void (*remove)(struct mdio_device *mdiodev); + + /* Quiesces the device on system shutdown, turns off interrupts etc */ + void (*shutdown)(struct mdio_device *mdiodev); }; #define to_mdio_driver(d) \ container_of(to_mdio_common_driver(d), struct mdio_driver, mdiodrv) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index f491690d54c6c..64b971b2542d6 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -351,8 +351,8 @@ phys_addr_t memblock_phys_alloc_range(phys_addr_t size, phys_addr_t align, phys_addr_t start, phys_addr_t end); phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid); -static inline phys_addr_t memblock_phys_alloc(phys_addr_t size, - phys_addr_t align) +static __always_inline phys_addr_t memblock_phys_alloc(phys_addr_t size, + phys_addr_t align) { return memblock_phys_alloc_range(size, align, 0, MEMBLOCK_ALLOC_ACCESSIBLE); diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 3723308ac3d22..bc4753fd4d45f 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -331,6 +331,6 @@ extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum); extern bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages, int online_type); -extern struct zone *zone_for_pfn_range(int online_type, int nid, unsigned start_pfn, - unsigned long nr_pages); +extern struct zone *zone_for_pfn_range(int online_type, int nid, + unsigned long start_pfn, unsigned long nr_pages); #endif /* __LINUX_MEMORY_HOTPLUG_H */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index dc93ed6c7096f..e2c296d11f978 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -995,8 +995,7 @@ void mlx5_unregister_debugfs(void); void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas); void mlx5_fill_page_frag_array_perm(struct mlx5_frag_buf *buf, __be64 *pas, u8 perm); void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas); -int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, - unsigned int *irqn); +int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn); int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 44cc6077eb3a2..a60ed982071fa 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -8986,16 +8986,22 @@ struct mlx5_ifc_pcmr_reg_bits { u8 reserved_at_0[0x8]; u8 local_port[0x8]; u8 reserved_at_10[0x10]; + u8 entropy_force_cap[0x1]; u8 entropy_calc_cap[0x1]; u8 entropy_gre_calc_cap[0x1]; - u8 reserved_at_23[0x1b]; + u8 reserved_at_23[0xf]; + u8 rx_ts_over_crc_cap[0x1]; + u8 reserved_at_33[0xb]; u8 fcs_cap[0x1]; u8 reserved_at_3f[0x1]; + u8 entropy_force[0x1]; u8 entropy_calc[0x1]; u8 entropy_gre_calc[0x1]; - u8 reserved_at_43[0x1b]; + u8 reserved_at_43[0xf]; + u8 rx_ts_over_crc[0x1]; + u8 reserved_at_53[0xb]; u8 fcs_chk[0x1]; u8 reserved_at_5f[0x1]; }; @@ -9013,8 +9019,8 @@ struct mlx5_ifc_bufferx_reg_bits { u8 reserved_at_0[0x6]; u8 lossy[0x1]; u8 epsb[0x1]; - u8 reserved_at_8[0xc]; - u8 size[0xc]; + u8 reserved_at_8[0x8]; + u8 size[0x10]; u8 xoff_threshold[0x10]; u8 xon_threshold[0x10]; diff --git a/include/linux/mlx5/mpfs.h b/include/linux/mlx5/mpfs.h new file mode 100644 index 0000000000000..bf700c8d55164 --- /dev/null +++ b/include/linux/mlx5/mpfs.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB + * Copyright (c) 2021 Mellanox Technologies Ltd. + */ + +#ifndef _MLX5_MPFS_ +#define _MLX5_MPFS_ + +struct mlx5_core_dev; + +#ifdef CONFIG_MLX5_MPFS +int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac); +int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac); +#else /* #ifndef CONFIG_MLX5_MPFS */ +static inline int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; } +static inline int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; } +#endif + +#endif diff --git a/include/linux/mm.h b/include/linux/mm.h index e4b8ce39eb7e9..fb22eae37dce7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2331,6 +2331,7 @@ extern int install_special_mapping(struct mm_struct *mm, unsigned long flags, struct page **pages); unsigned long randomize_stack_top(unsigned long stack_top); +unsigned long randomize_page(unsigned long start, unsigned long range); extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); @@ -2774,7 +2775,7 @@ static inline void print_vma_addr(char *prefix, unsigned long rip) } #endif -#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP +#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP int vmemmap_remap_free(unsigned long start, unsigned long end, unsigned long reuse); int vmemmap_remap_alloc(unsigned long start, unsigned long end, diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index a1fd622032aa5..c9539149c35a2 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -955,6 +955,15 @@ static inline int is_highmem_idx(enum zone_type idx) #endif } +#ifdef CONFIG_ZONE_DMA +bool has_managed_dma(void); +#else +static inline bool has_managed_dma(void) +{ + return false; +} +#endif + /** * is_highmem - helper function to quickly check if a struct zone is a * highmem zone or not. This is an attempt to keep references @@ -1267,13 +1276,16 @@ static inline unsigned long *section_to_usemap(struct mem_section *ms) static inline struct mem_section *__nr_to_section(unsigned long nr) { + unsigned long root = SECTION_NR_TO_ROOT(nr); + + if (unlikely(root >= NR_SECTION_ROOTS)) + return NULL; + #ifdef CONFIG_SPARSEMEM_EXTREME - if (!mem_section) + if (!mem_section || !mem_section[root]) return NULL; #endif - if (!mem_section[SECTION_NR_TO_ROOT(nr)]) - return NULL; - return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK]; + return &mem_section[root][nr & SECTION_ROOT_MASK]; } extern unsigned long __section_nr(struct mem_section *ms); extern size_t mem_section_usage_size(void); @@ -1452,37 +1464,6 @@ void memory_present(int nid, unsigned long start, unsigned long end); #define pfn_valid_within(pfn) (1) #endif -#ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL -/* - * pfn_valid() is meant to be able to tell if a given PFN has valid memmap - * associated with it or not. This means that a struct page exists for this - * pfn. The caller cannot assume the page is fully initialized in general. - * Hotplugable pages might not have been onlined yet. pfn_to_online_page() - * will ensure the struct page is fully online and initialized. Special pages - * (e.g. ZONE_DEVICE) are never onlined and should be treated accordingly. - * - * In FLATMEM, it is expected that holes always have valid memmap as long as - * there is valid PFNs either side of the hole. In SPARSEMEM, it is assumed - * that a valid section has a memmap for the entire section. - * - * However, an ARM, and maybe other embedded architectures in the future - * free memmap backing holes to save memory on the assumption the memmap is - * never used. The page_zone linkages are then broken even though pfn_valid() - * returns true. A walker of the full memmap must then do this additional - * check to ensure the memmap they are looking at is sane by making sure - * the zone and PFN linkages are still valid. This is expensive, but walkers - * of the full memmap are extremely rare. - */ -bool memmap_valid_within(unsigned long pfn, - struct page *page, struct zone *zone); -#else -static inline bool memmap_valid_within(unsigned long pfn, - struct page *page, struct zone *zone) -{ - return true; -} -#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */ - #endif /* !__GENERATING_BOUNDS.H */ #endif /* !__ASSEMBLY__ */ #endif /* _LINUX_MMZONE_H */ diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index c98a211086880..f3c149073c213 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -286,6 +286,7 @@ struct cfi_private { map_word sector_erase_cmd; unsigned long chipshift; /* Because they're of the same type */ const char *im_name; /* inter_module name for cmdset_setup */ + unsigned long quirks; struct flchip chips[0]; /* per-chip data structure for each chip */ }; diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 640e7279f1617..75be8886783e1 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -151,7 +151,7 @@ enum { #define NETIF_F_HW_TLS_TX __NETIF_F(HW_TLS_TX) #define NETIF_F_HW_TLS_RX __NETIF_F(HW_TLS_RX) -/* Finds the next feature with the highest number of the range of start till 0. +/* Finds the next feature with the highest number of the range of start-1 till 0. */ static inline int find_next_netdev_feature(u64 feature, unsigned long start) { @@ -170,7 +170,7 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) for ((bit) = find_next_netdev_feature((mask_addr), \ NETDEV_FEATURE_COUNT); \ (bit) >= 0; \ - (bit) = find_next_netdev_feature((mask_addr), (bit) - 1)) + (bit) = find_next_netdev_feature((mask_addr), (bit))) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9b97b4b5cb144..ed953b0a1bcc4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -739,6 +739,8 @@ struct netdev_rx_queue { #ifdef CONFIG_RPS struct rps_map __rcu *rps_map; struct rps_dev_flow_table __rcu *rps_flow_table; + unsigned long rps_single_flow_flags; +#define RPS_SINGLE_FLOW_ENABLE (0) #endif struct kobject kobj; struct net_device *dev; @@ -757,6 +759,46 @@ struct rx_queue_attribute { ssize_t (*store)(struct netdev_rx_queue *queue, const char *buf, size_t len); }; +#ifdef CONFIG_RPS +struct rps_flow_info_node { + union { + struct { + __be32 saddr; + __be32 daddr; + __u8 protocol; + } in4_u; + + struct { + struct in6_addr saddr; + struct in6_addr daddr; + __u8 flow_lbl[3]; + } in6_u; + + }; +#define v4_saddr in4_u.saddr +#define v4_daddr in4_u.daddr +#define v4_protocol in4_u.protocol +#define v6_saddr in6_u.saddr.s6_addr32 +#define v6_daddr in6_u.daddr.s6_addr32 +#define v6_flow_lbl in6_u.flow_lbl + u64 jiffies; /* keep the time update entry */ + struct hlist_node node; +}; +DECLARE_STATIC_KEY_FALSE(rps_flow_queue_enable_key); +void rps_single_flow_enable_set(bool enable); +unsigned long rps_flow_node_aging_time_get(void); +void rps_flow_node_aging_time_set(unsigned long aging_time); +bool __rps_flow_node_add(const void *iph, bool ipv6); + +static inline void rps_flow_node_add(const void *iph, bool ipv6) +{ + /* no rps_single_flow_enable */ + if (static_branch_unlikely(&rps_flow_queue_enable_key)) + __rps_flow_node_add(iph, ipv6); +} +#else +static inline void rps_flow_node_add(const void *iph, bool ipv6) { } +#endif #ifdef CONFIG_XPS /* @@ -2401,6 +2443,7 @@ struct packet_type { struct net_device *); bool (*id_match)(struct packet_type *ptype, struct sock *sk); + struct net *af_packet_net; void *af_packet_priv; struct list_head list; }; @@ -3688,6 +3731,10 @@ int netdev_rx_handler_register(struct net_device *dev, void netdev_rx_handler_unregister(struct net_device *dev); bool dev_valid_name(const char *name); +static inline bool is_socket_ioctl_cmd(unsigned int cmd) +{ + return _IOC_TYPE(cmd) == SOCK_IOC_TYPE; +} int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_copyout); int dev_ifconf(struct net *net, struct ifconf *, int); @@ -3776,7 +3823,8 @@ void netdev_run_todo(void); */ static inline void dev_put(struct net_device *dev) { - this_cpu_dec(*dev->pcpu_refcnt); + if (dev) + this_cpu_dec(*dev->pcpu_refcnt); } /** @@ -3787,7 +3835,8 @@ static inline void dev_put(struct net_device *dev) */ static inline void dev_hold(struct net_device *dev) { - this_cpu_inc(*dev->pcpu_refcnt); + if (dev) + this_cpu_inc(*dev->pcpu_refcnt); } /* Carrier loss detection, dial on demand. The functions netif_carrier_on @@ -3943,7 +3992,8 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits) static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) { spin_lock(&txq->_xmit_lock); - txq->xmit_lock_owner = cpu; + /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + WRITE_ONCE(txq->xmit_lock_owner, cpu); } static inline bool __netif_tx_acquire(struct netdev_queue *txq) @@ -3960,26 +4010,32 @@ static inline void __netif_tx_release(struct netdev_queue *txq) static inline void __netif_tx_lock_bh(struct netdev_queue *txq) { spin_lock_bh(&txq->_xmit_lock); - txq->xmit_lock_owner = smp_processor_id(); + /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id()); } static inline bool __netif_tx_trylock(struct netdev_queue *txq) { bool ok = spin_trylock(&txq->_xmit_lock); - if (likely(ok)) - txq->xmit_lock_owner = smp_processor_id(); + + if (likely(ok)) { + /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id()); + } return ok; } static inline void __netif_tx_unlock(struct netdev_queue *txq) { - txq->xmit_lock_owner = -1; + /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + WRITE_ONCE(txq->xmit_lock_owner, -1); spin_unlock(&txq->_xmit_lock); } static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) { - txq->xmit_lock_owner = -1; + /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + WRITE_ONCE(txq->xmit_lock_owner, -1); spin_unlock_bh(&txq->_xmit_lock); } diff --git a/include/linux/netfs.h b/include/linux/netfs.h new file mode 100644 index 0000000000000..28ae3822cc631 --- /dev/null +++ b/include/linux/netfs.h @@ -0,0 +1,146 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Network filesystem support services. + * + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * See: + * + * Documentation/filesystems/netfs_library.rst + * + * for a description of the network filesystem interface declared here. + */ + +#ifndef _LINUX_NETFS_H +#define _LINUX_NETFS_H + +enum netfs_read_source { + NETFS_FILL_WITH_ZEROES, + NETFS_DOWNLOAD_FROM_SERVER, + NETFS_READ_FROM_CACHE, + NETFS_INVALID_READ, +} __mode(byte); + +typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error, + bool was_async); + +/* + * Resources required to do operations on a cache. + */ +struct netfs_cache_resources { + const struct netfs_cache_ops *ops; + void *cache_priv; + void *cache_priv2; +}; + +/* + * Descriptor for a single component subrequest. + */ +struct netfs_read_subrequest { + struct netfs_read_request *rreq; /* Supervising read request */ + struct list_head rreq_link; /* Link in rreq->subrequests */ + loff_t start; /* Where to start the I/O */ + size_t len; /* Size of the I/O */ + size_t transferred; /* Amount of data transferred */ + refcount_t usage; + short error; /* 0 or error that occurred */ + unsigned short debug_index; /* Index in list (for debugging output) */ + enum netfs_read_source source; /* Where to read from */ + unsigned long flags; +#define NETFS_SREQ_WRITE_TO_CACHE 0 /* Set if should write to cache */ +#define NETFS_SREQ_CLEAR_TAIL 1 /* Set if the rest of the read should be cleared */ +#define NETFS_SREQ_SHORT_READ 2 /* Set if there was a short read from the cache */ +#define NETFS_SREQ_SEEK_DATA_READ 3 /* Set if ->read() should SEEK_DATA first */ +#define NETFS_SREQ_NO_PROGRESS 4 /* Set if we didn't manage to read any data */ +#define NETFS_SREQ_ONDEMAND 5 /* Set if it's from on-demand read mode */ +}; + +/* + * Descriptor for a read helper request. This is used to make multiple I/O + * requests on a variety of sources and then stitch the result together. + */ +struct netfs_read_request { + struct work_struct work; + struct inode *inode; /* The file being accessed */ + struct address_space *mapping; /* The mapping being accessed */ + struct netfs_cache_resources cache_resources; + struct list_head subrequests; /* Requests to fetch I/O from disk or net */ + void *netfs_priv; /* Private data for the netfs */ + unsigned int debug_id; + unsigned int cookie_debug_id; + atomic_t nr_rd_ops; /* Number of read ops in progress */ + atomic_t nr_wr_ops; /* Number of write ops in progress */ + size_t submitted; /* Amount submitted for I/O so far */ + size_t len; /* Length of the request */ + short error; /* 0 or error that occurred */ + loff_t i_size; /* Size of the file */ + loff_t start; /* Start position */ + pgoff_t no_unlock_page; /* Don't unlock this page after read */ + refcount_t usage; + unsigned long flags; +#define NETFS_RREQ_INCOMPLETE_IO 0 /* Some ioreqs terminated short or with error */ +#define NETFS_RREQ_WRITE_TO_CACHE 1 /* Need to write to the cache */ +#define NETFS_RREQ_NO_UNLOCK_PAGE 2 /* Don't unlock no_unlock_page on completion */ +#define NETFS_RREQ_DONT_UNLOCK_PAGES 3 /* Don't unlock the pages on completion */ +#define NETFS_RREQ_FAILED 4 /* The request failed */ +#define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */ + const struct netfs_read_request_ops *netfs_ops; +}; + +/* + * Operations the network filesystem can/must provide to the helpers. + */ +struct netfs_read_request_ops { + bool (*is_cache_enabled)(struct inode *inode); + void (*init_rreq)(struct netfs_read_request *rreq, struct file *file); + int (*begin_cache_operation)(struct netfs_read_request *rreq); + void (*expand_readahead)(struct netfs_read_request *rreq); + bool (*clamp_length)(struct netfs_read_subrequest *subreq); + void (*issue_op)(struct netfs_read_subrequest *subreq); + bool (*is_still_valid)(struct netfs_read_request *rreq); + int (*check_write_begin)(struct file *file, loff_t pos, unsigned len, + struct page *page, void **_fsdata); + void (*done)(struct netfs_read_request *rreq); + void (*cleanup)(struct address_space *mapping, void *netfs_priv); +}; + +/* + * Table of operations for access to a cache. This is obtained by + * rreq->ops->begin_cache_operation(). + */ +struct netfs_cache_ops { + /* End an operation */ + void (*end_operation)(struct netfs_cache_resources *cres); + + /* Read data from the cache */ + int (*read)(struct netfs_cache_resources *cres, + loff_t start_pos, + struct iov_iter *iter, + bool seek_data, + netfs_io_terminated_t term_func, + void *term_func_priv); + + /* Write data to the cache */ + int (*write)(struct netfs_cache_resources *cres, + loff_t start_pos, + struct iov_iter *iter, + netfs_io_terminated_t term_func, + void *term_func_priv); + + /* Expand readahead request */ + void (*expand_readahead)(struct netfs_cache_resources *cres, + loff_t *_start, size_t *_len, loff_t i_size); + + /* Prepare a read operation, shortening it to a cached/uncached + * boundary as appropriate. + */ + enum netfs_read_source (*prepare_read)(struct netfs_read_subrequest *subreq, + loff_t i_size); + + /* Prepare a write operation, working out what part of the write we can + * actually do. + */ + int (*prepare_write)(struct netfs_cache_resources *cres, + loff_t *_start, size_t *_len, loff_t i_size); +}; +#endif /* _LINUX_NETFS_H */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 978ef674f0388..49cf5c855cbe5 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -480,10 +480,10 @@ static inline const struct cred *nfs_file_cred(struct file *file) * linux/fs/nfs/direct.c */ extern ssize_t nfs_direct_IO(struct kiocb *, struct iov_iter *); -extern ssize_t nfs_file_direct_read(struct kiocb *iocb, - struct iov_iter *iter); -extern ssize_t nfs_file_direct_write(struct kiocb *iocb, - struct iov_iter *iter); +ssize_t nfs_file_direct_read(struct kiocb *iocb, + struct iov_iter *iter, bool swap); +ssize_t nfs_file_direct_write(struct kiocb *iocb, + struct iov_iter *iter, bool swap); /* * linux/fs/nfs/dir.c diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 27e7fa36f707f..a8d79f5b9a52d 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -42,11 +42,11 @@ * void nodes_shift_right(dst, src, n) Shift right * void nodes_shift_left(dst, src, n) Shift left * - * int first_node(mask) Number lowest set bit, or MAX_NUMNODES - * int next_node(node, mask) Next node past 'node', or MAX_NUMNODES - * int next_node_in(node, mask) Next node past 'node', or wrap to first, + * unsigned int first_node(mask) Number lowest set bit, or MAX_NUMNODES + * unsigend int next_node(node, mask) Next node past 'node', or MAX_NUMNODES + * unsigned int next_node_in(node, mask) Next node past 'node', or wrap to first, * or MAX_NUMNODES - * int first_unset_node(mask) First node not set in mask, or + * unsigned int first_unset_node(mask) First node not set in mask, or * MAX_NUMNODES * * nodemask_t nodemask_of_node(node) Return nodemask with bit 'node' set @@ -153,7 +153,7 @@ static inline void __nodes_clear(nodemask_t *dstp, unsigned int nbits) #define node_test_and_set(node, nodemask) \ __node_test_and_set((node), &(nodemask)) -static inline int __node_test_and_set(int node, nodemask_t *addr) +static inline bool __node_test_and_set(int node, nodemask_t *addr) { return test_and_set_bit(node, addr->bits); } @@ -200,7 +200,7 @@ static inline void __nodes_complement(nodemask_t *dstp, #define nodes_equal(src1, src2) \ __nodes_equal(&(src1), &(src2), MAX_NUMNODES) -static inline int __nodes_equal(const nodemask_t *src1p, +static inline bool __nodes_equal(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_equal(src1p->bits, src2p->bits, nbits); @@ -208,7 +208,7 @@ static inline int __nodes_equal(const nodemask_t *src1p, #define nodes_intersects(src1, src2) \ __nodes_intersects(&(src1), &(src2), MAX_NUMNODES) -static inline int __nodes_intersects(const nodemask_t *src1p, +static inline bool __nodes_intersects(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_intersects(src1p->bits, src2p->bits, nbits); @@ -216,20 +216,20 @@ static inline int __nodes_intersects(const nodemask_t *src1p, #define nodes_subset(src1, src2) \ __nodes_subset(&(src1), &(src2), MAX_NUMNODES) -static inline int __nodes_subset(const nodemask_t *src1p, +static inline bool __nodes_subset(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_subset(src1p->bits, src2p->bits, nbits); } #define nodes_empty(src) __nodes_empty(&(src), MAX_NUMNODES) -static inline int __nodes_empty(const nodemask_t *srcp, unsigned int nbits) +static inline bool __nodes_empty(const nodemask_t *srcp, unsigned int nbits) { return bitmap_empty(srcp->bits, nbits); } #define nodes_full(nodemask) __nodes_full(&(nodemask), MAX_NUMNODES) -static inline int __nodes_full(const nodemask_t *srcp, unsigned int nbits) +static inline bool __nodes_full(const nodemask_t *srcp, unsigned int nbits) { return bitmap_full(srcp->bits, nbits); } @@ -260,15 +260,15 @@ static inline void __nodes_shift_left(nodemask_t *dstp, > MAX_NUMNODES, then the silly min_ts could be dropped. */ #define first_node(src) __first_node(&(src)) -static inline int __first_node(const nodemask_t *srcp) +static inline unsigned int __first_node(const nodemask_t *srcp) { - return min_t(int, MAX_NUMNODES, find_first_bit(srcp->bits, MAX_NUMNODES)); + return min_t(unsigned int, MAX_NUMNODES, find_first_bit(srcp->bits, MAX_NUMNODES)); } #define next_node(n, src) __next_node((n), &(src)) -static inline int __next_node(int n, const nodemask_t *srcp) +static inline unsigned int __next_node(int n, const nodemask_t *srcp) { - return min_t(int,MAX_NUMNODES,find_next_bit(srcp->bits, MAX_NUMNODES, n+1)); + return min_t(unsigned int, MAX_NUMNODES, find_next_bit(srcp->bits, MAX_NUMNODES, n+1)); } /* @@ -276,7 +276,7 @@ static inline int __next_node(int n, const nodemask_t *srcp) * the first node in src if needed. Returns MAX_NUMNODES if src is empty. */ #define next_node_in(n, src) __next_node_in((n), &(src)) -int __next_node_in(int node, const nodemask_t *srcp); +unsigned int __next_node_in(int node, const nodemask_t *srcp); static inline void init_nodemask_of_node(nodemask_t *mask, int node) { @@ -296,9 +296,9 @@ static inline void init_nodemask_of_node(nodemask_t *mask, int node) }) #define first_unset_node(mask) __first_unset_node(&(mask)) -static inline int __first_unset_node(const nodemask_t *maskp) +static inline unsigned int __first_unset_node(const nodemask_t *maskp) { - return min_t(int,MAX_NUMNODES, + return min_t(unsigned int, MAX_NUMNODES, find_first_zero_bit(maskp->bits, MAX_NUMNODES)); } @@ -375,14 +375,13 @@ static inline void __nodes_fold(nodemask_t *dstp, const nodemask_t *origp, } #if MAX_NUMNODES > 1 -#define for_each_node_mask(node, mask) \ - for ((node) = first_node(mask); \ - (node) < MAX_NUMNODES; \ - (node) = next_node((node), (mask))) +#define for_each_node_mask(node, mask) \ + for ((node) = first_node(mask); \ + (node >= 0) && (node) < MAX_NUMNODES; \ + (node) = next_node((node), (mask))) #else /* MAX_NUMNODES == 1 */ -#define for_each_node_mask(node, mask) \ - if (!nodes_empty(mask)) \ - for ((node) = 0; (node) < 1; (node)++) +#define for_each_node_mask(node, mask) \ + for ((node) = 0; (node) < 1 && !nodes_empty(mask); (node)++) #endif /* MAX_NUMNODES */ /* @@ -435,11 +434,11 @@ static inline int num_node_state(enum node_states state) #define first_online_node first_node(node_states[N_ONLINE]) #define first_memory_node first_node(node_states[N_MEMORY]) -static inline int next_online_node(int nid) +static inline unsigned int next_online_node(int nid) { return next_node(nid, node_states[N_ONLINE]); } -static inline int next_memory_node(int nid) +static inline unsigned int next_memory_node(int nid) { return next_node(nid, node_states[N_MEMORY]); } diff --git a/include/linux/of_clk.h b/include/linux/of_clk.h index b27da9f164cbd..c86fcad23fc21 100644 --- a/include/linux/of_clk.h +++ b/include/linux/of_clk.h @@ -6,6 +6,9 @@ #ifndef __LINUX_OF_CLK_H #define __LINUX_OF_CLK_H +struct device_node; +struct of_device_id; + #if defined(CONFIG_COMMON_CLK) && defined(CONFIG_OF) unsigned int of_clk_get_parent_count(struct device_node *np); diff --git a/include/linux/once.h b/include/linux/once.h index 9225ee6d96c75..ae6f4eb41cbe7 100644 --- a/include/linux/once.h +++ b/include/linux/once.h @@ -7,7 +7,7 @@ bool __do_once_start(bool *done, unsigned long *flags); void __do_once_done(bool *done, struct static_key_true *once_key, - unsigned long *flags); + unsigned long *flags, struct module *mod); /* Call a function exactly once. The idea of DO_ONCE() is to perform * a function call such as initialization of random seeds, etc, only @@ -46,7 +46,7 @@ void __do_once_done(bool *done, struct static_key_true *once_key, if (unlikely(___ret)) { \ func(__VA_ARGS__); \ __do_once_done(&___done, &___once_key, \ - &___flags); \ + &___flags, THIS_MODULE); \ } \ } \ ___ret; \ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 87bb899189021..71c4a3ad754c0 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -169,18 +169,18 @@ enum pageflags { struct page; /* forward declaration */ -#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP -DECLARE_STATIC_KEY_MAYBE(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON, - hugetlb_free_vmemmap_enabled_key); +#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP +DECLARE_STATIC_KEY_MAYBE(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON, + hugetlb_optimize_vmemmap_key); -static __always_inline bool hugetlb_free_vmemmap_enabled(void) +static __always_inline bool hugetlb_optimize_vmemmap_enabled(void) { - return static_branch_maybe(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON, - &hugetlb_free_vmemmap_enabled_key); + return static_branch_maybe(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON, + &hugetlb_optimize_vmemmap_key); } /* - * If the feature of freeing some vmemmap pages associated with each HugeTLB + * If the feature of optimizing vmemmap pages associated with each HugeTLB * page is enabled, the head vmemmap page frame is reused and all of the tail * vmemmap addresses map to the head vmemmap page frame (furture details can * refer to the figure at the head of the mm/hugetlb_vmemmap.c). In other @@ -197,7 +197,7 @@ static __always_inline bool hugetlb_free_vmemmap_enabled(void) */ static __always_inline struct page *page_fixed_fake_head(struct page *page) { - if (!hugetlb_free_vmemmap_enabled()) + if (!hugetlb_optimize_vmemmap_enabled()) return page; /* @@ -226,7 +226,7 @@ static inline struct page *page_fixed_fake_head(struct page *page) return page; } -static inline bool hugetlb_free_vmemmap_enabled(void) +static inline bool hugetlb_optimize_vmemmap_enabled(void) { return false; } diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index fac2dc02bd27a..1b0591edad49b 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -51,7 +51,11 @@ static inline void mapping_set_error(struct address_space *mapping, int error) return; /* Record in wb_err for checkers using errseq_t based tracking */ - filemap_set_wb_err(mapping, error); + __filemap_set_wb_err(mapping, error); + + /* Record it in superblock */ + if (mapping->host) + errseq_set(&mapping->host->i_sb->s_wb_err, error); /* Record it in flags for now, for legacy callers */ if (error == -ENOSPC) diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index 2d155bfb8fbf3..11c98875538a2 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -125,10 +125,4 @@ static inline void acpi_pci_add_bus(struct pci_bus *bus) { } static inline void acpi_pci_remove_bus(struct pci_bus *bus) { } #endif /* CONFIG_ACPI */ -#ifdef CONFIG_ACPI_APEI -extern bool aer_acpi_firmware_first(void); -#else -static inline bool aer_acpi_firmware_first(void) { return false; } -#endif - #endif /* _PCI_ACPI_H_ */ diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h index d0260824dc5dc..75456a66024a9 100644 --- a/include/linux/pci-ecam.h +++ b/include/linux/pci-ecam.h @@ -51,13 +51,13 @@ extern struct pci_ecam_ops pci_generic_ecam_ops; #if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS) extern struct pci_ecam_ops pci_32b_ops; /* 32-bit accesses only */ +extern struct pci_ecam_ops pci_32b_read_ops; /* 32-bit read only */ extern struct pci_ecam_ops hisi_pcie_ops; /* HiSilicon */ extern struct pci_ecam_ops thunder_pem_ecam_ops; /* Cavium ThunderX 1.x & 2.x */ extern struct pci_ecam_ops pci_thunder_ecam_ops; /* Cavium ThunderX 1.x */ extern struct pci_ecam_ops xgene_v1_pcie_ecam_ops; /* APM X-Gene PCIe v1 */ extern struct pci_ecam_ops xgene_v2_pcie_ecam_ops; /* APM X-Gene PCIe v2.x */ extern struct pci_ecam_ops al_pcie_ops; /* Amazon Annapurna Labs PCIe */ -extern struct pci_ecam_ops pci_32b_read_ops; /* 32-bit read accesses only */ #endif #ifdef CONFIG_PCI_HOST_COMMON diff --git a/include/linux/pci.h b/include/linux/pci.h index 96be27b359548..bd1ed10449a0d 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -160,7 +160,7 @@ static inline const char *pci_power_name(pci_power_t state) */ typedef unsigned int __bitwise pci_channel_state_t; -enum pci_channel_state { +enum { /* I/O channel is in normal state */ pci_channel_io_normal = (__force pci_channel_state_t) 1, @@ -208,6 +208,8 @@ enum pci_dev_flags { PCI_DEV_FLAGS_NO_FLR_RESET = (__force pci_dev_flags_t) (1 << 10), /* Don't use Relaxed Ordering for TLPs directed at this device */ PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 11), + /* Device does honor MSI masking despite saying otherwise */ + PCI_DEV_FLAGS_HAS_MSI_MASKING = (__force pci_dev_flags_t) (1 << 12), }; enum pci_irq_reroute_variant { @@ -286,6 +288,7 @@ struct pci_vpd; struct pci_sriov; struct pci_ats; struct pci_p2pdma; +struct rcec_ea; /* The pci_dev structure describes PCI devices */ struct pci_dev { @@ -308,6 +311,10 @@ struct pci_dev { #ifdef CONFIG_PCIEAER u16 aer_cap; /* AER capability offset */ struct aer_stats *aer_stats; /* AER stats for this device */ +#endif +#ifdef CONFIG_PCIEPORTBUS + struct rcec_ea *rcec_ea; /* RCEC cached endpoint association */ + struct pci_dev *rcec; /* Associated RCEC device */ #endif u8 pcie_cap; /* PCIe capability offset */ u8 msi_cap; /* MSI capability offset */ @@ -414,8 +421,6 @@ struct pci_dev { * mappings to make sure they cannot access arbitrary memory. */ unsigned int untrusted:1; - unsigned int __aer_firmware_first_valid:1; - unsigned int __aer_firmware_first:1; unsigned int broken_intx_masking:1; /* INTx masking can't be used */ unsigned int io_window_1k:1; /* Intel bridge 1K I/O windows */ unsigned int irq_managed:1; @@ -608,6 +613,7 @@ struct pci_bus { struct bin_attribute *legacy_io; /* Legacy I/O for this bus */ struct bin_attribute *legacy_mem; /* Legacy mem */ unsigned int is_added:1; + unsigned int unsafe_warn:1; /* warned about RW1C config write */ }; #define to_pci_bus(n) container_of(n, struct pci_bus, dev) @@ -767,7 +773,7 @@ enum pci_ers_result { struct pci_error_handlers { /* PCI bus error detected on this device */ pci_ers_result_t (*error_detected)(struct pci_dev *dev, - enum pci_channel_state error); + pci_channel_state_t error); /* MMIO has been re-enabled, but not DMA */ pci_ers_result_t (*mmio_enabled)(struct pci_dev *dev); @@ -1702,8 +1708,9 @@ static inline int pci_enable_device(struct pci_dev *dev) { return -EIO; } static inline void pci_disable_device(struct pci_dev *dev) { } static inline int pci_assign_resource(struct pci_dev *dev, int i) { return -EBUSY; } -static inline int __pci_register_driver(struct pci_driver *drv, - struct module *owner) +static inline int __must_check __pci_register_driver(struct pci_driver *drv, + struct module *owner, + const char *mod_name) { return 0; } static inline int pci_register_driver(struct pci_driver *drv) { return 0; } diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 0ad57693f3926..8448f955b0c4e 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -81,6 +81,7 @@ #define PCI_CLASS_SYSTEM_RTC 0x0803 #define PCI_CLASS_SYSTEM_PCI_HOTPLUG 0x0804 #define PCI_CLASS_SYSTEM_SDHCI 0x0805 +#define PCI_CLASS_SYSTEM_RCEC 0x0807 #define PCI_CLASS_SYSTEM_OTHER 0x0880 #define PCI_BASE_CLASS_INPUT 0x09 @@ -553,6 +554,9 @@ #define PCI_DEVICE_ID_AMD_17H_M60H_DF_F3 0x144b #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F3 0x1443 #define PCI_DEVICE_ID_AMD_19H_DF_F3 0x1653 +#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F3 0x14b0 +#define PCI_DEVICE_ID_AMD_19H_M40H_DF_F3 0x167c +#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F3 0x166d #define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703 #define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 @@ -2476,7 +2480,8 @@ #define PCI_VENDOR_ID_TDI 0x192E #define PCI_DEVICE_ID_TDI_EHCI 0x0101 -#define PCI_VENDOR_ID_FREESCALE 0x1957 +#define PCI_VENDOR_ID_FREESCALE 0x1957 /* duplicate: NXP */ +#define PCI_VENDOR_ID_NXP 0x1957 /* duplicate: FREESCALE */ #define PCI_DEVICE_ID_MPC8308 0xc006 #define PCI_DEVICE_ID_MPC8315E 0x00b4 #define PCI_DEVICE_ID_MPC8315 0x00b5 diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 71f525a35ac2b..5b616dde9a4c2 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -80,6 +80,7 @@ struct arm_pmu { struct pmu pmu; cpumask_t supported_cpus; char *name; + int pmuver; irqreturn_t (*handle_irq)(struct arm_pmu *pmu); void (*enable)(struct perf_event *event); void (*disable)(struct perf_event *event); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 73bddd000ced4..5bd6f926986ba 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -951,7 +951,7 @@ struct perf_sample_data { struct perf_raw_record *raw; struct perf_branch_stack *br_stack; u64 period; - u64 weight; + union perf_sample_weight weight; u64 txn; union perf_mem_data_src data_src; @@ -1002,7 +1002,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, data->raw = NULL; data->br_stack = NULL; data->period = period; - data->weight = 0; + data->weight.full = 0; data->data_src.val = PERF_MEM_NA; data->txn = 0; } @@ -1188,7 +1188,18 @@ extern void perf_event_bpf_event(struct bpf_prog *prog, enum perf_bpf_event_type type, u16 flags); -extern struct perf_guest_info_callbacks *perf_guest_cbs; +extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs; +static inline struct perf_guest_info_callbacks *perf_get_guest_cbs(void) +{ + /* + * Callbacks are RCU-protected and must be READ_ONCE to avoid reloading + * the callbacks between a !NULL check and dereferences, to ensure + * pending stores/changes to the callback pointers are visible before a + * non-NULL perf_guest_cbs is visible to readers, and to prevent a + * module from unloading callbacks while readers are active. + */ + return rcu_dereference(perf_guest_cbs); +} extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); diff --git a/include/linux/power/max17042_battery.h b/include/linux/power/max17042_battery.h index 4badd53229490..2f9ff5017f122 100644 --- a/include/linux/power/max17042_battery.h +++ b/include/linux/power/max17042_battery.h @@ -69,7 +69,7 @@ enum max17042_register { MAX17042_RelaxCFG = 0x2A, MAX17042_MiscCFG = 0x2B, MAX17042_TGAIN = 0x2C, - MAx17042_TOFF = 0x2D, + MAX17042_TOFF = 0x2D, MAX17042_CGAIN = 0x2E, MAX17042_COFF = 0x2F, diff --git a/include/linux/prandom.h b/include/linux/prandom.h index e20339c78a84c..709e8e69fb39b 100644 --- a/include/linux/prandom.h +++ b/include/linux/prandom.h @@ -10,6 +10,7 @@ #include #include +#include u32 prandom_u32(void); void prandom_bytes(void *buf, size_t nbytes); @@ -21,15 +22,10 @@ void prandom_reseed_late(void); * The core SipHash round function. Each line can be executed in * parallel given enough CPU resources. */ -#define PRND_SIPROUND(v0, v1, v2, v3) ( \ - v0 += v1, v1 = rol64(v1, 13), v2 += v3, v3 = rol64(v3, 16), \ - v1 ^= v0, v0 = rol64(v0, 32), v3 ^= v2, \ - v0 += v3, v3 = rol64(v3, 21), v2 += v1, v1 = rol64(v1, 17), \ - v3 ^= v0, v1 ^= v2, v2 = rol64(v2, 32) \ -) +#define PRND_SIPROUND(v0, v1, v2, v3) SIPHASH_PERMUTATION(v0, v1, v2, v3) -#define PRND_K0 (0x736f6d6570736575 ^ 0x6c7967656e657261) -#define PRND_K1 (0x646f72616e646f6d ^ 0x7465646279746573) +#define PRND_K0 (SIPHASH_CONST_0 ^ SIPHASH_CONST_2) +#define PRND_K1 (SIPHASH_CONST_1 ^ SIPHASH_CONST_3) #elif BITS_PER_LONG == 32 /* @@ -37,14 +33,9 @@ void prandom_reseed_late(void); * This is weaker, but 32-bit machines are not used for high-traffic * applications, so there is less output for an attacker to analyze. */ -#define PRND_SIPROUND(v0, v1, v2, v3) ( \ - v0 += v1, v1 = rol32(v1, 5), v2 += v3, v3 = rol32(v3, 8), \ - v1 ^= v0, v0 = rol32(v0, 16), v3 ^= v2, \ - v0 += v3, v3 = rol32(v3, 7), v2 += v1, v1 = rol32(v1, 13), \ - v3 ^= v0, v1 ^= v2, v2 = rol32(v2, 16) \ -) -#define PRND_K0 0x6c796765 -#define PRND_K1 0x74656462 +#define PRND_SIPROUND(v0, v1, v2, v3) HSIPHASH_PERMUTATION(v0, v1, v2, v3) +#define PRND_K0 (HSIPHASH_CONST_0 ^ HSIPHASH_CONST_2) +#define PRND_K1 (HSIPHASH_CONST_1 ^ HSIPHASH_CONST_3) #else #error Unsupported BITS_PER_LONG diff --git a/include/linux/psi.h b/include/linux/psi.h index 65eb1476ac705..74f7148dfb9f4 100644 --- a/include/linux/psi.h +++ b/include/linux/psi.h @@ -32,7 +32,7 @@ void cgroup_move_task(struct task_struct *p, struct css_set *to); struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf, size_t nbytes, enum psi_res res); -void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *t); +void psi_trigger_destroy(struct psi_trigger *t); __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file, poll_table *wait); diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h index 0a23300d49af7..72a50a05e9576 100644 --- a/include/linux/psi_types.h +++ b/include/linux/psi_types.h @@ -21,7 +21,17 @@ enum psi_task_count { * don't have to special case any state tracking for it. */ NR_ONCPU, - NR_PSI_TASK_COUNTS = 4, + /* + * For IO and CPU stalls the presence of running/oncpu tasks + * in the domain means a partial rather than a full stall. + * For memory it's not so simple because of page reclaimers: + * they are running/oncpu while representing a stall. To tell + * whether a domain has productivity left or not, we need to + * distinguish between regular running (i.e. productive) + * threads and memstall ones. + */ + NR_MEMSTALL_RUNNING, + NR_PSI_TASK_COUNTS = 5, }; /* Task state bitmasks */ @@ -29,6 +39,7 @@ enum psi_task_count { #define TSK_MEMSTALL (1 << NR_MEMSTALL) #define TSK_RUNNING (1 << NR_RUNNING) #define TSK_ONCPU (1 << NR_ONCPU) +#define TSK_MEMSTALL_RUNNING (1 << NR_MEMSTALL_RUNNING) /* Resources that workloads could be stalled on */ enum psi_res { @@ -130,8 +141,8 @@ struct psi_trigger { */ u64 last_event_time; - /* Refcounting to prevent premature destruction */ - struct kref refcount; + /* Deferred event(s) from previous ratelimit window */ + bool pending_event; }; struct psi_group { diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 2a9df80ea8876..ae7dbdfa3d832 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -30,7 +30,6 @@ extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, #define PT_SEIZED 0x00010000 /* SEIZE used, enable new behavior */ #define PT_PTRACED 0x00000001 -#define PT_DTRACE 0x00000002 /* delayed trace (used on m68k, i386) */ #define PT_OPT_FLAG_SHIFT 3 /* PT_TRACE_* event enable flags */ @@ -47,12 +46,6 @@ extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, #define PT_EXITKILL (PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT) #define PT_SUSPEND_SECCOMP (PTRACE_O_SUSPEND_SECCOMP << PT_OPT_FLAG_SHIFT) -/* single stepping state bits (used on ARM and PA-RISC) */ -#define PT_SINGLESTEP_BIT 31 -#define PT_SINGLESTEP (1< +#include #include #include #include -struct random_ready_callback { - struct list_head list; - void (*func)(struct random_ready_callback *rdy); - struct module *owner; -}; +struct notifier_block; -extern void add_device_randomness(const void *, unsigned int); -extern void add_bootloader_randomness(const void *, unsigned int); +void add_device_randomness(const void *buf, size_t len); +void __init add_bootloader_randomness(const void *buf, size_t len); +void add_input_randomness(unsigned int type, unsigned int code, + unsigned int value) __latent_entropy; +void add_interrupt_randomness(int irq) __latent_entropy; +void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy); #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) static inline void add_latent_entropy(void) { - add_device_randomness((const void *)&latent_entropy, - sizeof(latent_entropy)); + add_device_randomness((const void *)&latent_entropy, sizeof(latent_entropy)); } #else -static inline void add_latent_entropy(void) {} -#endif - -extern void add_input_randomness(unsigned int type, unsigned int code, - unsigned int value) __latent_entropy; -extern void add_interrupt_randomness(int irq, int irq_flags) __latent_entropy; - -extern void get_random_bytes(void *buf, int nbytes); -extern int wait_for_random_bytes(void); -extern int __init rand_initialize(void); -extern bool rng_is_initialized(void); -extern int add_random_ready_callback(struct random_ready_callback *rdy); -extern void del_random_ready_callback(struct random_ready_callback *rdy); -extern int __must_check get_random_bytes_arch(void *buf, int nbytes); - -#ifndef MODULE -extern const struct file_operations random_fops, urandom_fops; +static inline void add_latent_entropy(void) { } #endif +void get_random_bytes(void *buf, size_t len); +size_t __must_check get_random_bytes_arch(void *buf, size_t len); u32 get_random_u32(void); u64 get_random_u64(void); static inline unsigned int get_random_int(void) @@ -78,36 +61,38 @@ static inline unsigned long get_random_long(void) static inline unsigned long get_random_canary(void) { - unsigned long val = get_random_long(); - - return val & CANARY_MASK; + return get_random_long() & CANARY_MASK; } +int __init random_init(const char *command_line); +bool rng_is_initialized(void); +int wait_for_random_bytes(void); +int register_random_ready_notifier(struct notifier_block *nb); +int unregister_random_ready_notifier(struct notifier_block *nb); + /* Calls wait_for_random_bytes() and then calls get_random_bytes(buf, nbytes). * Returns the result of the call to wait_for_random_bytes. */ -static inline int get_random_bytes_wait(void *buf, int nbytes) +static inline int get_random_bytes_wait(void *buf, size_t nbytes) { int ret = wait_for_random_bytes(); get_random_bytes(buf, nbytes); return ret; } -#define declare_get_random_var_wait(var) \ - static inline int get_random_ ## var ## _wait(var *out) { \ +#define declare_get_random_var_wait(name, ret_type) \ + static inline int get_random_ ## name ## _wait(ret_type *out) { \ int ret = wait_for_random_bytes(); \ if (unlikely(ret)) \ return ret; \ - *out = get_random_ ## var(); \ + *out = get_random_ ## name(); \ return 0; \ } -declare_get_random_var_wait(u32) -declare_get_random_var_wait(u64) -declare_get_random_var_wait(int) -declare_get_random_var_wait(long) +declare_get_random_var_wait(u32, u32) +declare_get_random_var_wait(u64, u32) +declare_get_random_var_wait(int, unsigned int) +declare_get_random_var_wait(long, unsigned long) #undef declare_get_random_var -unsigned long randomize_page(unsigned long start, unsigned long range); - /* * This is designed to be standalone for just prandom * users, but for now we include it from @@ -118,30 +103,39 @@ unsigned long randomize_page(unsigned long start, unsigned long range); #ifdef CONFIG_ARCH_RANDOM # include #else -static inline bool arch_get_random_long(unsigned long *v) -{ - return 0; -} -static inline bool arch_get_random_int(unsigned int *v) -{ - return 0; -} -static inline bool arch_has_random(void) -{ - return 0; -} -static inline bool arch_get_random_seed_long(unsigned long *v) -{ - return 0; -} -static inline bool arch_get_random_seed_int(unsigned int *v) +static inline bool __must_check arch_get_random_long(unsigned long *v) { return false; } +static inline bool __must_check arch_get_random_int(unsigned int *v) { return false; } +static inline bool __must_check arch_get_random_seed_long(unsigned long *v) { return false; } +static inline bool __must_check arch_get_random_seed_int(unsigned int *v) { return false; } +#endif + +/* + * Called from the boot CPU during startup; not valid to call once + * secondary CPUs are up and preemption is possible. + */ +#ifndef arch_get_random_seed_long_early +static inline bool __init arch_get_random_seed_long_early(unsigned long *v) { - return 0; + WARN_ON(system_state != SYSTEM_BOOTING); + return arch_get_random_seed_long(v); } -static inline bool arch_has_random_seed(void) +#endif + +#ifndef arch_get_random_long_early +static inline bool __init arch_get_random_long_early(unsigned long *v) { - return 0; + WARN_ON(system_state != SYSTEM_BOOTING); + return arch_get_random_long(v); } #endif +#ifdef CONFIG_SMP +int random_prepare_cpu(unsigned int cpu); +int random_online_cpu(unsigned int cpu); +#endif + +#ifndef MODULE +extern const struct file_operations random_fops, urandom_fops; +#endif + #endif /* _LINUX_RANDOM_H */ diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h index 8ddf79e9207a9..1df12e8dde6f6 100644 --- a/include/linux/ratelimit.h +++ b/include/linux/ratelimit.h @@ -23,12 +23,16 @@ struct ratelimit_state { unsigned long flags; }; -#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) { \ - .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ - .interval = interval_init, \ - .burst = burst_init, \ +#define RATELIMIT_STATE_INIT_FLAGS(name, interval_init, burst_init, flags_init) { \ + .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ + .interval = interval_init, \ + .burst = burst_init, \ + .flags = flags_init, \ } +#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) \ + RATELIMIT_STATE_INIT_FLAGS(name, interval_init, burst_init, 0) + #define RATELIMIT_STATE_INIT_DISABLED \ RATELIMIT_STATE_INIT(ratelimit_state, 0, DEFAULT_RATELIMIT_BURST) diff --git a/include/linux/refcount.h b/include/linux/refcount.h index e28cce21bad6c..0ac50cf62d062 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -1,9 +1,88 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* + * Variant of atomic_t specialized for reference counts. + * + * The interface matches the atomic_t interface (to aid in porting) but only + * provides the few functions one should use for reference counting. + * + * Saturation semantics + * ==================== + * + * refcount_t differs from atomic_t in that the counter saturates at + * REFCOUNT_SATURATED and will not move once there. This avoids wrapping the + * counter and causing 'spurious' use-after-free issues. In order to avoid the + * cost associated with introducing cmpxchg() loops into all of the saturating + * operations, we temporarily allow the counter to take on an unchecked value + * and then explicitly set it to REFCOUNT_SATURATED on detecting that underflow + * or overflow has occurred. Although this is racy when multiple threads + * access the refcount concurrently, by placing REFCOUNT_SATURATED roughly + * equidistant from 0 and INT_MAX we minimise the scope for error: + * + * INT_MAX REFCOUNT_SATURATED UINT_MAX + * 0 (0x7fff_ffff) (0xc000_0000) (0xffff_ffff) + * +--------------------------------+----------------+----------------+ + * <---------- bad value! ----------> + * + * (in a signed view of the world, the "bad value" range corresponds to + * a negative counter value). + * + * As an example, consider a refcount_inc() operation that causes the counter + * to overflow: + * + * int old = atomic_fetch_add_relaxed(r); + * // old is INT_MAX, refcount now INT_MIN (0x8000_0000) + * if (old < 0) + * atomic_set(r, REFCOUNT_SATURATED); + * + * If another thread also performs a refcount_inc() operation between the two + * atomic operations, then the count will continue to edge closer to 0. If it + * reaches a value of 1 before /any/ of the threads reset it to the saturated + * value, then a concurrent refcount_dec_and_test() may erroneously free the + * underlying object. Given the precise timing details involved with the + * round-robin scheduling of each thread manipulating the refcount and the need + * to hit the race multiple times in succession, there doesn't appear to be a + * practical avenue of attack even if using refcount_add() operations with + * larger increments. + * + * Memory ordering + * =============== + * + * Memory ordering rules are slightly relaxed wrt regular atomic_t functions + * and provide only what is strictly required for refcounts. + * + * The increments are fully relaxed; these will not provide ordering. The + * rationale is that whatever is used to obtain the object we're increasing the + * reference count on will provide the ordering. For locked data structures, + * its the lock acquire, for RCU/lockless data structures its the dependent + * load. + * + * Do note that inc_not_zero() provides a control dependency which will order + * future stores against the inc, this ensures we'll never modify the object + * if we did not in fact acquire a reference. + * + * The decrements will provide release order, such that all the prior loads and + * stores will be issued before, it also provides a control dependency, which + * will order us against the subsequent free(). + * + * The control dependency is against the load of the cmpxchg (ll/sc) that + * succeeded. This means the stores aren't fully ordered, but this is fine + * because the 1->0 transition indicates no concurrency. + * + * Note that the allocator is responsible for ordering things between free() + * and alloc(). + * + * The decrements dec_and_test() and sub_and_test() also provide acquire + * ordering on success. + * + */ + #ifndef _LINUX_REFCOUNT_H #define _LINUX_REFCOUNT_H #include +#include #include +#include #include struct mutex; @@ -12,7 +91,7 @@ struct mutex; * struct refcount_t - variant of atomic_t specialized for reference counts * @refs: atomic_t counter field * - * The counter saturates at UINT_MAX and will not move once + * The counter saturates at REFCOUNT_SATURATED and will not move once * there. This avoids wrapping the counter and causing 'spurious' * use-after-free bugs. */ @@ -21,13 +100,25 @@ typedef struct refcount_struct { } refcount_t; #define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), } +#define REFCOUNT_MAX INT_MAX +#define REFCOUNT_SATURATED (INT_MIN / 2) + +enum refcount_saturation_type { + REFCOUNT_ADD_NOT_ZERO_OVF, + REFCOUNT_ADD_OVF, + REFCOUNT_ADD_UAF, + REFCOUNT_SUB_UAF, + REFCOUNT_DEC_LEAK, +}; + +void refcount_warn_saturate(refcount_t *r, enum refcount_saturation_type t); /** * refcount_set - set a refcount's value * @r: the refcount * @n: value to which the refcount will be set */ -static inline void refcount_set(refcount_t *r, unsigned int n) +static inline void refcount_set(refcount_t *r, int n) { atomic_set(&r->refs, n); } @@ -43,70 +134,168 @@ static inline unsigned int refcount_read(const refcount_t *r) return atomic_read(&r->refs); } -extern __must_check bool refcount_add_not_zero_checked(unsigned int i, refcount_t *r); -extern void refcount_add_checked(unsigned int i, refcount_t *r); - -extern __must_check bool refcount_inc_not_zero_checked(refcount_t *r); -extern void refcount_inc_checked(refcount_t *r); - -extern __must_check bool refcount_sub_and_test_checked(unsigned int i, refcount_t *r); - -extern __must_check bool refcount_dec_and_test_checked(refcount_t *r); -extern void refcount_dec_checked(refcount_t *r); - -#ifdef CONFIG_REFCOUNT_FULL - -#define refcount_add_not_zero refcount_add_not_zero_checked -#define refcount_add refcount_add_checked - -#define refcount_inc_not_zero refcount_inc_not_zero_checked -#define refcount_inc refcount_inc_checked +/** + * refcount_add_not_zero - add a value to a refcount unless it is 0 + * @i: the value to add to the refcount + * @r: the refcount + * + * Will saturate at REFCOUNT_SATURATED and WARN. + * + * Provides no memory ordering, it is assumed the caller has guaranteed the + * object memory to be stable (RCU, etc.). It does provide a control dependency + * and thereby orders future stores. See the comment on top. + * + * Use of this function is not recommended for the normal reference counting + * use case in which references are taken and released one at a time. In these + * cases, refcount_inc(), or one of its variants, should instead be used to + * increment a reference count. + * + * Return: false if the passed refcount is 0, true otherwise + */ +static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) +{ + int old = refcount_read(r); -#define refcount_sub_and_test refcount_sub_and_test_checked + do { + if (!old) + break; + } while (!atomic_try_cmpxchg_relaxed(&r->refs, &old, old + i)); -#define refcount_dec_and_test refcount_dec_and_test_checked -#define refcount_dec refcount_dec_checked + if (unlikely(old < 0 || old + i < 0)) + refcount_warn_saturate(r, REFCOUNT_ADD_NOT_ZERO_OVF); -#else -# ifdef CONFIG_ARCH_HAS_REFCOUNT -# include -# else -static inline __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r) -{ - return atomic_add_unless(&r->refs, i, 0); + return old; } -static inline void refcount_add(unsigned int i, refcount_t *r) +/** + * refcount_add - add a value to a refcount + * @i: the value to add to the refcount + * @r: the refcount + * + * Similar to atomic_add(), but will saturate at REFCOUNT_SATURATED and WARN. + * + * Provides no memory ordering, it is assumed the caller has guaranteed the + * object memory to be stable (RCU, etc.). It does provide a control dependency + * and thereby orders future stores. See the comment on top. + * + * Use of this function is not recommended for the normal reference counting + * use case in which references are taken and released one at a time. In these + * cases, refcount_inc(), or one of its variants, should instead be used to + * increment a reference count. + */ +static inline void refcount_add(int i, refcount_t *r) { - atomic_add(i, &r->refs); + int old = atomic_fetch_add_relaxed(i, &r->refs); + + if (unlikely(!old)) + refcount_warn_saturate(r, REFCOUNT_ADD_UAF); + else if (unlikely(old < 0 || old + i < 0)) + refcount_warn_saturate(r, REFCOUNT_ADD_OVF); } +/** + * refcount_inc_not_zero - increment a refcount unless it is 0 + * @r: the refcount to increment + * + * Similar to atomic_inc_not_zero(), but will saturate at REFCOUNT_SATURATED + * and WARN. + * + * Provides no memory ordering, it is assumed the caller has guaranteed the + * object memory to be stable (RCU, etc.). It does provide a control dependency + * and thereby orders future stores. See the comment on top. + * + * Return: true if the increment was successful, false otherwise + */ static inline __must_check bool refcount_inc_not_zero(refcount_t *r) { - return atomic_add_unless(&r->refs, 1, 0); + return refcount_add_not_zero(1, r); } +/** + * refcount_inc - increment a refcount + * @r: the refcount to increment + * + * Similar to atomic_inc(), but will saturate at REFCOUNT_SATURATED and WARN. + * + * Provides no memory ordering, it is assumed the caller already has a + * reference on the object. + * + * Will WARN if the refcount is 0, as this represents a possible use-after-free + * condition. + */ static inline void refcount_inc(refcount_t *r) { - atomic_inc(&r->refs); + refcount_add(1, r); } -static inline __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r) +/** + * refcount_sub_and_test - subtract from a refcount and test if it is 0 + * @i: amount to subtract from the refcount + * @r: the refcount + * + * Similar to atomic_dec_and_test(), but it will WARN, return false and + * ultimately leak on underflow and will fail to decrement when saturated + * at REFCOUNT_SATURATED. + * + * Provides release memory ordering, such that prior loads and stores are done + * before, and provides an acquire ordering on success such that free() + * must come after. + * + * Use of this function is not recommended for the normal reference counting + * use case in which references are taken and released one at a time. In these + * cases, refcount_dec(), or one of its variants, should instead be used to + * decrement a reference count. + * + * Return: true if the resulting refcount is 0, false otherwise + */ +static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r) { - return atomic_sub_and_test(i, &r->refs); + int old = atomic_fetch_sub_release(i, &r->refs); + + if (old == i) { + smp_acquire__after_ctrl_dep(); + return true; + } + + if (unlikely(old < 0 || old - i < 0)) + refcount_warn_saturate(r, REFCOUNT_SUB_UAF); + + return false; } +/** + * refcount_dec_and_test - decrement a refcount and test if it is 0 + * @r: the refcount + * + * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to + * decrement when saturated at REFCOUNT_SATURATED. + * + * Provides release memory ordering, such that prior loads and stores are done + * before, and provides an acquire ordering on success such that free() + * must come after. + * + * Return: true if the resulting refcount is 0, false otherwise + */ static inline __must_check bool refcount_dec_and_test(refcount_t *r) { - return atomic_dec_and_test(&r->refs); + return refcount_sub_and_test(1, r); } +/** + * refcount_dec - decrement a refcount + * @r: the refcount + * + * Similar to atomic_dec(), it will WARN on underflow and fail to decrement + * when saturated at REFCOUNT_SATURATED. + * + * Provides release memory ordering, such that prior loads and stores are done + * before. + */ static inline void refcount_dec(refcount_t *r) { - atomic_dec(&r->refs); + if (unlikely(atomic_fetch_sub_release(1, &r->refs) <= 1)) + refcount_warn_saturate(r, REFCOUNT_DEC_LEAK); } -# endif /* !CONFIG_ARCH_HAS_REFCOUNT */ -#endif /* CONFIG_REFCOUNT_FULL */ extern __must_check bool refcount_dec_if_one(refcount_t *r); extern __must_check bool refcount_dec_not_one(refcount_t *r); diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index 9fe156d1c018e..a68972b097b72 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -177,7 +177,7 @@ static inline struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_device *rpdev /* This shouldn't be possible */ WARN_ON(1); - return ERR_PTR(-ENXIO); + return NULL; } static inline int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len) diff --git a/include/linux/rtsx_usb.h b/include/linux/rtsx_usb.h index 159729cffd8e1..3247ed8e9ff0f 100644 --- a/include/linux/rtsx_usb.h +++ b/include/linux/rtsx_usb.h @@ -54,8 +54,6 @@ struct rtsx_ucr { struct usb_device *pusb_dev; struct usb_interface *pusb_intf; struct usb_sg_request current_sg; - unsigned char *iobuf; - dma_addr_t iobuf_dma; struct timer_list sg_timer; struct mutex dev_mutex; diff --git a/include/linux/sched.h b/include/linux/sched.h index 78faffc70dd60..06cf8d83617bd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -352,28 +352,30 @@ struct util_est { } __attribute__((__aligned__(sizeof(u64)))); /* - * The load_avg/util_avg accumulates an infinite geometric series - * (see __update_load_avg() in kernel/sched/fair.c). + * The load/runnable/util_avg accumulates an infinite geometric series + * (see __update_load_avg_cfs_rq() in kernel/sched/pelt.c). * * [load_avg definition] * * load_avg = runnable% * scale_load_down(load) * - * where runnable% is the time ratio that a sched_entity is runnable. - * For cfs_rq, it is the aggregated load_avg of all runnable and - * blocked sched_entities. + * [runnable_avg definition] + * + * runnable_avg = runnable% * SCHED_CAPACITY_SCALE * * [util_avg definition] * * util_avg = running% * SCHED_CAPACITY_SCALE * - * where running% is the time ratio that a sched_entity is running on - * a CPU. For cfs_rq, it is the aggregated util_avg of all runnable - * and blocked sched_entities. + * where runnable% is the time ratio that a sched_entity is runnable and + * running% the time ratio that a sched_entity is running. + * + * For cfs_rq, they are the aggregated values of all runnable and blocked + * sched_entities. * - * load_avg and util_avg don't direcly factor frequency scaling and CPU - * capacity scaling. The scaling is done through the rq_clock_pelt that - * is used for computing those signals (see update_rq_clock_pelt()) + * The load/runnable/util_avg doesn't direcly factor frequency scaling and CPU + * capacity scaling. The scaling is done through the rq_clock_pelt that is used + * for computing those signals (see update_rq_clock_pelt()) * * N.B., the above ratios (runnable% and running%) themselves are in the * range of [0, 1]. To do fixed point arithmetics, we therefore scale them @@ -397,11 +399,11 @@ struct util_est { struct sched_avg { u64 last_update_time; u64 load_sum; - u64 runnable_load_sum; + u64 runnable_sum; u32 util_sum; u32 period_contrib; unsigned long load_avg; - unsigned long runnable_load_avg; + unsigned long runnable_avg; unsigned long util_avg; struct util_est util_est; } ____cacheline_aligned; @@ -445,7 +447,6 @@ struct sched_statistics { struct sched_entity { /* For load-balancing: */ struct load_weight load; - unsigned long runnable_weight; struct rb_node run_node; struct list_head group_node; unsigned int on_rq; @@ -466,6 +467,8 @@ struct sched_entity { struct cfs_rq *cfs_rq; /* rq "owned" by this entity/group: */ struct cfs_rq *my_q; + /* cached value of my_q->h_nr_running */ + unsigned long runnable_weight; #endif #ifdef CONFIG_SMP @@ -849,7 +852,9 @@ struct task_struct { #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN struct vtime vtime; #endif - +#ifdef CONFIG_BYTEDANCE_BURST_SCHED + void *burst_credit; +#endif #ifdef CONFIG_NO_HZ_FULL atomic_t tick_dep_mask; #endif @@ -1256,6 +1261,7 @@ struct task_struct { int pagefault_disabled; #ifdef CONFIG_MMU struct task_struct *oom_reaper_list; + struct timer_list oom_reaper_timer; #endif #ifdef CONFIG_VMAP_STACK struct vm_struct *stack_vm_area; @@ -1475,7 +1481,6 @@ extern struct pid *cad_pid; #define PF_MEMALLOC 0x00000800 /* Allocating memory */ #define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */ #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ -#define PF_USED_ASYNC 0x00004000 /* Used async_schedule*(), used by module init */ #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ #define PF_FROZEN 0x00010000 /* Frozen for system suspend */ #define PF_KSWAPD 0x00020000 /* I am kswapd */ @@ -1521,7 +1526,7 @@ extern struct pid *cad_pid; #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) #define used_math() tsk_used_math(current) -static inline bool is_percpu_thread(void) +static __always_inline bool is_percpu_thread(void) { #ifdef CONFIG_SMP return (current->flags & PF_NO_SETAFFINITY) && diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 3a1d899019af0..ab0da04ac9ee7 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -133,6 +133,14 @@ static inline void mm_update_next_owner(struct mm_struct *mm) #endif /* CONFIG_MEMCG */ #ifdef CONFIG_MMU +#ifndef arch_get_mmap_end +#define arch_get_mmap_end(addr) (TASK_SIZE) +#endif + +#ifndef arch_get_mmap_base +#define arch_get_mmap_base(addr, base) (base) +#endif + extern void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack); extern unsigned long diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index d4f6215ee03f7..b60ee73eb7674 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -22,6 +22,7 @@ enum { sysctl_hung_task_timeout_secs = 0 }; extern unsigned int sysctl_sched_latency; extern unsigned int sysctl_sched_min_granularity; +extern unsigned int sysctl_sched_idle_min_granularity; extern unsigned int sysctl_sched_wakeup_granularity; extern unsigned int sysctl_sched_child_runs_first; @@ -63,6 +64,7 @@ extern unsigned int sysctl_sched_uclamp_util_max; #ifdef CONFIG_CFS_BANDWIDTH extern unsigned int sysctl_sched_cfs_bandwidth_slice; +extern unsigned int sysctl_sched_cfs_bandwidth_min_ratio; #endif #ifdef CONFIG_SCHED_AUTOGROUP diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 9a2710cd63197..56d99bc728006 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -163,7 +163,7 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring * subscriptions and synchronises with wait4(). Also used in procfs. Also * pins the final release of task.io_context. Also protects ->cpuset and - * ->cgroup.subsys[]. And ->vfork_done. + * ->cgroup.subsys[]. And ->vfork_done. And ->sysvshm.shm_clist. * * Nests both inside and outside of read_lock(&tasklist_lock). * It must not be nested with write_lock_irq(&tasklist_lock), diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index 2413427e439c7..d10150587d819 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -25,7 +25,11 @@ static inline void *task_stack_page(const struct task_struct *task) static inline unsigned long *end_of_stack(const struct task_struct *task) { +#ifdef CONFIG_STACK_GROWSUP + return (unsigned long *)((unsigned long)task->stack + THREAD_SIZE) - 1; +#else return task->stack; +#endif } #elif !defined(__HAVE_THREAD_FUNCTIONS) diff --git a/include/linux/security.h b/include/linux/security.h index df90399a8af98..aa5c7141c8d17 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -118,10 +118,12 @@ enum lockdown_reason { LOCKDOWN_MMIOTRACE, LOCKDOWN_DEBUGFS, LOCKDOWN_XMON_WR, + LOCKDOWN_DBG_WRITE_KERNEL, LOCKDOWN_INTEGRITY_MAX, LOCKDOWN_KCORE, LOCKDOWN_KPROBES, LOCKDOWN_BPF_READ, + LOCKDOWN_DBG_READ_KERNEL, LOCKDOWN_PERF, LOCKDOWN_TRACEFS, LOCKDOWN_XMON_RW, @@ -249,13 +251,13 @@ extern int security_init(void); extern int early_security_init(void); /* Security operations */ -int security_binder_set_context_mgr(struct task_struct *mgr); -int security_binder_transaction(struct task_struct *from, - struct task_struct *to); -int security_binder_transfer_binder(struct task_struct *from, - struct task_struct *to); -int security_binder_transfer_file(struct task_struct *from, - struct task_struct *to, struct file *file); +int security_binder_set_context_mgr(const struct cred *mgr); +int security_binder_transaction(const struct cred *from, + const struct cred *to); +int security_binder_transfer_binder(const struct cred *from, + const struct cred *to); +int security_binder_transfer_file(const struct cred *from, + const struct cred *to, struct file *file); int security_ptrace_access_check(struct task_struct *child, unsigned int mode); int security_ptrace_traceme(struct task_struct *parent); int security_capget(struct task_struct *target, @@ -481,25 +483,25 @@ static inline int early_security_init(void) return 0; } -static inline int security_binder_set_context_mgr(struct task_struct *mgr) +static inline int security_binder_set_context_mgr(const struct cred *mgr) { return 0; } -static inline int security_binder_transaction(struct task_struct *from, - struct task_struct *to) +static inline int security_binder_transaction(const struct cred *from, + const struct cred *to) { return 0; } -static inline int security_binder_transfer_binder(struct task_struct *from, - struct task_struct *to) +static inline int security_binder_transfer_binder(const struct cred *from, + const struct cred *to) { return 0; } -static inline int security_binder_transfer_file(struct task_struct *from, - struct task_struct *to, +static inline int security_binder_transfer_file(const struct cred *from, + const struct cred *to, struct file *file) { return 0; @@ -985,6 +987,11 @@ static inline void security_transfer_creds(struct cred *new, { } +static inline void security_cred_getsecid(const struct cred *c, u32 *secid) +{ + *secid = 0; +} + static inline int security_kernel_act_as(struct cred *cred, u32 secid) { return 0; diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 1c35428e98bc5..1f65247b6377a 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -506,6 +506,9 @@ phy_interface_t sfp_select_interface(struct sfp_bus *bus, int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo); int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee, u8 *data); +int sfp_get_module_eeprom_by_page(struct sfp_bus *bus, + const struct ethtool_module_eeprom *page, + struct netlink_ext_ack *extack); void sfp_upstream_start(struct sfp_bus *bus); void sfp_upstream_stop(struct sfp_bus *bus); struct sfp_bus *sfp_register_upstream(struct fwnode_handle *fwnode, @@ -545,6 +548,13 @@ static inline int sfp_get_module_eeprom(struct sfp_bus *bus, return -EOPNOTSUPP; } +static inline int sfp_get_module_eeprom_by_page(struct sfp_bus *bus, + const struct ethtool_module_eeprom *page, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + static inline void sfp_upstream_start(struct sfp_bus *bus) { } diff --git a/include/linux/siphash.h b/include/linux/siphash.h index bf21591a9e5e6..0bb5ecd507bef 100644 --- a/include/linux/siphash.h +++ b/include/linux/siphash.h @@ -27,9 +27,7 @@ static inline bool siphash_key_is_zero(const siphash_key_t *key) } u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key); -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key); -#endif u64 siphash_1u64(const u64 a, const siphash_key_t *key); u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key); @@ -82,10 +80,9 @@ static inline u64 ___siphash_aligned(const __le64 *data, size_t len, static inline u64 siphash(const void *data, size_t len, const siphash_key_t *key) { -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT)) + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || + !IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT)) return __siphash_unaligned(data, len, key); -#endif return ___siphash_aligned(data, len, key); } @@ -96,10 +93,8 @@ typedef struct { u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key); -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u32 __hsiphash_unaligned(const void *data, size_t len, const hsiphash_key_t *key); -#endif u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key); u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key); @@ -135,11 +130,38 @@ static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len, static inline u32 hsiphash(const void *data, size_t len, const hsiphash_key_t *key) { -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - if (!IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT)) + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || + !IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT)) return __hsiphash_unaligned(data, len, key); -#endif return ___hsiphash_aligned(data, len, key); } +/* + * These macros expose the raw SipHash and HalfSipHash permutations. + * Do not use them directly! If you think you have a use for them, + * be sure to CC the maintainer of this file explaining why. + */ + +#define SIPHASH_PERMUTATION(a, b, c, d) ( \ + (a) += (b), (b) = rol64((b), 13), (b) ^= (a), (a) = rol64((a), 32), \ + (c) += (d), (d) = rol64((d), 16), (d) ^= (c), \ + (a) += (d), (d) = rol64((d), 21), (d) ^= (a), \ + (c) += (b), (b) = rol64((b), 17), (b) ^= (c), (c) = rol64((c), 32)) + +#define SIPHASH_CONST_0 0x736f6d6570736575ULL +#define SIPHASH_CONST_1 0x646f72616e646f6dULL +#define SIPHASH_CONST_2 0x6c7967656e657261ULL +#define SIPHASH_CONST_3 0x7465646279746573ULL + +#define HSIPHASH_PERMUTATION(a, b, c, d) ( \ + (a) += (b), (b) = rol32((b), 5), (b) ^= (a), (a) = rol32((a), 16), \ + (c) += (d), (d) = rol32((d), 8), (d) ^= (c), \ + (a) += (d), (d) = rol32((d), 7), (d) ^= (a), \ + (c) += (b), (b) = rol32((b), 13), (b) ^= (c), (c) = rol32((c), 16)) + +#define HSIPHASH_CONST_0 0U +#define HSIPHASH_CONST_1 0U +#define HSIPHASH_CONST_2 0x6c796765U +#define HSIPHASH_CONST_3 0x74656462U + #endif /* _LINUX_SIPHASH_H */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9b1f3cff27032..7fe336949a261 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1893,7 +1893,7 @@ static inline void __skb_insert(struct sk_buff *newsk, WRITE_ONCE(newsk->prev, prev); WRITE_ONCE(next->prev, newsk); WRITE_ONCE(prev->next, newsk); - list->qlen++; + WRITE_ONCE(list->qlen, list->qlen + 1); } static inline void __skb_queue_splice(const struct sk_buff_head *list, diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 7067f85cef0bf..fed5b3ac7f44d 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -319,6 +319,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @max_speed_hz: Highest supported transfer speed * @flags: other constraints relevant to this driver * @slave: indicates that this is an SPI slave controller + * @devm_allocated: whether the allocation of this struct is devres-managed * @max_transfer_size: function that returns the max transfer size for * a &spi_device; may be %NULL, so the default %SIZE_MAX will be used. * @max_message_size: function that returns the max message size for @@ -466,7 +467,7 @@ struct spi_controller { #define SPI_MASTER_GPIO_SS BIT(5) /* GPIO CS must select slave */ - /* flag indicating this is a non-devres managed controller */ + /* flag indicating if the allocation of this struct is devres-managed */ bool devm_allocated; /* flag indicating this is an SPI slave controller */ diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index dc60d03c4b606..0b35747c9837a 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -179,5 +179,6 @@ struct plat_stmmacenet_data { int mac_port_sel_speed; bool en_tx_lpi_clockgating; int has_xgmac; + bool sph_disable; }; #endif diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index a8d68c5a4ca61..fb0a6d6c91e48 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -536,6 +536,8 @@ xdr_stream_decode_uint32_array(struct xdr_stream *xdr, if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) return -EBADMSG; + if (len > SIZE_MAX / sizeof(*p)) + return -EBADMSG; p = xdr_inline_decode(xdr, len * sizeof(*p)); if (unlikely(!p)) return -EBADMSG; diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index d7ef5b97174ce..3c6c4b1dbf1a4 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -419,6 +419,7 @@ void xprt_unlock_connect(struct rpc_xprt *, void *); #define XPRT_CONGESTED (9) #define XPRT_CWND_WAIT (10) #define XPRT_WRITE_SPACE (11) +#define XPRT_SND_IS_COOKIE (12) static inline void xprt_set_connected(struct rpc_xprt *xprt) { diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index a940de03808dd..46deca97e806d 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -90,6 +90,7 @@ struct sock_xprt { #define XPRT_SOCK_WAKE_WRITE (5) #define XPRT_SOCK_WAKE_PENDING (6) #define XPRT_SOCK_WAKE_DISCONNECT (7) +#define XPRT_SOCK_CONNECT_SENT (8) #endif /* __KERNEL__ */ diff --git a/include/linux/suspend.h b/include/linux/suspend.h index cd97d2c8840cc..194e64cb25850 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -428,15 +428,7 @@ struct platform_hibernation_ops { #ifdef CONFIG_HIBERNATION /* kernel/power/snapshot.c */ -extern void __register_nosave_region(unsigned long b, unsigned long e, int km); -static inline void __init register_nosave_region(unsigned long b, unsigned long e) -{ - __register_nosave_region(b, e, 0); -} -static inline void __init register_nosave_region_late(unsigned long b, unsigned long e) -{ - __register_nosave_region(b, e, 1); -} +extern void register_nosave_region(unsigned long b, unsigned long e); extern int swsusp_page_is_forbidden(struct page *); extern void swsusp_set_page_free(struct page *); extern void swsusp_unset_page_free(struct page *); @@ -453,7 +445,6 @@ extern struct pbe *restore_pblist; int pfn_is_nosave(unsigned long pfn); #else /* CONFIG_HIBERNATION */ static inline void register_nosave_region(unsigned long b, unsigned long e) {} -static inline void register_nosave_region_late(unsigned long b, unsigned long e) {} static inline int swsusp_page_is_forbidden(struct page *p) { return 0; } static inline void swsusp_set_page_free(struct page *p) {} static inline void swsusp_unset_page_free(struct page *p) {} @@ -491,14 +482,14 @@ extern void ksys_sync_helper(void); /* drivers/base/power/wakeup.c */ extern bool events_check_enabled; -extern unsigned int pm_wakeup_irq; extern suspend_state_t pm_suspend_target_state; extern bool pm_wakeup_pending(void); extern void pm_system_wakeup(void); extern void pm_system_cancel_wakeup(void); -extern void pm_wakeup_clear(bool reset); +extern void pm_wakeup_clear(unsigned int irq_number); extern void pm_system_irq_wakeup(unsigned int irq_number); +extern unsigned int pm_wakeup_irq(void); extern bool pm_get_wakeup_count(unsigned int *count, bool block); extern bool pm_save_wakeup_count(unsigned int count); extern void pm_wakep_autosleep_enabled(bool set); diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index e995a035a109f..7a102099702f1 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -210,6 +210,9 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, void unregister_sysctl_table(struct ctl_table_header * table); extern int sysctl_init(void); +extern void __register_sysctl_init(const char *path, struct ctl_table *table, + const char *table_name); +#define register_sysctl_init(path, table) __register_sysctl_init(path, table, #table) extern struct ctl_table sysctl_mount_point[]; diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index cd15c1b7fae06..8868337e9445b 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -178,7 +178,7 @@ void tee_device_unregister(struct tee_device *teedev); * @offset: offset of buffer in user space * @pages: locked pages from userspace * @num_pages: number of locked pages - * @dmabuf: dmabuf used to for exporting to user space + * @refcount: reference counter * @flags: defined by TEE_SHM_* in tee_drv.h * @id: unique id of a shared memory object on this device * @@ -195,7 +195,7 @@ struct tee_shm { unsigned int offset; struct page **pages; size_t num_pages; - struct dma_buf *dmabuf; + refcount_t refcount; u32 flags; int id; }; @@ -579,4 +579,18 @@ struct tee_client_driver { #define to_tee_client_driver(d) \ container_of(d, struct tee_client_driver, driver) +/** + * teedev_open() - Open a struct tee_device + * @teedev: Device to open + * + * @return a pointer to struct tee_context on success or an ERR_PTR on failure. + */ +struct tee_context *teedev_open(struct tee_device *teedev); + +/** + * teedev_close_context() - closes a struct tee_context + * @ctx: The struct tee_context to close + */ +void teedev_close_context(struct tee_context *ctx); + #endif /*__TEE_DRV_H*/ diff --git a/include/linux/thermal.h b/include/linux/thermal.h index e45659c759209..a41378bdf27c7 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -501,12 +501,13 @@ static inline void thermal_zone_device_update(struct thermal_zone_device *tz, static inline void thermal_zone_set_trips(struct thermal_zone_device *tz) { } static inline struct thermal_cooling_device * -thermal_cooling_device_register(char *type, void *devdata, +thermal_cooling_device_register(const char *type, void *devdata, const struct thermal_cooling_device_ops *ops) { return ERR_PTR(-ENODEV); } static inline struct thermal_cooling_device * thermal_of_cooling_device_register(struct device_node *np, - char *type, void *devdata, const struct thermal_cooling_device_ops *ops) + const char *type, void *devdata, + const struct thermal_cooling_device_ops *ops) { return ERR_PTR(-ENODEV); } static inline struct thermal_cooling_device * devm_thermal_of_cooling_device_register(struct device *dev, diff --git a/include/linux/timex.h b/include/linux/timex.h index ce08597636705..2efab9a806a9d 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -62,6 +62,8 @@ #include #include +unsigned long random_get_entropy_fallback(void); + #include #ifndef random_get_entropy @@ -74,8 +76,14 @@ * * By default we use get_cycles() for this purpose, but individual * architectures may override this in their asm/timex.h header file. + * If a given arch does not have get_cycles(), then we fallback to + * using random_get_entropy_fallback(). */ -#define random_get_entropy() get_cycles() +#ifdef get_cycles +#define random_get_entropy() ((unsigned long)get_cycles()) +#else +#define random_get_entropy() random_get_entropy_fallback() +#endif #endif /* diff --git a/include/linux/topology.h b/include/linux/topology.h index eb2fe6edd73c8..64e7ee0abe71c 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -48,6 +48,7 @@ int arch_update_cpu_topology(void); /* Conform to ACPI 2.0 SLIT distance definitions */ #define LOCAL_DISTANCE 10 #define REMOTE_DISTANCE 20 +#define DISTANCE_BITS 8 #ifndef node_distance #define node_distance(from,to) ((from) == (to) ? LOCAL_DISTANCE : REMOTE_DISTANCE) #endif diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h index 767f62086bd9b..c326bfdb5ec2c 100644 --- a/include/linux/tty_flip.h +++ b/include/linux/tty_flip.h @@ -12,7 +12,6 @@ extern int tty_insert_flip_string_fixed_flag(struct tty_port *port, extern int tty_prepare_flip_string(struct tty_port *port, unsigned char **chars, size_t size); extern void tty_flip_buffer_push(struct tty_port *port); -void tty_schedule_flip(struct tty_port *port); int __tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag); static inline int tty_insert_flip_char(struct tty_port *port, @@ -40,4 +39,7 @@ static inline int tty_insert_flip_string(struct tty_port *port, extern void tty_buffer_lock_exclusive(struct tty_port *port); extern void tty_buffer_unlock_exclusive(struct tty_port *port); +int tty_insert_flip_string_and_push_buffer(struct tty_port *port, + const unsigned char *chars, size_t cnt); + #endif /* _LINUX_TTY_FLIP_H */ diff --git a/include/linux/uio.h b/include/linux/uio.h index b74d292b9960e..42cff84c4ecab 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -25,6 +25,7 @@ enum iter_type { ITER_BVEC = 16, ITER_PIPE = 32, ITER_DISCARD = 64, + ITER_XARRAY = 128, }; struct iov_iter { @@ -40,6 +41,7 @@ struct iov_iter { const struct iovec *iov; const struct kvec *kvec; const struct bio_vec *bvec; + struct xarray *xarray; struct pipe_inode_info *pipe; }; union { @@ -48,6 +50,7 @@ struct iov_iter { int idx; int start_idx; }; + loff_t xarray_start; }; }; @@ -81,6 +84,11 @@ static inline bool iov_iter_is_discard(const struct iov_iter *i) return iov_iter_type(i) == ITER_DISCARD; } +static inline bool iov_iter_is_xarray(const struct iov_iter *i) +{ + return iov_iter_type(i) == ITER_XARRAY; +} + static inline unsigned char iov_iter_rw(const struct iov_iter *i) { return i->type & (READ | WRITE); @@ -222,6 +230,8 @@ void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_ void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode_info *pipe, size_t count); void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count); +void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *xarray, + loff_t start, size_t count); ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start); ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 712b2a603645f..c0eb85b2981e0 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -124,6 +124,7 @@ struct usb_hcd { #define HCD_FLAG_RH_RUNNING 5 /* root hub is running? */ #define HCD_FLAG_DEAD 6 /* controller has died? */ #define HCD_FLAG_INTF_AUTHORIZED 7 /* authorize interfaces? */ +#define HCD_FLAG_DEFER_RH_REGISTER 8 /* Defer roothub registration */ /* The flags can be tested using these macros; they are likely to * be slightly faster than test_bit(). @@ -134,6 +135,7 @@ struct usb_hcd { #define HCD_WAKEUP_PENDING(hcd) ((hcd)->flags & (1U << HCD_FLAG_WAKEUP_PENDING)) #define HCD_RH_RUNNING(hcd) ((hcd)->flags & (1U << HCD_FLAG_RH_RUNNING)) #define HCD_DEAD(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEAD)) +#define HCD_DEFER_RH_REGISTER(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEFER_RH_REGISTER)) /* * Specifies if interfaces are authorized by default diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 7c075463c7f2b..57a202aa12a52 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -58,6 +58,14 @@ int virtqueue_add_sgs(struct virtqueue *vq, void *data, gfp_t gfp); +int virtqueue_add_mapped_buf_split(struct virtqueue *_vq, + struct scatterlist *sgs[], + unsigned int total_sg, + unsigned int out_sgs, + unsigned int in_sgs, + void *data, void *ctx, + gfp_t gfp); + bool virtqueue_kick(struct virtqueue *vq); bool virtqueue_kick_prepare(struct virtqueue *vq); @@ -69,6 +77,10 @@ void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len); void *virtqueue_get_buf_ctx(struct virtqueue *vq, unsigned int *len, void **ctx); +void *virtqueue_get_mapped_buf_split(struct virtqueue *_vq, + unsigned int *len, + void **ctx); + void virtqueue_disable_cb(struct virtqueue *vq); bool virtqueue_enable_cb(struct virtqueue *vq); @@ -81,6 +93,8 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *vq); void *virtqueue_detach_unused_buf(struct virtqueue *vq); +void *virtqueue_detach_unused_mapped_buf_split(struct virtqueue *_vq); + unsigned int virtqueue_get_vring_size(struct virtqueue *vq); bool virtqueue_is_broken(struct virtqueue *vq); @@ -135,7 +149,6 @@ void virtio_break_device(struct virtio_device *dev); void virtio_config_changed(struct virtio_device *dev); void virtio_config_disable(struct virtio_device *dev); void virtio_config_enable(struct virtio_device *dev); -int virtio_finalize_features(struct virtio_device *dev); #ifdef CONFIG_PM_SLEEP int virtio_device_freeze(struct virtio_device *dev); int virtio_device_restore(struct virtio_device *dev); @@ -143,6 +156,18 @@ int virtio_device_restore(struct virtio_device *dev); size_t virtio_max_dma_size(struct virtio_device *vdev); +dma_addr_t virtio_dma_map_sg(struct virtio_device *dev, + struct scatterlist *sg, + enum dma_data_direction direction); + +dma_addr_t virtio_dma_map(struct virtio_device *dev, void *addr, + unsigned int length, enum dma_data_direction dir); + +int virtio_dma_mapping_error(struct virtio_device *dev, dma_addr_t addr); + +void virtio_dma_unmap(struct virtio_device *dev, dma_addr_t dma, + unsigned int length, enum dma_data_direction dir); + #define virtio_device_for_each_vq(vdev, vq) \ list_for_each_entry(vq, &vdev->vqs, list) diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index f2cc2a0df1742..0fdc43fe233f0 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -56,8 +56,9 @@ struct irq_affinity; * Returns the first 64 feature bits (all we currently need). * @finalize_features: confirm what device features we'll be using. * vdev: the virtio_device - * This gives the final feature bits for the device: it can change + * This sends the driver feature bits to the device: it can change * the dev->feature bits if it wants. + * Note: despite the name this can be called any number of times. * Returns 0 on success or error status * @bus_name: return the bus name associated with the device (optional) * vdev: the virtio_device diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index b465f8f3e554f..a960de68ac69e 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -7,9 +7,27 @@ #include #include +static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type) +{ + switch (gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + case VIRTIO_NET_HDR_GSO_TCPV4: + return protocol == cpu_to_be16(ETH_P_IP); + case VIRTIO_NET_HDR_GSO_TCPV6: + return protocol == cpu_to_be16(ETH_P_IPV6); + case VIRTIO_NET_HDR_GSO_UDP: + return protocol == cpu_to_be16(ETH_P_IP) || + protocol == cpu_to_be16(ETH_P_IPV6); + default: + return false; + } +} + static inline int virtio_net_hdr_set_proto(struct sk_buff *skb, const struct virtio_net_hdr *hdr) { + if (skb->protocol) + return 0; + switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: case VIRTIO_NET_HDR_GSO_UDP: @@ -88,9 +106,12 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, if (!skb->protocol) { __be16 protocol = dev_parse_header_protocol(skb); - virtio_net_hdr_set_proto(skb, hdr); - if (protocol && protocol != skb->protocol) + if (!protocol) + virtio_net_hdr_set_proto(skb, hdr); + else if (!virtio_net_hdr_match_proto(protocol, hdr->gso_type)) return -EINVAL; + else + skb->protocol = protocol; } retry: if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, @@ -120,10 +141,15 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { u16 gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size); + unsigned int nh_off = p_off; struct skb_shared_info *shinfo = skb_shinfo(skb); + /* UFO may not include transport header in gso_size. */ + if (gso_type & SKB_GSO_UDP) + nh_off -= thlen; + /* Too small packets are not really GSO ones. */ - if (skb->len - p_off > gso_size) { + if (skb->len - nh_off > gso_size) { shinfo->gso_size = gso_size; shinfo->gso_type = gso_type; diff --git a/include/linux/wait.h b/include/linux/wait.h index 032ae61c22a2b..5903b1d17c924 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -204,6 +204,7 @@ void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head, void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key); void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr); void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr); +void __wake_up_pollfree(struct wait_queue_head *wq_head); #define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL) #define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL) @@ -230,6 +231,31 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr); #define wake_up_interruptible_sync_poll(x, m) \ __wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, poll_to_key(m)) +/** + * wake_up_pollfree - signal that a polled waitqueue is going away + * @wq_head: the wait queue head + * + * In the very rare cases where a ->poll() implementation uses a waitqueue whose + * lifetime is tied to a task rather than to the 'struct file' being polled, + * this function must be called before the waitqueue is freed so that + * non-blocking polls (e.g. epoll) are notified that the queue is going away. + * + * The caller must also RCU-delay the freeing of the wait_queue_head, e.g. via + * an explicit synchronize_rcu() or call_rcu(), or via SLAB_TYPESAFE_BY_RCU. + */ +static inline void wake_up_pollfree(struct wait_queue_head *wq_head) +{ + /* + * For performance reasons, we don't always take the queue lock here. + * Therefore, we might race with someone removing the last entry from + * the queue, and proceed while they still hold the queue lock. + * However, rcu_read_lock() is required to be held in such cases, so we + * can safely proceed with an RCU-delayed free. + */ + if (waitqueue_active(wq_head)) + __wake_up_pollfree(wq_head); +} + #define ___wait_cond_timeout(condition) \ ({ \ bool __cond = (condition); \ diff --git a/include/linux/writeback.h b/include/linux/writeback.h index a19d845dd7eb9..718966969ef61 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -220,6 +220,7 @@ void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page, int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr_pages, enum wb_reason reason, struct wb_completion *done); void cgroup_writeback_umount(void); +bool cleanup_offline_cgwb(struct bdi_writeback *wb); /** * inode_attach_wb - associate an inode with its wb diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 8d90fb9184e8a..880e609b7352a 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -399,6 +399,9 @@ static inline bool ip6_ignore_linkdown(const struct net_device *dev) { const struct inet6_dev *idev = __in6_dev_get(dev); + if (unlikely(!idev)) + return true; + return !!idev->cnf.ignore_routes_with_linkdown; } diff --git a/include/net/arp.h b/include/net/arp.h index 4950191f6b2bf..4a23a97195f33 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -71,6 +71,7 @@ void arp_send(int type, int ptype, __be32 dest_ip, const unsigned char *src_hw, const unsigned char *th); int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir); void arp_ifdown(struct net_device *dev); +int arp_invalidate(struct net_device *dev, __be32 ip, bool force); struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, struct net_device *dev, __be32 src_ip, diff --git a/include/net/ax25.h b/include/net/ax25.h index 8b7eb46ad72d8..aadff553e4b73 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -236,6 +236,7 @@ typedef struct ax25_dev { #if defined(CONFIG_AX25_DAMA_SLAVE) || defined(CONFIG_AX25_DAMA_MASTER) ax25_dama_info dama; #endif + refcount_t refcount; } ax25_dev; typedef struct ax25_cb { @@ -290,6 +291,17 @@ static __inline__ void ax25_cb_put(ax25_cb *ax25) } } +static inline void ax25_dev_hold(ax25_dev *ax25_dev) +{ + refcount_inc(&ax25_dev->refcount); +} + +static inline void ax25_dev_put(ax25_dev *ax25_dev) +{ + if (refcount_dec_and_test(&ax25_dev->refcount)) { + kfree(ax25_dev); + } +} static inline __be16 ax25_type_trans(struct sk_buff *skb, struct net_device *dev) { skb->dev = dev; diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index fabee6db0abb7..421d41ef4e9ca 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -370,6 +370,71 @@ static inline struct sk_buff *bt_skb_send_alloc(struct sock *sk, return NULL; } +/* Shall not be called with lock_sock held */ +static inline struct sk_buff *bt_skb_sendmsg(struct sock *sk, + struct msghdr *msg, + size_t len, size_t mtu, + size_t headroom, size_t tailroom) +{ + struct sk_buff *skb; + size_t size = min_t(size_t, len, mtu); + int err; + + skb = bt_skb_send_alloc(sk, size + headroom + tailroom, + msg->msg_flags & MSG_DONTWAIT, &err); + if (!skb) + return ERR_PTR(err); + + skb_reserve(skb, headroom); + skb_tailroom_reserve(skb, mtu, tailroom); + + if (!copy_from_iter_full(skb_put(skb, size), size, &msg->msg_iter)) { + kfree_skb(skb); + return ERR_PTR(-EFAULT); + } + + skb->priority = sk->sk_priority; + + return skb; +} + +/* Similar to bt_skb_sendmsg but can split the msg into multiple fragments + * accourding to the MTU. + */ +static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk, + struct msghdr *msg, + size_t len, size_t mtu, + size_t headroom, size_t tailroom) +{ + struct sk_buff *skb, **frag; + + skb = bt_skb_sendmsg(sk, msg, len, mtu, headroom, tailroom); + if (IS_ERR_OR_NULL(skb)) + return skb; + + len -= skb->len; + if (!len) + return skb; + + /* Add remaining data over MTU as continuation fragments */ + frag = &skb_shinfo(skb)->frag_list; + while (len) { + struct sk_buff *tmp; + + tmp = bt_skb_sendmsg(sk, msg, len, mtu, headroom, tailroom); + if (IS_ERR(tmp)) { + return skb; + } + + len -= tmp->len; + + *frag = tmp; + frag = &(*frag)->next; + } + + return skb; +} + int bt_to_errno(u16 code); void hci_sock_set_flag(struct sock *sk, int nr); diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 14d00cc10e22e..ecad25900ad78 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -34,6 +34,9 @@ /* HCI priority */ #define HCI_PRIO_MAX 7 +/* HCI maximum id value */ +#define HCI_MAX_ID 10000 + /* HCI Core structures */ struct inquiry_data { bdaddr_t bdaddr; diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 8efc2419a815f..b2046b02d11d6 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -802,6 +802,7 @@ enum { }; void l2cap_chan_hold(struct l2cap_chan *c); +struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c); void l2cap_chan_put(struct l2cap_chan *c); static inline void l2cap_chan_lock(struct l2cap_chan *chan) diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index c8696a230b7d9..1a28f299a4c61 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -262,7 +262,7 @@ struct ad_system { struct ad_bond_info { struct ad_system system; /* 802.3ad system structure */ struct bond_3ad_stats stats; - u32 agg_select_timer; /* Timer to select aggregator after all adapter's hand shakes */ + atomic_t agg_select_timer; /* Timer to select aggregator after all adapter's hand shakes */ u16 aggregator_identifier; }; diff --git a/include/net/bond_alb.h b/include/net/bond_alb.h index b3504fcd773dc..2d3c482818863 100644 --- a/include/net/bond_alb.h +++ b/include/net/bond_alb.h @@ -126,7 +126,7 @@ struct tlb_slave_info { struct alb_bond_info { struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */ u32 unbalanced_load; - int tx_rebalance_counter; + atomic_t tx_rebalance_counter; int lp_counter; /* -------- rlb parameters -------- */ int rlb_enabled; diff --git a/include/net/checksum.h b/include/net/checksum.h index 97bf4885a962f..e13d5ecf71cdb 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -22,7 +22,7 @@ #include #ifndef _HAVE_ARCH_COPY_AND_CSUM_FROM_USER -static inline +static __always_inline __wsum csum_and_copy_from_user (const void __user *src, void *dst, int len, __wsum sum, int *err_ptr) { @@ -37,7 +37,7 @@ __wsum csum_and_copy_from_user (const void __user *src, void *dst, #endif #ifndef HAVE_CSUM_COPY_USER -static __inline__ __wsum csum_and_copy_to_user +static __always_inline __wsum csum_and_copy_to_user (const void *src, void __user *dst, int len, __wsum sum, int *err_ptr) { sum = csum_partial(src, len, sum); @@ -54,7 +54,7 @@ static __inline__ __wsum csum_and_copy_to_user #endif #ifndef HAVE_ARCH_CSUM_ADD -static inline __wsum csum_add(__wsum csum, __wsum addend) +static __always_inline __wsum csum_add(__wsum csum, __wsum addend) { u32 res = (__force u32)csum; res += (__force u32)addend; @@ -62,12 +62,12 @@ static inline __wsum csum_add(__wsum csum, __wsum addend) } #endif -static inline __wsum csum_sub(__wsum csum, __wsum addend) +static __always_inline __wsum csum_sub(__wsum csum, __wsum addend) { return csum_add(csum, ~addend); } -static inline __sum16 csum16_add(__sum16 csum, __be16 addend) +static __always_inline __sum16 csum16_add(__sum16 csum, __be16 addend) { u16 res = (__force u16)csum; @@ -75,12 +75,12 @@ static inline __sum16 csum16_add(__sum16 csum, __be16 addend) return (__force __sum16)(res + (res < (__force u16)addend)); } -static inline __sum16 csum16_sub(__sum16 csum, __be16 addend) +static __always_inline __sum16 csum16_sub(__sum16 csum, __be16 addend) { return csum16_add(csum, ~addend); } -static inline __wsum +static __always_inline __wsum csum_block_add(__wsum csum, __wsum csum2, int offset) { u32 sum = (__force u32)csum2; @@ -92,36 +92,37 @@ csum_block_add(__wsum csum, __wsum csum2, int offset) return csum_add(csum, (__force __wsum)sum); } -static inline __wsum +static __always_inline __wsum csum_block_add_ext(__wsum csum, __wsum csum2, int offset, int len) { return csum_block_add(csum, csum2, offset); } -static inline __wsum +static __always_inline __wsum csum_block_sub(__wsum csum, __wsum csum2, int offset) { return csum_block_add(csum, ~csum2, offset); } -static inline __wsum csum_unfold(__sum16 n) +static __always_inline __wsum csum_unfold(__sum16 n) { return (__force __wsum)n; } -static inline __wsum csum_partial_ext(const void *buff, int len, __wsum sum) +static __always_inline +__wsum csum_partial_ext(const void *buff, int len, __wsum sum) { return csum_partial(buff, len, sum); } #define CSUM_MANGLED_0 ((__force __sum16)0xffff) -static inline void csum_replace_by_diff(__sum16 *sum, __wsum diff) +static __always_inline void csum_replace_by_diff(__sum16 *sum, __wsum diff) { *sum = csum_fold(csum_add(diff, ~csum_unfold(*sum))); } -static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) +static __always_inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) { __wsum tmp = csum_sub(~csum_unfold(*sum), (__force __wsum)from); @@ -134,11 +135,16 @@ static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) * m : old value of a 16bit field * m' : new value of a 16bit field */ -static inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new) +static __always_inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new) { *sum = ~csum16_add(csum16_sub(~(*sum), old), new); } +static inline void csum_replace(__wsum *csum, __wsum old, __wsum new) +{ + *csum = csum_add(csum_sub(*csum, old), new); +} + struct sk_buff; void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, __be32 from, __be32 to, bool pseudohdr); @@ -148,16 +154,16 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, __wsum diff, bool pseudohdr); -static inline void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb, - __be16 from, __be16 to, - bool pseudohdr) +static __always_inline +void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb, + __be16 from, __be16 to, bool pseudohdr) { inet_proto_csum_replace4(sum, skb, (__force __be32)from, (__force __be32)to, pseudohdr); } -static inline __wsum remcsum_adjust(void *ptr, __wsum csum, - int start, int offset) +static __always_inline __wsum remcsum_adjust(void *ptr, __wsum csum, + int start, int offset) { __sum16 *psum = (__sum16 *)(ptr + offset); __wsum delta; @@ -173,7 +179,7 @@ static inline __wsum remcsum_adjust(void *ptr, __wsum csum, return delta; } -static inline void remcsum_unadjust(__sum16 *psum, __wsum delta) +static __always_inline void remcsum_unadjust(__sum16 *psum, __wsum delta) { *psum = csum_fold(csum_sub(delta, (__force __wsum)*psum)); } diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 14efa0ded75dd..adab27ba1ecbf 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -123,8 +123,20 @@ static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb) memcpy(&new_md->u.tun_info, &md_dst->u.tun_info, sizeof(struct ip_tunnel_info) + md_size); +#ifdef CONFIG_DST_CACHE + /* Unclone the dst cache if there is one */ + if (new_md->u.tun_info.dst_cache.cache) { + int ret; + + ret = dst_cache_init(&new_md->u.tun_info.dst_cache, GFP_ATOMIC); + if (ret) { + metadata_dst_free(new_md); + return ERR_PTR(ret); + } + } +#endif + skb_dst_drop(skb); - dst_hold(&new_md->dst); skb_dst_set(skb, &new_md->dst); return new_md; } diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 7fed3193f81d4..25eae5c67387e 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -68,7 +68,7 @@ struct fib_rules_ops { int (*action)(struct fib_rule *, struct flowi *, int, struct fib_lookup_arg *); - bool (*suppress)(struct fib_rule *, + bool (*suppress)(struct fib_rule *, int, struct fib_lookup_arg *); int (*match)(struct fib_rule *, struct flowi *, int); diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 78f6437cbc3a8..02171416c68eb 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -51,6 +51,8 @@ struct flow_dissector_key_vlan { vlan_dei:1, vlan_priority:3; __be16 vlan_tpid; + __be16 vlan_eth_type; + u16 padding; }; struct flow_dissector_key_mpls { diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index a01981d7108f9..f6d614926e9e9 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -64,6 +64,14 @@ struct inet6_ifaddr { struct hlist_node addr_lst; struct list_head if_list; + /* + * Used to safely traverse idev->addr_list in process context + * if the idev->lock needed to protect idev->addr_list cannot be held. + * In that case, add the items to this list temporarily and iterate + * without holding idev->lock. + * See addrconf_ifdown and dev_forward_change. + */ + struct list_head if_list_aux; struct list_head tmp_list; struct inet6_ifaddr *ifpub; diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index bac79e817776c..4cbd413e71a3f 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -116,8 +116,15 @@ int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net); static inline void fqdir_pre_exit(struct fqdir *fqdir) { - fqdir->high_thresh = 0; /* prevent creation of new frags */ - fqdir->dead = true; + /* Prevent creation of new frags. + * Pairs with READ_ONCE() in inet_frag_find(). + */ + WRITE_ONCE(fqdir->high_thresh, 0); + + /* Pairs with READ_ONCE() in inet_frag_kill(), ip_expire() + * and ip6frag_expire_frag_queue(). + */ + WRITE_ONCE(fqdir->dead, true); } void fqdir_exit(struct fqdir *fqdir); diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 92560974ea67e..d4d611064a76f 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -247,8 +247,9 @@ void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, unsigned long high_limit); int inet_hashinfo2_init_mod(struct inet_hashinfo *h); -bool inet_ehash_insert(struct sock *sk, struct sock *osk); -bool inet_ehash_nolisten(struct sock *sk, struct sock *osk); +bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk); +bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, + bool *found_dup_sk); int __inet_hash(struct sock *sk, struct sock *osk); int inet_hash(struct sock *sk); void inet_unhash(struct sock *sk); @@ -418,7 +419,7 @@ static inline void sk_rcv_saddr_set(struct sock *sk, __be32 addr) } int __inet_hash_connect(struct inet_timewait_death_row *death_row, - struct sock *sk, u32 port_offset, + struct sock *sk, u64 port_offset, int (*check_established)(struct inet_timewait_death_row *, struct sock *, __u16, struct inet_timewait_sock **)); diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 34c4436fd18ff..58db7c69c146d 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -107,7 +107,8 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb) { - if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept) + if (!sk->sk_mark && + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)) return skb->mark; return sk->sk_mark; @@ -375,7 +376,7 @@ static inline bool inet_get_convert_csum(struct sock *sk) static inline bool inet_can_nonlocal_bind(struct net *net, struct inet_sock *inet) { - return net->ipv4.sysctl_ip_nonlocal_bind || + return READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind) || inet->freebind || inet->transparent; } diff --git a/include/net/ip.h b/include/net/ip.h index 52abfc00b5e3d..db841ab388c0e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -381,7 +381,7 @@ void ipfrag_init(void); void ip_static_sysctl_init(void); #define IP4_REPLY_MARK(net, mark) \ - ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0) + (READ_ONCE((net)->ipv4.sysctl_fwmark_reflect) ? (mark) : 0) static inline bool ip_is_fragment(const struct iphdr *iph) { @@ -442,7 +442,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, struct net *net = dev_net(dst->dev); unsigned int mtu; - if (net->ipv4.sysctl_ip_fwd_use_pmtu || + if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) || ip_mtu_locked(dst) || !forwarding) return dst_mtu(dst); @@ -509,19 +509,18 @@ static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb, { struct iphdr *iph = ip_hdr(skb); + /* We had many attacks based on IPID, use the private + * generator as much as we can. + */ + if (sk && inet_sk(sk)->inet_daddr) { + iph->id = htons(inet_sk(sk)->inet_id); + inet_sk(sk)->inet_id += segs; + return; + } if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) { - /* This is only to work around buggy Windows95/2000 - * VJ compression implementations. If the ID field - * does not change, they drop every other packet in - * a TCP stream using header compression. - */ - if (sk && inet_sk(sk)->inet_daddr) { - iph->id = htons(inet_sk(sk)->inet_id); - inet_sk(sk)->inet_id += segs; - } else { - iph->id = 0; - } + iph->id = 0; } else { + /* Unfortunately we need the big hammer to get a suitable IPID */ __ip_select_ident(net, iph, segs); } } diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index bd0f1595bdc71..780754b9cbcd4 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -247,7 +247,7 @@ static inline bool fib6_get_cookie_safe(const struct fib6_info *f6i, fn = rcu_dereference(f6i->fib6_node); if (fn) { - *cookie = fn->fn_sernum; + *cookie = READ_ONCE(fn->fn_sernum); /* pairs with smp_wmb() in fib6_update_sernum_upto_root() */ smp_rmb(); status = true; @@ -451,6 +451,7 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack); void fib6_nh_release(struct fib6_nh *fib6_nh); +void fib6_nh_release_dsts(struct fib6_nh *fib6_nh); int call_fib6_entry_notifiers(struct net *net, enum fib_event_type event_type, diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index ffbae7683450a..3bf9ecb6b0d39 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -412,7 +412,7 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, #ifdef CONFIG_IP_ROUTE_CLASSID static inline int fib_num_tclassid_users(struct net *net) { - return net->ipv4.fib_num_tclassid_users; + return atomic_read(&net->ipv4.fib_num_tclassid_users); } #else static inline int fib_num_tclassid_users(struct net *net) @@ -524,5 +524,5 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nh, u8 rt_family, unsigned char *flags, bool skip_oif); int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nh, - int nh_weight, u8 rt_family); + int nh_weight, u8 rt_family, u32 nh_tclassid); #endif /* _NET_FIB_H */ diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 7c37e3c3b1c79..a1540b3a27f96 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -14,6 +14,7 @@ #include /* for struct rwlock_t */ #include /* for struct atomic_t */ #include /* for struct refcount_t */ +#include #include #include @@ -885,6 +886,8 @@ struct netns_ipvs { atomic_t conn_out_counter; #ifdef CONFIG_SYSCTL + /* delayed work for expiring no dest connections */ + struct delayed_work expire_nodest_conn_work; /* 1/rate drop and drop-entry variables */ struct delayed_work defense_work; /* Work handler */ int drop_rate; @@ -1049,6 +1052,11 @@ static inline int sysctl_conn_reuse_mode(struct netns_ipvs *ipvs) return ipvs->sysctl_conn_reuse_mode; } +static inline int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) +{ + return ipvs->sysctl_expire_nodest_conn; +} + static inline int sysctl_schedule_icmp(struct netns_ipvs *ipvs) { return ipvs->sysctl_schedule_icmp; @@ -1136,6 +1144,11 @@ static inline int sysctl_conn_reuse_mode(struct netns_ipvs *ipvs) return 1; } +static inline int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) +{ + return 0; +} + static inline int sysctl_schedule_icmp(struct netns_ipvs *ipvs) { return 0; @@ -1505,6 +1518,22 @@ static inline int ip_vs_todrop(struct netns_ipvs *ipvs) static inline int ip_vs_todrop(struct netns_ipvs *ipvs) { return 0; } #endif +#ifdef CONFIG_SYSCTL +/* Enqueue delayed work for expiring no dest connections + * Only run when sysctl_expire_nodest=1 + */ +static inline void ip_vs_enqueue_expire_nodest_conns(struct netns_ipvs *ipvs) +{ + if (sysctl_expire_nodest_conn(ipvs)) + queue_delayed_work(system_long_wq, + &ipvs->expire_nodest_conn_work, 1); +} + +void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs); +#else +static inline void ip_vs_enqueue_expire_nodest_conns(struct netns_ipvs *ipvs) {} +#endif + #define IP_VS_DFWD_METHOD(dest) (atomic_read(&(dest)->conn_flags) & \ IP_VS_CONN_F_FWD_MASK) diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h index a21e8b1381a10..e47a946bb77c9 100644 --- a/include/net/ipv6_frag.h +++ b/include/net/ipv6_frag.h @@ -67,7 +67,8 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) struct sk_buff *head; rcu_read_lock(); - if (fq->q.fqdir->dead) + /* Paired with the WRITE_ONCE() in fqdir_pre_exit(). */ + if (READ_ONCE(fq->q.fqdir->dead)) goto out_rcu_unlock; spin_lock(&fq->q.lock); diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index 3e7d2c0e79ca1..af9e127779adf 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -47,6 +47,7 @@ struct ipv6_stub { struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack); void (*fib6_nh_release)(struct fib6_nh *fib6_nh); + void (*fib6_nh_release_dsts)(struct fib6_nh *fib6_nh); void (*fib6_update_sernum)(struct net *net, struct fib6_info *rt); int (*ip6_del_rt)(struct net *net, struct fib6_info *rt); void (*fib6_rt_update)(struct net *net, struct fib6_info *rt, diff --git a/include/net/llc.h b/include/net/llc.h index df282d9b40170..9c10b121b49b0 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -72,7 +72,9 @@ struct llc_sap { static inline struct hlist_head *llc_sk_dev_hash(struct llc_sap *sap, int ifindex) { - return &sap->sk_dev_hash[ifindex % LLC_SK_DEV_HASH_ENTRIES]; + u32 bucket = hash_32(ifindex, LLC_SK_DEV_HASH_BITS); + + return &sap->sk_dev_hash[bucket]; } static inline diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 2be8d6b0dfb69..b6494e87c897c 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -253,6 +253,7 @@ static inline void *neighbour_priv(const struct neighbour *n) #define NEIGH_UPDATE_F_OVERRIDE 0x00000001 #define NEIGH_UPDATE_F_WEAK_OVERRIDE 0x00000002 #define NEIGH_UPDATE_F_OVERRIDE_ISROUTER 0x00000004 +#define NEIGH_UPDATE_F_USE 0x10000000 #define NEIGH_UPDATE_F_EXT_LEARNED 0x20000000 #define NEIGH_UPDATE_F_ISROUTER 0x40000000 #define NEIGH_UPDATE_F_ADMIN 0x80000000 @@ -505,10 +506,15 @@ static inline int neigh_output(struct neighbour *n, struct sk_buff *skb, { const struct hh_cache *hh = &n->hh; - if ((n->nud_state & NUD_CONNECTED) && hh->hh_len && !skip_cache) + /* n->nud_state and hh->hh_len could be changed under us. + * neigh_hh_output() is taking care of the race later. + */ + if (!skip_cache && + (READ_ONCE(n->nud_state) & NUD_CONNECTED) && + READ_ONCE(hh->hh_len)) return neigh_hh_output(hh, skb); - else - return n->output(n, skb); + + return n->output(n, skb); } static inline struct neighbour * diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 09f2efea0b970..5805fe4947f3c 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -59,8 +59,13 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb) int ret = NF_ACCEPT; if (ct) { - if (!nf_ct_is_confirmed(ct)) + if (!nf_ct_is_confirmed(ct)) { ret = __nf_conntrack_confirm(skb); + + if (ret == NF_ACCEPT) + ct = (struct nf_conn *)skb_nfct(skb); + } + if (likely(ret == NF_ACCEPT)) nf_ct_deliver_cached_events(ct); } diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 47088083667b2..c204af20c27e4 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -34,7 +34,7 @@ void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *q void nf_unregister_queue_handler(struct net *net); void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); -void nf_queue_entry_get_refs(struct nf_queue_entry *entry); +bool nf_queue_entry_get_refs(struct nf_queue_entry *entry); void nf_queue_entry_release_refs(struct nf_queue_entry *entry); static inline void init_hashrandom(u32 *jhash_initval) diff --git a/include/net/netlink.h b/include/net/netlink.h index b140c8f1be22d..280390e0240a9 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -375,6 +375,12 @@ struct nla_policy { .len = __VA_ARGS__ + 0, \ } +#define __NLA_IS_UINT_TYPE(tp) \ + (tp == NLA_U8 || tp == NLA_U16 || tp == NLA_U32 || tp == NLA_U64) + +#define NLA_ENSURE_UINT_TYPE(tp) \ + (__NLA_ENSURE(__NLA_IS_UINT_TYPE(tp)) + tp) + /** * struct nl_info - netlink source information * @nlh: Netlink message header of original request diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 3cc72100eeb26..4f4b1d376362e 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -61,7 +61,7 @@ struct netns_ipv4 { #endif bool fib_has_custom_local_routes; #ifdef CONFIG_IP_ROUTE_CLASSID - int fib_num_tclassid_users; + atomic_t fib_num_tclassid_users; #endif struct hlist_head *fib_table_hash; bool fib_offload_disabled; diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 18a5aca264767..5ad614793af26 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -173,7 +173,7 @@ int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh, struct fib_nh_common *nhc = &nhi->fib_nhc; int weight = nhg->nh_entries[i].weight; - if (fib_add_nexthop(skb, nhc, weight, rt_family) < 0) + if (fib_add_nexthop(skb, nhc, weight, rt_family, 0) < 0) return -EMSGSIZE; } diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 33979017b7824..004e49f748419 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -30,6 +30,7 @@ enum nci_flag { NCI_UP, NCI_DATA_EXCHANGE, NCI_DATA_EXCHANGE_TO, + NCI_UNREG, }; /* NCI device states */ diff --git a/include/net/nl802154.h b/include/net/nl802154.h index ddcee128f5d9a..145acb8f25095 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -19,6 +19,8 @@ * */ +#include + #define NL802154_GENL_NAME "nl802154" enum nl802154_commands { @@ -150,10 +152,9 @@ enum nl802154_attrs { }; enum nl802154_iftype { - /* for backwards compatibility TODO */ - NL802154_IFTYPE_UNSPEC = -1, + NL802154_IFTYPE_UNSPEC = (~(__u32)0), - NL802154_IFTYPE_NODE, + NL802154_IFTYPE_NODE = 0, NL802154_IFTYPE_MONITOR, NL802154_IFTYPE_COORD, diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index b16f9236de147..d1585b54fb0bd 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -11,6 +11,7 @@ #include #define DEFAULT_TX_QUEUE_LEN 1000 +#define STAB_SIZE_LOG_MAX 30 struct qdisc_walker { int stop; diff --git a/include/net/raw.h b/include/net/raw.h index 8ad8df5948536..c51a635671a73 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -75,7 +75,7 @@ static inline bool raw_sk_bound_dev_eq(struct net *net, int bound_dev_if, int dif, int sdif) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) - return inet_bound_dev_eq(!!net->ipv4.sysctl_raw_l3mdev_accept, + return inet_bound_dev_eq(READ_ONCE(net->ipv4.sysctl_raw_l3mdev_accept), bound_dev_if, dif, sdif); #else return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); diff --git a/include/net/route.h b/include/net/route.h index 6c516840380db..b85d1912d84fd 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -359,7 +359,7 @@ static inline struct neighbour *ip_neigh_gw4(struct net_device *dev, { struct neighbour *neigh; - neigh = __ipv4_neigh_lookup_noref(dev, daddr); + neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)daddr); if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &daddr, dev, false); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 0cb0a4bcb5447..1ee396ce0eda8 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -160,37 +160,17 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) if (spin_trylock(&qdisc->seqlock)) goto nolock_empty; - /* Paired with smp_mb__after_atomic() to make sure - * STATE_MISSED checking is synchronized with clearing - * in pfifo_fast_dequeue(). + /* No need to insist if the MISSED flag was already set. + * Note that test_and_set_bit() also gives us memory ordering + * guarantees wrt potential earlier enqueue() and below + * spin_trylock(), both of which are necessary to prevent races */ - smp_mb__before_atomic(); - - /* If the MISSED flag is set, it means other thread has - * set the MISSED flag before second spin_trylock(), so - * we can return false here to avoid multi cpus doing - * the set_bit() and second spin_trylock() concurrently. - */ - if (test_bit(__QDISC_STATE_MISSED, &qdisc->state)) + if (test_and_set_bit(__QDISC_STATE_MISSED, &qdisc->state)) return false; - /* Set the MISSED flag before the second spin_trylock(), - * if the second spin_trylock() return false, it means - * other cpu holding the lock will do dequeuing for us - * or it will see the MISSED flag set after releasing - * lock and reschedule the net_tx_action() to do the - * dequeuing. - */ - set_bit(__QDISC_STATE_MISSED, &qdisc->state); - - /* spin_trylock() only has load-acquire semantic, so use - * smp_mb__after_atomic() to ensure STATE_MISSED is set - * before doing the second spin_trylock(). - */ - smp_mb__after_atomic(); - - /* Retry again in case other CPU may not see the new flag - * after it releases the lock at the end of qdisc_run_end(). + /* Try to take the lock again to make sure that we will either + * grab it or the CPU that still has it will see MISSED set + * when testing it in qdisc_run_end() */ if (!spin_trylock(&qdisc->seqlock)) return false; @@ -214,6 +194,12 @@ static inline void qdisc_run_end(struct Qdisc *qdisc) if (qdisc->flags & TCQ_F_NOLOCK) { spin_unlock(&qdisc->seqlock); + /* spin_unlock() only has store-release semantic. The unlock + * and test_bit() ordering is a store-load ordering, so a full + * memory barrier is needed here. + */ + smp_mb(); + if (unlikely(test_bit(__QDISC_STATE_MISSED, &qdisc->state))) { clear_bit(__QDISC_STATE_MISSED, &qdisc->state); @@ -299,6 +285,8 @@ struct Qdisc_ops { struct netlink_ext_ack *extack); void (*attach)(struct Qdisc *sch); int (*change_tx_queue_len)(struct Qdisc *, unsigned int); + void (*change_real_num_tx)(struct Qdisc *sch, + unsigned int new_real_tx); int (*dump)(struct Qdisc *, struct sk_buff *); int (*dump_stats)(struct Qdisc *, struct gnet_dump *); @@ -675,6 +663,8 @@ void qdisc_class_hash_grow(struct Qdisc *, struct Qdisc_class_hash *); void qdisc_class_hash_destroy(struct Qdisc_class_hash *); int dev_qdisc_change_tx_queue_len(struct net_device *dev); +void dev_qdisc_change_real_num_tx(struct net_device *dev, + unsigned int new_real_tx); void dev_init_scheduler(struct net_device *dev); void dev_shutdown(struct net_device *dev); void dev_activate(struct net_device *dev); @@ -1260,6 +1250,7 @@ struct psched_ratecfg { u64 rate_bytes_ps; /* bytes per second */ u32 mult; u16 overhead; + u16 mpu; u8 linklayer; u8 shift; }; @@ -1269,6 +1260,9 @@ static inline u64 psched_l2t_ns(const struct psched_ratecfg *r, { len += r->overhead; + if (len < r->mpu) + len = r->mpu; + if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) return ((u64)(DIV_ROUND_UP(len,48)*53) * r->mult) >> r->shift; @@ -1291,6 +1285,7 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res, res->rate = min_t(u64, r->rate_bytes_ps, ~0U); res->overhead = r->overhead; + res->mpu = r->mpu; res->linklayer = (r->linklayer & TC_LINKLAYER_MASK); } diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 3ab5c6bbb90bd..35c108a6b8720 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -103,6 +103,7 @@ extern struct percpu_counter sctp_sockets_allocated; int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *); struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); +typedef int (*sctp_callback_t)(struct sctp_endpoint *, struct sctp_transport *, void *); void sctp_transport_walk_start(struct rhashtable_iter *iter); void sctp_transport_walk_stop(struct rhashtable_iter *iter); struct sctp_transport *sctp_transport_get_next(struct net *net, @@ -113,9 +114,8 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr, void *p); -int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), - int (*cb_done)(struct sctp_transport *, void *), - struct net *net, int *pos, void *p); +int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done, + struct net *net, int *pos, void *p); int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p); int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, struct sctp_info *info); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index fd7c3f76040c3..cb05e503c9cd1 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1345,6 +1345,7 @@ struct sctp_endpoint { u32 secid; u32 peer_secid; + struct rcu_head rcu; }; /* Recover the outter endpoint structure. */ @@ -1360,7 +1361,7 @@ static inline struct sctp_endpoint *sctp_ep(struct sctp_ep_common *base) struct sctp_endpoint *sctp_endpoint_new(struct sock *, gfp_t); void sctp_endpoint_free(struct sctp_endpoint *); void sctp_endpoint_put(struct sctp_endpoint *); -void sctp_endpoint_hold(struct sctp_endpoint *); +int sctp_endpoint_hold(struct sctp_endpoint *ep); void sctp_endpoint_add_asoc(struct sctp_endpoint *, struct sctp_association *); struct sctp_association *sctp_endpoint_lookup_assoc( const struct sctp_endpoint *ep, diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index d7d2495f83c27..dac91aa38c5af 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -4,8 +4,8 @@ #include -u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); -u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, +u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); +u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport); u32 secure_tcp_seq(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport); diff --git a/include/net/sock.h b/include/net/sock.h index 99233235ee8bd..ed201c8e1fe9a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -470,8 +470,10 @@ struct sock { u32 sk_ack_backlog; u32 sk_max_ack_backlog; kuid_t sk_uid; + spinlock_t sk_peer_lock; struct pid *sk_peer_pid; const struct cred *sk_peer_cred; + long sk_rcvtimeo; ktime_t sk_stamp; #if BITS_PER_LONG==32 @@ -1404,7 +1406,7 @@ void __sk_mem_reclaim(struct sock *sk, int amount); /* sysctl_mem values are in pages, we convert them in SK_MEM_QUANTUM units */ static inline long sk_prot_mem_limits(const struct sock *sk, int index) { - long val = sk->sk_prot->sysctl_mem[index]; + long val = READ_ONCE(sk->sk_prot->sysctl_mem[index]); #if PAGE_SIZE > SK_MEM_QUANTUM val <<= PAGE_SHIFT - SK_MEM_QUANTUM_SHIFT; @@ -2615,6 +2617,9 @@ extern int sysctl_optmem_max; extern __u32 sysctl_wmem_default; extern __u32 sysctl_rmem_default; + +/* On 32bit arches, an skb frag is limited to 2^15 */ +#define SKB_FRAG_PAGE_ORDER get_order(32768) DECLARE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key); static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto) diff --git a/include/net/strparser.h b/include/net/strparser.h index 1d20b98493a10..bec1439bd3be6 100644 --- a/include/net/strparser.h +++ b/include/net/strparser.h @@ -54,10 +54,24 @@ struct strp_msg { int offset; }; +struct _strp_msg { + /* Internal cb structure. struct strp_msg must be first for passing + * to upper layer. + */ + struct strp_msg strp; + int accum_len; +}; + +struct sk_skb_cb { +#define SK_SKB_CB_PRIV_LEN 20 + unsigned char data[SK_SKB_CB_PRIV_LEN]; + struct _strp_msg strp; +}; + static inline struct strp_msg *strp_msg(struct sk_buff *skb) { return (struct strp_msg *)((void *)skb->cb + - offsetof(struct qdisc_skb_cb, data)); + offsetof(struct sk_skb_cb, strp)); } /* Structure for an attached lower socket */ diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h index 748cf87a4d7ea..3e02709a1df65 100644 --- a/include/net/tc_act/tc_pedit.h +++ b/include/net/tc_act/tc_pedit.h @@ -14,6 +14,7 @@ struct tcf_pedit { struct tc_action common; unsigned char tcfp_nkeys; unsigned char tcfp_flags; + u32 tcfp_off_max_hint; struct tc_pedit_key *tcfp_keys; struct tcf_pedit_key_ex *tcfp_keys_ex; }; diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h index 0689d9bcdf841..f6a0f09ccc5f9 100644 --- a/include/net/tc_act/tc_tunnel_key.h +++ b/include/net/tc_act/tc_tunnel_key.h @@ -52,7 +52,10 @@ static inline struct ip_tunnel_info *tcf_tunnel_info(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT struct tcf_tunnel_key *t = to_tunnel_key(a); - struct tcf_tunnel_key_params *params = rtnl_dereference(t->params); + struct tcf_tunnel_key_params *params; + + params = rcu_dereference_protected(t->params, + lockdep_is_held(&a->tcfa_lock)); return ¶ms->tcft_enc_metadata->u.tun_info; #else @@ -69,7 +72,7 @@ tcf_tunnel_info_copy(const struct tc_action *a) if (tun) { size_t tun_size = sizeof(*tun) + tun->options_len; struct ip_tunnel_info *tun_copy = kmemdup(tun, tun_size, - GFP_KERNEL); + GFP_ATOMIC); return tun_copy; } diff --git a/include/net/tcp.h b/include/net/tcp.h index dbb2ef0220c69..d78fc985e0247 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -805,6 +805,7 @@ bool tcp_in_quickack_mode(struct sock *sk); void tcp_check_space(struct sock *sk); void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb); void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb); +void tcp_check_space(struct sock *sk); /* tcp_minisocks.c */ void smc_check_reset_syn_req(struct tcp_sock *oldtp, @@ -1286,6 +1287,7 @@ struct rate_sample { int losses; /* number of packets marked lost upon ACK */ u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */ u32 prior_in_flight; /* in flight before this ACK */ + u32 last_end_seq; /* end_seq of most recently ACKed packet */ bool is_app_limited; /* is sample from packet with bubble in pipe? */ bool is_retrans; /* is sample from retransmission? */ bool is_ack_delayed; /* is this (likely) a delayed ACK? */ @@ -1407,6 +1409,11 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, bool is_sack_reneg, struct rate_sample *rs); void tcp_rate_check_app_limited(struct sock *sk); +static inline bool tcp_skb_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2) +{ + return t1 > t2 || (t1 == t2 && after(seq1, seq2)); +} + /* These functions determine how the current flow behaves in respect of SACK * handling. SACK is negotiated with the peer, and therefore it can vary * between different flows. @@ -1638,8 +1645,8 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); s32 delta; - if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out || - ca_ops->cong_control) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) || + tp->packets_out || ca_ops->cong_control) return; delta = tcp_jiffies32 - tp->lsndtime; if (delta > inet_csk(sk)->icsk_rto) @@ -1654,7 +1661,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, static inline int tcp_win_from_space(const struct sock *sk, int space) { - int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale; + int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale); return tcp_adv_win_scale <= 0 ? (space>>(-tcp_adv_win_scale)) : @@ -1730,7 +1737,8 @@ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) static inline int tcp_fin_time(const struct sock *sk) { - int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout; + int fin_timeout = tcp_sk(sk)->linger2 ? : + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fin_timeout); const int rto = inet_csk(sk)->icsk_rto; if (fin_timeout < (rto << 2) - (rto >> 1)) @@ -2211,7 +2219,7 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr); static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); - return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat; + return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat); } /* @wake is one when sk_stream_write_space() calls us. @@ -2281,6 +2289,11 @@ struct tcp_request_sock_ops { struct sk_buff *syn_skb); }; +extern const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops; +#if IS_ENABLED(CONFIG_IPV6) +extern const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops; +#endif + #ifdef CONFIG_SYN_COOKIES static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops, const struct sock *sk, struct sk_buff *skb, diff --git a/include/net/tls.h b/include/net/tls.h index 697df45c0bcee..7f220e03ebb2d 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -360,6 +360,7 @@ int tls_sk_query(struct sock *sk, int optname, char __user *optval, int __user *optlen); int tls_sk_attach(struct sock *sk, int optname, char __user *optval, unsigned int optlen); +void tls_err_abort(struct sock *sk, int err); int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx); void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx); @@ -465,12 +466,6 @@ static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk) #endif } -static inline void tls_err_abort(struct sock *sk, int err) -{ - sk->sk_err = err; - sk->sk_error_report(sk); -} - static inline bool tls_bigint_increment(unsigned char *seq, int len) { int i; @@ -499,7 +494,7 @@ static inline void tls_advance_record_sn(struct sock *sk, struct cipher_context *ctx) { if (tls_bigint_increment(ctx->rec_seq, prot->rec_seq_size)) - tls_err_abort(sk, EBADMSG); + tls_err_abort(sk, -EBADMSG); if (prot->version != TLS_1_3_VERSION) tls_bigint_increment(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, diff --git a/include/net/udp.h b/include/net/udp.h index fabf507bce5d1..e66854e767dcc 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -252,7 +252,7 @@ static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if, int dif, int sdif) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) - return inet_bound_dev_eq(!!net->ipv4.sysctl_udp_l3mdev_accept, + return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_udp_l3mdev_accept), bound_dev_if, dif, sdif); #else return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); @@ -459,6 +459,7 @@ void udp_init(void); DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key); void udp_encap_enable(void); +void udp_encap_disable(void); #if IS_ENABLED(CONFIG_IPV6) DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); void udpv6_encap_enable(void); @@ -480,8 +481,9 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk, * CHECKSUM_NONE in __udp_gso_segment. UDP GRO indeed builds partial * packets in udp_gro_complete_segment. As does UDP GSO, verified by * udp_send_skb. But when those packets are looped in dev_loopback_xmit - * their ip_summed is set to CHECKSUM_UNNECESSARY. Reset in this - * specific case, where PARTIAL is both correct and required. + * their ip_summed CHECKSUM_NONE is changed to CHECKSUM_UNNECESSARY. + * Reset in this specific case, where PARTIAL is both correct and + * required. */ if (skb->pkt_type == PACKET_LOOPBACK) skb->ip_summed = CHECKSUM_PARTIAL; diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 4b1f95e083070..27bf9f4dd1d45 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -178,9 +178,8 @@ static inline void udp_tunnel_encap_enable(struct socket *sock) #if IS_ENABLED(CONFIG_IPV6) if (sock->sk->sk_family == PF_INET6) ipv6_stub->udpv6_encap_enable(); - else #endif - udp_encap_enable(); + udp_encap_enable(); } #endif diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 8ce63850d6d01..96faf09186945 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1543,7 +1543,6 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si); void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si); u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq); int xfrm_init_replay(struct xfrm_state *x); -u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu); u32 xfrm_state_mtu(struct xfrm_state *x, int mtu); int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload); int xfrm_init_state(struct xfrm_state *x); @@ -1664,14 +1663,15 @@ int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_bundles, const struct xfrm_kmaddress *k, const struct xfrm_encap_tmpl *encap); -struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net); +struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net, + u32 if_id); struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, struct xfrm_migrate *m, struct xfrm_encap_tmpl *encap); int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_bundles, struct xfrm_kmaddress *k, struct net *net, - struct xfrm_encap_tmpl *encap); + struct xfrm_encap_tmpl *encap, u32 if_id); #endif int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport); diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index ab22759de7ea0..7b3ef7a3d8d04 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -30,7 +30,7 @@ enum rdma_nl_flags { * constant as well and the compiler checks they are the same. */ #define MODULE_ALIAS_RDMA_NETLINK(_index, _val) \ - static inline void __chk_##_index(void) \ + static inline void __maybe_unused __chk_##_index(void) \ { \ BUILD_BUG_ON(_index != _val); \ } \ diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h index fac8e89aed81d..310e0dbffda99 100644 --- a/include/scsi/libfcoe.h +++ b/include/scsi/libfcoe.h @@ -249,7 +249,8 @@ int fcoe_ctlr_recv_flogi(struct fcoe_ctlr *, struct fc_lport *, struct fc_frame *); /* libfcoe funcs */ -u64 fcoe_wwn_from_mac(unsigned char mac[MAX_ADDR_LEN], unsigned int, unsigned int); +u64 fcoe_wwn_from_mac(unsigned char mac[ETH_ALEN], unsigned int scheme, + unsigned int port); int fcoe_libfc_config(struct fc_lport *, struct fcoe_ctlr *, const struct libfc_function_template *, int init_fcp); u32 fcoe_fc_crc(struct fc_frame *fp); diff --git a/include/sound/hdaudio_ext.h b/include/sound/hdaudio_ext.h index ef88b20c7b0a5..23dc8deac344d 100644 --- a/include/sound/hdaudio_ext.h +++ b/include/sound/hdaudio_ext.h @@ -88,6 +88,8 @@ struct hdac_ext_stream *snd_hdac_ext_stream_assign(struct hdac_bus *bus, struct snd_pcm_substream *substream, int type); void snd_hdac_ext_stream_release(struct hdac_ext_stream *azx_dev, int type); +void snd_hdac_ext_stream_decouple_locked(struct hdac_bus *bus, + struct hdac_ext_stream *azx_dev, bool decouple); void snd_hdac_ext_stream_decouple(struct hdac_bus *bus, struct hdac_ext_stream *azx_dev, bool decouple); void snd_hdac_ext_stop_streams(struct hdac_bus *bus); diff --git a/include/sound/jack.h b/include/sound/jack.h index 9eb2b5ec1ec41..78f3619f3de94 100644 --- a/include/sound/jack.h +++ b/include/sound/jack.h @@ -62,6 +62,7 @@ struct snd_jack { const char *id; #ifdef CONFIG_SND_JACK_INPUT_DEV struct input_dev *input_dev; + struct mutex input_dev_lock; int registered; int type; char name[100]; diff --git a/include/sound/pcm.h b/include/sound/pcm.h index bbe6eb1ff5d22..f0045f842a604 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -395,6 +395,8 @@ struct snd_pcm_runtime { wait_queue_head_t sleep; /* poll sleep */ wait_queue_head_t tsleep; /* transfer sleep */ struct fasync_struct *fasync; + struct mutex buffer_mutex; /* protect for buffer changes */ + atomic_t buffer_accessing; /* >0: in r/w operation, <0: blocked */ /* -- private section -- */ void *private_data; diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 7c9716fe731e2..59d7ebb8bbaf4 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -781,8 +781,9 @@ struct se_device { atomic_long_t read_bytes; atomic_long_t write_bytes; /* Active commands on this virtual SE device */ - atomic_t simple_cmds; - atomic_t dev_ordered_sync; + atomic_t non_ordered; + bool ordered_sync_in_progress; + atomic_t delayed_cmd_count; atomic_t dev_qf_count; u32 export_count; spinlock_t delayed_cmd_lock; @@ -804,6 +805,7 @@ struct se_device { struct list_head dev_sep_list; struct list_head dev_tmr_list; struct work_struct qf_work_queue; + struct work_struct delayed_cmd_work; struct list_head delayed_cmd_list; struct list_head state_list; struct list_head qf_cmd_list; diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h index 5d9de24cb9c01..d09e369e9d1e8 100644 --- a/include/trace/events/cachefiles.h +++ b/include/trace/events/cachefiles.h @@ -21,6 +21,8 @@ enum cachefiles_obj_ref_trace { cachefiles_obj_put_wait_retry = fscache_obj_ref__nr_traces, cachefiles_obj_put_wait_timeo, + cachefiles_obj_get_ondemand_fd, + cachefiles_obj_put_ondemand_fd, cachefiles_obj_ref__nr_traces }; diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h index 27f5caa6299a3..b7373edac5cdd 100644 --- a/include/trace/events/erofs.h +++ b/include/trace/events/erofs.h @@ -35,20 +35,20 @@ TRACE_EVENT(erofs_lookup, TP_STRUCT__entry( __field(dev_t, dev ) __field(erofs_nid_t, nid ) - __field(const char *, name ) + __string(name, dentry->d_name.name ) __field(unsigned int, flags ) ), TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->nid = EROFS_I(dir)->nid; - __entry->name = dentry->d_name.name; + __assign_str(name, dentry->d_name.name); __entry->flags = flags; ), TP_printk("dev = (%d,%d), pnid = %llu, name:%s, flags:%x", show_dev_nid(__entry), - __entry->name, + __get_str(name), __entry->flags) ); @@ -169,7 +169,7 @@ DECLARE_EVENT_CLASS(erofs__map_blocks_enter, __entry->flags ? show_map_flags(__entry->flags) : "NULL") ); -DEFINE_EVENT(erofs__map_blocks_enter, erofs_map_blocks_flatmode_enter, +DEFINE_EVENT(erofs__map_blocks_enter, erofs_map_blocks_enter, TP_PROTO(struct inode *inode, struct erofs_map_blocks *map, unsigned flags), @@ -221,7 +221,7 @@ DECLARE_EVENT_CLASS(erofs__map_blocks_exit, show_mflags(__entry->mflags), __entry->ret) ); -DEFINE_EVENT(erofs__map_blocks_exit, erofs_map_blocks_flatmode_exit, +DEFINE_EVENT(erofs__map_blocks_exit, erofs_map_blocks_exit, TP_PROTO(struct inode *inode, struct erofs_map_blocks *map, unsigned flags, int ret), diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 1796ff99c3e9c..a7613efc271ab 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -793,20 +793,20 @@ TRACE_EVENT(f2fs_lookup_start, TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) - __field(const char *, name) + __string(name, dentry->d_name.name) __field(unsigned int, flags) ), TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->ino = dir->i_ino; - __entry->name = dentry->d_name.name; + __assign_str(name, dentry->d_name.name); __entry->flags = flags; ), TP_printk("dev = (%d,%d), pino = %lu, name:%s, flags:%u", show_dev_ino(__entry), - __entry->name, + __get_str(name), __entry->flags) ); @@ -820,7 +820,7 @@ TRACE_EVENT(f2fs_lookup_end, TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) - __field(const char *, name) + __string(name, dentry->d_name.name) __field(nid_t, cino) __field(int, err) ), @@ -828,14 +828,14 @@ TRACE_EVENT(f2fs_lookup_end, TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->ino = dir->i_ino; - __entry->name = dentry->d_name.name; + __assign_str(name, dentry->d_name.name); __entry->cino = ino; __entry->err = err; ), TP_printk("dev = (%d,%d), pino = %lu, name:%s, ino:%u, err:%d", show_dev_ino(__entry), - __entry->name, + __get_str(name), __entry->cino, __entry->err) ); diff --git a/include/trace/events/iocost.h b/include/trace/events/iocost.h index c2f580fd371b1..e282ce02fa2d6 100644 --- a/include/trace/events/iocost.h +++ b/include/trace/events/iocost.h @@ -11,7 +11,7 @@ struct ioc_gq; #include -TRACE_EVENT(iocost_iocg_activate, +DECLARE_EVENT_CLASS(iocost_iocg_state, TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now, u64 last_period, u64 cur_period, u64 vtime), @@ -26,7 +26,6 @@ TRACE_EVENT(iocost_iocg_activate, __field(u64, vrate) __field(u64, last_period) __field(u64, cur_period) - __field(u64, last_vtime) __field(u64, vtime) __field(u32, weight) __field(u32, inuse) @@ -42,7 +41,6 @@ TRACE_EVENT(iocost_iocg_activate, __entry->vrate = now->vrate; __entry->last_period = last_period; __entry->cur_period = cur_period; - __entry->last_vtime = iocg->last_vtime; __entry->vtime = vtime; __entry->weight = iocg->weight; __entry->inuse = iocg->inuse; @@ -51,17 +49,30 @@ TRACE_EVENT(iocost_iocg_activate, ), TP_printk("[%s:%s] now=%llu:%llu vrate=%llu " - "period=%llu->%llu vtime=%llu->%llu " + "period=%llu->%llu vtime=%llu " "weight=%u/%u hweight=%llu/%llu", __get_str(devname), __get_str(cgroup), __entry->now, __entry->vnow, __entry->vrate, __entry->last_period, __entry->cur_period, - __entry->last_vtime, __entry->vtime, - __entry->inuse, __entry->weight, + __entry->vtime, __entry->inuse, __entry->weight, __entry->hweight_inuse, __entry->hweight_active ) ); +DEFINE_EVENT(iocost_iocg_state, iocost_iocg_activate, + TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now, + u64 last_period, u64 cur_period, u64 vtime), + + TP_ARGS(iocg, path, now, last_period, cur_period, vtime) +); + +DEFINE_EVENT(iocost_iocg_state, iocost_iocg_idle, + TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now, + u64 last_period, u64 cur_period, u64 vtime), + + TP_ARGS(iocg, path, now, last_period, cur_period, vtime) +); + DECLARE_EVENT_CLASS(iocg_inuse_update, TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now, @@ -98,7 +109,7 @@ DECLARE_EVENT_CLASS(iocg_inuse_update, ) ); -DEFINE_EVENT(iocg_inuse_update, iocost_inuse_takeback, +DEFINE_EVENT(iocg_inuse_update, iocost_inuse_shortage, TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now, u32 old_inuse, u32 new_inuse, @@ -108,7 +119,7 @@ DEFINE_EVENT(iocg_inuse_update, iocost_inuse_takeback, old_hw_inuse, new_hw_inuse) ); -DEFINE_EVENT(iocg_inuse_update, iocost_inuse_giveaway, +DEFINE_EVENT(iocg_inuse_update, iocost_inuse_transfer, TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now, u32 old_inuse, u32 new_inuse, @@ -118,7 +129,7 @@ DEFINE_EVENT(iocg_inuse_update, iocost_inuse_giveaway, old_hw_inuse, new_hw_inuse) ); -DEFINE_EVENT(iocg_inuse_update, iocost_inuse_reset, +DEFINE_EVENT(iocg_inuse_update, iocost_inuse_adjust, TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now, u32 old_inuse, u32 new_inuse, @@ -131,11 +142,9 @@ DEFINE_EVENT(iocg_inuse_update, iocost_inuse_reset, TRACE_EVENT(iocost_ioc_vrate_adj, TP_PROTO(struct ioc *ioc, u64 new_vrate, u32 *missed_ppm, - u32 rq_wait_pct, int nr_lagging, int nr_shortages, - int nr_surpluses), + u32 rq_wait_pct, int nr_lagging, int nr_shortages), - TP_ARGS(ioc, new_vrate, missed_ppm, rq_wait_pct, nr_lagging, nr_shortages, - nr_surpluses), + TP_ARGS(ioc, new_vrate, missed_ppm, rq_wait_pct, nr_lagging, nr_shortages), TP_STRUCT__entry ( __string(devname, ioc_name(ioc)) @@ -147,7 +156,6 @@ TRACE_EVENT(iocost_ioc_vrate_adj, __field(u32, rq_wait_pct) __field(int, nr_lagging) __field(int, nr_shortages) - __field(int, nr_surpluses) ), TP_fast_assign( @@ -160,15 +168,54 @@ TRACE_EVENT(iocost_ioc_vrate_adj, __entry->rq_wait_pct = rq_wait_pct; __entry->nr_lagging = nr_lagging; __entry->nr_shortages = nr_shortages; - __entry->nr_surpluses = nr_surpluses; ), - TP_printk("[%s] vrate=%llu->%llu busy=%d missed_ppm=%u:%u rq_wait_pct=%u lagging=%d shortages=%d surpluses=%d", + TP_printk("[%s] vrate=%llu->%llu busy=%d missed_ppm=%u:%u rq_wait_pct=%u lagging=%d shortages=%d", __get_str(devname), __entry->old_vrate, __entry->new_vrate, __entry->busy_level, __entry->read_missed_ppm, __entry->write_missed_ppm, - __entry->rq_wait_pct, __entry->nr_lagging, __entry->nr_shortages, - __entry->nr_surpluses + __entry->rq_wait_pct, __entry->nr_lagging, __entry->nr_shortages + ) +); + +TRACE_EVENT(iocost_iocg_forgive_debt, + + TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now, + u32 usage_pct, u64 old_debt, u64 new_debt, + u64 old_delay, u64 new_delay), + + TP_ARGS(iocg, path, now, usage_pct, + old_debt, new_debt, old_delay, new_delay), + + TP_STRUCT__entry ( + __string(devname, ioc_name(iocg->ioc)) + __string(cgroup, path) + __field(u64, now) + __field(u64, vnow) + __field(u32, usage_pct) + __field(u64, old_debt) + __field(u64, new_debt) + __field(u64, old_delay) + __field(u64, new_delay) + ), + + TP_fast_assign( + __assign_str(devname, ioc_name(iocg->ioc)); + __assign_str(cgroup, path); + __entry->now = now->now; + __entry->vnow = now->vnow; + __entry->usage_pct = usage_pct; + __entry->old_debt = old_debt; + __entry->new_debt = new_debt; + __entry->old_delay = old_delay; + __entry->new_delay = new_delay; + ), + + TP_printk("[%s:%s] now=%llu:%llu usage=%u debt=%llu->%llu delay=%llu->%llu", + __get_str(devname), __get_str(cgroup), + __entry->now, __entry->vnow, __entry->usage_pct, + __entry->old_debt, __entry->new_debt, + __entry->old_delay, __entry->new_delay ) ); diff --git a/include/trace/events/libata.h b/include/trace/events/libata.h index ab69434e2329e..72e785a903b65 100644 --- a/include/trace/events/libata.h +++ b/include/trace/events/libata.h @@ -249,6 +249,7 @@ DECLARE_EVENT_CLASS(ata_qc_complete_template, __entry->hob_feature = qc->result_tf.hob_feature; __entry->nsect = qc->result_tf.nsect; __entry->hob_nsect = qc->result_tf.hob_nsect; + __entry->flags = qc->flags; ), TP_printk("ata_port=%u ata_dev=%u tag=%d flags=%s status=%s " \ diff --git a/include/trace/events/random.h b/include/trace/events/random.h deleted file mode 100644 index 32c10a515e2d5..0000000000000 --- a/include/trace/events/random.h +++ /dev/null @@ -1,313 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM random - -#if !defined(_TRACE_RANDOM_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_RANDOM_H - -#include -#include - -TRACE_EVENT(add_device_randomness, - TP_PROTO(int bytes, unsigned long IP), - - TP_ARGS(bytes, IP), - - TP_STRUCT__entry( - __field( int, bytes ) - __field(unsigned long, IP ) - ), - - TP_fast_assign( - __entry->bytes = bytes; - __entry->IP = IP; - ), - - TP_printk("bytes %d caller %pS", - __entry->bytes, (void *)__entry->IP) -); - -DECLARE_EVENT_CLASS(random__mix_pool_bytes, - TP_PROTO(const char *pool_name, int bytes, unsigned long IP), - - TP_ARGS(pool_name, bytes, IP), - - TP_STRUCT__entry( - __field( const char *, pool_name ) - __field( int, bytes ) - __field(unsigned long, IP ) - ), - - TP_fast_assign( - __entry->pool_name = pool_name; - __entry->bytes = bytes; - __entry->IP = IP; - ), - - TP_printk("%s pool: bytes %d caller %pS", - __entry->pool_name, __entry->bytes, (void *)__entry->IP) -); - -DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes, - TP_PROTO(const char *pool_name, int bytes, unsigned long IP), - - TP_ARGS(pool_name, bytes, IP) -); - -DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes_nolock, - TP_PROTO(const char *pool_name, int bytes, unsigned long IP), - - TP_ARGS(pool_name, bytes, IP) -); - -TRACE_EVENT(credit_entropy_bits, - TP_PROTO(const char *pool_name, int bits, int entropy_count, - unsigned long IP), - - TP_ARGS(pool_name, bits, entropy_count, IP), - - TP_STRUCT__entry( - __field( const char *, pool_name ) - __field( int, bits ) - __field( int, entropy_count ) - __field(unsigned long, IP ) - ), - - TP_fast_assign( - __entry->pool_name = pool_name; - __entry->bits = bits; - __entry->entropy_count = entropy_count; - __entry->IP = IP; - ), - - TP_printk("%s pool: bits %d entropy_count %d caller %pS", - __entry->pool_name, __entry->bits, - __entry->entropy_count, (void *)__entry->IP) -); - -TRACE_EVENT(push_to_pool, - TP_PROTO(const char *pool_name, int pool_bits, int input_bits), - - TP_ARGS(pool_name, pool_bits, input_bits), - - TP_STRUCT__entry( - __field( const char *, pool_name ) - __field( int, pool_bits ) - __field( int, input_bits ) - ), - - TP_fast_assign( - __entry->pool_name = pool_name; - __entry->pool_bits = pool_bits; - __entry->input_bits = input_bits; - ), - - TP_printk("%s: pool_bits %d input_pool_bits %d", - __entry->pool_name, __entry->pool_bits, - __entry->input_bits) -); - -TRACE_EVENT(debit_entropy, - TP_PROTO(const char *pool_name, int debit_bits), - - TP_ARGS(pool_name, debit_bits), - - TP_STRUCT__entry( - __field( const char *, pool_name ) - __field( int, debit_bits ) - ), - - TP_fast_assign( - __entry->pool_name = pool_name; - __entry->debit_bits = debit_bits; - ), - - TP_printk("%s: debit_bits %d", __entry->pool_name, - __entry->debit_bits) -); - -TRACE_EVENT(add_input_randomness, - TP_PROTO(int input_bits), - - TP_ARGS(input_bits), - - TP_STRUCT__entry( - __field( int, input_bits ) - ), - - TP_fast_assign( - __entry->input_bits = input_bits; - ), - - TP_printk("input_pool_bits %d", __entry->input_bits) -); - -TRACE_EVENT(add_disk_randomness, - TP_PROTO(dev_t dev, int input_bits), - - TP_ARGS(dev, input_bits), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( int, input_bits ) - ), - - TP_fast_assign( - __entry->dev = dev; - __entry->input_bits = input_bits; - ), - - TP_printk("dev %d,%d input_pool_bits %d", MAJOR(__entry->dev), - MINOR(__entry->dev), __entry->input_bits) -); - -TRACE_EVENT(xfer_secondary_pool, - TP_PROTO(const char *pool_name, int xfer_bits, int request_bits, - int pool_entropy, int input_entropy), - - TP_ARGS(pool_name, xfer_bits, request_bits, pool_entropy, - input_entropy), - - TP_STRUCT__entry( - __field( const char *, pool_name ) - __field( int, xfer_bits ) - __field( int, request_bits ) - __field( int, pool_entropy ) - __field( int, input_entropy ) - ), - - TP_fast_assign( - __entry->pool_name = pool_name; - __entry->xfer_bits = xfer_bits; - __entry->request_bits = request_bits; - __entry->pool_entropy = pool_entropy; - __entry->input_entropy = input_entropy; - ), - - TP_printk("pool %s xfer_bits %d request_bits %d pool_entropy %d " - "input_entropy %d", __entry->pool_name, __entry->xfer_bits, - __entry->request_bits, __entry->pool_entropy, - __entry->input_entropy) -); - -DECLARE_EVENT_CLASS(random__get_random_bytes, - TP_PROTO(int nbytes, unsigned long IP), - - TP_ARGS(nbytes, IP), - - TP_STRUCT__entry( - __field( int, nbytes ) - __field(unsigned long, IP ) - ), - - TP_fast_assign( - __entry->nbytes = nbytes; - __entry->IP = IP; - ), - - TP_printk("nbytes %d caller %pS", __entry->nbytes, (void *)__entry->IP) -); - -DEFINE_EVENT(random__get_random_bytes, get_random_bytes, - TP_PROTO(int nbytes, unsigned long IP), - - TP_ARGS(nbytes, IP) -); - -DEFINE_EVENT(random__get_random_bytes, get_random_bytes_arch, - TP_PROTO(int nbytes, unsigned long IP), - - TP_ARGS(nbytes, IP) -); - -DECLARE_EVENT_CLASS(random__extract_entropy, - TP_PROTO(const char *pool_name, int nbytes, int entropy_count, - unsigned long IP), - - TP_ARGS(pool_name, nbytes, entropy_count, IP), - - TP_STRUCT__entry( - __field( const char *, pool_name ) - __field( int, nbytes ) - __field( int, entropy_count ) - __field(unsigned long, IP ) - ), - - TP_fast_assign( - __entry->pool_name = pool_name; - __entry->nbytes = nbytes; - __entry->entropy_count = entropy_count; - __entry->IP = IP; - ), - - TP_printk("%s pool: nbytes %d entropy_count %d caller %pS", - __entry->pool_name, __entry->nbytes, __entry->entropy_count, - (void *)__entry->IP) -); - - -DEFINE_EVENT(random__extract_entropy, extract_entropy, - TP_PROTO(const char *pool_name, int nbytes, int entropy_count, - unsigned long IP), - - TP_ARGS(pool_name, nbytes, entropy_count, IP) -); - -DEFINE_EVENT(random__extract_entropy, extract_entropy_user, - TP_PROTO(const char *pool_name, int nbytes, int entropy_count, - unsigned long IP), - - TP_ARGS(pool_name, nbytes, entropy_count, IP) -); - -TRACE_EVENT(random_read, - TP_PROTO(int got_bits, int need_bits, int pool_left, int input_left), - - TP_ARGS(got_bits, need_bits, pool_left, input_left), - - TP_STRUCT__entry( - __field( int, got_bits ) - __field( int, need_bits ) - __field( int, pool_left ) - __field( int, input_left ) - ), - - TP_fast_assign( - __entry->got_bits = got_bits; - __entry->need_bits = need_bits; - __entry->pool_left = pool_left; - __entry->input_left = input_left; - ), - - TP_printk("got_bits %d still_needed_bits %d " - "blocking_pool_entropy_left %d input_entropy_left %d", - __entry->got_bits, __entry->got_bits, __entry->pool_left, - __entry->input_left) -); - -TRACE_EVENT(urandom_read, - TP_PROTO(int got_bits, int pool_left, int input_left), - - TP_ARGS(got_bits, pool_left, input_left), - - TP_STRUCT__entry( - __field( int, got_bits ) - __field( int, pool_left ) - __field( int, input_left ) - ), - - TP_fast_assign( - __entry->got_bits = got_bits; - __entry->pool_left = pool_left; - __entry->input_left = input_left; - ), - - TP_printk("got_bits %d nonblocking_pool_entropy_left %d " - "input_entropy_left %d", __entry->got_bits, - __entry->pool_left, __entry->input_left) -); - -#endif /* _TRACE_RANDOM_H */ - -/* This part must be outside protection */ -#include diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 059b6e45a0283..839bb07b93a71 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -1511,7 +1511,7 @@ TRACE_EVENT(rxrpc_call_reset, __entry->call_serial = call->rx_serial; __entry->conn_serial = call->conn->hi_serial; __entry->tx_seq = call->tx_hard_ack; - __entry->rx_seq = call->ackr_seen; + __entry->rx_seq = call->rx_hard_ack; ), TP_printk("c=%08x %08x:%08x r=%08x/%08x tx=%08x rx=%08x", diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h index 51fe9f6719eb1..ad11938c09725 100644 --- a/include/trace/events/sock.h +++ b/include/trace/events/sock.h @@ -97,7 +97,7 @@ TRACE_EVENT(sock_exceed_buf_limit, TP_STRUCT__entry( __array(char, name, 32) - __field(long *, sysctl_mem) + __array(long, sysctl_mem, 3) __field(long, allocated) __field(int, sysctl_rmem) __field(int, rmem_alloc) @@ -109,7 +109,9 @@ TRACE_EVENT(sock_exceed_buf_limit, TP_fast_assign( strncpy(__entry->name, prot->name, 32); - __entry->sysctl_mem = prot->sysctl_mem; + __entry->sysctl_mem[0] = READ_ONCE(prot->sysctl_mem[0]); + __entry->sysctl_mem[1] = READ_ONCE(prot->sysctl_mem[1]); + __entry->sysctl_mem[2] = READ_ONCE(prot->sysctl_mem[2]); __entry->allocated = allocated; __entry->sysctl_rmem = sk_get_rmem0(sk, prot); __entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc); diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index a5ab2973e8dc3..57184c02e3b93 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -283,7 +283,7 @@ TRACE_EVENT(mm_vmscan_lru_isolate, __field(unsigned long, nr_scanned) __field(unsigned long, nr_skipped) __field(unsigned long, nr_taken) - __field(isolate_mode_t, isolate_mode) + __field(unsigned int, isolate_mode) __field(int, lru) ), @@ -294,7 +294,7 @@ TRACE_EVENT(mm_vmscan_lru_isolate, __entry->nr_scanned = nr_scanned; __entry->nr_skipped = nr_skipped; __entry->nr_taken = nr_taken; - __entry->isolate_mode = isolate_mode; + __entry->isolate_mode = (__force unsigned int)isolate_mode; __entry->lru = lru; ), diff --git a/include/uapi/asm-generic/poll.h b/include/uapi/asm-generic/poll.h index 41b509f410bf9..f9c520ce4bf4e 100644 --- a/include/uapi/asm-generic/poll.h +++ b/include/uapi/asm-generic/poll.h @@ -29,7 +29,7 @@ #define POLLRDHUP 0x2000 #endif -#define POLLFREE (__force __poll_t)0x4000 /* currently only for epoll */ +#define POLLFREE (__force __poll_t)0x4000 #define POLL_BUSY_LOOP (__force __poll_t)0x8000 diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e119122e85731..2c9206c5c2bef 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -10,6 +10,7 @@ #include #include +#include /* Extended instruction set based on top of classic BPF */ @@ -1310,8 +1311,8 @@ union bpf_attr { * Return * The return value depends on the result of the test, and can be: * - * * 0, if current task belongs to the cgroup2. - * * 1, if current task does not belong to the cgroup2. + * * 1, if current task belongs to the cgroup2. + * * 0, if current task does not belong to the cgroup2. * * A negative error code, if an error occurred. * * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) @@ -1585,6 +1586,10 @@ union bpf_attr { * Use with ENCAP_L3/L4 flags to further specify the tunnel * type; *len* is the length of the inner MAC header. * + * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**: + * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the + * L2 type as Ethernet. + * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be @@ -2290,7 +2295,7 @@ union bpf_attr { * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) * Description * Select a **SO_REUSEPORT** socket from a - * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. + * **BPF_MAP_TYPE_REUSEPORT_SOCKARRAY** *map*. * It checks the selected socket is matching the incoming * request in the socket buffer. * Return @@ -3737,13 +3742,11 @@ union bpf_attr { FN(bpf_per_cpu_ptr), \ FN(bpf_this_cpu_ptr), \ FN(redirect_peer), \ - FN(unsafe_helper), \ - /* - * unsafe_helper is the helper functions we implement - * ourselves. In order to ensure that the order of the new helper - * functions which backported from community kernel is consistent, the - * unsafe_helper need to be be placed last. - */ + /* */ + +#ifndef INT_MAX +#define INT_MAX 0x7fffffff +#endif /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -3752,6 +3755,10 @@ union bpf_attr { enum bpf_func_id { __BPF_FUNC_MAPPER(__BPF_ENUM_FN) __BPF_FUNC_MAX_ID, + + __BPF_BYTEDANCE_FUNC_MIN_ID = INT_MAX / 2, + __BPF_ENUM_FN(unsafe_helper), + __BPF_BYTEDANCE_FUNC_MAX_ID, }; #undef __BPF_ENUM_FN @@ -3827,6 +3834,8 @@ enum { BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 = (1ULL << 2), BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3), BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), + BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5), + BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6), }; enum { @@ -3975,7 +3984,8 @@ struct bpf_sock { __u32 src_ip4; __u32 src_ip6[4]; __u32 src_port; /* host byte order */ - __u32 dst_port; /* network byte order */ + __be16 dst_port; /* network byte order */ + __u16 :16; /* zero padding */ __u32 dst_ip4; __u32 dst_ip6[4]; __u32 state; diff --git a/include/uapi/linux/cachefiles.h b/include/uapi/linux/cachefiles.h new file mode 100644 index 0000000000000..b6746a2fe57c4 --- /dev/null +++ b/include/uapi/linux/cachefiles.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_CACHEFILES_H +#define _LINUX_CACHEFILES_H + +#include +#include + +/* + * Fscache ensures that the maximum length of cookie key is 255. The volume key + * is controlled by netfs, and generally no bigger than 255. + */ +#define CACHEFILES_MSG_MAX_SIZE 1024 + +enum cachefiles_opcode { + CACHEFILES_OP_OPEN, + CACHEFILES_OP_CLOSE, + CACHEFILES_OP_READ, +}; + +/* + * Message Header + * + * @msg_id a unique ID identifying this message + * @opcode message type, CACHEFILE_OP_* + * @len message length, including message header and following data + * @object_id a unique ID identifying a cache file + * @data message type specific payload + */ +struct cachefiles_msg { + __u32 msg_id; + __u32 opcode; + __u32 len; + __u32 object_id; + __u8 data[]; +}; + +/* + * @data contains the volume_key followed directly by the cookie_key. volume_key + * is a NUL-terminated string; @volume_key_size indicates the size of the volume + * key in bytes (with trailing NUL). cookie_key is a string without trailing + * NUL; @cookie_key_size indicates the size of the cookie key in bytes (without + * trailing NUL). + * + * @fd identifies an anon_fd referring to the cache file. + */ +struct cachefiles_open { + __u32 volume_key_size; + __u32 cookie_key_size; + __u32 fd; + __u32 flags; + __u8 data[]; +}; + +/* + * @off indicates the starting offset of the requested file range + * @len indicates the length of the requested file range + */ +struct cachefiles_read { + __u64 off; + __u64 len; +}; + +/* + * Reply for READ request + * @arg for this ioctl is the @id field of READ request. + */ +#define CACHEFILES_IOC_READ_COMPLETE _IOW(0x98, 1, int) + +#endif diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h index 7f30393b92c3b..f76d11725c6c6 100644 --- a/include/uapi/linux/dma-buf.h +++ b/include/uapi/linux/dma-buf.h @@ -44,7 +44,7 @@ struct dma_buf_sync { * between them in actual uapi, they're just different numbers. */ #define DMA_BUF_SET_NAME _IOW(DMA_BUF_BASE, 1, const char *) -#define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, u32) -#define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, u64) +#define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, __u32) +#define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, __u64) #endif diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h new file mode 100644 index 0000000000000..9890fa62243a2 --- /dev/null +++ b/include/uapi/linux/ethtool_netlink.h @@ -0,0 +1,144 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * include/uapi/linux/ethtool_netlink.h - netlink interface for ethtool + * + * See Documentation/networking/ethtool-netlink.rst in kernel source tree for + * doucumentation of the interface. + */ + +#ifndef _UAPI_LINUX_ETHTOOL_NETLINK_H_ +#define _UAPI_LINUX_ETHTOOL_NETLINK_H_ + +#include + +/* message types - userspace to kernel */ +enum { + ETHTOOL_MSG_USER_NONE, + ETHTOOL_MSG_STRSET_GET, + ETHTOOL_MSG_LINKINFO_GET, + ETHTOOL_MSG_LINKINFO_SET, + ETHTOOL_MSG_LINKMODES_GET, + ETHTOOL_MSG_LINKMODES_SET, + ETHTOOL_MSG_LINKSTATE_GET, + ETHTOOL_MSG_DEBUG_GET, + ETHTOOL_MSG_DEBUG_SET, + ETHTOOL_MSG_WOL_GET, + ETHTOOL_MSG_WOL_SET, + ETHTOOL_MSG_FEATURES_GET, + ETHTOOL_MSG_FEATURES_SET, + ETHTOOL_MSG_PRIVFLAGS_GET, + ETHTOOL_MSG_PRIVFLAGS_SET, + ETHTOOL_MSG_RINGS_GET, + ETHTOOL_MSG_RINGS_SET, + ETHTOOL_MSG_CHANNELS_GET, + ETHTOOL_MSG_CHANNELS_SET, + ETHTOOL_MSG_COALESCE_GET, + ETHTOOL_MSG_COALESCE_SET, + ETHTOOL_MSG_PAUSE_GET, + ETHTOOL_MSG_PAUSE_SET, + ETHTOOL_MSG_EEE_GET, + ETHTOOL_MSG_EEE_SET, + ETHTOOL_MSG_TSINFO_GET, + ETHTOOL_MSG_CABLE_TEST_ACT, + ETHTOOL_MSG_CABLE_TEST_TDR_ACT, + ETHTOOL_MSG_TUNNEL_INFO_GET, + ETHTOOL_MSG_FEC_GET, + ETHTOOL_MSG_FEC_SET, + ETHTOOL_MSG_MODULE_EEPROM_GET, + ETHTOOL_MSG_STATS_GET, + + /* add new constants above here */ + __ETHTOOL_MSG_USER_CNT, + ETHTOOL_MSG_USER_MAX = __ETHTOOL_MSG_USER_CNT - 1 +}; + +/* message types - kernel to userspace */ +enum { + ETHTOOL_MSG_KERNEL_NONE, + ETHTOOL_MSG_STRSET_GET_REPLY, + ETHTOOL_MSG_LINKINFO_GET_REPLY, + ETHTOOL_MSG_LINKINFO_NTF, + ETHTOOL_MSG_LINKMODES_GET_REPLY, + ETHTOOL_MSG_LINKMODES_NTF, + ETHTOOL_MSG_LINKSTATE_GET_REPLY, + ETHTOOL_MSG_DEBUG_GET_REPLY, + ETHTOOL_MSG_DEBUG_NTF, + ETHTOOL_MSG_WOL_GET_REPLY, + ETHTOOL_MSG_WOL_NTF, + ETHTOOL_MSG_FEATURES_GET_REPLY, + ETHTOOL_MSG_FEATURES_SET_REPLY, + ETHTOOL_MSG_FEATURES_NTF, + ETHTOOL_MSG_PRIVFLAGS_GET_REPLY, + ETHTOOL_MSG_PRIVFLAGS_NTF, + ETHTOOL_MSG_RINGS_GET_REPLY, + ETHTOOL_MSG_RINGS_NTF, + ETHTOOL_MSG_CHANNELS_GET_REPLY, + ETHTOOL_MSG_CHANNELS_NTF, + ETHTOOL_MSG_COALESCE_GET_REPLY, + ETHTOOL_MSG_COALESCE_NTF, + ETHTOOL_MSG_PAUSE_GET_REPLY, + ETHTOOL_MSG_PAUSE_NTF, + ETHTOOL_MSG_EEE_GET_REPLY, + ETHTOOL_MSG_EEE_NTF, + ETHTOOL_MSG_TSINFO_GET_REPLY, + ETHTOOL_MSG_CABLE_TEST_NTF, + ETHTOOL_MSG_CABLE_TEST_TDR_NTF, + ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY, + ETHTOOL_MSG_FEC_GET_REPLY, + ETHTOOL_MSG_FEC_NTF, + ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY, + ETHTOOL_MSG_STATS_GET_REPLY, + + /* add new constants above here */ + __ETHTOOL_MSG_KERNEL_CNT, + ETHTOOL_MSG_KERNEL_MAX = __ETHTOOL_MSG_KERNEL_CNT - 1 +}; + +/* request header */ + +/* use compact bitsets in reply */ +#define ETHTOOL_FLAG_COMPACT_BITSETS (1 << 0) +/* provide optional reply for SET or ACT requests */ +#define ETHTOOL_FLAG_OMIT_REPLY (1 << 1) +/* request statistics, if supported by the driver */ +#define ETHTOOL_FLAG_STATS (1 << 2) + +#define ETHTOOL_FLAG_ALL (ETHTOOL_FLAG_COMPACT_BITSETS | \ + ETHTOOL_FLAG_OMIT_REPLY | \ + ETHTOOL_FLAG_STATS) + +enum { + ETHTOOL_A_HEADER_UNSPEC, + ETHTOOL_A_HEADER_DEV_INDEX, /* u32 */ + ETHTOOL_A_HEADER_DEV_NAME, /* string */ + ETHTOOL_A_HEADER_FLAGS, /* u32 - ETHTOOL_FLAG_* */ + + /* add new constants above here */ + __ETHTOOL_A_HEADER_CNT, + ETHTOOL_A_HEADER_MAX = __ETHTOOL_A_HEADER_CNT - 1 +}; + +/* MODULE EEPROM */ + +enum { + ETHTOOL_A_MODULE_EEPROM_UNSPEC, + ETHTOOL_A_MODULE_EEPROM_HEADER, /* nest - _A_HEADER_* */ + + ETHTOOL_A_MODULE_EEPROM_OFFSET, /* u32 */ + ETHTOOL_A_MODULE_EEPROM_LENGTH, /* u32 */ + ETHTOOL_A_MODULE_EEPROM_PAGE, /* u8 */ + ETHTOOL_A_MODULE_EEPROM_BANK, /* u8 */ + ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS, /* u8 */ + ETHTOOL_A_MODULE_EEPROM_DATA, /* nested */ + + __ETHTOOL_A_MODULE_EEPROM_CNT, + ETHTOOL_A_MODULE_EEPROM_MAX = (__ETHTOOL_A_MODULE_EEPROM_CNT - 1) +}; + +/* generic netlink info */ +#define ETHTOOL_GENL_NAME "ethtool" +#define ETHTOOL_GENL_VERSION 1 + +#define ETHTOOL_MCGRP_MONITOR_NAME "monitor" + +#endif /* _UAPI_LINUX_ETHTOOL_NETLINK_H_ */ diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index ffcc561c4dc54..6fea4c7fdd246 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -231,7 +231,8 @@ struct fuse_attr { uint32_t gid; uint32_t rdev; uint32_t blksize; - uint32_t padding; + uint32_t padding:31; + uint32_t should_inval:1; }; struct fuse_kstatfs { @@ -277,12 +278,14 @@ struct fuse_file_lock { * FOPEN_NONSEEKABLE: the file is not seekable * FOPEN_CACHE_DIR: allow caching this directory * FOPEN_STREAM: the file is stream-like (no file position at all) + * FOPEN_INVAL_ATTR: invalidate the attr cache on open */ #define FOPEN_DIRECT_IO (1 << 0) #define FOPEN_KEEP_CACHE (1 << 1) #define FOPEN_NONSEEKABLE (1 << 2) #define FOPEN_CACHE_DIR (1 << 3) #define FOPEN_STREAM (1 << 4) +#define FOPEN_INVAL_ATTR (1 << 31) /** * INIT request/reply flags @@ -875,7 +878,8 @@ struct fuse_notify_retrieve_in { }; /* Device ioctls: */ -#define FUSE_DEV_IOC_CLONE _IOR(229, 0, uint32_t) +#define FUSE_DEV_IOC_CLONE _IOR(229, 0, uint32_t) +#define FUSE_DEV_IOC_RESTORE_REQ _IO(229, 255) struct fuse_lseek_in { uint64_t fh; diff --git a/include/uapi/linux/if.h b/include/uapi/linux/if.h index 7fea0fd7d6f54..4bf33344aab19 100644 --- a/include/uapi/linux/if.h +++ b/include/uapi/linux/if.h @@ -33,6 +33,7 @@ #define IFNAMSIZ 16 #endif /* __UAPI_DEF_IF_IFNAMSIZ */ #define IFALIASZ 256 +#define ALTIFNAMSIZ 128 #include /* For glibc compatibility. An empty enum does not compile. */ diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 311a57f3e01a2..7f0ae1f411e3a 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -278,7 +278,8 @@ #define KEY_PAUSECD 201 #define KEY_PROG3 202 #define KEY_PROG4 203 -#define KEY_DASHBOARD 204 /* AL Dashboard */ +#define KEY_ALL_APPLICATIONS 204 /* AC Desktop Show All Applications */ +#define KEY_DASHBOARD KEY_ALL_APPLICATIONS #define KEY_SUSPEND 205 #define KEY_CLOSE 206 /* AC Close */ #define KEY_PLAY 207 @@ -608,6 +609,7 @@ #define KEY_ASSISTANT 0x247 /* AL Context-aware desktop assistant */ #define KEY_KBD_LAYOUT_NEXT 0x248 /* AC Next Keyboard Layout Select */ #define KEY_EMOJI_PICKER 0x249 /* Show/hide emoji picker (HUTRR101) */ +#define KEY_DICTATE 0x24a /* Start or Stop Voice Dictation Session (HUTRR99) */ #define KEY_BRIGHTNESS_MIN 0x250 /* Set Brightness to Minimum */ #define KEY_BRIGHTNESS_MAX 0x251 /* Set Brightness to Maximum */ diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index f6e3c8c9c7449..4fa4e979e948a 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -263,7 +263,7 @@ enum nfc_sdp_attr { #define NFC_SE_ENABLED 0x1 struct sockaddr_nfc { - sa_family_t sa_family; + __kernel_sa_family_t sa_family; __u32 dev_idx; __u32 target_idx; __u32 nfc_protocol; @@ -271,14 +271,14 @@ struct sockaddr_nfc { #define NFC_LLCP_MAX_SERVICE_NAME 63 struct sockaddr_nfc_llcp { - sa_family_t sa_family; + __kernel_sa_family_t sa_family; __u32 dev_idx; __u32 target_idx; __u32 nfc_protocol; __u8 dsap; /* Destination SAP, if known */ __u8 ssap; /* Source SAP to be bound to */ char service_name[NFC_LLCP_MAX_SERVICE_NAME]; /* Service name URI */; - size_t service_name_len; + __kernel_size_t service_name_len; }; /* NFC socket protocols */ diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 29d6e93fd15e3..502633d275ef4 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -502,6 +502,12 @@ #define PCI_EXP_DEVCTL_URRE 0x0008 /* Unsupported Request Reporting En. */ #define PCI_EXP_DEVCTL_RELAX_EN 0x0010 /* Enable relaxed ordering */ #define PCI_EXP_DEVCTL_PAYLOAD 0x00e0 /* Max_Payload_Size */ +#define PCI_EXP_DEVCTL_PAYLOAD_128B 0x0000 /* 128 Bytes */ +#define PCI_EXP_DEVCTL_PAYLOAD_256B 0x0020 /* 256 Bytes */ +#define PCI_EXP_DEVCTL_PAYLOAD_512B 0x0040 /* 512 Bytes */ +#define PCI_EXP_DEVCTL_PAYLOAD_1024B 0x0060 /* 1024 Bytes */ +#define PCI_EXP_DEVCTL_PAYLOAD_2048B 0x0080 /* 2048 Bytes */ +#define PCI_EXP_DEVCTL_PAYLOAD_4096B 0x00a0 /* 4096 Bytes */ #define PCI_EXP_DEVCTL_EXT_TAG 0x0100 /* Extended Tag Field Enable */ #define PCI_EXP_DEVCTL_PHANTOM 0x0200 /* Phantom Functions Enable */ #define PCI_EXP_DEVCTL_AUX_PME 0x0400 /* Auxiliary Power PM Enable */ @@ -604,6 +610,7 @@ #define PCI_EXP_SLTCTL_PWR_OFF 0x0400 /* Power Off */ #define PCI_EXP_SLTCTL_EIC 0x0800 /* Electromechanical Interlock Control */ #define PCI_EXP_SLTCTL_DLLSCE 0x1000 /* Data Link Layer State Changed Enable */ +#define PCI_EXP_SLTCTL_IBPD_DISABLE 0x4000 /* In-band PD disable */ #define PCI_EXP_SLTSTA 26 /* Slot Status */ #define PCI_EXP_SLTSTA_ABP 0x0001 /* Attention Button Pressed */ #define PCI_EXP_SLTSTA_PFD 0x0002 /* Power Fault Detected */ @@ -676,6 +683,7 @@ #define PCI_EXP_LNKSTA2 50 /* Link Status 2 */ #define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 52 /* v2 endpoints with link end here */ #define PCI_EXP_SLTCAP2 52 /* Slot Capabilities 2 */ +#define PCI_EXP_SLTCAP2_IBPD 0x00000001 /* In-band PD Disable Supported */ #define PCI_EXP_SLTCTL2 56 /* Slot Control 2 */ #define PCI_EXP_SLTSTA2 58 /* Slot Status 2 */ @@ -822,6 +830,13 @@ #define PCI_PWR_CAP_BUDGET(x) ((x) & 1) /* Included in system budget */ #define PCI_EXT_CAP_PWR_SIZEOF 16 +/* Root Complex Event Collector Endpoint Association */ +#define PCI_RCEC_RCIEP_BITMAP 4 /* Associated Bitmap for RCiEPs */ +#define PCI_RCEC_BUSN 8 /* RCEC Associated Bus Numbers */ +#define PCI_RCEC_BUSN_REG_VER 0x02 /* Least version with BUSN present */ +#define PCI_RCEC_BUSN_NEXT(x) (((x) >> 8) & 0xff) +#define PCI_RCEC_BUSN_LAST(x) (((x) >> 16) & 0xff) + /* Vendor-Specific (VSEC, PCI_EXT_CAP_ID_VNDR) */ #define PCI_VNDR_HEADER 4 /* Vendor-Specific Header */ #define PCI_VNDR_HEADER_ID(x) ((x) & 0xffff) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index ceccd980ffcfe..996bc0ed06644 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -141,12 +141,14 @@ enum perf_event_sample_format { PERF_SAMPLE_TRANSACTION = 1U << 17, PERF_SAMPLE_REGS_INTR = 1U << 18, PERF_SAMPLE_PHYS_ADDR = 1U << 19, + PERF_SAMPLE_WEIGHT_STRUCT = 1U << 24, - PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 25, /* non-ABI */ __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ }; +#define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT) /* * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set * @@ -858,7 +860,24 @@ enum perf_event_type { * char data[size]; * u64 dyn_size; } && PERF_SAMPLE_STACK_USER * - * { u64 weight; } && PERF_SAMPLE_WEIGHT + * { union perf_sample_weight + * { + * u64 full; && PERF_SAMPLE_WEIGHT + * #if defined(__LITTLE_ENDIAN_BITFIELD) + * struct { + * u32 var1_dw; + * u16 var2_w; + * u16 var3_w; + * } && PERF_SAMPLE_WEIGHT_STRUCT + * #elif defined(__BIG_ENDIAN_BITFIELD) + * struct { + * u16 var3_w; + * u16 var2_w; + * u32 var1_dw; + * } && PERF_SAMPLE_WEIGHT_STRUCT + * #endif + * } + * } * { u64 data_src; } && PERF_SAMPLE_DATA_SRC * { u64 transaction; } && PERF_SAMPLE_TRANSACTION * { u64 abi; # enum perf_sample_regs_abi @@ -1058,14 +1077,16 @@ union perf_mem_data_src { mem_lvl_num:4, /* memory hierarchy level number */ mem_remote:1, /* remote */ mem_snoopx:2, /* snoop mode, ext */ - mem_rsvd:24; + mem_blk:3, /* access blocked */ + mem_rsvd:21; }; }; #elif defined(__BIG_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_rsvd:24, + __u64 mem_rsvd:21, + mem_blk:3, /* access blocked */ mem_snoopx:2, /* snoop mode, ext */ mem_remote:1, /* remote */ mem_lvl_num:4, /* memory hierarchy level number */ @@ -1148,6 +1169,12 @@ union perf_mem_data_src { #define PERF_MEM_TLB_OS 0x40 /* OS fault handler */ #define PERF_MEM_TLB_SHIFT 26 +/* Access blocked */ +#define PERF_MEM_BLK_NA 0x01 /* not available */ +#define PERF_MEM_BLK_DATA 0x02 /* data could not be forwarded */ +#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */ +#define PERF_MEM_BLK_SHIFT 40 + #define PERF_MEM_S(a, s) \ (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) @@ -1179,4 +1206,23 @@ struct perf_branch_entry { reserved:40; }; +union perf_sample_weight { + __u64 full; +#if defined(__LITTLE_ENDIAN_BITFIELD) + struct { + __u32 var1_dw; + __u16 var2_w; + __u16 var3_w; + }; +#elif defined(__BIG_ENDIAN_BITFIELD) + struct { + __u16 var3_w; + __u16 var2_w; + __u32 var1_dw; + }; +#else +#error "Unknown endianness" +#endif +}; + #endif /* _UAPI_LINUX_PERF_EVENT_H */ diff --git a/include/uapi/linux/pfkeyv2.h b/include/uapi/linux/pfkeyv2.h index d65b117852604..8abae1f6749c5 100644 --- a/include/uapi/linux/pfkeyv2.h +++ b/include/uapi/linux/pfkeyv2.h @@ -309,6 +309,7 @@ struct sadb_x_filter { #define SADB_X_AALG_SHA2_512HMAC 7 #define SADB_X_AALG_RIPEMD160HMAC 8 #define SADB_X_AALG_AES_XCBC_MAC 9 +#define SADB_X_AALG_SM3_256HMAC 10 #define SADB_X_AALG_NULL 251 /* kame */ #define SADB_AALG_MAX 251 @@ -329,6 +330,7 @@ struct sadb_x_filter { #define SADB_X_EALG_AES_GCM_ICV16 20 #define SADB_X_EALG_CAMELLIACBC 22 #define SADB_X_EALG_NULL_AES_GMAC 23 +#define SADB_X_EALG_SM4CBC 24 #define SADB_EALG_MAX 253 /* last EALG */ /* private allocations should use 249-255 (RFC2407) */ #define SADB_X_EALG_SERPENTCBC 252 /* draft-ietf-ipsec-ciph-aes-cbc-00 */ diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index edbbf4bfdd9e5..4a245d7a5c8d6 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -807,6 +807,8 @@ struct tc_codel_xstats { /* FQ_CODEL */ +#define FQ_CODEL_QUANTUM_MAX (1 << 20) + enum { TCA_FQ_CODEL_UNSPEC, TCA_FQ_CODEL_TARGET, diff --git a/include/uapi/linux/random.h b/include/uapi/linux/random.h index 26ee91300e3ec..dcc1b3e6106fe 100644 --- a/include/uapi/linux/random.h +++ b/include/uapi/linux/random.h @@ -48,9 +48,11 @@ struct rand_pool_info { * Flags for getrandom(2) * * GRND_NONBLOCK Don't block and return EAGAIN instead - * GRND_RANDOM Use the /dev/random pool instead of /dev/urandom + * GRND_RANDOM No effect + * GRND_INSECURE Return non-cryptographic random bytes */ #define GRND_NONBLOCK 0x0001 #define GRND_RANDOM 0x0002 +#define GRND_INSECURE 0x0004 #endif /* _UAPI_LINUX_RANDOM_H */ diff --git a/include/uapi/linux/serial_reg.h b/include/uapi/linux/serial_reg.h index be07b5470f4bb..f51bc8f368134 100644 --- a/include/uapi/linux/serial_reg.h +++ b/include/uapi/linux/serial_reg.h @@ -62,6 +62,7 @@ * ST16C654: 8 16 56 60 8 16 32 56 PORT_16654 * TI16C750: 1 16 32 56 xx xx xx xx PORT_16750 * TI16C752: 8 16 56 60 8 16 32 56 + * OX16C950: 16 32 112 120 16 32 64 112 PORT_16C950 * Tegra: 1 4 8 14 16 8 4 1 PORT_TEGRA */ #define UART_FCR_R_TRIG_00 0x00 diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h index c4152a94d43f6..da91723ad2063 100644 --- a/include/uapi/linux/vduse.h +++ b/include/uapi/linux/vduse.h @@ -232,4 +232,51 @@ struct vduse_vq_info { /* Get the specified virtqueue's information. Caller should set index field. */ #define VDUSE_VQ_GET_INFO _IOWR(VDUSE_BASE, 0x15, struct vduse_vq_info) +/** + * struct vduse_iova_umem - userspace memory configuration for one IOVA region + * @uaddr: start address of userspace memory, it must be aligned to page size + * @iova: start of the IOVA region + * @size: size of the IOVA region + * @reserved: for future use, needs to be initialized to zero + * + * Structure used by VDUSE_IOTLB_REG_UMEM and VDUSE_IOTLB_DEREG_UMEM + * ioctls to register/de-register userspace memory for IOVA regions + */ +struct vduse_iova_umem { + __u64 uaddr; + __u64 iova; + __u64 size; + __u64 reserved[3]; +}; + +/* Register userspace memory for IOVA regions */ +#define VDUSE_IOTLB_REG_UMEM _IOW(VDUSE_BASE, 0x18, struct vduse_iova_umem) + +/* De-register the userspace memory. Caller should set iova and size field. */ +#define VDUSE_IOTLB_DEREG_UMEM _IOW(VDUSE_BASE, 0x19, struct vduse_iova_umem) + +/** + * struct vduse_iova_info - information of one IOVA region + * @start: start of the IOVA region + * @last: last of the IOVA region + * @capability: capability of the IOVA regsion + * @reserved: for future use, needs to be initialized to zero + * + * Structure used by VDUSE_IOTLB_GET_INFO ioctl to get information of + * one IOVA region. + */ +struct vduse_iova_info { + __u64 start; + __u64 last; +#define VDUSE_IOVA_CAP_UMEM (1 << 0) + __u64 capability; + __u64 reserved[3]; +}; + +/* + * Find the first IOVA region that overlaps with the range [start, last] + * and return some information on it. Caller should set start and last fields. + */ +#define VDUSE_IOTLB_GET_INFO _IOWR(VDUSE_BASE, 0x1a, struct vduse_iova_info) + #endif /* _UAPI_VDUSE_H_ */ diff --git a/include/uapi/linux/virtio_crypto.h b/include/uapi/linux/virtio_crypto.h index 50cdc8aebfcf4..71a54a6849ca9 100644 --- a/include/uapi/linux/virtio_crypto.h +++ b/include/uapi/linux/virtio_crypto.h @@ -37,6 +37,7 @@ #define VIRTIO_CRYPTO_SERVICE_HASH 1 #define VIRTIO_CRYPTO_SERVICE_MAC 2 #define VIRTIO_CRYPTO_SERVICE_AEAD 3 +#define VIRTIO_CRYPTO_SERVICE_AKCIPHER 4 #define VIRTIO_CRYPTO_OPCODE(service, op) (((service) << 8) | (op)) @@ -57,6 +58,10 @@ struct virtio_crypto_ctrl_header { VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x02) #define VIRTIO_CRYPTO_AEAD_DESTROY_SESSION \ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x03) +#define VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x04) +#define VIRTIO_CRYPTO_AKCIPHER_DESTROY_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x05) __le32 opcode; __le32 algo; __le32 flag; @@ -180,6 +185,58 @@ struct virtio_crypto_aead_create_session_req { __u8 padding[32]; }; +struct virtio_crypto_rsa_session_para { +#define VIRTIO_CRYPTO_RSA_RAW_PADDING 0 +#define VIRTIO_CRYPTO_RSA_PKCS1_PADDING 1 + __le32 padding_algo; + +#define VIRTIO_CRYPTO_RSA_NO_HASH 0 +#define VIRTIO_CRYPTO_RSA_MD2 1 +#define VIRTIO_CRYPTO_RSA_MD3 2 +#define VIRTIO_CRYPTO_RSA_MD4 3 +#define VIRTIO_CRYPTO_RSA_MD5 4 +#define VIRTIO_CRYPTO_RSA_SHA1 5 +#define VIRTIO_CRYPTO_RSA_SHA256 6 +#define VIRTIO_CRYPTO_RSA_SHA384 7 +#define VIRTIO_CRYPTO_RSA_SHA512 8 +#define VIRTIO_CRYPTO_RSA_SHA224 9 + __le32 hash_algo; +}; + +struct virtio_crypto_ecdsa_session_para { +#define VIRTIO_CRYPTO_CURVE_UNKNOWN 0 +#define VIRTIO_CRYPTO_CURVE_NIST_P192 1 +#define VIRTIO_CRYPTO_CURVE_NIST_P224 2 +#define VIRTIO_CRYPTO_CURVE_NIST_P256 3 +#define VIRTIO_CRYPTO_CURVE_NIST_P384 4 +#define VIRTIO_CRYPTO_CURVE_NIST_P521 5 + __le32 curve_id; + __le32 padding; +}; + +struct virtio_crypto_akcipher_session_para { +#define VIRTIO_CRYPTO_NO_AKCIPHER 0 +#define VIRTIO_CRYPTO_AKCIPHER_RSA 1 +#define VIRTIO_CRYPTO_AKCIPHER_DSA 2 +#define VIRTIO_CRYPTO_AKCIPHER_ECDSA 3 + __le32 algo; + +#define VIRTIO_CRYPTO_AKCIPHER_KEY_TYPE_PUBLIC 1 +#define VIRTIO_CRYPTO_AKCIPHER_KEY_TYPE_PRIVATE 2 + __le32 keytype; + __le32 keylen; + + union { + struct virtio_crypto_rsa_session_para rsa; + struct virtio_crypto_ecdsa_session_para ecdsa; + } u; +}; + +struct virtio_crypto_akcipher_create_session_req { + struct virtio_crypto_akcipher_session_para para; + __u8 padding[36]; +}; + struct virtio_crypto_alg_chain_session_para { #define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER 1 #define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH 2 @@ -247,6 +304,8 @@ struct virtio_crypto_op_ctrl_req { mac_create_session; struct virtio_crypto_aead_create_session_req aead_create_session; + struct virtio_crypto_akcipher_create_session_req + akcipher_create_session; struct virtio_crypto_destroy_session_req destroy_session; __u8 padding[56]; @@ -266,6 +325,14 @@ struct virtio_crypto_op_header { VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x00) #define VIRTIO_CRYPTO_AEAD_DECRYPT \ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x01) +#define VIRTIO_CRYPTO_AKCIPHER_ENCRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x00) +#define VIRTIO_CRYPTO_AKCIPHER_DECRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x01) +#define VIRTIO_CRYPTO_AKCIPHER_SIGN \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x02) +#define VIRTIO_CRYPTO_AKCIPHER_VERIFY \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x03) __le32 opcode; /* algo should be service-specific algorithms */ __le32 algo; @@ -390,6 +457,16 @@ struct virtio_crypto_aead_data_req { __u8 padding[32]; }; +struct virtio_crypto_akcipher_para { + __le32 src_data_len; + __le32 dst_data_len; +}; + +struct virtio_crypto_akcipher_data_req { + struct virtio_crypto_akcipher_para para; + __u8 padding[40]; +}; + /* The request of the data virtqueue's packet */ struct virtio_crypto_op_data_req { struct virtio_crypto_op_header header; @@ -399,6 +476,7 @@ struct virtio_crypto_op_data_req { struct virtio_crypto_hash_data_req hash_req; struct virtio_crypto_mac_data_req mac_req; struct virtio_crypto_aead_data_req aead_req; + struct virtio_crypto_akcipher_data_req akcipher_req; __u8 padding[48]; } u; }; @@ -408,39 +486,41 @@ struct virtio_crypto_op_data_req { #define VIRTIO_CRYPTO_BADMSG 2 #define VIRTIO_CRYPTO_NOTSUPP 3 #define VIRTIO_CRYPTO_INVSESS 4 /* Invalid session id */ +#define VIRTIO_CRYPTO_NOSPC 5 /* no free session ID */ +#define VIRTIO_CRYPTO_KEY_REJECTED 6 /* Signature verification failed */ /* The accelerator hardware is ready */ #define VIRTIO_CRYPTO_S_HW_READY (1 << 0) struct virtio_crypto_config { /* See VIRTIO_CRYPTO_OP_* above */ - __u32 status; + __le32 status; /* * Maximum number of data queue */ - __u32 max_dataqueues; + __le32 max_dataqueues; /* * Specifies the services mask which the device support, * see VIRTIO_CRYPTO_SERVICE_* above */ - __u32 crypto_services; + __le32 crypto_services; /* Detailed algorithms mask */ - __u32 cipher_algo_l; - __u32 cipher_algo_h; - __u32 hash_algo; - __u32 mac_algo_l; - __u32 mac_algo_h; - __u32 aead_algo; + __le32 cipher_algo_l; + __le32 cipher_algo_h; + __le32 hash_algo; + __le32 mac_algo_l; + __le32 mac_algo_h; + __le32 aead_algo; /* Maximum length of cipher key */ - __u32 max_cipher_key_len; + __le32 max_cipher_key_len; /* Maximum length of authenticated key */ - __u32 max_auth_key_len; - __u32 reserve; + __le32 max_auth_key_len; + __le32 akcipher_algo; /* Maximum size of each crypto request's content */ - __u64 max_size; + __le64 max_size; }; struct virtio_crypto_inhdr { diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index ff7cfdc6cb44d..90a8f968af2f9 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -504,6 +504,12 @@ struct xfrm_user_offload { int ifindex; __u8 flags; }; +/* This flag was exposed without any kernel code that supporting it. + * Unfortunately, strongswan has the code that uses sets this flag, + * which makes impossible to reuse this bit. + * + * So leave it here to make sure that it won't be reused by mistake. + */ #define XFRM_OFFLOAD_IPV6 1 #define XFRM_OFFLOAD_INBOUND 2 diff --git a/include/video/of_display_timing.h b/include/video/of_display_timing.h index e1126a74882a5..eff166fdd81b9 100644 --- a/include/video/of_display_timing.h +++ b/include/video/of_display_timing.h @@ -8,6 +8,8 @@ #ifndef __LINUX_OF_DISPLAY_TIMING_H #define __LINUX_OF_DISPLAY_TIMING_H +#include + struct device_node; struct display_timing; struct display_timings; diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index a9978350b45b0..a58a89cc0e97d 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -97,17 +97,32 @@ int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly); * access has been ended, free the given page too. Access will be ended * immediately iff the grant entry is not in use, otherwise it will happen * some time later. page may be 0, in which case no freeing will occur. + * Note that the granted page might still be accessed (read or write) by the + * other side after gnttab_end_foreign_access() returns, so even if page was + * specified as 0 it is not allowed to just reuse the page for other + * purposes immediately. gnttab_end_foreign_access() will take an additional + * reference to the granted page in this case, which is dropped only after + * the grant is no longer in use. + * This requires that multi page allocations for areas subject to + * gnttab_end_foreign_access() are done via alloc_pages_exact() (and freeing + * via free_pages_exact()) in order to avoid high order pages. */ void gnttab_end_foreign_access(grant_ref_t ref, int readonly, unsigned long page); +/* + * End access through the given grant reference, iff the grant entry is + * no longer in use. In case of success ending foreign access, the + * grant reference is deallocated. + * Return 1 if the grant entry was freed, 0 if it is still in use. + */ +int gnttab_try_end_foreign_access(grant_ref_t ref); + int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn); unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref); unsigned long gnttab_end_foreign_transfer(grant_ref_t ref); -int gnttab_query_foreign_access(grant_ref_t ref); - /* * operations on reserved batches of grant references */ diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h index 3f40501fc60b1..b39cdbc522ec7 100644 --- a/include/xen/interface/io/ring.h +++ b/include/xen/interface/io/ring.h @@ -1,21 +1,53 @@ -/* SPDX-License-Identifier: GPL-2.0 */ /****************************************************************************** * ring.h * * Shared producer-consumer ring macros. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Tim Deegan and Andrew Warfield November 2004. */ #ifndef __XEN_PUBLIC_IO_RING_H__ #define __XEN_PUBLIC_IO_RING_H__ +/* + * When #include'ing this header, you need to provide the following + * declaration upfront: + * - standard integers types (uint8_t, uint16_t, etc) + * They are provided by stdint.h of the standard headers. + * + * In addition, if you intend to use the FLEX macros, you also need to + * provide the following, before invoking the FLEX macros: + * - size_t + * - memcpy + * - grant_ref_t + * These declarations are provided by string.h of the standard headers, + * and grant_table.h from the Xen public headers. + */ + #include typedef unsigned int RING_IDX; /* Round a 32-bit unsigned constant down to the nearest power of two. */ -#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) & 0x1)) +#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) & 0x1)) #define __RD4(_x) (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2 : __RD2(_x)) #define __RD8(_x) (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4 : __RD4(_x)) #define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8 : __RD8(_x)) @@ -27,82 +59,79 @@ typedef unsigned int RING_IDX; * A ring contains as many entries as will fit, rounded down to the nearest * power of two (so we can mask with (size-1) to loop around). */ -#define __CONST_RING_SIZE(_s, _sz) \ - (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \ - sizeof(((struct _s##_sring *)0)->ring[0]))) - +#define __CONST_RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \ + sizeof(((struct _s##_sring *)0)->ring[0]))) /* * The same for passing in an actual pointer instead of a name tag. */ -#define __RING_SIZE(_s, _sz) \ - (__RD32(((_sz) - (long)&(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) +#define __RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) /* * Macros to make the correct C datatypes for a new kind of ring. * * To make a new ring datatype, you need to have two message structures, - * let's say struct request, and struct response already defined. + * let's say request_t, and response_t already defined. * * In a header where you want the ring datatype declared, you then do: * - * DEFINE_RING_TYPES(mytag, struct request, struct response); + * DEFINE_RING_TYPES(mytag, request_t, response_t); * * These expand out to give you a set of types, as you can see below. * The most important of these are: * - * struct mytag_sring - The shared ring. - * struct mytag_front_ring - The 'front' half of the ring. - * struct mytag_back_ring - The 'back' half of the ring. + * mytag_sring_t - The shared ring. + * mytag_front_ring_t - The 'front' half of the ring. + * mytag_back_ring_t - The 'back' half of the ring. * * To initialize a ring in your code you need to know the location and size * of the shared memory area (PAGE_SIZE, for instance). To initialise * the front half: * - * struct mytag_front_ring front_ring; - * SHARED_RING_INIT((struct mytag_sring *)shared_page); - * FRONT_RING_INIT(&front_ring, (struct mytag_sring *)shared_page, - * PAGE_SIZE); + * mytag_front_ring_t front_ring; + * SHARED_RING_INIT((mytag_sring_t *)shared_page); + * FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); * * Initializing the back follows similarly (note that only the front * initializes the shared ring): * - * struct mytag_back_ring back_ring; - * BACK_RING_INIT(&back_ring, (struct mytag_sring *)shared_page, - * PAGE_SIZE); + * mytag_back_ring_t back_ring; + * BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); */ -#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \ - \ -/* Shared ring entry */ \ -union __name##_sring_entry { \ - __req_t req; \ - __rsp_t rsp; \ -}; \ - \ -/* Shared ring page */ \ -struct __name##_sring { \ - RING_IDX req_prod, req_event; \ - RING_IDX rsp_prod, rsp_event; \ - uint8_t pad[48]; \ - union __name##_sring_entry ring[1]; /* variable-length */ \ -}; \ - \ -/* "Front" end's private variables */ \ -struct __name##_front_ring { \ - RING_IDX req_prod_pvt; \ - RING_IDX rsp_cons; \ - unsigned int nr_ents; \ - struct __name##_sring *sring; \ -}; \ - \ -/* "Back" end's private variables */ \ -struct __name##_back_ring { \ - RING_IDX rsp_prod_pvt; \ - RING_IDX req_cons; \ - unsigned int nr_ents; \ - struct __name##_sring *sring; \ -}; - +#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \ + \ +/* Shared ring entry */ \ +union __name##_sring_entry { \ + __req_t req; \ + __rsp_t rsp; \ +}; \ + \ +/* Shared ring page */ \ +struct __name##_sring { \ + RING_IDX req_prod, req_event; \ + RING_IDX rsp_prod, rsp_event; \ + uint8_t __pad[48]; \ + union __name##_sring_entry ring[1]; /* variable-length */ \ +}; \ + \ +/* "Front" end's private variables */ \ +struct __name##_front_ring { \ + RING_IDX req_prod_pvt; \ + RING_IDX rsp_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; \ + \ +/* "Back" end's private variables */ \ +struct __name##_back_ring { \ + RING_IDX rsp_prod_pvt; \ + RING_IDX req_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; \ + \ /* * Macros for manipulating rings. * @@ -119,105 +148,99 @@ struct __name##_back_ring { \ */ /* Initialising empty rings */ -#define SHARED_RING_INIT(_s) do { \ - (_s)->req_prod = (_s)->rsp_prod = 0; \ - (_s)->req_event = (_s)->rsp_event = 1; \ - memset((_s)->pad, 0, sizeof((_s)->pad)); \ +#define SHARED_RING_INIT(_s) do { \ + (_s)->req_prod = (_s)->rsp_prod = 0; \ + (_s)->req_event = (_s)->rsp_event = 1; \ + (void)memset((_s)->__pad, 0, sizeof((_s)->__pad)); \ } while(0) -#define FRONT_RING_INIT(_r, _s, __size) do { \ - (_r)->req_prod_pvt = 0; \ - (_r)->rsp_cons = 0; \ - (_r)->nr_ents = __RING_SIZE(_s, __size); \ - (_r)->sring = (_s); \ +#define FRONT_RING_ATTACH(_r, _s, _i, __size) do { \ + (_r)->req_prod_pvt = (_i); \ + (_r)->rsp_cons = (_i); \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ } while (0) -#define BACK_RING_INIT(_r, _s, __size) do { \ - (_r)->rsp_prod_pvt = 0; \ - (_r)->req_cons = 0; \ - (_r)->nr_ents = __RING_SIZE(_s, __size); \ - (_r)->sring = (_s); \ -} while (0) +#define FRONT_RING_INIT(_r, _s, __size) FRONT_RING_ATTACH(_r, _s, 0, __size) -/* Initialize to existing shared indexes -- for recovery */ -#define FRONT_RING_ATTACH(_r, _s, __size) do { \ - (_r)->sring = (_s); \ - (_r)->req_prod_pvt = (_s)->req_prod; \ - (_r)->rsp_cons = (_s)->rsp_prod; \ - (_r)->nr_ents = __RING_SIZE(_s, __size); \ +#define BACK_RING_ATTACH(_r, _s, _i, __size) do { \ + (_r)->rsp_prod_pvt = (_i); \ + (_r)->req_cons = (_i); \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ } while (0) -#define BACK_RING_ATTACH(_r, _s, __size) do { \ - (_r)->sring = (_s); \ - (_r)->rsp_prod_pvt = (_s)->rsp_prod; \ - (_r)->req_cons = (_s)->req_prod; \ - (_r)->nr_ents = __RING_SIZE(_s, __size); \ -} while (0) +#define BACK_RING_INIT(_r, _s, __size) BACK_RING_ATTACH(_r, _s, 0, __size) /* How big is this ring? */ -#define RING_SIZE(_r) \ +#define RING_SIZE(_r) \ ((_r)->nr_ents) /* Number of free requests (for use on front side only). */ -#define RING_FREE_REQUESTS(_r) \ +#define RING_FREE_REQUESTS(_r) \ (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons)) /* Test if there is an empty slot available on the front ring. * (This is only meaningful from the front. ) */ -#define RING_FULL(_r) \ +#define RING_FULL(_r) \ (RING_FREE_REQUESTS(_r) == 0) /* Test if there are outstanding messages to be processed on a ring. */ -#define RING_HAS_UNCONSUMED_RESPONSES(_r) \ +#define RING_HAS_UNCONSUMED_RESPONSES(_r) \ ((_r)->sring->rsp_prod - (_r)->rsp_cons) -#define RING_HAS_UNCONSUMED_REQUESTS(_r) \ - ({ \ - unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \ - unsigned int rsp = RING_SIZE(_r) - \ - ((_r)->req_cons - (_r)->rsp_prod_pvt); \ - req < rsp ? req : rsp; \ - }) +#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({ \ + unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \ + unsigned int rsp = RING_SIZE(_r) - \ + ((_r)->req_cons - (_r)->rsp_prod_pvt); \ + req < rsp ? req : rsp; \ +}) /* Direct access to individual ring elements, by index. */ -#define RING_GET_REQUEST(_r, _idx) \ +#define RING_GET_REQUEST(_r, _idx) \ (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req)) +#define RING_GET_RESPONSE(_r, _idx) \ + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp)) + /* - * Get a local copy of a request. + * Get a local copy of a request/response. * - * Use this in preference to RING_GET_REQUEST() so all processing is + * Use this in preference to RING_GET_{REQUEST,RESPONSE}() so all processing is * done on a local copy that cannot be modified by the other end. * * Note that https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 may cause this - * to be ineffective where _req is a struct which consists of only bitfields. + * to be ineffective where dest is a struct which consists of only bitfields. */ -#define RING_COPY_REQUEST(_r, _idx, _req) do { \ - /* Use volatile to force the copy into _req. */ \ - *(_req) = *(volatile typeof(_req))RING_GET_REQUEST(_r, _idx); \ +#define RING_COPY_(type, r, idx, dest) do { \ + /* Use volatile to force the copy into dest. */ \ + *(dest) = *(volatile typeof(dest))RING_GET_##type(r, idx); \ } while (0) -#define RING_GET_RESPONSE(_r, _idx) \ - (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp)) +#define RING_COPY_REQUEST(r, idx, req) RING_COPY_(REQUEST, r, idx, req) +#define RING_COPY_RESPONSE(r, idx, rsp) RING_COPY_(RESPONSE, r, idx, rsp) /* Loop termination condition: Would the specified index overflow the ring? */ -#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \ +#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \ (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r)) /* Ill-behaved frontend determination: Can there be this many requests? */ -#define RING_REQUEST_PROD_OVERFLOW(_r, _prod) \ +#define RING_REQUEST_PROD_OVERFLOW(_r, _prod) \ (((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r)) +/* Ill-behaved backend determination: Can there be this many responses? */ +#define RING_RESPONSE_PROD_OVERFLOW(_r, _prod) \ + (((_prod) - (_r)->rsp_cons) > RING_SIZE(_r)) -#define RING_PUSH_REQUESTS(_r) do { \ - virt_wmb(); /* back sees requests /before/ updated producer index */ \ - (_r)->sring->req_prod = (_r)->req_prod_pvt; \ +#define RING_PUSH_REQUESTS(_r) do { \ + virt_wmb(); /* back sees requests /before/ updated producer index */\ + (_r)->sring->req_prod = (_r)->req_prod_pvt; \ } while (0) -#define RING_PUSH_RESPONSES(_r) do { \ - virt_wmb(); /* front sees responses /before/ updated producer index */ \ - (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \ +#define RING_PUSH_RESPONSES(_r) do { \ + virt_wmb(); /* front sees resps /before/ updated producer index */ \ + (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \ } while (0) /* @@ -250,40 +273,40 @@ struct __name##_back_ring { \ * field appropriately. */ -#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \ - RING_IDX __old = (_r)->sring->req_prod; \ - RING_IDX __new = (_r)->req_prod_pvt; \ - virt_wmb(); /* back sees requests /before/ updated producer index */ \ - (_r)->sring->req_prod = __new; \ - virt_mb(); /* back sees new requests /before/ we check req_event */ \ - (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \ - (RING_IDX)(__new - __old)); \ +#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->req_prod; \ + RING_IDX __new = (_r)->req_prod_pvt; \ + virt_wmb(); /* back sees requests /before/ updated producer index */\ + (_r)->sring->req_prod = __new; \ + virt_mb(); /* back sees new requests /before/ we check req_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \ + (RING_IDX)(__new - __old)); \ } while (0) -#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \ - RING_IDX __old = (_r)->sring->rsp_prod; \ - RING_IDX __new = (_r)->rsp_prod_pvt; \ - virt_wmb(); /* front sees responses /before/ updated producer index */ \ - (_r)->sring->rsp_prod = __new; \ - virt_mb(); /* front sees new responses /before/ we check rsp_event */ \ - (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \ - (RING_IDX)(__new - __old)); \ +#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->rsp_prod; \ + RING_IDX __new = (_r)->rsp_prod_pvt; \ + virt_wmb(); /* front sees resps /before/ updated producer index */ \ + (_r)->sring->rsp_prod = __new; \ + virt_mb(); /* front sees new resps /before/ we check rsp_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \ + (RING_IDX)(__new - __old)); \ } while (0) -#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \ - (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ - if (_work_to_do) break; \ - (_r)->sring->req_event = (_r)->req_cons + 1; \ - virt_mb(); \ - (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ +#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ + if (_work_to_do) break; \ + (_r)->sring->req_event = (_r)->req_cons + 1; \ + virt_mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ } while (0) -#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \ - (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ - if (_work_to_do) break; \ - (_r)->sring->rsp_event = (_r)->rsp_cons + 1; \ - virt_mb(); \ - (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ +#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ + if (_work_to_do) break; \ + (_r)->sring->rsp_event = (_r)->rsp_cons + 1; \ + virt_mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ } while (0) diff --git a/init/Kconfig b/init/Kconfig index 6d64930be40de..cff0d7f3b8ff5 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -33,6 +33,15 @@ config CC_CAN_LINK config CC_HAS_ASM_GOTO def_bool $(success,$(srctree)/scripts/gcc-goto.sh $(CC)) +config CC_HAS_ASM_GOTO_TIED_OUTPUT + depends on CC_HAS_ASM_GOTO_OUTPUT + # Detect buggy gcc and clang, fixed in gcc-11 clang-14. + def_bool $(success,echo 'int foo(int *x) { asm goto (".long (%l[bar]) - .\n": "+m"(*x) ::: bar); return *x; bar: return 0; }' | $CC -x c - -c -o /dev/null) + +config CC_HAS_ASM_GOTO_OUTPUT + depends on CC_HAS_ASM_GOTO + def_bool $(success,echo 'int foo(int x) { asm goto ("": "=r"(x) ::: bar); return x; bar: return 0; }' | $(CC) -x c - -c -o /dev/null) + config TOOLS_SUPPORT_RELR def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh) @@ -686,6 +695,16 @@ config GENERIC_SCHED_CLOCK menu "Scheduler features" +config BYTEDANCE_BURST_SCHED + bool "Enable burst instance scheduling" + default n + help + This feature enables the scheduler to calculate and modify the credit value + of burst instance tasks, and schedule tasks according to the credit value. + + The specific details of the burst instance scheduler are implemented + in the module. + config UCLAMP_TASK bool "Enable utilization clamping for RT/FAIR tasks" depends on CPU_FREQ_GOV_SCHEDUTIL diff --git a/init/main.c b/init/main.c index e6a1fb14f3085..a17a111d93362 100644 --- a/init/main.c +++ b/init/main.c @@ -680,21 +680,18 @@ asmlinkage __visible void __init start_kernel(void) hrtimers_init(); softirq_init(); timekeeping_init(); + time_init(); /* * For best initial stack canary entropy, prepare it after: * - setup_arch() for any UEFI RNG entropy and boot cmdline access - * - timekeeping_init() for ktime entropy used in rand_initialize() - * - rand_initialize() to get any arch-specific entropy like RDRAND - * - add_latent_entropy() to get any latent entropy - * - adding command line entropy + * - timekeeping_init() for ktime entropy used in random_init() + * - time_init() for making random_get_entropy() work on some platforms + * - random_init() to initialize the RNG from from early entropy sources */ - rand_initialize(); - add_latent_entropy(); - add_device_randomness(command_line, strlen(command_line)); + random_init(command_line); boot_init_stack_canary(); - time_init(); perf_event_init(); profile_init(); call_function_init(); @@ -831,7 +828,7 @@ static int __init initcall_blacklist(char *str) } } while (str_entry); - return 0; + return 1; } static bool __init_or_module initcall_blacklisted(initcall_t fn) @@ -1072,7 +1069,9 @@ static noinline void __init kernel_init_freeable(void); bool rodata_enabled __ro_after_init = true; static int __init set_debug_rodata(char *str) { - return strtobool(str, &rodata_enabled); + if (strtobool(str, &rodata_enabled)) + pr_warn("Invalid option string for rodata: '%s'\n", str); + return 1; } __setup("rodata=", set_debug_rodata); #endif diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 2ea0c08188e67..12519bf5f330d 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -45,6 +45,7 @@ struct mqueue_fs_context { struct ipc_namespace *ipc_ns; + bool newns; /* Set if newly created ipc namespace */ }; #define MQUEUE_MAGIC 0x19800202 @@ -365,6 +366,14 @@ static int mqueue_get_tree(struct fs_context *fc) { struct mqueue_fs_context *ctx = fc->fs_private; + /* + * With a newly created ipc namespace, we don't need to do a search + * for an ipc namespace match, but we still need to set s_fs_info. + */ + if (ctx->newns) { + fc->s_fs_info = ctx->ipc_ns; + return get_tree_nodev(fc, mqueue_fill_super); + } return get_tree_keyed(fc, mqueue_fill_super, ctx->ipc_ns); } @@ -392,6 +401,10 @@ static int mqueue_init_fs_context(struct fs_context *fc) return 0; } +/* + * mq_init_ns() is currently the only caller of mq_create_mount(). + * So the ns parameter is always a newly created ipc namespace. + */ static struct vfsmount *mq_create_mount(struct ipc_namespace *ns) { struct mqueue_fs_context *ctx; @@ -403,6 +416,7 @@ static struct vfsmount *mq_create_mount(struct ipc_namespace *ns) return ERR_CAST(fc); ctx = fc->fs_private; + ctx->newns = true; put_ipc_ns(ctx->ipc_ns); ctx->ipc_ns = get_ipc_ns(ns); put_user_ns(fc->user_ns); diff --git a/ipc/shm.c b/ipc/shm.c index ca3f1e89b02ba..c08f81ae9b475 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -62,9 +62,18 @@ struct shmid_kernel /* private to the kernel */ struct pid *shm_lprid; struct user_struct *mlock_user; - /* The task created the shm object. NULL if the task is dead. */ + /* + * The task created the shm object, for + * task_lock(shp->shm_creator) + */ struct task_struct *shm_creator; - struct list_head shm_clist; /* list by creator */ + + /* + * List by creator. task_lock(->shm_creator) required for read/write. + * If list_empty(), then the creator is dead already. + */ + struct list_head shm_clist; + struct ipc_namespace *ns; } __randomize_layout; /* shm_mode upper byte flags */ @@ -115,6 +124,7 @@ static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) struct shmid_kernel *shp; shp = container_of(ipcp, struct shmid_kernel, shm_perm); + WARN_ON(ns != shp->ns); if (shp->shm_nattch) { shp->shm_perm.mode |= SHM_DEST; @@ -225,10 +235,43 @@ static void shm_rcu_free(struct rcu_head *head) kvfree(shp); } -static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s) +/* + * It has to be called with shp locked. + * It must be called before ipc_rmid() + */ +static inline void shm_clist_rm(struct shmid_kernel *shp) { - list_del(&s->shm_clist); - ipc_rmid(&shm_ids(ns), &s->shm_perm); + struct task_struct *creator; + + /* ensure that shm_creator does not disappear */ + rcu_read_lock(); + + /* + * A concurrent exit_shm may do a list_del_init() as well. + * Just do nothing if exit_shm already did the work + */ + if (!list_empty(&shp->shm_clist)) { + /* + * shp->shm_creator is guaranteed to be valid *only* + * if shp->shm_clist is not empty. + */ + creator = shp->shm_creator; + + task_lock(creator); + /* + * list_del_init() is a nop if the entry was already removed + * from the list. + */ + list_del_init(&shp->shm_clist); + task_unlock(creator); + } + rcu_read_unlock(); +} + +static inline void shm_rmid(struct shmid_kernel *s) +{ + shm_clist_rm(s); + ipc_rmid(&shm_ids(s->ns), &s->shm_perm); } @@ -283,7 +326,7 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) shm_file = shp->shm_file; shp->shm_file = NULL; ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; - shm_rmid(ns, shp); + shm_rmid(shp); shm_unlock(shp); if (!is_file_hugepages(shm_file)) shmem_lock(shm_file, 0, shp->mlock_user); @@ -306,10 +349,10 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) * * 2) sysctl kernel.shm_rmid_forced is set to 1. */ -static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) +static bool shm_may_destroy(struct shmid_kernel *shp) { return (shp->shm_nattch == 0) && - (ns->shm_rmid_forced || + (shp->ns->shm_rmid_forced || (shp->shm_perm.mode & SHM_DEST)); } @@ -340,7 +383,7 @@ static void shm_close(struct vm_area_struct *vma) ipc_update_pid(&shp->shm_lprid, task_tgid(current)); shp->shm_dtim = ktime_get_real_seconds(); shp->shm_nattch--; - if (shm_may_destroy(ns, shp)) + if (shm_may_destroy(shp)) shm_destroy(ns, shp); else shm_unlock(shp); @@ -361,10 +404,10 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data) * * As shp->* are changed under rwsem, it's safe to skip shp locking. */ - if (shp->shm_creator != NULL) + if (!list_empty(&shp->shm_clist)) return 0; - if (shm_may_destroy(ns, shp)) { + if (shm_may_destroy(shp)) { shm_lock_by_ptr(shp); shm_destroy(ns, shp); } @@ -382,48 +425,97 @@ void shm_destroy_orphaned(struct ipc_namespace *ns) /* Locking assumes this will only be called with task == current */ void exit_shm(struct task_struct *task) { - struct ipc_namespace *ns = task->nsproxy->ipc_ns; - struct shmid_kernel *shp, *n; + for (;;) { + struct shmid_kernel *shp; + struct ipc_namespace *ns; - if (list_empty(&task->sysvshm.shm_clist)) - return; + task_lock(task); + + if (list_empty(&task->sysvshm.shm_clist)) { + task_unlock(task); + break; + } + + shp = list_first_entry(&task->sysvshm.shm_clist, struct shmid_kernel, + shm_clist); - /* - * If kernel.shm_rmid_forced is not set then only keep track of - * which shmids are orphaned, so that a later set of the sysctl - * can clean them up. - */ - if (!ns->shm_rmid_forced) { - down_read(&shm_ids(ns).rwsem); - list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist) - shp->shm_creator = NULL; /* - * Only under read lock but we are only called on current - * so no entry on the list will be shared. + * 1) Get pointer to the ipc namespace. It is worth to say + * that this pointer is guaranteed to be valid because + * shp lifetime is always shorter than namespace lifetime + * in which shp lives. + * We taken task_lock it means that shp won't be freed. */ - list_del(&task->sysvshm.shm_clist); - up_read(&shm_ids(ns).rwsem); - return; - } + ns = shp->ns; - /* - * Destroy all already created segments, that were not yet mapped, - * and mark any mapped as orphan to cover the sysctl toggling. - * Destroy is skipped if shm_may_destroy() returns false. - */ - down_write(&shm_ids(ns).rwsem); - list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) { - shp->shm_creator = NULL; + /* + * 2) If kernel.shm_rmid_forced is not set then only keep track of + * which shmids are orphaned, so that a later set of the sysctl + * can clean them up. + */ + if (!ns->shm_rmid_forced) + goto unlink_continue; - if (shm_may_destroy(ns, shp)) { - shm_lock_by_ptr(shp); - shm_destroy(ns, shp); + /* + * 3) get a reference to the namespace. + * The refcount could be already 0. If it is 0, then + * the shm objects will be free by free_ipc_work(). + */ + ns = get_ipc_ns_not_zero(ns); + if (!ns) { +unlink_continue: + list_del_init(&shp->shm_clist); + task_unlock(task); + continue; } - } - /* Remove the list head from any segments still attached. */ - list_del(&task->sysvshm.shm_clist); - up_write(&shm_ids(ns).rwsem); + /* + * 4) get a reference to shp. + * This cannot fail: shm_clist_rm() is called before + * ipc_rmid(), thus the refcount cannot be 0. + */ + WARN_ON(!ipc_rcu_getref(&shp->shm_perm)); + + /* + * 5) unlink the shm segment from the list of segments + * created by current. + * This must be done last. After unlinking, + * only the refcounts obtained above prevent IPC_RMID + * from destroying the segment or the namespace. + */ + list_del_init(&shp->shm_clist); + + task_unlock(task); + + /* + * 6) we have all references + * Thus lock & if needed destroy shp. + */ + down_write(&shm_ids(ns).rwsem); + shm_lock_by_ptr(shp); + /* + * rcu_read_lock was implicitly taken in shm_lock_by_ptr, it's + * safe to call ipc_rcu_putref here + */ + ipc_rcu_putref(&shp->shm_perm, shm_rcu_free); + + if (ipc_valid_object(&shp->shm_perm)) { + if (shm_may_destroy(shp)) + shm_destroy(ns, shp); + else + shm_unlock(shp); + } else { + /* + * Someone else deleted the shp from namespace + * idr/kht while we have waited. + * Just unlock and continue. + */ + shm_unlock(shp); + } + + up_write(&shm_ids(ns).rwsem); + put_ipc_ns(ns); /* paired with get_ipc_ns_not_zero */ + } } static vm_fault_t shm_fault(struct vm_fault *vmf) @@ -680,7 +772,11 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) if (error < 0) goto no_id; + shp->ns = ns; + + task_lock(current); list_add(&shp->shm_clist, ¤t->sysvshm.shm_clist); + task_unlock(current); /* * shmid gets reported as "inode#" in /proc/pid/maps. @@ -1575,7 +1671,8 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, down_write(&shm_ids(ns).rwsem); shp = shm_lock(ns, shmid); shp->shm_nattch--; - if (shm_may_destroy(ns, shp)) + + if (shm_may_destroy(shp)) shm_destroy(ns, shp); else shm_unlock(shp); diff --git a/ipc/util.c b/ipc/util.c index 1821b6386d3b4..09c3bd9f8e768 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -446,8 +446,8 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids, static void ipc_kht_remove(struct ipc_ids *ids, struct kern_ipc_perm *ipcp) { if (ipcp->key != IPC_PRIVATE) - rhashtable_remove_fast(&ids->key_ht, &ipcp->khtnode, - ipc_kht_params); + WARN_ON_ONCE(rhashtable_remove_fast(&ids->key_ht, &ipcp->khtnode, + ipc_kht_params)); } /** @@ -462,7 +462,7 @@ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp) { int idx = ipcid_to_idx(ipcp->id); - idr_remove(&ids->ipcs_idr, idx); + WARN_ON_ONCE(idr_remove(&ids->ipcs_idr, idx) != ipcp); ipc_kht_remove(ids, ipcp); ids->in_use--; ipcp->deleted = true; diff --git a/kernel/async.c b/kernel/async.c index 4f9c1d6140168..74660f611b97d 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -205,9 +205,6 @@ async_cookie_t async_schedule_node_domain(async_func_t func, void *data, atomic_inc(&entry_count); spin_unlock_irqrestore(&async_lock, flags); - /* mark that this task has queued an async job, used by module init */ - current->flags |= PF_USED_ASYNC; - /* schedule for execution */ queue_work_node(node, system_unbound_wq, &entry->work); diff --git a/kernel/audit.c b/kernel/audit.c index 05ae208ad4423..db8141866ceaa 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -535,20 +535,22 @@ static void kauditd_printk_skb(struct sk_buff *skb) /** * kauditd_rehold_skb - Handle a audit record send failure in the hold queue * @skb: audit record + * @error: error code (unused) * * Description: * This should only be used by the kauditd_thread when it fails to flush the * hold queue. */ -static void kauditd_rehold_skb(struct sk_buff *skb) +static void kauditd_rehold_skb(struct sk_buff *skb, __always_unused int error) { - /* put the record back in the queue at the same place */ - skb_queue_head(&audit_hold_queue, skb); + /* put the record back in the queue */ + skb_queue_tail(&audit_hold_queue, skb); } /** * kauditd_hold_skb - Queue an audit record, waiting for auditd * @skb: audit record + * @error: error code * * Description: * Queue the audit record, waiting for an instance of auditd. When this @@ -558,19 +560,31 @@ static void kauditd_rehold_skb(struct sk_buff *skb) * and queue it, if we have room. If we want to hold on to the record, but we * don't have room, record a record lost message. */ -static void kauditd_hold_skb(struct sk_buff *skb) +static void kauditd_hold_skb(struct sk_buff *skb, int error) { /* at this point it is uncertain if we will ever send this to auditd so * try to send the message via printk before we go any further */ kauditd_printk_skb(skb); /* can we just silently drop the message? */ - if (!audit_default) { - kfree_skb(skb); - return; + if (!audit_default) + goto drop; + + /* the hold queue is only for when the daemon goes away completely, + * not -EAGAIN failures; if we are in a -EAGAIN state requeue the + * record on the retry queue unless it's full, in which case drop it + */ + if (error == -EAGAIN) { + if (!audit_backlog_limit || + skb_queue_len(&audit_retry_queue) < audit_backlog_limit) { + skb_queue_tail(&audit_retry_queue, skb); + return; + } + audit_log_lost("kauditd retry queue overflow"); + goto drop; } - /* if we have room, queue the message */ + /* if we have room in the hold queue, queue the message */ if (!audit_backlog_limit || skb_queue_len(&audit_hold_queue) < audit_backlog_limit) { skb_queue_tail(&audit_hold_queue, skb); @@ -579,24 +593,32 @@ static void kauditd_hold_skb(struct sk_buff *skb) /* we have no other options - drop the message */ audit_log_lost("kauditd hold queue overflow"); +drop: kfree_skb(skb); } /** * kauditd_retry_skb - Queue an audit record, attempt to send again to auditd * @skb: audit record + * @error: error code (unused) * * Description: * Not as serious as kauditd_hold_skb() as we still have a connected auditd, * but for some reason we are having problems sending it audit records so * queue the given record and attempt to resend. */ -static void kauditd_retry_skb(struct sk_buff *skb) +static void kauditd_retry_skb(struct sk_buff *skb, __always_unused int error) { - /* NOTE: because records should only live in the retry queue for a - * short period of time, before either being sent or moved to the hold - * queue, we don't currently enforce a limit on this queue */ - skb_queue_tail(&audit_retry_queue, skb); + if (!audit_backlog_limit || + skb_queue_len(&audit_retry_queue) < audit_backlog_limit) { + skb_queue_tail(&audit_retry_queue, skb); + return; + } + + /* we have to drop the record, send it via printk as a last effort */ + kauditd_printk_skb(skb); + audit_log_lost("kauditd retry queue overflow"); + kfree_skb(skb); } /** @@ -634,7 +656,7 @@ static void auditd_reset(const struct auditd_connection *ac) /* flush the retry queue to the hold queue, but don't touch the main * queue since we need to process that normally for multicast */ while ((skb = skb_dequeue(&audit_retry_queue))) - kauditd_hold_skb(skb); + kauditd_hold_skb(skb, -ECONNREFUSED); } /** @@ -708,16 +730,18 @@ static int kauditd_send_queue(struct sock *sk, u32 portid, struct sk_buff_head *queue, unsigned int retry_limit, void (*skb_hook)(struct sk_buff *skb), - void (*err_hook)(struct sk_buff *skb)) + void (*err_hook)(struct sk_buff *skb, int error)) { int rc = 0; - struct sk_buff *skb; - static unsigned int failed = 0; + struct sk_buff *skb = NULL; + struct sk_buff *skb_tail; + unsigned int failed = 0; /* NOTE: kauditd_thread takes care of all our locking, we just use * the netlink info passed to us (e.g. sk and portid) */ - while ((skb = skb_dequeue(queue))) { + skb_tail = skb_peek_tail(queue); + while ((skb != skb_tail) && (skb = skb_dequeue(queue))) { /* call the skb_hook for each skb we touch */ if (skb_hook) (*skb_hook)(skb); @@ -725,36 +749,34 @@ static int kauditd_send_queue(struct sock *sk, u32 portid, /* can we send to anyone via unicast? */ if (!sk) { if (err_hook) - (*err_hook)(skb); + (*err_hook)(skb, -ECONNREFUSED); continue; } +retry: /* grab an extra skb reference in case of error */ skb_get(skb); rc = netlink_unicast(sk, skb, portid, 0); if (rc < 0) { - /* fatal failure for our queue flush attempt? */ + /* send failed - try a few times unless fatal error */ if (++failed >= retry_limit || rc == -ECONNREFUSED || rc == -EPERM) { - /* yes - error processing for the queue */ sk = NULL; if (err_hook) - (*err_hook)(skb); - if (!skb_hook) - goto out; - /* keep processing with the skb_hook */ + (*err_hook)(skb, rc); + if (rc == -EAGAIN) + rc = 0; + /* continue to drain the queue */ continue; } else - /* no - requeue to preserve ordering */ - skb_queue_head(queue, skb); + goto retry; } else { - /* it worked - drop the extra reference and continue */ + /* skb sent - drop the extra reference and continue */ consume_skb(skb); failed = 0; } } -out: return (rc >= 0 ? 0 : rc); } @@ -1530,6 +1552,20 @@ static void audit_receive(struct sk_buff *skb) nlh = nlmsg_next(nlh, &len); } audit_ctl_unlock(); + + /* can't block with the ctrl lock, so penalize the sender now */ + if (audit_backlog_limit && + (skb_queue_len(&audit_queue) > audit_backlog_limit)) { + DECLARE_WAITQUEUE(wait, current); + + /* wake kauditd to try and flush the queue */ + wake_up_interruptible(&kauditd_wait); + + add_wait_queue_exclusive(&audit_backlog_wait, &wait); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(audit_backlog_wait_time); + remove_wait_queue(&audit_backlog_wait, &wait); + } } /* Run custom bind function on netlink socket group connect or bind requests. */ @@ -1557,7 +1593,8 @@ static int __net_init audit_net_init(struct net *net) audit_panic("cannot initialize netlink socket in namespace"); return -ENOMEM; } - aunet->sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; + /* limit the timeout in case auditd is blocked/stopped */ + aunet->sk->sk_sndtimeo = HZ / 10; return 0; } @@ -1773,7 +1810,9 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, * task_tgid_vnr() since auditd_pid is set in audit_receive_msg() * using a PID anchored in the caller's namespace * 2. generator holding the audit_cmd_mutex - we don't want to block - * while holding the mutex */ + * while holding the mutex, although we do penalize the sender + * later in audit_receive() when it is safe to block + */ if (!(auditd_test_task(current) || audit_ctl_owner_current())) { long stime = audit_backlog_wait_time; diff --git a/kernel/audit.h b/kernel/audit.h index ddc22878433d0..fed8e93ce1699 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -191,6 +191,10 @@ struct audit_context { struct { char *name; } module; + struct { + struct audit_ntp_data ntp_data; + struct timespec64 tk_injoffset; + } time; }; int fds[2]; struct audit_proctitle proctitle; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 4effe01ebbe2b..e8e90c0c49367 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -624,7 +624,7 @@ static int audit_filter_rules(struct task_struct *tsk, result = audit_comparator(audit_loginuid_set(tsk), f->op, f->val); break; case AUDIT_SADDR_FAM: - if (ctx->sockaddr) + if (ctx && ctx->sockaddr) result = audit_comparator(ctx->sockaddr->ss_family, f->op, f->val); break; @@ -1185,6 +1185,53 @@ static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name) from_kuid(&init_user_ns, name->fcap.rootid)); } +static void audit_log_time(struct audit_context *context, struct audit_buffer **ab) +{ + const struct audit_ntp_data *ntp = &context->time.ntp_data; + const struct timespec64 *tk = &context->time.tk_injoffset; + static const char * const ntp_name[] = { + "offset", + "freq", + "status", + "tai", + "tick", + "adjust", + }; + int type; + + if (context->type == AUDIT_TIME_ADJNTPVAL) { + for (type = 0; type < AUDIT_NTP_NVALS; type++) { + if (ntp->vals[type].newval != ntp->vals[type].oldval) { + if (!*ab) { + *ab = audit_log_start(context, + GFP_KERNEL, + AUDIT_TIME_ADJNTPVAL); + if (!*ab) + return; + } + audit_log_format(*ab, "op=%s old=%lli new=%lli", + ntp_name[type], + ntp->vals[type].oldval, + ntp->vals[type].newval); + audit_log_end(*ab); + *ab = NULL; + } + } + } + if (tk->tv_sec != 0 || tk->tv_nsec != 0) { + if (!*ab) { + *ab = audit_log_start(context, GFP_KERNEL, + AUDIT_TIME_INJOFFSET); + if (!*ab) + return; + } + audit_log_format(*ab, "sec=%lli nsec=%li", + (long long)tk->tv_sec, tk->tv_nsec); + audit_log_end(*ab); + *ab = NULL; + } +} + static void show_special(struct audit_context *context, int *call_panic) { struct audit_buffer *ab; @@ -1290,6 +1337,11 @@ static void show_special(struct audit_context *context, int *call_panic) audit_log_format(ab, "(null)"); break; + case AUDIT_TIME_ADJNTPVAL: + case AUDIT_TIME_INJOFFSET: + /* this call deviates from the rest, eating the buffer */ + audit_log_time(context, &ab); + break; } audit_log_end(ab); } @@ -2518,31 +2570,26 @@ void __audit_fanotify(unsigned int response) void __audit_tk_injoffset(struct timespec64 offset) { - audit_log(audit_context(), GFP_KERNEL, AUDIT_TIME_INJOFFSET, - "sec=%lli nsec=%li", - (long long)offset.tv_sec, offset.tv_nsec); -} - -static void audit_log_ntp_val(const struct audit_ntp_data *ad, - const char *op, enum audit_ntp_type type) -{ - const struct audit_ntp_val *val = &ad->vals[type]; - - if (val->newval == val->oldval) - return; + struct audit_context *context = audit_context(); - audit_log(audit_context(), GFP_KERNEL, AUDIT_TIME_ADJNTPVAL, - "op=%s old=%lli new=%lli", op, val->oldval, val->newval); + /* only set type if not already set by NTP */ + if (!context->type) + context->type = AUDIT_TIME_INJOFFSET; + memcpy(&context->time.tk_injoffset, &offset, sizeof(offset)); } void __audit_ntp_log(const struct audit_ntp_data *ad) { - audit_log_ntp_val(ad, "offset", AUDIT_NTP_OFFSET); - audit_log_ntp_val(ad, "freq", AUDIT_NTP_FREQ); - audit_log_ntp_val(ad, "status", AUDIT_NTP_STATUS); - audit_log_ntp_val(ad, "tai", AUDIT_NTP_TAI); - audit_log_ntp_val(ad, "tick", AUDIT_NTP_TICK); - audit_log_ntp_val(ad, "adjust", AUDIT_NTP_ADJUST); + struct audit_context *context = audit_context(); + int type; + + for (type = 0; type < AUDIT_NTP_NVALS; type++) + if (ad->vals[type].newval != ad->vals[type].oldval) { + /* unconditionally set type, overwriting TK */ + context->type = AUDIT_TIME_ADJNTPVAL; + memcpy(&context->time.ntp_data, ad, sizeof(*ad)); + break; + } } static void audit_log_task(struct audit_buffer *ab) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 323913ba13b38..6b33a8a148b85 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -31,6 +31,7 @@ #include #include +#include #include /* Registers */ @@ -63,11 +64,13 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns { u8 *ptr = NULL; - if (k >= SKF_NET_OFF) + if (k >= SKF_NET_OFF) { ptr = skb_network_header(skb) + k - SKF_NET_OFF; - else if (k >= SKF_LL_OFF) + } else if (k >= SKF_LL_OFF) { + if (unlikely(!skb_mac_header_was_set(skb))) + return NULL; ptr = skb_mac_header(skb) + k - SKF_LL_OFF; - + } if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb)) return ptr; @@ -522,6 +525,7 @@ int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON); int bpf_jit_harden __read_mostly; int bpf_jit_kallsyms __read_mostly; long bpf_jit_limit __read_mostly; +long bpf_jit_limit_max __read_mostly; static __always_inline void bpf_get_prog_addr_region(const struct bpf_prog *prog, @@ -758,7 +762,8 @@ u64 __weak bpf_jit_alloc_exec_limit(void) static int __init bpf_jit_charge_init(void) { /* Only used as heuristic here to derive limit. */ - bpf_jit_limit = min_t(u64, round_up(bpf_jit_alloc_exec_limit() >> 2, + bpf_jit_limit_max = bpf_jit_alloc_exec_limit(); + bpf_jit_limit = min_t(u64, round_up(bpf_jit_limit_max >> 2, PAGE_SIZE), LONG_MAX); return 0; } @@ -1310,6 +1315,7 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) /* Non-UAPI available opcodes. */ [BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS, [BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL, + [BPF_ST | BPF_NOSPEC] = &&ST_NOSPEC, }; #undef BPF_INSN_3_LBL #undef BPF_INSN_2_LBL @@ -1550,7 +1556,21 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) COND_JMP(s, JSGE, >=) COND_JMP(s, JSLE, <=) #undef COND_JMP - /* STX and ST and LDX*/ + /* ST, STX and LDX*/ + ST_NOSPEC: + /* Speculation barrier for mitigating Speculative Store Bypass. + * In case of arm64, we rely on the firmware mitigation as + * controlled via the ssbd kernel parameter. Whenever the + * mitigation is enabled, it works for all of the kernel code + * with no need to provide any additional instructions here. + * In case of x86, we use 'lfence' insn for mitigation. We + * reuse preexisting logic from Spectre v1 mitigation that + * happens to produce the required code on x86 for v4 as well. + */ +#ifdef CONFIG_X86 + barrier_nospec(); +#endif + CONT; #define LDST(SIZEOP, SIZE) \ STX_MEM_##SIZEOP: \ *(SIZE *)(unsigned long) (DST + insn->off) = SRC; \ diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 7d1eb8acb0464..ff03546bf6b6e 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -94,7 +94,7 @@ static struct hlist_head *dev_map_create_hash(unsigned int entries, int i; struct hlist_head *hash; - hash = bpf_map_area_alloc(entries * sizeof(*hash), numa_node); + hash = bpf_map_area_alloc((u64) entries * sizeof(*hash), numa_node); if (hash != NULL) for (i = 0; i < entries; i++) INIT_HLIST_HEAD(&hash[i]); @@ -159,7 +159,7 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) spin_lock_init(&dtab->index_lock); } else { - dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries * + dtab->netdev_map = bpf_map_area_alloc((u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *), dtab->map.numa_node); if (!dtab->netdev_map) diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c index b44d8c447afd1..ff1dd7d45b58a 100644 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@ -162,15 +162,17 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs, else verbose(cbs->private_data, "BUG_%02x\n", insn->code); } else if (class == BPF_ST) { - if (BPF_MODE(insn->code) != BPF_MEM) { + if (BPF_MODE(insn->code) == BPF_MEM) { + verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = %d\n", + insn->code, + bpf_ldst_string[BPF_SIZE(insn->code) >> 3], + insn->dst_reg, + insn->off, insn->imm); + } else if (BPF_MODE(insn->code) == 0xc0 /* BPF_NOSPEC, no UAPI */) { + verbose(cbs->private_data, "(%02x) nospec\n", insn->code); + } else { verbose(cbs->private_data, "BUG_st_%02x\n", insn->code); - return; } - verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = %d\n", - insn->code, - bpf_ldst_string[BPF_SIZE(insn->code) >> 3], - insn->dst_reg, - insn->off, insn->imm); } else if (class == BPF_LDX) { if (BPF_MODE(insn->code) != BPF_MEM) { verbose(cbs->private_data, "BUG_ldx_%02x\n", insn->code); diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c index 6e090140b9240..e756d8d174e4a 100644 --- a/kernel/bpf/percpu_freelist.c +++ b/kernel/bpf/percpu_freelist.c @@ -30,7 +30,7 @@ static inline void ___pcpu_freelist_push(struct pcpu_freelist_head *head, { raw_spin_lock(&head->lock); node->next = head->first; - head->first = node; + WRITE_ONCE(head->first, node); raw_spin_unlock(&head->lock); } @@ -90,14 +90,17 @@ struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s) orig_cpu = cpu = raw_smp_processor_id(); while (1) { head = per_cpu_ptr(s->freelist, cpu); + if (!READ_ONCE(head->first)) + goto next_cpu; raw_spin_lock(&head->lock); node = head->first; if (node) { - head->first = node->next; + WRITE_ONCE(head->first, node->next); raw_spin_unlock(&head->lock); return node; } raw_spin_unlock(&head->lock); +next_cpu: cpu = cpumask_next(cpu, cpu_possible_mask); if (cpu >= nr_cpu_ids) cpu = 0; diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index d7e57143f5195..5d642bfab86ce 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -59,7 +59,8 @@ static inline int stack_map_data_size(struct bpf_map *map) static int prealloc_elems_and_freelist(struct bpf_stack_map *smap) { - u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size; + u64 elem_size = sizeof(struct stack_map_bucket) + + (u64)smap->map.value_size; int err; smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries, @@ -115,8 +116,8 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) return ERR_PTR(-E2BIG); cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); - cost += n_buckets * (value_size + sizeof(struct stack_map_bucket)); - err = bpf_map_charge_init(&mem, cost); + err = bpf_map_charge_init(&mem, cost + attr->max_entries * + (sizeof(struct stack_map_bucket) + (u64)value_size)); if (err) return ERR_PTR(err); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 280a2bac4ef6f..bbbc10c7089f6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1920,6 +1920,19 @@ static int check_stack_write(struct bpf_verifier_env *env, cur = env->cur_state->frame[env->cur_state->curframe]; if (value_regno >= 0) reg = &cur->regs[value_regno]; + if (!env->allow_ptr_leaks) { + bool sanitize = reg && is_spillable_regtype(reg->type); + + for (i = 0; i < size; i++) { + if (state->stack[spi].slot_type[i] == STACK_INVALID) { + sanitize = true; + break; + } + } + + if (sanitize) + env->insn_aux_data[insn_idx].sanitize_stack_spill = true; + } if (reg && size == BPF_REG_SIZE && register_is_const(reg) && !register_is_null(reg) && env->allow_ptr_leaks) { @@ -1942,47 +1955,10 @@ static int check_stack_write(struct bpf_verifier_env *env, verbose(env, "invalid size of register spill\n"); return -EACCES; } - if (state != cur && reg->type == PTR_TO_STACK) { verbose(env, "cannot spill pointers to stack into stack frame of the caller\n"); return -EINVAL; } - - if (!env->allow_ptr_leaks) { - bool sanitize = false; - - if (state->stack[spi].slot_type[0] == STACK_SPILL && - register_is_const(&state->stack[spi].spilled_ptr)) - sanitize = true; - for (i = 0; i < BPF_REG_SIZE; i++) - if (state->stack[spi].slot_type[i] == STACK_MISC) { - sanitize = true; - break; - } - if (sanitize) { - int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off; - int soff = (-spi - 1) * BPF_REG_SIZE; - - /* detected reuse of integer stack slot with a pointer - * which means either llvm is reusing stack slot or - * an attacker is trying to exploit CVE-2018-3639 - * (speculative store bypass) - * Have to sanitize that slot with preemptive - * store of zero. - */ - if (*poff && *poff != soff) { - /* disallow programs where single insn stores - * into two different stack slots, since verifier - * cannot sanitize them - */ - verbose(env, - "insn %d cannot access two stack slots fp%d and fp%d", - insn_idx, *poff, soff); - return -EINVAL; - } - *poff = soff; - } - } save_register_state(state, spi, reg); } else { u8 type = STACK_MISC; @@ -2778,6 +2754,41 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size) reg->smax_value = reg->umax_value; } +static bool bpf_map_is_rdonly(const struct bpf_map *map) +{ + return (map->map_flags & BPF_F_RDONLY_PROG) && map->frozen; +} + +static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val) +{ + void *ptr; + u64 addr; + int err; + + err = map->ops->map_direct_value_addr(map, &addr, off); + if (err) + return err; + ptr = (void *)(long)addr + off; + + switch (size) { + case sizeof(u8): + *val = (u64)*(u8 *)ptr; + break; + case sizeof(u16): + *val = (u64)*(u16 *)ptr; + break; + case sizeof(u32): + *val = (u64)*(u32 *)ptr; + break; + case sizeof(u64): + *val = *(u64 *)ptr; + break; + default: + return -EINVAL; + } + return 0; +} + /* check whether memory at (regno + off) is accessible for t = (read | write) * if t==write, value_regno is a register which value is stored into memory * if t==read, value_regno is a register which will receive the value from memory @@ -2815,9 +2826,27 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn if (err) return err; err = check_map_access(env, regno, off, size, false); - if (!err && t == BPF_READ && value_regno >= 0) - mark_reg_unknown(env, regs, value_regno); + if (!err && t == BPF_READ && value_regno >= 0) { + struct bpf_map *map = reg->map_ptr; + + /* if map is read-only, track its contents as scalars */ + if (tnum_is_const(reg->var_off) && + bpf_map_is_rdonly(map) && + map->ops->map_direct_value_addr) { + int map_off = off + reg->var_off.value; + u64 val = 0; + + err = bpf_map_direct_read(map, map_off, size, + &val); + if (err) + return err; + regs[value_regno].type = SCALAR_VALUE; + __mark_reg_known(®s[value_regno], val); + } else { + mark_reg_unknown(env, regs, value_regno); + } + } } else if (reg->type == PTR_TO_CTX) { enum bpf_reg_type reg_type = SCALAR_VALUE; @@ -4007,7 +4036,9 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn int i, err; /* find function prototype */ - if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) { + if (func_id < 0 || (func_id >= __BPF_FUNC_MAX_ID && + func_id <= __BPF_BYTEDANCE_FUNC_MIN_ID) || + func_id >= __BPF_BYTEDANCE_FUNC_MAX_ID) { verbose(env, "invalid func %s#%d\n", func_id_name(func_id), func_id); return -EINVAL; @@ -4420,6 +4451,12 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0; alu_state |= ptr_is_dst_reg ? BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST; + + /* Limit pruning on unknown scalars to enable deep search for + * potential masking differences from other program paths. + */ + if (!off_is_imm) + env->explore_alu_limits = true; } err = update_alu_sanitation_state(aux, alu_state, alu_limit); @@ -5048,6 +5085,7 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, coerce_reg_to_size(dst_reg, 4); } + __update_reg_bounds(dst_reg); __reg_deduce_bounds(dst_reg); __reg_bound_offset(dst_reg); return 0; @@ -5337,7 +5375,7 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, new_range = dst_reg->off; if (range_right_open) - new_range--; + new_range++; /* Examples for register markings: * @@ -6808,6 +6846,8 @@ static int check_btf_line(struct bpf_verifier_env *env, nr_linfo = attr->line_info_cnt; if (!nr_linfo) return 0; + if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info)) + return -EINVAL; rec_size = attr->line_info_rec_size; if (rec_size < MIN_BPF_LINEINFO_SIZE || @@ -6951,13 +6991,6 @@ static bool range_within(struct bpf_reg_state *old, old->smax_value >= cur->smax_value; } -/* Maximum number of register states that can exist at once */ -#define ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) -struct idpair { - u32 old; - u32 cur; -}; - /* If in the old state two registers had the same id, then they need to have * the same id in the new state as well. But that id could be different from * the old state, so we need to track the mapping from old to new ids. @@ -6968,11 +7001,11 @@ struct idpair { * So we look through our idmap to see if this old id has been seen before. If * so, we require the new id to match; otherwise, we add the id pair to the map. */ -static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap) +static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap) { unsigned int i; - for (i = 0; i < ID_MAP_SIZE; i++) { + for (i = 0; i < BPF_ID_MAP_SIZE; i++) { if (!idmap[i].old) { /* Reached an empty slot; haven't seen this id before */ idmap[i].old = old_id; @@ -7084,8 +7117,8 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn, } /* Returns true if (rold safe implies rcur safe) */ -static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, - struct idpair *idmap) +static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, + struct bpf_reg_state *rcur, struct bpf_id_pair *idmap) { bool equal; @@ -7111,6 +7144,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, return false; switch (rold->type) { case SCALAR_VALUE: + if (env->explore_alu_limits) + return false; if (rcur->type == SCALAR_VALUE) { if (!rold->precise && !rcur->precise) return true; @@ -7200,9 +7235,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, return false; } -static bool stacksafe(struct bpf_func_state *old, - struct bpf_func_state *cur, - struct idpair *idmap) +static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, + struct bpf_func_state *cur, struct bpf_id_pair *idmap) { int i, spi; @@ -7247,9 +7281,8 @@ static bool stacksafe(struct bpf_func_state *old, continue; if (old->stack[spi].slot_type[0] != STACK_SPILL) continue; - if (!regsafe(&old->stack[spi].spilled_ptr, - &cur->stack[spi].spilled_ptr, - idmap)) + if (!regsafe(env, &old->stack[spi].spilled_ptr, + &cur->stack[spi].spilled_ptr, idmap)) /* when explored and current stack slot are both storing * spilled registers, check that stored pointers types * are the same as well. @@ -7299,32 +7332,24 @@ static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur) * whereas register type in current state is meaningful, it means that * the current state will reach 'bpf_exit' instruction safely */ -static bool func_states_equal(struct bpf_func_state *old, +static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old, struct bpf_func_state *cur) { - struct idpair *idmap; - bool ret = false; int i; - idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL); - /* If we failed to allocate the idmap, just say it's not safe */ - if (!idmap) - return false; - - for (i = 0; i < MAX_BPF_REG; i++) { - if (!regsafe(&old->regs[i], &cur->regs[i], idmap)) - goto out_free; - } + memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch)); + for (i = 0; i < MAX_BPF_REG; i++) + if (!regsafe(env, &old->regs[i], &cur->regs[i], + env->idmap_scratch)) + return false; - if (!stacksafe(old, cur, idmap)) - goto out_free; + if (!stacksafe(env, old, cur, env->idmap_scratch)) + return false; if (!refsafe(old, cur)) - goto out_free; - ret = true; -out_free: - kfree(idmap); - return ret; + return false; + + return true; } static bool states_equal(struct bpf_verifier_env *env, @@ -7351,7 +7376,7 @@ static bool states_equal(struct bpf_verifier_env *env, for (i = 0; i <= old->curframe; i++) { if (old->frame[i]->callsite != cur->frame[i]->callsite) return false; - if (!func_states_equal(old->frame[i], cur->frame[i])) + if (!func_states_equal(env, old->frame[i], cur->frame[i])) return false; } return true; @@ -8805,35 +8830,33 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) for (i = 0; i < insn_cnt; i++, insn++) { bpf_convert_ctx_access_t convert_ctx_access; + bool ctx_access; if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) || insn->code == (BPF_LDX | BPF_MEM | BPF_H) || insn->code == (BPF_LDX | BPF_MEM | BPF_W) || - insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) + insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) { type = BPF_READ; - else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) || - insn->code == (BPF_STX | BPF_MEM | BPF_H) || - insn->code == (BPF_STX | BPF_MEM | BPF_W) || - insn->code == (BPF_STX | BPF_MEM | BPF_DW)) + ctx_access = true; + } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) || + insn->code == (BPF_STX | BPF_MEM | BPF_H) || + insn->code == (BPF_STX | BPF_MEM | BPF_W) || + insn->code == (BPF_STX | BPF_MEM | BPF_DW) || + insn->code == (BPF_ST | BPF_MEM | BPF_B) || + insn->code == (BPF_ST | BPF_MEM | BPF_H) || + insn->code == (BPF_ST | BPF_MEM | BPF_W) || + insn->code == (BPF_ST | BPF_MEM | BPF_DW)) { type = BPF_WRITE; - else + ctx_access = BPF_CLASS(insn->code) == BPF_STX; + } else { continue; + } if (type == BPF_WRITE && - env->insn_aux_data[i + delta].sanitize_stack_off) { + env->insn_aux_data[i + delta].sanitize_stack_spill) { struct bpf_insn patch[] = { - /* Sanitize suspicious stack slot with zero. - * There are no memory dependencies for this store, - * since it's only using frame pointer and immediate - * constant of zero - */ - BPF_ST_MEM(BPF_DW, BPF_REG_FP, - env->insn_aux_data[i + delta].sanitize_stack_off, - 0), - /* the original STX instruction will immediately - * overwrite the same stack slot with appropriate value - */ *insn, + BPF_ST_NOSPEC(), }; cnt = ARRAY_SIZE(patch); @@ -8847,6 +8870,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) continue; } + if (!ctx_access) + continue; + switch (env->insn_aux_data[i + delta].ptr_type) { case PTR_TO_CTX: if (!ops->convert_ctx_access) @@ -8908,6 +8934,10 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) if (is_narrower_load && size < target_size) { u8 shift = bpf_ctx_narrow_access_offset( off, size, size_default) * 8; + if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) { + verbose(env, "bpf verifier narrow ctx load misconfigured\n"); + return -EINVAL; + } if (ctx_field_size <= 4) { if (shift) insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH, diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 87d8c36f8bb60..e4cee104c5f89 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -65,6 +65,25 @@ static inline struct cgroup_fs_context *cgroup_fc2context(struct fs_context *fc) return container_of(kfc, struct cgroup_fs_context, kfc); } +struct cgroup_pidlist; + +struct cgroup_file_ctx { + struct cgroup_namespace *ns; + + struct { + void *trigger; + } psi; + + struct { + bool started; + struct css_task_iter iter; + } procs; + + struct { + struct cgroup_pidlist *pidlist; + } procs1; +}; + /* * A cgroup can be associated with multiple css_sets as different tasks may * belong to different cgroups on different hierarchies. In the other @@ -232,9 +251,10 @@ int cgroup_migrate(struct task_struct *leader, bool threadgroup, int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader, bool threadgroup); -struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup) +struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup, + bool *locked) __acquires(&cgroup_threadgroup_rwsem); -void cgroup_procs_write_finish(struct task_struct *task) +void cgroup_procs_write_finish(struct task_struct *task, bool locked) __releases(&cgroup_threadgroup_rwsem); void cgroup_lock_and_drain_offline(struct cgroup *cgrp); diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 94026c700ba26..e3eaeb5c678a3 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -398,6 +398,7 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) * next pid to display, if any */ struct kernfs_open_file *of = s->private; + struct cgroup_file_ctx *ctx = of->priv; struct cgroup *cgrp = seq_css(s)->cgroup; struct cgroup_pidlist *l; enum cgroup_filetype type = seq_cft(s)->private; @@ -407,25 +408,24 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) mutex_lock(&cgrp->pidlist_mutex); /* - * !NULL @of->priv indicates that this isn't the first start() - * after open. If the matching pidlist is around, we can use that. - * Look for it. Note that @of->priv can't be used directly. It - * could already have been destroyed. + * !NULL @ctx->procs1.pidlist indicates that this isn't the first + * start() after open. If the matching pidlist is around, we can use + * that. Look for it. Note that @ctx->procs1.pidlist can't be used + * directly. It could already have been destroyed. */ - if (of->priv) - of->priv = cgroup_pidlist_find(cgrp, type); + if (ctx->procs1.pidlist) + ctx->procs1.pidlist = cgroup_pidlist_find(cgrp, type); /* * Either this is the first start() after open or the matching * pidlist has been destroyed inbetween. Create a new one. */ - if (!of->priv) { - ret = pidlist_array_load(cgrp, type, - (struct cgroup_pidlist **)&of->priv); + if (!ctx->procs1.pidlist) { + ret = pidlist_array_load(cgrp, type, &ctx->procs1.pidlist); if (ret) return ERR_PTR(ret); } - l = of->priv; + l = ctx->procs1.pidlist; if (pid) { int end = l->length; @@ -453,7 +453,8 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) static void cgroup_pidlist_stop(struct seq_file *s, void *v) { struct kernfs_open_file *of = s->private; - struct cgroup_pidlist *l = of->priv; + struct cgroup_file_ctx *ctx = of->priv; + struct cgroup_pidlist *l = ctx->procs1.pidlist; if (l) mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork, @@ -464,7 +465,8 @@ static void cgroup_pidlist_stop(struct seq_file *s, void *v) static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) { struct kernfs_open_file *of = s->private; - struct cgroup_pidlist *l = of->priv; + struct cgroup_file_ctx *ctx = of->priv; + struct cgroup_pidlist *l = ctx->procs1.pidlist; pid_t *p = v; pid_t *end = l->list + l->length; /* @@ -496,21 +498,23 @@ static ssize_t __cgroup1_procs_write(struct kernfs_open_file *of, struct task_struct *task; const struct cred *cred, *tcred; ssize_t ret; + bool locked; cgrp = cgroup_kn_lock_live(of->kn, false); if (!cgrp) return -ENODEV; - task = cgroup_procs_write_start(buf, threadgroup); + task = cgroup_procs_write_start(buf, threadgroup, &locked); ret = PTR_ERR_OR_ZERO(task); if (ret) goto out_unlock; /* - * Even if we're attaching all tasks in the thread group, we only - * need to check permissions on one of them. + * Even if we're attaching all tasks in the thread group, we only need + * to check permissions on one of them. Check permissions using the + * credentials from file open to protect against inherited fd attacks. */ - cred = current_cred(); + cred = of->file->f_cred; tcred = get_task_cred(task); if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && !uid_eq(cred->euid, tcred->uid) && @@ -523,7 +527,7 @@ static ssize_t __cgroup1_procs_write(struct kernfs_open_file *of, ret = cgroup_attach_task(cgrp, task, threadgroup); out_finish: - cgroup_procs_write_finish(task); + cgroup_procs_write_finish(task, locked); out_unlock: cgroup_kn_unlock(of->kn); @@ -546,9 +550,19 @@ static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct cgroup *cgrp; + struct cgroup_file_ctx *ctx; BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); + /* + * Release agent gets called with all capabilities, + * require capabilities to set release agent. + */ + ctx = of->priv; + if ((ctx->ns->user_ns != &init_user_ns) || + !file_ns_capable(of->file, &init_user_ns, CAP_SYS_ADMIN)) + return -EPERM; + cgrp = cgroup_kn_lock_live(of->kn, false); if (!cgrp) return -ENODEV; @@ -930,6 +944,9 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param) for_each_subsys(ss, i) { if (strcmp(param->key, ss->legacy_name)) continue; + if (!cgroup_ssid_enabled(i) || cgroup1_ssid_disabled(i)) + return cg_invalf(fc, "Disabled controller '%s'", + param->key); ctx->subsys_mask |= (1 << i); return 0; } @@ -962,6 +979,12 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param) /* Specifying two release agents is forbidden */ if (ctx->release_agent) return cg_invalf(fc, "cgroup1: release_agent respecified"); + /* + * Release agent gets called with all capabilities, + * require capabilities to set release agent. + */ + if ((fc->user_ns != &init_user_ns) || !capable(CAP_SYS_ADMIN)) + return cg_invalf(fc, "cgroup1: Setting release_agent not allowed"); ctx->release_agent = param->string; param->string = NULL; break; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 2db181e6f26bb..d540baf000dda 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -212,6 +212,22 @@ struct cgroup_namespace init_cgroup_ns = { static struct cftype cgroup_base_files[]; +/* cgroup optional features */ +enum cgroup_opt_features { +#ifdef CONFIG_PSI + OPT_FEATURE_PRESSURE, +#endif + OPT_FEATURE_COUNT +}; + +static const char *cgroup_opt_feature_names[OPT_FEATURE_COUNT] = { +#ifdef CONFIG_PSI + "pressure", +#endif +}; + +static u16 cgroup_feature_disable_mask __read_mostly; + static int cgroup_apply_control(struct cgroup *cgrp); static void cgroup_finalize_control(struct cgroup *cgrp, int ret); static void css_task_iter_skip(struct css_task_iter *it, @@ -742,7 +758,8 @@ struct css_set init_css_set = { .task_iters = LIST_HEAD_INIT(init_css_set.task_iters), .threaded_csets = LIST_HEAD_INIT(init_css_set.threaded_csets), .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links), - .mg_preload_node = LIST_HEAD_INIT(init_css_set.mg_preload_node), + .mg_src_preload_node = LIST_HEAD_INIT(init_css_set.mg_src_preload_node), + .mg_dst_preload_node = LIST_HEAD_INIT(init_css_set.mg_dst_preload_node), .mg_node = LIST_HEAD_INIT(init_css_set.mg_node), /* @@ -1218,7 +1235,8 @@ static struct css_set *find_css_set(struct css_set *old_cset, INIT_LIST_HEAD(&cset->threaded_csets); INIT_HLIST_NODE(&cset->hlist); INIT_LIST_HEAD(&cset->cgrp_links); - INIT_LIST_HEAD(&cset->mg_preload_node); + INIT_LIST_HEAD(&cset->mg_src_preload_node); + INIT_LIST_HEAD(&cset->mg_dst_preload_node); INIT_LIST_HEAD(&cset->mg_node); /* Copy the set of subsystem state objects generated in @@ -1720,6 +1738,7 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) struct cgroup *dcgrp = &dst_root->cgrp; struct cgroup_subsys *ss; int ssid, i, ret; + u16 dfl_disable_ss_mask = 0; lockdep_assert_held(&cgroup_mutex); @@ -1736,8 +1755,28 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) /* can't move between two non-dummy roots either */ if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root) return -EBUSY; + + /* + * Collect ssid's that need to be disabled from default + * hierarchy. + */ + if (ss->root == &cgrp_dfl_root) + dfl_disable_ss_mask |= 1 << ssid; + } while_each_subsys_mask(); + if (dfl_disable_ss_mask) { + struct cgroup *scgrp = &cgrp_dfl_root.cgrp; + + /* + * Controllers from default hierarchy that need to be rebound + * are all disabled together in one go. + */ + cgrp_dfl_root.subsys_mask &= ~dfl_disable_ss_mask; + WARN_ON(cgroup_apply_control(scgrp)); + cgroup_finalize_control(scgrp, 0); + } + do_each_subsys_mask(ss, ssid, ss_mask) { struct cgroup_root *src_root = ss->root; struct cgroup *scgrp = &src_root->cgrp; @@ -1746,10 +1785,12 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) WARN_ON(!css || cgroup_css(dcgrp, ss)); - /* disable from the source */ - src_root->subsys_mask &= ~(1 << ssid); - WARN_ON(cgroup_apply_control(scgrp)); - cgroup_finalize_control(scgrp, 0); + if (src_root != &cgrp_dfl_root) { + /* disable from the source */ + src_root->subsys_mask &= ~(1 << ssid); + WARN_ON(cgroup_apply_control(scgrp)); + cgroup_finalize_control(scgrp, 0); + } /* rebind */ RCU_INIT_POINTER(scgrp->subsys[ssid], NULL); @@ -2605,21 +2646,27 @@ int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp) */ void cgroup_migrate_finish(struct cgroup_mgctx *mgctx) { - LIST_HEAD(preloaded); struct css_set *cset, *tmp_cset; lockdep_assert_held(&cgroup_mutex); spin_lock_irq(&css_set_lock); - list_splice_tail_init(&mgctx->preloaded_src_csets, &preloaded); - list_splice_tail_init(&mgctx->preloaded_dst_csets, &preloaded); + list_for_each_entry_safe(cset, tmp_cset, &mgctx->preloaded_src_csets, + mg_src_preload_node) { + cset->mg_src_cgrp = NULL; + cset->mg_dst_cgrp = NULL; + cset->mg_dst_cset = NULL; + list_del_init(&cset->mg_src_preload_node); + put_css_set_locked(cset); + } - list_for_each_entry_safe(cset, tmp_cset, &preloaded, mg_preload_node) { + list_for_each_entry_safe(cset, tmp_cset, &mgctx->preloaded_dst_csets, + mg_dst_preload_node) { cset->mg_src_cgrp = NULL; cset->mg_dst_cgrp = NULL; cset->mg_dst_cset = NULL; - list_del_init(&cset->mg_preload_node); + list_del_init(&cset->mg_dst_preload_node); put_css_set_locked(cset); } @@ -2661,7 +2708,7 @@ void cgroup_migrate_add_src(struct css_set *src_cset, src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root); - if (!list_empty(&src_cset->mg_preload_node)) + if (!list_empty(&src_cset->mg_src_preload_node)) return; WARN_ON(src_cset->mg_src_cgrp); @@ -2672,7 +2719,7 @@ void cgroup_migrate_add_src(struct css_set *src_cset, src_cset->mg_src_cgrp = src_cgrp; src_cset->mg_dst_cgrp = dst_cgrp; get_css_set(src_cset); - list_add_tail(&src_cset->mg_preload_node, &mgctx->preloaded_src_csets); + list_add_tail(&src_cset->mg_src_preload_node, &mgctx->preloaded_src_csets); } /** @@ -2697,7 +2744,7 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx) /* look up the dst cset for each src cset and link it to src */ list_for_each_entry_safe(src_cset, tmp_cset, &mgctx->preloaded_src_csets, - mg_preload_node) { + mg_src_preload_node) { struct css_set *dst_cset; struct cgroup_subsys *ss; int ssid; @@ -2716,7 +2763,7 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx) if (src_cset == dst_cset) { src_cset->mg_src_cgrp = NULL; src_cset->mg_dst_cgrp = NULL; - list_del_init(&src_cset->mg_preload_node); + list_del_init(&src_cset->mg_src_preload_node); put_css_set(src_cset); put_css_set(dst_cset); continue; @@ -2724,8 +2771,8 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx) src_cset->mg_dst_cset = dst_cset; - if (list_empty(&dst_cset->mg_preload_node)) - list_add_tail(&dst_cset->mg_preload_node, + if (list_empty(&dst_cset->mg_dst_preload_node)) + list_add_tail(&dst_cset->mg_dst_preload_node, &mgctx->preloaded_dst_csets); else put_css_set(dst_cset); @@ -2793,11 +2840,7 @@ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader, { DEFINE_CGROUP_MGCTX(mgctx); struct task_struct *task; - int ret; - - ret = cgroup_migrate_vet_dst(dst_cgrp); - if (ret) - return ret; + int ret = 0; /* look up all src csets */ spin_lock_irq(&css_set_lock); @@ -2824,7 +2867,8 @@ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader, return ret; } -struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup) +struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup, + bool *locked) __acquires(&cgroup_threadgroup_rwsem) { struct task_struct *tsk; @@ -2833,7 +2877,21 @@ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup) if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0) return ERR_PTR(-EINVAL); - percpu_down_write(&cgroup_threadgroup_rwsem); + /* + * If we migrate a single thread, we don't care about threadgroup + * stability. If the thread is `current`, it won't exit(2) under our + * hands or change PID through exec(2). We exclude + * cgroup_update_dfl_csses and other cgroup_{proc,thread}s_write + * callers by cgroup_mutex. + * Therefore, we can skip the global lock. + */ + lockdep_assert_held(&cgroup_mutex); + if (pid || threadgroup) { + percpu_down_write(&cgroup_threadgroup_rwsem); + *locked = true; + } else { + *locked = false; + } rcu_read_lock(); if (pid) { @@ -2864,13 +2922,16 @@ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup) goto out_unlock_rcu; out_unlock_threadgroup: - percpu_up_write(&cgroup_threadgroup_rwsem); + if (*locked) { + percpu_up_write(&cgroup_threadgroup_rwsem); + *locked = false; + } out_unlock_rcu: rcu_read_unlock(); return tsk; } -void cgroup_procs_write_finish(struct task_struct *task) +void cgroup_procs_write_finish(struct task_struct *task, bool locked) __releases(&cgroup_threadgroup_rwsem) { struct cgroup_subsys *ss; @@ -2879,7 +2940,8 @@ void cgroup_procs_write_finish(struct task_struct *task) /* release reference from cgroup_procs_write_start() */ put_task_struct(task); - percpu_up_write(&cgroup_threadgroup_rwsem); + if (locked) + percpu_up_write(&cgroup_threadgroup_rwsem); for_each_subsys(ss, ssid) if (ss->post_attach) ss->post_attach(); @@ -2956,7 +3018,8 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) goto out_finish; spin_lock_irq(&css_set_lock); - list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, mg_preload_node) { + list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, + mg_src_preload_node) { struct task_struct *task, *ntask; /* all tasks in src_csets need to be migrated */ @@ -3624,6 +3687,7 @@ static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v) static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, size_t nbytes, enum psi_res res) { + struct cgroup_file_ctx *ctx = of->priv; struct psi_trigger *new; struct cgroup *cgrp; struct psi_group *psi; @@ -3635,6 +3699,12 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, cgroup_get(cgrp); cgroup_kn_unlock(of->kn); + /* Allow only one trigger per file descriptor */ + if (ctx->psi.trigger) { + cgroup_put(cgrp); + return -EBUSY; + } + psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi; new = psi_trigger_create(psi, buf, nbytes, res); if (IS_ERR(new)) { @@ -3642,8 +3712,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, return PTR_ERR(new); } - psi_trigger_replace(&of->priv, new); - + smp_store_release(&ctx->psi.trigger, new); cgroup_put(cgrp); return nbytes; @@ -3673,13 +3742,29 @@ static ssize_t cgroup_cpu_pressure_write(struct kernfs_open_file *of, static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of, poll_table *pt) { - return psi_trigger_poll(&of->priv, of->file, pt); + struct cgroup_file_ctx *ctx = of->priv; + + return psi_trigger_poll(&ctx->psi.trigger, of->file, pt); } static void cgroup_pressure_release(struct kernfs_open_file *of) { - psi_trigger_replace(&of->priv, NULL); + struct cgroup_file_ctx *ctx = of->priv; + + psi_trigger_destroy(ctx->psi.trigger); +} + +bool cgroup_psi_enabled(void) +{ + return (cgroup_feature_disable_mask & (1 << OPT_FEATURE_PRESSURE)) == 0; +} + +#else /* CONFIG_PSI */ +bool cgroup_psi_enabled(void) +{ + return false; } + #endif /* CONFIG_PSI */ static int cgroup_freeze_show(struct seq_file *seq, void *v) @@ -3718,27 +3803,46 @@ static ssize_t cgroup_freeze_write(struct kernfs_open_file *of, static int cgroup_file_open(struct kernfs_open_file *of) { - struct cftype *cft = of->kn->priv; + struct cftype *cft = of_cft(of); + struct cgroup_file_ctx *ctx; + int ret; - if (cft->open) - return cft->open(of); - return 0; + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->ns = current->nsproxy->cgroup_ns; + get_cgroup_ns(ctx->ns); + of->priv = ctx; + + if (!cft->open) + return 0; + + ret = cft->open(of); + if (ret) { + put_cgroup_ns(ctx->ns); + kfree(ctx); + } + return ret; } static void cgroup_file_release(struct kernfs_open_file *of) { - struct cftype *cft = of->kn->priv; + struct cftype *cft = of_cft(of); + struct cgroup_file_ctx *ctx = of->priv; if (cft->release) cft->release(of); + put_cgroup_ns(ctx->ns); + kfree(ctx); } static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { - struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; + struct cgroup_file_ctx *ctx = of->priv; struct cgroup *cgrp = of->kn->parent->priv; - struct cftype *cft = of->kn->priv; + struct cftype *cft = of_cft(of); struct cgroup_subsys_state *css; int ret; @@ -3750,7 +3854,7 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, */ if ((cgrp->root->flags & CGRP_ROOT_NS_DELEGATE) && !(cft->flags & CFTYPE_NS_DELEGATABLE) && - ns != &init_cgroup_ns && ns->root_cset->dfl_cgrp == cgrp) + ctx->ns != &init_cgroup_ns && ctx->ns->root_cset->dfl_cgrp == cgrp) return -EPERM; if (cft->write) @@ -3785,7 +3889,7 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, static __poll_t cgroup_file_poll(struct kernfs_open_file *of, poll_table *pt) { - struct cftype *cft = of->kn->priv; + struct cftype *cft = of_cft(of); if (cft->poll) return cft->poll(of, pt); @@ -3927,6 +4031,8 @@ static int cgroup_addrm_files(struct cgroup_subsys_state *css, restart: for (cft = cfts; cft != cft_end && cft->name[0] != '\0'; cft++) { /* does cft->flags tell us to skip this file on @cgrp? */ + if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled()) + continue; if ((cft->flags & __CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp)) continue; if ((cft->flags & __CFTYPE_NOT_ON_DFL) && cgroup_on_dfl(cgrp)) @@ -4004,6 +4110,9 @@ static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) WARN_ON(cft->ss || cft->kf_ops); + if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled()) + continue; + if (cft->seq_start) kf_ops = &cgroup_kf_ops; else @@ -4658,21 +4767,21 @@ void css_task_iter_end(struct css_task_iter *it) static void cgroup_procs_release(struct kernfs_open_file *of) { - if (of->priv) { - css_task_iter_end(of->priv); - kfree(of->priv); - } + struct cgroup_file_ctx *ctx = of->priv; + + if (ctx->procs.started) + css_task_iter_end(&ctx->procs.iter); } static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos) { struct kernfs_open_file *of = s->private; - struct css_task_iter *it = of->priv; + struct cgroup_file_ctx *ctx = of->priv; if (pos) (*pos)++; - return css_task_iter_next(it); + return css_task_iter_next(&ctx->procs.iter); } static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, @@ -4680,21 +4789,18 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, { struct kernfs_open_file *of = s->private; struct cgroup *cgrp = seq_css(s)->cgroup; - struct css_task_iter *it = of->priv; + struct cgroup_file_ctx *ctx = of->priv; + struct css_task_iter *it = &ctx->procs.iter; /* * When a seq_file is seeked, it's always traversed sequentially * from position 0, so we can simply keep iterating on !0 *pos. */ - if (!it) { + if (!ctx->procs.started) { if (WARN_ON_ONCE((*pos))) return ERR_PTR(-EINVAL); - - it = kzalloc(sizeof(*it), GFP_KERNEL); - if (!it) - return ERR_PTR(-ENOMEM); - of->priv = it; css_task_iter_start(&cgrp->self, iter_flags, it); + ctx->procs.started = true; } else if (!(*pos)) { css_task_iter_end(it); css_task_iter_start(&cgrp->self, iter_flags, it); @@ -4729,9 +4835,9 @@ static int cgroup_procs_show(struct seq_file *s, void *v) static int cgroup_procs_write_permission(struct cgroup *src_cgrp, struct cgroup *dst_cgrp, - struct super_block *sb) + struct super_block *sb, + struct cgroup_namespace *ns) { - struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; struct cgroup *com_cgrp = src_cgrp; struct inode *inode; int ret; @@ -4764,18 +4870,42 @@ static int cgroup_procs_write_permission(struct cgroup *src_cgrp, return 0; } -static ssize_t cgroup_procs_write(struct kernfs_open_file *of, - char *buf, size_t nbytes, loff_t off) +static int cgroup_attach_permissions(struct cgroup *src_cgrp, + struct cgroup *dst_cgrp, + struct super_block *sb, bool threadgroup, + struct cgroup_namespace *ns) +{ + int ret = 0; + + ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, sb, ns); + if (ret) + return ret; + + ret = cgroup_migrate_vet_dst(dst_cgrp); + if (ret) + return ret; + + if (!threadgroup && (src_cgrp->dom_cgrp != dst_cgrp->dom_cgrp)) + ret = -EOPNOTSUPP; + + return ret; +} + +static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, + bool threadgroup) { + struct cgroup_file_ctx *ctx = of->priv; struct cgroup *src_cgrp, *dst_cgrp; struct task_struct *task; + const struct cred *saved_cred; ssize_t ret; + bool locked; dst_cgrp = cgroup_kn_lock_live(of->kn, false); if (!dst_cgrp) return -ENODEV; - task = cgroup_procs_write_start(buf, true); + task = cgroup_procs_write_start(buf, threadgroup, &locked); ret = PTR_ERR_OR_ZERO(task); if (ret) goto out_unlock; @@ -4785,19 +4915,33 @@ static ssize_t cgroup_procs_write(struct kernfs_open_file *of, src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root); spin_unlock_irq(&css_set_lock); - ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, - of->file->f_path.dentry->d_sb); + /* + * Process and thread migrations follow same delegation rule. Check + * permissions using the credentials from file open to protect against + * inherited fd attacks. + */ + saved_cred = override_creds(of->file->f_cred); + ret = cgroup_attach_permissions(src_cgrp, dst_cgrp, + of->file->f_path.dentry->d_sb, + threadgroup, ctx->ns); + revert_creds(saved_cred); if (ret) goto out_finish; - ret = cgroup_attach_task(dst_cgrp, task, true); + ret = cgroup_attach_task(dst_cgrp, task, threadgroup); out_finish: - cgroup_procs_write_finish(task); + cgroup_procs_write_finish(task, locked); out_unlock: cgroup_kn_unlock(of->kn); - return ret ?: nbytes; + return ret; +} + +static ssize_t cgroup_procs_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + return __cgroup_procs_write(of, buf, true) ?: nbytes; } static void *cgroup_threads_start(struct seq_file *s, loff_t *pos) @@ -4808,45 +4952,7 @@ static void *cgroup_threads_start(struct seq_file *s, loff_t *pos) static ssize_t cgroup_threads_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { - struct cgroup *src_cgrp, *dst_cgrp; - struct task_struct *task; - ssize_t ret; - - buf = strstrip(buf); - - dst_cgrp = cgroup_kn_lock_live(of->kn, false); - if (!dst_cgrp) - return -ENODEV; - - task = cgroup_procs_write_start(buf, false); - ret = PTR_ERR_OR_ZERO(task); - if (ret) - goto out_unlock; - - /* find the source cgroup */ - spin_lock_irq(&css_set_lock); - src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root); - spin_unlock_irq(&css_set_lock); - - /* thread migrations follow the cgroup.procs delegation rule */ - ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, - of->file->f_path.dentry->d_sb); - if (ret) - goto out_finish; - - /* and must be contained in the same domain */ - ret = -EOPNOTSUPP; - if (src_cgrp->dom_cgrp != dst_cgrp->dom_cgrp) - goto out_finish; - - ret = cgroup_attach_task(dst_cgrp, task, false); - -out_finish: - cgroup_procs_write_finish(task); -out_unlock: - cgroup_kn_unlock(of->kn); - - return ret ?: nbytes; + return __cgroup_procs_write(of, buf, false) ?: nbytes; } /* cgroup core interface files for the default hierarchy */ @@ -4920,6 +5026,7 @@ static struct cftype cgroup_base_files[] = { #ifdef CONFIG_PSI { .name = "io.pressure", + .flags = CFTYPE_PRESSURE, .seq_show = cgroup_io_pressure_show, .write = cgroup_io_pressure_write, .poll = cgroup_pressure_poll, @@ -4927,6 +5034,7 @@ static struct cftype cgroup_base_files[] = { }, { .name = "memory.pressure", + .flags = CFTYPE_PRESSURE, .seq_show = cgroup_memory_pressure_show, .write = cgroup_memory_pressure_write, .poll = cgroup_pressure_poll, @@ -4934,6 +5042,7 @@ static struct cftype cgroup_base_files[] = { }, { .name = "cpu.pressure", + .flags = CFTYPE_PRESSURE, .seq_show = cgroup_cpu_pressure_show, .write = cgroup_cpu_pressure_write, .poll = cgroup_pressure_poll, @@ -6171,6 +6280,15 @@ static int __init cgroup_disable(char *str) pr_info("Disabling %s control group subsystem\n", ss->name); } + + for (i = 0; i < OPT_FEATURE_COUNT; i++) { + if (strcmp(token, cgroup_opt_feature_names[i])) + continue; + cgroup_feature_disable_mask |= 1 << i; + pr_info("Disabling %s control group feature\n", + cgroup_opt_feature_names[i]); + break; + } } return 1; } @@ -6474,6 +6592,9 @@ static ssize_t show_delegatable_files(struct cftype *files, char *buf, if (!(cft->flags & CFTYPE_NS_DELEGATABLE)) continue; + if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled()) + continue; + if (prefix) ret += snprintf(buf + ret, size - ret, "%s.", prefix); diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index bab6a934862e3..b02eca235ba3f 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -1473,10 +1473,15 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs, struct cpuset *sibling; struct cgroup_subsys_state *pos_css; + percpu_rwsem_assert_held(&cpuset_rwsem); + /* * Check all its siblings and call update_cpumasks_hier() * if their use_parent_ecpus flag is set in order for them * to use the right effective_cpus value. + * + * The update_cpumasks_hier() function may sleep. So we have to + * release the RCU read lock before calling it. */ rcu_read_lock(); cpuset_for_each_child(sibling, pos_css, parent) { @@ -1484,8 +1489,13 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs, continue; if (!sibling->use_parent_ecpus) continue; + if (!css_tryget_online(&sibling->css)) + continue; + rcu_read_unlock(); update_cpumasks_hier(sibling, tmp); + rcu_read_lock(); + css_put(&sibling->css); } rcu_read_unlock(); } @@ -1558,8 +1568,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, * Make sure that subparts_cpus is a subset of cpus_allowed. */ if (cs->nr_subparts_cpus) { - cpumask_andnot(cs->subparts_cpus, cs->subparts_cpus, - cs->cpus_allowed); + cpumask_and(cs->subparts_cpus, cs->subparts_cpus, cs->cpus_allowed); cs->nr_subparts_cpus = cpumask_weight(cs->subparts_cpus); } spin_unlock_irq(&callback_lock); @@ -2195,6 +2204,7 @@ static void cpuset_attach(struct cgroup_taskset *tset) cgroup_taskset_first(tset, &css); cs = css_cs(css); + cpus_read_lock(); percpu_down_write(&cpuset_rwsem); /* prepare for attach */ @@ -2250,6 +2260,7 @@ static void cpuset_attach(struct cgroup_taskset *tset) wake_up(&cpuset_attach_wq); percpu_up_write(&cpuset_rwsem); + cpus_read_unlock(); } /* The various types of files and directories in a cpuset file system */ @@ -3166,6 +3177,13 @@ static void cpuset_hotplug_workfn(struct work_struct *work) cpus_updated = !cpumask_equal(top_cpuset.effective_cpus, &new_cpus); mems_updated = !nodes_equal(top_cpuset.effective_mems, new_mems); + /* + * In the rare case that hotplug removes all the cpus in subparts_cpus, + * we assumed that cpus are updated. + */ + if (!cpus_updated && top_cpuset.nr_subparts_cpus) + cpus_updated = true; + /* synchronize cpus_allowed to cpu_active_mask */ if (cpus_updated) { spin_lock_irq(&callback_lock); @@ -3271,8 +3289,11 @@ static struct notifier_block cpuset_track_online_nodes_nb = { */ void __init cpuset_init_smp(void) { - cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask); - top_cpuset.mems_allowed = node_states[N_MEMORY]; + /* + * cpus_allowd/mems_allowed set to v2 values in the initial + * cpuset_bind() call will be reset to v1 values in another + * cpuset_bind() call when v1 cpuset is mounted. + */ top_cpuset.old_mems_allowed = top_cpuset.mems_allowed; cpumask_copy(top_cpuset.effective_cpus, cpu_active_mask); diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index 4a942d4e9763d..2e1cfe874f85d 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -25,13 +25,8 @@ static struct cgroup_rstat_cpu *cgroup_rstat_cpu(struct cgroup *cgrp, int cpu) void cgroup_rstat_updated(struct cgroup *cgrp, int cpu) { raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu); - struct cgroup *parent; unsigned long flags; - /* nothing to do for root */ - if (!cgroup_parent(cgrp)) - return; - /* * Speculative already-on-list test. This may race leading to * temporary inaccuracies, which is fine. @@ -46,10 +41,10 @@ void cgroup_rstat_updated(struct cgroup *cgrp, int cpu) raw_spin_lock_irqsave(cpu_lock, flags); /* put @cgrp and all ancestors on the corresponding updated lists */ - for (parent = cgroup_parent(cgrp); parent; - cgrp = parent, parent = cgroup_parent(cgrp)) { + while (true) { struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu); - struct cgroup_rstat_cpu *prstatc = cgroup_rstat_cpu(parent, cpu); + struct cgroup *parent = cgroup_parent(cgrp); + struct cgroup_rstat_cpu *prstatc; /* * Both additions and removals are bottom-up. If a cgroup @@ -58,8 +53,17 @@ void cgroup_rstat_updated(struct cgroup *cgrp, int cpu) if (rstatc->updated_next) break; + /* Root has no parent to link it to, but mark it busy */ + if (!parent) { + rstatc->updated_next = cgrp; + break; + } + + prstatc = cgroup_rstat_cpu(parent, cpu); rstatc->updated_next = prstatc->updated_children; prstatc->updated_children = cgrp; + + cgrp = parent; } raw_spin_unlock_irqrestore(cpu_lock, flags); @@ -114,23 +118,26 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos, */ if (rstatc->updated_next) { struct cgroup *parent = cgroup_parent(pos); - struct cgroup_rstat_cpu *prstatc = cgroup_rstat_cpu(parent, cpu); - struct cgroup_rstat_cpu *nrstatc; - struct cgroup **nextp; - - nextp = &prstatc->updated_children; - while (true) { - nrstatc = cgroup_rstat_cpu(*nextp, cpu); - if (*nextp == pos) - break; - - WARN_ON_ONCE(*nextp == parent); - nextp = &nrstatc->updated_next; + + if (parent) { + struct cgroup_rstat_cpu *prstatc; + struct cgroup **nextp; + + prstatc = cgroup_rstat_cpu(parent, cpu); + nextp = &prstatc->updated_children; + while (true) { + struct cgroup_rstat_cpu *nrstatc; + + nrstatc = cgroup_rstat_cpu(*nextp, cpu); + if (*nextp == pos) + break; + WARN_ON_ONCE(*nextp == parent); + nextp = &nrstatc->updated_next; + } + *nextp = rstatc->updated_next; } - *nextp = rstatc->updated_next; rstatc->updated_next = NULL; - return pos; } @@ -294,60 +301,69 @@ void __init cgroup_rstat_boot(void) * Functions for cgroup basic resource statistics implemented on top of * rstat. */ -static void cgroup_base_stat_accumulate(struct cgroup_base_stat *dst_bstat, - struct cgroup_base_stat *src_bstat) +static void cgroup_base_stat_add(struct cgroup_base_stat *dst_bstat, + struct cgroup_base_stat *src_bstat) { dst_bstat->cputime.utime += src_bstat->cputime.utime; dst_bstat->cputime.stime += src_bstat->cputime.stime; dst_bstat->cputime.sum_exec_runtime += src_bstat->cputime.sum_exec_runtime; } +static void cgroup_base_stat_sub(struct cgroup_base_stat *dst_bstat, + struct cgroup_base_stat *src_bstat) +{ + dst_bstat->cputime.utime -= src_bstat->cputime.utime; + dst_bstat->cputime.stime -= src_bstat->cputime.stime; + dst_bstat->cputime.sum_exec_runtime -= src_bstat->cputime.sum_exec_runtime; +} + static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu) { - struct cgroup *parent = cgroup_parent(cgrp); struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu); - struct task_cputime *last_cputime = &rstatc->last_bstat.cputime; - struct task_cputime cputime; - struct cgroup_base_stat delta; + struct cgroup *parent = cgroup_parent(cgrp); + struct cgroup_base_stat cur, delta; unsigned seq; + /* Root-level stats are sourced from system-wide CPU stats */ + if (!parent) + return; + /* fetch the current per-cpu values */ do { seq = __u64_stats_fetch_begin(&rstatc->bsync); - cputime = rstatc->bstat.cputime; + cur.cputime = rstatc->bstat.cputime; } while (__u64_stats_fetch_retry(&rstatc->bsync, seq)); - /* calculate the delta to propgate */ - delta.cputime.utime = cputime.utime - last_cputime->utime; - delta.cputime.stime = cputime.stime - last_cputime->stime; - delta.cputime.sum_exec_runtime = cputime.sum_exec_runtime - - last_cputime->sum_exec_runtime; - *last_cputime = cputime; - - /* transfer the pending stat into delta */ - cgroup_base_stat_accumulate(&delta, &cgrp->pending_bstat); - memset(&cgrp->pending_bstat, 0, sizeof(cgrp->pending_bstat)); - - /* propagate delta into the global stat and the parent's pending */ - cgroup_base_stat_accumulate(&cgrp->bstat, &delta); - if (parent) - cgroup_base_stat_accumulate(&parent->pending_bstat, &delta); + /* propagate percpu delta to global */ + delta = cur; + cgroup_base_stat_sub(&delta, &rstatc->last_bstat); + cgroup_base_stat_add(&cgrp->bstat, &delta); + cgroup_base_stat_add(&rstatc->last_bstat, &delta); + + /* propagate global delta to parent (unless that's root) */ + if (cgroup_parent(parent)) { + delta = cgrp->bstat; + cgroup_base_stat_sub(&delta, &cgrp->last_bstat); + cgroup_base_stat_add(&parent->bstat, &delta); + cgroup_base_stat_add(&cgrp->last_bstat, &delta); + } } static struct cgroup_rstat_cpu * -cgroup_base_stat_cputime_account_begin(struct cgroup *cgrp) +cgroup_base_stat_cputime_account_begin(struct cgroup *cgrp, unsigned long *flags) { struct cgroup_rstat_cpu *rstatc; rstatc = get_cpu_ptr(cgrp->rstat_cpu); - u64_stats_update_begin(&rstatc->bsync); + *flags = u64_stats_update_begin_irqsave(&rstatc->bsync); return rstatc; } static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp, - struct cgroup_rstat_cpu *rstatc) + struct cgroup_rstat_cpu *rstatc, + unsigned long flags) { - u64_stats_update_end(&rstatc->bsync); + u64_stats_update_end_irqrestore(&rstatc->bsync, flags); cgroup_rstat_updated(cgrp, smp_processor_id()); put_cpu_ptr(rstatc); } @@ -355,18 +371,20 @@ static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp, void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec) { struct cgroup_rstat_cpu *rstatc; + unsigned long flags; - rstatc = cgroup_base_stat_cputime_account_begin(cgrp); + rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags); rstatc->bstat.cputime.sum_exec_runtime += delta_exec; - cgroup_base_stat_cputime_account_end(cgrp, rstatc); + cgroup_base_stat_cputime_account_end(cgrp, rstatc, flags); } void __cgroup_account_cputime_field(struct cgroup *cgrp, enum cpu_usage_stat index, u64 delta_exec) { struct cgroup_rstat_cpu *rstatc; + unsigned long flags; - rstatc = cgroup_base_stat_cputime_account_begin(cgrp); + rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags); switch (index) { case CPUTIME_USER: @@ -382,7 +400,7 @@ void __cgroup_account_cputime_field(struct cgroup *cgrp, break; } - cgroup_base_stat_cputime_account_end(cgrp, rstatc); + cgroup_base_stat_cputime_account_end(cgrp, rstatc, flags); } void cgroup_base_stat_cputime_show(struct seq_file *seq) diff --git a/kernel/cpu.c b/kernel/cpu.c index 06c009489892f..c08456af0c7fe 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #define CREATE_TRACE_POINTS @@ -1459,6 +1460,11 @@ static struct cpuhp_step cpuhp_hp_states[] = { .startup.single = perf_event_init_cpu, .teardown.single = perf_event_exit_cpu, }, + [CPUHP_RANDOM_PREPARE] = { + .name = "random:prepare", + .startup.single = random_prepare_cpu, + .teardown.single = NULL, + }, [CPUHP_WORKQUEUE_PREP] = { .name = "workqueue:prepare", .startup.single = workqueue_prepare_cpu, @@ -1575,6 +1581,11 @@ static struct cpuhp_step cpuhp_hp_states[] = { .startup.single = workqueue_online_cpu, .teardown.single = workqueue_offline_cpu, }, + [CPUHP_AP_RANDOM_ONLINE] = { + .name = "random:online", + .startup.single = random_online_cpu, + .teardown.single = NULL, + }, [CPUHP_AP_RCUTREE_ONLINE] = { .name = "RCU/tree:online", .startup.single = rcutree_online_cpu, diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 097ab02989f92..565987557ad89 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include @@ -685,6 +686,29 @@ static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs, continue; kgdb_connected = 0; } else { + /* + * This is a brutal way to interfere with the debugger + * and prevent gdb being used to poke at kernel memory. + * This could cause trouble if lockdown is applied when + * there is already an active gdb session. For now the + * answer is simply "don't do that". Typically lockdown + * *will* be applied before the debug core gets started + * so only developers using kgdb for fairly advanced + * early kernel debug can be biten by this. Hopefully + * they are sophisticated enough to take care of + * themselves, especially with help from the lockdown + * message printed on the console! + */ + if (security_locked_down(LOCKDOWN_DBG_WRITE_KERNEL)) { + if (IS_ENABLED(CONFIG_KGDB_KDB)) { + /* Switch back to kdb if possible... */ + dbg_kdb_mode = 1; + continue; + } else { + /* ... otherwise just bail */ + break; + } + } error = gdb_serial_stub(ks); } diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 4567fe998c306..7c96bf9a6c2c2 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -45,6 +45,7 @@ #include #include #include +#include #include "kdb_private.h" #undef MODULE_PARAM_PREFIX @@ -198,10 +199,62 @@ struct task_struct *kdb_curr_task(int cpu) } /* - * Check whether the flags of the current command and the permissions - * of the kdb console has allow a command to be run. + * Update the permissions flags (kdb_cmd_enabled) to match the + * current lockdown state. + * + * Within this function the calls to security_locked_down() are "lazy". We + * avoid calling them if the current value of kdb_cmd_enabled already excludes + * flags that might be subject to lockdown. Additionally we deliberately check + * the lockdown flags independently (even though read lockdown implies write + * lockdown) since that results in both simpler code and clearer messages to + * the user on first-time debugger entry. + * + * The permission masks during a read+write lockdown permits the following + * flags: INSPECT, SIGNAL, REBOOT (and ALWAYS_SAFE). + * + * The INSPECT commands are not blocked during lockdown because they are + * not arbitrary memory reads. INSPECT covers the backtrace family (sometimes + * forcing them to have no arguments) and lsmod. These commands do expose + * some kernel state but do not allow the developer seated at the console to + * choose what state is reported. SIGNAL and REBOOT should not be controversial, + * given these are allowed for root during lockdown already. + */ +static void kdb_check_for_lockdown(void) +{ + const int write_flags = KDB_ENABLE_MEM_WRITE | + KDB_ENABLE_REG_WRITE | + KDB_ENABLE_FLOW_CTRL; + const int read_flags = KDB_ENABLE_MEM_READ | + KDB_ENABLE_REG_READ; + + bool need_to_lockdown_write = false; + bool need_to_lockdown_read = false; + + if (kdb_cmd_enabled & (KDB_ENABLE_ALL | write_flags)) + need_to_lockdown_write = + security_locked_down(LOCKDOWN_DBG_WRITE_KERNEL); + + if (kdb_cmd_enabled & (KDB_ENABLE_ALL | read_flags)) + need_to_lockdown_read = + security_locked_down(LOCKDOWN_DBG_READ_KERNEL); + + /* De-compose KDB_ENABLE_ALL if required */ + if (need_to_lockdown_write || need_to_lockdown_read) + if (kdb_cmd_enabled & KDB_ENABLE_ALL) + kdb_cmd_enabled = KDB_ENABLE_MASK & ~KDB_ENABLE_ALL; + + if (need_to_lockdown_write) + kdb_cmd_enabled &= ~write_flags; + + if (need_to_lockdown_read) + kdb_cmd_enabled &= ~read_flags; +} + +/* + * Check whether the flags of the current command, the permissions of the kdb + * console and the lockdown state allow a command to be run. */ -static inline bool kdb_check_flags(kdb_cmdflags_t flags, int permissions, +static bool kdb_check_flags(kdb_cmdflags_t flags, int permissions, bool no_args) { /* permissions comes from userspace so needs massaging slightly */ @@ -1188,6 +1241,9 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, kdb_curr_task(raw_smp_processor_id()); KDB_DEBUG_STATE("kdb_local 1", reason); + + kdb_check_for_lockdown(); + kdb_go_count = 0; if (reason == KDB_REASON_DEBUG) { /* special case below */ diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index cb6425e52bf7a..9a4837b68e18f 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -450,7 +450,7 @@ void debug_dma_dump_mappings(struct device *dev) * At any time debug_dma_assert_idle() can be called to trigger a * warning if any cachelines in the given page are in the active set. */ -static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT); +static RADIX_TREE(dma_active_cacheline, GFP_ATOMIC); static DEFINE_SPINLOCK(radix_lock); #define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1) #define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT) @@ -616,7 +616,7 @@ static void add_dma_entry(struct dma_debug_entry *entry) rc = active_cacheline_insert(entry); if (rc == -ENOMEM) { - pr_err("cacheline tracking ENOMEM, dma-debug disabled\n"); + pr_err_once("cacheline tracking ENOMEM, dma-debug disabled\n"); global_disable = true; } @@ -846,7 +846,7 @@ static int dump_show(struct seq_file *seq, void *v) } DEFINE_SHOW_ATTRIBUTE(dump); -static void dma_debug_fs_init(void) +static int __init dma_debug_fs_init(void) { struct dentry *dentry = debugfs_create_dir("dma-api", NULL); @@ -859,7 +859,10 @@ static void dma_debug_fs_init(void) debugfs_create_u32("nr_total_entries", 0444, dentry, &nr_total_entries); debugfs_create_file("driver_filter", 0644, dentry, NULL, &filter_fops); debugfs_create_file("dump", 0444, dentry, NULL, &dump_fops); + + return 0; } +core_initcall_sync(dma_debug_fs_init); static int device_dma_allocations(struct device *dev, struct dma_debug_entry **out_entry) { @@ -944,8 +947,6 @@ static int dma_debug_init(void) spin_lock_init(&dma_entry_hash[i].lock); } - dma_debug_fs_init(); - nr_pages = DIV_ROUND_UP(nr_prealloc_entries, DMA_DEBUG_DYNAMIC_ENTRIES); for (i = 0; i < nr_pages; ++i) dma_debug_create_entries(GFP_KERNEL); @@ -979,7 +980,7 @@ static __init int dma_debug_cmdline(char *str) global_disable = true; } - return 0; + return 1; } static __init int dma_debug_entries_cmdline(char *str) @@ -988,7 +989,7 @@ static __init int dma_debug_entries_cmdline(char *str) return -EINVAL; if (!get_option(&str, &nr_prealloc_entries)) nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES; - return 0; + return 1; } __setup("dma_debug=", dma_debug_cmdline); @@ -1353,6 +1354,12 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, if (unlikely(dma_debug_disabled())) return; + for_each_sg(sg, s, nents, i) { + check_for_stack(dev, sg_page(s), s->offset); + if (!PageHighMem(sg_page(s))) + check_for_illegal_area(dev, sg_virt(s), s->length); + } + for_each_sg(sg, s, mapped_ents, i) { entry = dma_entry_alloc(); if (!entry) @@ -1368,12 +1375,6 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, entry->sg_call_ents = nents; entry->sg_mapped_ents = mapped_ents; - check_for_stack(dev, sg_page(s), s->offset); - - if (!PageHighMem(sg_page(s))) { - check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s)); - } - check_sg_segment(dev, s); add_dma_entry(entry); diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 0a093a675b632..f04cfc2e9e01a 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -306,7 +306,8 @@ void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, dma_direct_sync_single_for_cpu(dev, addr, size, dir); if (unlikely(is_swiotlb_buffer(phys))) - swiotlb_tbl_unmap_single(dev, phys, size, size, dir, attrs); + swiotlb_tbl_unmap_single(dev, phys, size, size, dir, + attrs | DMA_ATTR_SKIP_CPU_SYNC); } EXPORT_SYMBOL(dma_direct_unmap_page); diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index f99b79d7e1235..913cb71198af4 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -571,10 +571,14 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, */ for (i = 0; i < nslots; i++) io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT); - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE); - + /* + * When dir == DMA_FROM_DEVICE we could omit the copy from the orig + * to the tlb buffer, if we knew for sure the device will + * overwirte the entire current content. But we don't. Thus + * unconditional bounce may prevent leaking swiotlb content (i.e. + * kernel memory) to user-space. + */ + swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE); return tlb_addr; } diff --git a/kernel/events/core.c b/kernel/events/core.c index c1e28439fc439..d171b47ff2361 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -788,18 +788,13 @@ perf_cgroup_set_timestamp(struct task_struct *task, static DEFINE_PER_CPU(struct list_head, cgrp_cpuctx_list); -#define PERF_CGROUP_SWOUT 0x1 /* cgroup switch out every event */ -#define PERF_CGROUP_SWIN 0x2 /* cgroup switch in events based on task */ - /* * reschedule events based on the cgroup constraint of task. - * - * mode SWOUT : schedule out everything - * mode SWIN : schedule in based on cgroup for next */ -static void perf_cgroup_switch(struct task_struct *task, int mode) +static void perf_cgroup_switch(struct task_struct *task) { - struct perf_cpu_context *cpuctx; + struct perf_cgroup *cgrp; + struct perf_cpu_context *cpuctx, *tmp; struct list_head *list; unsigned long flags; @@ -809,35 +804,31 @@ static void perf_cgroup_switch(struct task_struct *task, int mode) */ local_irq_save(flags); + cgrp = perf_cgroup_from_task(task, NULL); + list = this_cpu_ptr(&cgrp_cpuctx_list); - list_for_each_entry(cpuctx, list, cgrp_cpuctx_entry) { + list_for_each_entry_safe(cpuctx, tmp, list, cgrp_cpuctx_entry) { WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0); + if (READ_ONCE(cpuctx->cgrp) == cgrp) + continue; perf_ctx_lock(cpuctx, cpuctx->task_ctx); perf_pmu_disable(cpuctx->ctx.pmu); - if (mode & PERF_CGROUP_SWOUT) { - cpu_ctx_sched_out(cpuctx, EVENT_ALL); - /* - * must not be done before ctxswout due - * to event_filter_match() in event_sched_out() - */ - cpuctx->cgrp = NULL; - } + cpu_ctx_sched_out(cpuctx, EVENT_ALL); + /* + * must not be done before ctxswout due + * to update_cgrp_time_from_cpuctx() in + * ctx_sched_out() + */ + cpuctx->cgrp = cgrp; + /* + * set cgrp before ctxsw in to allow + * perf_cgroup_set_timestamp() in ctx_sched_in() + * to not have to pass task around + */ + cpu_ctx_sched_in(cpuctx, EVENT_ALL, task); - if (mode & PERF_CGROUP_SWIN) { - WARN_ON_ONCE(cpuctx->cgrp); - /* - * set cgrp before ctxsw in to allow - * event_filter_match() to not have to pass - * task around - * we pass the cpuctx->ctx to perf_cgroup_from_task() - * because cgorup events are only per-cpu - */ - cpuctx->cgrp = perf_cgroup_from_task(task, - &cpuctx->ctx); - cpu_ctx_sched_in(cpuctx, EVENT_ALL, task); - } perf_pmu_enable(cpuctx->ctx.pmu); perf_ctx_unlock(cpuctx, cpuctx->task_ctx); } @@ -845,58 +836,6 @@ static void perf_cgroup_switch(struct task_struct *task, int mode) local_irq_restore(flags); } -static inline void perf_cgroup_sched_out(struct task_struct *task, - struct task_struct *next) -{ - struct perf_cgroup *cgrp1; - struct perf_cgroup *cgrp2 = NULL; - - rcu_read_lock(); - /* - * we come here when we know perf_cgroup_events > 0 - * we do not need to pass the ctx here because we know - * we are holding the rcu lock - */ - cgrp1 = perf_cgroup_from_task(task, NULL); - cgrp2 = perf_cgroup_from_task(next, NULL); - - /* - * only schedule out current cgroup events if we know - * that we are switching to a different cgroup. Otherwise, - * do no touch the cgroup events. - */ - if (cgrp1 != cgrp2) - perf_cgroup_switch(task, PERF_CGROUP_SWOUT); - - rcu_read_unlock(); -} - -static inline void perf_cgroup_sched_in(struct task_struct *prev, - struct task_struct *task) -{ - struct perf_cgroup *cgrp1; - struct perf_cgroup *cgrp2 = NULL; - - rcu_read_lock(); - /* - * we come here when we know perf_cgroup_events > 0 - * we do not need to pass the ctx here because we know - * we are holding the rcu lock - */ - cgrp1 = perf_cgroup_from_task(task, NULL); - cgrp2 = perf_cgroup_from_task(prev, NULL); - - /* - * only need to schedule in cgroup events if we are changing - * cgroup during ctxsw. Cgroup events were not scheduled - * out of ctxsw out if that was not the case. - */ - if (cgrp1 != cgrp2) - perf_cgroup_switch(task, PERF_CGROUP_SWIN); - - rcu_read_unlock(); -} - static inline int perf_cgroup_connect(int fd, struct perf_event *event, struct perf_event_attr *attr, struct perf_event *group_leader) @@ -1014,16 +953,6 @@ static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx) { } -static inline void perf_cgroup_sched_out(struct task_struct *task, - struct task_struct *next) -{ -} - -static inline void perf_cgroup_sched_in(struct task_struct *prev, - struct task_struct *task) -{ -} - static inline int perf_cgroup_connect(pid_t pid, struct perf_event *event, struct perf_event_attr *attr, struct perf_event *group_leader) @@ -1037,11 +966,6 @@ perf_cgroup_set_timestamp(struct task_struct *task, { } -static inline void -perf_cgroup_switch(struct task_struct *task, struct task_struct *next) -{ -} - static inline void perf_cgroup_set_shadow_time(struct perf_event *event, u64 now) { @@ -1058,6 +982,9 @@ list_update_cgroup_event(struct perf_event *event, { } +static void perf_cgroup_switch(struct task_struct *task) +{ +} #endif /* @@ -1744,8 +1671,8 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type) if (sample_type & PERF_SAMPLE_PERIOD) size += sizeof(data->period); - if (sample_type & PERF_SAMPLE_WEIGHT) - size += sizeof(data->weight); + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + size += sizeof(data->weight.full); if (sample_type & PERF_SAMPLE_READ) size += event->read_size; @@ -3366,7 +3293,7 @@ void __perf_event_task_sched_out(struct task_struct *task, * cgroup event are system-wide mode only */ if (atomic_read(this_cpu_ptr(&perf_cgroup_events))) - perf_cgroup_sched_out(task, next); + perf_cgroup_switch(next); } /* @@ -3597,16 +3524,6 @@ void __perf_event_task_sched_in(struct task_struct *prev, struct perf_event_context *ctx; int ctxn; - /* - * If cgroup events exist on this CPU, then we need to check if we have - * to switch in PMU state; cgroup event are system-wide mode only. - * - * Since cgroup events are CPU events, we must schedule these in before - * we schedule in the task events. - */ - if (atomic_read(this_cpu_ptr(&perf_cgroup_events))) - perf_cgroup_sched_in(prev, task); - for_each_task_context_nr(ctxn) { ctx = task->perf_event_ctxp[ctxn]; if (likely(!ctx)) @@ -5847,10 +5764,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) if (!atomic_inc_not_zero(&event->rb->mmap_count)) { /* - * Raced against perf_mmap_close() through - * perf_event_set_output(). Try again, hope for better - * luck. + * Raced against perf_mmap_close(); remove the + * event and try again. */ + ring_buffer_attach(event, NULL); mutex_unlock(&event->mmap_mutex); goto again; } @@ -6073,18 +5990,25 @@ static void perf_pending_event(struct irq_work *entry) * Later on, we might change it to a list if there is * another virtualization implementation supporting the callbacks. */ -struct perf_guest_info_callbacks *perf_guest_cbs; +struct perf_guest_info_callbacks __rcu *perf_guest_cbs; int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) { - perf_guest_cbs = cbs; + if (WARN_ON_ONCE(rcu_access_pointer(perf_guest_cbs))) + return -EBUSY; + + rcu_assign_pointer(perf_guest_cbs, cbs); return 0; } EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks); int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) { - perf_guest_cbs = NULL; + if (WARN_ON_ONCE(rcu_access_pointer(perf_guest_cbs) != cbs)) + return -EINVAL; + + rcu_assign_pointer(perf_guest_cbs, NULL); + synchronize_rcu(); return 0; } EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks); @@ -6517,8 +6441,8 @@ void perf_output_sample(struct perf_output_handle *handle, data->regs_user.regs); } - if (sample_type & PERF_SAMPLE_WEIGHT) - perf_output_put(handle, data->weight); + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + perf_output_put(handle, data->weight.full); if (sample_type & PERF_SAMPLE_DATA_SRC) perf_output_put(handle, data->data_src.val); @@ -6564,7 +6488,6 @@ void perf_output_sample(struct perf_output_handle *handle, static u64 perf_virt_to_phys(u64 virt) { u64 phys_addr = 0; - struct page *p = NULL; if (!virt) return 0; @@ -6583,14 +6506,15 @@ static u64 perf_virt_to_phys(u64 virt) * If failed, leave phys_addr as 0. */ if (current->mm != NULL) { + struct page *p; + pagefault_disable(); - if (__get_user_pages_fast(virt, 1, 0, &p) == 1) + if (__get_user_pages_fast(virt, 1, 0, &p) == 1) { phys_addr = page_to_phys(p) + virt % PAGE_SIZE; + put_page(p); + } pagefault_enable(); } - - if (p) - put_page(p); } return phys_addr; @@ -9287,7 +9211,7 @@ static void perf_event_addr_filters_apply(struct perf_event *event) return; if (ifh->nr_file_filters) { - mm = get_task_mm(event->ctx->task); + mm = get_task_mm(task); if (!mm) goto restart; @@ -9511,8 +9435,11 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr, } /* ready to consume more filters */ + kfree(filename); + filename = NULL; state = IF_STATE_ACTION; filter = NULL; + kernel = 0; } } @@ -10772,6 +10699,10 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, if (attr->sample_type & PERF_SAMPLE_REGS_INTR) ret = perf_reg_validate(attr->sample_regs_intr); + + if ((attr->sample_type & PERF_SAMPLE_WEIGHT) && + (attr->sample_type & PERF_SAMPLE_WEIGHT_STRUCT)) + return -EINVAL; out: return ret; @@ -10781,14 +10712,25 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, goto out; } +static void mutex_lock_double(struct mutex *a, struct mutex *b) +{ + if (b < a) + swap(a, b); + + mutex_lock(a); + mutex_lock_nested(b, SINGLE_DEPTH_NESTING); +} + static int perf_event_set_output(struct perf_event *event, struct perf_event *output_event) { struct ring_buffer *rb = NULL; int ret = -EINVAL; - if (!output_event) + if (!output_event) { + mutex_lock(&event->mmap_mutex); goto set; + } /* don't allow circular references */ if (event == output_event) @@ -10826,8 +10768,15 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event) event->pmu != output_event->pmu) goto out; + /* + * Hold both mmap_mutex to serialize against perf_mmap_close(). Since + * output_event is already on rb->event_list, and the list iteration + * restarts after every removal, it is guaranteed this new event is + * observed *OR* if output_event is already removed, it's guaranteed we + * observe !rb->mmap_count. + */ + mutex_lock_double(&event->mmap_mutex, &output_event->mmap_mutex); set: - mutex_lock(&event->mmap_mutex); /* Can't redirect output if we've got an active mmap() */ if (atomic_read(&event->mmap_count)) goto unlock; @@ -10837,6 +10786,12 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event) rb = ring_buffer_get(output_event); if (!rb) goto unlock; + + /* did we race against perf_mmap_close() */ + if (!atomic_read(&rb->mmap_count)) { + ring_buffer_put(rb); + goto unlock; + } } ring_buffer_attach(event, rb); @@ -10844,20 +10799,13 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event) ret = 0; unlock: mutex_unlock(&event->mmap_mutex); + if (output_event) + mutex_unlock(&output_event->mmap_mutex); out: return ret; } -static void mutex_lock_double(struct mutex *a, struct mutex *b) -{ - if (b < a) - swap(a, b); - - mutex_lock(a); - mutex_lock_nested(b, SINGLE_DEPTH_NESTING); -} - static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id) { bool nmi_safe = false; @@ -11132,6 +11080,9 @@ SYSCALL_DEFINE5(perf_event_open, * Do not allow to attach to a group in a different task * or CPU context. If we're moving SW events, we'll fix * this up later, so allow that. + * + * Racy, not holding group_leader->ctx->mutex, see comment with + * perf_event_ctx_lock(). */ if (!move_group && group_leader->ctx != ctx) goto err_context; @@ -11199,6 +11150,7 @@ SYSCALL_DEFINE5(perf_event_open, } else { perf_event_ctx_unlock(group_leader, gctx); move_group = 0; + goto not_move_group; } } @@ -11215,7 +11167,17 @@ SYSCALL_DEFINE5(perf_event_open, } } else { mutex_lock(&ctx->mutex); + + /* + * Now that we hold ctx->lock, (re)validate group_leader->ctx == ctx, + * see the group_leader && !move_group test earlier. + */ + if (group_leader && group_leader->ctx != ctx) { + err = -EINVAL; + goto err_locked; + } } +not_move_group: if (ctx->task == TASK_TOMBSTONE) { err = -ESRCH; @@ -12365,7 +12327,7 @@ static int __perf_cgroup_move(void *info) { struct task_struct *task = info; rcu_read_lock(); - perf_cgroup_switch(task, PERF_CGROUP_SWOUT | PERF_CGROUP_SWIN); + perf_cgroup_switch(task); rcu_read_unlock(); return 0; } diff --git a/kernel/fork.c b/kernel/fork.c index efb0b92c6140b..27ead13ba46cd 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2169,10 +2169,6 @@ static __latent_entropy struct task_struct *copy_process( goto bad_fork_cancel_cgroup; } - /* past the last point of failure */ - if (pidfile) - fd_install(pidfd, pidfile); - init_task_pid_links(p); if (likely(p->pid)) { ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); @@ -2221,6 +2217,9 @@ static __latent_entropy struct task_struct *copy_process( syscall_tracepoint_update(p); write_unlock_irq(&tasklist_lock); + if (pidfile) + fd_install(pidfd, pidfile); + proc_fork_connector(p); cgroup_post_fork(p); cgroup_threadgroup_change_end(current); diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index 4d89ad4fae3bb..5fb78addff51b 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -269,8 +269,9 @@ static int __irq_build_affinity_masks(unsigned int startvec, */ if (numvecs <= nodes) { for_each_node_mask(n, nodemsk) { - cpumask_or(&masks[curvec].mask, &masks[curvec].mask, - node_to_cpumask[n]); + /* Ensure that only CPUs which are in both masks are set */ + cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]); + cpumask_or(&masks[curvec].mask, &masks[curvec].mask, nmsk); if (++curvec == last_affv) curvec = firstvec; } diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 16ee716e82917..98a972e9a1a83 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -195,7 +195,7 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc) retval = __handle_irq_event_percpu(desc, &flags); - add_interrupt_randomness(desc->irq_data.irq, flags); + add_interrupt_randomness(desc->irq_data.irq); if (!noirqdebug) note_interrupt(desc, retval); diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 7db284b10ac9c..f9e8a7aaaa37f 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -29,12 +29,14 @@ extern struct irqaction chained_action; * IRQTF_WARNED - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed * IRQTF_AFFINITY - irq thread is requested to adjust affinity * IRQTF_FORCED_THREAD - irq action is force threaded + * IRQTF_READY - signals that irq thread is ready */ enum { IRQTF_RUNTHREAD, IRQTF_WARNED, IRQTF_AFFINITY, IRQTF_FORCED_THREAD, + IRQTF_READY, }; /* diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 6a27603fe1cfc..91a10abac9095 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -405,6 +405,7 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags, lockdep_set_class(&desc->lock, &irq_desc_lock_class); mutex_init(&desc->request_mutex); init_rcu_head(&desc->rcu); + init_waitqueue_head(&desc->wait_for_threads); desc_set_defaults(irq, desc, node, affinity, owner); irqd_set(&desc->irq_data, flags); @@ -573,6 +574,7 @@ int __init early_irq_init(void) raw_spin_lock_init(&desc[i].lock); lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); mutex_init(&desc[i].request_mutex); + init_waitqueue_head(&desc[i].wait_for_threads); desc_set_defaults(i, &desc[i], node, NULL, NULL); } return arch_early_irq_init(); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 49f2b5fb770de..3e3214e11a632 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1102,6 +1102,31 @@ static void irq_wake_secondary(struct irq_desc *desc, struct irqaction *action) raw_spin_unlock_irq(&desc->lock); } +/* + * Internal function to notify that a interrupt thread is ready. + */ +static void irq_thread_set_ready(struct irq_desc *desc, + struct irqaction *action) +{ + set_bit(IRQTF_READY, &action->thread_flags); + wake_up(&desc->wait_for_threads); +} + +/* + * Internal function to wake up a interrupt thread and wait until it is + * ready. + */ +static void wake_up_and_wait_for_irq_thread_ready(struct irq_desc *desc, + struct irqaction *action) +{ + if (!action || !action->thread) + return; + + wake_up_process(action->thread); + wait_event(desc->wait_for_threads, + test_bit(IRQTF_READY, &action->thread_flags)); +} + /* * Interrupt handler thread */ @@ -1113,6 +1138,8 @@ static int irq_thread(void *data) irqreturn_t (*handler_fn)(struct irq_desc *desc, struct irqaction *action); + irq_thread_set_ready(desc, action); + if (force_irqthreads && test_bit(IRQTF_FORCED_THREAD, &action->thread_flags)) handler_fn = irq_forced_thread_fn; @@ -1541,8 +1568,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) } if (!shared) { - init_waitqueue_head(&desc->wait_for_threads); - /* Setup the type (level, edge polarity) if configured: */ if (new->flags & IRQF_TRIGGER_MASK) { ret = __irq_set_trigger(desc, @@ -1632,14 +1657,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) irq_setup_timings(desc, new); - /* - * Strictly no need to wake it up, but hung_task complains - * when no hard interrupt wakes the thread up. - */ - if (new->thread) - wake_up_process(new->thread); - if (new->secondary) - wake_up_process(new->secondary->thread); + wake_up_and_wait_for_irq_thread_ready(desc, new); + wake_up_and_wait_for_irq_thread_ready(desc, new->secondary); register_irq_proc(irq, desc); new->dir = NULL; diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c index b5985da80acf0..7ccc8edce46dc 100644 --- a/kernel/irq/timings.c +++ b/kernel/irq/timings.c @@ -799,12 +799,14 @@ static int __init irq_timings_test_irqs(struct timings_intervals *ti) __irq_timings_store(irq, irqs, ti->intervals[i]); if (irqs->circ_timings[i & IRQ_TIMINGS_MASK] != index) { + ret = -EBADSLT; pr_err("Failed to store in the circular buffer\n"); goto out; } } if (irqs->count != ti->count) { + ret = -ERANGE; pr_err("Count differs\n"); goto out; } diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index b17998fa03f12..5b58149bcd900 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -107,40 +107,6 @@ int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, } #endif -/* - * arch_kexec_apply_relocations_add - apply relocations of type RELA - * @pi: Purgatory to be relocated. - * @section: Section relocations applying to. - * @relsec: Section containing RELAs. - * @symtab: Corresponding symtab. - * - * Return: 0 on success, negative errno on error. - */ -int __weak -arch_kexec_apply_relocations_add(struct purgatory_info *pi, Elf_Shdr *section, - const Elf_Shdr *relsec, const Elf_Shdr *symtab) -{ - pr_err("RELA relocation unsupported.\n"); - return -ENOEXEC; -} - -/* - * arch_kexec_apply_relocations - apply relocations of type REL - * @pi: Purgatory to be relocated. - * @section: Section relocations applying to. - * @relsec: Section containing RELs. - * @symtab: Corresponding symtab. - * - * Return: 0 on success, negative errno on error. - */ -int __weak -arch_kexec_apply_relocations(struct purgatory_info *pi, Elf_Shdr *section, - const Elf_Shdr *relsec, const Elf_Shdr *symtab) -{ - pr_err("REL relocation unsupported.\n"); - return -ENOEXEC; -} - /* * Free up memory used by kernel, initrd, and command line. This is temporary * memory allocation which is not needed any more after these buffers have diff --git a/kernel/kprobes.c b/kernel/kprobes.c index a7812c115e487..c93340bae3ac2 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2003,6 +2003,9 @@ int register_kretprobe(struct kretprobe *rp) } } + if (rp->data_size > KRETPROBE_MAX_DATA_SIZE) + return -E2BIG; + rp->kp.pre_handler = pre_handler_kretprobe; rp->kp.post_handler = NULL; rp->kp.fault_handler = NULL; @@ -2712,14 +2715,13 @@ static const struct file_operations fops_kp = { static int __init debugfs_kprobe_init(void) { struct dentry *dir; - unsigned int value = 1; dir = debugfs_create_dir("kprobes", NULL); debugfs_create_file("list", 0400, dir, NULL, &debugfs_kprobes_operations); - debugfs_create_file("enabled", 0600, dir, &value, &fops_kp); + debugfs_create_file("enabled", 0600, dir, NULL, &fops_kp); debugfs_create_file("blacklist", 0400, dir, NULL, &debugfs_kprobe_blacklist_ops); diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 5954d52d0a1d4..9db10ba07f8d2 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -190,7 +190,7 @@ static int klp_find_object_symbol(const char *objname, const char *name, return -EINVAL; } -static int klp_resolve_symbols(Elf64_Shdr *sechdrs, const char *strtab, +static int klp_resolve_symbols(Elf_Shdr *sechdrs, const char *strtab, unsigned int symndx, Elf_Shdr *relasec, const char *sec_objname) { @@ -218,7 +218,7 @@ static int klp_resolve_symbols(Elf64_Shdr *sechdrs, const char *strtab, relas = (Elf_Rela *) relasec->sh_addr; /* For each rela in this klp relocation section */ for (i = 0; i < relasec->sh_size / sizeof(Elf_Rela); i++) { - sym = (Elf64_Sym *)sechdrs[symndx].sh_addr + ELF_R_SYM(relas[i].r_info); + sym = (Elf_Sym *)sechdrs[symndx].sh_addr + ELF_R_SYM(relas[i].r_info); if (sym->st_shndx != SHN_LIVEPATCH) { pr_err("symbol %s is not marked as a livepatch symbol\n", strtab + sym->st_name); diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c index e422d49925929..98d328d7aee13 100644 --- a/kernel/livepatch/transition.c +++ b/kernel/livepatch/transition.c @@ -643,6 +643,13 @@ void klp_force_transition(void) for_each_possible_cpu(cpu) klp_update_patch_state(idle_task(cpu)); - klp_for_each_patch(patch) - patch->forced = true; + /* Set forced flag for patches being removed. */ + if (klp_target_state == KLP_UNPATCHED) + klp_transition_patch->forced = true; + else if (klp_transition_patch->replace) { + klp_for_each_patch(patch) { + if (patch != klp_transition_patch) + patch->forced = true; + } + } } diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index c73bfb1a77b7c..806b25a2a6913 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -832,7 +832,7 @@ look_up_lock_class(const struct lockdep_map *lock, unsigned int subclass) if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) return NULL; - hlist_for_each_entry_rcu(class, hash_head, hash_entry) { + hlist_for_each_entry_rcu_notrace(class, hash_head, hash_entry) { if (class->key == key) { /* * Huh! same key, different name? Did someone trample diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index c0c7784f074b2..6644f5aea33a6 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -101,55 +101,58 @@ static inline unsigned long __owner_flags(unsigned long owner) return owner & MUTEX_FLAGS; } -/* - * Trylock variant that retuns the owning task on failure. - */ -static inline struct task_struct *__mutex_trylock_or_owner(struct mutex *lock) +static inline struct task_struct *__mutex_trylock_common(struct mutex *lock, bool handoff) { unsigned long owner, curr = (unsigned long)current; owner = atomic_long_read(&lock->owner); for (;;) { /* must loop, can race against a flag */ - unsigned long old, flags = __owner_flags(owner); + unsigned long flags = __owner_flags(owner); unsigned long task = owner & ~MUTEX_FLAGS; if (task) { - if (likely(task != curr)) - break; - - if (likely(!(flags & MUTEX_FLAG_PICKUP))) + if (flags & MUTEX_FLAG_PICKUP) { + if (task != curr) + break; + flags &= ~MUTEX_FLAG_PICKUP; + } else if (handoff) { + if (flags & MUTEX_FLAG_HANDOFF) + break; + flags |= MUTEX_FLAG_HANDOFF; + } else { break; - - flags &= ~MUTEX_FLAG_PICKUP; + } } else { #ifdef CONFIG_DEBUG_MUTEXES - DEBUG_LOCKS_WARN_ON(flags & MUTEX_FLAG_PICKUP); + DEBUG_LOCKS_WARN_ON(flags & (MUTEX_FLAG_HANDOFF | MUTEX_FLAG_PICKUP)); #endif + task = curr; } - /* - * We set the HANDOFF bit, we must make sure it doesn't live - * past the point where we acquire it. This would be possible - * if we (accidentally) set the bit on an unlocked mutex. - */ - flags &= ~MUTEX_FLAG_HANDOFF; - - old = atomic_long_cmpxchg_acquire(&lock->owner, owner, curr | flags); - if (old == owner) - return NULL; - - owner = old; + if (atomic_long_try_cmpxchg_acquire(&lock->owner, &owner, task | flags)) { + if (task == curr) + return NULL; + break; + } } return __owner_task(owner); } +/* + * Trylock or set HANDOFF + */ +static inline bool __mutex_trylock_or_handoff(struct mutex *lock, bool handoff) +{ + return !__mutex_trylock_common(lock, handoff); +} + /* * Actual trylock that will work on any unlocked state. */ static inline bool __mutex_trylock(struct mutex *lock) { - return !__mutex_trylock_or_owner(lock); + return !__mutex_trylock_common(lock, false); } #ifndef CONFIG_DEBUG_LOCK_ALLOC @@ -178,10 +181,7 @@ static __always_inline bool __mutex_unlock_fast(struct mutex *lock) { unsigned long curr = (unsigned long)current; - if (atomic_long_cmpxchg_release(&lock->owner, curr, 0UL) == curr) - return true; - - return false; + return atomic_long_try_cmpxchg_release(&lock->owner, &curr, 0UL); } #endif @@ -236,7 +236,7 @@ static void __mutex_handoff(struct mutex *lock, struct task_struct *task) unsigned long owner = atomic_long_read(&lock->owner); for (;;) { - unsigned long old, new; + unsigned long new; #ifdef CONFIG_DEBUG_MUTEXES DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current); @@ -248,11 +248,8 @@ static void __mutex_handoff(struct mutex *lock, struct task_struct *task) if (task) new |= MUTEX_FLAG_PICKUP; - old = atomic_long_cmpxchg_release(&lock->owner, owner, new); - if (old == owner) + if (atomic_long_try_cmpxchg_release(&lock->owner, &owner, new)) break; - - owner = old; } } @@ -508,6 +505,14 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) #ifdef CONFIG_MUTEX_SPIN_ON_OWNER +/* + * Trylock variant that returns the owning task on failure. + */ +static inline struct task_struct *__mutex_trylock_or_owner(struct mutex *lock) +{ + return __mutex_trylock_common(lock, false); +} + static inline bool ww_mutex_spin_on_owner(struct mutex *lock, struct ww_acquire_ctx *ww_ctx, struct mutex_waiter *waiter) @@ -938,7 +943,6 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx) { struct mutex_waiter waiter; - bool first = false; struct ww_mutex *ww; int ret; @@ -1017,6 +1021,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, set_current_state(state); for (;;) { + bool first; + /* * Once we hold wait_lock, we're serialized against * mutex_unlock() handing the lock off to us, do a trylock @@ -1045,15 +1051,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, spin_unlock(&lock->wait_lock); schedule_preempt_disabled(); - /* - * ww_mutex needs to always recheck its position since its waiter - * list is not FIFO ordered. - */ - if (ww_ctx || !first) { - first = __mutex_waiter_is_first(lock, &waiter); - if (first) - __mutex_set_flag(lock, MUTEX_FLAG_HANDOFF); - } + first = __mutex_waiter_is_first(lock, &waiter); set_current_state(state); /* @@ -1061,7 +1059,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, * state back to RUNNING and fall through the next schedule(), * or we must see its unlock and acquire. */ - if (__mutex_trylock(lock) || + if (__mutex_trylock_or_handoff(lock, first) || (first && mutex_optimistic_spin(lock, ww_ctx, &waiter))) break; @@ -1247,8 +1245,6 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne */ owner = atomic_long_read(&lock->owner); for (;;) { - unsigned long old; - #ifdef CONFIG_DEBUG_MUTEXES DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current); DEBUG_LOCKS_WARN_ON(owner & MUTEX_FLAG_PICKUP); @@ -1257,16 +1253,12 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne if (owner & MUTEX_FLAG_HANDOFF) break; - old = atomic_long_cmpxchg_release(&lock->owner, owner, - __owner_flags(owner)); - if (old == owner) { + if (atomic_long_try_cmpxchg_release(&lock->owner, &owner, __owner_flags(owner))) { if (owner & MUTEX_FLAG_WAITERS) break; return; } - - owner = old; } spin_lock(&lock->wait_lock); diff --git a/kernel/module.c b/kernel/module.c index 4cd3e75e8a8a3..f4ca942f1af44 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3373,7 +3373,7 @@ static int find_module_sections(struct module *mod, struct load_info *info) #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD /* sechdrs[0].sh_size is always zero */ - mod->ftrace_callsites = section_objs(info, "__mcount_loc", + mod->ftrace_callsites = section_objs(info, FTRACE_CALLSITE_SECTION, sizeof(*mod->ftrace_callsites), &mod->num_ftrace_callsites); #endif @@ -3707,12 +3707,6 @@ static noinline int do_init_module(struct module *mod) } freeinit->module_init = mod->init_layout.base; - /* - * We want to find out whether @mod uses async during init. Clear - * PF_USED_ASYNC. async_schedule*() will set it. - */ - current->flags &= ~PF_USED_ASYNC; - do_mod_ctors(mod); /* Start the module */ if (mod->init != NULL) @@ -3738,22 +3732,13 @@ static noinline int do_init_module(struct module *mod) /* * We need to finish all async code before the module init sequence - * is done. This has potential to deadlock. For example, a newly - * detected block device can trigger request_module() of the - * default iosched from async probing task. Once userland helper - * reaches here, async_synchronize_full() will wait on the async - * task waiting on request_module() and deadlock. - * - * This deadlock is avoided by perfomring async_synchronize_full() - * iff module init queued any async jobs. This isn't a full - * solution as it will deadlock the same if module loading from - * async jobs nests more than once; however, due to the various - * constraints, this hack seems to be the best option for now. - * Please refer to the following thread for details. + * is done. This has potential to deadlock if synchronous module + * loading is requested from async (which is not allowed!). * - * http://thread.gmane.org/gmane.linux.kernel/1420814 + * See commit 0fdff3ec6d87 ("async, kmod: warn on synchronous + * request_module() from async workers") for more details. */ - if (!mod->async_probe_requested && (current->flags & PF_USED_ASYNC)) + if (!mod->async_probe_requested) async_synchronize_full(); ftrace_free_mem(mod, mod->init_layout.base, mod->init_layout.base + diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 8dac32bd90894..7ef35eb985baf 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -149,7 +149,9 @@ static struct em_perf_domain *em_create_pd(cpumask_t *span, int nr_states, /* Compute the cost of each capacity_state. */ fmax = (u64) table[nr_states - 1].frequency; for (i = 0; i < nr_states; i++) { - table[i].cost = div64_u64(fmax * table[i].power, + unsigned long power_res = em_scale_power(table[i].power); + + table[i].cost = div64_u64(fmax * power_res, table[i].frequency); } diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 69c4cd472def3..406b4cbbec5e7 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -676,7 +676,7 @@ static int load_image_and_restore(void) goto Unlock; error = swsusp_read(&flags); - swsusp_close(FMODE_READ); + swsusp_close(FMODE_READ | FMODE_EXCL); if (!error) hibernation_restore(flags & SF_PLATFORM_MODE); @@ -871,7 +871,7 @@ static int software_resume(void) /* The snapshot device should not be opened while we're running */ if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { error = -EBUSY; - swsusp_close(FMODE_READ); + swsusp_close(FMODE_READ | FMODE_EXCL); goto Unlock; } @@ -907,7 +907,7 @@ static int software_resume(void) pm_pr_dbg("Hibernation image not present or could not be loaded.\n"); return error; Close_Finish: - swsusp_close(FMODE_READ); + swsusp_close(FMODE_READ | FMODE_EXCL); goto Finish; } @@ -1216,7 +1216,7 @@ static int __init resumedelay_setup(char *str) int rc = kstrtouint(str, 0, &resume_delay); if (rc) - return rc; + pr_warn("resumedelay: bad option string '%s'\n", str); return 1; } diff --git a/kernel/power/main.c b/kernel/power/main.c index e26de7af520be..718884857830b 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -472,7 +472,10 @@ static ssize_t pm_wakeup_irq_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return pm_wakeup_irq ? sprintf(buf, "%u\n", pm_wakeup_irq) : -ENODATA; + if (!pm_wakeup_irq()) + return -ENODATA; + + return sprintf(buf, "%u\n", pm_wakeup_irq()); } power_attr_ro(pm_wakeup_irq); diff --git a/kernel/power/process.c b/kernel/power/process.c index 4b6a54da7e65b..e9f4def4d2915 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -134,7 +134,7 @@ int freeze_processes(void) if (!pm_freezing) atomic_inc(&system_freezing_cnt); - pm_wakeup_clear(true); + pm_wakeup_clear(0); pr_info("Freezing user space processes ... "); pm_freezing = true; error = try_to_freeze_tasks(true); diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 569658f4ae5a4..917e3ed5194fd 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -945,8 +945,7 @@ static void memory_bm_recycle(struct memory_bitmap *bm) * Register a range of page frames the contents of which should not be saved * during hibernation (to be used in the early initialization code). */ -void __init __register_nosave_region(unsigned long start_pfn, - unsigned long end_pfn, int use_kmalloc) +void __init register_nosave_region(unsigned long start_pfn, unsigned long end_pfn) { struct nosave_region *region; @@ -962,18 +961,12 @@ void __init __register_nosave_region(unsigned long start_pfn, goto Report; } } - if (use_kmalloc) { - /* During init, this shouldn't fail */ - region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL); - BUG_ON(!region); - } else { - /* This allocation cannot fail */ - region = memblock_alloc(sizeof(struct nosave_region), - SMP_CACHE_BYTES); - if (!region) - panic("%s: Failed to allocate %zu bytes\n", __func__, - sizeof(struct nosave_region)); - } + /* This allocation cannot fail */ + region = memblock_alloc(sizeof(struct nosave_region), + SMP_CACHE_BYTES); + if (!region) + panic("%s: Failed to allocate %zu bytes\n", __func__, + sizeof(struct nosave_region)); region->start_pfn = start_pfn; region->end_pfn = end_pfn; list_add_tail(®ion->list, &nosave_regions); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 27f149f5d4a9f..5dea2778a3dbb 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -138,8 +138,6 @@ static void s2idle_loop(void) break; } - pm_wakeup_clear(false); - s2idle_enter(); } diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c index 60564b58de077..bfd2a96c695ca 100644 --- a/kernel/power/suspend_test.c +++ b/kernel/power/suspend_test.c @@ -157,22 +157,22 @@ static int __init setup_test_suspend(char *value) value++; suspend_type = strsep(&value, ","); if (!suspend_type) - return 0; + return 1; repeat = strsep(&value, ","); if (repeat) { if (kstrtou32(repeat, 0, &test_repeat_count_max)) - return 0; + return 1; } for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) if (!strcmp(pm_labels[i], suspend_type)) { test_state_label = pm_labels[i]; - return 0; + return 1; } printk(warn_bad_state, suspend_type); - return 0; + return 1; } __setup("test_suspend", setup_test_suspend); diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 0516c422206d8..bcc9769e8a3b5 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -292,7 +292,7 @@ static int hib_submit_io(int op, int op_flags, pgoff_t page_off, void *addr, return error; } -static blk_status_t hib_wait_io(struct hib_bio_batch *hb) +static int hib_wait_io(struct hib_bio_batch *hb) { wait_event(hb->wait, atomic_read(&hb->count) == 0); return blk_status_to_errno(hb->error); @@ -1509,9 +1509,10 @@ int swsusp_read(unsigned int *flags_p) int swsusp_check(void) { int error; + void *holder; hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device, - FMODE_READ, NULL); + FMODE_READ | FMODE_EXCL, &holder); if (!IS_ERR(hib_resume_bdev)) { set_blocksize(hib_resume_bdev, PAGE_SIZE); clear_page(swsusp_header); @@ -1533,7 +1534,7 @@ int swsusp_check(void) put: if (error) - blkdev_put(hib_resume_bdev, FMODE_READ); + blkdev_put(hib_resume_bdev, FMODE_READ | FMODE_EXCL); else pr_debug("Image signature found, resuming\n"); } else { diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c index 105df4dfc7839..52571dcad768b 100644 --- a/kernel/power/wakelock.c +++ b/kernel/power/wakelock.c @@ -39,23 +39,20 @@ ssize_t pm_show_wakelocks(char *buf, bool show_active) { struct rb_node *node; struct wakelock *wl; - char *str = buf; - char *end = buf + PAGE_SIZE; + int len = 0; mutex_lock(&wakelocks_lock); for (node = rb_first(&wakelocks_tree); node; node = rb_next(node)) { wl = rb_entry(node, struct wakelock, node); if (wl->ws->active == show_active) - str += scnprintf(str, end - str, "%s ", wl->name); + len += sysfs_emit_at(buf, len, "%s ", wl->name); } - if (str > buf) - str--; - str += scnprintf(str, end - str, "\n"); + len += sysfs_emit_at(buf, len, "\n"); mutex_unlock(&wakelocks_lock); - return (str - buf); + return len; } #if CONFIG_PM_WAKELOCKS_LIMIT > 0 diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 5569ef6bc1839..bb2198b40756d 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -146,8 +146,10 @@ static int __control_devkmsg(char *str) static int __init control_devkmsg(char *str) { - if (__control_devkmsg(str) < 0) + if (__control_devkmsg(str) < 0) { + pr_warn("printk.devkmsg: bad option string '%s'\n", str); return 1; + } /* * Set sysctl string accordingly: @@ -166,7 +168,7 @@ static int __init control_devkmsg(char *str) */ devkmsg_log |= DEVKMSG_LOG_MASK_LOCK; - return 0; + return 1; } __setup("printk.devkmsg=", control_devkmsg); @@ -2193,8 +2195,15 @@ static int __init console_setup(char *str) char *s, *options, *brl_options = NULL; int idx; - if (str[0] == 0) + /* + * console="" or console=null have been suggested as a way to + * disable console output. Use ttynull that has been created + * for exacly this purpose. + */ + if (str[0] == 0 || strcmp(str, "null") == 0) { + __add_preferred_console("ttynull", 0, NULL, NULL); return 1; + } if (_braille_console_setup(&str, &brl_options)) return 1; diff --git a/kernel/profile.c b/kernel/profile.c index af7c94bf5fa1d..e97e42aaf2023 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -41,7 +41,8 @@ struct profile_hit { #define NR_PROFILE_GRP (NR_PROFILE_HIT/PROFILE_GRPSZ) static atomic_t *prof_buffer; -static unsigned long prof_len, prof_shift; +static unsigned long prof_len; +static unsigned short int prof_shift; int prof_on __read_mostly; EXPORT_SYMBOL_GPL(prof_on); @@ -67,8 +68,8 @@ int profile_setup(char *str) if (str[strlen(sleepstr)] == ',') str += strlen(sleepstr) + 1; if (get_option(&str, &par)) - prof_shift = par; - pr_info("kernel sleep profiling enabled (shift: %ld)\n", + prof_shift = clamp(par, 0, BITS_PER_LONG - 1); + pr_info("kernel sleep profiling enabled (shift: %u)\n", prof_shift); #else pr_warn("kernel sleep profiling requires CONFIG_SCHEDSTATS\n"); @@ -78,21 +79,21 @@ int profile_setup(char *str) if (str[strlen(schedstr)] == ',') str += strlen(schedstr) + 1; if (get_option(&str, &par)) - prof_shift = par; - pr_info("kernel schedule profiling enabled (shift: %ld)\n", + prof_shift = clamp(par, 0, BITS_PER_LONG - 1); + pr_info("kernel schedule profiling enabled (shift: %u)\n", prof_shift); } else if (!strncmp(str, kvmstr, strlen(kvmstr))) { prof_on = KVM_PROFILING; if (str[strlen(kvmstr)] == ',') str += strlen(kvmstr) + 1; if (get_option(&str, &par)) - prof_shift = par; - pr_info("kernel KVM profiling enabled (shift: %ld)\n", + prof_shift = clamp(par, 0, BITS_PER_LONG - 1); + pr_info("kernel KVM profiling enabled (shift: %u)\n", prof_shift); } else if (get_option(&str, &par)) { - prof_shift = par; + prof_shift = clamp(par, 0, BITS_PER_LONG - 1); prof_on = CPU_PROFILING; - pr_info("kernel profiling enabled (shift: %ld)\n", + pr_info("kernel profiling enabled (shift: %u)\n", prof_shift); } return 1; @@ -468,7 +469,7 @@ read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos) unsigned long p = *ppos; ssize_t read; char *pnt; - unsigned int sample_step = 1 << prof_shift; + unsigned long sample_step = 1UL << prof_shift; profile_flip_buffers(); if (p >= (prof_len+1)*sizeof(unsigned int)) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index eb4d04cb3aaf5..aab480e24bd60 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -370,6 +370,26 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode) return !err; } +static int check_ptrace_options(unsigned long data) +{ + if (data & ~(unsigned long)PTRACE_O_MASK) + return -EINVAL; + + if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) { + if (!IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) || + !IS_ENABLED(CONFIG_SECCOMP)) + return -EINVAL; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (seccomp_mode(¤t->seccomp) != SECCOMP_MODE_DISABLED || + current->ptrace & PT_SUSPEND_SECCOMP) + return -EPERM; + } + return 0; +} + static int ptrace_attach(struct task_struct *task, long request, unsigned long addr, unsigned long flags) @@ -381,8 +401,16 @@ static int ptrace_attach(struct task_struct *task, long request, if (seize) { if (addr != 0) goto out; + /* + * This duplicates the check in check_ptrace_options() because + * ptrace_attach() and ptrace_setoptions() have historically + * used different error codes for unknown ptrace options. + */ if (flags & ~(unsigned long)PTRACE_O_MASK) goto out; + retval = check_ptrace_options(flags); + if (retval) + return retval; flags = PT_PTRACED | PT_SEIZED | (flags << PT_OPT_FLAG_SHIFT); } else { flags = PT_PTRACED; @@ -655,22 +683,11 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds static int ptrace_setoptions(struct task_struct *child, unsigned long data) { unsigned flags; + int ret; - if (data & ~(unsigned long)PTRACE_O_MASK) - return -EINVAL; - - if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) { - if (!IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) || - !IS_ENABLED(CONFIG_SECCOMP)) - return -EINVAL; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (seccomp_mode(¤t->seccomp) != SECCOMP_MODE_DISABLED || - current->ptrace & PT_SUSPEND_SECCOMP) - return -EPERM; - } + ret = check_ptrace_options(data); + if (ret) + return ret; /* Avoid intermediate state when all opts are cleared */ flags = child->ptrace; @@ -1202,9 +1219,8 @@ int ptrace_request(struct task_struct *child, long request, return ptrace_resume(child, request, data); case PTRACE_KILL: - if (child->exit_state) /* already dead */ - return 0; - return ptrace_resume(child, request, SIGKILL); + send_sig_info(SIGKILL, SEND_SIG_NOINFO, child); + return 0; #ifdef CONFIG_HAVE_ARCH_TRACEHOOK case PTRACE_GETREGSET: diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 9f00e30d11a46..5797cf2909b00 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1603,7 +1603,7 @@ static void rcu_gp_fqs(bool first_time) struct rcu_node *rnp = rcu_get_root(); WRITE_ONCE(rcu_state.gp_activity, jiffies); - rcu_state.n_force_qs++; + WRITE_ONCE(rcu_state.n_force_qs, rcu_state.n_force_qs + 1); if (first_time) { /* Collect dyntick-idle snapshots. */ force_qs_rnp(dyntick_save_progress_counter); @@ -2208,7 +2208,7 @@ static void rcu_do_batch(struct rcu_data *rdp) /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */ if (count == 0 && rdp->qlen_last_fqs_check != 0) { rdp->qlen_last_fqs_check = 0; - rdp->n_force_qs_snap = rcu_state.n_force_qs; + rdp->n_force_qs_snap = READ_ONCE(rcu_state.n_force_qs); } else if (count < rdp->qlen_last_fqs_check - qhimark) rdp->qlen_last_fqs_check = count; @@ -2536,10 +2536,10 @@ static void __call_rcu_core(struct rcu_data *rdp, struct rcu_head *head, } else { /* Give the grace period a kick. */ rdp->blimit = DEFAULT_MAX_RCU_BLIMIT; - if (rcu_state.n_force_qs == rdp->n_force_qs_snap && + if (READ_ONCE(rcu_state.n_force_qs) == rdp->n_force_qs_snap && rcu_segcblist_first_pend_cb(&rdp->cblist) != head) rcu_force_quiescent_state(); - rdp->n_force_qs_snap = rcu_state.n_force_qs; + rdp->n_force_qs_snap = READ_ONCE(rcu_state.n_force_qs); rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist); } } @@ -3030,7 +3030,7 @@ int rcutree_prepare_cpu(unsigned int cpu) /* Set up local state, ensuring consistent view of global state. */ raw_spin_lock_irqsave_rcu_node(rnp, flags); rdp->qlen_last_fqs_check = 0; - rdp->n_force_qs_snap = rcu_state.n_force_qs; + rdp->n_force_qs_snap = READ_ONCE(rcu_state.n_force_qs); rdp->blimit = blimit; if (rcu_segcblist_empty(&rdp->cblist) && /* No early-boot CBs? */ !rcu_segcblist_is_offloaded(&rdp->cblist)) diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index df90d4d7ad2e2..173e3ce607900 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -382,6 +382,7 @@ static void sync_rcu_exp_select_node_cpus(struct work_struct *wp) continue; } if (get_cpu() == cpu) { + mask_ofl_test |= mask; put_cpu(); continue; } @@ -738,7 +739,7 @@ static void sync_sched_exp_online_cleanup(int cpu) my_cpu = get_cpu(); /* Quiescent state either not needed or already requested, leave. */ if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) || - __this_cpu_read(rcu_data.cpu_no_qs.b.exp)) { + rdp->cpu_no_qs.b.exp) { put_cpu(); return; } diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index a71a4a272515d..2c127d438fe0a 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -523,16 +523,17 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags) raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } - /* Unboost if we were boosted. */ - if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex) - rt_mutex_futex_unlock(&rnp->boost_mtx); - /* * If this was the last task on the expedited lists, * then we need to report up the rcu_node hierarchy. */ if (!empty_exp && empty_exp_now) rcu_report_exp_rnp(rnp, true); + + /* Unboost if we were boosted. */ + if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex) + rt_mutex_futex_unlock(&rnp->boost_mtx); + } else { local_irq_restore(flags); } diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index c0b8c458d8a6a..b8c9744ad595b 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -7,6 +7,8 @@ * Author: Paul E. McKenney */ +#include + ////////////////////////////////////////////////////////////////////////////// // // Controlling CPU stall warnings, including delay calculation. @@ -525,6 +527,14 @@ static void check_cpu_stall(struct rcu_data *rdp) (READ_ONCE(rnp->qsmask) & rdp->grpmask) && cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) { + /* + * If a virtual machine is stopped by the host it can look to + * the watchdog like an RCU stall. Check to see if the host + * stopped the vm. + */ + if (kvm_check_and_clear_guest_paused()) + return; + /* We haven't checked in, so go dump stack. */ print_cpu_stall(); if (rcu_cpu_stall_ftrace_dump) @@ -534,6 +544,14 @@ static void check_cpu_stall(struct rcu_data *rdp) ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY) && cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) { + /* + * If a virtual machine is stopped by the host it can look to + * the watchdog like an RCU stall. Check to see if the host + * stopped the vm. + */ + if (kvm_check_and_clear_guest_paused()) + return; + /* They had a few time units to dump stack, so complain. */ print_other_cpu_stall(gs2); if (rcu_cpu_stall_ftrace_dump) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5558ee69e0cbb..1cbcf11ec1b09 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -759,7 +759,6 @@ static void set_load_weight(struct task_struct *p, bool update_load) if (task_has_idle_policy(p)) { load->weight = scale_load(WEIGHT_IDLEPRIO); load->inv_weight = WMULT_IDLEPRIO; - p->se.runnable_weight = load->weight; return; } @@ -772,7 +771,6 @@ static void set_load_weight(struct task_struct *p, bool update_load) } else { load->weight = scale_load(sched_prio_to_weight[prio]); load->inv_weight = sched_prio_to_wmult[prio]; - p->se.runnable_weight = load->weight; } } @@ -1116,6 +1114,23 @@ static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) uclamp_rq_dec_id(rq, p, clamp_id); } +static inline void uclamp_rq_reinc_id(struct rq *rq, struct task_struct *p, + enum uclamp_id clamp_id) +{ + if (!p->uclamp[clamp_id].active) + return; + + uclamp_rq_dec_id(rq, p, clamp_id); + uclamp_rq_inc_id(rq, p, clamp_id); + + /* + * Make sure to clear the idle flag if we've transiently reached 0 + * active tasks on rq. + */ + if (clamp_id == UCLAMP_MAX && (rq->uclamp_flags & UCLAMP_FLAG_IDLE)) + rq->uclamp_flags &= ~UCLAMP_FLAG_IDLE; +} + static inline void uclamp_update_active(struct task_struct *p) { @@ -1139,12 +1154,8 @@ uclamp_update_active(struct task_struct *p) * affecting a valid clamp bucket, the next time it's enqueued, * it will already see the updated clamp bucket value. */ - for_each_clamp_id(clamp_id) { - if (p->uclamp[clamp_id].active) { - uclamp_rq_dec_id(rq, p, clamp_id); - uclamp_rq_inc_id(rq, p, clamp_id); - } - } + for_each_clamp_id(clamp_id) + uclamp_rq_reinc_id(rq, p, clamp_id); task_rq_unlock(rq, p, &rf); } @@ -1330,7 +1341,7 @@ static void __init init_uclamp_rq(struct rq *rq) }; } - rq->uclamp_flags = 0; + rq->uclamp_flags = UCLAMP_FLAG_IDLE; } static void __init init_uclamp(void) @@ -2213,12 +2224,6 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) return cpu; } -static void update_avg(u64 *avg, u64 sample) -{ - s64 diff = sample - *avg; - *avg += diff >> 3; -} - void sched_set_stop_task(int cpu, struct task_struct *stop) { struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; @@ -2328,6 +2333,9 @@ static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags, if (rq->avg_idle > max) rq->avg_idle = max; + rq->wake_stamp = jiffies; + rq->wake_avg_idle = rq->avg_idle / 2; + rq->idle_stamp = 0; } #endif @@ -2475,6 +2483,9 @@ void wake_up_if_idle(int cpu) bool cpus_share_cache(int this_cpu, int that_cpu) { + if (this_cpu == that_cpu) + return true; + return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); } #endif /* CONFIG_SMP */ @@ -6818,6 +6829,8 @@ void __init sched_init(void) rq->online = 0; rq->idle_stamp = 0; rq->avg_idle = 2*sysctl_sched_migration_cost; + rq->wake_stamp = jiffies; + rq->wake_avg_idle = rq->avg_idle; rq->max_idle_balance_cost = sysctl_sched_migration_cost; INIT_LIST_HEAD(&rq->cfs_tasks); @@ -7526,6 +7539,12 @@ static u64 cpu_shares_read_u64(struct cgroup_subsys_state *css, } #ifdef CONFIG_CFS_BANDWIDTH +/* + * The minimum of quota/period ratio users can set, default is zero and users can set + * quota and period without triggering this constraint. + */ +unsigned int sysctl_sched_cfs_bandwidth_min_ratio; + static DEFINE_MUTEX(cfs_constraints_mutex); const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */ @@ -7535,6 +7554,20 @@ static const u64 max_cfs_runtime = MAX_BW * NSEC_PER_USEC; static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime); +static int check_cfs_bandwidth_min_ratio(u64 period, u64 quota) +{ + u64 ratio; + + if (!sysctl_sched_cfs_bandwidth_min_ratio) + return 0; + + ratio = div64_u64(quota * 100, period); + if (ratio < sysctl_sched_cfs_bandwidth_min_ratio) + return -1; + + return 0; +} + static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota, u64 burst) { @@ -7570,6 +7603,9 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota, burst + quota > max_cfs_runtime)) return -EINVAL; + if (quota != RUNTIME_INF && check_cfs_bandwidth_min_ratio(period, quota)) + return -EINVAL; + /* * Prevent race between setting of cfs_rq->runtime_enabled and * unthrottle_offline_cfs_rqs(). @@ -7838,6 +7874,9 @@ static int cpu_cfs_stat_show(struct seq_file *sf, void *v) seq_printf(sf, "wait_sum %llu\n", ws); } + seq_printf(sf, "nr_bursts %d\n", cfs_b->nr_burst); + seq_printf(sf, "burst_time %llu\n", cfs_b->burst_time); + return 0; } #endif /* CONFIG_CFS_BANDWIDTH */ @@ -7869,6 +7908,20 @@ static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css, } #endif /* CONFIG_RT_GROUP_SCHED */ +#ifdef CONFIG_FAIR_GROUP_SCHED +static s64 cpu_idle_read_s64(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + return css_tg(css)->idle; +} + +static int cpu_idle_write_s64(struct cgroup_subsys_state *css, + struct cftype *cft, s64 idle) +{ + return sched_group_set_idle(css_tg(css), idle); +} +#endif + static struct cftype cpu_legacy_files[] = { #ifdef CONFIG_FAIR_GROUP_SCHED { @@ -7876,6 +7929,11 @@ static struct cftype cpu_legacy_files[] = { .read_u64 = cpu_shares_read_u64, .write_u64 = cpu_shares_write_u64, }, + { + .name = "idle", + .read_s64 = cpu_idle_read_s64, + .write_s64 = cpu_idle_write_s64, + }, #endif #ifdef CONFIG_CFS_BANDWIDTH { @@ -7934,16 +7992,20 @@ static int cpu_extra_stat_show(struct seq_file *sf, { struct task_group *tg = css_tg(css); struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; - u64 throttled_usec; + u64 throttled_usec, burst_usec; throttled_usec = cfs_b->throttled_time; do_div(throttled_usec, NSEC_PER_USEC); + burst_usec = cfs_b->burst_time; + do_div(burst_usec, NSEC_PER_USEC); seq_printf(sf, "nr_periods %d\n" "nr_throttled %d\n" - "throttled_usec %llu\n", + "throttled_usec %llu\n" + "nr_bursts %d\n" + "burst_usec %llu\n", cfs_b->nr_periods, cfs_b->nr_throttled, - throttled_usec); + throttled_usec, cfs_b->nr_burst, burst_usec); } #endif return 0; @@ -8083,6 +8145,12 @@ static struct cftype cpu_files[] = { .read_s64 = cpu_weight_nice_read_s64, .write_s64 = cpu_weight_nice_write_s64, }, + { + .name = "idle", + .flags = CFTYPE_NOT_ON_ROOT, + .read_s64 = cpu_idle_read_s64, + .write_s64 = cpu_idle_write_s64, + }, #endif #ifdef CONFIG_CFS_BANDWIDTH { diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 4cb80e6042c4f..831fee509404e 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -624,9 +624,17 @@ static struct attribute *sugov_attrs[] = { }; ATTRIBUTE_GROUPS(sugov); +static void sugov_tunables_free(struct kobject *kobj) +{ + struct gov_attr_set *attr_set = container_of(kobj, struct gov_attr_set, kobj); + + kfree(to_sugov_tunables(attr_set)); +} + static struct kobj_type sugov_tunables_ktype = { .default_groups = sugov_groups, .sysfs_ops = &governor_sysfs_ops, + .release = &sugov_tunables_free, }; /********************** cpufreq governor interface *********************/ @@ -726,12 +734,10 @@ static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_polic return tunables; } -static void sugov_tunables_free(struct sugov_tunables *tunables) +static void sugov_clear_global_tunables(void) { if (!have_governor_per_policy()) global_tunables = NULL; - - kfree(tunables); } static int sugov_init(struct cpufreq_policy *policy) @@ -794,7 +800,7 @@ static int sugov_init(struct cpufreq_policy *policy) fail: kobject_put(&tunables->attr_set.kobj); policy->governor_data = NULL; - sugov_tunables_free(tunables); + sugov_clear_global_tunables(); stop_kthread: sugov_kthread_stop(sg_policy); @@ -821,7 +827,7 @@ static void sugov_exit(struct cpufreq_policy *policy) count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook); policy->governor_data = NULL; if (!count) - sugov_tunables_free(tunables); + sugov_clear_global_tunables(); mutex_unlock(&global_tunables_lock); diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 46ed4e1383e21..66188567778de 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -147,10 +147,10 @@ void account_guest_time(struct task_struct *p, u64 cputime) /* Add guest time to cpustat. */ if (task_nice(p) > 0) { - cpustat[CPUTIME_NICE] += cputime; + task_group_account_field(p, CPUTIME_NICE, cputime); cpustat[CPUTIME_GUEST_NICE] += cputime; } else { - cpustat[CPUTIME_USER] += cputime; + task_group_account_field(p, CPUTIME_USER, cputime); cpustat[CPUTIME_GUEST] += cputime; } } diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 3cf776d5bce8f..2bda9fdba31c4 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1654,6 +1654,7 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused */ raw_spin_lock(&rq->lock); if (p->dl.dl_non_contending) { + update_rq_clock(rq); sub_running_bw(&p->dl, &rq->dl); p->dl.dl_non_contending = 0; /* @@ -2622,7 +2623,7 @@ void __setparam_dl(struct task_struct *p, const struct sched_attr *attr) dl_se->dl_runtime = attr->sched_runtime; dl_se->dl_deadline = attr->sched_deadline; dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline; - dl_se->flags = attr->sched_flags; + dl_se->flags = attr->sched_flags & SCHED_DL_FLAGS; dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime); dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime); } @@ -2635,7 +2636,8 @@ void __getparam_dl(struct task_struct *p, struct sched_attr *attr) attr->sched_runtime = dl_se->dl_runtime; attr->sched_deadline = dl_se->dl_deadline; attr->sched_period = dl_se->dl_period; - attr->sched_flags = dl_se->flags; + attr->sched_flags &= ~SCHED_DL_FLAGS; + attr->sched_flags |= dl_se->flags; } /* @@ -2710,7 +2712,7 @@ bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr) if (dl_se->dl_runtime != attr->sched_runtime || dl_se->dl_deadline != attr->sched_deadline || dl_se->dl_period != attr->sched_period || - dl_se->flags != attr->sched_flags) + dl_se->flags != (attr->sched_flags & SCHED_DL_FLAGS)) return true; return false; diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index faada713cfae8..03a1f23f6a27c 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -400,11 +400,10 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group } P(se->load.weight); - P(se->runnable_weight); #ifdef CONFIG_SMP P(se->avg.load_avg); P(se->avg.util_avg); - P(se->avg.runnable_load_avg); + P(se->avg.runnable_avg); #endif #undef PN_SCHEDSTAT @@ -541,13 +540,17 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over", cfs_rq->nr_spread_over); SEQ_printf(m, " .%-30s: %d\n", "nr_running", cfs_rq->nr_running); + SEQ_printf(m, " .%-30s: %d\n", "h_nr_running", cfs_rq->h_nr_running); + SEQ_printf(m, " .%-30s: %d\n", "idle_nr_running", + cfs_rq->idle_nr_running); + SEQ_printf(m, " .%-30s: %d\n", "idle_h_nr_running", + cfs_rq->idle_h_nr_running); SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight); #ifdef CONFIG_SMP - SEQ_printf(m, " .%-30s: %ld\n", "runnable_weight", cfs_rq->runnable_weight); SEQ_printf(m, " .%-30s: %lu\n", "load_avg", cfs_rq->avg.load_avg); - SEQ_printf(m, " .%-30s: %lu\n", "runnable_load_avg", - cfs_rq->avg.runnable_load_avg); + SEQ_printf(m, " .%-30s: %lu\n", "runnable_avg", + cfs_rq->avg.runnable_avg); SEQ_printf(m, " .%-30s: %lu\n", "util_avg", cfs_rq->avg.util_avg); SEQ_printf(m, " .%-30s: %u\n", "util_est_enqueued", @@ -556,8 +559,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) cfs_rq->removed.load_avg); SEQ_printf(m, " .%-30s: %ld\n", "removed.util_avg", cfs_rq->removed.util_avg); - SEQ_printf(m, " .%-30s: %ld\n", "removed.runnable_sum", - cfs_rq->removed.runnable_sum); + SEQ_printf(m, " .%-30s: %ld\n", "removed.runnable_avg", + cfs_rq->removed.runnable_avg); #ifdef CONFIG_FAIR_GROUP_SCHED SEQ_printf(m, " .%-30s: %lu\n", "tg_load_avg_contrib", cfs_rq->tg_load_avg_contrib); @@ -737,6 +740,7 @@ static void sched_debug_header(struct seq_file *m) SEQ_printf(m, " .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x)) PN(sysctl_sched_latency); PN(sysctl_sched_min_granularity); + PN(sysctl_sched_idle_min_granularity); PN(sysctl_sched_wakeup_granularity); P(sysctl_sched_child_runs_first); P(sysctl_sched_features); @@ -847,25 +851,15 @@ void print_numa_stats(struct seq_file *m, int node, unsigned long tsf, static void sched_show_numa(struct task_struct *p, struct seq_file *m) { #ifdef CONFIG_NUMA_BALANCING - struct mempolicy *pol; - if (p->mm) P(mm->numa_scan_seq); - task_lock(p); - pol = p->mempolicy; - if (pol && !(pol->flags & MPOL_F_MORON)) - pol = NULL; - mpol_get(pol); - task_unlock(p); - P(numa_pages_migrated); P(numa_preferred_nid); P(total_numa_faults); SEQ_printf(m, "current_node=%d, numa_group_id=%d\n", task_node(p), task_numa_group_id(p)); show_numa_stats(p, m); - mpol_put(pol); #endif } @@ -956,13 +950,12 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, "nr_involuntary_switches", (long long)p->nivcsw); P(se.load.weight); - P(se.runnable_weight); #ifdef CONFIG_SMP P(se.avg.load_sum); - P(se.avg.runnable_load_sum); + P(se.avg.runnable_sum); P(se.avg.util_sum); P(se.avg.load_avg); - P(se.avg.runnable_load_avg); + P(se.avg.runnable_avg); P(se.avg.util_avg); P(se.avg.last_update_time); P(se.avg.util_est.ewma); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 1f34252b748a5..4d984aabea673 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -59,6 +59,14 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_L unsigned int sysctl_sched_min_granularity = 750000ULL; static unsigned int normalized_sysctl_sched_min_granularity = 750000ULL; +/* + * Minimal preemption granularity for CPU-bound SCHED_IDLE tasks. + * Applies only when SCHED_IDLE tasks compete with normal tasks. + * + * (default: 0.75 msec) + */ +unsigned int sysctl_sched_idle_min_granularity = 750000ULL; + /* * This value is kept at sysctl_sched_latency/sysctl_sched_min_granularity */ @@ -434,6 +442,23 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse) } } +static int tg_is_idle(struct task_group *tg) +{ + return tg->idle > 0; +} + +static int cfs_rq_is_idle(struct cfs_rq *cfs_rq) +{ + return cfs_rq->idle > 0; +} + +static int se_is_idle(struct sched_entity *se) +{ + if (entity_is_task(se)) + return task_has_idle_policy(task_of(se)); + return cfs_rq_is_idle(group_cfs_rq(se)); +} + #else /* !CONFIG_FAIR_GROUP_SCHED */ static inline struct task_struct *task_of(struct sched_entity *se) @@ -495,6 +520,21 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse) { } +static inline int tg_is_idle(struct task_group *tg) +{ + return 0; +} + +static int cfs_rq_is_idle(struct cfs_rq *cfs_rq) +{ + return 0; +} + +static int se_is_idle(struct sched_entity *se) +{ + return 0; +} + #endif /* CONFIG_FAIR_GROUP_SCHED */ static __always_inline @@ -684,6 +724,8 @@ static u64 __sched_period(unsigned long nr_running) return sysctl_sched_latency; } +static bool sched_idle_cfs_rq(struct cfs_rq *cfs_rq); + /* * We calculate the wall-time slice from the period by taking a part * proportional to the weight. @@ -692,23 +734,42 @@ static u64 __sched_period(unsigned long nr_running) */ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) { - u64 slice = __sched_period(cfs_rq->nr_running + !se->on_rq); + unsigned int nr_running = cfs_rq->nr_running; + struct sched_entity *init_se = se; + unsigned int min_gran; + u64 slice; + + if (sched_feat(ALT_PERIOD)) + nr_running = rq_of(cfs_rq)->cfs.h_nr_running; + + slice = __sched_period(nr_running + !se->on_rq); for_each_sched_entity(se) { struct load_weight *load; struct load_weight lw; + struct cfs_rq *qcfs_rq; - cfs_rq = cfs_rq_of(se); - load = &cfs_rq->load; + qcfs_rq = cfs_rq_of(se); + load = &qcfs_rq->load; if (unlikely(!se->on_rq)) { - lw = cfs_rq->load; + lw = qcfs_rq->load; update_load_add(&lw, se->load.weight); load = &lw; } slice = __calc_delta(slice, se->load.weight, load); } + + if (sched_feat(BASE_SLICE)) { + if (se_is_idle(init_se) && !sched_idle_cfs_rq(cfs_rq)) + min_gran = sysctl_sched_idle_min_granularity; + else + min_gran = sysctl_sched_min_granularity; + + slice = max_t(u64, slice, min_gran); + } + return slice; } @@ -743,9 +804,7 @@ void init_entity_runnable_average(struct sched_entity *se) * nothing has been attached to the task group yet. */ if (entity_is_task(se)) - sa->runnable_load_avg = sa->load_avg = scale_load_down(se->load.weight); - - se->runnable_weight = se->load.weight; + sa->load_avg = scale_load_down(se->load.weight); /* when this task enqueue'ed, it will contribute to its cfs_rq's load_avg */ } @@ -798,6 +857,8 @@ void post_init_entity_util_avg(struct task_struct *p) } } + sa->runnable_avg = sa->util_avg; + if (p->sched_class != &fair_sched_class) { /* * For !fair tasks do: @@ -1475,33 +1536,35 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page, group_faults_cpu(ng, src_nid) * group_faults(p, dst_nid) * 4; } -static unsigned long cpu_runnable_load(struct rq *rq); +/* + * 'numa_type' describes the node at the moment of load balancing. + */ +enum numa_type { + /* The node has spare capacity that can be used to run more tasks. */ + node_has_spare = 0, + /* + * The node is fully used and the tasks don't compete for more CPU + * cycles. Nevertheless, some tasks might wait before running. + */ + node_fully_busy, + /* + * The node is overloaded and can't provide expected CPU cycles to all + * tasks. + */ + node_overloaded +}; /* Cached statistics for all CPUs within a node */ struct numa_stats { unsigned long load; - + unsigned long util; /* Total compute capacity of CPUs on a node */ unsigned long compute_capacity; + unsigned int nr_running; + unsigned int weight; + enum numa_type node_type; }; -/* - * XXX borrowed from update_sg_lb_stats - */ -static void update_numa_stats(struct numa_stats *ns, int nid) -{ - int cpu; - - memset(ns, 0, sizeof(*ns)); - for_each_cpu(cpu, cpumask_of_node(nid)) { - struct rq *rq = cpu_rq(cpu); - - ns->load += cpu_runnable_load(rq); - ns->compute_capacity += capacity_of(cpu); - } - -} - struct task_numa_env { struct task_struct *p; @@ -1518,6 +1581,47 @@ struct task_numa_env { int best_cpu; }; +static unsigned long cpu_load(struct rq *rq); +static unsigned long cpu_util(int cpu); + +static inline enum +numa_type numa_classify(unsigned int imbalance_pct, + struct numa_stats *ns) +{ + if ((ns->nr_running > ns->weight) && + ((ns->compute_capacity * 100) < (ns->util * imbalance_pct))) + return node_overloaded; + + if ((ns->nr_running < ns->weight) || + ((ns->compute_capacity * 100) > (ns->util * imbalance_pct))) + return node_has_spare; + + return node_fully_busy; +} + +/* + * XXX borrowed from update_sg_lb_stats + */ +static void update_numa_stats(struct task_numa_env *env, + struct numa_stats *ns, int nid) +{ + int cpu; + + memset(ns, 0, sizeof(*ns)); + for_each_cpu(cpu, cpumask_of_node(nid)) { + struct rq *rq = cpu_rq(cpu); + + ns->load += cpu_load(rq); + ns->util += cpu_util(cpu); + ns->nr_running += rq->cfs.h_nr_running; + ns->compute_capacity += capacity_of(cpu); + } + + ns->weight = cpumask_weight(cpumask_of_node(nid)); + + ns->node_type = numa_classify(env->imbalance_pct, ns); +} + static void task_numa_assign(struct task_numa_env *env, struct task_struct *p, long imp) { @@ -1553,6 +1657,11 @@ static bool load_too_imbalanced(long src_load, long dst_load, long orig_src_load, orig_dst_load; long src_capacity, dst_capacity; + + /* If dst node has spare capacity, there is no real load imbalance */ + if (env->dst_stats.node_type == node_has_spare) + return false; + /* * The load is corrected for the CPU capacity available on each node. * @@ -1785,10 +1894,10 @@ static int task_numa_migrate(struct task_struct *p) dist = env.dist = node_distance(env.src_nid, env.dst_nid); taskweight = task_weight(p, env.src_nid, dist); groupweight = group_weight(p, env.src_nid, dist); - update_numa_stats(&env.src_stats, env.src_nid); + update_numa_stats(&env, &env.src_stats, env.src_nid); taskimp = task_weight(p, env.dst_nid, dist) - taskweight; groupimp = group_weight(p, env.dst_nid, dist) - groupweight; - update_numa_stats(&env.dst_stats, env.dst_nid); + update_numa_stats(&env, &env.dst_stats, env.dst_nid); /* Try to find a spot on the preferred nid. */ task_numa_find_cpu(&env, taskimp, groupimp); @@ -1821,7 +1930,7 @@ static int task_numa_migrate(struct task_struct *p) env.dist = dist; env.dst_nid = nid; - update_numa_stats(&env.dst_stats, env.dst_nid); + update_numa_stats(&env, &env.dst_stats, env.dst_nid); task_numa_find_cpu(&env, taskimp, groupimp); } } @@ -2767,6 +2876,8 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) } #endif cfs_rq->nr_running++; + if (se_is_idle(se)) + cfs_rq->idle_nr_running++; } static void @@ -2780,6 +2891,8 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) } #endif cfs_rq->nr_running--; + if (se_is_idle(se)) + cfs_rq->idle_nr_running--; } /* @@ -2831,25 +2944,6 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) } while (0) #ifdef CONFIG_SMP -static inline void -enqueue_runnable_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) -{ - cfs_rq->runnable_weight += se->runnable_weight; - - cfs_rq->avg.runnable_load_avg += se->avg.runnable_load_avg; - cfs_rq->avg.runnable_load_sum += se_runnable(se) * se->avg.runnable_load_sum; -} - -static inline void -dequeue_runnable_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) -{ - cfs_rq->runnable_weight -= se->runnable_weight; - - sub_positive(&cfs_rq->avg.runnable_load_avg, se->avg.runnable_load_avg); - sub_positive(&cfs_rq->avg.runnable_load_sum, - se_runnable(se) * se->avg.runnable_load_sum); -} - static inline void enqueue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { @@ -2865,28 +2959,22 @@ dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) } #else static inline void -enqueue_runnable_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { } -static inline void -dequeue_runnable_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { } -static inline void enqueue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { } static inline void dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { } #endif static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, - unsigned long weight, unsigned long runnable) + unsigned long weight) { if (se->on_rq) { /* commit outstanding execution time */ if (cfs_rq->curr == se) update_curr(cfs_rq); account_entity_dequeue(cfs_rq, se); - dequeue_runnable_load_avg(cfs_rq, se); } dequeue_load_avg(cfs_rq, se); - se->runnable_weight = runnable; update_load_set(&se->load, weight); #ifdef CONFIG_SMP @@ -2894,16 +2982,13 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, u32 divider = LOAD_AVG_MAX - 1024 + se->avg.period_contrib; se->avg.load_avg = div_u64(se_weight(se) * se->avg.load_sum, divider); - se->avg.runnable_load_avg = - div_u64(se_runnable(se) * se->avg.runnable_load_sum, divider); } while (0); #endif enqueue_load_avg(cfs_rq, se); - if (se->on_rq) { + if (se->on_rq) account_entity_enqueue(cfs_rq, se); - enqueue_runnable_load_avg(cfs_rq, se); - } + } void reweight_task(struct task_struct *p, int prio) @@ -2913,7 +2998,7 @@ void reweight_task(struct task_struct *p, int prio) struct load_weight *load = &se->load; unsigned long weight = scale_load(sched_prio_to_weight[prio]); - reweight_entity(cfs_rq, se, weight, weight); + reweight_entity(cfs_rq, se, weight); load->inv_weight = sched_prio_to_wmult[prio]; } @@ -3025,50 +3110,6 @@ static long calc_group_shares(struct cfs_rq *cfs_rq) */ return clamp_t(long, shares, MIN_SHARES, tg_shares); } - -/* - * This calculates the effective runnable weight for a group entity based on - * the group entity weight calculated above. - * - * Because of the above approximation (2), our group entity weight is - * an load_avg based ratio (3). This means that it includes blocked load and - * does not represent the runnable weight. - * - * Approximate the group entity's runnable weight per ratio from the group - * runqueue: - * - * grq->avg.runnable_load_avg - * ge->runnable_weight = ge->load.weight * -------------------------- (7) - * grq->avg.load_avg - * - * However, analogous to above, since the avg numbers are slow, this leads to - * transients in the from-idle case. Instead we use: - * - * ge->runnable_weight = ge->load.weight * - * - * max(grq->avg.runnable_load_avg, grq->runnable_weight) - * ----------------------------------------------------- (8) - * max(grq->avg.load_avg, grq->load.weight) - * - * Where these max() serve both to use the 'instant' values to fix the slow - * from-idle and avoid the /0 on to-idle, similar to (6). - */ -static long calc_group_runnable(struct cfs_rq *cfs_rq, long shares) -{ - long runnable, load_avg; - - load_avg = max(cfs_rq->avg.load_avg, - scale_load_down(cfs_rq->load.weight)); - - runnable = max(cfs_rq->avg.runnable_load_avg, - scale_load_down(cfs_rq->runnable_weight)); - - runnable *= shares; - if (load_avg) - runnable /= load_avg; - - return clamp_t(long, runnable, MIN_SHARES, shares); -} #endif /* CONFIG_SMP */ static inline int throttled_hierarchy(struct cfs_rq *cfs_rq); @@ -3080,7 +3121,7 @@ static inline int throttled_hierarchy(struct cfs_rq *cfs_rq); static void update_cfs_group(struct sched_entity *se) { struct cfs_rq *gcfs_rq = group_cfs_rq(se); - long shares, runnable; + long shares; if (!gcfs_rq) return; @@ -3089,16 +3130,15 @@ static void update_cfs_group(struct sched_entity *se) return; #ifndef CONFIG_SMP - runnable = shares = READ_ONCE(gcfs_rq->tg->shares); + shares = READ_ONCE(gcfs_rq->tg->shares); if (likely(se->load.weight == shares)) return; #else shares = calc_group_shares(gcfs_rq); - runnable = calc_group_runnable(gcfs_rq, shares); #endif - reweight_entity(cfs_rq_of(se), se, shares, runnable); + reweight_entity(cfs_rq_of(se), se, shares); } #else /* CONFIG_FAIR_GROUP_SCHED */ @@ -3223,11 +3263,11 @@ void set_task_rq_fair(struct sched_entity *se, * _IFF_ we look at the pure running and runnable sums. Because they * represent the very same entity, just at different points in the hierarchy. * - * Per the above update_tg_cfs_util() is trivial and simply copies the running - * sum over (but still wrong, because the group entity and group rq do not have - * their PELT windows aligned). + * Per the above update_tg_cfs_util() and update_tg_cfs_runnable() are trivial + * and simply copies the running/runnable sum over (but still wrong, because + * the group entity and group rq do not have their PELT windows aligned). * - * However, update_tg_cfs_runnable() is more complex. So we have: + * However, update_tg_cfs_load() is more complex. So we have: * * ge->avg.load_avg = ge->load.weight * ge->avg.runnable_avg (2) * @@ -3309,10 +3349,36 @@ update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq static inline void update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq) +{ + long delta = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg; + + /* Nothing to update */ + if (!delta) + return; + + /* + * The relation between sum and avg is: + * + * LOAD_AVG_MAX - 1024 + sa->period_contrib + * + * however, the PELT windows are not aligned between grq and gse. + */ + + /* Set new sched_entity's runnable */ + se->avg.runnable_avg = gcfs_rq->avg.runnable_avg; + se->avg.runnable_sum = se->avg.runnable_avg * LOAD_AVG_MAX; + + /* Update parent cfs_rq runnable */ + add_positive(&cfs_rq->avg.runnable_avg, delta); + cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * LOAD_AVG_MAX; +} + +static inline void +update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq) { long delta_avg, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum; - unsigned long runnable_load_avg, load_avg; - u64 runnable_load_sum, load_sum = 0; + unsigned long load_avg; + u64 load_sum = 0; s64 delta_sum; if (!runnable_sum) @@ -3360,19 +3426,6 @@ update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cf se->avg.load_avg = load_avg; add_positive(&cfs_rq->avg.load_avg, delta_avg); add_positive(&cfs_rq->avg.load_sum, delta_sum); - - runnable_load_sum = (s64)se_runnable(se) * runnable_sum; - runnable_load_avg = div_s64(runnable_load_sum, LOAD_AVG_MAX); - delta_sum = runnable_load_sum - se_weight(se) * se->avg.runnable_load_sum; - delta_avg = runnable_load_avg - se->avg.runnable_load_avg; - - se->avg.runnable_load_sum = runnable_sum; - se->avg.runnable_load_avg = runnable_load_avg; - - if (se->on_rq) { - add_positive(&cfs_rq->avg.runnable_load_avg, delta_avg); - add_positive(&cfs_rq->avg.runnable_load_sum, delta_sum); - } } static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum) @@ -3401,6 +3454,7 @@ static inline int propagate_entity_load_avg(struct sched_entity *se) update_tg_cfs_util(cfs_rq, se, gcfs_rq); update_tg_cfs_runnable(cfs_rq, se, gcfs_rq); + update_tg_cfs_load(cfs_rq, se, gcfs_rq); trace_pelt_cfs_tp(cfs_rq); trace_pelt_se_tp(se); @@ -3470,7 +3524,7 @@ static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq) { - unsigned long removed_load = 0, removed_util = 0, removed_runnable_sum = 0; + unsigned long removed_load = 0, removed_util = 0, removed_runnable = 0; struct sched_avg *sa = &cfs_rq->avg; int decayed = 0; @@ -3481,7 +3535,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq) raw_spin_lock(&cfs_rq->removed.lock); swap(cfs_rq->removed.util_avg, removed_util); swap(cfs_rq->removed.load_avg, removed_load); - swap(cfs_rq->removed.runnable_sum, removed_runnable_sum); + swap(cfs_rq->removed.runnable_avg, removed_runnable); cfs_rq->removed.nr = 0; raw_spin_unlock(&cfs_rq->removed.lock); @@ -3493,7 +3547,16 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq) sub_positive(&sa->util_avg, r); sub_positive(&sa->util_sum, r * divider); - add_tg_cfs_propagate(cfs_rq, -(long)removed_runnable_sum); + r = removed_runnable; + sub_positive(&sa->runnable_avg, r); + sub_positive(&sa->runnable_sum, r * divider); + + /* + * removed_runnable is the unweighted version of removed_load so we + * can use it to estimate removed_load_sum. + */ + add_tg_cfs_propagate(cfs_rq, + -(long)(removed_runnable * divider) >> SCHED_CAPACITY_SHIFT); decayed = 1; } @@ -3539,17 +3602,19 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s */ se->avg.util_sum = se->avg.util_avg * divider; + se->avg.runnable_sum = se->avg.runnable_avg * divider; + se->avg.load_sum = divider; if (se_weight(se)) { se->avg.load_sum = div_u64(se->avg.load_avg * se->avg.load_sum, se_weight(se)); } - se->avg.runnable_load_sum = se->avg.load_sum; - enqueue_load_avg(cfs_rq, se); cfs_rq->avg.util_avg += se->avg.util_avg; cfs_rq->avg.util_sum += se->avg.util_sum; + cfs_rq->avg.runnable_avg += se->avg.runnable_avg; + cfs_rq->avg.runnable_sum += se->avg.runnable_sum; add_tg_cfs_propagate(cfs_rq, se->avg.load_sum); @@ -3571,6 +3636,8 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s dequeue_load_avg(cfs_rq, se); sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg); sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum); + sub_positive(&cfs_rq->avg.runnable_avg, se->avg.runnable_avg); + sub_positive(&cfs_rq->avg.runnable_sum, se->avg.runnable_sum); add_tg_cfs_propagate(cfs_rq, -se->avg.load_sum); @@ -3677,13 +3744,13 @@ static void remove_entity_load_avg(struct sched_entity *se) ++cfs_rq->removed.nr; cfs_rq->removed.util_avg += se->avg.util_avg; cfs_rq->removed.load_avg += se->avg.load_avg; - cfs_rq->removed.runnable_sum += se->avg.load_sum; /* == runnable_sum */ + cfs_rq->removed.runnable_avg += se->avg.runnable_avg; raw_spin_unlock_irqrestore(&cfs_rq->removed.lock, flags); } -static inline unsigned long cfs_rq_runnable_load_avg(struct cfs_rq *cfs_rq) +static inline unsigned long cfs_rq_runnable_avg(struct cfs_rq *cfs_rq) { - return cfs_rq->avg.runnable_load_avg; + return cfs_rq->avg.runnable_avg; } static inline unsigned long cfs_rq_load_avg(struct cfs_rq *cfs_rq) @@ -3900,7 +3967,12 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) /* sleeps up to a single latency don't count. */ if (!initial) { - unsigned long thresh = sysctl_sched_latency; + unsigned long thresh; + + if (se_is_idle(se)) + thresh = sysctl_sched_min_granularity; + else + thresh = sysctl_sched_latency; /* * Halve their sleep time's effect, to allow @@ -4003,8 +4075,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) * - Add its new weight to cfs_rq->load.weight */ update_load_avg(cfs_rq, se, UPDATE_TG | DO_ATTACH); + se_update_runnable(se); update_cfs_group(se); - enqueue_runnable_load_avg(cfs_rq, se); account_entity_enqueue(cfs_rq, se); if (flags & ENQUEUE_WAKEUP) @@ -4093,7 +4165,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) * of its group cfs_rq. */ update_load_avg(cfs_rq, se, UPDATE_TG); - dequeue_runnable_load_avg(cfs_rq, se); + se_update_runnable(se); update_stats_dequeue(cfs_rq, se, flags); @@ -4380,11 +4452,20 @@ static inline u64 sched_cfs_bandwidth_slice(void) */ void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b) { + s64 runtime; + if (unlikely(cfs_b->quota == RUNTIME_INF)) return; cfs_b->runtime += cfs_b->quota; + runtime = cfs_b->runtime_snap - cfs_b->runtime; + if (runtime > 0) { + cfs_b->burst_time += runtime; + cfs_b->nr_burst++; + } + cfs_b->runtime = min(cfs_b->runtime, cfs_b->quota + cfs_b->burst); + cfs_b->runtime_snap = cfs_b->runtime; } static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) @@ -4495,8 +4576,8 @@ static int tg_unthrottle_up(struct task_group *tg, void *data) cfs_rq->throttle_count--; if (!cfs_rq->throttle_count) { - cfs_rq->throttled_clock_task_time += rq_clock_task(rq) - - cfs_rq->throttled_clock_task; + cfs_rq->throttled_clock_pelt_time += rq_clock_pelt(rq) - + cfs_rq->throttled_clock_pelt; /* Add cfs_rq with already running entity in the list */ if (cfs_rq->nr_running >= 1) @@ -4513,7 +4594,7 @@ static int tg_throttle_down(struct task_group *tg, void *data) /* group is entering throttled state, stop time */ if (!cfs_rq->throttle_count) { - cfs_rq->throttled_clock_task = rq_clock_task(rq); + cfs_rq->throttled_clock_pelt = rq_clock_pelt(rq); list_del_leaf_cfs_rq(cfs_rq); } cfs_rq->throttle_count++; @@ -4562,20 +4643,43 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq) struct cfs_rq *qcfs_rq = cfs_rq_of(se); /* throttled entity or throttle-on-deactivate */ if (!se->on_rq) - break; + goto done; + + dequeue_entity(qcfs_rq, se, DEQUEUE_SLEEP); + + if (cfs_rq_is_idle(group_cfs_rq(se))) + idle_task_delta = cfs_rq->h_nr_running; - if (dequeue) - dequeue_entity(qcfs_rq, se, DEQUEUE_SLEEP); qcfs_rq->h_nr_running -= task_delta; qcfs_rq->idle_h_nr_running -= idle_task_delta; - if (qcfs_rq->load.weight) - dequeue = 0; + if (qcfs_rq->load.weight) { + /* Avoid re-evaluating load for this entity: */ + se = parent_entity(se); + break; + } } - if (!se) - sub_nr_running(rq, task_delta); + for_each_sched_entity(se) { + struct cfs_rq *qcfs_rq = cfs_rq_of(se); + /* throttled entity or throttle-on-deactivate */ + if (!se->on_rq) + goto done; + + update_load_avg(qcfs_rq, se, 0); + se_update_runnable(se); + + if (cfs_rq_is_idle(group_cfs_rq(se))) + idle_task_delta = cfs_rq->h_nr_running; + + qcfs_rq->h_nr_running -= task_delta; + qcfs_rq->idle_h_nr_running -= idle_task_delta; + } + /* At this point se is NULL and we are at root level*/ + sub_nr_running(rq, task_delta); + +done: /* * Note: distribution will already see us throttled via the * throttled-list. rq->lock protects completion. @@ -4612,36 +4716,45 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) task_delta = cfs_rq->h_nr_running; idle_task_delta = cfs_rq->idle_h_nr_running; for_each_sched_entity(se) { + struct cfs_rq *qcfs_rq = cfs_rq_of(se); + if (se->on_rq) break; - cfs_rq = cfs_rq_of(se); - enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP); + enqueue_entity(qcfs_rq, se, ENQUEUE_WAKEUP); - cfs_rq->h_nr_running += task_delta; - cfs_rq->idle_h_nr_running += idle_task_delta; + if (cfs_rq_is_idle(group_cfs_rq(se))) + idle_task_delta = cfs_rq->h_nr_running; + + qcfs_rq->h_nr_running += task_delta; + qcfs_rq->idle_h_nr_running += idle_task_delta; /* end evaluation on encountering a throttled cfs_rq */ - if (cfs_rq_throttled(cfs_rq)) + if (cfs_rq_throttled(qcfs_rq)) goto unthrottle_throttle; } for_each_sched_entity(se) { - cfs_rq = cfs_rq_of(se); + struct cfs_rq *qcfs_rq = cfs_rq_of(se); - cfs_rq->h_nr_running += task_delta; - cfs_rq->idle_h_nr_running += idle_task_delta; + update_load_avg(qcfs_rq, se, UPDATE_TG); + se_update_runnable(se); + if (cfs_rq_is_idle(group_cfs_rq(se))) + idle_task_delta = cfs_rq->h_nr_running; + + qcfs_rq->h_nr_running += task_delta; + qcfs_rq->idle_h_nr_running += idle_task_delta; /* end evaluation on encountering a throttled cfs_rq */ - if (cfs_rq_throttled(cfs_rq)) + if (cfs_rq_throttled(qcfs_rq)) goto unthrottle_throttle; /* * One parent has been throttled and cfs_rq removed from the * list. Add it back to not break the leaf list. */ - if (throttled_hierarchy(cfs_rq)) - list_add_leaf_cfs_rq(cfs_rq); + if (throttled_hierarchy(qcfs_rq)) + list_add_leaf_cfs_rq(qcfs_rq); } /* At this point se is NULL and we are at root level*/ @@ -4654,9 +4767,9 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) * assertion below. */ for_each_sched_entity(se) { - cfs_rq = cfs_rq_of(se); + struct cfs_rq *qcfs_rq = cfs_rq_of(se); - if (list_add_leaf_cfs_rq(cfs_rq)) + if (list_add_leaf_cfs_rq(qcfs_rq)) break; } @@ -4921,7 +5034,7 @@ static void sync_throttle(struct task_group *tg, int cpu) pcfs_rq = tg->parent->cfs_rq[cpu]; cfs_rq->throttle_count = pcfs_rq->throttle_count; - cfs_rq->throttled_clock_task = rq_clock_task(cpu_rq(cpu)); + cfs_rq->throttled_clock_pelt = rq_clock_pelt(cpu_rq(cpu)); } /* conditionally throttle active cfs_rq's from put_prev_entity() */ @@ -5227,6 +5340,31 @@ static inline void update_overutilized_status(struct rq *rq) static inline void update_overutilized_status(struct rq *rq) { } #endif +/* Runqueue only has SCHED_IDLE tasks enqueued */ +static int sched_idle_rq(struct rq *rq) +{ + return unlikely(rq->nr_running == rq->cfs.idle_h_nr_running && + rq->nr_running); +} + +/* + * Returns true if cfs_rq only has SCHED_IDLE entities enqueued. Note the use + * of idle_nr_running, which does not consider idle descendants of normal + * entities. + */ +static bool sched_idle_cfs_rq(struct cfs_rq *cfs_rq) +{ + return cfs_rq->nr_running && + cfs_rq->nr_running == cfs_rq->idle_nr_running; +} + +#ifdef CONFIG_SMP +static int sched_idle_cpu(int cpu) +{ + return sched_idle_rq(cpu_rq(cpu)); +} +#endif + /* * The enqueue_task method is called before nr_running is * increased. Here we update the fair scheduling stats and @@ -5265,6 +5403,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) cfs_rq->h_nr_running++; cfs_rq->idle_h_nr_running += idle_h_nr_running; + if (cfs_rq_is_idle(cfs_rq)) + idle_h_nr_running = 1; + /* end evaluation on encountering a throttled cfs_rq */ if (cfs_rq_throttled(cfs_rq)) goto enqueue_throttle; @@ -5276,11 +5417,15 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) cfs_rq = cfs_rq_of(se); update_load_avg(cfs_rq, se, UPDATE_TG); + se_update_runnable(se); update_cfs_group(se); cfs_rq->h_nr_running++; cfs_rq->idle_h_nr_running += idle_h_nr_running; + if (cfs_rq_is_idle(cfs_rq)) + idle_h_nr_running = 1; + /* end evaluation on encountering a throttled cfs_rq */ if (cfs_rq_throttled(cfs_rq)) goto enqueue_throttle; @@ -5348,6 +5493,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) struct sched_entity *se = &p->se; int task_sleep = flags & DEQUEUE_SLEEP; int idle_h_nr_running = task_has_idle_policy(p); + bool was_sched_idle = sched_idle_rq(rq); for_each_sched_entity(se) { cfs_rq = cfs_rq_of(se); @@ -5356,6 +5502,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) cfs_rq->h_nr_running--; cfs_rq->idle_h_nr_running -= idle_h_nr_running; + if (cfs_rq_is_idle(cfs_rq)) + idle_h_nr_running = 1; + /* end evaluation on encountering a throttled cfs_rq */ if (cfs_rq_throttled(cfs_rq)) goto dequeue_throttle; @@ -5379,11 +5528,15 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) cfs_rq = cfs_rq_of(se); update_load_avg(cfs_rq, se, UPDATE_TG); + se_update_runnable(se); update_cfs_group(se); cfs_rq->h_nr_running--; cfs_rq->idle_h_nr_running -= idle_h_nr_running; + if (cfs_rq_is_idle(cfs_rq)) + idle_h_nr_running = 1; + /* end evaluation on encountering a throttled cfs_rq */ if (cfs_rq_throttled(cfs_rq)) goto dequeue_throttle; @@ -5394,6 +5547,10 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) if (!se) sub_nr_running(rq, 1); + /* balance early to pull high priority tasks */ + if (unlikely(!was_sched_idle && sched_idle_rq(rq))) + rq->next_balance = jiffies; + util_est_dequeue(&rq->cfs, p, task_sleep); hrtick_update(rq); } @@ -5416,35 +5573,68 @@ static struct { #endif /* CONFIG_NO_HZ_COMMON */ -/* CPU only has SCHED_IDLE tasks enqueued */ -static int sched_idle_cpu(int cpu) +static unsigned long cpu_load(struct rq *rq) { - struct rq *rq = cpu_rq(cpu); - - return unlikely(rq->nr_running == rq->cfs.idle_h_nr_running && - rq->nr_running); + return cfs_rq_load_avg(&rq->cfs); } -static unsigned long cpu_runnable_load(struct rq *rq) +/* + * cpu_load_without - compute CPU load without any contributions from *p + * @cpu: the CPU which load is requested + * @p: the task which load should be discounted + * + * The load of a CPU is defined by the load of tasks currently enqueued on that + * CPU as well as tasks which are currently sleeping after an execution on that + * CPU. + * + * This method returns the load of the specified CPU by discounting the load of + * the specified task, whenever the task is currently contributing to the CPU + * load. + */ +static unsigned long cpu_load_without(struct rq *rq, struct task_struct *p) { - return cfs_rq_runnable_load_avg(&rq->cfs); + struct cfs_rq *cfs_rq; + unsigned int load; + + /* Task has no contribution or is new */ + if (cpu_of(rq) != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time)) + return cpu_load(rq); + + cfs_rq = &rq->cfs; + load = READ_ONCE(cfs_rq->avg.load_avg); + + /* Discount task's util from CPU's util */ + lsub_positive(&load, task_h_load(p)); + + return load; } -static unsigned long capacity_of(int cpu) +static unsigned long cpu_runnable(struct rq *rq) { - return cpu_rq(cpu)->cpu_capacity; + return cfs_rq_runnable_avg(&rq->cfs); } -static unsigned long cpu_avg_load_per_task(int cpu) +static unsigned long cpu_runnable_without(struct rq *rq, struct task_struct *p) { - struct rq *rq = cpu_rq(cpu); - unsigned long nr_running = READ_ONCE(rq->cfs.h_nr_running); - unsigned long load_avg = cpu_runnable_load(rq); + struct cfs_rq *cfs_rq; + unsigned int runnable; - if (nr_running) - return load_avg / nr_running; + /* Task has no contribution or is new */ + if (cpu_of(rq) != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time)) + return cpu_runnable(rq); - return 0; + cfs_rq = &rq->cfs; + runnable = READ_ONCE(cfs_rq->avg.runnable_avg); + + /* Discount task's runnable from CPU's runnable */ + lsub_positive(&runnable, p->se.avg.runnable_avg); + + return runnable; +} + +static unsigned long capacity_of(int cpu) +{ + return cpu_rq(cpu)->cpu_capacity; } static void record_wakee(struct task_struct *p) @@ -5527,6 +5717,9 @@ wake_affine_idle(int this_cpu, int prev_cpu, int sync) if (sync && cpu_rq(this_cpu)->nr_running == 1) return this_cpu; + if (available_idle_cpu(prev_cpu)) + return prev_cpu; + return nr_cpumask_bits; } @@ -5537,7 +5730,7 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p, s64 this_eff_load, prev_eff_load; unsigned long task_load; - this_eff_load = cpu_runnable_load(cpu_rq(this_cpu)); + this_eff_load = cpu_load(cpu_rq(this_cpu)); if (sync) { unsigned long current_load = task_h_load(current); @@ -5555,7 +5748,7 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p, this_eff_load *= 100; this_eff_load *= capacity_of(prev_cpu); - prev_eff_load = cpu_runnable_load(cpu_rq(prev_cpu)); + prev_eff_load = cpu_load(cpu_rq(prev_cpu)); prev_eff_load -= task_load; if (sched_feat(WA_BIAS)) prev_eff_load *= 100 + (sd->imbalance_pct - 100) / 2; @@ -5593,161 +5786,21 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, return target; } -static unsigned long cpu_util_without(int cpu, struct task_struct *p); - -static unsigned long capacity_spare_without(int cpu, struct task_struct *p) -{ - return max_t(long, capacity_of(cpu) - cpu_util_without(cpu, p), 0); -} +static struct sched_group * +find_idlest_group(struct sched_domain *sd, struct task_struct *p, + int this_cpu, int sd_flag); /* - * find_idlest_group finds and returns the least busy CPU group within the - * domain. - * - * Assumes p is allowed on at least one CPU in sd. + * find_idlest_group_cpu - find the idlest CPU among the CPUs in the group. */ -static struct sched_group * -find_idlest_group(struct sched_domain *sd, struct task_struct *p, - int this_cpu, int sd_flag) -{ - struct sched_group *idlest = NULL, *group = sd->groups; - struct sched_group *most_spare_sg = NULL; - unsigned long min_runnable_load = ULONG_MAX; - unsigned long this_runnable_load = ULONG_MAX; - unsigned long min_avg_load = ULONG_MAX, this_avg_load = ULONG_MAX; - unsigned long most_spare = 0, this_spare = 0; - int imbalance_scale = 100 + (sd->imbalance_pct-100)/2; - unsigned long imbalance = scale_load_down(NICE_0_LOAD) * - (sd->imbalance_pct-100) / 100; - - do { - unsigned long load, avg_load, runnable_load; - unsigned long spare_cap, max_spare_cap; - int local_group; - int i; - - /* Skip over this group if it has no CPUs allowed */ - if (!cpumask_intersects(sched_group_span(group), - p->cpus_ptr)) - continue; - - local_group = cpumask_test_cpu(this_cpu, - sched_group_span(group)); - - /* - * Tally up the load of all CPUs in the group and find - * the group containing the CPU with most spare capacity. - */ - avg_load = 0; - runnable_load = 0; - max_spare_cap = 0; - - for_each_cpu(i, sched_group_span(group)) { - load = cpu_runnable_load(cpu_rq(i)); - runnable_load += load; - - avg_load += cfs_rq_load_avg(&cpu_rq(i)->cfs); - - spare_cap = capacity_spare_without(i, p); - - if (spare_cap > max_spare_cap) - max_spare_cap = spare_cap; - } - - /* Adjust by relative CPU capacity of the group */ - avg_load = (avg_load * SCHED_CAPACITY_SCALE) / - group->sgc->capacity; - runnable_load = (runnable_load * SCHED_CAPACITY_SCALE) / - group->sgc->capacity; - - if (local_group) { - this_runnable_load = runnable_load; - this_avg_load = avg_load; - this_spare = max_spare_cap; - } else { - if (min_runnable_load > (runnable_load + imbalance)) { - /* - * The runnable load is significantly smaller - * so we can pick this new CPU: - */ - min_runnable_load = runnable_load; - min_avg_load = avg_load; - idlest = group; - } else if ((runnable_load < (min_runnable_load + imbalance)) && - (100*min_avg_load > imbalance_scale*avg_load)) { - /* - * The runnable loads are close so take the - * blocked load into account through avg_load: - */ - min_avg_load = avg_load; - idlest = group; - } - - if (most_spare < max_spare_cap) { - most_spare = max_spare_cap; - most_spare_sg = group; - } - } - } while (group = group->next, group != sd->groups); - - /* - * The cross-over point between using spare capacity or least load - * is too conservative for high utilization tasks on partially - * utilized systems if we require spare_capacity > task_util(p), - * so we allow for some task stuffing by using - * spare_capacity > task_util(p)/2. - * - * Spare capacity can't be used for fork because the utilization has - * not been set yet, we must first select a rq to compute the initial - * utilization. - */ - if (sd_flag & SD_BALANCE_FORK) - goto skip_spare; - - if (this_spare > task_util(p) / 2 && - imbalance_scale*this_spare > 100*most_spare) - return NULL; - - if (most_spare > task_util(p) / 2) - return most_spare_sg; - -skip_spare: - if (!idlest) - return NULL; - - /* - * When comparing groups across NUMA domains, it's possible for the - * local domain to be very lightly loaded relative to the remote - * domains but "imbalance" skews the comparison making remote CPUs - * look much more favourable. When considering cross-domain, add - * imbalance to the runnable load on the remote node and consider - * staying local. - */ - if ((sd->flags & SD_NUMA) && - min_runnable_load + imbalance >= this_runnable_load) - return NULL; - - if (min_runnable_load > (this_runnable_load + imbalance)) - return NULL; - - if ((this_runnable_load < (min_runnable_load + imbalance)) && - (100*this_avg_load < imbalance_scale*min_avg_load)) - return NULL; - - return idlest; -} - -/* - * find_idlest_group_cpu - find the idlest CPU among the CPUs in the group. - */ -static int -find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) +static int +find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) { unsigned long load, min_load = ULONG_MAX; unsigned int min_exit_latency = UINT_MAX; u64 latest_idle_timestamp = 0; int least_loaded_cpu = this_cpu; - int shallowest_idle_cpu = -1, si_cpu = -1; + int shallowest_idle_cpu = -1; int i; /* Check if we have any choice: */ @@ -5756,6 +5809,9 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this /* Traverse only the allowed CPUs */ for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) { + if (sched_idle_cpu(i)) + return i; + if (available_idle_cpu(i)) { struct rq *rq = cpu_rq(i); struct cpuidle_state *idle = idle_get_state(rq); @@ -5778,13 +5834,8 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this latest_idle_timestamp = rq->idle_stamp; shallowest_idle_cpu = i; } - } else if (shallowest_idle_cpu == -1 && si_cpu == -1) { - if (sched_idle_cpu(i)) { - si_cpu = i; - continue; - } - - load = cpu_runnable_load(cpu_rq(i)); + } else if (shallowest_idle_cpu == -1) { + load = cpu_load(cpu_rq(i)); if (load < min_load) { min_load = load; least_loaded_cpu = i; @@ -5792,11 +5843,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this } } - if (shallowest_idle_cpu != -1) - return shallowest_idle_cpu; - if (si_cpu != -1) - return si_cpu; - return least_loaded_cpu; + return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu; } static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p, @@ -5808,7 +5855,7 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p return prev_cpu; /* - * We need task's util for capacity_spare_without, sync it up to + * We need task's util for cpu_util_without, sync it up to * prev_cpu's last_update_time. */ if (!(sd_flag & SD_BALANCE_FORK)) @@ -5852,6 +5899,14 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p return new_cpu; } +static inline int __select_idle_cpu(int cpu) +{ + if (available_idle_cpu(cpu) || sched_idle_cpu(cpu)) + return cpu; + + return -1; +} + #ifdef CONFIG_SCHED_SMT DEFINE_STATIC_KEY_FALSE(sched_smt_present); EXPORT_SYMBOL_GPL(sched_smt_present); @@ -5910,37 +5965,34 @@ void __update_idle_core(struct rq *rq) * there are no idle cores left in the system; tracked through * sd_llc->shared->has_idle_cores and enabled through update_idle_core() above. */ -static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target) +static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpus, int *idle_cpu) { - struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask); - int core, cpu; + bool idle = true; + int cpu; if (!static_branch_likely(&sched_smt_present)) - return -1; - - if (!test_idle_cores(target, false)) - return -1; - - cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr); - - for_each_cpu_wrap(core, cpus, target) { - bool idle = true; + return __select_idle_cpu(core); - for_each_cpu(cpu, cpu_smt_mask(core)) { - __cpumask_clear_cpu(cpu, cpus); - if (!available_idle_cpu(cpu)) - idle = false; + for_each_cpu(cpu, cpu_smt_mask(core)) { + if (!available_idle_cpu(cpu)) { + idle = false; + if (*idle_cpu == -1) { + if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, p->cpus_ptr)) { + *idle_cpu = cpu; + break; + } + continue; + } + break; } - - if (idle) - return core; + if (*idle_cpu == -1 && cpumask_test_cpu(cpu, p->cpus_ptr)) + *idle_cpu = cpu; } - /* - * Failed to find an idle core; stop looking for one. - */ - set_idle_cores(target, 0); + if (idle) + return core; + cpumask_andnot(cpus, cpus, cpu_smt_mask(core)); return -1; } @@ -5949,29 +6001,33 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int */ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target) { - int cpu, si_cpu = -1; - - if (!static_branch_likely(&sched_smt_present)) - return -1; + int cpu; for_each_cpu(cpu, cpu_smt_mask(target)) { if (!cpumask_test_cpu(cpu, p->cpus_ptr) || !cpumask_test_cpu(cpu, sched_domain_span(sd))) continue; - if (available_idle_cpu(cpu)) + if (available_idle_cpu(cpu) || sched_idle_cpu(cpu)) return cpu; - if (si_cpu == -1 && sched_idle_cpu(cpu)) - si_cpu = cpu; } - return si_cpu; + return -1; } #else /* CONFIG_SCHED_SMT */ -static inline int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target) +static inline void set_idle_cores(int cpu, int val) { - return -1; +} + +static inline bool test_idle_cores(int cpu, bool def) +{ + return def; +} + +static inline int select_idle_core(struct task_struct *p, int core, struct cpumask *cpus, int *idle_cpu) +{ + return __select_idle_cpu(core); } static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target) @@ -5986,57 +6042,80 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd * comparing the average scan cost (tracked in sd->avg_scan_cost) against the * average idle time for this rq (as found in rq->avg_idle). */ -static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target) +static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool has_idle_core, int target) { struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask); - struct sched_domain *this_sd; - u64 avg_cost, avg_idle; - u64 time, cost; - s64 delta; + int i, cpu, idle_cpu = -1, nr = INT_MAX; + struct rq *this_rq = this_rq(); int this = smp_processor_id(); - int cpu, nr = INT_MAX, si_cpu = -1; + struct sched_domain *this_sd; + u64 time = 0; this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc)); if (!this_sd) return -1; - /* - * Due to large variance we need a large fuzz factor; hackbench in - * particularly is sensitive here. - */ - avg_idle = this_rq()->avg_idle / 512; - avg_cost = this_sd->avg_scan_cost + 1; + cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr); - if (sched_feat(SIS_AVG_CPU) && avg_idle < avg_cost) - return -1; + if (sched_feat(SIS_PROP) && !has_idle_core) { + u64 avg_cost, avg_idle, span_avg; + unsigned long now = jiffies; + + /* + * If we're busy, the assumption that the last idle period + * predicts the future is flawed; age away the remaining + * predicted idle time. + */ + if (unlikely(this_rq->wake_stamp < now)) { + while (this_rq->wake_stamp < now && this_rq->wake_avg_idle) { + this_rq->wake_stamp++; + this_rq->wake_avg_idle >>= 1; + } + } + + avg_idle = this_rq->wake_avg_idle; + avg_cost = this_sd->avg_scan_cost + 1; - if (sched_feat(SIS_PROP)) { - u64 span_avg = sd->span_weight * avg_idle; + span_avg = sd->span_weight * avg_idle; if (span_avg > 4*avg_cost) nr = div_u64(span_avg, avg_cost); else nr = 4; - } - time = cpu_clock(this); + time = cpu_clock(this); + } - cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr); + for_each_cpu_wrap(cpu, cpus, target + 1) { + if (has_idle_core) { + i = select_idle_core(p, cpu, cpus, &idle_cpu); + if ((unsigned int)i < nr_cpumask_bits) + return i; - for_each_cpu_wrap(cpu, cpus, target) { - if (!--nr) - return si_cpu; - if (available_idle_cpu(cpu)) - break; - if (si_cpu == -1 && sched_idle_cpu(cpu)) - si_cpu = cpu; + } else { + if (!--nr) + return -1; + idle_cpu = __select_idle_cpu(cpu); + if ((unsigned int)idle_cpu < nr_cpumask_bits) + break; + } } - time = cpu_clock(this) - time; - cost = this_sd->avg_scan_cost; - delta = (s64)(time - cost) / 8; - this_sd->avg_scan_cost += delta; + if (has_idle_core) + set_idle_cores(target, false); + + if (sched_feat(SIS_PROP) && !has_idle_core) { + time = cpu_clock(this) - time; - return cpu; + /* + * Account for the scan cost of wakeups against the average + * idle time. + */ + this_rq->wake_avg_idle -= min(this_rq->wake_avg_idle, time); + + update_avg(&this_sd->avg_scan_cost, time); + } + + return idle_cpu; } /* @@ -6044,6 +6123,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t */ static int select_idle_sibling(struct task_struct *p, int prev, int target) { + bool has_idle_core = false; struct sched_domain *sd; int i, recent_used_cpu; @@ -6059,16 +6139,12 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) /* Check a recently used CPU as a potential idle candidate: */ recent_used_cpu = p->recent_used_cpu; + p->recent_used_cpu = prev; if (recent_used_cpu != prev && recent_used_cpu != target && cpus_share_cache(recent_used_cpu, target) && (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) && cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr)) { - /* - * Replace recent_used_cpu with prev as it is a potential - * candidate for the next wake: - */ - p->recent_used_cpu = prev; return recent_used_cpu; } @@ -6076,15 +6152,17 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) if (!sd) return target; - i = select_idle_core(p, sd, target); - if ((unsigned)i < nr_cpumask_bits) - return i; + if (sched_smt_active()) { + has_idle_core = test_idle_cores(target, false); - i = select_idle_cpu(p, sd, target); - if ((unsigned)i < nr_cpumask_bits) - return i; + if (!has_idle_core && cpus_share_cache(prev, target)) { + i = select_idle_smt(p, sd, prev); + if ((unsigned int)i < nr_cpumask_bits) + return i; + } + } - i = select_idle_smt(p, sd, target); + i = select_idle_cpu(p, sd, has_idle_core, target); if ((unsigned)i < nr_cpumask_bits) return i; @@ -6550,9 +6628,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f /* Fast path */ new_cpu = select_idle_sibling(p, prev_cpu, new_cpu); - - if (want_affine) - current->recent_used_cpu = cpu; } rcu_read_unlock(); @@ -6689,24 +6764,22 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se) static void set_last_buddy(struct sched_entity *se) { - if (entity_is_task(se) && unlikely(task_has_idle_policy(task_of(se)))) - return; - for_each_sched_entity(se) { if (SCHED_WARN_ON(!se->on_rq)) return; + if (se_is_idle(se)) + return; cfs_rq_of(se)->last = se; } } static void set_next_buddy(struct sched_entity *se) { - if (entity_is_task(se) && unlikely(task_has_idle_policy(task_of(se)))) - return; - for_each_sched_entity(se) { if (SCHED_WARN_ON(!se->on_rq)) return; + if (se_is_idle(se)) + return; cfs_rq_of(se)->next = se; } } @@ -6727,6 +6800,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ struct cfs_rq *cfs_rq = task_cfs_rq(curr); int scale = cfs_rq->nr_running >= sched_nr_latency; int next_buddy_marked = 0; + int cse_is_idle, pse_is_idle; if (unlikely(se == pse)) return; @@ -6771,8 +6845,21 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ return; find_matching_se(&se, &pse); - update_curr(cfs_rq_of(se)); BUG_ON(!pse); + + cse_is_idle = se_is_idle(se); + pse_is_idle = se_is_idle(pse); + + /* + * Preempt an idle group in favor of a non-idle group (and don't preempt + * in the inverse case). + */ + if (cse_is_idle && !pse_is_idle) + goto preempt; + if (cse_is_idle != pse_is_idle) + return; + + update_curr(cfs_rq_of(se)); if (wakeup_preempt_entity(se, pse) == 1) { /* * Bias pick_next to pick the sched entity that is @@ -7136,11 +7223,26 @@ static unsigned long __read_mostly max_load_balance_interval = HZ/10; enum fbq_type { regular, remote, all }; +/* + * group_type describes the group of CPUs at the moment of the load balance. + * The enum is ordered by pulling priority, with the group with lowest priority + * first so the groupe_type can be simply compared when selecting the busiest + * group. see update_sd_pick_busiest(). + */ enum group_type { - group_other = 0, + group_has_spare = 0, + group_fully_busy, group_misfit_task, + group_asym_packing, group_imbalanced, - group_overloaded, + group_overloaded +}; + +enum migration_type { + migrate_load = 0, + migrate_util, + migrate_task, + migrate_misfit }; #define LBF_ALL_PINNED 0x01 @@ -7173,7 +7275,7 @@ struct lb_env { unsigned int loop_max; enum fbq_type fbq_type; - enum group_type src_grp_type; + enum migration_type migration_type; struct list_head tasks; }; @@ -7400,7 +7502,7 @@ static struct task_struct *detach_one_task(struct lb_env *env) static const unsigned int sched_nr_migrate_break = 32; /* - * detach_tasks() -- tries to detach up to imbalance runnable load from + * detach_tasks() -- tries to detach up to imbalance load/util/tasks from * busiest_rq, as part of a balancing operation within domain "sd". * * Returns number of detached tasks if successful and 0 otherwise. @@ -7408,12 +7510,21 @@ static const unsigned int sched_nr_migrate_break = 32; static int detach_tasks(struct lb_env *env) { struct list_head *tasks = &env->src_rq->cfs_tasks; + unsigned long util, load; struct task_struct *p; - unsigned long load; int detached = 0; lockdep_assert_held(&env->src_rq->lock); + /* + * Source run queue has been emptied by another CPU, clear + * LBF_ALL_PINNED flag as we will not test any task. + */ + if (env->src_rq->nr_running <= 1) { + env->flags &= ~LBF_ALL_PINNED; + return 0; + } + if (env->imbalance <= 0) return 0; @@ -7442,27 +7553,53 @@ static int detach_tasks(struct lb_env *env) if (!can_migrate_task(p, env)) goto next; - /* - * Depending of the number of CPUs and tasks and the - * cgroup hierarchy, task_h_load() can return a null - * value. Make sure that env->imbalance decreases - * otherwise detach_tasks() will stop only after - * detaching up to loop_max tasks. - */ - load = max_t(unsigned long, task_h_load(p), 1); + switch (env->migration_type) { + case migrate_load: + /* + * Depending of the number of CPUs and tasks and the + * cgroup hierarchy, task_h_load() can return a null + * value. Make sure that env->imbalance decreases + * otherwise detach_tasks() will stop only after + * detaching up to loop_max tasks. + */ + load = max_t(unsigned long, task_h_load(p), 1); + if (sched_feat(LB_MIN) && + load < 16 && !env->sd->nr_balance_failed) + goto next; - if (sched_feat(LB_MIN) && load < 16 && !env->sd->nr_balance_failed) - goto next; + if (load/2 > env->imbalance) + goto next; - if ((load / 2) > env->imbalance) - goto next; + env->imbalance -= load; + break; + + case migrate_util: + util = task_util_est(p); + + if (util > env->imbalance) + goto next; + + env->imbalance -= util; + break; + + case migrate_task: + env->imbalance--; + break; + + case migrate_misfit: + /* This is not a misfit task */ + if (task_fits_capacity(p, capacity_of(env->src_cpu))) + goto next; + + env->imbalance = 0; + break; + } detach_task(p, env); list_add(&p->se.group_node, &env->tasks); detached++; - env->imbalance -= load; #ifdef CONFIG_PREEMPTION /* @@ -7476,7 +7613,7 @@ static int detach_tasks(struct lb_env *env) /* * We only want to steal up to the prescribed amount of - * runnable load. + * load/util/tasks. */ if (env->imbalance <= 0) break; @@ -7621,7 +7758,7 @@ static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq) if (cfs_rq->avg.util_sum) return false; - if (cfs_rq->avg.runnable_load_sum) + if (cfs_rq->avg.runnable_sum) return false; return true; @@ -7758,14 +7895,15 @@ static void update_blocked_averages(int cpu) struct sg_lb_stats { unsigned long avg_load; /*Avg load across the CPUs of the group */ unsigned long group_load; /* Total load over the CPUs of the group */ - unsigned long load_per_task; unsigned long group_capacity; - unsigned long group_util; /* Total utilization of the group */ - unsigned int sum_nr_running; /* Nr tasks running in the group */ + unsigned long group_util; /* Total utilization over the CPUs of the group */ + unsigned long group_runnable; /* Total runnable time over the CPUs of the group */ + unsigned int sum_nr_running; /* Nr of tasks running in the group */ + unsigned int sum_h_nr_running; /* Nr of CFS tasks running in the group */ unsigned int idle_cpus; unsigned int group_weight; enum group_type group_type; - int group_no_capacity; + unsigned int group_asym_packing; /* Tasks should be moved to preferred CPU */ unsigned long group_misfit_task_load; /* A CPU has a task too big for its capacity */ #ifdef CONFIG_NUMA_BALANCING unsigned int nr_numa_running; @@ -7780,10 +7918,10 @@ struct sg_lb_stats { struct sd_lb_stats { struct sched_group *busiest; /* Busiest group in this sd */ struct sched_group *local; /* Local group in this sd */ - unsigned long total_running; unsigned long total_load; /* Total load of all groups in sd */ unsigned long total_capacity; /* Total capacity of all groups in sd */ unsigned long avg_load; /* Average load across all groups in sd */ + unsigned int prefer_sibling; /* tasks should go to sibling first */ struct sg_lb_stats busiest_stat;/* Statistics of the busiest group */ struct sg_lb_stats local_stat; /* Statistics of the local group */ @@ -7794,19 +7932,18 @@ static inline void init_sd_lb_stats(struct sd_lb_stats *sds) /* * Skimp on the clearing to avoid duplicate work. We can avoid clearing * local_stat because update_sg_lb_stats() does a full clear/assignment. - * We must however clear busiest_stat::avg_load because - * update_sd_pick_busiest() reads this before assignment. + * We must however set busiest_stat::group_type and + * busiest_stat::idle_cpus to the worst busiest group because + * update_sd_pick_busiest() reads these before assignment. */ *sds = (struct sd_lb_stats){ .busiest = NULL, .local = NULL, - .total_running = 0UL, .total_load = 0UL, .total_capacity = 0UL, .busiest_stat = { - .avg_load = 0UL, - .sum_nr_running = 0, - .group_type = group_other, + .idle_cpus = UINT_MAX, + .group_type = group_has_spare, }, }; } @@ -7996,13 +8133,17 @@ static inline int sg_imbalanced(struct sched_group *group) * any benefit for the load balance. */ static inline bool -group_has_capacity(struct lb_env *env, struct sg_lb_stats *sgs) +group_has_capacity(unsigned int imbalance_pct, struct sg_lb_stats *sgs) { if (sgs->sum_nr_running < sgs->group_weight) return true; + if ((sgs->group_capacity * imbalance_pct) < + (sgs->group_runnable * 100)) + return false; + if ((sgs->group_capacity * 100) > - (sgs->group_util * env->sd->imbalance_pct)) + (sgs->group_util * imbalance_pct)) return true; return false; @@ -8017,13 +8158,17 @@ group_has_capacity(struct lb_env *env, struct sg_lb_stats *sgs) * false. */ static inline bool -group_is_overloaded(struct lb_env *env, struct sg_lb_stats *sgs) +group_is_overloaded(unsigned int imbalance_pct, struct sg_lb_stats *sgs) { if (sgs->sum_nr_running <= sgs->group_weight) return false; if ((sgs->group_capacity * 100) < - (sgs->group_util * env->sd->imbalance_pct)) + (sgs->group_util * imbalance_pct)) + return true; + + if ((sgs->group_capacity * imbalance_pct) < + (sgs->group_runnable * 100)) return true; return false; @@ -8050,19 +8195,26 @@ group_smaller_max_cpu_capacity(struct sched_group *sg, struct sched_group *ref) } static inline enum -group_type group_classify(struct sched_group *group, +group_type group_classify(unsigned int imbalance_pct, + struct sched_group *group, struct sg_lb_stats *sgs) { - if (sgs->group_no_capacity) + if (group_is_overloaded(imbalance_pct, sgs)) return group_overloaded; if (sg_imbalanced(group)) return group_imbalanced; + if (sgs->group_asym_packing) + return group_asym_packing; + if (sgs->group_misfit_task_load) return group_misfit_task; - return group_other; + if (!group_has_capacity(imbalance_pct, sgs)) + return group_fully_busy; + + return group_has_spare; } static bool update_nohz_stats(struct rq *rq, bool force) @@ -8099,21 +8251,26 @@ static inline void update_sg_lb_stats(struct lb_env *env, struct sg_lb_stats *sgs, int *sg_status) { - int i, nr_running; + int i, nr_running, local_group; memset(sgs, 0, sizeof(*sgs)); + local_group = cpumask_test_cpu(env->dst_cpu, sched_group_span(group)); + for_each_cpu_and(i, sched_group_span(group), env->cpus) { struct rq *rq = cpu_rq(i); if ((env->flags & LBF_NOHZ_STATS) && update_nohz_stats(rq, false)) env->flags |= LBF_NOHZ_AGAIN; - sgs->group_load += cpu_runnable_load(rq); + sgs->group_load += cpu_load(rq); sgs->group_util += cpu_util(i); - sgs->sum_nr_running += rq->cfs.h_nr_running; + sgs->group_runnable += cpu_runnable(rq); + sgs->sum_h_nr_running += rq->cfs.h_nr_running; nr_running = rq->nr_running; + sgs->sum_nr_running += nr_running; + if (nr_running > 1) *sg_status |= SG_OVERLOAD; @@ -8127,9 +8284,16 @@ static inline void update_sg_lb_stats(struct lb_env *env, /* * No need to call idle_cpu() if nr_running is not 0 */ - if (!nr_running && idle_cpu(i)) + if (!nr_running && idle_cpu(i)) { sgs->idle_cpus++; + /* Idle cpu can't have misfit task */ + continue; + } + + if (local_group) + continue; + /* Check for a misfit task on the cpu */ if (env->sd->flags & SD_ASYM_CPUCAPACITY && sgs->group_misfit_task_load < rq->misfit_task_load) { sgs->group_misfit_task_load = rq->misfit_task_load; @@ -8137,17 +8301,24 @@ static inline void update_sg_lb_stats(struct lb_env *env, } } - /* Adjust by relative CPU capacity of the group */ - sgs->group_capacity = group->sgc->capacity; - sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) / sgs->group_capacity; + /* Check if dst CPU is idle and preferred to this group */ + if (env->sd->flags & SD_ASYM_PACKING && + env->idle != CPU_NOT_IDLE && + sgs->sum_h_nr_running && + sched_asym_prefer(env->dst_cpu, group->asym_prefer_cpu)) { + sgs->group_asym_packing = 1; + } - if (sgs->sum_nr_running) - sgs->load_per_task = sgs->group_load / sgs->sum_nr_running; + sgs->group_capacity = group->sgc->capacity; sgs->group_weight = group->group_weight; - sgs->group_no_capacity = group_is_overloaded(env, sgs); - sgs->group_type = group_classify(group, sgs); + sgs->group_type = group_classify(env->sd->imbalance_pct, group, sgs); + + /* Computing avg_load makes sense only when group is overloaded */ + if (sgs->group_type == group_overloaded) + sgs->avg_load = (sgs->group_load * SCHED_CAPACITY_SCALE) / + sgs->group_capacity; } /** @@ -8170,6 +8341,10 @@ static bool update_sd_pick_busiest(struct lb_env *env, { struct sg_lb_stats *busiest = &sds->busiest_stat; + /* Make sure that there is at least one task to pull */ + if (!sgs->sum_h_nr_running) + return false; + /* * Don't try to pull misfit tasks we can't help. * We can use max_capacity here as reduction in capacity on some @@ -8178,7 +8353,7 @@ static bool update_sd_pick_busiest(struct lb_env *env, */ if (sgs->group_type == group_misfit_task && (!group_smaller_max_cpu_capacity(sg, sds->local) || - !group_has_capacity(env, &sds->local_stat))) + sds->local_stat.group_type != group_has_spare)) return false; if (sgs->group_type > busiest->group_type) @@ -8187,62 +8362,88 @@ static bool update_sd_pick_busiest(struct lb_env *env, if (sgs->group_type < busiest->group_type) return false; - if (sgs->avg_load <= busiest->avg_load) - return false; - - if (!(env->sd->flags & SD_ASYM_CPUCAPACITY)) - goto asym_packing; - /* - * Candidate sg has no more than one task per CPU and - * has higher per-CPU capacity. Migrating tasks to less - * capable CPUs may harm throughput. Maximize throughput, - * power/energy consequences are not considered. + * The candidate and the current busiest group are the same type of + * group. Let check which one is the busiest according to the type. */ - if (sgs->sum_nr_running <= sgs->group_weight && - group_smaller_min_cpu_capacity(sds->local, sg)) - return false; - /* - * If we have more than one misfit sg go with the biggest misfit. - */ - if (sgs->group_type == group_misfit_task && - sgs->group_misfit_task_load < busiest->group_misfit_task_load) + switch (sgs->group_type) { + case group_overloaded: + /* Select the overloaded group with highest avg_load. */ + if (sgs->avg_load <= busiest->avg_load) + return false; + break; + + case group_imbalanced: + /* + * Select the 1st imbalanced group as we don't have any way to + * choose one more than another. + */ return false; -asym_packing: - /* This is the busiest node in its class. */ - if (!(env->sd->flags & SD_ASYM_PACKING)) - return true; + case group_asym_packing: + /* Prefer to move from lowest priority CPU's work */ + if (sched_asym_prefer(sg->asym_prefer_cpu, sds->busiest->asym_prefer_cpu)) + return false; + break; - /* No ASYM_PACKING if target CPU is already busy */ - if (env->idle == CPU_NOT_IDLE) - return true; - /* - * ASYM_PACKING needs to move all the work to the highest - * prority CPUs in the group, therefore mark all groups - * of lower priority than ourself as busy. - */ - if (sgs->sum_nr_running && - sched_asym_prefer(env->dst_cpu, sg->asym_prefer_cpu)) { - if (!sds->busiest) - return true; + case group_misfit_task: + /* + * If we have more than one misfit sg go with the biggest + * misfit. + */ + if (sgs->group_misfit_task_load < busiest->group_misfit_task_load) + return false; + break; - /* Prefer to move from lowest priority CPU's work */ - if (sched_asym_prefer(sds->busiest->asym_prefer_cpu, - sg->asym_prefer_cpu)) - return true; + case group_fully_busy: + /* + * Select the fully busy group with highest avg_load. In + * theory, there is no need to pull task from such kind of + * group because tasks have all compute capacity that they need + * but we can still improve the overall throughput by reducing + * contention when accessing shared HW resources. + * + * XXX for now avg_load is not computed and always 0 so we + * select the 1st one. + */ + if (sgs->avg_load <= busiest->avg_load) + return false; + break; + + case group_has_spare: + /* + * Select not overloaded group with lowest number of + * idle cpus. We could also compare the spare capacity + * which is more stable but it can end up that the + * group has less spare capacity but finally more idle + * CPUs which means less opportunity to pull tasks. + */ + if (sgs->idle_cpus >= busiest->idle_cpus) + return false; + break; } - return false; + /* + * Candidate sg has no more than one task per CPU and has higher + * per-CPU capacity. Migrating tasks to less capable CPUs may harm + * throughput. Maximize throughput, power/energy consequences are not + * considered. + */ + if ((env->sd->flags & SD_ASYM_CPUCAPACITY) && + (sgs->group_type <= group_fully_busy) && + (group_smaller_min_cpu_capacity(sds->local, sg))) + return false; + + return true; } #ifdef CONFIG_NUMA_BALANCING static inline enum fbq_type fbq_classify_group(struct sg_lb_stats *sgs) { - if (sgs->sum_nr_running > sgs->nr_numa_running) + if (sgs->sum_h_nr_running > sgs->nr_numa_running) return regular; - if (sgs->sum_nr_running > sgs->nr_preferred_running) + if (sgs->sum_h_nr_running > sgs->nr_preferred_running) return remote; return all; } @@ -8267,28 +8468,328 @@ static inline enum fbq_type fbq_classify_rq(struct rq *rq) } #endif /* CONFIG_NUMA_BALANCING */ -/** - * update_sd_lb_stats - Update sched_domain's statistics for load balancing. - * @env: The load balancing environment. - * @sds: variable to hold the statistics for this sched_domain. + +struct sg_lb_stats; + +/* + * task_running_on_cpu - return 1 if @p is running on @cpu. */ -static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sds) + +static unsigned int task_running_on_cpu(int cpu, struct task_struct *p) { - struct sched_domain *child = env->sd->child; - struct sched_group *sg = env->sd->groups; - struct sg_lb_stats *local = &sds->local_stat; - struct sg_lb_stats tmp_sgs; - bool prefer_sibling = child && child->flags & SD_PREFER_SIBLING; - int sg_status = 0; + /* Task has no contribution or is new */ + if (cpu != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time)) + return 0; -#ifdef CONFIG_NO_HZ_COMMON - if (env->idle == CPU_NEWLY_IDLE && READ_ONCE(nohz.has_blocked)) - env->flags |= LBF_NOHZ_STATS; -#endif + if (task_on_rq_queued(p)) + return 1; - do { - struct sg_lb_stats *sgs = &tmp_sgs; - int local_group; + return 0; +} + +/** + * idle_cpu_without - would a given CPU be idle without p ? + * @cpu: the processor on which idleness is tested. + * @p: task which should be ignored. + * + * Return: 1 if the CPU would be idle. 0 otherwise. + */ +static int idle_cpu_without(int cpu, struct task_struct *p) +{ + struct rq *rq = cpu_rq(cpu); + + if (rq->curr != rq->idle && rq->curr != p) + return 0; + + /* + * rq->nr_running can't be used but an updated version without the + * impact of p on cpu must be used instead. The updated nr_running + * be computed and tested before calling idle_cpu_without(). + */ + +#ifdef CONFIG_SMP + if (!llist_empty(&rq->wake_list)) + return 0; +#endif + + return 1; +} + +/* + * update_sg_wakeup_stats - Update sched_group's statistics for wakeup. + * @sd: The sched_domain level to look for idlest group. + * @group: sched_group whose statistics are to be updated. + * @sgs: variable to hold the statistics for this group. + * @p: The task for which we look for the idlest group/CPU. + */ +static inline void update_sg_wakeup_stats(struct sched_domain *sd, + struct sched_group *group, + struct sg_lb_stats *sgs, + struct task_struct *p) +{ + int i, nr_running; + + memset(sgs, 0, sizeof(*sgs)); + + for_each_cpu(i, sched_group_span(group)) { + struct rq *rq = cpu_rq(i); + unsigned int local; + + sgs->group_load += cpu_load_without(rq, p); + sgs->group_util += cpu_util_without(i, p); + sgs->group_runnable += cpu_runnable_without(rq, p); + local = task_running_on_cpu(i, p); + sgs->sum_h_nr_running += rq->cfs.h_nr_running - local; + + nr_running = rq->nr_running - local; + sgs->sum_nr_running += nr_running; + + /* + * No need to call idle_cpu_without() if nr_running is not 0 + */ + if (!nr_running && idle_cpu_without(i, p)) + sgs->idle_cpus++; + + } + + /* Check if task fits in the group */ + if (sd->flags & SD_ASYM_CPUCAPACITY && + !task_fits_capacity(p, group->sgc->max_capacity)) { + sgs->group_misfit_task_load = 1; + } + + sgs->group_capacity = group->sgc->capacity; + + sgs->group_weight = group->group_weight; + + sgs->group_type = group_classify(sd->imbalance_pct, group, sgs); + + /* + * Computing avg_load makes sense only when group is fully busy or + * overloaded + */ + if (sgs->group_type == group_fully_busy || + sgs->group_type == group_overloaded) + sgs->avg_load = (sgs->group_load * SCHED_CAPACITY_SCALE) / + sgs->group_capacity; +} + +static bool update_pick_idlest(struct sched_group *idlest, + struct sg_lb_stats *idlest_sgs, + struct sched_group *group, + struct sg_lb_stats *sgs) +{ + if (sgs->group_type < idlest_sgs->group_type) + return true; + + if (sgs->group_type > idlest_sgs->group_type) + return false; + + /* + * The candidate and the current idlest group are the same type of + * group. Let check which one is the idlest according to the type. + */ + + switch (sgs->group_type) { + case group_overloaded: + case group_fully_busy: + /* Select the group with lowest avg_load. */ + if (idlest_sgs->avg_load <= sgs->avg_load) + return false; + break; + + case group_imbalanced: + case group_asym_packing: + /* Those types are not used in the slow wakeup path */ + return false; + + case group_misfit_task: + /* Select group with the highest max capacity */ + if (idlest->sgc->max_capacity >= group->sgc->max_capacity) + return false; + break; + + case group_has_spare: + /* Select group with most idle CPUs */ + if (idlest_sgs->idle_cpus >= sgs->idle_cpus) + return false; + break; + } + + return true; +} + +/* + * find_idlest_group() finds and returns the least busy CPU group within the + * domain. + * + * Assumes p is allowed on at least one CPU in sd. + */ +static struct sched_group * +find_idlest_group(struct sched_domain *sd, struct task_struct *p, + int this_cpu, int sd_flag) +{ + struct sched_group *idlest = NULL, *local = NULL, *group = sd->groups; + struct sg_lb_stats local_sgs, tmp_sgs; + struct sg_lb_stats *sgs; + unsigned long imbalance; + struct sg_lb_stats idlest_sgs = { + .avg_load = UINT_MAX, + .group_type = group_overloaded, + }; + + imbalance = scale_load_down(NICE_0_LOAD) * + (sd->imbalance_pct-100) / 100; + + do { + int local_group; + + /* Skip over this group if it has no CPUs allowed */ + if (!cpumask_intersects(sched_group_span(group), + p->cpus_ptr)) + continue; + + local_group = cpumask_test_cpu(this_cpu, + sched_group_span(group)); + + if (local_group) { + sgs = &local_sgs; + local = group; + } else { + sgs = &tmp_sgs; + } + + update_sg_wakeup_stats(sd, group, sgs, p); + + if (!local_group && update_pick_idlest(idlest, &idlest_sgs, group, sgs)) { + idlest = group; + idlest_sgs = *sgs; + } + + } while (group = group->next, group != sd->groups); + + + /* There is no idlest group to push tasks to */ + if (!idlest) + return NULL; + + /* The local group has been skipped because of CPU affinity */ + if (!local) + return idlest; + + /* + * If the local group is idler than the selected idlest group + * don't try and push the task. + */ + if (local_sgs.group_type < idlest_sgs.group_type) + return NULL; + + /* + * If the local group is busier than the selected idlest group + * try and push the task. + */ + if (local_sgs.group_type > idlest_sgs.group_type) + return idlest; + + switch (local_sgs.group_type) { + case group_overloaded: + case group_fully_busy: + /* + * When comparing groups across NUMA domains, it's possible for + * the local domain to be very lightly loaded relative to the + * remote domains but "imbalance" skews the comparison making + * remote CPUs look much more favourable. When considering + * cross-domain, add imbalance to the load on the remote node + * and consider staying local. + */ + + if ((sd->flags & SD_NUMA) && + ((idlest_sgs.avg_load + imbalance) >= local_sgs.avg_load)) + return NULL; + + /* + * If the local group is less loaded than the selected + * idlest group don't try and push any tasks. + */ + if (idlest_sgs.avg_load >= (local_sgs.avg_load + imbalance)) + return NULL; + + if (100 * local_sgs.avg_load <= sd->imbalance_pct * idlest_sgs.avg_load) + return NULL; + break; + + case group_imbalanced: + case group_asym_packing: + /* Those type are not used in the slow wakeup path */ + return NULL; + + case group_misfit_task: + /* Select group with the highest max capacity */ + if (local->sgc->max_capacity >= idlest->sgc->max_capacity) + return NULL; + break; + + case group_has_spare: + if (sd->flags & SD_NUMA) { +#ifdef CONFIG_NUMA_BALANCING + int idlest_cpu; + /* + * If there is spare capacity at NUMA, try to select + * the preferred node + */ + if (cpu_to_node(this_cpu) == p->numa_preferred_nid) + return NULL; + + idlest_cpu = cpumask_first(sched_group_span(idlest)); + if (cpu_to_node(idlest_cpu) == p->numa_preferred_nid) + return idlest; +#endif + /* + * Otherwise, keep the task on this node to stay close + * its wakeup source and improve locality. If there is + * a real need of migration, periodic load balance will + * take care of it. + */ + if (local_sgs.idle_cpus) + return NULL; + } + + /* + * Select group with highest number of idle CPUs. We could also + * compare the utilization which is more stable but it can end + * up that the group has less spare capacity but finally more + * idle CPUs which means more opportunity to run task. + */ + if (local_sgs.idle_cpus >= idlest_sgs.idle_cpus) + return NULL; + break; + } + + return idlest; +} + +/** + * update_sd_lb_stats - Update sched_domain's statistics for load balancing. + * @env: The load balancing environment. + * @sds: variable to hold the statistics for this sched_domain. + */ + +static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sds) +{ + struct sched_domain *child = env->sd->child; + struct sched_group *sg = env->sd->groups; + struct sg_lb_stats *local = &sds->local_stat; + struct sg_lb_stats tmp_sgs; + int sg_status = 0; + +#ifdef CONFIG_NO_HZ_COMMON + if (env->idle == CPU_NEWLY_IDLE && READ_ONCE(nohz.has_blocked)) + env->flags |= LBF_NOHZ_STATS; +#endif + + do { + struct sg_lb_stats *sgs = &tmp_sgs; + int local_group; local_group = cpumask_test_cpu(env->dst_cpu, sched_group_span(sg)); if (local_group) { @@ -8305,22 +8806,6 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd if (local_group) goto next_group; - /* - * In case the child domain prefers tasks go to siblings - * first, lower the sg capacity so that we'll try - * and move all the excess tasks away. We lower the capacity - * of a group only if the local group has the capacity to fit - * these excess tasks. The extra check prevents the case where - * you always pull from the heaviest group when it is already - * under-utilized (possible with a large weight task outweighs - * the tasks on the system). - */ - if (prefer_sibling && sds->local && - group_has_capacity(env, local) && - (sgs->sum_nr_running > local->sum_nr_running + 1)) { - sgs->group_no_capacity = 1; - sgs->group_type = group_classify(sg, sgs); - } if (update_sd_pick_busiest(env, sds, sg, sgs)) { sds->busiest = sg; @@ -8329,13 +8814,15 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd next_group: /* Now, start updating sd_lb_stats */ - sds->total_running += sgs->sum_nr_running; sds->total_load += sgs->group_load; sds->total_capacity += sgs->group_capacity; sg = sg->next; } while (sg != env->sd->groups); + /* Tag domain that child domain prefers tasks go to siblings first */ + sds->prefer_sibling = child && child->flags & SD_PREFER_SIBLING; + #ifdef CONFIG_NO_HZ_COMMON if ((env->flags & LBF_NOHZ_AGAIN) && cpumask_subset(nohz.idle_cpus_mask, sched_domain_span(env->sd))) { @@ -8366,203 +8853,165 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd } /** - * check_asym_packing - Check to see if the group is packed into the - * sched domain. - * - * This is primarily intended to used at the sibling level. Some - * cores like POWER7 prefer to use lower numbered SMT threads. In the - * case of POWER7, it can move to lower SMT modes only when higher - * threads are idle. When in lower SMT modes, the threads will - * perform better since they share less core resources. Hence when we - * have idle threads, we want them to be the higher ones. - * - * This packing function is run on idle threads. It checks to see if - * the busiest CPU in this domain (core in the P7 case) has a higher - * CPU number than the packing function is being run on. Here we are - * assuming lower CPU number will be equivalent to lower a SMT thread - * number. - * - * Return: 1 when packing is required and a task should be moved to - * this CPU. The amount of the imbalance is returned in env->imbalance. - * - * @env: The load balancing environment. - * @sds: Statistics of the sched_domain which is to be packed - */ -static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds) -{ - int busiest_cpu; - - if (!(env->sd->flags & SD_ASYM_PACKING)) - return 0; - - if (env->idle == CPU_NOT_IDLE) - return 0; - - if (!sds->busiest) - return 0; - - busiest_cpu = sds->busiest->asym_prefer_cpu; - if (sched_asym_prefer(busiest_cpu, env->dst_cpu)) - return 0; - - env->imbalance = sds->busiest_stat.group_load; - - return 1; -} - -/** - * fix_small_imbalance - Calculate the minor imbalance that exists - * amongst the groups of a sched_domain, during - * load balancing. - * @env: The load balancing environment. - * @sds: Statistics of the sched_domain whose imbalance is to be calculated. + * calculate_imbalance - Calculate the amount of imbalance present within the + * groups of a given sched_domain during load balance. + * @env: load balance environment + * @sds: statistics of the sched_domain whose imbalance is to be calculated. */ -static inline -void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds) +static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *sds) { - unsigned long tmp, capa_now = 0, capa_move = 0; - unsigned int imbn = 2; - unsigned long scaled_busy_load_per_task; struct sg_lb_stats *local, *busiest; local = &sds->local_stat; busiest = &sds->busiest_stat; - if (!local->sum_nr_running) - local->load_per_task = cpu_avg_load_per_task(env->dst_cpu); - else if (busiest->load_per_task > local->load_per_task) - imbn = 1; + if (busiest->group_type == group_misfit_task) { + /* Set imbalance to allow misfit tasks to be balanced. */ + env->migration_type = migrate_misfit; + env->imbalance = 1; + return; + } - scaled_busy_load_per_task = - (busiest->load_per_task * SCHED_CAPACITY_SCALE) / - busiest->group_capacity; + if (busiest->group_type == group_asym_packing) { + /* + * In case of asym capacity, we will try to migrate all load to + * the preferred CPU. + */ + env->migration_type = migrate_task; + env->imbalance = busiest->sum_h_nr_running; + return; + } - if (busiest->avg_load + scaled_busy_load_per_task >= - local->avg_load + (scaled_busy_load_per_task * imbn)) { - env->imbalance = busiest->load_per_task; + if (busiest->group_type == group_imbalanced) { + /* + * In the group_imb case we cannot rely on group-wide averages + * to ensure CPU-load equilibrium, try to move any task to fix + * the imbalance. The next load balance will take care of + * balancing back the system. + */ + env->migration_type = migrate_task; + env->imbalance = 1; return; } /* - * OK, we don't have enough imbalance to justify moving tasks, - * however we may be able to increase total CPU capacity used by - * moving them. + * Try to use spare capacity of local group without overloading it or + * emptying busiest */ + if (local->group_type == group_has_spare) { + if ((busiest->group_type > group_fully_busy) && + !(env->sd->flags & SD_SHARE_PKG_RESOURCES)) { + /* + * If busiest is overloaded, try to fill spare + * capacity. This might end up creating spare capacity + * in busiest or busiest still being overloaded but + * there is no simple way to directly compute the + * amount of load to migrate in order to balance the + * system. + */ + env->migration_type = migrate_util; + env->imbalance = max(local->group_capacity, local->group_util) - + local->group_util; - capa_now += busiest->group_capacity * - min(busiest->load_per_task, busiest->avg_load); - capa_now += local->group_capacity * - min(local->load_per_task, local->avg_load); - capa_now /= SCHED_CAPACITY_SCALE; - - /* Amount of load we'd subtract */ - if (busiest->avg_load > scaled_busy_load_per_task) { - capa_move += busiest->group_capacity * - min(busiest->load_per_task, - busiest->avg_load - scaled_busy_load_per_task); - } - - /* Amount of load we'd add */ - if (busiest->avg_load * busiest->group_capacity < - busiest->load_per_task * SCHED_CAPACITY_SCALE) { - tmp = (busiest->avg_load * busiest->group_capacity) / - local->group_capacity; - } else { - tmp = (busiest->load_per_task * SCHED_CAPACITY_SCALE) / - local->group_capacity; - } - capa_move += local->group_capacity * - min(local->load_per_task, local->avg_load + tmp); - capa_move /= SCHED_CAPACITY_SCALE; - - /* Move if we gain throughput */ - if (capa_move > capa_now) - env->imbalance = busiest->load_per_task; -} + /* + * In some cases, the group's utilization is max or even + * higher than capacity because of migrations but the + * local CPU is (newly) idle. There is at least one + * waiting task in this overloaded busiest group. Let's + * try to pull it. + */ + if (env->idle != CPU_NOT_IDLE && env->imbalance == 0) { + env->migration_type = migrate_task; + env->imbalance = 1; + } -/** - * calculate_imbalance - Calculate the amount of imbalance present within the - * groups of a given sched_domain during load balance. - * @env: load balance environment - * @sds: statistics of the sched_domain whose imbalance is to be calculated. - */ -static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *sds) -{ - unsigned long max_pull, load_above_capacity = ~0UL; - struct sg_lb_stats *local, *busiest; + return; + } - local = &sds->local_stat; - busiest = &sds->busiest_stat; + if (busiest->group_weight == 1 || sds->prefer_sibling) { + unsigned int nr_diff = busiest->sum_nr_running; + /* + * When prefer sibling, evenly spread running tasks on + * groups. + */ + env->migration_type = migrate_task; + lsub_positive(&nr_diff, local->sum_nr_running); + env->imbalance = nr_diff >> 1; + return; + } - if (busiest->group_type == group_imbalanced) { /* - * In the group_imb case we cannot rely on group-wide averages - * to ensure CPU-load equilibrium, look at wider averages. XXX + * If there is no overload, we just want to even the number of + * idle cpus. */ - busiest->load_per_task = - min(busiest->load_per_task, sds->avg_load); + env->migration_type = migrate_task; + env->imbalance = max_t(long, 0, (local->idle_cpus - + busiest->idle_cpus) >> 1); + return; } /* - * Avg load of busiest sg can be less and avg load of local sg can - * be greater than avg load across all sgs of sd because avg load - * factors in sg capacity and sgs with smaller group_type are - * skipped when updating the busiest sg: + * Local is fully busy but has to take more load to relieve the + * busiest group */ - if (busiest->group_type != group_misfit_task && - (busiest->avg_load <= sds->avg_load || - local->avg_load >= sds->avg_load)) { - env->imbalance = 0; - return fix_small_imbalance(env, sds); - } + if (local->group_type < group_overloaded) { + /* + * Local will become overloaded so the avg_load metrics are + * finally needed. + */ - /* - * If there aren't any idle CPUs, avoid creating some. - */ - if (busiest->group_type == group_overloaded && - local->group_type == group_overloaded) { - load_above_capacity = busiest->sum_nr_running * SCHED_CAPACITY_SCALE; - if (load_above_capacity > busiest->group_capacity) { - load_above_capacity -= busiest->group_capacity; - load_above_capacity *= scale_load_down(NICE_0_LOAD); - load_above_capacity /= busiest->group_capacity; - } else - load_above_capacity = ~0UL; + local->avg_load = (local->group_load * SCHED_CAPACITY_SCALE) / + local->group_capacity; + + sds->avg_load = (sds->total_load * SCHED_CAPACITY_SCALE) / + sds->total_capacity; + /* + * If the local group is more loaded than the selected + * busiest group don't try to pull any tasks. + */ + if (local->avg_load >= busiest->avg_load) { + env->imbalance = 0; + return; + } } /* - * We're trying to get all the CPUs to the average_load, so we don't - * want to push ourselves above the average load, nor do we wish to - * reduce the max loaded CPU below the average load. At the same time, - * we also don't want to reduce the group load below the group - * capacity. Thus we look for the minimum possible imbalance. + * Both group are or will become overloaded and we're trying to get all + * the CPUs to the average_load, so we don't want to push ourselves + * above the average load, nor do we wish to reduce the max loaded CPU + * below the average load. At the same time, we also don't want to + * reduce the group load below the group capacity. Thus we look for + * the minimum possible imbalance. */ - max_pull = min(busiest->avg_load - sds->avg_load, load_above_capacity); - - /* How much load to actually move to equalise the imbalance */ + env->migration_type = migrate_load; env->imbalance = min( - max_pull * busiest->group_capacity, + (busiest->avg_load - sds->avg_load) * busiest->group_capacity, (sds->avg_load - local->avg_load) * local->group_capacity ) / SCHED_CAPACITY_SCALE; - - /* Boost imbalance to allow misfit task to be balanced. */ - if (busiest->group_type == group_misfit_task) { - env->imbalance = max_t(long, env->imbalance, - busiest->group_misfit_task_load); - } - - /* - * if *imbalance is less than the average load per runnable task - * there is no guarantee that any tasks will be moved so we'll have - * a think about bumping its value to force at least one task to be - * moved - */ - if (env->imbalance < busiest->load_per_task) - return fix_small_imbalance(env, sds); } /******* find_busiest_group() helpers end here *********************/ +/* + * Decision matrix according to the local and busiest group type: + * + * busiest \ local has_spare fully_busy misfit asym imbalanced overloaded + * has_spare nr_idle balanced N/A N/A balanced balanced + * fully_busy nr_idle nr_idle N/A N/A balanced balanced + * misfit_task force N/A N/A N/A N/A N/A + * asym_packing force force N/A N/A force force + * imbalanced force force N/A N/A force force + * overloaded force force N/A N/A force avg_load + * + * N/A : Not Applicable because already filtered while updating + * statistics. + * balanced : The system is balanced for these 2 groups. + * force : Calculate the imbalance as load migration is probably needed. + * avg_load : Only if imbalance is significant enough. + * nr_idle : dst_cpu is not busy and the number of idle CPUs is quite + * different in groups. + */ + /** * find_busiest_group - Returns the busiest group within the sched_domain * if there is an imbalance. @@ -8582,7 +9031,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env) init_sd_lb_stats(&sds); /* - * Compute the various statistics relavent for load balancing at + * Compute the various statistics relevant for load balancing at * this level. */ update_sd_lb_stats(env, &sds); @@ -8597,17 +9046,17 @@ static struct sched_group *find_busiest_group(struct lb_env *env) local = &sds.local_stat; busiest = &sds.busiest_stat; - /* ASYM feature bypasses nice load balance check */ - if (check_asym_packing(env, &sds)) - return sds.busiest; - /* There is no busy sibling group to pull tasks from */ - if (!sds.busiest || busiest->sum_nr_running == 0) + if (!sds.busiest) goto out_balanced; - /* XXX broken for overlapping NUMA groups */ - sds.avg_load = (SCHED_CAPACITY_SCALE * sds.total_load) - / sds.total_capacity; + /* Misfit tasks should be dealt with regardless of the avg load */ + if (busiest->group_type == group_misfit_task) + goto force_balance; + + /* ASYM feature bypasses nice load balance check */ + if (busiest->group_type == group_asym_packing) + goto force_balance; /* * If the busiest group is imbalanced the below checks don't @@ -8617,56 +9066,81 @@ static struct sched_group *find_busiest_group(struct lb_env *env) if (busiest->group_type == group_imbalanced) goto force_balance; - /* - * When dst_cpu is idle, prevent SMP nice and/or asymmetric group - * capacities from resulting in underutilization due to avg_load. - */ - if (env->idle != CPU_NOT_IDLE && group_has_capacity(env, local) && - busiest->group_no_capacity) - goto force_balance; - - /* Misfit tasks should be dealt with regardless of the avg load */ - if (busiest->group_type == group_misfit_task) - goto force_balance; - /* * If the local group is busier than the selected busiest group * don't try and pull any tasks. */ - if (local->avg_load >= busiest->avg_load) + if (local->group_type > busiest->group_type) goto out_balanced; /* - * Don't pull any tasks if this group is already above the domain - * average load. + * When groups are overloaded, use the avg_load to ensure fairness + * between tasks. */ - if (local->avg_load >= sds.avg_load) - goto out_balanced; + if (local->group_type == group_overloaded) { + /* + * If the local group is more loaded than the selected + * busiest group don't try to pull any tasks. + */ + if (local->avg_load >= busiest->avg_load) + goto out_balanced; + + /* XXX broken for overlapping NUMA groups */ + sds.avg_load = (sds.total_load * SCHED_CAPACITY_SCALE) / + sds.total_capacity; - if (env->idle == CPU_IDLE) { /* - * This CPU is idle. If the busiest group is not overloaded - * and there is no imbalance between this and busiest group - * wrt idle CPUs, it is balanced. The imbalance becomes - * significant if the diff is greater than 1 otherwise we - * might end up to just move the imbalance on another group + * Don't pull any tasks if this group is already above the + * domain average load. */ - if ((busiest->group_type != group_overloaded) && - (local->idle_cpus <= (busiest->idle_cpus + 1))) + if (local->avg_load >= sds.avg_load) goto out_balanced; - } else { + /* - * In the CPU_NEWLY_IDLE, CPU_NOT_IDLE cases, use - * imbalance_pct to be conservative. + * If the busiest group is more loaded, use imbalance_pct to be + * conservative. */ if (100 * busiest->avg_load <= env->sd->imbalance_pct * local->avg_load) goto out_balanced; } + /* Try to move all excess tasks to child's sibling domain */ + if (sds.prefer_sibling && local->group_type == group_has_spare && + busiest->sum_nr_running > local->sum_nr_running + 1) + goto force_balance; + + if (busiest->group_type != group_overloaded) { + if (env->idle == CPU_NOT_IDLE) + /* + * If the busiest group is not overloaded (and as a + * result the local one too) but this CPU is already + * busy, let another idle CPU try to pull task. + */ + goto out_balanced; + + if (busiest->group_weight > 1 && + local->idle_cpus <= (busiest->idle_cpus + 1)) + /* + * If the busiest group is not overloaded + * and there is no imbalance between this and busiest + * group wrt idle CPUs, it is balanced. The imbalance + * becomes significant if the diff is greater than 1 + * otherwise we might end up to just move the imbalance + * on another group. Of course this applies only if + * there is more than 1 CPU per group. + */ + goto out_balanced; + + if (busiest->sum_h_nr_running == 1) + /* + * busiest doesn't have any tasks waiting to run + */ + goto out_balanced; + } + force_balance: /* Looks like there is an imbalance. Compute it */ - env->src_grp_type = busiest->group_type; calculate_imbalance(env, &sds); return env->imbalance ? sds.busiest : NULL; @@ -8682,11 +9156,13 @@ static struct rq *find_busiest_queue(struct lb_env *env, struct sched_group *group) { struct rq *busiest = NULL, *rq; - unsigned long busiest_load = 0, busiest_capacity = 1; + unsigned long busiest_util = 0, busiest_load = 0, busiest_capacity = 1; + unsigned int busiest_nr = 0; int i; for_each_cpu_and(i, sched_group_span(group), env->cpus) { - unsigned long capacity, load; + unsigned long capacity, load, util; + unsigned int nr_running; enum fbq_type rt; rq = cpu_rq(i); @@ -8714,20 +9190,8 @@ static struct rq *find_busiest_queue(struct lb_env *env, if (rt > env->fbq_type) continue; - /* - * For ASYM_CPUCAPACITY domains with misfit tasks we simply - * seek the "biggest" misfit task. - */ - if (env->src_grp_type == group_misfit_task) { - if (rq->misfit_task_load > busiest_load) { - busiest_load = rq->misfit_task_load; - busiest = rq; - } - - continue; - } - capacity = capacity_of(i); + nr_running = rq->cfs.h_nr_running; /* * For ASYM_CPUCAPACITY domains, don't pick a CPU that could @@ -8737,35 +9201,69 @@ static struct rq *find_busiest_queue(struct lb_env *env, */ if (env->sd->flags & SD_ASYM_CPUCAPACITY && capacity_of(env->dst_cpu) < capacity && - rq->nr_running == 1) + nr_running == 1) continue; - load = cpu_runnable_load(rq); + switch (env->migration_type) { + case migrate_load: + /* + * When comparing with load imbalance, use cpu_load() + * which is not scaled with the CPU capacity. + */ + load = cpu_load(rq); - /* - * When comparing with imbalance, use cpu_runnable_load() - * which is not scaled with the CPU capacity. - */ + if (nr_running == 1 && load > env->imbalance && + !check_cpu_capacity(rq, env->sd)) + break; - if (rq->nr_running == 1 && load > env->imbalance && - !check_cpu_capacity(rq, env->sd)) - continue; + /* + * For the load comparisons with the other CPUs, + * consider the cpu_load() scaled with the CPU + * capacity, so that the load can be moved away + * from the CPU that is potentially running at a + * lower capacity. + * + * Thus we're looking for max(load_i / capacity_i), + * crosswise multiplication to rid ourselves of the + * division works out to: + * load_i * capacity_j > load_j * capacity_i; + * where j is our previous maximum. + */ + if (load * busiest_capacity > busiest_load * capacity) { + busiest_load = load; + busiest_capacity = capacity; + busiest = rq; + } + break; + + case migrate_util: + util = cpu_util(cpu_of(rq)); + + if (busiest_util < util) { + busiest_util = util; + busiest = rq; + } + break; + + case migrate_task: + if (busiest_nr < nr_running) { + busiest_nr = nr_running; + busiest = rq; + } + break; + + case migrate_misfit: + /* + * For ASYM_CPUCAPACITY domains with misfit tasks we + * simply seek the "biggest" misfit task. + */ + if (rq->misfit_task_load > busiest_load) { + busiest_load = rq->misfit_task_load; + busiest = rq; + } + + break; - /* - * For the load comparisons with the other CPU's, consider - * the cpu_runnable_load() scaled with the CPU capacity, so - * that the load can be moved away from the CPU that is - * potentially running at a lower capacity. - * - * Thus we're looking for max(load_i / capacity_i), crosswise - * multiplication to rid ourselves of the division works out - * to: load_i * capacity_j > load_j * capacity_i; where j is - * our previous maximum. - */ - if (load * busiest_capacity > busiest_load * capacity) { - busiest_load = load; - busiest_capacity = capacity; - busiest = rq; } } @@ -8811,7 +9309,7 @@ voluntary_active_balance(struct lb_env *env) return 1; } - if (env->src_grp_type == group_misfit_task) + if (env->migration_type == migrate_misfit) return 1; return 0; @@ -9285,6 +9783,7 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle) { int continue_balancing = 1; int cpu = rq->cpu; + int busy = idle != CPU_IDLE && !sched_idle_cpu(cpu); unsigned long interval; struct sched_domain *sd; /* Earliest time when we have to do rebalance again */ @@ -9321,7 +9820,7 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle) break; } - interval = get_sd_balance_interval(sd, idle != CPU_IDLE); + interval = get_sd_balance_interval(sd, busy); need_serialize = sd->flags & SD_SERIALIZE; if (need_serialize) { @@ -9337,9 +9836,10 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle) * state even if we migrated tasks. Update it. */ idle = idle_cpu(cpu) ? CPU_IDLE : CPU_NOT_IDLE; + busy = idle != CPU_IDLE && !sched_idle_cpu(cpu); } sd->last_balance = jiffies; - interval = get_sd_balance_interval(sd, idle != CPU_IDLE); + interval = get_sd_balance_interval(sd, busy); } if (need_serialize) spin_unlock(&balancing); @@ -10448,10 +10948,12 @@ void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, static DEFINE_MUTEX(shares_mutex); -int sched_group_set_shares(struct task_group *tg, unsigned long shares) +static int __sched_group_set_shares(struct task_group *tg, unsigned long shares) { int i; + lockdep_assert_held(&shares_mutex); + /* * We can't change the weight of the root cgroup. */ @@ -10460,9 +10962,8 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) shares = clamp(shares, scale_load(MIN_SHARES), scale_load(MAX_SHARES)); - mutex_lock(&shares_mutex); if (tg->shares == shares) - goto done; + return 0; tg->shares = shares; for_each_possible_cpu(i) { @@ -10480,10 +10981,96 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) rq_unlock_irqrestore(rq, &rf); } -done: + return 0; +} + +int sched_group_set_shares(struct task_group *tg, unsigned long shares) +{ + int ret; + + mutex_lock(&shares_mutex); + if (tg_is_idle(tg)) + ret = -EINVAL; + else + ret = __sched_group_set_shares(tg, shares); + mutex_unlock(&shares_mutex); + + return ret; +} + +int sched_group_set_idle(struct task_group *tg, long idle) +{ + int i; + + if (tg == &root_task_group) + return -EINVAL; + + if (idle < 0 || idle > 1) + return -EINVAL; + + mutex_lock(&shares_mutex); + + if (tg->idle == idle) { + mutex_unlock(&shares_mutex); + return 0; + } + + tg->idle = idle; + + for_each_possible_cpu(i) { + struct rq *rq = cpu_rq(i); + struct sched_entity *se = tg->se[i]; + struct cfs_rq *parent_cfs_rq, *grp_cfs_rq = tg->cfs_rq[i]; + bool was_idle = cfs_rq_is_idle(grp_cfs_rq); + long idle_task_delta; + struct rq_flags rf; + + rq_lock_irqsave(rq, &rf); + + grp_cfs_rq->idle = idle; + if (WARN_ON_ONCE(was_idle == cfs_rq_is_idle(grp_cfs_rq))) + goto next_cpu; + + if (se->on_rq) { + parent_cfs_rq = cfs_rq_of(se); + if (cfs_rq_is_idle(grp_cfs_rq)) + parent_cfs_rq->idle_nr_running++; + else + parent_cfs_rq->idle_nr_running--; + } + + idle_task_delta = grp_cfs_rq->h_nr_running - + grp_cfs_rq->idle_h_nr_running; + if (!cfs_rq_is_idle(grp_cfs_rq)) + idle_task_delta *= -1; + + for_each_sched_entity(se) { + struct cfs_rq *cfs_rq = cfs_rq_of(se); + + if (!se->on_rq) + break; + + cfs_rq->idle_h_nr_running += idle_task_delta; + + /* Already accounted at parent level and above. */ + if (cfs_rq_is_idle(cfs_rq)) + break; + } + +next_cpu: + rq_unlock_irqrestore(rq, &rf); + } + + /* Idle groups have minimum weight. */ + if (tg_is_idle(tg)) + __sched_group_set_shares(tg, scale_load(WEIGHT_IDLEPRIO)); + else + __sched_group_set_shares(tg, NICE_0_LOAD); + mutex_unlock(&shares_mutex); return 0; } + #else /* CONFIG_FAIR_GROUP_SCHED */ void free_fair_sched_group(struct task_group *tg) { } diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 2410db5e9a353..6cc181df4cd15 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -54,7 +54,6 @@ SCHED_FEAT(TTWU_QUEUE, true) /* * When doing wakeups, attempt to limit superfluous scans of the LLC domain. */ -SCHED_FEAT(SIS_AVG_CPU, false) SCHED_FEAT(SIS_PROP, true) /* @@ -77,7 +76,7 @@ SCHED_FEAT(WARN_DOUBLE_CLOCK, false) SCHED_FEAT(RT_PUSH_IPI, true) #endif -SCHED_FEAT(RT_RUNTIME_SHARE, true) +SCHED_FEAT(RT_RUNTIME_SHARE, false) SCHED_FEAT(LB_MIN, false) SCHED_FEAT(ATTACH_AGE_LOAD, true) @@ -89,3 +88,6 @@ SCHED_FEAT(WA_BIAS, true) * UtilEstimation. Use estimated CPU utilization. */ SCHED_FEAT(UTIL_EST, true) + +SCHED_FEAT(ALT_PERIOD, true) +SCHED_FEAT(BASE_SLICE, true) diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c index 51beee02f74f6..7b8545ae60875 100644 --- a/kernel/sched/pelt.c +++ b/kernel/sched/pelt.c @@ -119,8 +119,8 @@ accumulate_sum(u64 delta, struct sched_avg *sa, */ if (periods) { sa->load_sum = decay_load(sa->load_sum, periods); - sa->runnable_load_sum = - decay_load(sa->runnable_load_sum, periods); + sa->runnable_sum = + decay_load(sa->runnable_sum, periods); sa->util_sum = decay_load((u64)(sa->util_sum), periods); /* @@ -135,7 +135,7 @@ accumulate_sum(u64 delta, struct sched_avg *sa, if (load) sa->load_sum += load * contrib; if (runnable) - sa->runnable_load_sum += runnable * contrib; + sa->runnable_sum += runnable * contrib << SCHED_CAPACITY_SHIFT; if (running) sa->util_sum += contrib << SCHED_CAPACITY_SHIFT; @@ -222,7 +222,7 @@ ___update_load_sum(u64 now, struct sched_avg *sa, } static __always_inline void -___update_load_avg(struct sched_avg *sa, unsigned long load, unsigned long runnable) +___update_load_avg(struct sched_avg *sa, unsigned long load) { u32 divider = LOAD_AVG_MAX - 1024 + sa->period_contrib; @@ -230,7 +230,7 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load, unsigned long runna * Step 2: update *_avg. */ sa->load_avg = div_u64(load * sa->load_sum, divider); - sa->runnable_load_avg = div_u64(runnable * sa->runnable_load_sum, divider); + sa->runnable_avg = div_u64(sa->runnable_sum, divider); WRITE_ONCE(sa->util_avg, sa->util_sum / divider); } @@ -238,33 +238,32 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load, unsigned long runna * sched_entity: * * task: - * se_runnable() == se_weight() + * se_weight() = se->load.weight + * se_runnable() = !!on_rq * * group: [ see update_cfs_group() ] * se_weight() = tg->weight * grq->load_avg / tg->load_avg - * se_runnable() = se_weight(se) * grq->runnable_load_avg / grq->load_avg + * se_runnable() = grq->h_nr_running * - * load_sum := runnable_sum - * load_avg = se_weight(se) * runnable_avg + * runnable_sum = se_runnable() * runnable = grq->runnable_sum + * runnable_avg = runnable_sum * - * runnable_load_sum := runnable_sum - * runnable_load_avg = se_runnable(se) * runnable_avg - * - * XXX collapse load_sum and runnable_load_sum + * load_sum := runnable + * load_avg = se_weight(se) * load_sum * * cfq_rq: * + * runnable_sum = \Sum se->avg.runnable_sum + * runnable_avg = \Sum se->avg.runnable_avg + * * load_sum = \Sum se_weight(se) * se->avg.load_sum * load_avg = \Sum se->avg.load_avg - * - * runnable_load_sum = \Sum se_runnable(se) * se->avg.runnable_load_sum - * runnable_load_avg = \Sum se->avg.runable_load_avg */ int __update_load_avg_blocked_se(u64 now, struct sched_entity *se) { if (___update_load_sum(now, &se->avg, 0, 0, 0)) { - ___update_load_avg(&se->avg, se_weight(se), se_runnable(se)); + ___update_load_avg(&se->avg, se_weight(se)); trace_pelt_se_tp(se); return 1; } @@ -274,10 +273,10 @@ int __update_load_avg_blocked_se(u64 now, struct sched_entity *se) int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se) { - if (___update_load_sum(now, &se->avg, !!se->on_rq, !!se->on_rq, + if (___update_load_sum(now, &se->avg, !!se->on_rq, se_runnable(se), cfs_rq->curr == se)) { - ___update_load_avg(&se->avg, se_weight(se), se_runnable(se)); + ___update_load_avg(&se->avg, se_weight(se)); cfs_se_util_change(&se->avg); trace_pelt_se_tp(se); return 1; @@ -290,10 +289,10 @@ int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq) { if (___update_load_sum(now, &cfs_rq->avg, scale_load_down(cfs_rq->load.weight), - scale_load_down(cfs_rq->runnable_weight), + cfs_rq->h_nr_running, cfs_rq->curr != NULL)) { - ___update_load_avg(&cfs_rq->avg, 1, 1); + ___update_load_avg(&cfs_rq->avg, 1); trace_pelt_cfs_tp(cfs_rq); return 1; } @@ -306,9 +305,9 @@ int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq) * * util_sum = \Sum se->avg.util_sum but se->avg.util_sum is not tracked * util_sum = cpu_scale * load_sum - * runnable_load_sum = load_sum + * runnable_sum = util_sum * - * load_avg and runnable_load_avg are not supported and meaningless. + * load_avg and runnable_avg are not supported and meaningless. * */ @@ -319,7 +318,7 @@ int update_rt_rq_load_avg(u64 now, struct rq *rq, int running) running, running)) { - ___update_load_avg(&rq->avg_rt, 1, 1); + ___update_load_avg(&rq->avg_rt, 1); trace_pelt_rt_tp(rq); return 1; } @@ -332,7 +331,9 @@ int update_rt_rq_load_avg(u64 now, struct rq *rq, int running) * * util_sum = \Sum se->avg.util_sum but se->avg.util_sum is not tracked * util_sum = cpu_scale * load_sum - * runnable_load_sum = load_sum + * runnable_sum = util_sum + * + * load_avg and runnable_avg are not supported and meaningless. * */ @@ -343,7 +344,7 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running) running, running)) { - ___update_load_avg(&rq->avg_dl, 1, 1); + ___update_load_avg(&rq->avg_dl, 1); trace_pelt_dl_tp(rq); return 1; } @@ -357,7 +358,9 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running) * * util_sum = \Sum se->avg.util_sum but se->avg.util_sum is not tracked * util_sum = cpu_scale * load_sum - * runnable_load_sum = load_sum + * runnable_sum = util_sum + * + * load_avg and runnable_avg are not supported and meaningless. * */ @@ -394,7 +397,7 @@ int update_irq_load_avg(struct rq *rq, u64 running) 1); if (ret) { - ___update_load_avg(&rq->avg_irq, 1, 1); + ___update_load_avg(&rq->avg_irq, 1); trace_pelt_irq_tp(rq); } diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h index afff644da0650..43e2a47489fae 100644 --- a/kernel/sched/pelt.h +++ b/kernel/sched/pelt.h @@ -127,9 +127,9 @@ static inline u64 rq_clock_pelt(struct rq *rq) static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq) { if (unlikely(cfs_rq->throttle_count)) - return cfs_rq->throttled_clock_task - cfs_rq->throttled_clock_task_time; + return cfs_rq->throttled_clock_pelt - cfs_rq->throttled_clock_pelt_time; - return rq_clock_pelt(rq_of(cfs_rq)) - cfs_rq->throttled_clock_task_time; + return rq_clock_pelt(rq_of(cfs_rq)) - cfs_rq->throttled_clock_pelt_time; } #else static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq) diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 5a8dfcb3fcc0a..516229c443925 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -34,13 +34,19 @@ * delayed on that resource such that nobody is advancing and the CPU * goes idle. This leaves both workload and CPU unproductive. * - * Naturally, the FULL state doesn't exist for the CPU resource at the - * system level, but exist at the cgroup level, means all non-idle tasks - * in a cgroup are delayed on the CPU resource which used by others outside - * of the cgroup or throttled by the cgroup cpu.max configuration. - * * SOME = nr_delayed_tasks != 0 - * FULL = nr_delayed_tasks != 0 && nr_running_tasks == 0 + * FULL = nr_delayed_tasks != 0 && nr_productive_tasks == 0 + * + * What it means for a task to be productive is defined differently + * for each resource. For IO, productive means a running task. For + * memory, productive means a running task that isn't a reclaimer. For + * CPU, productive means an oncpu task. + * + * Naturally, the FULL state doesn't exist for the CPU resource at the + * system level, but exist at the cgroup level. At the cgroup level, + * FULL means all non-idle tasks in the cgroup are delayed on the CPU + * resource which is being used by others outside of the cgroup or + * throttled by the cgroup cpu.max configuration. * * The percentage of wallclock time spent in those compound stall * states gives pressure numbers between 0 and 100 for each resource, @@ -81,13 +87,13 @@ * * threads = min(nr_nonidle_tasks, nr_cpus) * SOME = min(nr_delayed_tasks / threads, 1) - * FULL = (threads - min(nr_running_tasks, threads)) / threads + * FULL = (threads - min(nr_productive_tasks, threads)) / threads * * For the 257 number crunchers on 256 CPUs, this yields: * * threads = min(257, 256) * SOME = min(1 / 256, 1) = 0.4% - * FULL = (256 - min(257, 256)) / 256 = 0% + * FULL = (256 - min(256, 256)) / 256 = 0% * * For the 1 out of 4 memory-delayed tasks, this yields: * @@ -112,7 +118,7 @@ * For each runqueue, we track: * * tSOME[cpu] = time(nr_delayed_tasks[cpu] != 0) - * tFULL[cpu] = time(nr_delayed_tasks[cpu] && !nr_running_tasks[cpu]) + * tFULL[cpu] = time(nr_delayed_tasks[cpu] && !nr_productive_tasks[cpu]) * tNONIDLE[cpu] = time(nr_nonidle_tasks[cpu] != 0) * * and then periodically aggregate: @@ -148,6 +154,7 @@ static int psi_bug __read_mostly; DEFINE_STATIC_KEY_FALSE(psi_disabled); +DEFINE_STATIC_KEY_TRUE(psi_cgroups_enabled); #ifdef CONFIG_PSI_DEFAULT_DISABLED static bool psi_enable; @@ -215,6 +222,9 @@ void __init psi_init(void) return; } + if (!cgroup_psi_enabled()) + static_branch_disable(&psi_cgroups_enabled); + psi_period = jiffies_to_nsecs(PSI_FREQ); group_init(&psi_system); } @@ -229,7 +239,8 @@ static bool test_state(unsigned int *tasks, enum psi_states state) case PSI_MEM_SOME: return unlikely(tasks[NR_MEMSTALL]); case PSI_MEM_FULL: - return unlikely(tasks[NR_MEMSTALL] && !tasks[NR_RUNNING]); + return unlikely(tasks[NR_MEMSTALL] && + tasks[NR_RUNNING] == tasks[NR_MEMSTALL_RUNNING]); case PSI_CPU_SOME: return unlikely(tasks[NR_RUNNING] > tasks[NR_ONCPU]); case PSI_CPU_FULL: @@ -511,7 +522,7 @@ static void init_triggers(struct psi_group *group, u64 now) static u64 update_triggers(struct psi_group *group, u64 now) { struct psi_trigger *t; - bool new_stall = false; + bool update_total = false; u64 *total = group->total[PSI_POLL]; /* @@ -520,24 +531,35 @@ static u64 update_triggers(struct psi_group *group, u64 now) */ list_for_each_entry(t, &group->triggers, node) { u64 growth; + bool new_stall; - /* Check for stall activity */ - if (group->polling_total[t->state] == total[t->state]) - continue; + new_stall = group->polling_total[t->state] != total[t->state]; + /* Check for stall activity or a previous threshold breach */ + if (!new_stall && !t->pending_event) + continue; /* - * Multiple triggers might be looking at the same state, - * remember to update group->polling_total[] once we've - * been through all of them. Also remember to extend the - * polling time if we see new stall activity. + * Check for new stall activity, as well as deferred + * events that occurred in the last window after the + * trigger had already fired (we want to ratelimit + * events without dropping any). */ - new_stall = true; - - /* Calculate growth since last update */ - growth = window_update(&t->win, now, total[t->state]); - if (growth < t->threshold) - continue; - + if (new_stall) { + /* + * Multiple triggers might be looking at the same state, + * remember to update group->polling_total[] once we've + * been through all of them. Also remember to extend the + * polling time if we see new stall activity. + */ + update_total = true; + + /* Calculate growth since last update */ + growth = window_update(&t->win, now, total[t->state]); + if (growth < t->threshold) + continue; + + t->pending_event = true; + } /* Limit event signaling to once per window */ if (now < t->last_event_time + t->win.size) continue; @@ -546,9 +568,11 @@ static u64 update_triggers(struct psi_group *group, u64 now) if (cmpxchg(&t->event, 0, 1) == 0) wake_up_interruptible(&t->event_wait); t->last_event_time = now; + /* Reset threshold breach flag once event got generated */ + t->pending_event = false; } - if (new_stall) + if (update_total) memcpy(group->polling_total, total, sizeof(group->polling_total)); @@ -648,12 +672,10 @@ static void poll_timer_fn(struct timer_list *t) wake_up_interruptible(&group->poll_wait); } -static void record_times(struct psi_group_cpu *groupc, int cpu) +static void record_times(struct psi_group_cpu *groupc, u64 now) { u32 delta; - u64 now; - now = cpu_clock(cpu); delta = now - groupc->state_start; groupc->state_start = now; @@ -680,7 +702,7 @@ static void record_times(struct psi_group_cpu *groupc, int cpu) } static void psi_group_change(struct psi_group *group, int cpu, - unsigned int clear, unsigned int set, + unsigned int clear, unsigned int set, u64 now, bool wake_clock) { struct psi_group_cpu *groupc; @@ -700,19 +722,21 @@ static void psi_group_change(struct psi_group *group, int cpu, */ write_seqcount_begin(&groupc->seq); - record_times(groupc, cpu); + record_times(groupc, now); for (t = 0, m = clear; m; m &= ~(1 << t), t++) { if (!(m & (1 << t))) continue; - if (groupc->tasks[t] == 0 && !psi_bug) { - printk_deferred(KERN_ERR "psi: task underflow! cpu=%d t=%d tasks=[%u %u %u %u] clear=%x set=%x\n", + if (groupc->tasks[t]) { + groupc->tasks[t]--; + } else if (!psi_bug) { + printk_deferred(KERN_ERR "psi: task underflow! cpu=%d t=%d tasks=[%u %u %u %u %u] clear=%x set=%x\n", cpu, t, groupc->tasks[0], groupc->tasks[1], groupc->tasks[2], - groupc->tasks[3], clear, set); + groupc->tasks[3], groupc->tasks[4], + clear, set); psi_bug = 1; } - groupc->tasks[t]--; } for (t = 0; set; set &= ~(1 << t), t++) @@ -749,23 +773,23 @@ static void psi_group_change(struct psi_group *group, int cpu, static struct psi_group *iterate_groups(struct task_struct *task, void **iter) { + if (*iter == &psi_system) + return NULL; + #ifdef CONFIG_CGROUPS - struct cgroup *cgroup = NULL; + if (static_branch_likely(&psi_cgroups_enabled)) { + struct cgroup *cgroup = NULL; - if (!*iter) - cgroup = task->cgroups->dfl_cgrp; - else if (*iter == &psi_system) - return NULL; - else - cgroup = cgroup_parent(*iter); + if (!*iter) + cgroup = task->cgroups->dfl_cgrp; + else + cgroup = cgroup_parent(*iter); - if (cgroup && cgroup_parent(cgroup)) { - *iter = cgroup; - return cgroup_psi(cgroup); + if (cgroup && cgroup_parent(cgroup)) { + *iter = cgroup; + return cgroup_psi(cgroup); + } } -#else - if (*iter) - return NULL; #endif *iter = &psi_system; return &psi_system; @@ -792,12 +816,14 @@ void psi_task_change(struct task_struct *task, int clear, int set) struct psi_group *group; bool wake_clock = true; void *iter = NULL; + u64 now; if (!task->pid) return; psi_flags_change(task, clear, set); + now = cpu_clock(cpu); /* * Periodic aggregation shuts off if there is a period of no * task changes, so we wake it back up if necessary. However, @@ -810,7 +836,7 @@ void psi_task_change(struct task_struct *task, int clear, int set) wake_clock = false; while ((group = iterate_groups(task, &iter))) - psi_group_change(group, cpu, clear, set, wake_clock); + psi_group_change(group, cpu, clear, set, now, wake_clock); } void psi_task_switch(struct task_struct *prev, struct task_struct *next, @@ -819,6 +845,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next, struct psi_group *group, *common = NULL; int cpu = task_cpu(prev); void *iter; + u64 now = cpu_clock(cpu); if (next->pid) { bool identical_state; @@ -840,7 +867,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next, break; } - psi_group_change(group, cpu, 0, TSK_ONCPU, true); + psi_group_change(group, cpu, 0, TSK_ONCPU, now, true); } } @@ -848,12 +875,15 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next, int clear = TSK_ONCPU, set = 0; /* - * When we're going to sleep, psi_dequeue() lets us handle - * TSK_RUNNING and TSK_IOWAIT here, where we can combine it - * with TSK_ONCPU and save walking common ancestors twice. + * When we're going to sleep, psi_dequeue() lets us + * handle TSK_RUNNING, TSK_MEMSTALL_RUNNING and + * TSK_IOWAIT here, where we can combine it with + * TSK_ONCPU and save walking common ancestors twice. */ if (sleep) { clear |= TSK_RUNNING; + if (prev->in_memstall) + clear |= TSK_MEMSTALL_RUNNING; if (prev->in_iowait) set |= TSK_IOWAIT; } @@ -862,7 +892,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next, iter = NULL; while ((group = iterate_groups(prev, &iter)) && group != common) - psi_group_change(group, cpu, clear, set, true); + psi_group_change(group, cpu, clear, set, now, true); /* * TSK_ONCPU is handled up to the common ancestor. If we're tasked @@ -871,7 +901,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next, if (sleep) { clear &= ~TSK_ONCPU; for (; group; group = iterate_groups(prev, &iter)) - psi_group_change(group, cpu, clear, set, true); + psi_group_change(group, cpu, clear, set, now, true); } } } @@ -902,7 +932,7 @@ void psi_memstall_enter(unsigned long *flags) rq = this_rq_lock_irq(&rf); current->in_memstall = 1; - psi_task_change(current, 0, TSK_MEMSTALL); + psi_task_change(current, 0, TSK_MEMSTALL | TSK_MEMSTALL_RUNNING); rq_unlock_irq(rq, &rf); } @@ -931,7 +961,7 @@ void psi_memstall_leave(unsigned long *flags) rq = this_rq_lock_irq(&rf); current->in_memstall = 0; - psi_task_change(current, TSK_MEMSTALL, 0); + psi_task_change(current, TSK_MEMSTALL | TSK_MEMSTALL_RUNNING, 0); rq_unlock_irq(rq, &rf); } @@ -1045,14 +1075,17 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res) mutex_unlock(&group->avgs_lock); for (full = 0; full < 2; full++) { - unsigned long avg[3]; - u64 total; + unsigned long avg[3] = { 0, }; + u64 total = 0; int w; - for (w = 0; w < 3; w++) - avg[w] = group->avg[res * 2 + full][w]; - total = div_u64(group->total[PSI_AVGS][res * 2 + full], - NSEC_PER_USEC); + /* CPU FULL is undefined at the system level */ + if (!(group == &psi_system && res == PSI_CPU && full)) { + for (w = 0; w < 3; w++) + avg[w] = group->avg[res * 2 + full][w]; + total = div_u64(group->total[PSI_AVGS][res * 2 + full], + NSEC_PER_USEC); + } seq_printf(m, "%s avg10=%lu.%02lu avg60=%lu.%02lu avg300=%lu.%02lu total=%llu\n", full ? "full" : "some", @@ -1132,12 +1165,13 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group, t->state = state; t->threshold = threshold_us * NSEC_PER_USEC; t->win.size = window_us * NSEC_PER_USEC; - window_reset(&t->win, 0, 0, 0); + window_reset(&t->win, sched_clock(), + group->total[PSI_POLL][t->state], 0); t->event = 0; t->last_event_time = 0; init_waitqueue_head(&t->event_wait); - kref_init(&t->refcount); + t->pending_event = false; mutex_lock(&group->trigger_lock); @@ -1166,15 +1200,19 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group, return t; } -static void psi_trigger_destroy(struct kref *ref) +void psi_trigger_destroy(struct psi_trigger *t) { - struct psi_trigger *t = container_of(ref, struct psi_trigger, refcount); - struct psi_group *group = t->group; + struct psi_group *group; struct task_struct *task_to_destroy = NULL; - if (static_branch_likely(&psi_disabled)) + /* + * We do not check psi_disabled since it might have been disabled after + * the trigger got created. + */ + if (!t) return; + group = t->group; /* * Wakeup waiters to stop polling. Can happen if cgroup is deleted * from under a polling process. @@ -1210,9 +1248,9 @@ static void psi_trigger_destroy(struct kref *ref) mutex_unlock(&group->trigger_lock); /* - * Wait for both *trigger_ptr from psi_trigger_replace and - * poll_task RCUs to complete their read-side critical sections - * before destroying the trigger and optionally the poll_task + * Wait for psi_schedule_poll_work RCU to complete its read-side + * critical section before destroying the trigger and optionally the + * poll_task. */ synchronize_rcu(); /* @@ -1229,18 +1267,6 @@ static void psi_trigger_destroy(struct kref *ref) kfree(t); } -void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *new) -{ - struct psi_trigger *old = *trigger_ptr; - - if (static_branch_likely(&psi_disabled)) - return; - - rcu_assign_pointer(*trigger_ptr, new); - if (old) - kref_put(&old->refcount, psi_trigger_destroy); -} - __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file, poll_table *wait) { @@ -1250,24 +1276,15 @@ __poll_t psi_trigger_poll(void **trigger_ptr, if (static_branch_likely(&psi_disabled)) return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI; - rcu_read_lock(); - - t = rcu_dereference(*(void __rcu __force **)trigger_ptr); - if (!t) { - rcu_read_unlock(); + t = smp_load_acquire(trigger_ptr); + if (!t) return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI; - } - kref_get(&t->refcount); - - rcu_read_unlock(); poll_wait(file, &t->event_wait, wait); if (cmpxchg(&t->event, 1, 0) == 1) ret |= EPOLLPRI; - kref_put(&t->refcount, psi_trigger_destroy); - return ret; } @@ -1291,14 +1308,24 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf, buf[buf_size - 1] = '\0'; - new = psi_trigger_create(&psi_system, buf, nbytes, res); - if (IS_ERR(new)) - return PTR_ERR(new); - seq = file->private_data; + /* Take seq->lock to protect seq->private from concurrent writes */ mutex_lock(&seq->lock); - psi_trigger_replace(&seq->private, new); + + /* Allow only one trigger per file descriptor */ + if (seq->private) { + mutex_unlock(&seq->lock); + return -EBUSY; + } + + new = psi_trigger_create(&psi_system, buf, nbytes, res); + if (IS_ERR(new)) { + mutex_unlock(&seq->lock); + return PTR_ERR(new); + } + + smp_store_release(&seq->private, new); mutex_unlock(&seq->lock); return nbytes; @@ -1333,7 +1360,7 @@ static int psi_fop_release(struct inode *inode, struct file *file) { struct seq_file *seq = file->private_data; - psi_trigger_replace(&seq->private, NULL); + psi_trigger_destroy(seq->private); return single_release(inode, file); } diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 2dffb8762e16b..28c82dee13ea9 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -52,11 +52,8 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime) rt_b->rt_period_timer.function = sched_rt_period_timer; } -static void start_rt_bandwidth(struct rt_bandwidth *rt_b) +static inline void do_start_rt_bandwidth(struct rt_bandwidth *rt_b) { - if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) - return; - raw_spin_lock(&rt_b->rt_runtime_lock); if (!rt_b->rt_period_active) { rt_b->rt_period_active = 1; @@ -75,6 +72,14 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) raw_spin_unlock(&rt_b->rt_runtime_lock); } +static void start_rt_bandwidth(struct rt_bandwidth *rt_b) +{ + if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) + return; + + do_start_rt_bandwidth(rt_b); +} + void init_rt_rq(struct rt_rq *rt_rq) { struct rt_prio_array *array; @@ -983,13 +988,17 @@ static void update_curr_rt(struct rq *rq) for_each_sched_rt_entity(rt_se) { struct rt_rq *rt_rq = rt_rq_of_se(rt_se); + int exceeded; if (sched_rt_runtime(rt_rq) != RUNTIME_INF) { raw_spin_lock(&rt_rq->rt_runtime_lock); rt_rq->rt_time += delta_exec; - if (sched_rt_runtime_exceeded(rt_rq)) + exceeded = sched_rt_runtime_exceeded(rt_rq); + if (exceeded) resched_curr(rq); raw_spin_unlock(&rt_rq->rt_runtime_lock); + if (exceeded) + do_start_rt_bandwidth(sched_rt_bandwidth(rt_rq)); } } } @@ -2659,8 +2668,12 @@ static int sched_rt_global_validate(void) static void sched_rt_do_global(void) { + unsigned long flags; + + raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags); def_rt_bandwidth.rt_runtime = global_rt_runtime(); def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period()); + raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags); } int sched_rt_handler(struct ctl_table *table, int write, diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index ba0a7d00155e0..aa310b442382d 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -199,6 +199,12 @@ static inline int task_has_dl_policy(struct task_struct *p) #define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT) +static inline void update_avg(u64 *avg, u64 sample) +{ + s64 diff = sample - *avg; + *avg += diff / 8; +} + /* * !! For sched_setattr_nocheck() (kernel) only !! * @@ -213,6 +219,8 @@ static inline int task_has_dl_policy(struct task_struct *p) */ #define SCHED_FLAG_SUGOV 0x10000000 +#define SCHED_DL_FLAGS (SCHED_FLAG_RECLAIM | SCHED_FLAG_DL_OVERRUN | SCHED_FLAG_SUGOV) + static inline bool dl_entity_is_special(struct sched_dl_entity *dl_se) { #ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL @@ -339,6 +347,7 @@ struct cfs_bandwidth { u64 quota; u64 runtime; u64 burst; + u64 runtime_snap; s64 hierarchical_quota; u8 idle; @@ -351,7 +360,9 @@ struct cfs_bandwidth { /* Statistics: */ int nr_periods; int nr_throttled; + int nr_burst; u64 throttled_time; + u64 burst_time; #endif }; @@ -366,6 +377,9 @@ struct task_group { struct cfs_rq **cfs_rq; unsigned long shares; + /* A positive value indicates that this is a SCHED_IDLE group. */ + int idle; + #ifdef CONFIG_SMP /* * load_avg can be heavily contended at clock tick time, so put @@ -475,6 +489,8 @@ extern void sched_move_task(struct task_struct *tsk); #ifdef CONFIG_FAIR_GROUP_SCHED extern int sched_group_set_shares(struct task_group *tg, unsigned long shares); +extern int sched_group_set_idle(struct task_group *tg, long idle); + #ifdef CONFIG_SMP extern void set_task_rq_fair(struct sched_entity *se, struct cfs_rq *prev, struct cfs_rq *next); @@ -493,9 +509,9 @@ struct cfs_bandwidth { }; /* CFS-related fields in a runqueue */ struct cfs_rq { struct load_weight load; - unsigned long runnable_weight; unsigned int nr_running; unsigned int h_nr_running; /* SCHED_{NORMAL,BATCH,IDLE} */ + unsigned int idle_nr_running; /* SCHED_IDLE */ unsigned int idle_h_nr_running; /* SCHED_IDLE */ u64 exec_clock; @@ -532,7 +548,7 @@ struct cfs_rq { int nr; unsigned long load_avg; unsigned long util_avg; - unsigned long runnable_sum; + unsigned long runnable_avg; } removed; #ifdef CONFIG_FAIR_GROUP_SCHED @@ -567,13 +583,16 @@ struct cfs_rq { struct list_head leaf_cfs_rq_list; struct task_group *tg; /* group that "owns" this runqueue */ + /* Locally cached copy of our task_group's idle value */ + int idle; + #ifdef CONFIG_CFS_BANDWIDTH int runtime_enabled; s64 runtime_remaining; u64 throttled_clock; - u64 throttled_clock_task; - u64 throttled_clock_task_time; + u64 throttled_clock_pelt; + u64 throttled_clock_pelt_time; int throttled; int throttle_count; struct list_head throttled_list; @@ -692,8 +711,30 @@ struct dl_rq { #ifdef CONFIG_FAIR_GROUP_SCHED /* An entity is a task if it doesn't "own" a runqueue */ #define entity_is_task(se) (!se->my_q) + +static inline void se_update_runnable(struct sched_entity *se) +{ + if (!entity_is_task(se)) + se->runnable_weight = se->my_q->h_nr_running; +} + +static inline long se_runnable(struct sched_entity *se) +{ + if (entity_is_task(se)) + return !!se->on_rq; + else + return se->runnable_weight; +} + #else #define entity_is_task(se) 1 + +static inline void se_update_runnable(struct sched_entity *se) {} + +static inline long se_runnable(struct sched_entity *se) +{ + return !!se->on_rq; +} #endif #ifdef CONFIG_SMP @@ -705,10 +746,6 @@ static inline long se_weight(struct sched_entity *se) return scale_load_down(se->load.weight); } -static inline long se_runnable(struct sched_entity *se) -{ - return scale_load_down(se->runnable_weight); -} static inline bool sched_asym_prefer(int a, int b) { @@ -953,6 +990,9 @@ struct rq { u64 idle_stamp; u64 avg_idle; + unsigned long wake_stamp; + u64 wake_avg_idle; + /* This is used to determine avg_idle's max value */ u64 max_idle_balance_cost; #endif @@ -1958,7 +1998,7 @@ static inline void sub_nr_running(struct rq *rq, unsigned count) { rq->nr_running -= count; if (trace_sched_update_nr_running_tp_enabled()) { - call_trace_sched_update_nr_running(rq, count); + call_trace_sched_update_nr_running(rq, -count); } /* Check if we still need preemption */ diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index dc218e9f45585..fd9cb6b15603f 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -69,6 +69,9 @@ static inline void psi_enqueue(struct task_struct *p, bool wakeup) if (static_branch_likely(&psi_disabled)) return; + if (p->in_memstall) + set |= TSK_MEMSTALL_RUNNING; + if (!wakeup || p->sched_psi_wake_requeue) { if (p->in_memstall) set |= TSK_MEMSTALL; @@ -99,7 +102,7 @@ static inline void psi_dequeue(struct task_struct *p, bool sleep) return; if (p->in_memstall) - clear |= TSK_MEMSTALL; + clear |= (TSK_MEMSTALL | TSK_MEMSTALL_RUNNING); psi_task_change(p, clear, 0); } diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index ffaa97a8d4051..e5ebaffc4fef5 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -1552,66 +1552,58 @@ static void init_numa_topology_type(void) } } + +#define NR_DISTANCE_VALUES (1 << DISTANCE_BITS) + void sched_init_numa(void) { - int next_distance, curr_distance = node_distance(0, 0); struct sched_domain_topology_level *tl; - int level = 0; - int i, j, k; - - sched_domains_numa_distance = kzalloc(sizeof(int) * (nr_node_ids + 1), GFP_KERNEL); - if (!sched_domains_numa_distance) - return; - - /* Includes NUMA identity node at level 0. */ - sched_domains_numa_distance[level++] = curr_distance; - sched_domains_numa_levels = level; + unsigned long *distance_map; + int nr_levels = 0; + int i, j; /* * O(nr_nodes^2) deduplicating selection sort -- in order to find the * unique distances in the node_distance() table. - * - * Assumes node_distance(0,j) includes all distances in - * node_distance(i,j) in order to avoid cubic time. */ - next_distance = curr_distance; + distance_map = bitmap_alloc(NR_DISTANCE_VALUES, GFP_KERNEL); + if (!distance_map) + return; + + bitmap_zero(distance_map, NR_DISTANCE_VALUES); for (i = 0; i < nr_node_ids; i++) { for (j = 0; j < nr_node_ids; j++) { - for (k = 0; k < nr_node_ids; k++) { - int distance = node_distance(i, k); - - if (distance > curr_distance && - (distance < next_distance || - next_distance == curr_distance)) - next_distance = distance; - - /* - * While not a strong assumption it would be nice to know - * about cases where if node A is connected to B, B is not - * equally connected to A. - */ - if (sched_debug() && node_distance(k, i) != distance) - sched_numa_warn("Node-distance not symmetric"); + int distance = node_distance(i, j); - if (sched_debug() && i && !find_numa_distance(distance)) - sched_numa_warn("Node-0 not representative"); + if (distance < LOCAL_DISTANCE || distance >= NR_DISTANCE_VALUES) { + sched_numa_warn("Invalid distance value range"); + return; } - if (next_distance != curr_distance) { - sched_domains_numa_distance[level++] = next_distance; - sched_domains_numa_levels = level; - curr_distance = next_distance; - } else break; + + bitmap_set(distance_map, distance, 1); } + } + /* + * We can now figure out how many unique distance values there are and + * allocate memory accordingly. + */ + nr_levels = bitmap_weight(distance_map, NR_DISTANCE_VALUES); - /* - * In case of sched_debug() we verify the above assumption. - */ - if (!sched_debug()) - break; + sched_domains_numa_distance = kcalloc(nr_levels, sizeof(int), GFP_KERNEL); + if (!sched_domains_numa_distance) { + bitmap_free(distance_map); + return; + } + + for (i = 0, j = 0; i < nr_levels; i++, j++) { + j = find_next_bit(distance_map, NR_DISTANCE_VALUES, j); + sched_domains_numa_distance[i] = j; } + bitmap_free(distance_map); + /* - * 'level' contains the number of unique distances + * 'nr_levels' contains the number of unique distances * * The sched_domains_numa_distance[] array includes the actual distance * numbers. @@ -1620,15 +1612,15 @@ void sched_init_numa(void) /* * Here, we should temporarily reset sched_domains_numa_levels to 0. * If it fails to allocate memory for array sched_domains_numa_masks[][], - * the array will contain less then 'level' members. This could be + * the array will contain less then 'nr_levels' members. This could be * dangerous when we use it to iterate array sched_domains_numa_masks[][] * in other functions. * - * We reset it to 'level' at the end of this function. + * We reset it to 'nr_levels' at the end of this function. */ sched_domains_numa_levels = 0; - sched_domains_numa_masks = kzalloc(sizeof(void *) * level, GFP_KERNEL); + sched_domains_numa_masks = kzalloc(sizeof(void *) * nr_levels, GFP_KERNEL); if (!sched_domains_numa_masks) return; @@ -1636,7 +1628,7 @@ void sched_init_numa(void) * Now for each level, construct a mask per node which contains all * CPUs of nodes that are that many hops away from us. */ - for (i = 0; i < level; i++) { + for (i = 0; i < nr_levels; i++) { sched_domains_numa_masks[i] = kzalloc(nr_node_ids * sizeof(void *), GFP_KERNEL); if (!sched_domains_numa_masks[i]) @@ -1644,12 +1636,17 @@ void sched_init_numa(void) for (j = 0; j < nr_node_ids; j++) { struct cpumask *mask = kzalloc(cpumask_size(), GFP_KERNEL); + int k; + if (!mask) return; sched_domains_numa_masks[i][j] = mask; for_each_node(k) { + if (sched_debug() && (node_distance(j, k) != node_distance(k, j))) + sched_numa_warn("Node-distance not symmetric"); + if (node_distance(j, k) > sched_domains_numa_distance[i]) continue; @@ -1661,7 +1658,7 @@ void sched_init_numa(void) /* Compute default topology size */ for (i = 0; sched_domain_topology[i].mask; i++); - tl = kzalloc((i + level + 1) * + tl = kzalloc((i + nr_levels + 1) * sizeof(struct sched_domain_topology_level), GFP_KERNEL); if (!tl) return; @@ -1684,7 +1681,7 @@ void sched_init_numa(void) /* * .. and append 'j' levels of NUMA goodness. */ - for (j = 1; j < level; i++, j++) { + for (j = 1; j < nr_levels; i++, j++) { tl[i] = (struct sched_domain_topology_level){ .mask = sd_numa_mask, .sd_flags = cpu_numa_flags, @@ -1696,8 +1693,8 @@ void sched_init_numa(void) sched_domain_topology = tl; - sched_domains_numa_levels = level; - sched_max_numa_distance = sched_domains_numa_distance[level - 1]; + sched_domains_numa_levels = nr_levels; + sched_max_numa_distance = sched_domains_numa_distance[nr_levels - 1]; init_numa_topology_type(); } diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 84bd05117dc22..c76fe1d4d91e2 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -206,6 +206,13 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr_e } EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */ +void __wake_up_pollfree(struct wait_queue_head *wq_head) +{ + __wake_up(wq_head, TASK_NORMAL, 0, poll_to_key(EPOLLHUP | POLLFREE)); + /* POLLFREE must have cleared the queue. */ + WARN_ON_ONCE(waitqueue_active(wq_head)); +} + /* * Note: we use "set_current_state()" _after_ the wait-queue add, * because we need a memory barrier there on SMP, so that any @@ -377,7 +384,7 @@ int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, i int ret = default_wake_function(wq_entry, mode, sync, key); if (ret) - list_del_init(&wq_entry->entry); + list_del_init_careful(&wq_entry->entry); return ret; } diff --git a/kernel/seccomp.c b/kernel/seccomp.c index b152fab8fcdd1..61bb88fb9660d 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -28,6 +28,9 @@ #include #include +/* Not exposed in headers: strictly internal use only. */ +#define SECCOMP_MODE_DEAD (SECCOMP_MODE_FILTER + 1) + #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER #include #endif @@ -706,6 +709,7 @@ static void __secure_computing_strict(int this_syscall) #ifdef SECCOMP_DEBUG dump_stack(); #endif + current->seccomp.mode = SECCOMP_MODE_DEAD; seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true); do_exit(SIGKILL); } @@ -892,6 +896,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, case SECCOMP_RET_KILL_THREAD: case SECCOMP_RET_KILL_PROCESS: default: + current->seccomp.mode = SECCOMP_MODE_DEAD; seccomp_log(this_syscall, SIGSYS, action, true); /* Dump core only if this is the last remaining thread. */ if (action == SECCOMP_RET_KILL_PROCESS || @@ -944,6 +949,11 @@ int __secure_computing(const struct seccomp_data *sd) return 0; case SECCOMP_MODE_FILTER: return __seccomp_filter(this_syscall, sd, false); + /* Surviving SECCOMP_RET_KILL_* must be proactively impossible. */ + case SECCOMP_MODE_DEAD: + WARN_ON_ONCE(1); + do_exit(SIGKILL); + return -1; default: BUG(); } diff --git a/kernel/signal.c b/kernel/signal.c index 29fba42478771..5f87e83a44302 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1914,12 +1914,12 @@ bool do_notify_parent(struct task_struct *tsk, int sig) bool autoreap = false; u64 utime, stime; - BUG_ON(sig == -1); + WARN_ON_ONCE(sig == -1); - /* do_notify_parent_cldstop should have been called instead. */ - BUG_ON(task_is_stopped_or_traced(tsk)); + /* do_notify_parent_cldstop should have been called instead. */ + WARN_ON_ONCE(task_is_stopped_or_traced(tsk)); - BUG_ON(!tsk->ptrace && + WARN_ON_ONCE(!tsk->ptrace && (tsk->group_leader != tsk || !thread_group_empty(tsk))); /* Wake up all pidfd waiters */ @@ -2099,15 +2099,6 @@ static inline bool may_ptrace_stop(void) return true; } -/* - * Return non-zero if there is a SIGKILL that should be waking us up. - * Called with the siglock held. - */ -static bool sigkill_pending(struct task_struct *tsk) -{ - return sigismember(&tsk->pending.signal, SIGKILL) || - sigismember(&tsk->signal->shared_pending.signal, SIGKILL); -} /* * This must be called with current->sighand->siglock held. @@ -2134,17 +2125,16 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t * calling arch_ptrace_stop, so we must release it now. * To preserve proper semantics, we must do this before * any signal bookkeeping like checking group_stop_count. - * Meanwhile, a SIGKILL could come in before we retake the - * siglock. That must prevent us from sleeping in TASK_TRACED. - * So after regaining the lock, we must check for SIGKILL. */ spin_unlock_irq(¤t->sighand->siglock); arch_ptrace_stop(exit_code, info); spin_lock_irq(¤t->sighand->siglock); - if (sigkill_pending(current)) - return; } + /* + * schedule() will not sleep if there is a pending signal that + * can awaken the task. + */ set_special_state(TASK_TRACED); /* diff --git a/kernel/smp.c b/kernel/smp.c index 3a390932f8b25..be65b76cb8036 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -222,7 +222,7 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) /* There shouldn't be any pending callbacks on an offline CPU. */ if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) && - !warned && !llist_empty(head))) { + !warned && entry != NULL)) { warned = true; WARN(1, "IPI on offline CPU %d\n", smp_processor_id()); diff --git a/kernel/sys.c b/kernel/sys.c index 4fde189706deb..ae54afd442ad1 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1927,13 +1927,6 @@ static int validate_prctl_map_addr(struct prctl_mm_map *prctl_map) error = -EINVAL; - /* - * @brk should be after @end_data in traditional maps. - */ - if (prctl_map->start_brk <= prctl_map->end_data || - prctl_map->brk <= prctl_map->end_data) - goto out; - /* * Neither we should allow to override limits if they set. */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 620faea6fd87c..971bb09920652 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -516,6 +516,14 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE, }, + { + .procname = "sched_cfs_bandwidth_min_ratio", + .data = &sysctl_sched_cfs_bandwidth_min_ratio, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + }, #endif #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) { @@ -1454,6 +1462,17 @@ static struct ctl_table vm_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = &one_hundred, }, +#ifdef CONFIG_NUMA + { + .procname = "numa_stat", + .data = &sysctl_vm_numa_stat, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = sysctl_vm_numa_stat_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif #ifdef CONFIG_HUGETLB_PAGE { .procname = "nr_hugepages", @@ -1462,18 +1481,6 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = hugetlb_sysctl_handler, }, -#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP - { - .procname = "hugetlb_free_vmemmap", - .data = &hugetlb_free_vmemmap_enabled_key.key, - .maxlen = sizeof(hugetlb_free_vmemmap_enabled_key.key.enabled), - .mode = 0644, - /* only handle a transition from default "0" to "1" */ - .proc_handler = hugetlb_vmemmap_sysctl_handler, - .extra1 = SYSCTL_ONE, - .extra2 = SYSCTL_ONE, - }, -#endif #ifdef CONFIG_NUMA { .procname = "nr_hugepages_mempolicy", @@ -1482,15 +1489,6 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = &hugetlb_mempolicy_sysctl_handler, }, - { - .procname = "numa_stat", - .data = &sysctl_vm_numa_stat, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = sysctl_vm_numa_stat_handler, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, #endif { .procname = "hugetlb_shm_group", diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 1f3e3a17f67e0..e1e8d5dab0c59 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -759,22 +759,6 @@ static void hrtimer_switch_to_hres(void) retrigger_next_event(NULL); } -static void clock_was_set_work(struct work_struct *work) -{ - clock_was_set(); -} - -static DECLARE_WORK(hrtimer_work, clock_was_set_work); - -/* - * Called from timekeeping and resume code to reprogram the hrtimer - * interrupt device on all cpus. - */ -void clock_was_set_delayed(void) -{ - schedule_work(&hrtimer_work); -} - #else static inline int hrtimer_is_hres_enabled(void) { return 0; } @@ -892,6 +876,22 @@ void clock_was_set(void) timerfd_clock_was_set(); } +static void clock_was_set_work(struct work_struct *work) +{ + clock_was_set(); +} + +static DECLARE_WORK(hrtimer_work, clock_was_set_work); + +/* + * Called from timekeeping and resume code to reprogram the hrtimer + * interrupt device on all cpus and to notify timerfd. + */ +void clock_was_set_delayed(void) +{ + schedule_work(&hrtimer_work); +} + /* * During resume we might have to reprogram the high resolution timer * interrupt on all online CPUs. However, all other CPUs will be @@ -1031,12 +1031,13 @@ static void __remove_hrtimer(struct hrtimer *timer, * remove hrtimer, called with base lock held */ static inline int -remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool restart) +remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, + bool restart, bool keep_local) { u8 state = timer->state; if (state & HRTIMER_STATE_ENQUEUED) { - int reprogram; + bool reprogram; /* * Remove the timer and force reprogramming when high @@ -1049,8 +1050,16 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool rest debug_deactivate(timer); reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases); + /* + * If the timer is not restarted then reprogramming is + * required if the timer is local. If it is local and about + * to be restarted, avoid programming it twice (on removal + * and a moment later when it's requeued). + */ if (!restart) state = HRTIMER_STATE_INACTIVE; + else + reprogram &= !keep_local; __remove_hrtimer(timer, base, state, reprogram); return 1; @@ -1104,9 +1113,31 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, struct hrtimer_clock_base *base) { struct hrtimer_clock_base *new_base; + bool force_local, first; + + /* + * If the timer is on the local cpu base and is the first expiring + * timer then this might end up reprogramming the hardware twice + * (on removal and on enqueue). To avoid that by prevent the + * reprogram on removal, keep the timer local to the current CPU + * and enforce reprogramming after it is queued no matter whether + * it is the new first expiring timer again or not. + */ + force_local = base->cpu_base == this_cpu_ptr(&hrtimer_bases); + force_local &= base->cpu_base->next_timer == timer; - /* Remove an active timer from the queue: */ - remove_hrtimer(timer, base, true); + /* + * Remove an active timer from the queue. In case it is not queued + * on the current CPU, make sure that remove_hrtimer() updates the + * remote data correctly. + * + * If it's on the current CPU and the first expiring timer, then + * skip reprogramming, keep the timer local and enforce + * reprogramming later if it was the first expiring timer. This + * avoids programming the underlying clock event twice (once at + * removal and once after enqueue). + */ + remove_hrtimer(timer, base, true, force_local); if (mode & HRTIMER_MODE_REL) tim = ktime_add_safe(tim, base->get_time()); @@ -1116,9 +1147,24 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, hrtimer_set_expires_range_ns(timer, tim, delta_ns); /* Switch the timer base, if necessary: */ - new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); + if (!force_local) { + new_base = switch_hrtimer_base(timer, base, + mode & HRTIMER_MODE_PINNED); + } else { + new_base = base; + } + + first = enqueue_hrtimer(timer, new_base, mode); + if (!force_local) + return first; - return enqueue_hrtimer(timer, new_base, mode); + /* + * Timer was forced to stay on the current CPU to avoid + * reprogramming on removal and enqueue. Force reprogram the + * hardware by evaluating the new first expiring timer. + */ + hrtimer_force_reprogram(new_base->cpu_base, 1); + return 0; } /** @@ -1184,7 +1230,7 @@ int hrtimer_try_to_cancel(struct hrtimer *timer) base = lock_hrtimer_base(timer, &flags); if (!hrtimer_callback_running(timer)) - ret = remove_hrtimer(timer, base, false); + ret = remove_hrtimer(timer, base, false, false); unlock_hrtimer_base(timer, &flags); diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 7b24961367292..5294f5b1f9550 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -165,3 +165,6 @@ DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases); extern u64 get_next_timer_interrupt(unsigned long basej, u64 basem); void timer_clear_idle(void); + +void clock_was_set(void); +void clock_was_set_delayed(void); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 4419486d7413c..5eb04bb598026 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -131,7 +131,7 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) */ if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) { #ifdef CONFIG_NO_HZ_FULL - WARN_ON(tick_nohz_full_running); + WARN_ON_ONCE(tick_nohz_full_running); #endif tick_do_timer_cpu = cpu; } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 4fc2af4367a7b..e23c9e765a5ff 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -1236,8 +1237,7 @@ int do_settimeofday64(const struct timespec64 *ts) timekeeping_forward_now(tk); xt = tk_xtime(tk); - ts_delta.tv_sec = ts->tv_sec - xt.tv_sec; - ts_delta.tv_nsec = ts->tv_nsec - xt.tv_nsec; + ts_delta = timespec64_sub(*ts, xt); if (timespec64_compare(&tk->wall_to_monotonic, &ts_delta) > 0) { ret = -EINVAL; @@ -2305,6 +2305,20 @@ static int timekeeping_validate_timex(const struct __kernel_timex *txc) return 0; } +/** + * random_get_entropy_fallback - Returns the raw clock source value, + * used by random.c for platforms with no valid random_get_entropy(). + */ +unsigned long random_get_entropy_fallback(void) +{ + struct tk_read_base *tkr = &tk_core.timekeeper.tkr_mono; + struct clocksource *clock = READ_ONCE(tkr->clock); + + if (unlikely(timekeeping_suspended || !clock)) + return 0; + return clock->read(clock); +} +EXPORT_SYMBOL_GPL(random_get_entropy_fallback); /** * do_adjtimex() - Accessor function to NTP __do_adjtimex function diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 16a2b62f5f74c..93560ad5fc8d3 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -2080,26 +2080,28 @@ unsigned long msleep_interruptible(unsigned int msecs) EXPORT_SYMBOL(msleep_interruptible); /** - * usleep_range - Sleep for an approximate time - * @min: Minimum time in usecs to sleep - * @max: Maximum time in usecs to sleep + * usleep_range_state - Sleep for an approximate time in a given state + * @min: Minimum time in usecs to sleep + * @max: Maximum time in usecs to sleep + * @state: State of the current task that will be while sleeping * * In non-atomic context where the exact wakeup time is flexible, use - * usleep_range() instead of udelay(). The sleep improves responsiveness + * usleep_range_state() instead of udelay(). The sleep improves responsiveness * by avoiding the CPU-hogging busy-wait of udelay(), and the range reduces * power usage by allowing hrtimers to take advantage of an already- * scheduled interrupt instead of scheduling a new one just for this sleep. */ -void __sched usleep_range(unsigned long min, unsigned long max) +void __sched usleep_range_state(unsigned long min, unsigned long max, + unsigned int state) { ktime_t exp = ktime_add_us(ktime_get(), min); u64 delta = (u64)(max - min) * NSEC_PER_USEC; for (;;) { - __set_current_state(TASK_UNINTERRUPTIBLE); + __set_current_state(state); /* Do not return before the requested sleep time has elapsed */ if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS)) break; } } -EXPORT_SYMBOL(usleep_range); +EXPORT_SYMBOL(usleep_range_state); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 8409721620e49..219053748232e 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -80,9 +80,6 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) { unsigned int ret; - if (in_nmi()) /* not supported yet */ - return 1; - preempt_disable(); if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) { diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 15f7e7c614210..e5bd1d4bff16b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2522,14 +2522,14 @@ struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter) } static int -ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec) +ftrace_nop_initialize(struct module *mod, struct dyn_ftrace *rec) { int ret; if (unlikely(ftrace_disabled)) return 0; - ret = ftrace_make_nop(mod, rec, MCOUNT_ADDR); + ret = ftrace_init_nop(mod, rec); if (ret) { ftrace_bug_type = FTRACE_BUG_INIT; ftrace_bug(ret, rec); @@ -2971,7 +2971,7 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) * to the NOP instructions. */ if (!__is_defined(CC_USING_NOP_MCOUNT) && - !ftrace_code_disable(mod, p)) + !ftrace_nop_initialize(mod, p)) break; update_cnt++; @@ -6338,7 +6338,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op; int bit; - bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX); + bit = trace_test_and_set_recursion(TRACE_LIST_START); if (bit < 0) return; @@ -6413,7 +6413,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, { int bit; - bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX); + bit = trace_test_and_set_recursion(TRACE_LIST_START); if (bit < 0) return; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5240ba9a82db8..55da88f18342f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -219,7 +219,7 @@ static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata; static int __init set_trace_boot_options(char *str) { strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE); - return 0; + return 1; } __setup("trace_options=", set_trace_boot_options); @@ -230,12 +230,16 @@ static int __init set_trace_boot_clock(char *str) { strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE); trace_boot_clock = trace_boot_clock_buf; - return 0; + return 1; } __setup("trace_clock=", set_trace_boot_clock); static int __init set_tracepoint_printk(char *str) { + /* Ignore the "tp_printk_stop_on_boot" param */ + if (*str == '_') + return 0; + if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0)) tracepoint_printk = 1; return 1; @@ -1301,10 +1305,12 @@ static int __init set_buf_size(char *str) if (!str) return 0; buf_size = memparse(str, &str); - /* nr_entries can not be zero */ - if (buf_size == 0) - return 0; - trace_buf_size = buf_size; + /* + * nr_entries can not be zero and the startup + * tests require some buffer space. Therefore + * ensure we have at least 4096 bytes of buffer. + */ + trace_buf_size = max(4096UL, buf_size); return 1; } __setup("trace_buf_size=", set_buf_size); @@ -2531,7 +2537,7 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb, } EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve); -static DEFINE_SPINLOCK(tracepoint_iter_lock); +static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock); static DEFINE_MUTEX(tracepoint_printk_mutex); static void output_printk(struct trace_event_buffer *fbuffer) @@ -2552,14 +2558,14 @@ static void output_printk(struct trace_event_buffer *fbuffer) event = &fbuffer->trace_file->event_call->event; - spin_lock_irqsave(&tracepoint_iter_lock, flags); + raw_spin_lock_irqsave(&tracepoint_iter_lock, flags); trace_seq_init(&iter->seq); iter->ent = fbuffer->entry; event_call->event.funcs->trace(iter, 0, event); trace_seq_putc(&iter->seq, 0); printk("%s", iter->seq.buffer); - spin_unlock_irqrestore(&tracepoint_iter_lock, flags); + raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags); } int tracepoint_printk_sysctl(struct ctl_table *table, int write, @@ -3007,7 +3013,7 @@ struct trace_buffer_struct { char buffer[4][TRACE_BUF_SIZE]; }; -static struct trace_buffer_struct *trace_percpu_buffer; +static struct trace_buffer_struct __percpu *trace_percpu_buffer; /* * Thise allows for lockless recording. If we're nested too deeply, then @@ -3017,7 +3023,7 @@ static char *get_trace_buf(void) { struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer); - if (!buffer || buffer->nesting >= 4) + if (!trace_percpu_buffer || buffer->nesting >= 4) return NULL; buffer->nesting++; @@ -3036,7 +3042,7 @@ static void put_trace_buf(void) static int alloc_percpu_trace_buffer(void) { - struct trace_buffer_struct *buffers; + struct trace_buffer_struct __percpu *buffers; buffers = alloc_percpu(struct trace_buffer_struct); if (WARN(!buffers, "Could not allocate percpu trace_printk buffer")) @@ -5632,12 +5638,18 @@ static void tracing_set_nop(struct trace_array *tr) tr->current_trace = &nop_trace; } +static bool tracer_options_updated; + static void add_tracer_options(struct trace_array *tr, struct tracer *t) { /* Only enable if the directory has been created already. */ if (!tr->dir) return; + /* Only create trace option files after update_tracer_options finish */ + if (!tracer_options_updated) + return; + create_trace_option_files(tr, t); } @@ -6994,7 +7006,8 @@ static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr) err = kzalloc(sizeof(*err), GFP_KERNEL); if (!err) err = ERR_PTR(-ENOMEM); - tr->n_err_log_entries++; + else + tr->n_err_log_entries++; return err; } @@ -8384,6 +8397,7 @@ static void __update_tracer_options(struct trace_array *tr) static void update_tracer_options(struct trace_array *tr) { mutex_lock(&trace_types_lock); + tracer_options_updated = true; __update_tracer_options(tr); mutex_unlock(&trace_types_lock); } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index fc3aa81a43e3c..1d514a1a31554 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -518,23 +518,8 @@ struct tracer { * When function tracing occurs, the following steps are made: * If arch does not support a ftrace feature: * call internal function (uses INTERNAL bits) which calls... - * If callback is registered to the "global" list, the list - * function is called and recursion checks the GLOBAL bits. - * then this function calls... * The function callback, which can use the FTRACE bits to * check for recursion. - * - * Now if the arch does not suppport a feature, and it calls - * the global list function which calls the ftrace callback - * all three of these steps will do a recursion protection. - * There's no reason to do one if the previous caller already - * did. The recursion that we are protecting against will - * go through the same steps again. - * - * To prevent the multiple recursion checks, if a recursion - * bit is set that is higher than the MAX bit of the current - * check, then we know that the check was made by the previous - * caller, and we can skip the current check. */ enum { TRACE_BUFFER_BIT, @@ -547,12 +532,14 @@ enum { TRACE_FTRACE_NMI_BIT, TRACE_FTRACE_IRQ_BIT, TRACE_FTRACE_SIRQ_BIT, + TRACE_FTRACE_TRANSITION_BIT, - /* INTERNAL_BITs must be greater than FTRACE_BITs */ + /* Internal use recursion bits */ TRACE_INTERNAL_BIT, TRACE_INTERNAL_NMI_BIT, TRACE_INTERNAL_IRQ_BIT, TRACE_INTERNAL_SIRQ_BIT, + TRACE_INTERNAL_TRANSITION_BIT, TRACE_BRANCH_BIT, /* @@ -592,12 +579,6 @@ enum { * function is called to clear it. */ TRACE_GRAPH_NOTRACE_BIT, - - /* - * When transitioning between context, the preempt_count() may - * not be correct. Allow for a single recursion to cover this case. - */ - TRACE_TRANSITION_BIT, }; #define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0) @@ -617,12 +598,18 @@ enum { #define TRACE_CONTEXT_BITS 4 #define TRACE_FTRACE_START TRACE_FTRACE_BIT -#define TRACE_FTRACE_MAX ((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1) #define TRACE_LIST_START TRACE_INTERNAL_BIT -#define TRACE_LIST_MAX ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1) -#define TRACE_CONTEXT_MASK TRACE_LIST_MAX +#define TRACE_CONTEXT_MASK ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1) + +enum { + TRACE_CTX_NMI, + TRACE_CTX_IRQ, + TRACE_CTX_SOFTIRQ, + TRACE_CTX_NORMAL, + TRACE_CTX_TRANSITION, +}; static __always_inline int trace_get_context_bit(void) { @@ -630,59 +617,48 @@ static __always_inline int trace_get_context_bit(void) if (in_interrupt()) { if (in_nmi()) - bit = 0; + bit = TRACE_CTX_NMI; else if (in_irq()) - bit = 1; + bit = TRACE_CTX_IRQ; else - bit = 2; + bit = TRACE_CTX_SOFTIRQ; } else - bit = 3; + bit = TRACE_CTX_NORMAL; return bit; } -static __always_inline int trace_test_and_set_recursion(int start, int max) +static __always_inline int trace_test_and_set_recursion(int start) { unsigned int val = current->trace_recursion; int bit; - /* A previous recursion check was made */ - if ((val & TRACE_CONTEXT_MASK) > max) - return 0; - bit = trace_get_context_bit() + start; if (unlikely(val & (1 << bit))) { /* * It could be that preempt_count has not been updated during * a switch between contexts. Allow for a single recursion. */ - bit = TRACE_TRANSITION_BIT; + bit = start + TRACE_CTX_TRANSITION; if (trace_recursion_test(bit)) return -1; trace_recursion_set(bit); barrier(); - return bit + 1; + return bit; } - /* Normal check passed, clear the transition to allow it again */ - trace_recursion_clear(TRACE_TRANSITION_BIT); - val |= 1 << bit; current->trace_recursion = val; barrier(); - return bit + 1; + return bit; } static __always_inline void trace_clear_recursion(int bit) { unsigned int val = current->trace_recursion; - if (!bit) - return; - - bit--; bit = 1 << bit; val &= ~bit; @@ -1447,14 +1423,26 @@ __event_trigger_test_discard(struct trace_event_file *file, if (eflags & EVENT_FILE_FL_TRIGGER_COND) *tt = event_triggers_call(file, entry, event); - if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) || - (unlikely(file->flags & EVENT_FILE_FL_FILTERED) && - !filter_match_preds(file->filter, entry))) { - __trace_event_discard_commit(buffer, event); - return true; - } + if (likely(!(file->flags & (EVENT_FILE_FL_SOFT_DISABLED | + EVENT_FILE_FL_FILTERED | + EVENT_FILE_FL_PID_FILTER)))) + return false; + + if (file->flags & EVENT_FILE_FL_SOFT_DISABLED) + goto discard; + + if (file->flags & EVENT_FILE_FL_FILTERED && + !filter_match_preds(file->filter, entry)) + goto discard; + + if ((file->flags & EVENT_FILE_FL_PID_FILTER) && + trace_event_ignore_this_pid(file)) + goto discard; return false; + discard: + __trace_event_discard_commit(buffer, event); + return true; } /** diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index e31ee325dad16..4acc77e049e5f 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2247,12 +2247,19 @@ static struct trace_event_file * trace_create_new_event(struct trace_event_call *call, struct trace_array *tr) { + struct trace_pid_list *pid_list; struct trace_event_file *file; file = kmem_cache_alloc(file_cachep, GFP_TRACE); if (!file) return NULL; + pid_list = rcu_dereference_protected(tr->filtered_pids, + lockdep_is_held(&event_mutex)); + + if (pid_list) + file->flags |= EVENT_FILE_FL_PID_FILTER; + file->event_call = call; file->tr = tr; atomic_set(&file->sm_ref, 0); diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index f63766366e238..b8f1f0eadd2ef 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -149,6 +149,8 @@ struct hist_field { */ unsigned int var_ref_idx; bool read_once; + + unsigned int var_str_idx; }; static u64 hist_field_none(struct hist_field *field, @@ -351,6 +353,7 @@ struct hist_trigger_data { unsigned int n_keys; unsigned int n_fields; unsigned int n_vars; + unsigned int n_var_str; unsigned int key_size; struct tracing_map_sort_key sort_keys[TRACING_MAP_SORT_KEYS_MAX]; unsigned int n_sort_keys; @@ -2305,7 +2308,12 @@ static int hist_trigger_elt_data_alloc(struct tracing_map_elt *elt) } } - n_str = hist_data->n_field_var_str + hist_data->n_save_var_str; + n_str = hist_data->n_field_var_str + hist_data->n_save_var_str + + hist_data->n_var_str; + if (n_str > SYNTH_FIELDS_MAX) { + hist_elt_data_free(elt_data); + return -EINVAL; + } size = STR_VAR_LEN_MAX; @@ -2582,9 +2590,10 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data, if (!hist_field->type) goto free; - if (field->filter_type == FILTER_STATIC_STRING) + if (field->filter_type == FILTER_STATIC_STRING) { hist_field->fn = hist_field_string; - else if (field->filter_type == FILTER_DYN_STRING) + hist_field->size = field->size; + } else if (field->filter_type == FILTER_DYN_STRING) hist_field->fn = hist_field_dynstring; else hist_field->fn = hist_field_pstring; @@ -2686,8 +2695,11 @@ static int init_var_ref(struct hist_field *ref_field, return err; free: kfree(ref_field->system); + ref_field->system = NULL; kfree(ref_field->event_name); + ref_field->event_name = NULL; kfree(ref_field->name); + ref_field->name = NULL; goto out; } @@ -2882,9 +2894,9 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, /* * For backward compatibility, if field_name * was "cpu", then we treat this the same as - * common_cpu. + * common_cpu. This also works for "CPU". */ - if (strcmp(field_name, "cpu") == 0) { + if (field && field->filter_type == FILTER_CPU) { *flags |= HIST_FIELD_FL_CPU; } else { hist_err(tr, HIST_ERR_FIELD_NOT_FOUND, @@ -3522,7 +3534,7 @@ static inline void __update_field_vars(struct tracing_map_elt *elt, char *str = elt_data->field_var_str[j++]; char *val_str = (char *)(uintptr_t)var_val; - strscpy(str, val_str, STR_VAR_LEN_MAX); + strscpy(str, val_str, val->size); var_val = (u64)(uintptr_t)str; } tracing_map_set_var(elt, var_idx, var_val); @@ -4389,6 +4401,7 @@ static int trace_action_create(struct hist_trigger_data *hist_data, var_ref_idx = find_var_ref_idx(hist_data, var_ref); if (WARN_ON(var_ref_idx < 0)) { + kfree(p); ret = var_ref_idx; goto err; } @@ -4599,6 +4612,7 @@ static int create_var_field(struct hist_trigger_data *hist_data, { struct trace_array *tr = hist_data->event_file->tr; unsigned long flags = 0; + int ret; if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX + TRACING_MAP_VARS_MAX)) return -EINVAL; @@ -4613,7 +4627,12 @@ static int create_var_field(struct hist_trigger_data *hist_data, if (WARN_ON(hist_data->n_vars > TRACING_MAP_VARS_MAX)) return -EINVAL; - return __create_val_field(hist_data, val_idx, file, var_name, expr_str, flags); + ret = __create_val_field(hist_data, val_idx, file, var_name, expr_str, flags); + + if (hist_data->fields[val_idx]->flags & HIST_FIELD_FL_STRING) + hist_data->fields[val_idx]->var_str_idx = hist_data->n_var_str++; + + return ret; } static int create_val_fields(struct hist_trigger_data *hist_data, @@ -4813,6 +4832,8 @@ static int parse_var_defs(struct hist_trigger_data *hist_data) s = kstrdup(field_str, GFP_KERNEL); if (!s) { + kfree(hist_data->attrs->var_defs.name[n_vars]); + hist_data->attrs->var_defs.name[n_vars] = NULL; ret = -ENOMEM; goto free; } @@ -5231,7 +5252,7 @@ static int create_tracing_map_fields(struct hist_trigger_data *hist_data) if (hist_field->flags & HIST_FIELD_FL_STACKTRACE) cmp_fn = tracing_map_cmp_none; - else if (!field) + else if (!field || hist_field->flags & HIST_FIELD_FL_CPU) cmp_fn = tracing_map_cmp_num(hist_field->size, hist_field->is_signed); else if (is_string_field(field)) @@ -5333,6 +5354,22 @@ static void hist_trigger_elt_update(struct hist_trigger_data *hist_data, hist_val = hist_field->fn(hist_field, elt, rbe, rec); if (hist_field->flags & HIST_FIELD_FL_VAR) { var_idx = hist_field->var.idx; + + if (hist_field->flags & HIST_FIELD_FL_STRING) { + unsigned int str_start, var_str_idx, idx; + char *str, *val_str; + + str_start = hist_data->n_field_var_str + + hist_data->n_save_var_str; + var_str_idx = hist_field->var_str_idx; + idx = str_start + var_str_idx; + + str = elt_data->field_var_str[idx]; + val_str = (char *)(uintptr_t)hist_val; + strscpy(str, val_str, hist_field->size); + + hist_val = (u64)(uintptr_t)str; + } tracing_map_set_var(elt, var_idx, hist_val); continue; } diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index e913d41a41949..82580f7ffad95 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -940,6 +940,16 @@ static void traceon_trigger(struct event_trigger_data *data, void *rec, struct ring_buffer_event *event) { + struct trace_event_file *file = data->private_data; + + if (file) { + if (tracer_tracing_is_on(file->tr)) + return; + + tracer_tracing_on(file->tr); + return; + } + if (tracing_is_on()) return; @@ -950,8 +960,15 @@ static void traceon_count_trigger(struct event_trigger_data *data, void *rec, struct ring_buffer_event *event) { - if (tracing_is_on()) - return; + struct trace_event_file *file = data->private_data; + + if (file) { + if (tracer_tracing_is_on(file->tr)) + return; + } else { + if (tracing_is_on()) + return; + } if (!data->count) return; @@ -959,13 +976,26 @@ traceon_count_trigger(struct event_trigger_data *data, void *rec, if (data->count != -1) (data->count)--; - tracing_on(); + if (file) + tracer_tracing_on(file->tr); + else + tracing_on(); } static void traceoff_trigger(struct event_trigger_data *data, void *rec, struct ring_buffer_event *event) { + struct trace_event_file *file = data->private_data; + + if (file) { + if (!tracer_tracing_is_on(file->tr)) + return; + + tracer_tracing_off(file->tr); + return; + } + if (!tracing_is_on()) return; @@ -976,8 +1006,15 @@ static void traceoff_count_trigger(struct event_trigger_data *data, void *rec, struct ring_buffer_event *event) { - if (!tracing_is_on()) - return; + struct trace_event_file *file = data->private_data; + + if (file) { + if (!tracer_tracing_is_on(file->tr)) + return; + } else { + if (!tracing_is_on()) + return; + } if (!data->count) return; @@ -985,7 +1022,10 @@ traceoff_count_trigger(struct event_trigger_data *data, void *rec, if (data->count != -1) (data->count)--; - tracing_off(); + if (file) + tracer_tracing_off(file->tr); + else + tracing_off(); } static int @@ -1179,7 +1219,14 @@ static void stacktrace_trigger(struct event_trigger_data *data, void *rec, struct ring_buffer_event *event) { - trace_dump_stack(STACK_SKIP); + struct trace_event_file *file = data->private_data; + unsigned long flags; + + if (file) { + local_save_flags(flags); + __trace_stack(file->tr, flags, STACK_SKIP, preempt_count()); + } else + trace_dump_stack(STACK_SKIP); } static void diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index b611cd36e22db..4e8acfe3437fd 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -138,7 +138,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, pc = preempt_count(); preempt_disable_notrace(); - bit = trace_test_and_set_recursion(TRACE_FTRACE_START, TRACE_FTRACE_MAX); + bit = trace_test_and_set_recursion(TRACE_FTRACE_START); if (bit < 0) goto out; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 6fef7233740c0..0b28e264e8287 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -430,7 +430,7 @@ static int disable_trace_kprobe(struct trace_event_call *call, */ trace_probe_remove_file(tp, file); - return 0; + return 1; } #if defined(CONFIG_DYNAMIC_FTRACE) && \ @@ -646,7 +646,11 @@ static int register_trace_kprobe(struct trace_kprobe *tk) /* Register new event */ ret = register_kprobe_event(tk); if (ret) { - pr_warn("Failed to register probe event(%d)\n", ret); + if (ret == -EEXIST) { + trace_probe_log_set_index(0); + trace_probe_log_err(0, EVENT_EXIST); + } else + pr_warn("Failed to register probe event(%d)\n", ret); goto end; } @@ -995,15 +999,18 @@ static int probes_profile_seq_show(struct seq_file *m, void *v) { struct dyn_event *ev = v; struct trace_kprobe *tk; + unsigned long nmissed; if (!is_trace_kprobe(ev)) return 0; tk = to_trace_kprobe(ev); + nmissed = trace_kprobe_is_return(tk) ? + tk->rp.kp.nmissed + tk->rp.nmissed : tk->rp.kp.nmissed; seq_printf(m, " %-44s %15lu %15lu\n", trace_probe_name(&tk->tp), trace_kprobe_nhit(tk), - tk->rp.kp.nmissed); + nmissed); return 0; } diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index f98d6d94cbbf7..23e85cb151346 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -1029,11 +1029,36 @@ int trace_probe_init(struct trace_probe *tp, const char *event, return ret; } +static struct trace_event_call * +find_trace_event_call(const char *system, const char *event_name) +{ + struct trace_event_call *tp_event; + const char *name; + + list_for_each_entry(tp_event, &ftrace_events, list) { + if (!tp_event->class->system || + strcmp(system, tp_event->class->system)) + continue; + name = trace_event_name(tp_event); + if (!name || strcmp(event_name, name)) + continue; + return tp_event; + } + + return NULL; +} + int trace_probe_register_event_call(struct trace_probe *tp) { struct trace_event_call *call = trace_probe_event_call(tp); int ret; + lockdep_assert_held(&event_mutex); + + if (find_trace_event_call(trace_probe_group_name(tp), + trace_probe_name(tp))) + return -EEXIST; + ret = register_trace_event(&call->event); if (!ret) return -ENODEV; diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index a0ff9e200ef6f..bab9e0dba9af2 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -410,6 +410,7 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, C(NO_EVENT_NAME, "Event name is not specified"), \ C(EVENT_TOO_LONG, "Event name is too long"), \ C(BAD_EVENT_NAME, "Event name must follow the same rules as C identifiers"), \ + C(EVENT_EXIST, "Given group/event name is already used by another event"), \ C(RETVAL_ON_PROBE, "$retval is not available on probe"), \ C(BAD_STACK_NUM, "Invalid stack number"), \ C(BAD_ARG_NUM, "Invalid argument number"), \ diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 5294843de6efd..efb51a23a14f2 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -514,7 +514,11 @@ static int register_trace_uprobe(struct trace_uprobe *tu) ret = register_uprobe_event(tu); if (ret) { - pr_warn("Failed to register probe event(%d)\n", ret); + if (ret == -EEXIST) { + trace_probe_log_set_index(0); + trace_probe_log_err(0, EVENT_EXIST); + } else + pr_warn("Failed to register probe event(%d)\n", ret); goto end; } @@ -1295,6 +1299,7 @@ static int uprobe_perf_open(struct trace_event_call *call, return 0; list_for_each_entry(pos, trace_probe_probe_list(tp), list) { + tu = container_of(pos, struct trace_uprobe, tp); err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true); if (err) { uprobe_perf_close(call, event); diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c index 9e31bfc818ff8..83c2a0598c648 100644 --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "tracing_map.h" #include "trace.h" @@ -307,6 +308,7 @@ void tracing_map_array_free(struct tracing_map_array *a) for (i = 0; i < a->n_pages; i++) { if (!a->pages[i]) break; + kmemleak_free(a->pages[i]); free_page((unsigned long)a->pages[i]); } @@ -342,6 +344,7 @@ struct tracing_map_array *tracing_map_array_alloc(unsigned int n_elts, a->pages[i] = (void *)get_zeroed_page(GFP_KERNEL); if (!a->pages[i]) goto free; + kmemleak_alloc(a->pages[i], PAGE_SIZE, 1, GFP_KERNEL); } out: return a; @@ -834,29 +837,35 @@ int tracing_map_init(struct tracing_map *map) return err; } -static int cmp_entries_dup(const struct tracing_map_sort_entry **a, - const struct tracing_map_sort_entry **b) +static int cmp_entries_dup(const void *A, const void *B) { + const struct tracing_map_sort_entry *a, *b; int ret = 0; - if (memcmp((*a)->key, (*b)->key, (*a)->elt->map->key_size)) + a = *(const struct tracing_map_sort_entry **)A; + b = *(const struct tracing_map_sort_entry **)B; + + if (memcmp(a->key, b->key, a->elt->map->key_size)) ret = 1; return ret; } -static int cmp_entries_sum(const struct tracing_map_sort_entry **a, - const struct tracing_map_sort_entry **b) +static int cmp_entries_sum(const void *A, const void *B) { const struct tracing_map_elt *elt_a, *elt_b; + const struct tracing_map_sort_entry *a, *b; struct tracing_map_sort_key *sort_key; struct tracing_map_field *field; tracing_map_cmp_fn_t cmp_fn; void *val_a, *val_b; int ret = 0; - elt_a = (*a)->elt; - elt_b = (*b)->elt; + a = *(const struct tracing_map_sort_entry **)A; + b = *(const struct tracing_map_sort_entry **)B; + + elt_a = a->elt; + elt_b = b->elt; sort_key = &elt_a->map->sort_key; @@ -873,18 +882,21 @@ static int cmp_entries_sum(const struct tracing_map_sort_entry **a, return ret; } -static int cmp_entries_key(const struct tracing_map_sort_entry **a, - const struct tracing_map_sort_entry **b) +static int cmp_entries_key(const void *A, const void *B) { const struct tracing_map_elt *elt_a, *elt_b; + const struct tracing_map_sort_entry *a, *b; struct tracing_map_sort_key *sort_key; struct tracing_map_field *field; tracing_map_cmp_fn_t cmp_fn; void *val_a, *val_b; int ret = 0; - elt_a = (*a)->elt; - elt_b = (*b)->elt; + a = *(const struct tracing_map_sort_entry **)A; + b = *(const struct tracing_map_sort_entry **)B; + + elt_a = a->elt; + elt_b = b->elt; sort_key = &elt_a->map->sort_key; @@ -989,10 +1001,8 @@ static void sort_secondary(struct tracing_map *map, struct tracing_map_sort_key *primary_key, struct tracing_map_sort_key *secondary_key) { - int (*primary_fn)(const struct tracing_map_sort_entry **, - const struct tracing_map_sort_entry **); - int (*secondary_fn)(const struct tracing_map_sort_entry **, - const struct tracing_map_sort_entry **); + int (*primary_fn)(const void *, const void *); + int (*secondary_fn)(const void *, const void *); unsigned i, start = 0, n_sub = 1; if (is_key(map, primary_key->field_idx)) @@ -1061,8 +1071,7 @@ int tracing_map_sort_entries(struct tracing_map *map, unsigned int n_sort_keys, struct tracing_map_sort_entry ***sort_entries) { - int (*cmp_entries_fn)(const struct tracing_map_sort_entry **, - const struct tracing_map_sort_entry **); + int (*cmp_entries_fn)(const void *, const void *); struct tracing_map_sort_entry *sort_entry, **entries; int i, n_entries, ret; diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 7be3e7530841f..33a4093306f9c 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -35,11 +35,10 @@ void bacct_add_tsk(struct user_namespace *user_ns, /* Convert to seconds for btime */ do_div(delta, USEC_PER_SEC); stats->ac_btime = get_seconds() - delta; - if (thread_group_leader(tsk)) { + if (tsk->flags & PF_EXITING) stats->ac_exitcode = tsk->exit_code; - if (tsk->flags & PF_FORKNOEXEC) - stats->ac_flag |= AFORK; - } + if (thread_group_leader(tsk) && (tsk->flags & PF_FORKNOEXEC)) + stats->ac_flag |= AFORK; if (tsk->flags & PF_SUPERPRIV) stats->ac_flag |= ASU; if (tsk->flags & PF_DUMPCORE) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 6aeb53b4e19f8..e90f37e22202a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -855,8 +855,17 @@ void wq_worker_running(struct task_struct *task) if (!worker->sleeping) return; + + /* + * If preempted by unbind_workers() between the WORKER_NOT_RUNNING check + * and the nr_running increment below, we may ruin the nr_running reset + * and leave with an unexpected pool->nr_running == 1 on the newly unbound + * pool. Protect against such race. + */ + preempt_disable(); if (!(worker->flags & WORKER_NOT_RUNNING)) atomic_inc(&worker->pool->nr_running); + preempt_enable(); worker->sleeping = 0; } @@ -5302,9 +5311,6 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask) int ret = -EINVAL; cpumask_var_t saved_cpumask; - if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL)) - return -ENOMEM; - /* * Not excluding isolated cpus on purpose. * If the user wishes to include them, we allow that. @@ -5312,6 +5318,15 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask) cpumask_and(cpumask, cpumask, cpu_possible_mask); if (!cpumask_empty(cpumask)) { apply_wqattrs_lock(); + if (cpumask_equal(cpumask, wq_unbound_cpumask)) { + ret = 0; + goto out_unlock; + } + + if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL)) { + ret = -ENOMEM; + goto out_unlock; + } /* save the old wq_unbound_cpumask. */ cpumask_copy(saved_cpumask, wq_unbound_cpumask); @@ -5324,10 +5339,11 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask) if (ret < 0) cpumask_copy(wq_unbound_cpumask, saved_cpumask); + free_cpumask_var(saved_cpumask); +out_unlock: apply_wqattrs_unlock(); } - free_cpumask_var(saved_cpumask); return ret; } @@ -5869,6 +5885,13 @@ static void __init wq_numa_init(void) return; } + for_each_possible_cpu(cpu) { + if (WARN_ON(cpu_to_node(cpu) == NUMA_NO_NODE)) { + pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu); + return; + } + } + wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs(); BUG_ON(!wq_update_unbound_numa_attrs_buf); @@ -5886,11 +5909,6 @@ static void __init wq_numa_init(void) for_each_possible_cpu(cpu) { node = cpu_to_node(cpu); - if (WARN_ON(node == NUMA_NO_NODE)) { - pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu); - /* happens iff arch is bonkers, let's just proceed */ - return; - } cpumask_set_cpu(cpu, tbl[node]); } diff --git a/lib/Kconfig b/lib/Kconfig index 3321d04dfa5a5..fa129b5c4320d 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -42,7 +42,6 @@ config BITREVERSE config HAVE_ARCH_BITREVERSE bool default n - depends on BITREVERSE help This option enables the use of hardware bit-reversal instructions on architectures which support such operations. diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ec54ee392d58e..fec594e42abe4 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -868,7 +868,6 @@ config HARDLOCKUP_DETECTOR depends on HAVE_HARDLOCKUP_DETECTOR_PERF || HAVE_HARDLOCKUP_DETECTOR_ARCH select LOCKUP_DETECTOR select HARDLOCKUP_DETECTOR_PERF if HAVE_HARDLOCKUP_DETECTOR_PERF - select HARDLOCKUP_DETECTOR_ARCH if HAVE_HARDLOCKUP_DETECTOR_ARCH help Say Y here to enable the kernel to act as a watchdog to detect hard lockups. @@ -1310,8 +1309,7 @@ config WARN_ALL_UNSEEDED_RANDOM so architecture maintainers really need to do what they can to get the CRNG seeded sooner after the system is booted. However, since users cannot do anything actionable to - address this, by default the kernel will issue only a single - warning for the first use of unseeded randomness. + address this, by default this option is disabled. Say Y here if you want to receive warnings for all uses of unseeded randomness. This will be of use primarily for @@ -1988,6 +1986,7 @@ config TEST_KMOD depends on m depends on NETDEVICES && NET_CORE && INET # for TUN depends on BLOCK + depends on PAGE_SIZE_LESS_THAN_256KB # for BTRFS select TEST_LKM select XFS_FS select TUN diff --git a/lib/assoc_array.c b/lib/assoc_array.c index 6f4bcf5245547..b537a83678e11 100644 --- a/lib/assoc_array.c +++ b/lib/assoc_array.c @@ -1462,6 +1462,7 @@ int assoc_array_gc(struct assoc_array *array, struct assoc_array_ptr *cursor, *ptr; struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp; unsigned long nr_leaves_on_tree; + bool retained; int keylen, slot, nr_free, next_slot, i; pr_devel("-->%s()\n", __func__); @@ -1538,6 +1539,7 @@ int assoc_array_gc(struct assoc_array *array, goto descend; } +retry_compress: pr_devel("-- compress node %p --\n", new_n); /* Count up the number of empty slots in this node and work out the @@ -1555,6 +1557,7 @@ int assoc_array_gc(struct assoc_array *array, pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch); /* See what we can fold in */ + retained = false; next_slot = 0; for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { struct assoc_array_shortcut *s; @@ -1604,9 +1607,14 @@ int assoc_array_gc(struct assoc_array *array, pr_devel("[%d] retain node %lu/%d [nx %d]\n", slot, child->nr_leaves_on_branch, nr_free + 1, next_slot); + retained = true; } } + if (retained && new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) { + pr_devel("internal nodes remain despite enough space, retrying\n"); + goto retry_compress; + } pr_devel("after: %lu\n", new_n->nr_leaves_on_branch); nr_leaves_on_tree = new_n->nr_leaves_on_branch; diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index cbe0b6a6450d7..4f7b06789d157 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -11,3 +11,15 @@ libdes-y := des.o obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o libsha256-y := sha256.o + +obj-$(CONFIG_CRYPTO_LIB_SM3) += libsm3.o +libsm3-y := sm3.o + +obj-$(CONFIG_CRYPTO_LIB_SM4) += libsm4.o +libsm4-y := sm4.o + +obj-y += libblake2s.o +libblake2s-y += blake2s.o blake2s-generic.o +ifneq ($(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS),y) +libblake2s-y += blake2s-selftest.o +endif diff --git a/lib/crypto/blake2s-generic.c b/lib/crypto/blake2s-generic.c new file mode 100644 index 0000000000000..04ff8df245136 --- /dev/null +++ b/lib/crypto/blake2s-generic.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + * + * This is an implementation of the BLAKE2s hash and PRF functions. + * + * Information: https://blake2.net/ + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static const u8 blake2s_sigma[10][16] = { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 }, +}; + +static inline void blake2s_increment_counter(struct blake2s_state *state, + const u32 inc) +{ + state->t[0] += inc; + state->t[1] += (state->t[0] < inc); +} + +void blake2s_compress_generic(struct blake2s_state *state,const u8 *block, + size_t nblocks, const u32 inc) +{ + u32 m[16]; + u32 v[16]; + int i; + + WARN_ON(IS_ENABLED(DEBUG) && + (nblocks > 1 && inc != BLAKE2S_BLOCK_SIZE)); + + while (nblocks > 0) { + blake2s_increment_counter(state, inc); + memcpy(m, block, BLAKE2S_BLOCK_SIZE); + le32_to_cpu_array(m, ARRAY_SIZE(m)); + memcpy(v, state->h, 32); + v[ 8] = BLAKE2S_IV0; + v[ 9] = BLAKE2S_IV1; + v[10] = BLAKE2S_IV2; + v[11] = BLAKE2S_IV3; + v[12] = BLAKE2S_IV4 ^ state->t[0]; + v[13] = BLAKE2S_IV5 ^ state->t[1]; + v[14] = BLAKE2S_IV6 ^ state->f[0]; + v[15] = BLAKE2S_IV7 ^ state->f[1]; + +#define G(r, i, a, b, c, d) do { \ + a += b + m[blake2s_sigma[r][2 * i + 0]]; \ + d = ror32(d ^ a, 16); \ + c += d; \ + b = ror32(b ^ c, 12); \ + a += b + m[blake2s_sigma[r][2 * i + 1]]; \ + d = ror32(d ^ a, 8); \ + c += d; \ + b = ror32(b ^ c, 7); \ +} while (0) + +#define ROUND(r) do { \ + G(r, 0, v[0], v[ 4], v[ 8], v[12]); \ + G(r, 1, v[1], v[ 5], v[ 9], v[13]); \ + G(r, 2, v[2], v[ 6], v[10], v[14]); \ + G(r, 3, v[3], v[ 7], v[11], v[15]); \ + G(r, 4, v[0], v[ 5], v[10], v[15]); \ + G(r, 5, v[1], v[ 6], v[11], v[12]); \ + G(r, 6, v[2], v[ 7], v[ 8], v[13]); \ + G(r, 7, v[3], v[ 4], v[ 9], v[14]); \ +} while (0) + ROUND(0); + ROUND(1); + ROUND(2); + ROUND(3); + ROUND(4); + ROUND(5); + ROUND(6); + ROUND(7); + ROUND(8); + ROUND(9); + +#undef G +#undef ROUND + + for (i = 0; i < 8; ++i) + state->h[i] ^= v[i] ^ v[i + 8]; + + block += BLAKE2S_BLOCK_SIZE; + --nblocks; + } +} + +EXPORT_SYMBOL(blake2s_compress_generic); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("BLAKE2s hash function"); +MODULE_AUTHOR("Jason A. Donenfeld "); diff --git a/lib/crypto/blake2s-selftest.c b/lib/crypto/blake2s-selftest.c new file mode 100644 index 0000000000000..7a9edc96ddddf --- /dev/null +++ b/lib/crypto/blake2s-selftest.c @@ -0,0 +1,591 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include +#include + +/* + * blake2s_testvecs[] generated with the program below (using libb2-dev and + * libssl-dev [OpenSSL]) + * + * #include + * #include + * #include + * + * #include + * + * #define BLAKE2S_TESTVEC_COUNT 256 + * + * static void print_vec(const uint8_t vec[], int len) + * { + * int i; + * + * printf(" { "); + * for (i = 0; i < len; i++) { + * if (i && (i % 12) == 0) + * printf("\n "); + * printf("0x%02x, ", vec[i]); + * } + * printf("},\n"); + * } + * + * int main(void) + * { + * uint8_t key[BLAKE2S_KEYBYTES]; + * uint8_t buf[BLAKE2S_TESTVEC_COUNT]; + * uint8_t hash[BLAKE2S_OUTBYTES]; + * int i, j; + * + * key[0] = key[1] = 1; + * for (i = 2; i < BLAKE2S_KEYBYTES; ++i) + * key[i] = key[i - 2] + key[i - 1]; + * + * for (i = 0; i < BLAKE2S_TESTVEC_COUNT; ++i) + * buf[i] = (uint8_t)i; + * + * printf("static const u8 blake2s_testvecs[][BLAKE2S_HASH_SIZE] __initconst = {\n"); + * + * for (i = 0; i < BLAKE2S_TESTVEC_COUNT; ++i) { + * int outlen = 1 + i % BLAKE2S_OUTBYTES; + * int keylen = (13 * i) % (BLAKE2S_KEYBYTES + 1); + * + * blake2s(hash, buf, key + BLAKE2S_KEYBYTES - keylen, outlen, i, + * keylen); + * print_vec(hash, outlen); + * } + * printf("};\n\n"); + * + * return 0; + *} + */ +static const u8 blake2s_testvecs[][BLAKE2S_HASH_SIZE] __initconst = { + { 0xa1, }, + { 0x7c, 0x89, }, + { 0x74, 0x0e, 0xd4, }, + { 0x47, 0x0c, 0x21, 0x15, }, + { 0x18, 0xd6, 0x9c, 0xa6, 0xc4, }, + { 0x13, 0x5d, 0x16, 0x63, 0x2e, 0xf9, }, + { 0x2c, 0xb5, 0x04, 0xb7, 0x99, 0xe2, 0x73, }, + { 0x9a, 0x0f, 0xd2, 0x39, 0xd6, 0x68, 0x1b, 0x92, }, + { 0xc8, 0xde, 0x7a, 0xea, 0x2f, 0xf4, 0xd2, 0xe3, 0x2b, }, + { 0x5b, 0xf9, 0x43, 0x52, 0x0c, 0x12, 0xba, 0xb5, 0x93, 0x9f, }, + { 0xc6, 0x2c, 0x4e, 0x80, 0xfc, 0x32, 0x5b, 0x33, 0xb8, 0xb8, 0x0a, }, + { 0xa7, 0x5c, 0xfd, 0x3a, 0xcc, 0xbf, 0x90, 0xca, 0xb7, 0x97, 0xde, 0xd8, }, + { 0x66, 0xca, 0x3c, 0xc4, 0x19, 0xef, 0x92, 0x66, 0x3f, 0x21, 0x8f, 0xda, + 0xb7, }, + { 0xba, 0xe5, 0xbb, 0x30, 0x25, 0x94, 0x6d, 0xc3, 0x89, 0x09, 0xc4, 0x25, + 0x52, 0x3e, }, + { 0xa2, 0xef, 0x0e, 0x52, 0x0b, 0x5f, 0xa2, 0x01, 0x6d, 0x0a, 0x25, 0xbc, + 0x57, 0xe2, 0x27, }, + { 0x4f, 0xe0, 0xf9, 0x52, 0x12, 0xda, 0x84, 0xb7, 0xab, 0xae, 0xb0, 0xa6, + 0x47, 0x2a, 0xc7, 0xf5, }, + { 0x56, 0xe7, 0xa8, 0x1c, 0x4c, 0xca, 0xed, 0x90, 0x31, 0xec, 0x87, 0x43, + 0xe7, 0x72, 0x08, 0xec, 0xbe, }, + { 0x7e, 0xdf, 0x80, 0x1c, 0x93, 0x33, 0xfd, 0x53, 0x44, 0xba, 0xfd, 0x96, + 0xe1, 0xbb, 0xb5, 0x65, 0xa5, 0x00, }, + { 0xec, 0x6b, 0xed, 0xf7, 0x7b, 0x62, 0x1d, 0x7d, 0xf4, 0x82, 0xf3, 0x1e, + 0x18, 0xff, 0x2b, 0xc4, 0x06, 0x20, 0x2a, }, + { 0x74, 0x98, 0xd7, 0x68, 0x63, 0xed, 0x87, 0xe4, 0x5d, 0x8d, 0x9e, 0x1d, + 0xfd, 0x2a, 0xbb, 0x86, 0xac, 0xe9, 0x2a, 0x89, }, + { 0x89, 0xc3, 0x88, 0xce, 0x2b, 0x33, 0x1e, 0x10, 0xd1, 0x37, 0x20, 0x86, + 0x28, 0x43, 0x70, 0xd9, 0xfb, 0x96, 0xd9, 0xb5, 0xd3, }, + { 0xcb, 0x56, 0x74, 0x41, 0x8d, 0x80, 0x01, 0x9a, 0x6b, 0x38, 0xe1, 0x41, + 0xad, 0x9c, 0x62, 0x74, 0xce, 0x35, 0xd5, 0x6c, 0x89, 0x6e, }, + { 0x79, 0xaf, 0x94, 0x59, 0x99, 0x26, 0xe1, 0xc9, 0x34, 0xfe, 0x7c, 0x22, + 0xf7, 0x43, 0xd7, 0x65, 0xd4, 0x48, 0x18, 0xac, 0x3d, 0xfd, 0x93, }, + { 0x85, 0x0d, 0xff, 0xb8, 0x3e, 0x87, 0x41, 0xb0, 0x95, 0xd3, 0x3d, 0x00, + 0x47, 0x55, 0x9e, 0xd2, 0x69, 0xea, 0xbf, 0xe9, 0x7a, 0x2d, 0x61, 0x45, }, + { 0x03, 0xe0, 0x85, 0xec, 0x54, 0xb5, 0x16, 0x53, 0xa8, 0xc4, 0x71, 0xe9, + 0x6a, 0xe7, 0xcb, 0xc4, 0x15, 0x02, 0xfc, 0x34, 0xa4, 0xa4, 0x28, 0x13, + 0xd1, }, + { 0xe3, 0x34, 0x4b, 0xe1, 0xd0, 0x4b, 0x55, 0x61, 0x8f, 0xc0, 0x24, 0x05, + 0xe6, 0xe0, 0x3d, 0x70, 0x24, 0x4d, 0xda, 0xb8, 0x91, 0x05, 0x29, 0x07, + 0x01, 0x3e, }, + { 0x61, 0xff, 0x01, 0x72, 0xb1, 0x4d, 0xf6, 0xfe, 0xd1, 0xd1, 0x08, 0x74, + 0xe6, 0x91, 0x44, 0xeb, 0x61, 0xda, 0x40, 0xaf, 0xfc, 0x8c, 0x91, 0x6b, + 0xec, 0x13, 0xed, }, + { 0xd4, 0x40, 0xd2, 0xa0, 0x7f, 0xc1, 0x58, 0x0c, 0x85, 0xa0, 0x86, 0xc7, + 0x86, 0xb9, 0x61, 0xc9, 0xea, 0x19, 0x86, 0x1f, 0xab, 0x07, 0xce, 0x37, + 0x72, 0x67, 0x09, 0xfc, }, + { 0x9e, 0xf8, 0x18, 0x67, 0x93, 0x10, 0x9b, 0x39, 0x75, 0xe8, 0x8b, 0x38, + 0x82, 0x7d, 0xb8, 0xb7, 0xa5, 0xaf, 0xe6, 0x6a, 0x22, 0x5e, 0x1f, 0x9c, + 0x95, 0x29, 0x19, 0xf2, 0x4b, }, + { 0xc8, 0x62, 0x25, 0xf5, 0x98, 0xc9, 0xea, 0xe5, 0x29, 0x3a, 0xd3, 0x22, + 0xeb, 0xeb, 0x07, 0x7c, 0x15, 0x07, 0xee, 0x15, 0x61, 0xbb, 0x05, 0x30, + 0x99, 0x7f, 0x11, 0xf6, 0x0a, 0x1d, }, + { 0x68, 0x70, 0xf7, 0x90, 0xa1, 0x8b, 0x1f, 0x0f, 0xbb, 0xce, 0xd2, 0x0e, + 0x33, 0x1f, 0x7f, 0xa9, 0x78, 0xa8, 0xa6, 0x81, 0x66, 0xab, 0x8d, 0xcd, + 0x58, 0x55, 0x3a, 0x0b, 0x7a, 0xdb, 0xb5, }, + { 0xdd, 0x35, 0xd2, 0xb4, 0xf6, 0xc7, 0xea, 0xab, 0x64, 0x24, 0x4e, 0xfe, + 0xe5, 0x3d, 0x4e, 0x95, 0x8b, 0x6d, 0x6c, 0xbc, 0xb0, 0xf8, 0x88, 0x61, + 0x09, 0xb7, 0x78, 0xa3, 0x31, 0xfe, 0xd9, 0x2f, }, + { 0x0a, }, + { 0x6e, 0xd4, }, + { 0x64, 0xe9, 0xd1, }, + { 0x30, 0xdd, 0x71, 0xef, }, + { 0x11, 0xb5, 0x0c, 0x87, 0xc9, }, + { 0x06, 0x1c, 0x6d, 0x04, 0x82, 0xd0, }, + { 0x5c, 0x42, 0x0b, 0xee, 0xc5, 0x9c, 0xb2, }, + { 0xe8, 0x29, 0xd6, 0xb4, 0x5d, 0xf7, 0x2b, 0x93, }, + { 0x18, 0xca, 0x27, 0x72, 0x43, 0x39, 0x16, 0xbc, 0x6a, }, + { 0x39, 0x8f, 0xfd, 0x64, 0xf5, 0x57, 0x23, 0xb0, 0x45, 0xf8, }, + { 0xbb, 0x3a, 0x78, 0x6b, 0x02, 0x1d, 0x0b, 0x16, 0xe3, 0xb2, 0x9a, }, + { 0xb8, 0xb4, 0x0b, 0xe5, 0xd4, 0x1d, 0x0d, 0x85, 0x49, 0x91, 0x35, 0xfa, }, + { 0x6d, 0x48, 0x2a, 0x0c, 0x42, 0x08, 0xbd, 0xa9, 0x78, 0x6f, 0x18, 0xaf, + 0xe2, }, + { 0x10, 0x45, 0xd4, 0x58, 0x88, 0xec, 0x4e, 0x1e, 0xf6, 0x14, 0x92, 0x64, + 0x7e, 0xb0, }, + { 0x8b, 0x0b, 0x95, 0xee, 0x92, 0xc6, 0x3b, 0x91, 0xf1, 0x1e, 0xeb, 0x51, + 0x98, 0x0a, 0x8d, }, + { 0xa3, 0x50, 0x4d, 0xa5, 0x1d, 0x03, 0x68, 0xe9, 0x57, 0x78, 0xd6, 0x04, + 0xf1, 0xc3, 0x94, 0xd8, }, + { 0xb8, 0x66, 0x6e, 0xdd, 0x46, 0x15, 0xae, 0x3d, 0x83, 0x7e, 0xcf, 0xe7, + 0x2c, 0xe8, 0x8f, 0xc7, 0x34, }, + { 0x2e, 0xc0, 0x1f, 0x29, 0xea, 0xf6, 0xb9, 0xe2, 0xc2, 0x93, 0xeb, 0x41, + 0x0d, 0xf0, 0x0a, 0x13, 0x0e, 0xa2, }, + { 0x71, 0xb8, 0x33, 0xa9, 0x1b, 0xac, 0xf1, 0xb5, 0x42, 0x8f, 0x5e, 0x81, + 0x34, 0x43, 0xb7, 0xa4, 0x18, 0x5c, 0x47, }, + { 0xda, 0x45, 0xb8, 0x2e, 0x82, 0x1e, 0xc0, 0x59, 0x77, 0x9d, 0xfa, 0xb4, + 0x1c, 0x5e, 0xa0, 0x2b, 0x33, 0x96, 0x5a, 0x58, }, + { 0xe3, 0x09, 0x05, 0xa9, 0xeb, 0x48, 0x13, 0xad, 0x71, 0x88, 0x81, 0x9a, + 0x3e, 0x2c, 0xe1, 0x23, 0x99, 0x13, 0x35, 0x9f, 0xb5, }, + { 0xb7, 0x86, 0x2d, 0x16, 0xe1, 0x04, 0x00, 0x47, 0x47, 0x61, 0x31, 0xfb, + 0x14, 0xac, 0xd8, 0xe9, 0xe3, 0x49, 0xbd, 0xf7, 0x9c, 0x3f, }, + { 0x7f, 0xd9, 0x95, 0xa8, 0xa7, 0xa0, 0xcc, 0xba, 0xef, 0xb1, 0x0a, 0xa9, + 0x21, 0x62, 0x08, 0x0f, 0x1b, 0xff, 0x7b, 0x9d, 0xae, 0xb2, 0x95, }, + { 0x85, 0x99, 0xea, 0x33, 0xe0, 0x56, 0xff, 0x13, 0xc6, 0x61, 0x8c, 0xf9, + 0x57, 0x05, 0x03, 0x11, 0xf9, 0xfb, 0x3a, 0xf7, 0xce, 0xbb, 0x52, 0x30, }, + { 0xb2, 0x72, 0x9c, 0xf8, 0x77, 0x4e, 0x8f, 0x6b, 0x01, 0x6c, 0xff, 0x4e, + 0x4f, 0x02, 0xd2, 0xbc, 0xeb, 0x51, 0x28, 0x99, 0x50, 0xab, 0xc4, 0x42, + 0xe3, }, + { 0x8b, 0x0a, 0xb5, 0x90, 0x8f, 0xf5, 0x7b, 0xdd, 0xba, 0x47, 0x37, 0xc9, + 0x2a, 0xd5, 0x4b, 0x25, 0x08, 0x8b, 0x02, 0x17, 0xa7, 0x9e, 0x6b, 0x6e, + 0xe3, 0x90, }, + { 0x90, 0xdd, 0xf7, 0x75, 0xa7, 0xa3, 0x99, 0x5e, 0x5b, 0x7d, 0x75, 0xc3, + 0x39, 0x6b, 0xa0, 0xe2, 0x44, 0x53, 0xb1, 0x9e, 0xc8, 0xf1, 0x77, 0x10, + 0x58, 0x06, 0x9a, }, + { 0x99, 0x52, 0xf0, 0x49, 0xa8, 0x8c, 0xec, 0xa6, 0x97, 0x32, 0x13, 0xb5, + 0xf7, 0xa3, 0x8e, 0xfb, 0x4b, 0x59, 0x31, 0x3d, 0x01, 0x59, 0x98, 0x5d, + 0x53, 0x03, 0x1a, 0x39, }, + { 0x9f, 0xe0, 0xc2, 0xe5, 0x5d, 0x93, 0xd6, 0x9b, 0x47, 0x8f, 0x9b, 0xe0, + 0x26, 0x35, 0x84, 0x20, 0x1d, 0xc5, 0x53, 0x10, 0x0f, 0x22, 0xb9, 0xb5, + 0xd4, 0x36, 0xb1, 0xac, 0x73, }, + { 0x30, 0x32, 0x20, 0x3b, 0x10, 0x28, 0xec, 0x1f, 0x4f, 0x9b, 0x47, 0x59, + 0xeb, 0x7b, 0xee, 0x45, 0xfb, 0x0c, 0x49, 0xd8, 0x3d, 0x69, 0xbd, 0x90, + 0x2c, 0xf0, 0x9e, 0x8d, 0xbf, 0xd5, }, + { 0x2a, 0x37, 0x73, 0x7f, 0xf9, 0x96, 0x19, 0xaa, 0x25, 0xd8, 0x13, 0x28, + 0x01, 0x29, 0x89, 0xdf, 0x6e, 0x0c, 0x9b, 0x43, 0x44, 0x51, 0xe9, 0x75, + 0x26, 0x0c, 0xb7, 0x87, 0x66, 0x0b, 0x5f, }, + { 0x23, 0xdf, 0x96, 0x68, 0x91, 0x86, 0xd0, 0x93, 0x55, 0x33, 0x24, 0xf6, + 0xba, 0x08, 0x75, 0x5b, 0x59, 0x11, 0x69, 0xb8, 0xb9, 0xe5, 0x2c, 0x77, + 0x02, 0xf6, 0x47, 0xee, 0x81, 0xdd, 0xb9, 0x06, }, + { 0x9d, }, + { 0x9d, 0x7d, }, + { 0xfd, 0xc3, 0xda, }, + { 0xe8, 0x82, 0xcd, 0x21, }, + { 0xc3, 0x1d, 0x42, 0x4c, 0x74, }, + { 0xe9, 0xda, 0xf1, 0xa2, 0xe5, 0x7c, }, + { 0x52, 0xb8, 0x6f, 0x81, 0x5c, 0x3a, 0x4c, }, + { 0x5b, 0x39, 0x26, 0xfc, 0x92, 0x5e, 0xe0, 0x49, }, + { 0x59, 0xe4, 0x7c, 0x93, 0x1c, 0xf9, 0x28, 0x93, 0xde, }, + { 0xde, 0xdf, 0xb2, 0x43, 0x61, 0x0b, 0x86, 0x16, 0x4c, 0x2e, }, + { 0x14, 0x8f, 0x75, 0x51, 0xaf, 0xb9, 0xee, 0x51, 0x5a, 0xae, 0x23, }, + { 0x43, 0x5f, 0x50, 0xd5, 0x70, 0xb0, 0x5b, 0x87, 0xf5, 0xd9, 0xb3, 0x6d, }, + { 0x66, 0x0a, 0x64, 0x93, 0x79, 0x71, 0x94, 0x40, 0xb7, 0x68, 0x2d, 0xd3, + 0x63, }, + { 0x15, 0x00, 0xc4, 0x0c, 0x7d, 0x1b, 0x10, 0xa9, 0x73, 0x1b, 0x90, 0x6f, + 0xe6, 0xa9, }, + { 0x34, 0x75, 0xf3, 0x86, 0x8f, 0x56, 0xcf, 0x2a, 0x0a, 0xf2, 0x62, 0x0a, + 0xf6, 0x0e, 0x20, }, + { 0xb1, 0xde, 0xc9, 0xf5, 0xdb, 0xf3, 0x2f, 0x4c, 0xd6, 0x41, 0x7d, 0x39, + 0x18, 0x3e, 0xc7, 0xc3, }, + { 0xc5, 0x89, 0xb2, 0xf8, 0xb8, 0xc0, 0xa3, 0xb9, 0x3b, 0x10, 0x6d, 0x7c, + 0x92, 0xfc, 0x7f, 0x34, 0x41, }, + { 0xc4, 0xd8, 0xef, 0xba, 0xef, 0xd2, 0xaa, 0xc5, 0x6c, 0x8e, 0x3e, 0xbb, + 0x12, 0xfc, 0x0f, 0x72, 0xbf, 0x0f, }, + { 0xdd, 0x91, 0xd1, 0x15, 0x9e, 0x7d, 0xf8, 0xc1, 0xb9, 0x14, 0x63, 0x96, + 0xb5, 0xcb, 0x83, 0x1d, 0x35, 0x1c, 0xec, }, + { 0xa9, 0xf8, 0x52, 0xc9, 0x67, 0x76, 0x2b, 0xad, 0xfb, 0xd8, 0x3a, 0xa6, + 0x74, 0x02, 0xae, 0xb8, 0x25, 0x2c, 0x63, 0x49, }, + { 0x77, 0x1f, 0x66, 0x70, 0xfd, 0x50, 0x29, 0xaa, 0xeb, 0xdc, 0xee, 0xba, + 0x75, 0x98, 0xdc, 0x93, 0x12, 0x3f, 0xdc, 0x7c, 0x38, }, + { 0xe2, 0xe1, 0x89, 0x5c, 0x37, 0x38, 0x6a, 0xa3, 0x40, 0xac, 0x3f, 0xb0, + 0xca, 0xfc, 0xa7, 0xf3, 0xea, 0xf9, 0x0f, 0x5d, 0x8e, 0x39, }, + { 0x0f, 0x67, 0xc8, 0x38, 0x01, 0xb1, 0xb7, 0xb8, 0xa2, 0xe7, 0x0a, 0x6d, + 0xd2, 0x63, 0x69, 0x9e, 0xcc, 0xf0, 0xf2, 0xbe, 0x9b, 0x98, 0xdd, }, + { 0x13, 0xe1, 0x36, 0x30, 0xfe, 0xc6, 0x01, 0x8a, 0xa1, 0x63, 0x96, 0x59, + 0xc2, 0xa9, 0x68, 0x3f, 0x58, 0xd4, 0x19, 0x0c, 0x40, 0xf3, 0xde, 0x02, }, + { 0xa3, 0x9e, 0xce, 0xda, 0x42, 0xee, 0x8c, 0x6c, 0x5a, 0x7d, 0xdc, 0x89, + 0x02, 0x77, 0xdd, 0xe7, 0x95, 0xbb, 0xff, 0x0d, 0xa4, 0xb5, 0x38, 0x1e, + 0xaf, }, + { 0x9a, 0xf6, 0xb5, 0x9a, 0x4f, 0xa9, 0x4f, 0x2c, 0x35, 0x3c, 0x24, 0xdc, + 0x97, 0x6f, 0xd9, 0xa1, 0x7d, 0x1a, 0x85, 0x0b, 0xf5, 0xda, 0x2e, 0xe7, + 0xb1, 0x1d, }, + { 0x84, 0x1e, 0x8e, 0x3d, 0x45, 0xa5, 0xf2, 0x27, 0xf3, 0x31, 0xfe, 0xb9, + 0xfb, 0xc5, 0x45, 0x99, 0x99, 0xdd, 0x93, 0x43, 0x02, 0xee, 0x58, 0xaf, + 0xee, 0x6a, 0xbe, }, + { 0x07, 0x2f, 0xc0, 0xa2, 0x04, 0xc4, 0xab, 0x7c, 0x26, 0xbb, 0xa8, 0xd8, + 0xe3, 0x1c, 0x75, 0x15, 0x64, 0x5d, 0x02, 0x6a, 0xf0, 0x86, 0xe9, 0xcd, + 0x5c, 0xef, 0xa3, 0x25, }, + { 0x2f, 0x3b, 0x1f, 0xb5, 0x91, 0x8f, 0x86, 0xe0, 0xdc, 0x31, 0x48, 0xb6, + 0xa1, 0x8c, 0xfd, 0x75, 0xbb, 0x7d, 0x3d, 0xc1, 0xf0, 0x10, 0x9a, 0xd8, + 0x4b, 0x0e, 0xe3, 0x94, 0x9f, }, + { 0x29, 0xbb, 0x8f, 0x6c, 0xd1, 0xf2, 0xb6, 0xaf, 0xe5, 0xe3, 0x2d, 0xdc, + 0x6f, 0xa4, 0x53, 0x88, 0xd8, 0xcf, 0x4d, 0x45, 0x42, 0x62, 0xdb, 0xdf, + 0xf8, 0x45, 0xc2, 0x13, 0xec, 0x35, }, + { 0x06, 0x3c, 0xe3, 0x2c, 0x15, 0xc6, 0x43, 0x03, 0x81, 0xfb, 0x08, 0x76, + 0x33, 0xcb, 0x02, 0xc1, 0xba, 0x33, 0xe5, 0xe0, 0xd1, 0x92, 0xa8, 0x46, + 0x28, 0x3f, 0x3e, 0x9d, 0x2c, 0x44, 0x54, }, + { 0xea, 0xbb, 0x96, 0xf8, 0xd1, 0x8b, 0x04, 0x11, 0x40, 0x78, 0x42, 0x02, + 0x19, 0xd1, 0xbc, 0x65, 0x92, 0xd3, 0xc3, 0xd6, 0xd9, 0x19, 0xe7, 0xc3, + 0x40, 0x97, 0xbd, 0xd4, 0xed, 0xfa, 0x5e, 0x28, }, + { 0x02, }, + { 0x52, 0xa8, }, + { 0x38, 0x25, 0x0d, }, + { 0xe3, 0x04, 0xd4, 0x92, }, + { 0x97, 0xdb, 0xf7, 0x81, 0xca, }, + { 0x8a, 0x56, 0x9d, 0x62, 0x56, 0xcc, }, + { 0xa1, 0x8e, 0x3c, 0x72, 0x8f, 0x63, 0x03, }, + { 0xf7, 0xf3, 0x39, 0x09, 0x0a, 0xa1, 0xbb, 0x23, }, + { 0x6b, 0x03, 0xc0, 0xe9, 0xd9, 0x83, 0x05, 0x22, 0x01, }, + { 0x1b, 0x4b, 0xf5, 0xd6, 0x4f, 0x05, 0x75, 0x91, 0x4c, 0x7f, }, + { 0x4c, 0x8c, 0x25, 0x20, 0x21, 0xcb, 0xc2, 0x4b, 0x3a, 0x5b, 0x8d, }, + { 0x56, 0xe2, 0x77, 0xa0, 0xb6, 0x9f, 0x81, 0xec, 0x83, 0x75, 0xc4, 0xf9, }, + { 0x71, 0x70, 0x0f, 0xad, 0x4d, 0x35, 0x81, 0x9d, 0x88, 0x69, 0xf9, 0xaa, + 0xd3, }, + { 0x50, 0x6e, 0x86, 0x6e, 0x43, 0xc0, 0xc2, 0x44, 0xc2, 0xe2, 0xa0, 0x1c, + 0xb7, 0x9a, }, + { 0xe4, 0x7e, 0x72, 0xc6, 0x12, 0x8e, 0x7c, 0xfc, 0xbd, 0xe2, 0x08, 0x31, + 0x3d, 0x47, 0x3d, }, + { 0x08, 0x97, 0x5b, 0x80, 0xae, 0xc4, 0x1d, 0x50, 0x77, 0xdf, 0x1f, 0xd0, + 0x24, 0xf0, 0x17, 0xc0, }, + { 0x01, 0xb6, 0x29, 0xf4, 0xaf, 0x78, 0x5f, 0xb6, 0x91, 0xdd, 0x76, 0x76, + 0xd2, 0xfd, 0x0c, 0x47, 0x40, }, + { 0xa1, 0xd8, 0x09, 0x97, 0x7a, 0xa6, 0xc8, 0x94, 0xf6, 0x91, 0x7b, 0xae, + 0x2b, 0x9f, 0x0d, 0x83, 0x48, 0xf7, }, + { 0x12, 0xd5, 0x53, 0x7d, 0x9a, 0xb0, 0xbe, 0xd9, 0xed, 0xe9, 0x9e, 0xee, + 0x61, 0x5b, 0x42, 0xf2, 0xc0, 0x73, 0xc0, }, + { 0xd5, 0x77, 0xd6, 0x5c, 0x6e, 0xa5, 0x69, 0x2b, 0x3b, 0x8c, 0xd6, 0x7d, + 0x1d, 0xbe, 0x2c, 0xa1, 0x02, 0x21, 0xcd, 0x29, }, + { 0xa4, 0x98, 0x80, 0xca, 0x22, 0xcf, 0x6a, 0xab, 0x5e, 0x40, 0x0d, 0x61, + 0x08, 0x21, 0xef, 0xc0, 0x6c, 0x52, 0xb4, 0xb0, 0x53, }, + { 0xbf, 0xaf, 0x8f, 0x3b, 0x7a, 0x97, 0x33, 0xe5, 0xca, 0x07, 0x37, 0xfd, + 0x15, 0xdf, 0xce, 0x26, 0x2a, 0xb1, 0xa7, 0x0b, 0xb3, 0xac, }, + { 0x16, 0x22, 0xe1, 0xbc, 0x99, 0x4e, 0x01, 0xf0, 0xfa, 0xff, 0x8f, 0xa5, + 0x0c, 0x61, 0xb0, 0xad, 0xcc, 0xb1, 0xe1, 0x21, 0x46, 0xfa, 0x2e, }, + { 0x11, 0x5b, 0x0b, 0x2b, 0xe6, 0x14, 0xc1, 0xd5, 0x4d, 0x71, 0x5e, 0x17, + 0xea, 0x23, 0xdd, 0x6c, 0xbd, 0x1d, 0xbe, 0x12, 0x1b, 0xee, 0x4c, 0x1a, }, + { 0x40, 0x88, 0x22, 0xf3, 0x20, 0x6c, 0xed, 0xe1, 0x36, 0x34, 0x62, 0x2c, + 0x98, 0x83, 0x52, 0xe2, 0x25, 0xee, 0xe9, 0xf5, 0xe1, 0x17, 0xf0, 0x5c, + 0xae, }, + { 0xc3, 0x76, 0x37, 0xde, 0x95, 0x8c, 0xca, 0x2b, 0x0c, 0x23, 0xe7, 0xb5, + 0x38, 0x70, 0x61, 0xcc, 0xff, 0xd3, 0x95, 0x7b, 0xf3, 0xff, 0x1f, 0x9d, + 0x59, 0x00, }, + { 0x0c, 0x19, 0x52, 0x05, 0x22, 0x53, 0xcb, 0x48, 0xd7, 0x10, 0x0e, 0x7e, + 0x14, 0x69, 0xb5, 0xa2, 0x92, 0x43, 0xa3, 0x9e, 0x4b, 0x8f, 0x51, 0x2c, + 0x5a, 0x2c, 0x3b, }, + { 0xe1, 0x9d, 0x70, 0x70, 0x28, 0xec, 0x86, 0x40, 0x55, 0x33, 0x56, 0xda, + 0x88, 0xca, 0xee, 0xc8, 0x6a, 0x20, 0xb1, 0xe5, 0x3d, 0x57, 0xf8, 0x3c, + 0x10, 0x07, 0x2a, 0xc4, }, + { 0x0b, 0xae, 0xf1, 0xc4, 0x79, 0xee, 0x1b, 0x3d, 0x27, 0x35, 0x8d, 0x14, + 0xd6, 0xae, 0x4e, 0x3c, 0xe9, 0x53, 0x50, 0xb5, 0xcc, 0x0c, 0xf7, 0xdf, + 0xee, 0xa1, 0x74, 0xd6, 0x71, }, + { 0xe6, 0xa4, 0xf4, 0x99, 0x98, 0xb9, 0x80, 0xea, 0x96, 0x7f, 0x4f, 0x33, + 0xcf, 0x74, 0x25, 0x6f, 0x17, 0x6c, 0xbf, 0xf5, 0x5c, 0x38, 0xd0, 0xff, + 0x96, 0xcb, 0x13, 0xf9, 0xdf, 0xfd, }, + { 0xbe, 0x92, 0xeb, 0xba, 0x44, 0x2c, 0x24, 0x74, 0xd4, 0x03, 0x27, 0x3c, + 0x5d, 0x5b, 0x03, 0x30, 0x87, 0x63, 0x69, 0xe0, 0xb8, 0x94, 0xf4, 0x44, + 0x7e, 0xad, 0xcd, 0x20, 0x12, 0x16, 0x79, }, + { 0x30, 0xf1, 0xc4, 0x8e, 0x05, 0x90, 0x2a, 0x97, 0x63, 0x94, 0x46, 0xff, + 0xce, 0xd8, 0x67, 0xa7, 0xac, 0x33, 0x8c, 0x95, 0xb7, 0xcd, 0xa3, 0x23, + 0x98, 0x9d, 0x76, 0x6c, 0x9d, 0xa8, 0xd6, 0x8a, }, + { 0xbe, }, + { 0x17, 0x6c, }, + { 0x1a, 0x42, 0x4f, }, + { 0xba, 0xaf, 0xb7, 0x65, }, + { 0xc2, 0x63, 0x43, 0x6a, 0xea, }, + { 0xe4, 0x4d, 0xad, 0xf2, 0x0b, 0x02, }, + { 0x04, 0xc7, 0xc4, 0x7f, 0xa9, 0x2b, 0xce, }, + { 0x66, 0xf6, 0x67, 0xcb, 0x03, 0x53, 0xc8, 0xf1, }, + { 0x56, 0xa3, 0x60, 0x78, 0xc9, 0x5f, 0x70, 0x1b, 0x5e, }, + { 0x99, 0xff, 0x81, 0x7c, 0x13, 0x3c, 0x29, 0x79, 0x4b, 0x65, }, + { 0x51, 0x10, 0x50, 0x93, 0x01, 0x93, 0xb7, 0x01, 0xc9, 0x18, 0xb7, }, + { 0x8e, 0x3c, 0x42, 0x1e, 0x5e, 0x7d, 0xc1, 0x50, 0x70, 0x1f, 0x00, 0x98, }, + { 0x5f, 0xd9, 0x9b, 0xc8, 0xd7, 0xb2, 0x72, 0x62, 0x1a, 0x1e, 0xba, 0x92, + 0xe9, }, + { 0x70, 0x2b, 0xba, 0xfe, 0xad, 0x5d, 0x96, 0x3f, 0x27, 0xc2, 0x41, 0x6d, + 0xc4, 0xb3, }, + { 0xae, 0xe0, 0xd5, 0xd4, 0xc7, 0xae, 0x15, 0x5e, 0xdc, 0xdd, 0x33, 0x60, + 0xd7, 0xd3, 0x5e, }, + { 0x79, 0x8e, 0xbc, 0x9e, 0x20, 0xb9, 0x19, 0x4b, 0x63, 0x80, 0xf3, 0x16, + 0xaf, 0x39, 0xbd, 0x92, }, + { 0xc2, 0x0e, 0x85, 0xa0, 0x0b, 0x9a, 0xb0, 0xec, 0xde, 0x38, 0xd3, 0x10, + 0xd9, 0xa7, 0x66, 0x27, 0xcf, }, + { 0x0e, 0x3b, 0x75, 0x80, 0x67, 0x14, 0x0c, 0x02, 0x90, 0xd6, 0xb3, 0x02, + 0x81, 0xf6, 0xa6, 0x87, 0xce, 0x58, }, + { 0x79, 0xb5, 0xe9, 0x5d, 0x52, 0x4d, 0xf7, 0x59, 0xf4, 0x2e, 0x27, 0xdd, + 0xb3, 0xed, 0x57, 0x5b, 0x82, 0xea, 0x6f, }, + { 0xa2, 0x97, 0xf5, 0x80, 0x02, 0x3d, 0xde, 0xa3, 0xf9, 0xf6, 0xab, 0xe3, + 0x57, 0x63, 0x7b, 0x9b, 0x10, 0x42, 0x6f, 0xf2, }, + { 0x12, 0x7a, 0xfc, 0xb7, 0x67, 0x06, 0x0c, 0x78, 0x1a, 0xfe, 0x88, 0x4f, + 0xc6, 0xac, 0x52, 0x96, 0x64, 0x28, 0x97, 0x84, 0x06, }, + { 0xc5, 0x04, 0x44, 0x6b, 0xb2, 0xa5, 0xa4, 0x66, 0xe1, 0x76, 0xa2, 0x51, + 0xf9, 0x59, 0x69, 0x97, 0x56, 0x0b, 0xbf, 0x50, 0xb3, 0x34, }, + { 0x21, 0x32, 0x6b, 0x42, 0xb5, 0xed, 0x71, 0x8d, 0xf7, 0x5a, 0x35, 0xe3, + 0x90, 0xe2, 0xee, 0xaa, 0x89, 0xf6, 0xc9, 0x9c, 0x4d, 0x73, 0xf4, }, + { 0x4c, 0xa6, 0x09, 0xf4, 0x48, 0xe7, 0x46, 0xbc, 0x49, 0xfc, 0xe5, 0xda, + 0xd1, 0x87, 0x13, 0x17, 0x4c, 0x59, 0x71, 0x26, 0x5b, 0x2c, 0x42, 0xb7, }, + { 0x13, 0x63, 0xf3, 0x40, 0x02, 0xe5, 0xa3, 0x3a, 0x5e, 0x8e, 0xf8, 0xb6, + 0x8a, 0x49, 0x60, 0x76, 0x34, 0x72, 0x94, 0x73, 0xf6, 0xd9, 0x21, 0x6a, + 0x26, }, + { 0xdf, 0x75, 0x16, 0x10, 0x1b, 0x5e, 0x81, 0xc3, 0xc8, 0xde, 0x34, 0x24, + 0xb0, 0x98, 0xeb, 0x1b, 0x8f, 0xa1, 0x9b, 0x05, 0xee, 0xa5, 0xe9, 0x35, + 0xf4, 0x1d, }, + { 0xcd, 0x21, 0x93, 0x6e, 0x5b, 0xa0, 0x26, 0x2b, 0x21, 0x0e, 0xa0, 0xb9, + 0x1c, 0xb5, 0xbb, 0xb8, 0xf8, 0x1e, 0xff, 0x5c, 0xa8, 0xf9, 0x39, 0x46, + 0x4e, 0x29, 0x26, }, + { 0x73, 0x7f, 0x0e, 0x3b, 0x0b, 0x5c, 0xf9, 0x60, 0xaa, 0x88, 0xa1, 0x09, + 0xb1, 0x5d, 0x38, 0x7b, 0x86, 0x8f, 0x13, 0x7a, 0x8d, 0x72, 0x7a, 0x98, + 0x1a, 0x5b, 0xff, 0xc9, }, + { 0xd3, 0x3c, 0x61, 0x71, 0x44, 0x7e, 0x31, 0x74, 0x98, 0x9d, 0x9a, 0xd2, + 0x27, 0xf3, 0x46, 0x43, 0x42, 0x51, 0xd0, 0x5f, 0xe9, 0x1c, 0x5c, 0x69, + 0xbf, 0xf6, 0xbe, 0x3c, 0x40, }, + { 0x31, 0x99, 0x31, 0x9f, 0xaa, 0x43, 0x2e, 0x77, 0x3e, 0x74, 0x26, 0x31, + 0x5e, 0x61, 0xf1, 0x87, 0xe2, 0xeb, 0x9b, 0xcd, 0xd0, 0x3a, 0xee, 0x20, + 0x7e, 0x10, 0x0a, 0x0b, 0x7e, 0xfa, }, + { 0xa4, 0x27, 0x80, 0x67, 0x81, 0x2a, 0xa7, 0x62, 0xf7, 0x6e, 0xda, 0xd4, + 0x5c, 0x39, 0x74, 0xad, 0x7e, 0xbe, 0xad, 0xa5, 0x84, 0x7f, 0xa9, 0x30, + 0x5d, 0xdb, 0xe2, 0x05, 0x43, 0xf7, 0x1b, }, + { 0x0b, 0x37, 0xd8, 0x02, 0xe1, 0x83, 0xd6, 0x80, 0xf2, 0x35, 0xc2, 0xb0, + 0x37, 0xef, 0xef, 0x5e, 0x43, 0x93, 0xf0, 0x49, 0x45, 0x0a, 0xef, 0xb5, + 0x76, 0x70, 0x12, 0x44, 0xc4, 0xdb, 0xf5, 0x7a, }, + { 0x1f, }, + { 0x82, 0x60, }, + { 0xcc, 0xe3, 0x08, }, + { 0x56, 0x17, 0xe4, 0x59, }, + { 0xe2, 0xd7, 0x9e, 0xc4, 0x4c, }, + { 0xb2, 0xad, 0xd3, 0x78, 0x58, 0x5a, }, + { 0xce, 0x43, 0xb4, 0x02, 0x96, 0xab, 0x3c, }, + { 0xe6, 0x05, 0x1a, 0x73, 0x22, 0x32, 0xbb, 0x77, }, + { 0x23, 0xe7, 0xda, 0xfe, 0x2c, 0xef, 0x8c, 0x22, 0xec, }, + { 0xe9, 0x8e, 0x55, 0x38, 0xd1, 0xd7, 0x35, 0x23, 0x98, 0xc7, }, + { 0xb5, 0x81, 0x1a, 0xe5, 0xb5, 0xa5, 0xd9, 0x4d, 0xca, 0x41, 0xe7, }, + { 0x41, 0x16, 0x16, 0x95, 0x8d, 0x9e, 0x0c, 0xea, 0x8c, 0x71, 0x9a, 0xc1, }, + { 0x7c, 0x33, 0xc0, 0xa4, 0x00, 0x62, 0xea, 0x60, 0x67, 0xe4, 0x20, 0xbc, + 0x5b, }, + { 0xdb, 0xb1, 0xdc, 0xfd, 0x08, 0xc0, 0xde, 0x82, 0xd1, 0xde, 0x38, 0xc0, + 0x90, 0x48, }, + { 0x37, 0x18, 0x2e, 0x0d, 0x61, 0xaa, 0x61, 0xd7, 0x86, 0x20, 0x16, 0x60, + 0x04, 0xd9, 0xd5, }, + { 0xb0, 0xcf, 0x2c, 0x4c, 0x5e, 0x5b, 0x4f, 0x2a, 0x23, 0x25, 0x58, 0x47, + 0xe5, 0x31, 0x06, 0x70, }, + { 0x91, 0xa0, 0xa3, 0x86, 0x4e, 0xe0, 0x72, 0x38, 0x06, 0x67, 0x59, 0x5c, + 0x70, 0x25, 0xdb, 0x33, 0x27, }, + { 0x44, 0x58, 0x66, 0xb8, 0x58, 0xc7, 0x13, 0xed, 0x4c, 0xc0, 0xf4, 0x9a, + 0x1e, 0x67, 0x75, 0x33, 0xb6, 0xb8, }, + { 0x7f, 0x98, 0x4a, 0x8e, 0x50, 0xa2, 0x5c, 0xcd, 0x59, 0xde, 0x72, 0xb3, + 0x9d, 0xc3, 0x09, 0x8a, 0xab, 0x56, 0xf1, }, + { 0x80, 0x96, 0x49, 0x1a, 0x59, 0xa2, 0xc5, 0xd5, 0xa7, 0x20, 0x8a, 0xb7, + 0x27, 0x62, 0x84, 0x43, 0xc6, 0xe1, 0x1b, 0x5d, }, + { 0x6b, 0xb7, 0x2b, 0x26, 0x62, 0x14, 0x70, 0x19, 0x3d, 0x4d, 0xac, 0xac, + 0x63, 0x58, 0x5e, 0x94, 0xb5, 0xb7, 0xe8, 0xe8, 0xa2, }, + { 0x20, 0xa8, 0xc0, 0xfd, 0x63, 0x3d, 0x6e, 0x98, 0xcf, 0x0c, 0x49, 0x98, + 0xe4, 0x5a, 0xfe, 0x8c, 0xaa, 0x70, 0x82, 0x1c, 0x7b, 0x74, }, + { 0xc8, 0xe8, 0xdd, 0xdf, 0x69, 0x30, 0x01, 0xc2, 0x0f, 0x7e, 0x2f, 0x11, + 0xcc, 0x3e, 0x17, 0xa5, 0x69, 0x40, 0x3f, 0x0e, 0x79, 0x7f, 0xcf, }, + { 0xdb, 0x61, 0xc0, 0xe2, 0x2e, 0x49, 0x07, 0x31, 0x1d, 0x91, 0x42, 0x8a, + 0xfc, 0x5e, 0xd3, 0xf8, 0x56, 0x1f, 0x2b, 0x73, 0xfd, 0x9f, 0xb2, 0x8e, }, + { 0x0c, 0x89, 0x55, 0x0c, 0x1f, 0x59, 0x2c, 0x9d, 0x1b, 0x29, 0x1d, 0x41, + 0x1d, 0xe6, 0x47, 0x8f, 0x8c, 0x2b, 0xea, 0x8f, 0xf0, 0xff, 0x21, 0x70, + 0x88, }, + { 0x12, 0x18, 0x95, 0xa6, 0x59, 0xb1, 0x31, 0x24, 0x45, 0x67, 0x55, 0xa4, + 0x1a, 0x2d, 0x48, 0x67, 0x1b, 0x43, 0x88, 0x2d, 0x8e, 0xa0, 0x70, 0xb3, + 0xc6, 0xbb, }, + { 0xe7, 0xb1, 0x1d, 0xb2, 0x76, 0x4d, 0x68, 0x68, 0x68, 0x23, 0x02, 0x55, + 0x3a, 0xe2, 0xe5, 0xd5, 0x4b, 0x43, 0xf9, 0x34, 0x77, 0x5c, 0xa1, 0xf5, + 0x55, 0xfd, 0x4f, }, + { 0x8c, 0x87, 0x5a, 0x08, 0x3a, 0x73, 0xad, 0x61, 0xe1, 0xe7, 0x99, 0x7e, + 0xf0, 0x5d, 0xe9, 0x5d, 0x16, 0x43, 0x80, 0x2f, 0xd0, 0x66, 0x34, 0xe2, + 0x42, 0x64, 0x3b, 0x1a, }, + { 0x39, 0xc1, 0x99, 0xcf, 0x22, 0xbf, 0x16, 0x8f, 0x9f, 0x80, 0x7f, 0x95, + 0x0a, 0x05, 0x67, 0x27, 0xe7, 0x15, 0xdf, 0x9d, 0xb2, 0xfe, 0x1c, 0xb5, + 0x1d, 0x60, 0x8f, 0x8a, 0x1d, }, + { 0x9b, 0x6e, 0x08, 0x09, 0x06, 0x73, 0xab, 0x68, 0x02, 0x62, 0x1a, 0xe4, + 0xd4, 0xdf, 0xc7, 0x02, 0x4c, 0x6a, 0x5f, 0xfd, 0x23, 0xac, 0xae, 0x6d, + 0x43, 0xa4, 0x7a, 0x50, 0x60, 0x3c, }, + { 0x1d, 0xb4, 0xc6, 0xe1, 0xb1, 0x4b, 0xe3, 0xf2, 0xe2, 0x1a, 0x73, 0x1b, + 0xa0, 0x92, 0xa7, 0xf5, 0xff, 0x8f, 0x8b, 0x5d, 0xdf, 0xa8, 0x04, 0xb3, + 0xb0, 0xf7, 0xcc, 0x12, 0xfa, 0x35, 0x46, }, + { 0x49, 0x45, 0x97, 0x11, 0x0f, 0x1c, 0x60, 0x8e, 0xe8, 0x47, 0x30, 0xcf, + 0x60, 0xa8, 0x71, 0xc5, 0x1b, 0xe9, 0x39, 0x4d, 0x49, 0xb6, 0x12, 0x1f, + 0x24, 0xab, 0x37, 0xff, 0x83, 0xc2, 0xe1, 0x3a, }, + { 0x60, }, + { 0x24, 0x26, }, + { 0x47, 0xeb, 0xc9, }, + { 0x4a, 0xd0, 0xbc, 0xf0, }, + { 0x8e, 0x2b, 0xc9, 0x85, 0x3c, }, + { 0xa2, 0x07, 0x15, 0xb8, 0x12, 0x74, }, + { 0x0f, 0xdb, 0x5b, 0x33, 0x69, 0xfe, 0x4b, }, + { 0xa2, 0x86, 0x54, 0xf4, 0xfd, 0xb2, 0xd4, 0xe6, }, + { 0xbb, 0x84, 0x78, 0x49, 0x27, 0x8e, 0x61, 0xda, 0x60, }, + { 0x04, 0xc3, 0xcd, 0xaa, 0x8f, 0xa7, 0x03, 0xc9, 0xf9, 0xb6, }, + { 0xf8, 0x27, 0x1d, 0x61, 0xdc, 0x21, 0x42, 0xdd, 0xad, 0x92, 0x40, }, + { 0x12, 0x87, 0xdf, 0xc2, 0x41, 0x45, 0x5a, 0x36, 0x48, 0x5b, 0x51, 0x2b, }, + { 0xbb, 0x37, 0x5d, 0x1f, 0xf1, 0x68, 0x7a, 0xc4, 0xa5, 0xd2, 0xa4, 0x91, + 0x8d, }, + { 0x5b, 0x27, 0xd1, 0x04, 0x54, 0x52, 0x9f, 0xa3, 0x47, 0x86, 0x33, 0x33, + 0xbf, 0xa0, }, + { 0xcf, 0x04, 0xea, 0xf8, 0x03, 0x2a, 0x43, 0xff, 0xa6, 0x68, 0x21, 0x4c, + 0xd5, 0x4b, 0xed, }, + { 0xaf, 0xb8, 0xbc, 0x63, 0x0f, 0x18, 0x4d, 0xe2, 0x7a, 0xdd, 0x46, 0x44, + 0xc8, 0x24, 0x0a, 0xb7, }, + { 0x3e, 0xdc, 0x36, 0xe4, 0x89, 0xb1, 0xfa, 0xc6, 0x40, 0x93, 0x2e, 0x75, + 0xb2, 0x15, 0xd1, 0xb1, 0x10, }, + { 0x6c, 0xd8, 0x20, 0x3b, 0x82, 0x79, 0xf9, 0xc8, 0xbc, 0x9d, 0xe0, 0x35, + 0xbe, 0x1b, 0x49, 0x1a, 0xbc, 0x3a, }, + { 0x78, 0x65, 0x2c, 0xbe, 0x35, 0x67, 0xdc, 0x78, 0xd4, 0x41, 0xf6, 0xc9, + 0xde, 0xde, 0x1f, 0x18, 0x13, 0x31, 0x11, }, + { 0x8a, 0x7f, 0xb1, 0x33, 0x8f, 0x0c, 0x3c, 0x0a, 0x06, 0x61, 0xf0, 0x47, + 0x29, 0x1b, 0x29, 0xbc, 0x1c, 0x47, 0xef, 0x7a, }, + { 0x65, 0x91, 0xf1, 0xe6, 0xb3, 0x96, 0xd3, 0x8c, 0xc2, 0x4a, 0x59, 0x35, + 0x72, 0x8e, 0x0b, 0x9a, 0x87, 0xca, 0x34, 0x7b, 0x63, }, + { 0x5f, 0x08, 0x87, 0x80, 0x56, 0x25, 0x89, 0x77, 0x61, 0x8c, 0x64, 0xa1, + 0x59, 0x6d, 0x59, 0x62, 0xe8, 0x4a, 0xc8, 0x58, 0x99, 0xd1, }, + { 0x23, 0x87, 0x1d, 0xed, 0x6f, 0xf2, 0x91, 0x90, 0xe2, 0xfe, 0x43, 0x21, + 0xaf, 0x97, 0xc6, 0xbc, 0xd7, 0x15, 0xc7, 0x2d, 0x08, 0x77, 0x91, }, + { 0x90, 0x47, 0x9a, 0x9e, 0x3a, 0xdf, 0xf3, 0xc9, 0x4c, 0x1e, 0xa7, 0xd4, + 0x6a, 0x32, 0x90, 0xfe, 0xb7, 0xb6, 0x7b, 0xfa, 0x96, 0x61, 0xfb, 0xa4, }, + { 0xb1, 0x67, 0x60, 0x45, 0xb0, 0x96, 0xc5, 0x15, 0x9f, 0x4d, 0x26, 0xd7, + 0x9d, 0xf1, 0xf5, 0x6d, 0x21, 0x00, 0x94, 0x31, 0x64, 0x94, 0xd3, 0xa7, + 0xd3, }, + { 0x02, 0x3e, 0xaf, 0xf3, 0x79, 0x73, 0xa5, 0xf5, 0xcc, 0x7a, 0x7f, 0xfb, + 0x79, 0x2b, 0x85, 0x8c, 0x88, 0x72, 0x06, 0xbe, 0xfe, 0xaf, 0xc1, 0x16, + 0xa6, 0xd6, }, + { 0x2a, 0xb0, 0x1a, 0xe5, 0xaa, 0x6e, 0xb3, 0xae, 0x53, 0x85, 0x33, 0x80, + 0x75, 0xae, 0x30, 0xe6, 0xb8, 0x72, 0x42, 0xf6, 0x25, 0x4f, 0x38, 0x88, + 0x55, 0xd1, 0xa9, }, + { 0x90, 0xd8, 0x0c, 0xc0, 0x93, 0x4b, 0x4f, 0x9e, 0x65, 0x6c, 0xa1, 0x54, + 0xa6, 0xf6, 0x6e, 0xca, 0xd2, 0xbb, 0x7e, 0x6a, 0x1c, 0xd3, 0xce, 0x46, + 0xef, 0xb0, 0x00, 0x8d, }, + { 0xed, 0x9c, 0x49, 0xcd, 0xc2, 0xde, 0x38, 0x0e, 0xe9, 0x98, 0x6c, 0xc8, + 0x90, 0x9e, 0x3c, 0xd4, 0xd3, 0xeb, 0x88, 0x32, 0xc7, 0x28, 0xe3, 0x94, + 0x1c, 0x9f, 0x8b, 0xf3, 0xcb, }, + { 0xac, 0xe7, 0x92, 0x16, 0xb4, 0x14, 0xa0, 0xe4, 0x04, 0x79, 0xa2, 0xf4, + 0x31, 0xe6, 0x0c, 0x26, 0xdc, 0xbf, 0x2f, 0x69, 0x1b, 0x55, 0x94, 0x67, + 0xda, 0x0c, 0xd7, 0x32, 0x1f, 0xef, }, + { 0x68, 0x63, 0x85, 0x57, 0x95, 0x9e, 0x42, 0x27, 0x41, 0x43, 0x42, 0x02, + 0xa5, 0x78, 0xa7, 0xc6, 0x43, 0xc1, 0x6a, 0xba, 0x70, 0x80, 0xcd, 0x04, + 0xb6, 0x78, 0x76, 0x29, 0xf3, 0xe8, 0xa0, }, + { 0xe6, 0xac, 0x8d, 0x9d, 0xf0, 0xc0, 0xf7, 0xf7, 0xe3, 0x3e, 0x4e, 0x28, + 0x0f, 0x59, 0xb2, 0x67, 0x9e, 0x84, 0x34, 0x42, 0x96, 0x30, 0x2b, 0xca, + 0x49, 0xb6, 0xc5, 0x9a, 0x84, 0x59, 0xa7, 0x81, }, + { 0x7e, }, + { 0x1e, 0x21, }, + { 0x26, 0xd3, 0xdd, }, + { 0x2c, 0xd4, 0xb3, 0x3d, }, + { 0x86, 0x7b, 0x76, 0x3c, 0xf0, }, + { 0x12, 0xc3, 0x70, 0x1d, 0x55, 0x18, }, + { 0x96, 0xc2, 0xbd, 0x61, 0x55, 0xf4, 0x24, }, + { 0x20, 0x51, 0xf7, 0x86, 0x58, 0x8f, 0x07, 0x2a, }, + { 0x93, 0x15, 0xa8, 0x1d, 0xda, 0x97, 0xee, 0x0e, 0x6c, }, + { 0x39, 0x93, 0xdf, 0xd5, 0x0e, 0xca, 0xdc, 0x7a, 0x92, 0xce, }, + { 0x60, 0xd5, 0xfd, 0xf5, 0x1b, 0x26, 0x82, 0x26, 0x73, 0x02, 0xbc, }, + { 0x98, 0xf2, 0x34, 0xe1, 0xf5, 0xfb, 0x00, 0xac, 0x10, 0x4a, 0x38, 0x9f, }, + { 0xda, 0x3a, 0x92, 0x8a, 0xd0, 0xcd, 0x12, 0xcd, 0x15, 0xbb, 0xab, 0x77, + 0x66, }, + { 0xa2, 0x92, 0x1a, 0xe5, 0xca, 0x0c, 0x30, 0x75, 0xeb, 0xaf, 0x00, 0x31, + 0x55, 0x66, }, + { 0x06, 0xea, 0xfd, 0x3e, 0x86, 0x38, 0x62, 0x4e, 0xa9, 0x12, 0xa4, 0x12, + 0x43, 0xbf, 0xa1, }, + { 0xe4, 0x71, 0x7b, 0x94, 0xdb, 0xa0, 0xd2, 0xff, 0x9b, 0xeb, 0xad, 0x8e, + 0x95, 0x8a, 0xc5, 0xed, }, + { 0x25, 0x5a, 0x77, 0x71, 0x41, 0x0e, 0x7a, 0xe9, 0xed, 0x0c, 0x10, 0xef, + 0xf6, 0x2b, 0x3a, 0xba, 0x60, }, + { 0xee, 0xe2, 0xa3, 0x67, 0x64, 0x1d, 0xc6, 0x04, 0xc4, 0xe1, 0x68, 0xd2, + 0x6e, 0xd2, 0x91, 0x75, 0x53, 0x07, }, + { 0xe0, 0xf6, 0x4d, 0x8f, 0x68, 0xfc, 0x06, 0x7e, 0x18, 0x79, 0x7f, 0x2b, + 0x6d, 0xef, 0x46, 0x7f, 0xab, 0xb2, 0xad, }, + { 0x3d, 0x35, 0x88, 0x9f, 0x2e, 0xcf, 0x96, 0x45, 0x07, 0x60, 0x71, 0x94, + 0x00, 0x8d, 0xbf, 0xf4, 0xef, 0x46, 0x2e, 0x3c, }, + { 0x43, 0xcf, 0x98, 0xf7, 0x2d, 0xf4, 0x17, 0xe7, 0x8c, 0x05, 0x2d, 0x9b, + 0x24, 0xfb, 0x4d, 0xea, 0x4a, 0xec, 0x01, 0x25, 0x29, }, + { 0x8e, 0x73, 0x9a, 0x78, 0x11, 0xfe, 0x48, 0xa0, 0x3b, 0x1a, 0x26, 0xdf, + 0x25, 0xe9, 0x59, 0x1c, 0x70, 0x07, 0x9f, 0xdc, 0xa0, 0xa6, }, + { 0xe8, 0x47, 0x71, 0xc7, 0x3e, 0xdf, 0xb5, 0x13, 0xb9, 0x85, 0x13, 0xa8, + 0x54, 0x47, 0x6e, 0x59, 0x96, 0x09, 0x13, 0x5f, 0x82, 0x16, 0x0b, }, + { 0xfb, 0xc0, 0x8c, 0x03, 0x21, 0xb3, 0xc4, 0xb5, 0x43, 0x32, 0x6c, 0xea, + 0x7f, 0xa8, 0x43, 0x91, 0xe8, 0x4e, 0x3f, 0xbf, 0x45, 0x58, 0x6a, 0xa3, }, + { 0x55, 0xf8, 0xf3, 0x00, 0x76, 0x09, 0xef, 0x69, 0x5d, 0xd2, 0x8a, 0xf2, + 0x65, 0xc3, 0xcb, 0x9b, 0x43, 0xfd, 0xb1, 0x7e, 0x7f, 0xa1, 0x94, 0xb0, + 0xd7, }, + { 0xaa, 0x13, 0xc1, 0x51, 0x40, 0x6d, 0x8d, 0x4c, 0x0a, 0x95, 0x64, 0x7b, + 0xd1, 0x96, 0xb6, 0x56, 0xb4, 0x5b, 0xcf, 0xd6, 0xd9, 0x15, 0x97, 0xdd, + 0xb6, 0xef, }, + { 0xaf, 0xb7, 0x36, 0xb0, 0x04, 0xdb, 0xd7, 0x9c, 0x9a, 0x44, 0xc4, 0xf6, + 0x1f, 0x12, 0x21, 0x2d, 0x59, 0x30, 0x54, 0xab, 0x27, 0x61, 0xa3, 0x57, + 0xef, 0xf8, 0x53, }, + { 0x97, 0x34, 0x45, 0x3e, 0xce, 0x7c, 0x35, 0xa2, 0xda, 0x9f, 0x4b, 0x46, + 0x6c, 0x11, 0x67, 0xff, 0x2f, 0x76, 0x58, 0x15, 0x71, 0xfa, 0x44, 0x89, + 0x89, 0xfd, 0xf7, 0x99, }, + { 0x1f, 0xb1, 0x62, 0xeb, 0x83, 0xc5, 0x9c, 0x89, 0xf9, 0x2c, 0xd2, 0x03, + 0x61, 0xbc, 0xbb, 0xa5, 0x74, 0x0e, 0x9b, 0x7e, 0x82, 0x3e, 0x70, 0x0a, + 0xa9, 0x8f, 0x2b, 0x59, 0xfb, }, + { 0xf8, 0xca, 0x5e, 0x3a, 0x4f, 0x9e, 0x10, 0x69, 0x10, 0xd5, 0x4c, 0xeb, + 0x1a, 0x0f, 0x3c, 0x6a, 0x98, 0xf5, 0xb0, 0x97, 0x5b, 0x37, 0x2f, 0x0d, + 0xbd, 0x42, 0x4b, 0x69, 0xa1, 0x82, }, + { 0x12, 0x8c, 0x6d, 0x52, 0x08, 0xef, 0x74, 0xb2, 0xe6, 0xaa, 0xd3, 0xb0, + 0x26, 0xb0, 0xd9, 0x94, 0xb6, 0x11, 0x45, 0x0e, 0x36, 0x71, 0x14, 0x2d, + 0x41, 0x8c, 0x21, 0x53, 0x31, 0xe9, 0x68, }, + { 0xee, 0xea, 0x0d, 0x89, 0x47, 0x7e, 0x72, 0xd1, 0xd8, 0xce, 0x58, 0x4c, + 0x94, 0x1f, 0x0d, 0x51, 0x08, 0xa3, 0xb6, 0x3d, 0xe7, 0x82, 0x46, 0x92, + 0xd6, 0x98, 0x6b, 0x07, 0x10, 0x65, 0x52, 0x65, }, +}; + +bool __init blake2s_selftest(void) +{ + u8 key[BLAKE2S_KEY_SIZE]; + u8 buf[ARRAY_SIZE(blake2s_testvecs)]; + u8 hash[BLAKE2S_HASH_SIZE]; + struct blake2s_state state; + bool success = true; + int i, l; + + key[0] = key[1] = 1; + for (i = 2; i < sizeof(key); ++i) + key[i] = key[i - 2] + key[i - 1]; + + for (i = 0; i < sizeof(buf); ++i) + buf[i] = (u8)i; + + for (i = l = 0; i < ARRAY_SIZE(blake2s_testvecs); l = (l + 37) % ++i) { + int outlen = 1 + i % BLAKE2S_HASH_SIZE; + int keylen = (13 * i) % (BLAKE2S_KEY_SIZE + 1); + + blake2s(hash, buf, key + BLAKE2S_KEY_SIZE - keylen, outlen, i, + keylen); + if (memcmp(hash, blake2s_testvecs[i], outlen)) { + pr_err("blake2s self-test %d: FAIL\n", i + 1); + success = false; + } + + if (!keylen) + blake2s_init(&state, outlen); + else + blake2s_init_key(&state, outlen, + key + BLAKE2S_KEY_SIZE - keylen, + keylen); + + blake2s_update(&state, buf, l); + blake2s_update(&state, buf + l, i - l); + blake2s_final(&state, hash); + if (memcmp(hash, blake2s_testvecs[i], outlen)) { + pr_err("blake2s init/update/final self-test %d: FAIL\n", + i + 1); + success = false; + } + } + + return success; +} diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c new file mode 100644 index 0000000000000..536fce87555b3 --- /dev/null +++ b/lib/crypto/blake2s.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + * + * This is an implementation of the BLAKE2s hash and PRF functions. + * + * Information: https://blake2.net/ + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +bool blake2s_selftest(void); + +void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen) +{ + const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; + + if (unlikely(!inlen)) + return; + if (inlen > fill) { + memcpy(state->buf + state->buflen, in, fill); + blake2s_compress_generic(state, state->buf, 1, + BLAKE2S_BLOCK_SIZE); + state->buflen = 0; + in += fill; + inlen -= fill; + } + if (inlen > BLAKE2S_BLOCK_SIZE) { + const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE); + /* Hash one less (full) block than strictly possible */ + blake2s_compress_generic(state, in, nblocks - 1, + BLAKE2S_BLOCK_SIZE); + in += BLAKE2S_BLOCK_SIZE * (nblocks - 1); + inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1); + } + memcpy(state->buf + state->buflen, in, inlen); + state->buflen += inlen; +} +EXPORT_SYMBOL(blake2s_update); + +void blake2s_final(struct blake2s_state *state, u8 *out) +{ + WARN_ON(IS_ENABLED(DEBUG) && !out); + blake2s_set_lastblock(state); + memset(state->buf + state->buflen, 0, + BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */ + blake2s_compress_generic(state, state->buf, 1, state->buflen); + cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); + memcpy(out, state->h, state->outlen); + memzero_explicit(state, sizeof(*state)); +} +EXPORT_SYMBOL(blake2s_final); + +static int __init mod_init(void) +{ + if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) && + WARN_ON(!blake2s_selftest())) + return -ENODEV; + return 0; +} + +static void __exit mod_exit(void) +{ +} + +module_init(mod_init); +module_exit(mod_exit); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("BLAKE2s hash function"); +MODULE_AUTHOR("Jason A. Donenfeld "); diff --git a/lib/crypto/sm3.c b/lib/crypto/sm3.c new file mode 100644 index 0000000000000..d473e358a873a --- /dev/null +++ b/lib/crypto/sm3.c @@ -0,0 +1,246 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * SM3 secure hash, as specified by OSCCA GM/T 0004-2012 SM3 and described + * at https://datatracker.ietf.org/doc/html/draft-sca-cfrg-sm3-02 + * + * Copyright (C) 2017 ARM Limited or its affiliates. + * Copyright (C) 2017 Gilad Ben-Yossef + * Copyright (C) 2021 Tianjia Zhang + */ + +#include +#include +#include + +static const u32 ____cacheline_aligned K[64] = { + 0x79cc4519, 0xf3988a32, 0xe7311465, 0xce6228cb, + 0x9cc45197, 0x3988a32f, 0x7311465e, 0xe6228cbc, + 0xcc451979, 0x988a32f3, 0x311465e7, 0x6228cbce, + 0xc451979c, 0x88a32f39, 0x11465e73, 0x228cbce6, + 0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c, + 0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce, + 0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec, + 0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5, + 0x7a879d8a, 0xf50f3b14, 0xea1e7629, 0xd43cec53, + 0xa879d8a7, 0x50f3b14f, 0xa1e7629e, 0x43cec53d, + 0x879d8a7a, 0x0f3b14f5, 0x1e7629ea, 0x3cec53d4, + 0x79d8a7a8, 0xf3b14f50, 0xe7629ea1, 0xcec53d43, + 0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c, + 0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce, + 0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec, + 0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5 +}; + +/* + * Transform the message X which consists of 16 32-bit-words. See + * GM/T 004-2012 for details. + */ +#define R(i, a, b, c, d, e, f, g, h, t, w1, w2) \ + do { \ + ss1 = rol32((rol32((a), 12) + (e) + (t)), 7); \ + ss2 = ss1 ^ rol32((a), 12); \ + d += FF ## i(a, b, c) + ss2 + ((w1) ^ (w2)); \ + h += GG ## i(e, f, g) + ss1 + (w1); \ + b = rol32((b), 9); \ + f = rol32((f), 19); \ + h = P0((h)); \ + } while (0) + +#define R1(a, b, c, d, e, f, g, h, t, w1, w2) \ + R(1, a, b, c, d, e, f, g, h, t, w1, w2) +#define R2(a, b, c, d, e, f, g, h, t, w1, w2) \ + R(2, a, b, c, d, e, f, g, h, t, w1, w2) + +#define FF1(x, y, z) (x ^ y ^ z) +#define FF2(x, y, z) ((x & y) | (x & z) | (y & z)) + +#define GG1(x, y, z) FF1(x, y, z) +#define GG2(x, y, z) ((x & y) | (~x & z)) + +/* Message expansion */ +#define P0(x) ((x) ^ rol32((x), 9) ^ rol32((x), 17)) +#define P1(x) ((x) ^ rol32((x), 15) ^ rol32((x), 23)) +#define I(i) (W[i] = get_unaligned_be32(data + i * 4)) +#define W1(i) (W[i & 0x0f]) +#define W2(i) (W[i & 0x0f] = \ + P1(W[i & 0x0f] \ + ^ W[(i-9) & 0x0f] \ + ^ rol32(W[(i-3) & 0x0f], 15)) \ + ^ rol32(W[(i-13) & 0x0f], 7) \ + ^ W[(i-6) & 0x0f]) + +static void sm3_transform(struct sm3_state *sctx, u8 const *data, u32 W[16]) +{ + u32 a, b, c, d, e, f, g, h, ss1, ss2; + + a = sctx->state[0]; + b = sctx->state[1]; + c = sctx->state[2]; + d = sctx->state[3]; + e = sctx->state[4]; + f = sctx->state[5]; + g = sctx->state[6]; + h = sctx->state[7]; + + R1(a, b, c, d, e, f, g, h, K[0], I(0), I(4)); + R1(d, a, b, c, h, e, f, g, K[1], I(1), I(5)); + R1(c, d, a, b, g, h, e, f, K[2], I(2), I(6)); + R1(b, c, d, a, f, g, h, e, K[3], I(3), I(7)); + R1(a, b, c, d, e, f, g, h, K[4], W1(4), I(8)); + R1(d, a, b, c, h, e, f, g, K[5], W1(5), I(9)); + R1(c, d, a, b, g, h, e, f, K[6], W1(6), I(10)); + R1(b, c, d, a, f, g, h, e, K[7], W1(7), I(11)); + R1(a, b, c, d, e, f, g, h, K[8], W1(8), I(12)); + R1(d, a, b, c, h, e, f, g, K[9], W1(9), I(13)); + R1(c, d, a, b, g, h, e, f, K[10], W1(10), I(14)); + R1(b, c, d, a, f, g, h, e, K[11], W1(11), I(15)); + R1(a, b, c, d, e, f, g, h, K[12], W1(12), W2(16)); + R1(d, a, b, c, h, e, f, g, K[13], W1(13), W2(17)); + R1(c, d, a, b, g, h, e, f, K[14], W1(14), W2(18)); + R1(b, c, d, a, f, g, h, e, K[15], W1(15), W2(19)); + + R2(a, b, c, d, e, f, g, h, K[16], W1(16), W2(20)); + R2(d, a, b, c, h, e, f, g, K[17], W1(17), W2(21)); + R2(c, d, a, b, g, h, e, f, K[18], W1(18), W2(22)); + R2(b, c, d, a, f, g, h, e, K[19], W1(19), W2(23)); + R2(a, b, c, d, e, f, g, h, K[20], W1(20), W2(24)); + R2(d, a, b, c, h, e, f, g, K[21], W1(21), W2(25)); + R2(c, d, a, b, g, h, e, f, K[22], W1(22), W2(26)); + R2(b, c, d, a, f, g, h, e, K[23], W1(23), W2(27)); + R2(a, b, c, d, e, f, g, h, K[24], W1(24), W2(28)); + R2(d, a, b, c, h, e, f, g, K[25], W1(25), W2(29)); + R2(c, d, a, b, g, h, e, f, K[26], W1(26), W2(30)); + R2(b, c, d, a, f, g, h, e, K[27], W1(27), W2(31)); + R2(a, b, c, d, e, f, g, h, K[28], W1(28), W2(32)); + R2(d, a, b, c, h, e, f, g, K[29], W1(29), W2(33)); + R2(c, d, a, b, g, h, e, f, K[30], W1(30), W2(34)); + R2(b, c, d, a, f, g, h, e, K[31], W1(31), W2(35)); + + R2(a, b, c, d, e, f, g, h, K[32], W1(32), W2(36)); + R2(d, a, b, c, h, e, f, g, K[33], W1(33), W2(37)); + R2(c, d, a, b, g, h, e, f, K[34], W1(34), W2(38)); + R2(b, c, d, a, f, g, h, e, K[35], W1(35), W2(39)); + R2(a, b, c, d, e, f, g, h, K[36], W1(36), W2(40)); + R2(d, a, b, c, h, e, f, g, K[37], W1(37), W2(41)); + R2(c, d, a, b, g, h, e, f, K[38], W1(38), W2(42)); + R2(b, c, d, a, f, g, h, e, K[39], W1(39), W2(43)); + R2(a, b, c, d, e, f, g, h, K[40], W1(40), W2(44)); + R2(d, a, b, c, h, e, f, g, K[41], W1(41), W2(45)); + R2(c, d, a, b, g, h, e, f, K[42], W1(42), W2(46)); + R2(b, c, d, a, f, g, h, e, K[43], W1(43), W2(47)); + R2(a, b, c, d, e, f, g, h, K[44], W1(44), W2(48)); + R2(d, a, b, c, h, e, f, g, K[45], W1(45), W2(49)); + R2(c, d, a, b, g, h, e, f, K[46], W1(46), W2(50)); + R2(b, c, d, a, f, g, h, e, K[47], W1(47), W2(51)); + + R2(a, b, c, d, e, f, g, h, K[48], W1(48), W2(52)); + R2(d, a, b, c, h, e, f, g, K[49], W1(49), W2(53)); + R2(c, d, a, b, g, h, e, f, K[50], W1(50), W2(54)); + R2(b, c, d, a, f, g, h, e, K[51], W1(51), W2(55)); + R2(a, b, c, d, e, f, g, h, K[52], W1(52), W2(56)); + R2(d, a, b, c, h, e, f, g, K[53], W1(53), W2(57)); + R2(c, d, a, b, g, h, e, f, K[54], W1(54), W2(58)); + R2(b, c, d, a, f, g, h, e, K[55], W1(55), W2(59)); + R2(a, b, c, d, e, f, g, h, K[56], W1(56), W2(60)); + R2(d, a, b, c, h, e, f, g, K[57], W1(57), W2(61)); + R2(c, d, a, b, g, h, e, f, K[58], W1(58), W2(62)); + R2(b, c, d, a, f, g, h, e, K[59], W1(59), W2(63)); + R2(a, b, c, d, e, f, g, h, K[60], W1(60), W2(64)); + R2(d, a, b, c, h, e, f, g, K[61], W1(61), W2(65)); + R2(c, d, a, b, g, h, e, f, K[62], W1(62), W2(66)); + R2(b, c, d, a, f, g, h, e, K[63], W1(63), W2(67)); + + sctx->state[0] ^= a; + sctx->state[1] ^= b; + sctx->state[2] ^= c; + sctx->state[3] ^= d; + sctx->state[4] ^= e; + sctx->state[5] ^= f; + sctx->state[6] ^= g; + sctx->state[7] ^= h; +} +#undef R +#undef R1 +#undef R2 +#undef I +#undef W1 +#undef W2 + +static inline void sm3_block(struct sm3_state *sctx, + u8 const *data, int blocks, u32 W[16]) +{ + while (blocks--) { + sm3_transform(sctx, data, W); + data += SM3_BLOCK_SIZE; + } +} + +void sm3_update(struct sm3_state *sctx, const u8 *data, unsigned int len) +{ + unsigned int partial = sctx->count % SM3_BLOCK_SIZE; + u32 W[16]; + + sctx->count += len; + + if ((partial + len) >= SM3_BLOCK_SIZE) { + int blocks; + + if (partial) { + int p = SM3_BLOCK_SIZE - partial; + + memcpy(sctx->buffer + partial, data, p); + data += p; + len -= p; + + sm3_block(sctx, sctx->buffer, 1, W); + } + + blocks = len / SM3_BLOCK_SIZE; + len %= SM3_BLOCK_SIZE; + + if (blocks) { + sm3_block(sctx, data, blocks, W); + data += blocks * SM3_BLOCK_SIZE; + } + + memzero_explicit(W, sizeof(W)); + + partial = 0; + } + if (len) + memcpy(sctx->buffer + partial, data, len); +} +EXPORT_SYMBOL_GPL(sm3_update); + +void sm3_final(struct sm3_state *sctx, u8 *out) +{ + const int bit_offset = SM3_BLOCK_SIZE - sizeof(u64); + __be64 *bits = (__be64 *)(sctx->buffer + bit_offset); + __be32 *digest = (__be32 *)out; + unsigned int partial = sctx->count % SM3_BLOCK_SIZE; + u32 W[16]; + int i; + + sctx->buffer[partial++] = 0x80; + if (partial > bit_offset) { + memset(sctx->buffer + partial, 0, SM3_BLOCK_SIZE - partial); + partial = 0; + + sm3_block(sctx, sctx->buffer, 1, W); + } + + memset(sctx->buffer + partial, 0, bit_offset - partial); + *bits = cpu_to_be64(sctx->count << 3); + sm3_block(sctx, sctx->buffer, 1, W); + + for (i = 0; i < 8; i++) + put_unaligned_be32(sctx->state[i], digest++); + + /* Zeroize sensitive information. */ + memzero_explicit(W, sizeof(W)); + memzero_explicit(sctx, sizeof(*sctx)); +} +EXPORT_SYMBOL_GPL(sm3_final); + +MODULE_DESCRIPTION("Generic SM3 library"); +MODULE_LICENSE("GPL v2"); diff --git a/lib/crypto/sm4.c b/lib/crypto/sm4.c new file mode 100644 index 0000000000000..284e62576d0c6 --- /dev/null +++ b/lib/crypto/sm4.c @@ -0,0 +1,176 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4, as specified in + * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html + * + * Copyright (C) 2018 ARM Limited or its affiliates. + * Copyright (c) 2021 Tianjia Zhang + */ + +#include +#include +#include + +static const u32 fk[4] = { + 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc +}; + +static const u32 ____cacheline_aligned ck[32] = { + 0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269, + 0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9, + 0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249, + 0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9, + 0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229, + 0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299, + 0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209, + 0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279 +}; + +static const u8 ____cacheline_aligned sbox[256] = { + 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7, + 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05, + 0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3, + 0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99, + 0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a, + 0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62, + 0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95, + 0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6, + 0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba, + 0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8, + 0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b, + 0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35, + 0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2, + 0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87, + 0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52, + 0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e, + 0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5, + 0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1, + 0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55, + 0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3, + 0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60, + 0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f, + 0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f, + 0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51, + 0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f, + 0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8, + 0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd, + 0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0, + 0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e, + 0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84, + 0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20, + 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48 +}; + +static inline u32 sm4_t_non_lin_sub(u32 x) +{ + u32 out; + + out = (u32)sbox[x & 0xff]; + out |= (u32)sbox[(x >> 8) & 0xff] << 8; + out |= (u32)sbox[(x >> 16) & 0xff] << 16; + out |= (u32)sbox[(x >> 24) & 0xff] << 24; + + return out; +} + +static inline u32 sm4_key_lin_sub(u32 x) +{ + return x ^ rol32(x, 13) ^ rol32(x, 23); +} + +static inline u32 sm4_enc_lin_sub(u32 x) +{ + return x ^ rol32(x, 2) ^ rol32(x, 10) ^ rol32(x, 18) ^ rol32(x, 24); +} + +static inline u32 sm4_key_sub(u32 x) +{ + return sm4_key_lin_sub(sm4_t_non_lin_sub(x)); +} + +static inline u32 sm4_enc_sub(u32 x) +{ + return sm4_enc_lin_sub(sm4_t_non_lin_sub(x)); +} + +static inline u32 sm4_round(u32 x0, u32 x1, u32 x2, u32 x3, u32 rk) +{ + return x0 ^ sm4_enc_sub(x1 ^ x2 ^ x3 ^ rk); +} + + +/** + * sm4_expandkey - Expands the SM4 key as described in GB/T 32907-2016 + * @ctx: The location where the computed key will be stored. + * @in_key: The supplied key. + * @key_len: The length of the supplied key. + * + * Returns 0 on success. The function fails only if an invalid key size (or + * pointer) is supplied. + */ +int sm4_expandkey(struct sm4_ctx *ctx, const u8 *in_key, + unsigned int key_len) +{ + u32 rk[4]; + const u32 *key = (u32 *)in_key; + int i; + + if (key_len != SM4_KEY_SIZE) + return -EINVAL; + + rk[0] = get_unaligned_be32(&key[0]) ^ fk[0]; + rk[1] = get_unaligned_be32(&key[1]) ^ fk[1]; + rk[2] = get_unaligned_be32(&key[2]) ^ fk[2]; + rk[3] = get_unaligned_be32(&key[3]) ^ fk[3]; + + for (i = 0; i < 32; i += 4) { + rk[0] ^= sm4_key_sub(rk[1] ^ rk[2] ^ rk[3] ^ ck[i + 0]); + rk[1] ^= sm4_key_sub(rk[2] ^ rk[3] ^ rk[0] ^ ck[i + 1]); + rk[2] ^= sm4_key_sub(rk[3] ^ rk[0] ^ rk[1] ^ ck[i + 2]); + rk[3] ^= sm4_key_sub(rk[0] ^ rk[1] ^ rk[2] ^ ck[i + 3]); + + ctx->rkey_enc[i + 0] = rk[0]; + ctx->rkey_enc[i + 1] = rk[1]; + ctx->rkey_enc[i + 2] = rk[2]; + ctx->rkey_enc[i + 3] = rk[3]; + ctx->rkey_dec[31 - 0 - i] = rk[0]; + ctx->rkey_dec[31 - 1 - i] = rk[1]; + ctx->rkey_dec[31 - 2 - i] = rk[2]; + ctx->rkey_dec[31 - 3 - i] = rk[3]; + } + + return 0; +} +EXPORT_SYMBOL_GPL(sm4_expandkey); + +/** + * sm4_crypt_block - Encrypt or decrypt a single SM4 block + * @rk: The rkey_enc for encrypt or rkey_dec for decrypt + * @out: Buffer to store output data + * @in: Buffer containing the input data + */ +void sm4_crypt_block(const u32 *rk, u8 *out, const u8 *in) +{ + u32 x[4], i; + + x[0] = get_unaligned_be32(in + 0 * 4); + x[1] = get_unaligned_be32(in + 1 * 4); + x[2] = get_unaligned_be32(in + 2 * 4); + x[3] = get_unaligned_be32(in + 3 * 4); + + for (i = 0; i < 32; i += 4) { + x[0] = sm4_round(x[0], x[1], x[2], x[3], rk[i + 0]); + x[1] = sm4_round(x[1], x[2], x[3], x[0], rk[i + 1]); + x[2] = sm4_round(x[2], x[3], x[0], x[1], rk[i + 2]); + x[3] = sm4_round(x[3], x[0], x[1], x[2], rk[i + 3]); + } + + put_unaligned_be32(x[3 - 0], out + 0 * 4); + put_unaligned_be32(x[3 - 1], out + 1 * 4); + put_unaligned_be32(x[3 - 2], out + 2 * 4); + put_unaligned_be32(x[3 - 3], out + 3 * 4); +} +EXPORT_SYMBOL_GPL(sm4_crypt_block); + +MODULE_DESCRIPTION("Generic SM4 library"); +MODULE_LICENSE("GPL v2"); diff --git a/lib/decompress_unxz.c b/lib/decompress_unxz.c index 25d59a95bd668..abea25310ac73 100644 --- a/lib/decompress_unxz.c +++ b/lib/decompress_unxz.c @@ -167,7 +167,7 @@ * memeq and memzero are not used much and any remotely sane implementation * is fast enough. memcpy/memmove speed matters in multi-call mode, but * the kernel image is decompressed in single-call mode, in which only - * memcpy speed can matter and only if there is a lot of uncompressible data + * memmove speed can matter and only if there is a lot of uncompressible data * (LZMA2 stores uncompressible chunks in uncompressed form). Thus, the * functions below should just be kept small; it's probably not worth * optimizing for speed. diff --git a/lib/dim/net_dim.c b/lib/dim/net_dim.c index a4db51c212663..dae3b51ac3d9b 100644 --- a/lib/dim/net_dim.c +++ b/lib/dim/net_dim.c @@ -12,41 +12,41 @@ * Each profile size must be of NET_DIM_PARAMS_NUM_PROFILES */ #define NET_DIM_PARAMS_NUM_PROFILES 5 -#define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256 -#define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128 +#define NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE 256 +#define NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE 128 #define NET_DIM_DEF_PROFILE_CQE 1 #define NET_DIM_DEF_PROFILE_EQE 1 #define NET_DIM_RX_EQE_PROFILES { \ - {1, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {8, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {64, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {128, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {.usec = 1, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \ + {.usec = 8, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \ + {.usec = 64, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \ + {.usec = 128, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \ + {.usec = 256, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,} \ } #define NET_DIM_RX_CQE_PROFILES { \ - {2, 256}, \ - {8, 128}, \ - {16, 64}, \ - {32, 64}, \ - {64, 64} \ + {.usec = 2, .pkts = 256,}, \ + {.usec = 8, .pkts = 128,}, \ + {.usec = 16, .pkts = 64,}, \ + {.usec = 32, .pkts = 64,}, \ + {.usec = 64, .pkts = 64,} \ } #define NET_DIM_TX_EQE_PROFILES { \ - {1, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {8, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {32, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {64, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE} \ + {.usec = 1, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \ + {.usec = 8, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \ + {.usec = 32, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \ + {.usec = 64, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \ + {.usec = 128, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,} \ } #define NET_DIM_TX_CQE_PROFILES { \ - {5, 128}, \ - {8, 64}, \ - {16, 32}, \ - {32, 32}, \ - {64, 32} \ + {.usec = 5, .pkts = 128,}, \ + {.usec = 8, .pkts = 64,}, \ + {.usec = 16, .pkts = 32,}, \ + {.usec = 32, .pkts = 32,}, \ + {.usec = 64, .pkts = 32,} \ } static const struct dim_cq_moder diff --git a/lib/hexdump.c b/lib/hexdump.c index 147133f8eb2fc..c0a08ddcf94e2 100644 --- a/lib/hexdump.c +++ b/lib/hexdump.c @@ -21,15 +21,33 @@ EXPORT_SYMBOL(hex_asc_upper); * * hex_to_bin() converts one hex digit to its actual value or -1 in case of bad * input. + * + * This function is used to load cryptographic keys, so it is coded in such a + * way that there are no conditions or memory accesses that depend on data. + * + * Explanation of the logic: + * (ch - '9' - 1) is negative if ch <= '9' + * ('0' - 1 - ch) is negative if ch >= '0' + * we "and" these two values, so the result is negative if ch is in the range + * '0' ... '9' + * we are only interested in the sign, so we do a shift ">> 8"; note that right + * shift of a negative value is implementation-defined, so we cast the + * value to (unsigned) before the shift --- we have 0xffffff if ch is in + * the range '0' ... '9', 0 otherwise + * we "and" this value with (ch - '0' + 1) --- we have a value 1 ... 10 if ch is + * in the range '0' ... '9', 0 otherwise + * we add this value to -1 --- we have a value 0 ... 9 if ch is in the range '0' + * ... '9', -1 otherwise + * the next line is similar to the previous one, but we need to decode both + * uppercase and lowercase letters, so we use (ch & 0xdf), which converts + * lowercase to uppercase */ -int hex_to_bin(char ch) +int hex_to_bin(unsigned char ch) { - if ((ch >= '0') && (ch <= '9')) - return ch - '0'; - ch = tolower(ch); - if ((ch >= 'a') && (ch <= 'f')) - return ch - 'a' + 10; - return -1; + unsigned char cu = ch & 0xdf; + return -1 + + ((ch - '0' + 1) & (unsigned)((ch - '9' - 1) & ('0' - 1 - ch)) >> 8) + + ((cu - 'A' + 11) & (unsigned)((cu - 'F' - 1) & ('A' - 1 - cu)) >> 8); } EXPORT_SYMBOL(hex_to_bin); @@ -44,10 +62,13 @@ EXPORT_SYMBOL(hex_to_bin); int hex2bin(u8 *dst, const char *src, size_t count) { while (count--) { - int hi = hex_to_bin(*src++); - int lo = hex_to_bin(*src++); + int hi, lo; - if ((hi < 0) || (lo < 0)) + hi = hex_to_bin(*src++); + if (unlikely(hi < 0)) + return -EINVAL; + lo = hex_to_bin(*src++); + if (unlikely(lo < 0)) return -EINVAL; *dst++ = (hi << 4) | lo; diff --git a/lib/idr.c b/lib/idr.c index 4d2eef0259d2c..b2bc190431ddf 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -489,7 +489,8 @@ void ida_free(struct ida *ida, unsigned int id) struct ida_bitmap *bitmap; unsigned long flags; - BUG_ON((int)id < 0); + if ((int)id < 0) + return; xas_lock_irqsave(&xas, flags); bitmap = xas_load(&xas); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 973e5b12b37e8..05d8106be9a2d 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -74,7 +74,44 @@ } \ } -#define iterate_all_kinds(i, n, v, I, B, K) { \ +#define iterate_xarray(i, n, __v, skip, STEP) { \ + struct page *head = NULL; \ + size_t wanted = n, seg, offset; \ + loff_t start = i->xarray_start + skip; \ + pgoff_t index = start >> PAGE_SHIFT; \ + int j; \ + \ + XA_STATE(xas, i->xarray, index); \ + \ + rcu_read_lock(); \ + xas_for_each(&xas, head, ULONG_MAX) { \ + if (xas_retry(&xas, head)) \ + continue; \ + if (WARN_ON(xa_is_value(head))) \ + break; \ + if (WARN_ON(PageHuge(head))) \ + break; \ + for (j = (head->index < index) ? index - head->index : 0; \ + j < hpage_nr_pages(head); j++) { \ + __v.bv_page = head + j; \ + offset = (i->xarray_start + skip) & ~PAGE_MASK; \ + seg = PAGE_SIZE - offset; \ + __v.bv_offset = offset; \ + __v.bv_len = min(n, seg); \ + (void)(STEP); \ + n -= __v.bv_len; \ + skip += __v.bv_len; \ + if (n == 0) \ + break; \ + } \ + if (n == 0) \ + break; \ + } \ + rcu_read_unlock(); \ + n = wanted - n; \ +} + +#define iterate_all_kinds(i, n, v, I, B, K, X) { \ if (likely(n)) { \ size_t skip = i->iov_offset; \ if (unlikely(i->type & ITER_BVEC)) { \ @@ -86,6 +123,9 @@ struct kvec v; \ iterate_kvec(i, n, v, kvec, skip, (K)) \ } else if (unlikely(i->type & ITER_DISCARD)) { \ + } else if (unlikely(i->type & ITER_XARRAY)) { \ + struct bio_vec v; \ + iterate_xarray(i, n, v, skip, (X)); \ } else { \ const struct iovec *iov; \ struct iovec v; \ @@ -94,7 +134,7 @@ } \ } -#define iterate_and_advance(i, n, v, I, B, K) { \ +#define iterate_and_advance(i, n, v, I, B, K, X) { \ if (unlikely(i->count < n)) \ n = i->count; \ if (i->count) { \ @@ -119,6 +159,9 @@ i->kvec = kvec; \ } else if (unlikely(i->type & ITER_DISCARD)) { \ skip += n; \ + } else if (unlikely(i->type & ITER_XARRAY)) { \ + struct bio_vec v; \ + iterate_xarray(i, n, v, skip, (X)) \ } else { \ const struct iovec *iov; \ struct iovec v; \ @@ -615,7 +658,9 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len), memcpy_to_page(v.bv_page, v.bv_offset, (from += v.bv_len) - v.bv_len, v.bv_len), - memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len) + memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len), + memcpy_to_page(v.bv_page, v.bv_offset, + (from += v.bv_len) - v.bv_len, v.bv_len) ) return bytes; @@ -730,6 +775,18 @@ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) bytes = curr_addr - s_addr - rem; return bytes; } + }), + ({ + rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset, + (from += v.bv_len) - v.bv_len, v.bv_len); + if (rem) { + curr_addr = (unsigned long) from; + bytes = curr_addr - s_addr - rem; + rcu_read_unlock(); + i->iov_offset += bytes; + i->count -= bytes; + return bytes; + } }) ) @@ -751,7 +808,9 @@ size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len), memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, v.bv_offset, v.bv_len), - memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) + memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len), + memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, + v.bv_offset, v.bv_len) ) return bytes; @@ -777,7 +836,9 @@ bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i) 0;}), memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, v.bv_offset, v.bv_len), - memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) + memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len), + memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, + v.bv_offset, v.bv_len) ) iov_iter_advance(i, bytes); @@ -797,7 +858,9 @@ size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) v.iov_base, v.iov_len), memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, v.bv_offset, v.bv_len), - memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) + memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len), + memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, + v.bv_offset, v.bv_len) ) return bytes; @@ -832,7 +895,9 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page, v.bv_offset, v.bv_len), memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base, - v.iov_len) + v.iov_len), + memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page, + v.bv_offset, v.bv_len) ) return bytes; @@ -856,7 +921,9 @@ bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i) 0;}), memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, v.bv_offset, v.bv_len), - memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) + memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len), + memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, + v.bv_offset, v.bv_len) ) iov_iter_advance(i, bytes); @@ -893,7 +960,7 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, { if (unlikely(!page_copy_sane(page, offset, bytes))) return 0; - if (i->type & (ITER_BVEC|ITER_KVEC)) { + if (i->type & (ITER_BVEC | ITER_KVEC | ITER_XARRAY)) { void *kaddr = kmap_atomic(page); size_t wanted = copy_to_iter(kaddr + offset, bytes, i); kunmap_atomic(kaddr); @@ -919,7 +986,7 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, WARN_ON(1); return 0; } - if (i->type & (ITER_BVEC|ITER_KVEC)) { + if (i->type & (ITER_BVEC | ITER_KVEC | ITER_XARRAY)) { void *kaddr = kmap_atomic(page); size_t wanted = _copy_from_iter(kaddr + offset, bytes, i); kunmap_atomic(kaddr); @@ -960,7 +1027,8 @@ size_t iov_iter_zero(size_t bytes, struct iov_iter *i) iterate_and_advance(i, bytes, v, clear_user(v.iov_base, v.iov_len), memzero_page(v.bv_page, v.bv_offset, v.bv_len), - memset(v.iov_base, 0, v.iov_len) + memset(v.iov_base, 0, v.iov_len), + memzero_page(v.bv_page, v.bv_offset, v.bv_len) ) return bytes; @@ -984,7 +1052,9 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len), memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page, v.bv_offset, v.bv_len), - memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) + memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len), + memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page, + v.bv_offset, v.bv_len) ) kunmap_atomic(kaddr); return bytes; @@ -1037,6 +1107,21 @@ static void pipe_advance(struct iov_iter *i, size_t size) pipe_truncate(i); } +static void iov_iter_bvec_advance(struct iov_iter *i, size_t size) +{ + struct bvec_iter bi; + + bi.bi_size = i->count; + bi.bi_bvec_done = i->iov_offset; + bi.bi_idx = 0; + bvec_iter_advance(i->bvec, &bi, size); + + i->bvec += bi.bi_idx; + i->nr_segs -= bi.bi_idx; + i->count = bi.bi_size; + i->iov_offset = bi.bi_bvec_done; +} + void iov_iter_advance(struct iov_iter *i, size_t size) { if (unlikely(iov_iter_is_pipe(i))) { @@ -1047,7 +1132,17 @@ void iov_iter_advance(struct iov_iter *i, size_t size) i->count -= size; return; } - iterate_and_advance(i, size, v, 0, 0, 0) + if (unlikely(iov_iter_is_xarray(i))) { + size = min(size, i->count); + i->iov_offset += size; + i->count -= size; + return; + } + if (iov_iter_is_bvec(i)) { + iov_iter_bvec_advance(i, size); + return; + } + iterate_and_advance(i, size, v, 0, 0, 0, 0) } EXPORT_SYMBOL(iov_iter_advance); @@ -1089,7 +1184,12 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll) return; } unroll -= i->iov_offset; - if (iov_iter_is_bvec(i)) { + if (iov_iter_is_xarray(i)) { + BUG(); /* We should never go beyond the start of the specified + * range since we might then be straying into pages that + * aren't pinned. + */ + } else if (iov_iter_is_bvec(i)) { const struct bio_vec *bvec = i->bvec; while (1) { size_t n = (--bvec)->bv_len; @@ -1126,9 +1226,9 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i) return i->count; // it is a silly place, anyway if (i->nr_segs == 1) return i->count; - if (unlikely(iov_iter_is_discard(i))) + if (unlikely(iov_iter_is_discard(i) || iov_iter_is_xarray(i))) return i->count; - else if (iov_iter_is_bvec(i)) + if (iov_iter_is_bvec(i)) return min(i->count, i->bvec->bv_len - i->iov_offset); else return min(i->count, i->iov->iov_len - i->iov_offset); @@ -1176,6 +1276,31 @@ void iov_iter_pipe(struct iov_iter *i, unsigned int direction, } EXPORT_SYMBOL(iov_iter_pipe); +/** + * iov_iter_xarray - Initialise an I/O iterator to use the pages in an xarray + * @i: The iterator to initialise. + * @direction: The direction of the transfer. + * @xarray: The xarray to access. + * @start: The start file position. + * @count: The size of the I/O buffer in bytes. + * + * Set up an I/O iterator to either draw data out of the pages attached to an + * inode or to inject data into those pages. The pages *must* be prevented + * from evaporation, either by taking a ref on them or locking them by the + * caller. + */ +void iov_iter_xarray(struct iov_iter *i, unsigned int direction, + struct xarray *xarray, loff_t start, size_t count) +{ + BUG_ON(direction & ~1); + i->type = ITER_XARRAY | (direction & (READ | WRITE)); + i->xarray = xarray; + i->xarray_start = start; + i->count = count; + i->iov_offset = 0; +} +EXPORT_SYMBOL(iov_iter_xarray); + /** * iov_iter_discard - Initialise an I/O iterator that discards data * @i: The iterator to initialise. @@ -1204,10 +1329,13 @@ unsigned long iov_iter_alignment(const struct iov_iter *i) return size | i->iov_offset; return size; } + if (unlikely(iov_iter_is_xarray(i))) + return (i->xarray_start + i->iov_offset) | i->count; iterate_all_kinds(i, size, v, (res |= (unsigned long)v.iov_base | v.iov_len, 0), res |= v.bv_offset | v.bv_len, - res |= (unsigned long)v.iov_base | v.iov_len + res |= (unsigned long)v.iov_base | v.iov_len, + res |= v.bv_offset | v.bv_len ) return res; } @@ -1229,7 +1357,9 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i) (res |= (!res ? 0 : (unsigned long)v.bv_offset) | (size != v.bv_len ? size : 0)), (res |= (!res ? 0 : (unsigned long)v.iov_base) | - (size != v.iov_len ? size : 0)) + (size != v.iov_len ? size : 0)), + (res |= (!res ? 0 : (unsigned long)v.bv_offset) | + (size != v.bv_len ? size : 0)) ); return res; } @@ -1279,6 +1409,69 @@ static ssize_t pipe_get_pages(struct iov_iter *i, return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start); } +static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa, + pgoff_t index, unsigned int nr_pages) +{ + XA_STATE(xas, xa, index); + struct page *page; + unsigned int ret = 0; + + rcu_read_lock(); + for (page = xas_load(&xas); page; page = xas_next(&xas)) { + if (xas_retry(&xas, page)) + continue; + + /* Has the page moved or been split? */ + if (unlikely(page != xas_reload(&xas))) { + xas_reset(&xas); + continue; + } + + pages[ret] = find_subpage(page, xas.xa_index); + get_page(pages[ret]); + if (++ret == nr_pages) + break; + } + rcu_read_unlock(); + return ret; +} + +static ssize_t iter_xarray_get_pages(struct iov_iter *i, + struct page **pages, size_t maxsize, + unsigned maxpages, size_t *_start_offset) +{ + unsigned nr, offset; + pgoff_t index, count; + size_t size = maxsize; + loff_t pos; + + if (!size || !maxpages) + return 0; + + pos = i->xarray_start + i->iov_offset; + index = pos >> PAGE_SHIFT; + offset = pos & ~PAGE_MASK; + *_start_offset = offset; + + count = 1; + if (size > PAGE_SIZE - offset) { + size -= PAGE_SIZE - offset; + count += size >> PAGE_SHIFT; + size &= ~PAGE_MASK; + if (size) + count++; + } + + if (count > maxpages) + count = maxpages; + + nr = iter_xarray_populate_pages(pages, i->xarray, index, count); + if (nr == 0) + return 0; + + return min(nr * PAGE_SIZE - offset, maxsize); +} + ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start) @@ -1288,6 +1481,8 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, if (unlikely(iov_iter_is_pipe(i))) return pipe_get_pages(i, pages, maxsize, maxpages, start); + if (unlikely(iov_iter_is_xarray(i))) + return iter_xarray_get_pages(i, pages, maxsize, maxpages, start); if (unlikely(iov_iter_is_discard(i))) return -EFAULT; @@ -1304,7 +1499,7 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, res = get_user_pages_fast(addr, n, iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0, pages); - if (unlikely(res < 0)) + if (unlikely(res <= 0)) return res; return (res == n ? len : res * PAGE_SIZE) - *start; 0;}),({ @@ -1314,7 +1509,8 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, return v.bv_len; }),({ return -EFAULT; - }) + }), + 0 ) return 0; } @@ -1359,6 +1555,45 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i, return n; } +static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i, + struct page ***pages, size_t maxsize, + size_t *_start_offset) +{ + struct page **p; + unsigned nr, offset; + pgoff_t index, count; + size_t size = maxsize; + loff_t pos; + + if (!size) + return 0; + + pos = i->xarray_start + i->iov_offset; + index = pos >> PAGE_SHIFT; + offset = pos & ~PAGE_MASK; + *_start_offset = offset; + + count = 1; + if (size > PAGE_SIZE - offset) { + size -= PAGE_SIZE - offset; + count += size >> PAGE_SHIFT; + size &= ~PAGE_MASK; + if (size) + count++; + } + + p = get_pages_array(count); + if (!p) + return -ENOMEM; + *pages = p; + + nr = iter_xarray_populate_pages(p, i->xarray, index, count); + if (nr == 0) + return 0; + + return min(nr * PAGE_SIZE - offset, maxsize); +} + ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start) @@ -1370,6 +1605,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, if (unlikely(iov_iter_is_pipe(i))) return pipe_get_pages_alloc(i, pages, maxsize, start); + if (unlikely(iov_iter_is_xarray(i))) + return iter_xarray_get_pages_alloc(i, pages, maxsize, start); if (unlikely(iov_iter_is_discard(i))) return -EFAULT; @@ -1386,8 +1623,9 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, return -ENOMEM; res = get_user_pages_fast(addr, n, iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0, p); - if (unlikely(res < 0)) { + if (unlikely(res <= 0)) { kvfree(p); + *pages = NULL; return res; } *pages = p; @@ -1402,7 +1640,7 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, return v.bv_len; }),({ return -EFAULT; - }) + }), 0 ) return 0; } @@ -1441,6 +1679,13 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, v.iov_base, v.iov_len, sum, off); off += v.iov_len; + }), ({ + char *p = kmap_atomic(v.bv_page); + sum = csum_and_memcpy((to += v.bv_len) - v.bv_len, + p + v.bv_offset, v.bv_len, + sum, off); + kunmap_atomic(p); + off += v.bv_len; }) ) *csum = sum; @@ -1483,6 +1728,13 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, v.iov_base, v.iov_len, sum, off); off += v.iov_len; + }), ({ + char *p = kmap_atomic(v.bv_page); + sum = csum_and_memcpy((to += v.bv_len) - v.bv_len, + p + v.bv_offset, v.bv_len, + sum, off); + kunmap_atomic(p); + off += v.bv_len; }) ) *csum = sum; @@ -1530,6 +1782,13 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate, (from += v.iov_len) - v.iov_len, v.iov_len, sum, off); off += v.iov_len; + }), ({ + char *p = kmap_atomic(v.bv_page); + sum = csum_and_memcpy(p + v.bv_offset, + (from += v.bv_len) - v.bv_len, + v.bv_len, sum, off); + kunmap_atomic(p); + off += v.bv_len; }) ) csstate->csum = sum; @@ -1580,6 +1839,21 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages) npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1; if (npages >= maxpages) return maxpages; + } else if (unlikely(iov_iter_is_xarray(i))) { + unsigned offset; + + offset = (i->xarray_start + i->iov_offset) & ~PAGE_MASK; + + npages = 1; + if (size > PAGE_SIZE - offset) { + size -= PAGE_SIZE - offset; + npages += size >> PAGE_SHIFT; + size &= ~PAGE_MASK; + if (size) + npages++; + } + if (npages >= maxpages) + return maxpages; } else iterate_all_kinds(i, size, v, ({ unsigned long p = (unsigned long)v.iov_base; npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE) @@ -1596,7 +1870,8 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages) - p / PAGE_SIZE; if (npages >= maxpages) return maxpages; - }) + }), + 0 ) return npages; } @@ -1609,7 +1884,7 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) WARN_ON(1); return NULL; } - if (unlikely(iov_iter_is_discard(new))) + if (unlikely(iov_iter_is_discard(new) || iov_iter_is_xarray(new))) return NULL; if (iov_iter_is_bvec(new)) return new->bvec = kmemdup(new->bvec, @@ -1720,7 +1995,12 @@ int iov_iter_for_each_range(struct iov_iter *i, size_t bytes, kunmap(v.bv_page); err;}), ({ w = v; - err = f(&w, context);}) + err = f(&w, context);}), ({ + w.iov_base = kmap(v.bv_page) + v.bv_offset; + w.iov_len = v.bv_len; + err = f(&w, context); + kunmap(v.bv_page); + err;}) ) return err; } diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index 4d0b59fa5550f..46f24b7a32217 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -268,8 +268,12 @@ static FORCE_INLINE int LZ4_decompress_generic( ip += length; op += length; - /* Necessarily EOF, due to parsing restrictions */ - if (!partialDecoding || (cpy == oend)) + /* Necessarily EOF when !partialDecoding. + * When partialDecoding, it is EOF if we've either + * filled the output buffer or + * can't proceed with reading an offset for following match. + */ + if (!partialDecoding || (cpy == oend) || (ip >= (iend - 2))) break; } else { /* may overwrite up to WILDCOPYLENGTH beyond cpy */ diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c index 20ed0f7667871..00825028cc847 100644 --- a/lib/mpi/mpiutil.c +++ b/lib/mpi/mpiutil.c @@ -91,7 +91,7 @@ int mpi_resize(MPI a, unsigned nlimbs) return 0; /* no need to do it */ if (a->d) { - p = kmalloc_array(nlimbs, sizeof(mpi_limb_t), GFP_KERNEL); + p = kcalloc(nlimbs, sizeof(mpi_limb_t), GFP_KERNEL); if (!p) return -ENOMEM; memcpy(p, a->d, a->alloced * sizeof(mpi_limb_t)); diff --git a/lib/nodemask.c b/lib/nodemask.c index 3aa454c54c0de..e22647f5181b3 100644 --- a/lib/nodemask.c +++ b/lib/nodemask.c @@ -3,9 +3,9 @@ #include #include -int __next_node_in(int node, const nodemask_t *srcp) +unsigned int __next_node_in(int node, const nodemask_t *srcp) { - int ret = __next_node(node, srcp); + unsigned int ret = __next_node(node, srcp); if (ret == MAX_NUMNODES) ret = __first_node(srcp); diff --git a/lib/once.c b/lib/once.c index 8b7d6235217ee..59149bf3bfb4a 100644 --- a/lib/once.c +++ b/lib/once.c @@ -3,10 +3,12 @@ #include #include #include +#include struct once_work { struct work_struct work; struct static_key_true *key; + struct module *module; }; static void once_deferred(struct work_struct *w) @@ -16,10 +18,11 @@ static void once_deferred(struct work_struct *w) work = container_of(w, struct once_work, work); BUG_ON(!static_key_enabled(work->key)); static_branch_disable(work->key); + module_put(work->module); kfree(work); } -static void once_disable_jump(struct static_key_true *key) +static void once_disable_jump(struct static_key_true *key, struct module *mod) { struct once_work *w; @@ -29,6 +32,8 @@ static void once_disable_jump(struct static_key_true *key) INIT_WORK(&w->work, once_deferred); w->key = key; + w->module = mod; + __module_get(mod); schedule_work(&w->work); } @@ -53,11 +58,11 @@ bool __do_once_start(bool *done, unsigned long *flags) EXPORT_SYMBOL(__do_once_start); void __do_once_done(bool *done, struct static_key_true *once_key, - unsigned long *flags) + unsigned long *flags, struct module *mod) __releases(once_lock) { *done = true; spin_unlock_irqrestore(&once_lock, *flags); - once_disable_jump(once_key); + once_disable_jump(once_key, mod); } EXPORT_SYMBOL(__do_once_done); diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index b9e6c3648be1a..98b9fd0354dd1 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -4,6 +4,8 @@ # from userspace. # +pound := \# + CC = gcc OPTFLAGS = -O2 # Adjust as desired CFLAGS = -I.. -I ../../../include -g $(OPTFLAGS) @@ -47,7 +49,7 @@ else ifeq ($(HAS_NEON),yes) OBJS += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1 else - HAS_ALTIVEC := $(shell printf '\#include \nvector int a;\n' |\ + HAS_ALTIVEC := $(shell printf '$(pound)include \nvector int a;\n' |\ gcc -c -x c - >/dev/null && rm ./-.o && echo yes) ifeq ($(HAS_ALTIVEC),yes) CFLAGS += -I../../../arch/powerpc/include diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c index a3cf071941ab4..841a55242abaa 100644 --- a/lib/raid6/test/test.c +++ b/lib/raid6/test/test.c @@ -19,7 +19,6 @@ #define NDISKS 16 /* Including P and Q */ const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); -struct raid6_calls raid6_call; char *dataptrs[NDISKS]; char data[NDISKS][PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); diff --git a/lib/random32.c b/lib/random32.c index 9085b1172015e..339624191b515 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -38,6 +38,9 @@ #include #include #include +#include +#include +#include #include /** @@ -544,9 +547,11 @@ static void prandom_reseed(struct timer_list *unused) * To avoid worrying about whether it's safe to delay that interrupt * long enough to seed all CPUs, just schedule an immediate timer event. */ -static void prandom_timer_start(struct random_ready_callback *unused) +static int prandom_timer_start(struct notifier_block *nb, + unsigned long action, void *data) { mod_timer(&seed_timer, jiffies); + return 0; } /* @@ -555,13 +560,13 @@ static void prandom_timer_start(struct random_ready_callback *unused) */ static int __init prandom_init_late(void) { - static struct random_ready_callback random_ready = { - .func = prandom_timer_start + static struct notifier_block random_ready = { + .notifier_call = prandom_timer_start }; - int ret = add_random_ready_callback(&random_ready); + int ret = register_random_ready_notifier(&random_ready); if (ret == -EALREADY) { - prandom_timer_start(&random_ready); + prandom_timer_start(&random_ready, 0, NULL); ret = 0; } return ret; diff --git a/lib/refcount.c b/lib/refcount.c index 6e904af0fb3e1..ebac8b7d15a7c 100644 --- a/lib/refcount.c +++ b/lib/refcount.c @@ -1,41 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Variant of atomic_t specialized for reference counts. - * - * The interface matches the atomic_t interface (to aid in porting) but only - * provides the few functions one should use for reference counting. - * - * It differs in that the counter saturates at UINT_MAX and will not move once - * there. This avoids wrapping the counter and causing 'spurious' - * use-after-free issues. - * - * Memory ordering rules are slightly relaxed wrt regular atomic_t functions - * and provide only what is strictly required for refcounts. - * - * The increments are fully relaxed; these will not provide ordering. The - * rationale is that whatever is used to obtain the object we're increasing the - * reference count on will provide the ordering. For locked data structures, - * its the lock acquire, for RCU/lockless data structures its the dependent - * load. - * - * Do note that inc_not_zero() provides a control dependency which will order - * future stores against the inc, this ensures we'll never modify the object - * if we did not in fact acquire a reference. - * - * The decrements will provide release order, such that all the prior loads and - * stores will be issued before, it also provides a control dependency, which - * will order us against the subsequent free(). - * - * The control dependency is against the load of the cmpxchg (ll/sc) that - * succeeded. This means the stores aren't fully ordered, but this is fine - * because the 1->0 transition indicates no concurrency. - * - * Note that the allocator is responsible for ordering things between free() - * and alloc(). - * - * The decrements dec_and_test() and sub_and_test() also provide acquire - * ordering on success. - * + * Out-of-line refcount functions. */ #include @@ -43,199 +8,33 @@ #include #include -/** - * refcount_add_not_zero_checked - add a value to a refcount unless it is 0 - * @i: the value to add to the refcount - * @r: the refcount - * - * Will saturate at UINT_MAX and WARN. - * - * Provides no memory ordering, it is assumed the caller has guaranteed the - * object memory to be stable (RCU, etc.). It does provide a control dependency - * and thereby orders future stores. See the comment on top. - * - * Use of this function is not recommended for the normal reference counting - * use case in which references are taken and released one at a time. In these - * cases, refcount_inc(), or one of its variants, should instead be used to - * increment a reference count. - * - * Return: false if the passed refcount is 0, true otherwise - */ -bool refcount_add_not_zero_checked(unsigned int i, refcount_t *r) -{ - unsigned int new, val = atomic_read(&r->refs); - - do { - if (!val) - return false; - - if (unlikely(val == UINT_MAX)) - return true; - - new = val + i; - if (new < val) - new = UINT_MAX; - - } while (!atomic_try_cmpxchg_relaxed(&r->refs, &val, new)); - - WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); - - return true; -} -EXPORT_SYMBOL(refcount_add_not_zero_checked); - -/** - * refcount_add_checked - add a value to a refcount - * @i: the value to add to the refcount - * @r: the refcount - * - * Similar to atomic_add(), but will saturate at UINT_MAX and WARN. - * - * Provides no memory ordering, it is assumed the caller has guaranteed the - * object memory to be stable (RCU, etc.). It does provide a control dependency - * and thereby orders future stores. See the comment on top. - * - * Use of this function is not recommended for the normal reference counting - * use case in which references are taken and released one at a time. In these - * cases, refcount_inc(), or one of its variants, should instead be used to - * increment a reference count. - */ -void refcount_add_checked(unsigned int i, refcount_t *r) -{ - WARN_ONCE(!refcount_add_not_zero_checked(i, r), "refcount_t: addition on 0; use-after-free.\n"); -} -EXPORT_SYMBOL(refcount_add_checked); - -/** - * refcount_inc_not_zero_checked - increment a refcount unless it is 0 - * @r: the refcount to increment - * - * Similar to atomic_inc_not_zero(), but will saturate at UINT_MAX and WARN. - * - * Provides no memory ordering, it is assumed the caller has guaranteed the - * object memory to be stable (RCU, etc.). It does provide a control dependency - * and thereby orders future stores. See the comment on top. - * - * Return: true if the increment was successful, false otherwise - */ -bool refcount_inc_not_zero_checked(refcount_t *r) -{ - unsigned int new, val = atomic_read(&r->refs); - - do { - new = val + 1; - - if (!val) - return false; - - if (unlikely(!new)) - return true; - - } while (!atomic_try_cmpxchg_relaxed(&r->refs, &val, new)); +#define REFCOUNT_WARN(str) WARN_ONCE(1, "refcount_t: " str ".\n") - WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); - - return true; -} -EXPORT_SYMBOL(refcount_inc_not_zero_checked); - -/** - * refcount_inc_checked - increment a refcount - * @r: the refcount to increment - * - * Similar to atomic_inc(), but will saturate at UINT_MAX and WARN. - * - * Provides no memory ordering, it is assumed the caller already has a - * reference on the object. - * - * Will WARN if the refcount is 0, as this represents a possible use-after-free - * condition. - */ -void refcount_inc_checked(refcount_t *r) +void refcount_warn_saturate(refcount_t *r, enum refcount_saturation_type t) { - WARN_ONCE(!refcount_inc_not_zero_checked(r), "refcount_t: increment on 0; use-after-free.\n"); -} -EXPORT_SYMBOL(refcount_inc_checked); - -/** - * refcount_sub_and_test_checked - subtract from a refcount and test if it is 0 - * @i: amount to subtract from the refcount - * @r: the refcount - * - * Similar to atomic_dec_and_test(), but it will WARN, return false and - * ultimately leak on underflow and will fail to decrement when saturated - * at UINT_MAX. - * - * Provides release memory ordering, such that prior loads and stores are done - * before, and provides an acquire ordering on success such that free() - * must come after. - * - * Use of this function is not recommended for the normal reference counting - * use case in which references are taken and released one at a time. In these - * cases, refcount_dec(), or one of its variants, should instead be used to - * decrement a reference count. - * - * Return: true if the resulting refcount is 0, false otherwise - */ -bool refcount_sub_and_test_checked(unsigned int i, refcount_t *r) -{ - unsigned int new, val = atomic_read(&r->refs); - - do { - if (unlikely(val == UINT_MAX)) - return false; - - new = val - i; - if (new > val) { - WARN_ONCE(new > val, "refcount_t: underflow; use-after-free.\n"); - return false; - } - - } while (!atomic_try_cmpxchg_release(&r->refs, &val, new)); - - if (!new) { - smp_acquire__after_ctrl_dep(); - return true; + refcount_set(r, REFCOUNT_SATURATED); + + switch (t) { + case REFCOUNT_ADD_NOT_ZERO_OVF: + REFCOUNT_WARN("saturated; leaking memory"); + break; + case REFCOUNT_ADD_OVF: + REFCOUNT_WARN("saturated; leaking memory"); + break; + case REFCOUNT_ADD_UAF: + REFCOUNT_WARN("addition on 0; use-after-free"); + break; + case REFCOUNT_SUB_UAF: + REFCOUNT_WARN("underflow; use-after-free"); + break; + case REFCOUNT_DEC_LEAK: + REFCOUNT_WARN("decrement hit 0; leaking memory"); + break; + default: + REFCOUNT_WARN("unknown saturation event!?"); } - return false; - -} -EXPORT_SYMBOL(refcount_sub_and_test_checked); - -/** - * refcount_dec_and_test_checked - decrement a refcount and test if it is 0 - * @r: the refcount - * - * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to - * decrement when saturated at UINT_MAX. - * - * Provides release memory ordering, such that prior loads and stores are done - * before, and provides an acquire ordering on success such that free() - * must come after. - * - * Return: true if the resulting refcount is 0, false otherwise - */ -bool refcount_dec_and_test_checked(refcount_t *r) -{ - return refcount_sub_and_test_checked(1, r); -} -EXPORT_SYMBOL(refcount_dec_and_test_checked); - -/** - * refcount_dec_checked - decrement a refcount - * @r: the refcount - * - * Similar to atomic_dec(), it will WARN on underflow and fail to decrement - * when saturated at UINT_MAX. - * - * Provides release memory ordering, such that prior loads and stores are done - * before. - */ -void refcount_dec_checked(refcount_t *r) -{ - WARN_ONCE(refcount_dec_and_test_checked(r), "refcount_t: decrement hit 0; leaking memory.\n"); } -EXPORT_SYMBOL(refcount_dec_checked); +EXPORT_SYMBOL(refcount_warn_saturate); /** * refcount_dec_if_one - decrement a refcount if it is 1 @@ -277,7 +76,7 @@ bool refcount_dec_not_one(refcount_t *r) unsigned int new, val = atomic_read(&r->refs); do { - if (unlikely(val == UINT_MAX)) + if (unlikely(val == REFCOUNT_SATURATED)) return true; if (val == 1) @@ -302,7 +101,7 @@ EXPORT_SYMBOL(refcount_dec_not_one); * @lock: the mutex to be locked * * Similar to atomic_dec_and_mutex_lock(), it will WARN on underflow and fail - * to decrement when saturated at UINT_MAX. + * to decrement when saturated at REFCOUNT_SATURATED. * * Provides release memory ordering, such that prior loads and stores are done * before, and provides a control dependency such that free() must come after. @@ -333,7 +132,7 @@ EXPORT_SYMBOL(refcount_dec_and_mutex_lock); * @lock: the spinlock to be locked * * Similar to atomic_dec_and_lock(), it will WARN on underflow and fail to - * decrement when saturated at UINT_MAX. + * decrement when saturated at REFCOUNT_SATURATED. * * Provides release memory ordering, such that prior loads and stores are done * before, and provides a control dependency such that free() must come after. diff --git a/lib/sha1.c b/lib/sha1.c index 1d96d2c02b826..bad46695476bd 100644 --- a/lib/sha1.c +++ b/lib/sha1.c @@ -10,6 +10,7 @@ #include #include #include +#include #include /* @@ -55,7 +56,8 @@ #define SHA_ROUND(t, input, fn, constant, A, B, C, D, E) do { \ __u32 TEMP = input(t); setW(t, TEMP); \ E += TEMP + rol32(A,5) + (fn) + (constant); \ - B = ror32(B, 2); } while (0) + B = ror32(B, 2); \ + TEMP = E; E = D; D = C; C = B; B = A; A = TEMP; } while (0) #define T_0_15(t, A, B, C, D, E) SHA_ROUND(t, SHA_SRC, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E ) #define T_16_19(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E ) @@ -82,6 +84,7 @@ void sha_transform(__u32 *digest, const char *data, __u32 *array) { __u32 A, B, C, D, E; + unsigned int i = 0; A = digest[0]; B = digest[1]; @@ -90,94 +93,24 @@ void sha_transform(__u32 *digest, const char *data, __u32 *array) E = digest[4]; /* Round 1 - iterations 0-16 take their input from 'data' */ - T_0_15( 0, A, B, C, D, E); - T_0_15( 1, E, A, B, C, D); - T_0_15( 2, D, E, A, B, C); - T_0_15( 3, C, D, E, A, B); - T_0_15( 4, B, C, D, E, A); - T_0_15( 5, A, B, C, D, E); - T_0_15( 6, E, A, B, C, D); - T_0_15( 7, D, E, A, B, C); - T_0_15( 8, C, D, E, A, B); - T_0_15( 9, B, C, D, E, A); - T_0_15(10, A, B, C, D, E); - T_0_15(11, E, A, B, C, D); - T_0_15(12, D, E, A, B, C); - T_0_15(13, C, D, E, A, B); - T_0_15(14, B, C, D, E, A); - T_0_15(15, A, B, C, D, E); + for (; i < 16; ++i) + T_0_15(i, A, B, C, D, E); /* Round 1 - tail. Input from 512-bit mixing array */ - T_16_19(16, E, A, B, C, D); - T_16_19(17, D, E, A, B, C); - T_16_19(18, C, D, E, A, B); - T_16_19(19, B, C, D, E, A); + for (; i < 20; ++i) + T_16_19(i, A, B, C, D, E); /* Round 2 */ - T_20_39(20, A, B, C, D, E); - T_20_39(21, E, A, B, C, D); - T_20_39(22, D, E, A, B, C); - T_20_39(23, C, D, E, A, B); - T_20_39(24, B, C, D, E, A); - T_20_39(25, A, B, C, D, E); - T_20_39(26, E, A, B, C, D); - T_20_39(27, D, E, A, B, C); - T_20_39(28, C, D, E, A, B); - T_20_39(29, B, C, D, E, A); - T_20_39(30, A, B, C, D, E); - T_20_39(31, E, A, B, C, D); - T_20_39(32, D, E, A, B, C); - T_20_39(33, C, D, E, A, B); - T_20_39(34, B, C, D, E, A); - T_20_39(35, A, B, C, D, E); - T_20_39(36, E, A, B, C, D); - T_20_39(37, D, E, A, B, C); - T_20_39(38, C, D, E, A, B); - T_20_39(39, B, C, D, E, A); + for (; i < 40; ++i) + T_20_39(i, A, B, C, D, E); /* Round 3 */ - T_40_59(40, A, B, C, D, E); - T_40_59(41, E, A, B, C, D); - T_40_59(42, D, E, A, B, C); - T_40_59(43, C, D, E, A, B); - T_40_59(44, B, C, D, E, A); - T_40_59(45, A, B, C, D, E); - T_40_59(46, E, A, B, C, D); - T_40_59(47, D, E, A, B, C); - T_40_59(48, C, D, E, A, B); - T_40_59(49, B, C, D, E, A); - T_40_59(50, A, B, C, D, E); - T_40_59(51, E, A, B, C, D); - T_40_59(52, D, E, A, B, C); - T_40_59(53, C, D, E, A, B); - T_40_59(54, B, C, D, E, A); - T_40_59(55, A, B, C, D, E); - T_40_59(56, E, A, B, C, D); - T_40_59(57, D, E, A, B, C); - T_40_59(58, C, D, E, A, B); - T_40_59(59, B, C, D, E, A); + for (; i < 60; ++i) + T_40_59(i, A, B, C, D, E); /* Round 4 */ - T_60_79(60, A, B, C, D, E); - T_60_79(61, E, A, B, C, D); - T_60_79(62, D, E, A, B, C); - T_60_79(63, C, D, E, A, B); - T_60_79(64, B, C, D, E, A); - T_60_79(65, A, B, C, D, E); - T_60_79(66, E, A, B, C, D); - T_60_79(67, D, E, A, B, C); - T_60_79(68, C, D, E, A, B); - T_60_79(69, B, C, D, E, A); - T_60_79(70, A, B, C, D, E); - T_60_79(71, E, A, B, C, D); - T_60_79(72, D, E, A, B, C); - T_60_79(73, C, D, E, A, B); - T_60_79(74, B, C, D, E, A); - T_60_79(75, A, B, C, D, E); - T_60_79(76, E, A, B, C, D); - T_60_79(77, D, E, A, B, C); - T_60_79(78, C, D, E, A, B); - T_60_79(79, B, C, D, E, A); + for (; i < 80; ++i) + T_60_79(i, A, B, C, D, E); digest[0] += A; digest[1] += B; diff --git a/lib/siphash.c b/lib/siphash.c index c47bb6ff21499..b4055b1cc2f67 100644 --- a/lib/siphash.c +++ b/lib/siphash.c @@ -18,19 +18,13 @@ #include #endif -#define SIPROUND \ - do { \ - v0 += v1; v1 = rol64(v1, 13); v1 ^= v0; v0 = rol64(v0, 32); \ - v2 += v3; v3 = rol64(v3, 16); v3 ^= v2; \ - v0 += v3; v3 = rol64(v3, 21); v3 ^= v0; \ - v2 += v1; v1 = rol64(v1, 17); v1 ^= v2; v2 = rol64(v2, 32); \ - } while (0) +#define SIPROUND SIPHASH_PERMUTATION(v0, v1, v2, v3) #define PREAMBLE(len) \ - u64 v0 = 0x736f6d6570736575ULL; \ - u64 v1 = 0x646f72616e646f6dULL; \ - u64 v2 = 0x6c7967656e657261ULL; \ - u64 v3 = 0x7465646279746573ULL; \ + u64 v0 = SIPHASH_CONST_0; \ + u64 v1 = SIPHASH_CONST_1; \ + u64 v2 = SIPHASH_CONST_2; \ + u64 v3 = SIPHASH_CONST_3; \ u64 b = ((u64)(len)) << 56; \ v3 ^= key->key[1]; \ v2 ^= key->key[0]; \ @@ -49,6 +43,7 @@ SIPROUND; \ return (v0 ^ v1) ^ (v2 ^ v3); +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key) { const u8 *end = data + len - (len % sizeof(u64)); @@ -80,8 +75,8 @@ u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key) POSTAMBLE } EXPORT_SYMBOL(__siphash_aligned); +#endif -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key) { const u8 *end = data + len - (len % sizeof(u64)); @@ -113,7 +108,6 @@ u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key) POSTAMBLE } EXPORT_SYMBOL(__siphash_unaligned); -#endif /** * siphash_1u64 - compute 64-bit siphash PRF value of a u64 @@ -250,6 +244,7 @@ EXPORT_SYMBOL(siphash_3u32); HSIPROUND; \ return (v0 ^ v1) ^ (v2 ^ v3); +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) { const u8 *end = data + len - (len % sizeof(u64)); @@ -280,8 +275,8 @@ u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) HPOSTAMBLE } EXPORT_SYMBOL(__hsiphash_aligned); +#endif -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u32 __hsiphash_unaligned(const void *data, size_t len, const hsiphash_key_t *key) { @@ -313,7 +308,6 @@ u32 __hsiphash_unaligned(const void *data, size_t len, HPOSTAMBLE } EXPORT_SYMBOL(__hsiphash_unaligned); -#endif /** * hsiphash_1u32 - compute 64-bit hsiphash PRF value of a u32 @@ -389,19 +383,13 @@ u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third, } EXPORT_SYMBOL(hsiphash_4u32); #else -#define HSIPROUND \ - do { \ - v0 += v1; v1 = rol32(v1, 5); v1 ^= v0; v0 = rol32(v0, 16); \ - v2 += v3; v3 = rol32(v3, 8); v3 ^= v2; \ - v0 += v3; v3 = rol32(v3, 7); v3 ^= v0; \ - v2 += v1; v1 = rol32(v1, 13); v1 ^= v2; v2 = rol32(v2, 16); \ - } while (0) +#define HSIPROUND HSIPHASH_PERMUTATION(v0, v1, v2, v3) #define HPREAMBLE(len) \ - u32 v0 = 0; \ - u32 v1 = 0; \ - u32 v2 = 0x6c796765U; \ - u32 v3 = 0x74656462U; \ + u32 v0 = HSIPHASH_CONST_0; \ + u32 v1 = HSIPHASH_CONST_1; \ + u32 v2 = HSIPHASH_CONST_2; \ + u32 v3 = HSIPHASH_CONST_3; \ u32 b = ((u32)(len)) << 24; \ v3 ^= key->key[1]; \ v2 ^= key->key[0]; \ @@ -418,6 +406,7 @@ EXPORT_SYMBOL(hsiphash_4u32); HSIPROUND; \ return v1 ^ v3; +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) { const u8 *end = data + len - (len % sizeof(u32)); @@ -438,8 +427,8 @@ u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) HPOSTAMBLE } EXPORT_SYMBOL(__hsiphash_aligned); +#endif -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u32 __hsiphash_unaligned(const void *data, size_t len, const hsiphash_key_t *key) { @@ -461,7 +450,6 @@ u32 __hsiphash_unaligned(const void *data, size_t len, HPOSTAMBLE } EXPORT_SYMBOL(__hsiphash_unaligned); -#endif /** * hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32 diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 5ef3eccee27cb..3ae002ced4c7a 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -4286,8 +4286,8 @@ static struct bpf_test tests[] = { .u.insns_int = { BPF_LD_IMM64(R0, 0), BPF_LD_IMM64(R1, 0xffffffffffffffffLL), - BPF_STX_MEM(BPF_W, R10, R1, -40), - BPF_LDX_MEM(BPF_W, R0, R10, -40), + BPF_STX_MEM(BPF_DW, R10, R1, -40), + BPF_LDX_MEM(BPF_DW, R0, R10, -40), BPF_EXIT_INSN(), }, INTERNAL, @@ -6684,7 +6684,14 @@ static int run_one(const struct bpf_prog *fp, struct bpf_test *test) u64 duration; u32 ret; - if (test->test[i].data_size == 0 && + /* + * NOTE: Several sub-tests may be present, in which case + * a zero {data_size, result} tuple indicates the end of + * the sub-test array. The first test is always run, + * even if both data_size and result happen to be zero. + */ + if (i > 0 && + test->test[i].data_size == 0 && test->test[i].result == 0) break; diff --git a/lib/test_kmod.c b/lib/test_kmod.c index 87a0cc750ea23..6813b183aa348 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -1155,6 +1155,7 @@ static struct kmod_test_device *register_test_dev_kmod(void) if (ret) { pr_err("could not register misc device: %d\n", ret); free_test_dev_kmod(test_dev); + test_dev = NULL; goto out; } diff --git a/lib/test_meminit.c b/lib/test_meminit.c index 9742e5cb853aa..ab00c79423a5f 100644 --- a/lib/test_meminit.c +++ b/lib/test_meminit.c @@ -319,6 +319,7 @@ static int __init do_kmem_cache_size_bulk(int size, int *total_failures) if (num) kmem_cache_free_bulk(c, num, objects); } + kmem_cache_destroy(c); *total_failures += fail; return 1; } diff --git a/lib/test_stackinit.c b/lib/test_stackinit.c index 2d7d257a430e6..35d398b065e4f 100644 --- a/lib/test_stackinit.c +++ b/lib/test_stackinit.c @@ -67,10 +67,10 @@ static bool range_contains(char *haystack_start, size_t haystack_size, #define INIT_STRUCT_none /**/ #define INIT_STRUCT_zero = { } #define INIT_STRUCT_static_partial = { .two = 0, } -#define INIT_STRUCT_static_all = { .one = arg->one, \ - .two = arg->two, \ - .three = arg->three, \ - .four = arg->four, \ +#define INIT_STRUCT_static_all = { .one = 0, \ + .two = 0, \ + .three = 0, \ + .four = 0, \ } #define INIT_STRUCT_dynamic_partial = { .two = arg->two, } #define INIT_STRUCT_dynamic_all = { .one = arg->one, \ @@ -84,8 +84,7 @@ static bool range_contains(char *haystack_start, size_t haystack_size, var.one = 0; \ var.two = 0; \ var.three = 0; \ - memset(&var.four, 0, \ - sizeof(var.four)) + var.four = 0 /* * @name: unique string name for the test @@ -208,18 +207,13 @@ struct test_small_hole { unsigned long four; }; -/* Try to trigger unhandled padding in a structure. */ -struct test_aligned { - u32 internal1; - u64 internal2; -} __aligned(64); - +/* Trigger unhandled padding in a structure. */ struct test_big_hole { u8 one; u8 two; u8 three; /* 61 byte padding hole here. */ - struct test_aligned four; + u8 four __aligned(64); } __aligned(64); struct test_trailing_hole { diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 8262c3f05a5d3..aefa377c9c26a 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -1438,6 +1438,25 @@ static noinline void check_create_range_4(struct xarray *xa, XA_BUG_ON(xa, !xa_empty(xa)); } +static noinline void check_create_range_5(struct xarray *xa, + unsigned long index, unsigned int order) +{ + XA_STATE_ORDER(xas, xa, index, order); + unsigned int i; + + xa_store_order(xa, index, order, xa_mk_index(index), GFP_KERNEL); + + for (i = 0; i < order + 10; i++) { + do { + xas_lock(&xas); + xas_create_range(&xas); + xas_unlock(&xas); + } while (xas_nomem(&xas, GFP_KERNEL)); + } + + xa_destroy(xa); +} + static noinline void check_create_range(struct xarray *xa) { unsigned int order; @@ -1465,6 +1484,9 @@ static noinline void check_create_range(struct xarray *xa) check_create_range_4(xa, (3U << order) + 1, order); check_create_range_4(xa, (3U << order) - 1, order); check_create_range_4(xa, (1U << 24) + 1, order); + + check_create_range_5(xa, 0, order); + check_create_range_5(xa, (1U << order), order); } check_create_range_3(); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 2766d1b2c301d..393623bf3258f 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -731,14 +731,16 @@ static void enable_ptr_key_workfn(struct work_struct *work) static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn); -static void fill_random_ptr_key(struct random_ready_callback *unused) +static int fill_random_ptr_key(struct notifier_block *nb, + unsigned long action, void *data) { /* This may be in an interrupt handler. */ queue_work(system_unbound_wq, &enable_ptr_key_work); + return 0; } -static struct random_ready_callback random_ready = { - .func = fill_random_ptr_key +static struct notifier_block random_ready = { + .notifier_call = fill_random_ptr_key }; static int __init initialize_ptr_random(void) @@ -752,7 +754,7 @@ static int __init initialize_ptr_random(void) return 0; } - ret = add_random_ready_callback(&random_ready); + ret = register_random_ready_notifier(&random_ready); if (!ret) { return 0; } else if (ret == -EALREADY) { diff --git a/lib/xarray.c b/lib/xarray.c index 7d22b30591275..61464c52c20e6 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -722,6 +722,8 @@ void xas_create_range(struct xa_state *xas) for (;;) { struct xa_node *node = xas->xa_node; + if (node->shift >= shift) + break; xas->xa_node = xa_parent_locked(xas->xa, node); xas->xa_offset = node->offset - 1; if (node->offset != 0) @@ -1078,6 +1080,7 @@ void xas_split(struct xa_state *xas, void *entry, unsigned int order) xa_mk_node(child)); if (xa_is_value(curr)) values--; + xas_update(xas, child); } else { unsigned int canon = offset - xas->xa_sibs; @@ -1092,6 +1095,7 @@ void xas_split(struct xa_state *xas, void *entry, unsigned int order) } while (offset-- > xas->xa_offset); node->nr_values += values; + xas_update(xas, node); } EXPORT_SYMBOL_GPL(xas_split); #endif diff --git a/lib/xz/xz_dec_lzma2.c b/lib/xz/xz_dec_lzma2.c index 156f26fdc4c91..dd80989ca5a6b 100644 --- a/lib/xz/xz_dec_lzma2.c +++ b/lib/xz/xz_dec_lzma2.c @@ -387,7 +387,14 @@ static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b, *left -= copy_size; - memcpy(dict->buf + dict->pos, b->in + b->in_pos, copy_size); + /* + * If doing in-place decompression in single-call mode and the + * uncompressed size of the file is larger than the caller + * thought (i.e. it is invalid input!), the buffers below may + * overlap and cause undefined behavior with memcpy(). + * With valid inputs memcpy() would be fine here. + */ + memmove(dict->buf + dict->pos, b->in + b->in_pos, copy_size); dict->pos += copy_size; if (dict->full < dict->pos) @@ -397,7 +404,11 @@ static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b, if (dict->pos == dict->end) dict->pos = 0; - memcpy(b->out + b->out_pos, b->in + b->in_pos, + /* + * Like above but for multi-call mode: use memmove() + * to avoid undefined behavior with invalid input. + */ + memmove(b->out + b->out_pos, b->in + b->in_pos, copy_size); } @@ -421,6 +432,12 @@ static uint32_t dict_flush(struct dictionary *dict, struct xz_buf *b) if (dict->pos == dict->end) dict->pos = 0; + /* + * These buffers cannot overlap even if doing in-place + * decompression because in multi-call mode dict->buf + * has been allocated by us in this file; it's not + * provided by the caller like in single-call mode. + */ memcpy(b->out + b->out_pos, dict->buf + dict->start, copy_size); } diff --git a/lib/xz/xz_dec_stream.c b/lib/xz/xz_dec_stream.c index bd1d182419d7e..0b161f90d8d80 100644 --- a/lib/xz/xz_dec_stream.c +++ b/lib/xz/xz_dec_stream.c @@ -402,12 +402,12 @@ static enum xz_ret dec_stream_header(struct xz_dec *s) * we will accept other check types too, but then the check won't * be verified and a warning (XZ_UNSUPPORTED_CHECK) will be given. */ + if (s->temp.buf[HEADER_MAGIC_SIZE + 1] > XZ_CHECK_MAX) + return XZ_OPTIONS_ERROR; + s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1]; #ifdef XZ_DEC_ANY_CHECK - if (s->check_type > XZ_CHECK_MAX) - return XZ_OPTIONS_ERROR; - if (s->check_type > XZ_CHECK_CRC32) return XZ_UNSUPPORTED_CHECK; #else diff --git a/mm/Makefile b/mm/Makefile index e4e7d4f68d5f5..7e4ac69191ead 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -61,7 +61,7 @@ obj-$(CONFIG_FRONTSWAP) += frontswap.o obj-$(CONFIG_ZSWAP) += zswap.o obj-$(CONFIG_HAS_DMA) += dmapool.o obj-$(CONFIG_HUGETLBFS) += hugetlb.o -obj-$(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP) += hugetlb_vmemmap.o +obj-$(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP) += hugetlb_vmemmap.o obj-$(CONFIG_NUMA) += mempolicy.o obj-$(CONFIG_SPARSEMEM) += sparse.o obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 3f2480e4c5af3..4556a691ab973 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -385,9 +385,8 @@ static void wb_exit(struct bdi_writeback *wb) #include /* - * cgwb_lock protects bdi->cgwb_tree, bdi->cgwb_congested_tree, - * blkcg->cgwb_list, and memcg->cgwb_list. bdi->cgwb_tree is also RCU - * protected. + * cgwb_lock protects bdi->cgwb_tree, blkcg->cgwb_list, offline_cgwbs and + * memcg->cgwb_list. bdi->cgwb_tree is also RCU protected. */ static DEFINE_SPINLOCK(cgwb_lock); static struct workqueue_struct *cgwb_release_wq; @@ -477,6 +476,9 @@ void wb_congested_put(struct bdi_writeback_congested *congested) spin_unlock_irqrestore(&cgwb_lock, flags); kfree(congested); } +static LIST_HEAD(offline_cgwbs); +static void cleanup_offline_cgwbs_workfn(struct work_struct *work); +static DECLARE_WORK(cleanup_offline_cgwbs_work, cleanup_offline_cgwbs_workfn); static void cgwb_release_workfn(struct work_struct *work) { @@ -491,12 +493,18 @@ static void cgwb_release_workfn(struct work_struct *work) css_put(wb->blkcg_css); mutex_unlock(&wb->bdi->cgwb_release_mutex); - /* triggers blkg destruction if cgwb_refcnt becomes zero */ - blkcg_cgwb_put(blkcg); + /* triggers blkg destruction if no online users left */ + blkcg_unpin_online(blkcg); fprop_local_destroy_percpu(&wb->memcg_completions); + + spin_lock_irq(&cgwb_lock); + list_del(&wb->offline_node); + spin_unlock_irq(&cgwb_lock); + percpu_ref_exit(&wb->refcnt); wb_exit(wb); + WARN_ON_ONCE(!list_empty(&wb->b_attached)); kfree_rcu(wb, rcu); } @@ -514,6 +522,7 @@ static void cgwb_kill(struct bdi_writeback *wb) WARN_ON(!radix_tree_delete(&wb->bdi->cgwb_tree, wb->memcg_css->id)); list_del(&wb->memcg_node); list_del(&wb->blkcg_node); + list_add(&wb->offline_node, &offline_cgwbs); percpu_ref_kill(&wb->refcnt); } @@ -573,6 +582,7 @@ static int cgwb_create(struct backing_dev_info *bdi, wb->memcg_css = memcg_css; wb->blkcg_css = blkcg_css; + INIT_LIST_HEAD(&wb->b_attached); INIT_WORK(&wb->release_work, cgwb_release_workfn); set_bit(WB_registered, &wb->state); @@ -592,7 +602,7 @@ static int cgwb_create(struct backing_dev_info *bdi, list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list); list_add(&wb->memcg_node, memcg_cgwb_list); list_add(&wb->blkcg_node, blkcg_cgwb_list); - blkcg_cgwb_get(blkcg); + blkcg_pin_online(blkcg); css_get(memcg_css); css_get(blkcg_css); } @@ -735,6 +745,54 @@ static void cgwb_bdi_unregister(struct backing_dev_info *bdi) mutex_unlock(&bdi->cgwb_release_mutex); } +/* + * cleanup_offline_cgwbs_workfn - try to release dying cgwbs + * + * Try to release dying cgwbs by switching attached inodes to the nearest + * living ancestor's writeback. Processed wbs are placed at the end + * of the list to guarantee the forward progress. + */ +static void cleanup_offline_cgwbs_workfn(struct work_struct *work) +{ + struct bdi_writeback *wb; + LIST_HEAD(processed); + + spin_lock_irq(&cgwb_lock); + + while (!list_empty(&offline_cgwbs)) { + wb = list_first_entry(&offline_cgwbs, struct bdi_writeback, + offline_node); + list_move(&wb->offline_node, &processed); + + /* + * If wb is dirty, cleaning up the writeback by switching + * attached inodes will result in an effective removal of any + * bandwidth restrictions, which isn't the goal. Instead, + * it can be postponed until the next time, when all io + * will be likely completed. If in the meantime some inodes + * will get re-dirtied, they should be eventually switched to + * a new cgwb. + */ + if (wb_has_dirty_io(wb)) + continue; + + if (!wb_tryget(wb)) + continue; + + spin_unlock_irq(&cgwb_lock); + while (cleanup_offline_cgwb(wb)) + cond_resched(); + spin_lock_irq(&cgwb_lock); + + wb_put(wb); + } + + if (!list_empty(&processed)) + list_splice_tail(&processed, &offline_cgwbs); + + spin_unlock_irq(&cgwb_lock); +} + /** * wb_memcg_offline - kill all wb's associated with a memcg being offlined * @memcg: memcg being offlined @@ -751,6 +809,8 @@ void wb_memcg_offline(struct mem_cgroup *memcg) cgwb_kill(wb); memcg_cgwb_list->next = NULL; /* prevent new wb's */ spin_unlock_irq(&cgwb_lock); + + queue_work(system_unbound_wq, &cleanup_offline_cgwbs_work); } /** @@ -1013,6 +1073,13 @@ void bdi_unregister(struct backing_dev_info *bdi) wb_shutdown(&bdi->wb); cgwb_bdi_unregister(bdi); + /* + * If this BDI's min ratio has been set, use bdi_set_min_ratio() to + * update the global bdi_min_ratio. + */ + if (bdi->min_ratio) + bdi_set_min_ratio(bdi, 0); + if (bdi->dev) { bdi_debug_unregister(bdi); device_unregister(bdi->dev); diff --git a/mm/compaction.c b/mm/compaction.c index d686887856fee..0758afd6325da 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1709,6 +1709,8 @@ static unsigned long fast_find_migrateblock(struct compact_control *cc) update_fast_start_pfn(cc, free_pfn); pfn = pageblock_start_pfn(free_pfn); + if (pfn < cc->zone->zone_start_pfn) + pfn = cc->zone->zone_start_pfn; cc->fast_search_fail = 0; found_block = true; set_pageblock_skip(freepage); diff --git a/mm/damon/core.c b/mm/damon/core.c index 30e9211f494a7..d820569992b6e 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -197,7 +197,6 @@ int damon_set_targets(struct damon_ctx *ctx, for (i = 0; i < nr_ids; i++) { t = damon_new_target(ids[i]); if (!t) { - pr_err("Failed to alloc damon_target\n"); /* The caller should do cleanup of the ids itself */ damon_for_each_target_safe(t, next, ctx) damon_destroy_target(t); @@ -227,16 +226,10 @@ int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int, unsigned long aggr_int, unsigned long primitive_upd_int, unsigned long min_nr_reg, unsigned long max_nr_reg) { - if (min_nr_reg < 3) { - pr_err("min_nr_regions (%lu) must be at least 3\n", - min_nr_reg); + if (min_nr_reg < 3) return -EINVAL; - } - if (min_nr_reg > max_nr_reg) { - pr_err("invalid nr_regions. min (%lu) > max (%lu)\n", - min_nr_reg, max_nr_reg); + if (min_nr_reg > max_nr_reg) return -EINVAL; - } ctx->sample_interval = sample_int; ctx->aggr_interval = aggr_int; @@ -634,6 +627,14 @@ static bool kdamond_need_stop(struct damon_ctx *ctx) return true; } +static void kdamond_usleep(unsigned long usecs) +{ + if (usecs > 100 * 1000) + schedule_timeout_idle(usecs_to_jiffies(usecs)); + else + usleep_idle_range(usecs, usecs + 1); +} + static void set_kdamond_stop(struct damon_ctx *ctx) { mutex_lock(&ctx->kdamond_lock); @@ -670,7 +671,7 @@ static int kdamond_fn(void *data) ctx->callback.after_sampling(ctx)) set_kdamond_stop(ctx); - usleep_range(ctx->sample_interval, ctx->sample_interval + 1); + kdamond_usleep(ctx->sample_interval); if (ctx->primitive.check_accesses) max_nr_accesses = ctx->primitive.check_accesses(ctx); diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c index faee070977d80..d40dca3f74a8b 100644 --- a/mm/damon/dbgfs.c +++ b/mm/damon/dbgfs.c @@ -32,7 +32,7 @@ static char *user_input_str(const char __user *buf, size_t count, loff_t *ppos) if (*ppos) return ERR_PTR(-EINVAL); - kbuf = kmalloc(count + 1, GFP_KERNEL); + kbuf = kmalloc(count + 1, GFP_KERNEL | __GFP_NOWARN); if (!kbuf) return ERR_PTR(-ENOMEM); @@ -185,7 +185,7 @@ static ssize_t dbgfs_target_ids_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct damon_ctx *ctx = file->private_data; - char *kbuf, *nrs; + char *kbuf; unsigned long *targets; ssize_t nr_targets; ssize_t ret = count; @@ -196,9 +196,8 @@ static ssize_t dbgfs_target_ids_write(struct file *file, if (IS_ERR(kbuf)) return PTR_ERR(kbuf); - nrs = kbuf; - targets = str_to_target_ids(nrs, ret, &nr_targets); + targets = str_to_target_ids(kbuf, ret, &nr_targets); if (!targets) { ret = -ENOMEM; goto out; @@ -247,7 +246,7 @@ static ssize_t dbgfs_kdamond_pid_read(struct file *file, char *kbuf; ssize_t len; - kbuf = kmalloc(count, GFP_KERNEL); + kbuf = kmalloc(count, GFP_KERNEL | __GFP_NOWARN); if (!kbuf) return -ENOMEM; @@ -309,10 +308,12 @@ static int dbgfs_before_terminate(struct damon_ctx *ctx) if (!targetid_is_pid(ctx)) return 0; + mutex_lock(&ctx->kdamond_lock); damon_for_each_target_safe(t, next, ctx) { put_pid((struct pid *)t->id); damon_destroy_target(t); } + mutex_unlock(&ctx->kdamond_lock); return 0; } @@ -538,12 +539,14 @@ static ssize_t dbgfs_monitor_on_write(struct file *file, return -EINVAL; } + mutex_lock(&damon_dbgfs_lock); if (!strncmp(kbuf, "on", count)) err = damon_start(dbgfs_ctxs, dbgfs_nr_ctxs); else if (!strncmp(kbuf, "off", count)) err = damon_stop(dbgfs_ctxs, dbgfs_nr_ctxs); else err = -EINVAL; + mutex_unlock(&damon_dbgfs_lock); if (err) ret = err; @@ -596,15 +599,16 @@ static int __init __damon_dbgfs_init(void) static int __init damon_dbgfs_init(void) { - int rc; + int rc = -ENOMEM; + mutex_lock(&damon_dbgfs_lock); dbgfs_ctxs = kmalloc(sizeof(*dbgfs_ctxs), GFP_KERNEL); if (!dbgfs_ctxs) - return -ENOMEM; + goto out; dbgfs_ctxs[0] = dbgfs_new_ctx(); if (!dbgfs_ctxs[0]) { kfree(dbgfs_ctxs); - return -ENOMEM; + goto out; } dbgfs_nr_ctxs = 1; @@ -615,6 +619,8 @@ static int __init damon_dbgfs_init(void) pr_err("%s: dbgfs init failed\n", __func__); } +out: + mutex_unlock(&damon_dbgfs_lock); return rc; } diff --git a/mm/damon/vaddr-test.h b/mm/damon/vaddr-test.h index 1f5c13257dbaf..95ec362cdc37c 100644 --- a/mm/damon/vaddr-test.h +++ b/mm/damon/vaddr-test.h @@ -252,59 +252,62 @@ static void damon_test_apply_three_regions4(struct kunit *test) new_three_regions, expected, ARRAY_SIZE(expected)); } -static void damon_test_split_evenly(struct kunit *test) +static void damon_test_split_evenly_fail(struct kunit *test, + unsigned long start, unsigned long end, unsigned int nr_pieces) { - struct damon_ctx *c = damon_new_ctx(); - struct damon_target *t; - struct damon_region *r; - unsigned long i; - - KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(NULL, NULL, 5), - -EINVAL); - - t = damon_new_target(42); - r = damon_new_region(0, 100); - KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(t, r, 0), -EINVAL); + struct damon_target *t = damon_new_target(42); + struct damon_region *r = damon_new_region(start, end); damon_add_region(r, t); - KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(t, r, 10), 0); - KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 10u); + KUNIT_EXPECT_EQ(test, + damon_va_evenly_split_region(t, r, nr_pieces), -EINVAL); + KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 1u); - i = 0; damon_for_each_region(r, t) { - KUNIT_EXPECT_EQ(test, r->ar.start, i++ * 10); - KUNIT_EXPECT_EQ(test, r->ar.end, i * 10); + KUNIT_EXPECT_EQ(test, r->ar.start, start); + KUNIT_EXPECT_EQ(test, r->ar.end, end); } + damon_free_target(t); +} + +static void damon_test_split_evenly_succ(struct kunit *test, + unsigned long start, unsigned long end, unsigned int nr_pieces) +{ + struct damon_target *t = damon_new_target(42); + struct damon_region *r = damon_new_region(start, end); + unsigned long expected_width = (end - start) / nr_pieces; + unsigned long i = 0; - t = damon_new_target(42); - r = damon_new_region(5, 59); damon_add_region(r, t); - KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(t, r, 5), 0); - KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 5u); + KUNIT_EXPECT_EQ(test, + damon_va_evenly_split_region(t, r, nr_pieces), 0); + KUNIT_EXPECT_EQ(test, damon_nr_regions(t), nr_pieces); - i = 0; damon_for_each_region(r, t) { - if (i == 4) + if (i == nr_pieces - 1) break; - KUNIT_EXPECT_EQ(test, r->ar.start, 5 + 10 * i++); - KUNIT_EXPECT_EQ(test, r->ar.end, 5 + 10 * i); + KUNIT_EXPECT_EQ(test, + r->ar.start, start + i++ * expected_width); + KUNIT_EXPECT_EQ(test, r->ar.end, start + i * expected_width); } - KUNIT_EXPECT_EQ(test, r->ar.start, 5 + 10 * i); - KUNIT_EXPECT_EQ(test, r->ar.end, 59ul); + KUNIT_EXPECT_EQ(test, r->ar.start, start + i * expected_width); + KUNIT_EXPECT_EQ(test, r->ar.end, end); damon_free_target(t); +} - t = damon_new_target(42); - r = damon_new_region(5, 6); - damon_add_region(r, t); - KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(t, r, 2), -EINVAL); - KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 1u); +static void damon_test_split_evenly(struct kunit *test) +{ + struct damon_ctx *c = damon_new_ctx(); + + KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(NULL, NULL, 5), + -EINVAL); + + damon_test_split_evenly_fail(test, 0, 100, 0); + damon_test_split_evenly_succ(test, 0, 100, 10); + damon_test_split_evenly_succ(test, 5, 59, 5); + damon_test_split_evenly_fail(test, 5, 6, 2); - damon_for_each_region(r, t) { - KUNIT_EXPECT_EQ(test, r->ar.start, 5ul); - KUNIT_EXPECT_EQ(test, r->ar.end, 6ul); - } - damon_free_target(t); damon_destroy_ctx(c); } diff --git a/mm/filemap.c b/mm/filemap.c index 282bff6ae177b..810e6e0a0c5c7 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -659,49 +659,6 @@ int filemap_write_and_wait(struct address_space *mapping) } EXPORT_SYMBOL(filemap_write_and_wait); -/** - * filemap_range_needs_writeback - check if range potentially needs writeback - * @mapping: address space within which to check - * @start_byte: offset in bytes where the range starts - * @end_byte: offset in bytes where the range ends (inclusive) - * - * Find at least one page in the range supplied, usually used to check if - * direct writing in this range will trigger a writeback. Used by O_DIRECT - * read/write with IOCB_NOWAIT, to see if the caller needs to do - * filemap_write_and_wait_range() before proceeding. - * - * Return: %true if the caller should do filemap_write_and_wait_range() before - * doing O_DIRECT to a page in this range, %false otherwise. - */ -bool filemap_range_needs_writeback(struct address_space *mapping, - loff_t start_byte, loff_t end_byte) -{ - XA_STATE(xas, &mapping->i_pages, start_byte >> PAGE_SHIFT); - pgoff_t max = end_byte >> PAGE_SHIFT; - struct page *page; - - if (!mapping_needs_writeback(mapping)) - return false; - if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) && - !mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) - return false; - if (end_byte < start_byte) - return false; - - rcu_read_lock(); - xas_for_each(&xas, page, max) { - if (xas_retry(&xas, page)) - continue; - if (xa_is_value(page)) - continue; - if (PageDirty(page) || PageLocked(page) || PageWriteback(page)) - break; - } - rcu_read_unlock(); - return page != NULL; -} -EXPORT_SYMBOL_GPL(filemap_range_needs_writeback); - /** * filemap_write_and_wait_range - write out & wait on a file range * @mapping: the address_space for the pages diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a9da159fe8d68..041ded3dc2560 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -906,46 +906,91 @@ static bool vma_has_reserves(struct vm_area_struct *vma, long chg) static void kthread_clear_huge_pages(struct work_struct *work) { - struct hstate *h = container_of(work, struct hstate, clear_work.work); - unsigned int order = huge_page_order(h); - unsigned long i, nr_pages = 1 << order; + unsigned long i, nr_pages; struct page *page, *iter; + unsigned int order; + struct hstate *h; int nid; spin_lock(&hugetlb_lock); - for (nid = 0; nid < MAX_NUMNODES; nid++) { - while (!list_empty(&h->hugepage_wait_for_clean[nid])) { - page = list_entry( - h->hugepage_wait_for_clean[nid].next, - struct page, - lru); - list_del(&page->lru); - spin_unlock(&hugetlb_lock); + for_each_hstate(h) { + order = huge_page_order(h); + nr_pages = 1 << order; - for (i = 0, iter = page; - i < nr_pages; - i++, iter = mem_map_next(iter, page, i)) { - clear_highpage(iter); - cond_resched(); - } + for (nid = 0; nid < MAX_NUMNODES; nid++) { + while (!list_empty(&h->hugepage_wait_for_clean[nid])) { + page = list_entry( + h->hugepage_wait_for_clean[nid].next, + struct page, + lru); + list_del(&page->lru); + spin_unlock(&hugetlb_lock); - spin_lock(&hugetlb_lock); - list_add(&page->lru, &h->hugepage_freelists[nid]); + for (i = 0, iter = page; + i < nr_pages; + i++, iter = mem_map_next(iter, page, i)) { + clear_highpage(iter); + cond_resched(); + } - cond_resched_lock(&hugetlb_lock); + spin_lock(&hugetlb_lock); + list_add(&page->lru, + &h->hugepage_freelists[nid]); + + cond_resched_lock(&hugetlb_lock); + } } } spin_unlock(&hugetlb_lock); } +/* + * The cpumask is the mask of possible cpus that can be waken + * to clean huge pages after releasing them. + */ +static struct cpumask hugetlb_background_clr_cpumask; + +static DEFINE_PER_CPU(struct delayed_work, hugetlb_clean_work); + +static void wakeup_background_clean_work(void) +{ + struct delayed_work *work; + int cpu; + + if (cpumask_empty(&hugetlb_background_clr_cpumask)) { + cpu = raw_smp_processor_id(); + work = &per_cpu(hugetlb_clean_work, cpu); + schedule_delayed_work_on(cpu, work, msecs_to_jiffies(5)); + return; + } + + for_each_cpu_and(cpu, + &hugetlb_background_clr_cpumask, + cpu_online_mask) { + work = &per_cpu(hugetlb_clean_work, cpu); + schedule_delayed_work_on(cpu, work, msecs_to_jiffies(5)); + } +} + +static void flush_background_clean_work(void) +{ + struct delayed_work *work; + int cpu; + + for_each_cpu_and(cpu, + &hugetlb_background_clr_cpumask, + cpu_online_mask) { + work = &per_cpu(hugetlb_clean_work, cpu); + flush_delayed_work(work); + } +} + static void enqueue_huge_page(struct hstate *h, struct page *page) { int nid = page_to_nid(page); if (hugetlb_clear_hpage_background) { list_move(&page->lru, &h->hugepage_wait_for_clean[nid]); - schedule_delayed_work_on(raw_smp_processor_id(), - &h->clear_work, - msecs_to_jiffies(5)); + wakeup_background_clean_work(); } else { list_move(&page->lru, &h->hugepage_freelists[nid]); } @@ -965,7 +1010,7 @@ static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid) if (hugetlb_clear_hpage_background && &h->hugepage_freelists[nid] == &page->lru) { spin_unlock(&hugetlb_lock); - flush_delayed_work(&h->clear_work); + flush_background_clean_work(); spin_lock(&hugetlb_lock); list_for_each_entry(page, &h->hugepage_freelists[nid], lru) @@ -1346,7 +1391,7 @@ static void hpage_free_vmemmap_workfn(struct work_struct *work) */ h = size_to_hstate(page_size(page)); - if (alloc_huge_page_vmemmap(h, page)) { + if (hugetlb_vmemmap_alloc(h, page)) { spin_lock(&hugetlb_lock); add_hugetlb_page(h, page); spin_unlock(&hugetlb_lock); @@ -1364,7 +1409,7 @@ static DECLARE_WORK(hpage_free_vmemmap_work, hpage_free_vmemmap_workfn); static inline void flush_free_hpage_work(struct hstate *h) { - if (free_vmemmap_pages_per_hpage(h)) + if (hugetlb_optimize_vmemmap_pages(h)) flush_work(&hpage_free_vmemmap_work); } @@ -1558,7 +1603,7 @@ void free_huge_page(struct page *page) static void prep_new_huge_page(struct hstate *h, struct page *page, int nid) { ClearHPageVmemmapOptimized(page); - free_huge_page_vmemmap(h, page); + hugetlb_vmemmap_free(h, page); INIT_LIST_HEAD(&page->lru); set_compound_page_dtor(page, HUGETLB_PAGE_DTOR); spin_lock(&hugetlb_lock); @@ -1846,7 +1891,7 @@ int dissolve_free_huge_page(struct page *page) h->free_huge_pages--; h->free_huge_pages_node[nid]--; h->max_huge_pages--; - ClearPageHugeFreed(page); + ClearPageHugeFreed(head); spin_unlock(&hugetlb_lock); /* @@ -1857,7 +1902,7 @@ int dissolve_free_huge_page(struct page *page) * Attempt to allocate vmemmmap here so that we can take * appropriate action on failure. */ - rc = alloc_huge_page_vmemmap(h, head); + rc = hugetlb_vmemmap_alloc(h, head); spin_lock(&hugetlb_lock); if (!rc) { /* @@ -3288,7 +3333,6 @@ void __init hugetlb_add_hstate(unsigned int order) h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1); h->nr_huge_pages = 0; h->free_huge_pages = 0; - INIT_DELAYED_WORK(&h->clear_work, kthread_clear_huge_pages); for (i = 0; i < MAX_NUMNODES; ++i) { INIT_LIST_HEAD(&h->hugepage_freelists[i]); INIT_LIST_HEAD(&h->hugepage_wait_for_clean[i]); @@ -3450,8 +3494,71 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write, return ret; } +static unsigned long *sysctl_hugetlb_background_clr_cpumask_bits = + cpumask_bits(&hugetlb_background_clr_cpumask); + +static int hugetlb_background_clr_cpumask_handler(struct ctl_table *table, + int write, + void __user *buffer, + size_t *lenp, + loff_t *ppos) +{ + int err; + cpumask_var_t tmpmask; + + if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL)) + return -ENOMEM; + + cpumask_copy(tmpmask, &hugetlb_background_clr_cpumask); + + spin_lock(&hugetlb_lock); + err = proc_do_large_bitmap(table, write, buffer, lenp, ppos); + if (!err && write) { + int cpu; + + cpumask_and(&hugetlb_background_clr_cpumask, + &hugetlb_background_clr_cpumask, + cpu_possible_mask); + + cpumask_xor(tmpmask, tmpmask, &hugetlb_background_clr_cpumask); + for_each_cpu(cpu, tmpmask) + cancel_delayed_work(&per_cpu(hugetlb_clean_work, cpu)); + } + + spin_unlock(&hugetlb_lock); + free_cpumask_var(tmpmask); + + return err; +} + +static struct ctl_table hugetlb_background_clr_cpumask_ctl_table[] = { + { + .procname = "hugetlb_clear_hpage_background_cpumask", + .data = &sysctl_hugetlb_background_clr_cpumask_bits, + .maxlen = NR_CPUS, + .mode = 0644, + .proc_handler = hugetlb_background_clr_cpumask_handler, + }, + { } +}; + #endif /* CONFIG_SYSCTL */ +static void __init hugetlb_init_background_clean(void) +{ + int cpu; + +#ifdef CONFIG_SYSCTL + if (!register_sysctl("vm", hugetlb_background_clr_cpumask_ctl_table)) + pr_err("Hugetlb: Unable to register background clean sysctl"); +#endif + + for_each_possible_cpu(cpu) + INIT_DELAYED_WORK(per_cpu_ptr(&hugetlb_clean_work, cpu), + kthread_clear_huge_pages); +} +late_initcall(hugetlb_init_background_clean); + void hugetlb_report_meminfo(struct seq_file *m) { struct hstate *h; @@ -3825,6 +3932,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, struct hstate *h = hstate_vma(vma); unsigned long sz = huge_page_size(h); struct mmu_notifier_range range; + bool force_flush = false; WARN_ON(!is_vm_hugetlb_page(vma)); BUG_ON(start & ~huge_page_mask(h)); @@ -3853,10 +3961,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, ptl = huge_pte_lock(h, mm, ptep); if (huge_pmd_unshare(mm, &address, ptep)) { spin_unlock(ptl); - /* - * We just unmapped a page of PMDs by clearing a PUD. - * The caller's TLB flush range should cover this area. - */ + tlb_flush_pmd_range(tlb, address & PUD_MASK, PUD_SIZE); + force_flush = true; continue; } @@ -3913,6 +4019,22 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, } mmu_notifier_invalidate_range_end(&range); tlb_end_vma(tlb, vma); + + /* + * If we unshared PMDs, the TLB flush was not recorded in mmu_gather. We + * could defer the flush until now, since by holding i_mmap_rwsem we + * guaranteed that the last refernece would not be dropped. But we must + * do the flushing before we return, as otherwise i_mmap_rwsem will be + * dropped and the last reference to the shared PMDs page might be + * dropped as well. + * + * In theory we could defer the freeing of the PMD pages as well, but + * huge_pmd_unshare() relies on the exact page_count for the PMD page to + * detect sharing, so we cannot defer the release of the page either. + * Instead, do flush now. + */ + if (force_flush) + tlb_flush_mmu_tlbonly(tlb); } void __unmap_hugepage_range_final(struct mmu_gather *tlb, @@ -5256,7 +5378,14 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) pud_clear(pud); put_page(virt_to_page(ptep)); mm_dec_nr_pmds(mm); - *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE; + /* + * This update of passed address optimizes loops sequentially + * processing addresses in increments of huge page size (PMD_SIZE + * in this case). By clearing the pud, a PUD_SIZE area is unmapped. + * Update address to the 'last page' in the cleared area so that + * calling loop can move to first page past this area. + */ + *addr |= PUD_SIZE - PMD_SIZE; return 1; } #define want_pmd_share() (1) diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index 087b79f36bc43..d6cd5236ad61f 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -1,181 +1,16 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Free some vmemmap pages of HugeTLB + * Optimize vmemmap pages associated with HugeTLB * * Copyright (c) 2020, Bytedance. All rights reserved. * * Author: Muchun Song * - * The struct page structures (page structs) are used to describe a physical - * page frame. By default, there is a one-to-one mapping from a page frame to - * it's corresponding page struct. - * - * HugeTLB pages consist of multiple base page size pages and is supported by - * many architectures. See hugetlbpage.rst in the Documentation directory for - * more details. On the x86-64 architecture, HugeTLB pages of size 2MB and 1GB - * are currently supported. Since the base page size on x86 is 4KB, a 2MB - * HugeTLB page consists of 512 base pages and a 1GB HugeTLB page consists of - * 4096 base pages. For each base page, there is a corresponding page struct. - * - * Within the HugeTLB subsystem, only the first 4 page structs are used to - * contain unique information about a HugeTLB page. __NR_USED_SUBPAGE provides - * this upper limit. The only 'useful' information in the remaining page structs - * is the compound_head field, and this field is the same for all tail pages. - * - * By removing redundant page structs for HugeTLB pages, memory can be returned - * to the buddy allocator for other uses. - * - * Different architectures support different HugeTLB pages. For example, the - * following table is the HugeTLB page size supported by x86 and arm64 - * architectures. Because arm64 supports 4k, 16k, and 64k base pages and - * supports contiguous entries, so it supports many kinds of sizes of HugeTLB - * page. - * - * +--------------+-----------+-----------------------------------------------+ - * | Architecture | Page Size | HugeTLB Page Size | - * +--------------+-----------+-----------+-----------+-----------+-----------+ - * | x86-64 | 4KB | 2MB | 1GB | | | - * +--------------+-----------+-----------+-----------+-----------+-----------+ - * | | 4KB | 64KB | 2MB | 32MB | 1GB | - * | +-----------+-----------+-----------+-----------+-----------+ - * | arm64 | 16KB | 2MB | 32MB | 1GB | | - * | +-----------+-----------+-----------+-----------+-----------+ - * | | 64KB | 2MB | 512MB | 16GB | | - * +--------------+-----------+-----------+-----------+-----------+-----------+ - * - * When the system boot up, every HugeTLB page has more than one struct page - * structs which size is (unit: pages): - * - * struct_size = HugeTLB_Size / PAGE_SIZE * sizeof(struct page) / PAGE_SIZE - * - * Where HugeTLB_Size is the size of the HugeTLB page. We know that the size - * of the HugeTLB page is always n times PAGE_SIZE. So we can get the following - * relationship. - * - * HugeTLB_Size = n * PAGE_SIZE - * - * Then, - * - * struct_size = n * PAGE_SIZE / PAGE_SIZE * sizeof(struct page) / PAGE_SIZE - * = n * sizeof(struct page) / PAGE_SIZE - * - * We can use huge mapping at the pud/pmd level for the HugeTLB page. - * - * For the HugeTLB page of the pmd level mapping, then - * - * struct_size = n * sizeof(struct page) / PAGE_SIZE - * = PAGE_SIZE / sizeof(pte_t) * sizeof(struct page) / PAGE_SIZE - * = sizeof(struct page) / sizeof(pte_t) - * = 64 / 8 - * = 8 (pages) - * - * Where n is how many pte entries which one page can contains. So the value of - * n is (PAGE_SIZE / sizeof(pte_t)). - * - * This optimization only supports 64-bit system, so the value of sizeof(pte_t) - * is 8. And this optimization also applicable only when the size of struct page - * is a power of two. In most cases, the size of struct page is 64 bytes (e.g. - * x86-64 and arm64). So if we use pmd level mapping for a HugeTLB page, the - * size of struct page structs of it is 8 page frames which size depends on the - * size of the base page. - * - * For the HugeTLB page of the pud level mapping, then - * - * struct_size = PAGE_SIZE / sizeof(pmd_t) * struct_size(pmd) - * = PAGE_SIZE / 8 * 8 (pages) - * = PAGE_SIZE (pages) - * - * Where the struct_size(pmd) is the size of the struct page structs of a - * HugeTLB page of the pmd level mapping. - * - * E.g.: A 2MB HugeTLB page on x86_64 consists in 8 page frames while 1GB - * HugeTLB page consists in 4096. - * - * Next, we take the pmd level mapping of the HugeTLB page as an example to - * show the internal implementation of this optimization. There are 8 pages - * struct page structs associated with a HugeTLB page which is pmd mapped. - * - * Here is how things look before optimization. - * - * HugeTLB struct pages(8 pages) page frame(8 pages) - * +-----------+ ---virt_to_page---> +-----------+ mapping to +-----------+ - * | | | 0 | -------------> | 0 | - * | | +-----------+ +-----------+ - * | | | 1 | -------------> | 1 | - * | | +-----------+ +-----------+ - * | | | 2 | -------------> | 2 | - * | | +-----------+ +-----------+ - * | | | 3 | -------------> | 3 | - * | | +-----------+ +-----------+ - * | | | 4 | -------------> | 4 | - * | PMD | +-----------+ +-----------+ - * | level | | 5 | -------------> | 5 | - * | mapping | +-----------+ +-----------+ - * | | | 6 | -------------> | 6 | - * | | +-----------+ +-----------+ - * | | | 7 | -------------> | 7 | - * | | +-----------+ +-----------+ - * | | - * | | - * | | - * +-----------+ - * - * The value of page->compound_head is the same for all tail pages. The first - * page of page structs (page 0) associated with the HugeTLB page contains the 4 - * page structs necessary to describe the HugeTLB. The only use of the remaining - * pages of page structs (page 1 to page 7) is to point to page->compound_head. - * Therefore, we can remap pages 1 to 7 to page 0. Only 1 page of page structs - * will be used for each HugeTLB page. This will allow us to free the remaining - * 7 pages to the buddy allocator. - * - * Here is how things look after remapping. - * - * HugeTLB struct pages(8 pages) page frame(8 pages) - * +-----------+ ---virt_to_page---> +-----------+ mapping to +-----------+ - * | | | 0 | -------------> | 0 | - * | | +-----------+ +-----------+ - * | | | 1 | ---------------^ ^ ^ ^ ^ ^ ^ - * | | +-----------+ | | | | | | - * | | | 2 | -----------------+ | | | | | - * | | +-----------+ | | | | | - * | | | 3 | -------------------+ | | | | - * | | +-----------+ | | | | - * | | | 4 | ---------------------+ | | | - * | PMD | +-----------+ | | | - * | level | | 5 | -----------------------+ | | - * | mapping | +-----------+ | | - * | | | 6 | -------------------------+ | - * | | +-----------+ | - * | | | 7 | ---------------------------+ - * | | +-----------+ - * | | - * | | - * | | - * +-----------+ - * - * When a HugeTLB is freed to the buddy system, we should allocate 7 pages for - * vmemmap pages and restore the previous mapping relationship. - * - * For the HugeTLB page of the pud level mapping. It is similar to the former. - * We also can use this approach to free (PAGE_SIZE - 1) vmemmap pages. - * - * Apart from the HugeTLB page of the pmd/pud level mapping, some architectures - * (e.g. aarch64) provides a contiguous bit in the translation table entries - * that hints to the MMU to indicate that it is one of a contiguous set of - * entries that can be cached in a single TLB entry. - * - * The contiguous bit is used to increase the mapping size at the pmd and pte - * (last) level. So this type of HugeTLB page can be optimized only when its - * size of the struct page structs is greater than 1 page. - * - * Notice: The head vmemmap page is not freed to the buddy allocator and all - * tail vmemmap pages are mapped to the head vmemmap page frame. So we can see - * more than one struct page struct with PG_head (e.g. 8 per 2 MB HugeTLB page) - * associated with each HugeTLB page. The compound_head() can handle this - * correctly (more details refer to the comment above compound_head()). + * See Documentation/vm/vmemmap_dedup.rst */ #define pr_fmt(fmt) "HugeTLB: " fmt +#include #include "hugetlb_vmemmap.h" /* @@ -188,53 +23,63 @@ #define RESERVE_VMEMMAP_NR 1U #define RESERVE_VMEMMAP_SIZE (RESERVE_VMEMMAP_NR << PAGE_SHIFT) -DEFINE_STATIC_KEY_MAYBE(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON, - hugetlb_free_vmemmap_enabled_key); -EXPORT_SYMBOL(hugetlb_free_vmemmap_enabled_key); +enum vmemmap_optimize_mode { + VMEMMAP_OPTIMIZE_OFF, + VMEMMAP_OPTIMIZE_ON, +}; -static int __init early_hugetlb_free_vmemmap_param(char *buf) -{ - /* We cannot optimize if a "struct page" crosses page boundaries. */ - if (!is_power_of_2(sizeof(struct page))) { - pr_warn("cannot free vmemmap pages because \"struct page\" crosses page boundaries\n"); - return 0; - } +DEFINE_STATIC_KEY_MAYBE(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON, + hugetlb_optimize_vmemmap_key); +EXPORT_SYMBOL(hugetlb_optimize_vmemmap_key); - if (!buf) - return -EINVAL; +static enum vmemmap_optimize_mode vmemmap_optimize_mode = + IS_ENABLED(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON); - if (!strcmp(buf, "on")) - static_branch_enable(&hugetlb_free_vmemmap_enabled_key); - else if (!strcmp(buf, "off")) - static_branch_disable(&hugetlb_free_vmemmap_enabled_key); - else - return -EINVAL; +static void vmemmap_optimize_mode_switch(enum vmemmap_optimize_mode to) +{ + if (vmemmap_optimize_mode == to) + return; - return 0; + if (to == VMEMMAP_OPTIMIZE_OFF) + static_branch_dec(&hugetlb_optimize_vmemmap_key); + else + static_branch_inc(&hugetlb_optimize_vmemmap_key); + WRITE_ONCE(vmemmap_optimize_mode, to); } -early_param("hugetlb_free_vmemmap", early_hugetlb_free_vmemmap_param); -static inline unsigned long free_vmemmap_pages_size_per_hpage(struct hstate *h) +static int __init hugetlb_vmemmap_early_param(char *buf) { - return (unsigned long)free_vmemmap_pages_per_hpage(h) << PAGE_SHIFT; + bool enable; + enum vmemmap_optimize_mode mode; + + if (kstrtobool(buf, &enable)) + return -EINVAL; + + mode = enable ? VMEMMAP_OPTIMIZE_ON : VMEMMAP_OPTIMIZE_OFF; + vmemmap_optimize_mode_switch(mode); + + return 0; } +early_param("hugetlb_free_vmemmap", hugetlb_vmemmap_early_param); /* * Previously discarded vmemmap pages will be allocated and remapping * after this function returns zero. */ -int alloc_huge_page_vmemmap(struct hstate *h, struct page *head) +int hugetlb_vmemmap_alloc(struct hstate *h, struct page *head) { int ret; unsigned long vmemmap_addr = (unsigned long)head; - unsigned long vmemmap_end, vmemmap_reuse; + unsigned long vmemmap_end, vmemmap_reuse, vmemmap_pages; if (!HPageVmemmapOptimized(head)) return 0; - vmemmap_addr += RESERVE_VMEMMAP_SIZE; - vmemmap_end = vmemmap_addr + free_vmemmap_pages_size_per_hpage(h); - vmemmap_reuse = vmemmap_addr - PAGE_SIZE; + vmemmap_addr += RESERVE_VMEMMAP_SIZE; + vmemmap_pages = hugetlb_optimize_vmemmap_pages(h); + vmemmap_end = vmemmap_addr + (vmemmap_pages << PAGE_SHIFT); + vmemmap_reuse = vmemmap_addr - PAGE_SIZE; + /* * The pages which the vmemmap virtual address range [@vmemmap_addr, * @vmemmap_end) are mapped to are freed to the buddy allocator, and @@ -244,30 +89,40 @@ int alloc_huge_page_vmemmap(struct hstate *h, struct page *head) */ ret = vmemmap_remap_alloc(vmemmap_addr, vmemmap_end, vmemmap_reuse, GFP_KERNEL | __GFP_NORETRY | __GFP_THISNODE); - if (!ret) + if (!ret) { ClearHPageVmemmapOptimized(head); + static_branch_dec(&hugetlb_optimize_vmemmap_key); + } return ret; } -void free_huge_page_vmemmap(struct hstate *h, struct page *head) +void hugetlb_vmemmap_free(struct hstate *h, struct page *head) { unsigned long vmemmap_addr = (unsigned long)head; - unsigned long vmemmap_end, vmemmap_reuse; + unsigned long vmemmap_end, vmemmap_reuse, vmemmap_pages; + + vmemmap_pages = hugetlb_optimize_vmemmap_pages(h); + if (!vmemmap_pages) + return; - if (!free_vmemmap_pages_per_hpage(h)) + if (READ_ONCE(vmemmap_optimize_mode) == VMEMMAP_OPTIMIZE_OFF) return; - vmemmap_addr += RESERVE_VMEMMAP_SIZE; - vmemmap_end = vmemmap_addr + free_vmemmap_pages_size_per_hpage(h); - vmemmap_reuse = vmemmap_addr - PAGE_SIZE; + static_branch_inc(&hugetlb_optimize_vmemmap_key); + + vmemmap_addr += RESERVE_VMEMMAP_SIZE; + vmemmap_end = vmemmap_addr + (vmemmap_pages << PAGE_SHIFT); + vmemmap_reuse = vmemmap_addr - PAGE_SIZE; /* * Remap the vmemmap virtual address range [@vmemmap_addr, @vmemmap_end) * to the page which @vmemmap_reuse is mapped to, then free the pages * which the range [@vmemmap_addr, @vmemmap_end] is mapped to. */ - if (!vmemmap_remap_free(vmemmap_addr, vmemmap_end, vmemmap_reuse)) + if (vmemmap_remap_free(vmemmap_addr, vmemmap_end, vmemmap_reuse)) + static_branch_dec(&hugetlb_optimize_vmemmap_key); + else SetHPageVmemmapOptimized(head); } @@ -278,20 +133,15 @@ void __init hugetlb_vmemmap_init(struct hstate *h) /* * There are only (RESERVE_VMEMMAP_SIZE / sizeof(struct page)) struct - * page structs that can be used when CONFIG_HUGETLB_PAGE_FREE_VMEMMAP, + * page structs that can be used when CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP, * so add a BUILD_BUG_ON to catch invalid usage of the tail struct page. */ BUILD_BUG_ON(__NR_USED_SUBPAGE >= RESERVE_VMEMMAP_SIZE / sizeof(struct page)); if (!is_power_of_2(sizeof(struct page))) { - /* - * The hugetlb_free_vmemmap_enabled_key can be enabled when - * CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON. It should - * be disabled if "struct page" crosses page boundaries. - */ - if (IS_ENABLED(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON)) - static_branch_disable(&hugetlb_free_vmemmap_enabled_key); + pr_warn_once("cannot optimize vmemmap pages because \"struct page\" crosses page boundaries\n"); + static_branch_disable(&hugetlb_optimize_vmemmap_key); return; } @@ -305,21 +155,57 @@ void __init hugetlb_vmemmap_init(struct hstate *h) * hugetlbpage.rst for more details. */ if (likely(vmemmap_pages > RESERVE_VMEMMAP_NR)) - h->nr_free_vmemmap_pages = vmemmap_pages - RESERVE_VMEMMAP_NR; + h->optimize_vmemmap_pages = vmemmap_pages - RESERVE_VMEMMAP_NR; + + pr_info("can optimize %d vmemmap pages for %s\n", + h->optimize_vmemmap_pages, h->name); +} + +#ifdef CONFIG_PROC_SYSCTL +static int hugetlb_optimize_vmemmap_handler(struct ctl_table *table, int write, + void *buffer, size_t *length, + loff_t *ppos) +{ + int ret; + enum vmemmap_optimize_mode mode; + static DEFINE_MUTEX(sysctl_mutex); + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; - pr_info("can free %d vmemmap pages for %s\n", h->nr_free_vmemmap_pages, - h->name); + mutex_lock(&sysctl_mutex); + mode = vmemmap_optimize_mode; + table->data = &mode; + ret = proc_dointvec_minmax(table, write, buffer, length, ppos); + if (write && !ret) + vmemmap_optimize_mode_switch(mode); + mutex_unlock(&sysctl_mutex); + + return ret; } -int hugetlb_vmemmap_sysctl_handler(struct ctl_table *table, int write, - void *buffer, size_t *length, loff_t *ppos) +static struct ctl_table hugetlb_vmemmap_sysctls[] = { + { + .procname = "hugetlb_optimize_vmemmap", + .maxlen = sizeof(enum vmemmap_optimize_mode), + .mode = 0644, + .proc_handler = hugetlb_optimize_vmemmap_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { } +}; + +static __init int hugetlb_vmemmap_sysctls_init(void) { /* - * The vmemmap pages cannot be optimized if a "struct page" crosses page - * boundaries. + * If "struct page" crosses page boundaries, the vmemmap pages cannot + * be optimized. */ - if (write && !is_power_of_2(sizeof(struct page))) - return -EPERM; + if (is_power_of_2(sizeof(struct page))) + register_sysctl_init("vm", hugetlb_vmemmap_sysctls); - return proc_do_static_key(table, write, buffer, length, ppos); + return 0; } +late_initcall(hugetlb_vmemmap_sysctls_init); +#endif /* CONFIG_PROC_SYSCTL */ diff --git a/mm/hugetlb_vmemmap.h b/mm/hugetlb_vmemmap.h index b67a159027f4a..109b0a53b6fe9 100644 --- a/mm/hugetlb_vmemmap.h +++ b/mm/hugetlb_vmemmap.h @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Free some vmemmap pages of HugeTLB + * Optimize vmemmap pages associated with HugeTLB * * Copyright (c) 2020, Bytedance. All rights reserved. * @@ -10,28 +10,26 @@ #define _LINUX_HUGETLB_VMEMMAP_H #include -#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP -int alloc_huge_page_vmemmap(struct hstate *h, struct page *head); -void free_huge_page_vmemmap(struct hstate *h, struct page *head); +#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP +int hugetlb_vmemmap_alloc(struct hstate *h, struct page *head); +void hugetlb_vmemmap_free(struct hstate *h, struct page *head); void hugetlb_vmemmap_init(struct hstate *h); /* - * How many vmemmap pages associated with a HugeTLB page that can be freed - * to the buddy allocator. + * How many vmemmap pages associated with a HugeTLB page that can be + * optimized and freed to the buddy allocator. */ -static inline unsigned int free_vmemmap_pages_per_hpage(struct hstate *h) +static inline unsigned int hugetlb_optimize_vmemmap_pages(struct hstate *h) { - if (hugetlb_free_vmemmap_enabled()) - return h->nr_free_vmemmap_pages; - return 0; + return h->optimize_vmemmap_pages; } #else -static inline int alloc_huge_page_vmemmap(struct hstate *h, struct page *head) +static inline int hugetlb_vmemmap_alloc(struct hstate *h, struct page *head) { return 0; } -static inline void free_huge_page_vmemmap(struct hstate *h, struct page *head) +static inline void hugetlb_vmemmap_free(struct hstate *h, struct page *head) { } @@ -39,9 +37,9 @@ static inline void hugetlb_vmemmap_init(struct hstate *h) { } -static inline unsigned int free_vmemmap_pages_per_hpage(struct hstate *h) +static inline unsigned int hugetlb_optimize_vmemmap_pages(struct hstate *h) { return 0; } -#endif /* CONFIG_HUGETLB_PAGE_FREE_VMEMMAP */ +#endif /* CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP */ #endif /* _LINUX_HUGETLB_VMEMMAP_H */ diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c index 5b7430bd83a65..c9d48f402238a 100644 --- a/mm/hwpoison-inject.c +++ b/mm/hwpoison-inject.c @@ -53,7 +53,8 @@ static int hwpoison_inject(void *data, u64 val) inject: pr_info("Injecting memory failure at pfn %#lx\n", pfn); - return memory_failure(pfn, MF_COUNT_INCREASED); + err = memory_failure(pfn, MF_COUNT_INCREASED); + return (err == -EOPNOTSUPP) ? 0 : err; put_out: put_hwpoison_page(p); return 0; diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 312942d784058..3761c79137b17 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -787,6 +787,8 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp) unsigned long flags; struct kmemleak_object *object; struct kmemleak_scan_area *area = NULL; + unsigned long untagged_ptr; + unsigned long untagged_objp; object = find_and_get_object(ptr, 1); if (!object) { @@ -795,6 +797,9 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp) return; } + untagged_ptr = (unsigned long)kasan_reset_tag((void *)ptr); + untagged_objp = (unsigned long)kasan_reset_tag((void *)object->pointer); + if (scan_area_cache) area = kmem_cache_alloc(scan_area_cache, gfp_kmemleak_mask(gfp)); @@ -806,8 +811,8 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp) goto out_unlock; } if (size == SIZE_MAX) { - size = object->pointer + object->size - ptr; - } else if (ptr + size > object->pointer + object->size) { + size = untagged_objp + object->size - untagged_ptr; + } else if (untagged_ptr + size > untagged_objp + object->size) { kmemleak_warn("Scan area larger than object 0x%08lx\n", ptr); dump_object_info(object); kmem_cache_free(scan_area_cache, area); @@ -1118,7 +1123,7 @@ EXPORT_SYMBOL(kmemleak_no_scan); void __ref kmemleak_alloc_phys(phys_addr_t phys, size_t size, int min_count, gfp_t gfp) { - if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn) + if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn) kmemleak_alloc(__va(phys), size, min_count, gfp); } EXPORT_SYMBOL(kmemleak_alloc_phys); @@ -1132,7 +1137,7 @@ EXPORT_SYMBOL(kmemleak_alloc_phys); */ void __ref kmemleak_free_part_phys(phys_addr_t phys, size_t size) { - if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn) + if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn) kmemleak_free_part(__va(phys), size); } EXPORT_SYMBOL(kmemleak_free_part_phys); @@ -1144,7 +1149,7 @@ EXPORT_SYMBOL(kmemleak_free_part_phys); */ void __ref kmemleak_not_leak_phys(phys_addr_t phys) { - if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn) + if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn) kmemleak_not_leak(__va(phys)); } EXPORT_SYMBOL(kmemleak_not_leak_phys); @@ -1156,7 +1161,7 @@ EXPORT_SYMBOL(kmemleak_not_leak_phys); */ void __ref kmemleak_ignore_phys(phys_addr_t phys) { - if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn) + if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn) kmemleak_ignore(__va(phys)); } EXPORT_SYMBOL(kmemleak_ignore_phys); @@ -1399,7 +1404,8 @@ static void kmemleak_scan(void) { unsigned long flags; struct kmemleak_object *object; - int i; + struct zone *zone; + int __maybe_unused i; int new_leaks = 0; jiffies_last_scan = jiffies; @@ -1439,9 +1445,9 @@ static void kmemleak_scan(void) * Struct page scanning for each node. */ get_online_mems(); - for_each_online_node(i) { - unsigned long start_pfn = node_start_pfn(i); - unsigned long end_pfn = node_end_pfn(i); + for_each_populated_zone(zone) { + unsigned long start_pfn = zone->zone_start_pfn; + unsigned long end_pfn = zone_end_pfn(zone); unsigned long pfn; for (pfn = start_pfn; pfn < end_pfn; pfn++) { @@ -1450,8 +1456,8 @@ static void kmemleak_scan(void) if (!page) continue; - /* only scan pages belonging to this node */ - if (page_to_nid(page) != i) + /* only scan pages belonging to this zone */ + if (page_zone(page) != zone) continue; /* only scan if page is in use */ if (page_count(page) == 0) diff --git a/mm/madvise.c b/mm/madvise.c index 11ac4e393eb7b..62b81da099aa3 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -919,6 +919,8 @@ static int madvise_inject_error(int behavior, */ put_page(page); ret = memory_failure(pfn, 0); + if (ret == -EOPNOTSUPP) + ret = 0; if (ret) return ret; } diff --git a/mm/memblock.c b/mm/memblock.c index 11f6ae37d6699..a75cc65f03307 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -164,6 +164,8 @@ bool __init_memblock memblock_overlaps_region(struct memblock_type *type, { unsigned long i; + memblock_cap_size(base, &size); + for (i = 0; i < type->cnt; i++) if (memblock_addrs_overlap(base, size, type->regions[i].base, type->regions[i].size)) @@ -346,14 +348,20 @@ void __init memblock_discard(void) addr = __pa(memblock.reserved.regions); size = PAGE_ALIGN(sizeof(struct memblock_region) * memblock.reserved.max); - __memblock_free_late(addr, size); + if (memblock_reserved_in_slab) + kfree(memblock.reserved.regions); + else + __memblock_free_late(addr, size); } if (memblock.memory.regions != memblock_memory_init_regions) { addr = __pa(memblock.memory.regions); size = PAGE_ALIGN(sizeof(struct memblock_region) * memblock.memory.max); - __memblock_free_late(addr, size); + if (memblock_memory_in_slab) + kfree(memblock.memory.regions); + else + __memblock_free_late(addr, size); } } #endif @@ -1760,7 +1768,6 @@ bool __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t siz */ bool __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size) { - memblock_cap_size(base, &size); return memblock_overlaps_region(&memblock.reserved, base, size); } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 307e5283c6722..b713cb0136317 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -914,24 +914,6 @@ void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val) rcu_read_unlock(); } -/* - * mod_objcg_mlstate() may be called with irq enabled, so - * mod_memcg_lruvec_state() should be used. - */ -static inline void mod_objcg_mlstate(struct obj_cgroup *objcg, - struct pglist_data *pgdat, - enum node_stat_item idx, int nr) -{ - struct mem_cgroup *memcg; - struct lruvec *lruvec; - - rcu_read_lock(); - memcg = obj_cgroup_memcg(objcg); - lruvec = mem_cgroup_lruvec(memcg, pgdat); - mod_memcg_lruvec_state(lruvec, idx, nr); - rcu_read_unlock(); -} - /** * __count_memcg_events - account VM events in a cgroup * @memcg: the memory cgroup @@ -1525,6 +1507,26 @@ static const struct memory_stat memory_stats[] = { { "workingset_nodereclaim", 1, WORKINGSET_NODERECLAIM }, }; +/* Subset of vm_event_item to report for memcg event stats */ +static const unsigned int memcg_vm_event_stat[] = { + PGSCAN_KSWAPD, + PGSCAN_DIRECT, + PGSTEAL_KSWAPD, + PGSTEAL_DIRECT, + PGFAULT, + PGMAJFAULT, + PGREFILL, + PGACTIVATE, + PGDEACTIVATE, + PGLAZYFREE, + PGLAZYFREED, + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + THP_FAULT_ALLOC, + THP_COLLAPSE_ALLOC, +#endif +}; + static char *memory_stat_format(struct mem_cgroup *memcg) { struct seq_buf s; @@ -1560,27 +1562,17 @@ static char *memory_stat_format(struct mem_cgroup *memcg) } /* Accumulated memory events */ - - seq_buf_printf(&s, "pgfault %lu\n", memcg_events(memcg, PGFAULT)); - seq_buf_printf(&s, "pgmajfault %lu\n", memcg_events(memcg, PGMAJFAULT)); - seq_buf_printf(&s, "pgrefill %lu\n", memcg_events(memcg, PGREFILL)); seq_buf_printf(&s, "pgscan %lu\n", memcg_events(memcg, PGSCAN_KSWAPD) + memcg_events(memcg, PGSCAN_DIRECT)); seq_buf_printf(&s, "pgsteal %lu\n", memcg_events(memcg, PGSTEAL_KSWAPD) + memcg_events(memcg, PGSTEAL_DIRECT)); - seq_buf_printf(&s, "pgactivate %lu\n", memcg_events(memcg, PGACTIVATE)); - seq_buf_printf(&s, "pgdeactivate %lu\n", memcg_events(memcg, PGDEACTIVATE)); - seq_buf_printf(&s, "pglazyfree %lu\n", memcg_events(memcg, PGLAZYFREE)); - seq_buf_printf(&s, "pglazyfreed %lu\n", memcg_events(memcg, PGLAZYFREED)); -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - seq_buf_printf(&s, "thp_fault_alloc %lu\n", - memcg_events(memcg, THP_FAULT_ALLOC)); - seq_buf_printf(&s, "thp_collapse_alloc %lu\n", - memcg_events(memcg, THP_COLLAPSE_ALLOC)); -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + for (i = 0; i < ARRAY_SIZE(memcg_vm_event_stat); i++) + seq_buf_printf(&s, "%s %lu\n", + vm_event_name(memcg_vm_event_stat[i]), + memcg_events(memcg, memcg_vm_event_stat[i])); /* The above should easily fit into one page */ WARN_ON_ONCE(seq_buf_has_overflowed(&s)); @@ -2292,41 +2284,6 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock, } #endif -/* - * Most kmem_cache_alloc() calls are from user context. The irq disable/enable - * sequence used in this case to access content from object stock is slow. - * To optimize for user context access, there are now two object stocks for - * task context and interrupt context access respectively. - * - * The task context object stock can be accessed by disabling preemption only - * which is cheap in non-preempt kernel. The interrupt context object stock - * can only be accessed after disabling interrupt. User context code can - * access interrupt object stock, but not vice versa. - */ -static inline struct obj_stock *get_obj_stock(unsigned long *pflags) -{ - struct memcg_stock_pcp *stock; - - if (likely(in_task())) { - *pflags = 0UL; - preempt_disable(); - stock = this_cpu_ptr(&memcg_stock); - return &stock->task_obj; - } - - local_irq_save(*pflags); - stock = this_cpu_ptr(&memcg_stock); - return &stock->irq_obj; -} - -static inline void put_obj_stock(unsigned long flags) -{ - if (likely(in_task())) - preempt_enable(); - else - local_irq_restore(flags); -} - /** * consume_stock: Try to consume stocked charge on this cpu. * @memcg: memcg to consume from. @@ -3056,6 +3013,59 @@ static struct mem_cgroup *get_mem_cgroup_from_objcg(struct obj_cgroup *objcg) */ #define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | __GFP_ACCOUNT) +/* + * Most kmem_cache_alloc() calls are from user context. The irq disable/enable + * sequence used in this case to access content from object stock is slow. + * To optimize for user context access, there are now two object stocks for + * task context and interrupt context access respectively. + * + * The task context object stock can be accessed by disabling preemption only + * which is cheap in non-preempt kernel. The interrupt context object stock + * can only be accessed after disabling interrupt. User context code can + * access interrupt object stock, but not vice versa. + */ +static inline struct obj_stock *get_obj_stock(unsigned long *pflags) +{ + struct memcg_stock_pcp *stock; + + if (likely(in_task())) { + *pflags = 0UL; + preempt_disable(); + stock = this_cpu_ptr(&memcg_stock); + return &stock->task_obj; + } + + local_irq_save(*pflags); + stock = this_cpu_ptr(&memcg_stock); + return &stock->irq_obj; +} + +static inline void put_obj_stock(unsigned long flags) +{ + if (likely(in_task())) + preempt_enable(); + else + local_irq_restore(flags); +} + +/* + * mod_objcg_mlstate() may be called with irq enabled, so + * mod_memcg_lruvec_state() should be used. + */ +static inline void mod_objcg_mlstate(struct obj_cgroup *objcg, + struct pglist_data *pgdat, + enum node_stat_item idx, int nr) +{ + struct mem_cgroup *memcg; + struct lruvec *lruvec; + + rcu_read_lock(); + memcg = obj_cgroup_memcg(objcg); + lruvec = mem_cgroup_lruvec(memcg, pgdat); + mod_memcg_lruvec_state(lruvec, idx, nr); + rcu_read_unlock(); +} + int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s, gfp_t gfp, bool new_page) { @@ -4227,7 +4237,7 @@ static int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css, #define LRU_ALL ((1 << NR_LRU_LISTS) - 1) static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, - int nid, unsigned int lru_mask) + int nid, unsigned int lru_mask, bool tree) { struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid)); unsigned long nr = 0; @@ -4238,13 +4248,17 @@ static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, for_each_lru(lru) { if (!(BIT(lru) & lru_mask)) continue; - nr += lruvec_page_state_local(lruvec, NR_LRU_BASE + lru); + if (tree) + nr += lruvec_page_state(lruvec, NR_LRU_BASE + lru); + else + nr += lruvec_page_state_local(lruvec, NR_LRU_BASE + lru); } return nr; } static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg, - unsigned int lru_mask) + unsigned int lru_mask, + bool tree) { unsigned long nr = 0; enum lru_list lru; @@ -4252,7 +4266,10 @@ static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg, for_each_lru(lru) { if (!(BIT(lru) & lru_mask)) continue; - nr += memcg_page_state_local(memcg, NR_LRU_BASE + lru); + if (tree) + nr += memcg_page_state(memcg, NR_LRU_BASE + lru); + else + nr += memcg_page_state_local(memcg, NR_LRU_BASE + lru); } return nr; } @@ -4272,38 +4289,28 @@ static int memcg_numa_stat_show(struct seq_file *m, void *v) }; const struct numa_stat *stat; int nid; - unsigned long nr; struct mem_cgroup *memcg = mem_cgroup_from_seq(m); for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) { - nr = mem_cgroup_nr_lru_pages(memcg, stat->lru_mask); - seq_printf(m, "%s=%lu", stat->name, nr); - for_each_node_state(nid, N_MEMORY) { - nr = mem_cgroup_node_nr_lru_pages(memcg, nid, - stat->lru_mask); - seq_printf(m, " N%d=%lu", nid, nr); - } + seq_printf(m, "%s=%lu", stat->name, + mem_cgroup_nr_lru_pages(memcg, stat->lru_mask, + false)); + for_each_node_state(nid, N_MEMORY) + seq_printf(m, " N%d=%lu", nid, + mem_cgroup_node_nr_lru_pages(memcg, nid, + stat->lru_mask, false)); seq_putc(m, '\n'); } for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) { - struct mem_cgroup *iter; - nr = 0; - for_each_mem_cgroup_tree(iter, memcg) { - nr += mem_cgroup_nr_lru_pages(iter, stat->lru_mask); - cond_resched(); - } - seq_printf(m, "hierarchical_%s=%lu", stat->name, nr); - for_each_node_state(nid, N_MEMORY) { - nr = 0; - for_each_mem_cgroup_tree(iter, memcg) { - nr += mem_cgroup_node_nr_lru_pages( - iter, nid, stat->lru_mask); - cond_resched(); - } - seq_printf(m, " N%d=%lu", nid, nr); - } + seq_printf(m, "hierarchical_%s=%lu", stat->name, + mem_cgroup_nr_lru_pages(memcg, stat->lru_mask, + true)); + for_each_node_state(nid, N_MEMORY) + seq_printf(m, " N%d=%lu", nid, + mem_cgroup_node_nr_lru_pages(memcg, nid, + stat->lru_mask, true)); seq_putc(m, '\n'); } @@ -6853,6 +6860,46 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of, return nbytes; } +static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + unsigned int nr_retries = MAX_RECLAIM_RETRIES; + unsigned long nr_to_reclaim, nr_reclaimed = 0; + int err; + + buf = strstrip(buf); + err = page_counter_memparse(buf, "", &nr_to_reclaim); + if (err) + return err; + + while (nr_reclaimed < nr_to_reclaim) { + unsigned long reclaimed; + + if (signal_pending(current)) + return -EINTR; + + /* + * This is the final attempt, drain percpu lru caches in the + * hope of introducing more evictable pages for + * try_to_free_mem_cgroup_pages(). + */ + if (!nr_retries) + lru_add_drain_all(); + + reclaimed = try_to_free_mem_cgroup_pages(memcg, + nr_to_reclaim - nr_reclaimed, + GFP_KERNEL, true); + + if (!reclaimed && !nr_retries--) + return -EAGAIN; + + nr_reclaimed += reclaimed; + } + + return nbytes; +} + static struct cftype memory_files[] = { { .name = "current", @@ -6932,6 +6979,11 @@ static struct cftype memory_files[] = { .seq_show = memory_wmark_read, }, #endif + { + .name = "reclaim", + .flags = CFTYPE_NS_DELEGATABLE, + .write = memory_reclaim, + }, { } /* terminate */ }; @@ -7461,7 +7513,7 @@ static int __init cgroup_memory(char *s) if (!strcmp(token, "nokmem")) cgroup_memory_nokmem = true; } - return 0; + return 1; } __setup("cgroup.memory=", cgroup_memory); diff --git a/mm/memfd.c b/mm/memfd.c index 2647c898990c8..fae4142f7d254 100644 --- a/mm/memfd.c +++ b/mm/memfd.c @@ -31,20 +31,28 @@ static void memfd_tag_pins(struct xa_state *xas) { struct page *page; - unsigned int tagged = 0; + int latency = 0; + int cache_count; lru_add_drain(); xas_lock_irq(xas); xas_for_each(xas, page, ULONG_MAX) { - if (xa_is_value(page)) - continue; - page = find_subpage(page, xas->xa_index); - if (page_count(page) - page_mapcount(page) > 1) + cache_count = 1; + if (!xa_is_value(page) && + PageTransHuge(page) && !PageHuge(page)) + cache_count = HPAGE_PMD_NR; + + if (!xa_is_value(page) && + page_count(page) - total_mapcount(page) != cache_count) xas_set_mark(xas, MEMFD_TAG_PINNED); + if (cache_count != 1) + xas_set(xas, page->index + cache_count); - if (++tagged % XA_CHECK_SCHED) + latency += cache_count; + if (latency < XA_CHECK_SCHED) continue; + latency = 0; xas_pause(xas); xas_unlock_irq(xas); @@ -73,7 +81,8 @@ static int memfd_wait_for_pins(struct address_space *mapping) error = 0; for (scan = 0; scan <= LAST_SCAN; scan++) { - unsigned int tagged = 0; + int latency = 0; + int cache_count; if (!xas_marked(&xas, MEMFD_TAG_PINNED)) break; @@ -87,10 +96,14 @@ static int memfd_wait_for_pins(struct address_space *mapping) xas_lock_irq(&xas); xas_for_each_marked(&xas, page, ULONG_MAX, MEMFD_TAG_PINNED) { bool clear = true; - if (xa_is_value(page)) - continue; - page = find_subpage(page, xas.xa_index); - if (page_count(page) - page_mapcount(page) != 1) { + + cache_count = 1; + if (!xa_is_value(page) && + PageTransHuge(page) && !PageHuge(page)) + cache_count = HPAGE_PMD_NR; + + if (!xa_is_value(page) && cache_count != + page_count(page) - total_mapcount(page)) { /* * On the last scan, we clean up all those tags * we inserted; but make a note that we still @@ -103,8 +116,11 @@ static int memfd_wait_for_pins(struct address_space *mapping) } if (clear) xas_clear_mark(&xas, MEMFD_TAG_PINNED); - if (++tagged % XA_CHECK_SCHED) + + latency += cache_count; + if (latency < XA_CHECK_SCHED) continue; + latency = 0; xas_pause(&xas); xas_unlock_irq(&xas); diff --git a/mm/memory-failure.c b/mm/memory-failure.c index c874c3510ab13..4b0c9d7914820 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -666,8 +666,10 @@ static int kill_accessing_process(struct task_struct *p, unsigned long pfn, (void *)&priv); if (ret == 1 && priv.tk.addr) kill_proc(&priv.tk, pfn, flags); + else + ret = 0; mmap_read_unlock(p->mm); - return ret ? -EFAULT : -EHWPOISON; + return ret > 0 ? -EHWPOISON : -EFAULT; } static const char *action_name[] = { @@ -1266,13 +1268,13 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags) || (p != head && TestSetPageHWPoison(head))) { num_poisoned_pages_dec(); unlock_page(head); - return 0; + return -EOPNOTSUPP; } } unlock_page(head); - dissolve_free_huge_page(p); - action_result(pfn, MF_MSG_FREE_HUGE, MF_DELAYED); - return 0; + res = dissolve_free_huge_page(p); + action_result(pfn, MF_MSG_FREE_HUGE, res ? MF_FAILED : MF_DELAYED); + return res; } lock_page(head); @@ -1338,7 +1340,7 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags, goto out; if (hwpoison_filter(page)) { - rc = 0; + rc = -EOPNOTSUPP; goto unlock; } @@ -1405,6 +1407,9 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags, * * Must run in process context (e.g. a work queue) with interrupts * enabled and no spinlocks hold. + * Return: 0 for successfully handled the memory error, + * -EOPNOTSUPP for memory_filter() filtered the error event, + * < 0(except -EOPNOTSUPP) on failure. */ int memory_failure(unsigned long pfn, int flags) { @@ -1550,6 +1555,7 @@ int memory_failure(unsigned long pfn, int flags) num_poisoned_pages_dec(); unlock_page(p); put_hwpoison_page(p); + res = -EOPNOTSUPP; goto unlock_mutex; } diff --git a/mm/memory.c b/mm/memory.c index d7110efe7dba3..4280f2913ab0e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1013,6 +1013,17 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, return ret; } +/* Whether we should zap all COWed (private) pages too */ +static inline bool should_zap_cows(struct zap_details *details) +{ + /* By default, zap all pages */ + if (!details) + return true; + + /* Or, we zap COWed pages only if the caller wants to */ + return !details->check_mapping; +} + static unsigned long zap_pte_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, @@ -1104,16 +1115,18 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, continue; } - /* If details->check_mapping, we leave swap entries. */ - if (unlikely(details)) - continue; - - if (!non_swap_entry(entry)) + if (!non_swap_entry(entry)) { + /* Genuine swap entry, hence a private anon page */ + if (!should_zap_cows(details)) + continue; rss[MM_SWAPENTS]--; - else if (is_migration_entry(entry)) { + } else if (is_migration_entry(entry)) { struct page *page; page = migration_entry_to_page(entry); + if (details && details->check_mapping && + details->check_mapping != page_rmapping(page)) + continue; rss[mm_counter(page)]--; } if (unlikely(!free_swap_and_cache(entry))) @@ -3243,11 +3256,20 @@ static vm_fault_t __do_fault(struct vm_fault *vmf) return ret; if (unlikely(PageHWPoison(vmf->page))) { - if (ret & VM_FAULT_LOCKED) - unlock_page(vmf->page); - put_page(vmf->page); + struct page *page = vmf->page; + vm_fault_t poisonret = VM_FAULT_HWPOISON; + if (ret & VM_FAULT_LOCKED) { + if (page_mapped(page)) + unmap_mapping_pages(page_mapping(page), + page->index, 1, false); + /* Retry if a clean page was removed from the cache. */ + if (invalidate_inode_page(page)) + poisonret = VM_FAULT_NOPAGE; + unlock_page(page); + } + put_page(page); vmf->page = NULL; - return VM_FAULT_HWPOISON; + return poisonret; } if (unlikely(!(ret & VM_FAULT_LOCKED))) @@ -4770,6 +4792,8 @@ long copy_huge_page_from_user(struct page *dst_page, if (rc) break; + flush_dcache_page(subpage); + cond_resched(); } return ret_val; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 7af0f6669f395..6710125384e2d 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -659,8 +659,8 @@ static inline struct zone *default_zone_for_pfn(int nid, unsigned long start_pfn return movable_node_enabled ? movable_zone : kernel_zone; } -struct zone * zone_for_pfn_range(int online_type, int nid, unsigned start_pfn, - unsigned long nr_pages) +struct zone *zone_for_pfn_range(int online_type, int nid, + unsigned long start_pfn, unsigned long nr_pages) { if (online_type == MMOP_ONLINE_KERNEL) return default_kernel_zone_for_pfn(nid, start_pfn, nr_pages); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 628cd1d9deb0a..69aeb2d78ee33 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -348,7 +348,7 @@ static void mpol_rebind_preferred(struct mempolicy *pol, */ static void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask) { - if (!pol) + if (!pol || pol->mode == MPOL_LOCAL) return; if (!mpol_store_user_nodemask(pol) && !(pol->flags & MPOL_F_LOCAL) && nodes_equal(pol->w.cpuset_mems_allowed, *newmask)) @@ -731,7 +731,6 @@ static int vma_replace_policy(struct vm_area_struct *vma, static int mbind_range(struct mm_struct *mm, unsigned long start, unsigned long end, struct mempolicy *new_pol) { - struct vm_area_struct *next; struct vm_area_struct *prev; struct vm_area_struct *vma; int err = 0; @@ -747,8 +746,7 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, if (start > vma->vm_start) prev = vma; - for (; vma && vma->vm_start < end; prev = vma, vma = next) { - next = vma->vm_next; + for (; vma && vma->vm_start < end; prev = vma, vma = vma->vm_next) { vmstart = max(start, vma->vm_start); vmend = min(end, vma->vm_end); @@ -762,10 +760,6 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, new_pol, vma->vm_userfaultfd_ctx); if (prev) { vma = prev; - next = vma->vm_next; - if (mpol_equal(vma_policy(vma), new_pol)) - continue; - /* vma_merge() joined vma && vma->next, case 8 */ goto replace; } if (vma->vm_start != vmstart) { @@ -2574,6 +2568,7 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start, mpol_new = kmem_cache_alloc(policy_cache, GFP_KERNEL); if (!mpol_new) goto err_out; + atomic_set(&mpol_new->refcnt, 1); goto restart; } diff --git a/mm/migrate.c b/mm/migrate.c index c06844aa56476..4e9d3b39d4aaa 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1003,9 +1003,12 @@ static int move_to_new_page(struct page *newpage, struct page *page, if (!PageMappingFlags(page)) page->mapping = NULL; - if (likely(!is_zone_device_page(newpage))) - flush_dcache_page(newpage); + if (likely(!is_zone_device_page(newpage))) { + int i, nr = compound_nr(newpage); + for (i = 0; i < nr; i++) + flush_dcache_page(newpage + i); + } } out: return rc; diff --git a/mm/mmap.c b/mm/mmap.c index aeaa8c7786819..eb0cf61f0dcb9 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2077,14 +2077,6 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info) } -#ifndef arch_get_mmap_end -#define arch_get_mmap_end(addr) (TASK_SIZE) -#endif - -#ifndef arch_get_mmap_base -#define arch_get_mmap_base(addr, base) (base) -#endif - /* Get an address range which is currently unmapped. * For shmat() with addr=0. * @@ -2515,7 +2507,7 @@ static int __init cmdline_parse_stack_guard_gap(char *p) if (!*endptr) stack_guard_gap = val << PAGE_SHIFT; - return 0; + return 1; } __setup("stack_guard_gap=", cmdline_parse_stack_guard_gap); diff --git a/mm/mmzone.c b/mm/mmzone.c index 4686fdc23bb96..f337831affc2d 100644 --- a/mm/mmzone.c +++ b/mm/mmzone.c @@ -72,20 +72,6 @@ struct zoneref *__next_zones_zonelist(struct zoneref *z, return z; } -#ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL -bool memmap_valid_within(unsigned long pfn, - struct page *page, struct zone *zone) -{ - if (page_to_pfn(page) != pfn) - return false; - - if (page_zone(page) != zone) - return false; - - return true; -} -#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */ - void lruvec_init(struct lruvec *lruvec) { enum lru_list lru; diff --git a/mm/mremap.c b/mm/mremap.c index 15bc448866374..b0176559be195 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -246,6 +246,9 @@ unsigned long move_page_tables(struct vm_area_struct *vma, struct mmu_notifier_range range; pmd_t *old_pmd, *new_pmd; + if (!len) + return 0; + old_end = old_addr + len; flush_cache_range(vma, old_addr, old_end); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 651b3a338810d..354998d35f8fa 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -631,7 +631,7 @@ static void oom_reap_task(struct task_struct *tsk) */ set_bit(MMF_OOM_SKIP, &mm->flags); - /* Drop a reference taken by wake_oom_reaper */ + /* Drop a reference taken by queue_oom_reaper */ put_task_struct(tsk); } @@ -641,12 +641,12 @@ static int oom_reaper(void *unused) struct task_struct *tsk = NULL; wait_event_freezable(oom_reaper_wait, oom_reaper_list != NULL); - spin_lock(&oom_reaper_lock); + spin_lock_irq(&oom_reaper_lock); if (oom_reaper_list != NULL) { tsk = oom_reaper_list; oom_reaper_list = tsk->oom_reaper_list; } - spin_unlock(&oom_reaper_lock); + spin_unlock_irq(&oom_reaper_lock); if (tsk) oom_reap_task(tsk); @@ -655,22 +655,48 @@ static int oom_reaper(void *unused) return 0; } -static void wake_oom_reaper(struct task_struct *tsk) +static void wake_oom_reaper(struct timer_list *timer) { - /* mm is already queued? */ - if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags)) - return; + struct task_struct *tsk = container_of(timer, struct task_struct, + oom_reaper_timer); + struct mm_struct *mm = tsk->signal->oom_mm; + unsigned long flags; - get_task_struct(tsk); + /* The victim managed to terminate on its own - see exit_mmap */ + if (test_bit(MMF_OOM_SKIP, &mm->flags)) { + put_task_struct(tsk); + return; + } - spin_lock(&oom_reaper_lock); + spin_lock_irqsave(&oom_reaper_lock, flags); tsk->oom_reaper_list = oom_reaper_list; oom_reaper_list = tsk; - spin_unlock(&oom_reaper_lock); + spin_unlock_irqrestore(&oom_reaper_lock, flags); trace_wake_reaper(tsk->pid); wake_up(&oom_reaper_wait); } +/* + * Give the OOM victim time to exit naturally before invoking the oom_reaping. + * The timers timeout is arbitrary... the longer it is, the longer the worst + * case scenario for the OOM can take. If it is too small, the oom_reaper can + * get in the way and release resources needed by the process exit path. + * e.g. The futex robust list can sit in Anon|Private memory that gets reaped + * before the exit path is able to wake the futex waiters. + */ +#define OOM_REAPER_DELAY (2*HZ) +static void queue_oom_reaper(struct task_struct *tsk) +{ + /* mm is already queued? */ + if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags)) + return; + + get_task_struct(tsk); + timer_setup(&tsk->oom_reaper_timer, wake_oom_reaper, 0); + tsk->oom_reaper_timer.expires = jiffies + OOM_REAPER_DELAY; + add_timer(&tsk->oom_reaper_timer); +} + static int __init oom_init(void) { oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper"); @@ -678,7 +704,7 @@ static int __init oom_init(void) } subsys_initcall(oom_init) #else -static inline void wake_oom_reaper(struct task_struct *tsk) +static inline void queue_oom_reaper(struct task_struct *tsk) { } #endif /* CONFIG_MMU */ @@ -927,7 +953,7 @@ static void __oom_kill_process(struct task_struct *victim, const char *message) rcu_read_unlock(); if (can_oom_reap) - wake_oom_reaper(victim); + queue_oom_reaper(victim); mmdrop(mm); put_task_struct(victim); @@ -963,7 +989,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message) task_lock(victim); if (task_will_free_mem(victim)) { mark_oom_victim(victim); - wake_oom_reaper(victim); + queue_oom_reaper(victim); task_unlock(victim); put_task_struct(victim); return; @@ -1061,7 +1087,7 @@ bool out_of_memory(struct oom_control *oc) */ if (task_will_free_mem(current)) { mark_oom_victim(current); - wake_oom_reaper(current); + queue_oom_reaper(current); return true; } @@ -1114,25 +1140,22 @@ bool out_of_memory(struct oom_control *oc) } /* - * The pagefault handler calls here because it is out of memory, so kill a - * memory-hogging task. If oom_lock is held by somebody else, a parallel oom - * killing is already in progress so do nothing. + * The pagefault handler calls here because some allocation has failed. We have + * to take care of the memcg OOM here because this is the only safe context without + * any locks held but let the oom killer triggered from the allocation context care + * about the global OOM. */ void pagefault_out_of_memory(void) { - struct oom_control oc = { - .zonelist = NULL, - .nodemask = NULL, - .memcg = NULL, - .gfp_mask = 0, - .order = 0, - }; + static DEFINE_RATELIMIT_STATE(pfoom_rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); if (mem_cgroup_oom_synchronize(true)) return; - if (!mutex_trylock(&oom_lock)) + if (fatal_signal_pending(current)) return; - out_of_memory(&oc); - mutex_unlock(&oom_lock); + + if (__ratelimit(&pfoom_rs)) + pr_warn("Huh VM_FAULT_OOM leaked out to the #PF handler. Retrying PF\n"); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e6e708a7e65c0..2f6b8ff5403a5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -906,7 +906,7 @@ static inline void __free_one_page(struct page *page, unsigned int max_order; struct capture_control *capc = task_capc(zone); - max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1); + max_order = min_t(unsigned int, MAX_ORDER - 1, pageblock_order); VM_BUG_ON(!zone_is_initialized(zone)); VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page); @@ -919,7 +919,7 @@ static inline void __free_one_page(struct page *page, VM_BUG_ON_PAGE(bad_range(zone, page), page); continue_merging: - while (order < max_order - 1) { + while (order < max_order) { if (compaction_capture(capc, page, order, migratetype)) { __mod_zone_freepage_state(zone, -(1 << order), migratetype); @@ -945,7 +945,7 @@ static inline void __free_one_page(struct page *page, pfn = combined_pfn; order++; } - if (max_order < MAX_ORDER) { + if (order < MAX_ORDER - 1) { /* If we are here, it means order is >= pageblock_order. * We want to prevent merge between freepages on isolate * pageblock and normal pageblock. Without this, pageblock @@ -966,7 +966,7 @@ static inline void __free_one_page(struct page *page, is_migrate_isolate(buddy_mt))) goto done_merging; } - max_order++; + max_order = order + 1; goto continue_merging; } @@ -3768,7 +3768,9 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...) va_list args; static DEFINE_RATELIMIT_STATE(nopage_rs, 10*HZ, 1); - if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs)) + if ((gfp_mask & __GFP_NOWARN) || + !__ratelimit(&nopage_rs) || + ((gfp_mask & __GFP_DMA) && !has_managed_dma())) return; va_start(args, fmt); @@ -5480,7 +5482,7 @@ static int build_zonerefs_node(pg_data_t *pgdat, struct zoneref *zonerefs) do { zone_type--; zone = pgdat->node_zones + zone_type; - if (managed_zone(zone)) { + if (populated_zone(zone)) { zoneref_set_zone(zone, &zonerefs[nr_zones++]); check_highest_zone(zone_type); } @@ -7325,10 +7327,17 @@ static void __init find_zone_movable_pfns_for_nodes(void) out2: /* Align start of ZONE_MOVABLE on all nids to MAX_ORDER_NR_PAGES */ - for (nid = 0; nid < MAX_NUMNODES; nid++) + for (nid = 0; nid < MAX_NUMNODES; nid++) { + unsigned long start_pfn, end_pfn; + zone_movable_pfn[nid] = roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES); + get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); + if (zone_movable_pfn[nid] >= end_pfn) + zone_movable_pfn[nid] = 0; + } + out: /* restore the node_state */ node_states[N_MEMORY] = saved_node_state; @@ -7580,7 +7589,7 @@ void __init mem_init_print_info(const char *str) */ #define adj_init_size(start, end, size, pos, adj) \ do { \ - if (start <= pos && pos < end && size > adj) \ + if (&start[0] <= &pos[0] && &pos[0] < &end[0] && size > adj) \ size -= adj; \ } while (0) @@ -8695,3 +8704,18 @@ bool set_hwpoison_free_buddy_page(struct page *page) return hwpoisoned; } #endif + +#ifdef CONFIG_ZONE_DMA +bool has_managed_dma(void) +{ + struct pglist_data *pgdat; + + for_each_online_pgdat(pgdat) { + struct zone *zone = &pgdat->node_zones[ZONE_DMA]; + + if (managed_zone(zone)) + return true; + } + return false; +} +#endif /* CONFIG_ZONE_DMA */ diff --git a/mm/page_io.c b/mm/page_io.c index bcf27d0572534..f0e3f2be7b44c 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -69,54 +69,6 @@ void end_swap_bio_write(struct bio *bio) bio_put(bio); } -static void swap_slot_free_notify(struct page *page) -{ - struct swap_info_struct *sis; - struct gendisk *disk; - swp_entry_t entry; - - /* - * There is no guarantee that the page is in swap cache - the software - * suspend code (at least) uses end_swap_bio_read() against a non- - * swapcache page. So we must check PG_swapcache before proceeding with - * this optimization. - */ - if (unlikely(!PageSwapCache(page))) - return; - - sis = page_swap_info(page); - if (!(sis->flags & SWP_BLKDEV)) - return; - - /* - * The swap subsystem performs lazy swap slot freeing, - * expecting that the page will be swapped out again. - * So we can avoid an unnecessary write if the page - * isn't redirtied. - * This is good for real swap storage because we can - * reduce unnecessary I/O and enhance wear-leveling - * if an SSD is used as the as swap device. - * But if in-memory swap device (eg zram) is used, - * this causes a duplicated copy between uncompressed - * data in VM-owned memory and compressed data in - * zram-owned memory. So let's free zram-owned memory - * and make the VM-owned decompressed page *dirty*, - * so the page should be swapped out somewhere again if - * we again wish to reclaim it. - */ - disk = sis->bdev->bd_disk; - entry.val = page_private(page); - if (disk->fops->swap_slot_free_notify && __swap_count(entry) == 1) { - unsigned long offset; - - offset = swp_offset(entry); - - SetPageDirty(page); - disk->fops->swap_slot_free_notify(sis->bdev, - offset); - } -} - static void end_swap_bio_read(struct bio *bio) { struct page *page = bio_first_page_all(bio); @@ -132,7 +84,6 @@ static void end_swap_bio_read(struct bio *bio) } SetPageUptodate(page); - swap_slot_free_notify(page); out: unlock_page(page); WRITE_ONCE(bio->bi_private, NULL); @@ -371,11 +322,6 @@ int swap_readpage(struct page *page, bool synchronous) ret = bdev_read_page(sis->bdev, swap_page_sector(page), page); if (!ret) { - if (trylock_page(page)) { - swap_slot_free_notify(page); - unlock_page(page); - } - count_vm_event(PSWPIN); return 0; } diff --git a/mm/rmap.c b/mm/rmap.c index f304994a426e1..35403d74d405d 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1613,7 +1613,30 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, /* MADV_FREE page check */ if (!PageSwapBacked(page)) { - if (!PageDirty(page)) { + int ref_count, map_count; + + /* + * Synchronize with gup_pte_range(): + * - clear PTE; barrier; read refcount + * - inc refcount; barrier; read PTE + */ + smp_mb(); + + ref_count = page_ref_count(page); + map_count = page_mapcount(page); + + /* + * Order reads for page refcount and dirty flag + * (see comments in __remove_mapping()). + */ + smp_rmb(); + + /* + * The only page refs must be one from isolation + * plus the rmap(s) (dropped by discard:). + */ + if (ref_count == 1 + map_count && + !PageDirty(page)) { /* Invalidate as we cleared the pte */ mmu_notifier_invalidate_range(mm, address, address + PAGE_SIZE); diff --git a/mm/slab.h b/mm/slab.h index c918a716c81c8..1f2385583d844 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -149,7 +149,7 @@ static inline slab_flags_t kmem_cache_flags(unsigned int object_size, #define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \ SLAB_TEMPORARY | SLAB_ACCOUNT) #else -#define SLAB_CACHE_FLAGS (0) +#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE) #endif /* Common flags available with current configuration */ diff --git a/mm/slub.c b/mm/slub.c index 73d3f79f04848..bd40445941d18 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2201,6 +2201,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, c->page = NULL; c->freelist = NULL; + c->tid = next_tid(c->tid); } /* @@ -2334,8 +2335,6 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) { stat(s, CPUSLAB_FLUSH); deactivate_slab(s, c->page, c->freelist, c); - - c->tid = next_tid(c->tid); } /* @@ -2619,6 +2618,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, if (!freelist) { c->page = NULL; + c->tid = next_tid(c->tid); stat(s, DEACTIVATE_BYPASS); goto new_slab; } diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 3d80bcf5c403f..52e88eddeed40 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -34,7 +34,7 @@ #include #include -#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP +#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP /** * struct vmemmap_remap_walk - walk vmemmap page table * @@ -78,6 +78,14 @@ static int __split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start) spin_lock(&init_mm.page_table_lock); if (likely(pmd_leaf(*pmd))) { + /* + * Higher order allocations from buddy allocator must be able to + * be treated as indepdenent small pages (as they can be freed + * individually). + */ + if (!PageReserved(page)) + split_page(page, get_order(PMD_SIZE)); + /* Make pte visible before pmd. See comment in __pte_alloc(). */ smp_wmb(); pmd_populate_kernel(&init_mm, pmd, pgtable); @@ -420,7 +428,7 @@ int vmemmap_remap_alloc(unsigned long start, unsigned long end, return 0; } -#endif /* CONFIG_HUGETLB_PAGE_FREE_VMEMMAP */ +#endif /* CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP */ /* * Allocate a block of memory to be used to back the virtual memory map diff --git a/mm/usercopy.c b/mm/usercopy.c index 660717a1ea5cd..f70455bad7f32 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -294,7 +294,10 @@ static bool enable_checks __initdata = true; static int __init parse_hardened_usercopy(char *str) { - return strtobool(str, &enable_checks); + if (strtobool(str, &enable_checks)) + pr_warn("Invalid option string for hardened_usercopy: '%s'\n", + str); + return 1; } __setup("hardened_usercopy=", parse_hardened_usercopy); diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 7789256e708e6..96b66df7f45a7 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -52,6 +52,8 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm, /* don't free the page */ goto out; } + + flush_dcache_page(page); } else { page = *pagep; *pagep = NULL; @@ -569,6 +571,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm, err = -EFAULT; goto out; } + flush_dcache_page(page); goto retry; } else BUG_ON(page); diff --git a/mm/util.c b/mm/util.c index 2649102b05d0b..1817485b83f23 100644 --- a/mm/util.c +++ b/mm/util.c @@ -320,6 +320,38 @@ unsigned long randomize_stack_top(unsigned long stack_top) #endif } +/** + * randomize_page - Generate a random, page aligned address + * @start: The smallest acceptable address the caller will take. + * @range: The size of the area, starting at @start, within which the + * random address must fall. + * + * If @start + @range would overflow, @range is capped. + * + * NOTE: Historical use of randomize_range, which this replaces, presumed that + * @start was already page aligned. We now align it regardless. + * + * Return: A page aligned address within [start, start + range). On error, + * @start is returned. + */ +unsigned long randomize_page(unsigned long start, unsigned long range) +{ + if (!PAGE_ALIGNED(start)) { + range -= PAGE_ALIGN(start) - start; + start = PAGE_ALIGN(start); + } + + if (start > ULONG_MAX - range) + range = ULONG_MAX - start; + + range >>= PAGE_SHIFT; + + if (range == 0) + return start; + + return start + (get_random_long() % range << PAGE_SHIFT); +} + #ifdef CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT unsigned long arch_randomize_brk(struct mm_struct *mm) { diff --git a/mm/vmstat.c b/mm/vmstat.c index 8c2dc08e1dd37..44143e1e36882 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1466,10 +1466,6 @@ static void pagetypeinfo_showblockcount_print(struct seq_file *m, if (!page) continue; - /* Watch for unexpected holes punched in the memmap */ - if (!memmap_valid_within(pfn, page, zone)) - continue; - if (page_zone(page) != zone) continue; diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 443b3b1c95818..6b100f02ee431 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -1748,11 +1748,40 @@ static enum fullness_group putback_zspage(struct size_class *class, */ static void lock_zspage(struct zspage *zspage) { - struct page *page = get_first_page(zspage); + struct page *curr_page, *page; - do { - lock_page(page); - } while ((page = get_next_page(page)) != NULL); + /* + * Pages we haven't locked yet can be migrated off the list while we're + * trying to lock them, so we need to be careful and only attempt to + * lock each page under migrate_read_lock(). Otherwise, the page we lock + * may no longer belong to the zspage. This means that we may wait for + * the wrong page to unlock, so we must take a reference to the page + * prior to waiting for it to unlock outside migrate_read_lock(). + */ + while (1) { + migrate_read_lock(zspage); + page = get_first_page(zspage); + if (trylock_page(page)) + break; + get_page(page); + migrate_read_unlock(zspage); + wait_on_page_locked(page); + put_page(page); + } + + curr_page = page; + while ((page = get_next_page(curr_page))) { + if (trylock_page(page)) { + curr_page = page; + } else { + get_page(page); + migrate_read_unlock(zspage); + wait_on_page_locked(page); + put_page(page); + migrate_read_lock(zspage); + } + } + migrate_read_unlock(zspage); } static int zs_init_fs_context(struct fs_context *fc) @@ -1835,10 +1864,11 @@ static inline void zs_pool_dec_isolated(struct zs_pool *pool) VM_BUG_ON(atomic_long_read(&pool->isolated_pages) <= 0); atomic_long_dec(&pool->isolated_pages); /* - * There's no possibility of racing, since wait_for_isolated_drain() - * checks the isolated count under &class->lock after enqueuing - * on migration_wait. + * Checking pool->destroying must happen after atomic_long_dec() + * for pool->isolated_pages above. Paired with the smp_mb() in + * zs_unregister_migration(). */ + smp_mb__after_atomic(); if (atomic_long_read(&pool->isolated_pages) == 0 && pool->destroying) wake_up_all(&pool->migration_wait); } diff --git a/net/6lowpan/debugfs.c b/net/6lowpan/debugfs.c index 1c140af06d527..600b9563bfc53 100644 --- a/net/6lowpan/debugfs.c +++ b/net/6lowpan/debugfs.c @@ -170,7 +170,8 @@ static void lowpan_dev_debugfs_ctx_init(struct net_device *dev, struct dentry *root; char buf[32]; - WARN_ON_ONCE(id > LOWPAN_IPHC_CTX_TABLE_SIZE); + if (WARN_ON_ONCE(id >= LOWPAN_IPHC_CTX_TABLE_SIZE)) + return; sprintf(buf, "%d", id); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 3f47abf9ef4a6..796d95797ab40 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -116,9 +116,6 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) } vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id); - - /* Get rid of the vlan's reference to real_dev */ - dev_put(real_dev); } int vlan_check_real_dev(struct net_device *real_dev, @@ -180,9 +177,6 @@ int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack) if (err) goto out_unregister_netdev; - /* Account for reference in struct vlan_dev_priv */ - dev_hold(real_dev); - vlan_stacked_transfer_operstate(real_dev, dev, vlan); linkwatch_fire_event(dev); /* _MUST_ call rfc2863_policy() */ diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 2a78da4072de9..589615ec490bb 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -583,6 +583,9 @@ static int vlan_dev_init(struct net_device *dev) if (!vlan->vlan_pcpu_stats) return -ENOMEM; + /* Get vlan's reference to real_dev */ + dev_hold(real_dev); + return 0; } @@ -790,6 +793,9 @@ static void vlan_dev_free(struct net_device *dev) free_percpu(vlan->vlan_pcpu_stats); vlan->vlan_pcpu_stats = NULL; + + /* Get rid of the vlan's reference to real_dev */ + dev_put(vlan->real_dev); } void vlan_setup(struct net_device *dev) diff --git a/net/9p/client.c b/net/9p/client.c index 1d48afc7033ca..b03fce66eb8de 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -538,6 +538,8 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) kfree(ename); } else { err = p9pdu_readf(&req->rc, c->proto_version, "d", &ecode); + if (err) + goto out_err; err = -ecode; p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index a3cd90a74012b..f582351d84ecb 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -605,7 +605,7 @@ static int p9_virtio_probe(struct virtio_device *vdev) chan->vc_wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL); if (!chan->vc_wq) { err = -ENOMEM; - goto out_free_tag; + goto out_remove_file; } init_waitqueue_head(chan->vc_wq); chan->ring_bufs_avail = 1; @@ -623,6 +623,8 @@ static int p9_virtio_probe(struct virtio_device *vdev) return 0; +out_remove_file: + sysfs_remove_file(&vdev->dev.kobj, &dev_attr_mount_tag.attr); out_free_tag: kfree(tag); out_free_vq: diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index 3963eb11c3fbd..2779ec1053a02 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -138,7 +138,7 @@ static bool p9_xen_write_todo(struct xen_9pfs_dataring *ring, RING_IDX size) static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req) { - struct xen_9pfs_front_priv *priv = NULL; + struct xen_9pfs_front_priv *priv; RING_IDX cons, prod, masked_cons, masked_prod; unsigned long flags; u32 size = p9_req->tc.size; @@ -151,7 +151,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req) break; } read_unlock(&xen_9pfs_lock); - if (!priv || priv->client != client) + if (list_entry_is_head(priv, &xen_9pfs_devs, list)) return -EINVAL; num = p9_req->tc.tag % priv->num_rings; @@ -301,9 +301,9 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv) ref = priv->rings[i].intf->ref[j]; gnttab_end_foreign_access(ref, 0, 0); } - free_pages((unsigned long)priv->rings[i].data.in, - XEN_9PFS_RING_ORDER - - (PAGE_SHIFT - XEN_PAGE_SHIFT)); + free_pages_exact(priv->rings[i].data.in, + 1UL << (XEN_9PFS_RING_ORDER + + XEN_PAGE_SHIFT)); } gnttab_end_foreign_access(priv->rings[i].ref, 0, 0); free_page((unsigned long)priv->rings[i].intf); @@ -341,8 +341,8 @@ static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev, if (ret < 0) goto out; ring->ref = ret; - bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - XEN_9PFS_RING_ORDER - (PAGE_SHIFT - XEN_PAGE_SHIFT)); + bytes = alloc_pages_exact(1UL << (XEN_9PFS_RING_ORDER + XEN_PAGE_SHIFT), + GFP_KERNEL | __GFP_ZERO); if (!bytes) { ret = -ENOMEM; goto out; @@ -373,9 +373,7 @@ static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev, if (bytes) { for (i--; i >= 0; i--) gnttab_end_foreign_access(ring->intf->ref[i], 0, 0); - free_pages((unsigned long)bytes, - XEN_9PFS_RING_ORDER - - (PAGE_SHIFT - XEN_PAGE_SHIFT)); + free_pages_exact(bytes, 1UL << (XEN_9PFS_RING_ORDER + XEN_PAGE_SHIFT)); } gnttab_end_foreign_access(ring->ref, 0, 0); free_page((unsigned long)ring->intf); diff --git a/net/Kconfig b/net/Kconfig index c2dbedab152a0..39cbaefe9bded 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -436,4 +436,12 @@ config FAILOVER migration of VMs with direct attached VFs by failing over to the paravirtual datapath when the VF is unplugged. +config ETHTOOL_NETLINK + bool "Netlink interface for ethtool" + default y + help + An alternative userspace interface for ethtool based on generic + netlink. It provides better extensibility and some new features, + e.g. notification messages. + endif # if NET diff --git a/net/Makefile b/net/Makefile index 449fc0b221f83..848303d98d3dd 100644 --- a/net/Makefile +++ b/net/Makefile @@ -13,7 +13,7 @@ obj-$(CONFIG_NET) += $(tmp-y) # LLC has to be linked before the files in net/802/ obj-$(CONFIG_LLC) += llc/ -obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ bpf/ +obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ bpf/ ethtool/ obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_INET) += ipv4/ obj-$(CONFIG_TLS) += tls/ diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 2fdb1b573e8c8..aff991ca0e4a4 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -77,6 +77,7 @@ static void ax25_kill_by_device(struct net_device *dev) { ax25_dev *ax25_dev; ax25_cb *s; + struct sock *sk; if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) return; @@ -85,11 +86,26 @@ static void ax25_kill_by_device(struct net_device *dev) again: ax25_for_each(s, &ax25_list) { if (s->ax25_dev == ax25_dev) { - s->ax25_dev = NULL; + sk = s->sk; + if (!sk) { + spin_unlock_bh(&ax25_list_lock); + ax25_disconnect(s, ENETUNREACH); + s->ax25_dev = NULL; + spin_lock_bh(&ax25_list_lock); + goto again; + } + sock_hold(sk); spin_unlock_bh(&ax25_list_lock); + lock_sock(sk); ax25_disconnect(s, ENETUNREACH); + s->ax25_dev = NULL; + if (sk->sk_socket) { + dev_put(ax25_dev->dev); + ax25_dev_put(ax25_dev); + } + release_sock(sk); spin_lock_bh(&ax25_list_lock); - + sock_put(sk); /* The entry could have been deleted from the * list meanwhile and thus the next pointer is * no longer valid. Play it safe and restart @@ -353,21 +369,25 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg) if (copy_from_user(&ax25_ctl, arg, sizeof(ax25_ctl))) return -EFAULT; - if ((ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr)) == NULL) - return -ENODEV; - if (ax25_ctl.digi_count > AX25_MAX_DIGIS) return -EINVAL; if (ax25_ctl.arg > ULONG_MAX / HZ && ax25_ctl.cmd != AX25_KILL) return -EINVAL; + ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr); + if (!ax25_dev) + return -ENODEV; + digi.ndigi = ax25_ctl.digi_count; for (k = 0; k < digi.ndigi; k++) digi.calls[k] = ax25_ctl.digi_addr[k]; - if ((ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev)) == NULL) + ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev); + if (!ax25) { + ax25_dev_put(ax25_dev); return -ENOTCONN; + } switch (ax25_ctl.cmd) { case AX25_KILL: @@ -434,6 +454,7 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg) } out_put: + ax25_dev_put(ax25_dev); ax25_cb_put(ax25); return ret; @@ -959,14 +980,16 @@ static int ax25_release(struct socket *sock) { struct sock *sk = sock->sk; ax25_cb *ax25; + ax25_dev *ax25_dev; if (sk == NULL) return 0; sock_hold(sk); - sock_orphan(sk); lock_sock(sk); + sock_orphan(sk); ax25 = sk_to_ax25(sk); + ax25_dev = ax25->ax25_dev; if (sk->sk_type == SOCK_SEQPACKET) { switch (ax25->state) { @@ -1028,6 +1051,15 @@ static int ax25_release(struct socket *sock) sk->sk_state_change(sk); ax25_destroy_socket(ax25); } + if (ax25_dev) { + del_timer_sync(&ax25->timer); + del_timer_sync(&ax25->t1timer); + del_timer_sync(&ax25->t2timer); + del_timer_sync(&ax25->t3timer); + del_timer_sync(&ax25->idletimer); + dev_put(ax25_dev->dev); + ax25_dev_put(ax25_dev); + } sock->sk = NULL; release_sock(sk); @@ -1104,8 +1136,10 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) } } - if (ax25_dev != NULL) + if (ax25_dev) { ax25_fillin_cb(ax25, ax25_dev); + dev_hold(ax25_dev->dev); + } done: ax25_cb_add(ax25); diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index 4ac2e0847652a..d2e0cc67d91a7 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -37,6 +37,7 @@ ax25_dev *ax25_addr_ax25dev(ax25_address *addr) for (ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next) if (ax25cmp(addr, (ax25_address *)ax25_dev->dev->dev_addr) == 0) { res = ax25_dev; + ax25_dev_hold(ax25_dev); } spin_unlock_bh(&ax25_dev_lock); @@ -56,6 +57,7 @@ void ax25_dev_device_up(struct net_device *dev) return; } + refcount_set(&ax25_dev->refcount, 1); dev->ax25_ptr = ax25_dev; ax25_dev->dev = dev; dev_hold(dev); @@ -84,6 +86,7 @@ void ax25_dev_device_up(struct net_device *dev) ax25_dev->next = ax25_dev_list; ax25_dev_list = ax25_dev; spin_unlock_bh(&ax25_dev_lock); + ax25_dev_hold(ax25_dev); ax25_register_dev_sysctl(ax25_dev); } @@ -113,9 +116,10 @@ void ax25_dev_device_down(struct net_device *dev) if ((s = ax25_dev_list) == ax25_dev) { ax25_dev_list = s->next; spin_unlock_bh(&ax25_dev_lock); + ax25_dev_put(ax25_dev); dev->ax25_ptr = NULL; dev_put(dev); - kfree(ax25_dev); + ax25_dev_put(ax25_dev); return; } @@ -123,9 +127,10 @@ void ax25_dev_device_down(struct net_device *dev) if (s->next == ax25_dev) { s->next = ax25_dev->next; spin_unlock_bh(&ax25_dev_lock); + ax25_dev_put(ax25_dev); dev->ax25_ptr = NULL; dev_put(dev); - kfree(ax25_dev); + ax25_dev_put(ax25_dev); return; } @@ -133,6 +138,7 @@ void ax25_dev_device_down(struct net_device *dev) } spin_unlock_bh(&ax25_dev_lock); dev->ax25_ptr = NULL; + ax25_dev_put(ax25_dev); } int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd) @@ -144,20 +150,32 @@ int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd) switch (cmd) { case SIOCAX25ADDFWD: - if ((fwd_dev = ax25_addr_ax25dev(&fwd->port_to)) == NULL) + fwd_dev = ax25_addr_ax25dev(&fwd->port_to); + if (!fwd_dev) { + ax25_dev_put(ax25_dev); return -EINVAL; - if (ax25_dev->forward != NULL) + } + if (ax25_dev->forward) { + ax25_dev_put(fwd_dev); + ax25_dev_put(ax25_dev); return -EINVAL; + } ax25_dev->forward = fwd_dev->dev; + ax25_dev_put(fwd_dev); + ax25_dev_put(ax25_dev); break; case SIOCAX25DELFWD: - if (ax25_dev->forward == NULL) + if (!ax25_dev->forward) { + ax25_dev_put(ax25_dev); return -EINVAL; + } ax25_dev->forward = NULL; + ax25_dev_put(ax25_dev); break; default: + ax25_dev_put(ax25_dev); return -EINVAL; } diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index b40e0bce67ead..dc2168d2a32a9 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -75,11 +75,13 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) ax25_dev *ax25_dev; int i; - if ((ax25_dev = ax25_addr_ax25dev(&route->port_addr)) == NULL) - return -EINVAL; if (route->digi_count > AX25_MAX_DIGIS) return -EINVAL; + ax25_dev = ax25_addr_ax25dev(&route->port_addr); + if (!ax25_dev) + return -EINVAL; + write_lock_bh(&ax25_route_lock); ax25_rt = ax25_route_list; @@ -91,6 +93,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) if (route->digi_count != 0) { if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) { write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return -ENOMEM; } ax25_rt->digipeat->lastrepeat = -1; @@ -101,6 +104,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) } } write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return 0; } ax25_rt = ax25_rt->next; @@ -108,6 +112,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) if ((ax25_rt = kmalloc(sizeof(ax25_route), GFP_ATOMIC)) == NULL) { write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return -ENOMEM; } @@ -120,6 +125,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) { write_unlock_bh(&ax25_route_lock); kfree(ax25_rt); + ax25_dev_put(ax25_dev); return -ENOMEM; } ax25_rt->digipeat->lastrepeat = -1; @@ -132,6 +138,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) ax25_rt->next = ax25_route_list; ax25_route_list = ax25_rt; write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return 0; } @@ -173,6 +180,7 @@ static int ax25_rt_del(struct ax25_routes_struct *route) } } write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return 0; } @@ -215,6 +223,7 @@ static int ax25_rt_opt(struct ax25_route_opt_struct *rt_option) out: write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return err; } diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c index 15ab812c4fe4b..3a476e4f6cd0b 100644 --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c @@ -261,12 +261,20 @@ void ax25_disconnect(ax25_cb *ax25, int reason) { ax25_clear_queues(ax25); - if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY)) - ax25_stop_heartbeat(ax25); - ax25_stop_t1timer(ax25); - ax25_stop_t2timer(ax25); - ax25_stop_t3timer(ax25); - ax25_stop_idletimer(ax25); + if (reason == ENETUNREACH) { + del_timer_sync(&ax25->timer); + del_timer_sync(&ax25->t1timer); + del_timer_sync(&ax25->t2timer); + del_timer_sync(&ax25->t3timer); + del_timer_sync(&ax25->idletimer); + } else { + if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY)) + ax25_stop_heartbeat(ax25); + ax25_stop_t1timer(ax25); + ax25_stop_t2timer(ax25); + ax25_stop_t3timer(ax25); + ax25_stop_idletimer(ax25); + } ax25->state = AX25_STATE_0; diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index a6b26ca5c6973..2e818eca3e1c6 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1561,10 +1561,14 @@ int batadv_bla_init(struct batadv_priv *bat_priv) return 0; bat_priv->bla.claim_hash = batadv_hash_new(128); - bat_priv->bla.backbone_hash = batadv_hash_new(32); + if (!bat_priv->bla.claim_hash) + return -ENOMEM; - if (!bat_priv->bla.claim_hash || !bat_priv->bla.backbone_hash) + bat_priv->bla.backbone_hash = batadv_hash_new(32); + if (!bat_priv->bla.backbone_hash) { + batadv_hash_destroy(bat_priv->bla.claim_hash); return -ENOMEM; + } batadv_hash_set_lock_class(bat_priv->bla.claim_hash, &batadv_claim_hash_lock_class_key); diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 385fccdcf69d0..f33a7f7a1249a 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -391,6 +391,7 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb, /** * batadv_frag_create() - create a fragment from skb + * @net_dev: outgoing device for fragment * @skb: skb to create fragment from * @frag_head: header to use in new fragment * @fragment_size: size of new fragment @@ -401,22 +402,25 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb, * * Return: the new fragment, NULL on error. */ -static struct sk_buff *batadv_frag_create(struct sk_buff *skb, +static struct sk_buff *batadv_frag_create(struct net_device *net_dev, + struct sk_buff *skb, struct batadv_frag_packet *frag_head, unsigned int fragment_size) { + unsigned int ll_reserved = LL_RESERVED_SPACE(net_dev); + unsigned int tailroom = net_dev->needed_tailroom; struct sk_buff *skb_fragment; unsigned int header_size = sizeof(*frag_head); unsigned int mtu = fragment_size + header_size; - skb_fragment = netdev_alloc_skb(NULL, mtu + ETH_HLEN); + skb_fragment = dev_alloc_skb(ll_reserved + mtu + tailroom); if (!skb_fragment) goto err; skb_fragment->priority = skb->priority; /* Eat the last mtu-bytes of the skb */ - skb_reserve(skb_fragment, header_size + ETH_HLEN); + skb_reserve(skb_fragment, ll_reserved + header_size); skb_split(skb, skb_fragment, skb->len - fragment_size); /* Add the header */ @@ -439,11 +443,12 @@ int batadv_frag_send_packet(struct sk_buff *skb, struct batadv_orig_node *orig_node, struct batadv_neigh_node *neigh_node) { + struct net_device *net_dev = neigh_node->if_incoming->net_dev; struct batadv_priv *bat_priv; struct batadv_hard_iface *primary_if = NULL; struct batadv_frag_packet frag_header; struct sk_buff *skb_fragment; - unsigned int mtu = neigh_node->if_incoming->net_dev->mtu; + unsigned int mtu = net_dev->mtu; unsigned int header_size = sizeof(frag_header); unsigned int max_fragment_size, num_fragments; int ret; @@ -473,6 +478,17 @@ int batadv_frag_send_packet(struct sk_buff *skb, goto free_skb; } + /* GRO might have added fragments to the fragment list instead of + * frags[]. But this is not handled by skb_split and must be + * linearized to avoid incorrect length information after all + * batman-adv fragments were created and submitted to the + * hard-interface + */ + if (skb_has_frag_list(skb) && __skb_linearize(skb)) { + ret = -ENOMEM; + goto free_skb; + } + /* Create one header to be copied to all fragments */ frag_header.packet_type = BATADV_UNICAST_FRAG; frag_header.version = BATADV_COMPAT_VERSION; @@ -503,7 +519,7 @@ int batadv_frag_send_packet(struct sk_buff *skb, goto put_primary_if; } - skb_fragment = batadv_frag_create(skb, &frag_header, + skb_fragment = batadv_frag_create(net_dev, skb, &frag_header, max_fragment_size); if (!skb_fragment) { ret = -ENOMEM; @@ -522,13 +538,14 @@ int batadv_frag_send_packet(struct sk_buff *skb, frag_header.no++; } - /* Make room for the fragment header. */ - if (batadv_skb_head_push(skb, header_size) < 0 || - pskb_expand_head(skb, header_size + ETH_HLEN, 0, GFP_ATOMIC) < 0) { - ret = -ENOMEM; + /* make sure that there is at least enough head for the fragmentation + * and ethernet headers + */ + ret = skb_cow_head(skb, ETH_HLEN + header_size); + if (ret < 0) goto put_primary_if; - } + skb_push(skb, header_size); memcpy(skb->data, &frag_header, header_size); /* Send the last fragment */ diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index afb52282d5bd0..5f44c94ad707b 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -151,22 +151,25 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) struct net *net = dev_net(net_dev); struct net_device *parent_dev; struct net *parent_net; + int iflink; bool ret; /* check if this is a batman-adv mesh interface */ if (batadv_softif_is_valid(net_dev)) return true; - /* no more parents..stop recursion */ - if (dev_get_iflink(net_dev) == 0 || - dev_get_iflink(net_dev) == net_dev->ifindex) + iflink = dev_get_iflink(net_dev); + if (iflink == 0) return false; parent_net = batadv_getlink_net(net_dev, net); + /* iflink to itself, most likely physical device */ + if (net == parent_net && iflink == net_dev->ifindex) + return false; + /* recurse over the parent device */ - parent_dev = __dev_get_by_index((struct net *)parent_net, - dev_get_iflink(net_dev)); + parent_dev = __dev_get_by_index((struct net *)parent_net, iflink); /* if we got a NULL parent_dev there is something broken.. */ if (!parent_dev) { pr_err("Cannot find parent device\n"); @@ -216,14 +219,15 @@ static struct net_device *batadv_get_real_netdevice(struct net_device *netdev) struct net_device *real_netdev = NULL; struct net *real_net; struct net *net; - int ifindex; + int iflink; ASSERT_RTNL(); if (!netdev) return NULL; - if (netdev->ifindex == dev_get_iflink(netdev)) { + iflink = dev_get_iflink(netdev); + if (iflink == 0) { dev_hold(netdev); return netdev; } @@ -233,9 +237,16 @@ static struct net_device *batadv_get_real_netdevice(struct net_device *netdev) goto out; net = dev_net(hard_iface->soft_iface); - ifindex = dev_get_iflink(netdev); real_net = batadv_getlink_net(netdev, net); - real_netdev = dev_get_by_index(real_net, ifindex); + + /* iflink to itself, most likely physical device */ + if (net == real_net && netdev->ifindex == iflink) { + real_netdev = netdev; + dev_hold(real_netdev); + goto out; + } + + real_netdev = dev_get_by_index(real_net, iflink); out: if (hard_iface) @@ -554,6 +565,9 @@ static void batadv_hardif_recalc_extra_skbroom(struct net_device *soft_iface) needed_headroom = lower_headroom + (lower_header_len - ETH_HLEN); needed_headroom += batadv_max_header_len(); + /* fragmentation headers don't strip the unicast/... header */ + needed_headroom += sizeof(struct batadv_frag_packet); + soft_iface->needed_headroom = needed_headroom; soft_iface->needed_tailroom = lower_tailroom; } diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 4a89177def647..6a183c94cdeb4 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -197,29 +197,41 @@ int batadv_mesh_init(struct net_device *soft_iface) bat_priv->gw.generation = 0; - ret = batadv_v_mesh_init(bat_priv); - if (ret < 0) - goto err; - ret = batadv_originator_init(bat_priv); - if (ret < 0) - goto err; + if (ret < 0) { + atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); + goto err_orig; + } ret = batadv_tt_init(bat_priv); - if (ret < 0) - goto err; + if (ret < 0) { + atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); + goto err_tt; + } + + ret = batadv_v_mesh_init(bat_priv); + if (ret < 0) { + atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); + goto err_v; + } ret = batadv_bla_init(bat_priv); - if (ret < 0) - goto err; + if (ret < 0) { + atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); + goto err_bla; + } ret = batadv_dat_init(bat_priv); - if (ret < 0) - goto err; + if (ret < 0) { + atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); + goto err_dat; + } ret = batadv_nc_mesh_init(bat_priv); - if (ret < 0) - goto err; + if (ret < 0) { + atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); + goto err_nc; + } batadv_gw_init(bat_priv); batadv_mcast_init(bat_priv); @@ -229,8 +241,20 @@ int batadv_mesh_init(struct net_device *soft_iface) return 0; -err: - batadv_mesh_free(soft_iface); +err_nc: + batadv_dat_free(bat_priv); +err_dat: + batadv_bla_free(bat_priv); +err_bla: + batadv_v_mesh_free(bat_priv); +err_v: + batadv_tt_free(bat_priv); +err_tt: + batadv_originator_free(bat_priv); +err_orig: + batadv_purge_outstanding_packets(bat_priv, NULL); + atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE); + return ret; } diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index f5bf931252c4b..6f0a9f4392333 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -136,7 +136,7 @@ static u8 batadv_mcast_mla_rtr_flags_softif_get_ipv6(struct net_device *dev) { struct inet6_dev *in6_dev = __in6_dev_get(dev); - if (in6_dev && in6_dev->cnf.mc_forwarding) + if (in6_dev && atomic_read(&in6_dev->cnf.mc_forwarding)) return BATADV_NO_FLAGS; else return BATADV_MCAST_WANT_NO_RTR6; @@ -1373,6 +1373,7 @@ batadv_mcast_forw_rtr_node_get(struct batadv_priv *bat_priv, * @bat_priv: the bat priv with all the soft interface information * @skb: The multicast packet to check * @orig: an originator to be set to forward the skb to + * @is_routable: stores whether the destination is routable * * Return: the forwarding mode as enum batadv_forw_mode and in case of * BATADV_FORW_SINGLE set the orig to the single originator the skb @@ -1380,17 +1381,16 @@ batadv_mcast_forw_rtr_node_get(struct batadv_priv *bat_priv, */ enum batadv_forw_mode batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, - struct batadv_orig_node **orig) + struct batadv_orig_node **orig, int *is_routable) { int ret, tt_count, ip_count, unsnoop_count, total_count; bool is_unsnoopable = false; unsigned int mcast_fanout; struct ethhdr *ethhdr; - int is_routable = 0; int rtr_count = 0; ret = batadv_mcast_forw_mode_check(bat_priv, skb, &is_unsnoopable, - &is_routable); + is_routable); if (ret == -ENOMEM) return BATADV_FORW_NONE; else if (ret < 0) @@ -1403,7 +1403,7 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, ip_count = batadv_mcast_forw_want_all_ip_count(bat_priv, ethhdr); unsnoop_count = !is_unsnoopable ? 0 : atomic_read(&bat_priv->mcast.num_want_all_unsnoopables); - rtr_count = batadv_mcast_forw_rtr_count(bat_priv, is_routable); + rtr_count = batadv_mcast_forw_rtr_count(bat_priv, *is_routable); total_count = tt_count + ip_count + unsnoop_count + rtr_count; @@ -1723,6 +1723,7 @@ batadv_mcast_forw_want_rtr(struct batadv_priv *bat_priv, * @bat_priv: the bat priv with all the soft interface information * @skb: the multicast packet to transmit * @vid: the vlan identifier + * @is_routable: stores whether the destination is routable * * Sends copies of a frame with multicast destination to any node that signaled * interest in it, that is either via the translation table or the according @@ -1735,7 +1736,7 @@ batadv_mcast_forw_want_rtr(struct batadv_priv *bat_priv, * is neither IPv4 nor IPv6. NET_XMIT_SUCCESS otherwise. */ int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, - unsigned short vid) + unsigned short vid, int is_routable) { int ret; @@ -1751,12 +1752,16 @@ int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, return ret; } + if (!is_routable) + goto skip_mc_router; + ret = batadv_mcast_forw_want_rtr(bat_priv, skb, vid); if (ret != NET_XMIT_SUCCESS) { kfree_skb(skb); return ret; } +skip_mc_router: consume_skb(skb); return ret; } diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h index 403929013ac47..fc1ffd22a6715 100644 --- a/net/batman-adv/multicast.h +++ b/net/batman-adv/multicast.h @@ -44,7 +44,8 @@ enum batadv_forw_mode { enum batadv_forw_mode batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, - struct batadv_orig_node **mcast_single_orig); + struct batadv_orig_node **mcast_single_orig, + int *is_routable); int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv, struct sk_buff *skb, @@ -52,7 +53,7 @@ int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node); int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, - unsigned short vid); + unsigned short vid, int is_routable); void batadv_mcast_init(struct batadv_priv *bat_priv); @@ -71,7 +72,8 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node); static inline enum batadv_forw_mode batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, - struct batadv_orig_node **mcast_single_orig) + struct batadv_orig_node **mcast_single_orig, + int *is_routable) { return BATADV_FORW_ALL; } @@ -88,7 +90,7 @@ batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv, static inline int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, - unsigned short vid) + unsigned short vid, int is_routable) { kfree_skb(skb); return NET_XMIT_DROP; diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 7e052d6f759b6..e59c5aa27ee0b 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -1351,21 +1351,21 @@ static const struct genl_ops batadv_netlink_ops[] = { { .cmd = BATADV_CMD_TP_METER, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .doit = batadv_netlink_tp_meter_start, .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_TP_METER_CANCEL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .doit = batadv_netlink_tp_meter_cancel, .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_GET_ROUTING_ALGOS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_algo_dump, }, { @@ -1380,68 +1380,68 @@ static const struct genl_ops batadv_netlink_ops[] = { { .cmd = BATADV_CMD_GET_TRANSTABLE_LOCAL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_tt_local_dump, }, { .cmd = BATADV_CMD_GET_TRANSTABLE_GLOBAL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_tt_global_dump, }, { .cmd = BATADV_CMD_GET_ORIGINATORS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_orig_dump, }, { .cmd = BATADV_CMD_GET_NEIGHBORS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_hardif_neigh_dump, }, { .cmd = BATADV_CMD_GET_GATEWAYS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_gw_dump, }, { .cmd = BATADV_CMD_GET_BLA_CLAIM, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_bla_claim_dump, }, { .cmd = BATADV_CMD_GET_BLA_BACKBONE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_bla_backbone_dump, }, { .cmd = BATADV_CMD_GET_DAT_CACHE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_dat_cache_dump, }, { .cmd = BATADV_CMD_GET_MCAST_FLAGS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_mcast_flags_dump, }, { .cmd = BATADV_CMD_SET_MESH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .doit = batadv_netlink_set_mesh, .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_SET_HARDIF, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .doit = batadv_netlink_set_hardif, .internal_flags = BATADV_FLAG_NEED_MESH | BATADV_FLAG_NEED_HARDIF, @@ -1457,7 +1457,7 @@ static const struct genl_ops batadv_netlink_ops[] = { { .cmd = BATADV_CMD_SET_VLAN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .doit = batadv_netlink_set_vlan, .internal_flags = BATADV_FLAG_NEED_MESH | BATADV_FLAG_NEED_VLAN, diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 70e3b161c6635..850f927f33de2 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -155,8 +155,10 @@ int batadv_nc_mesh_init(struct batadv_priv *bat_priv) &batadv_nc_coding_hash_lock_class_key); bat_priv->nc.decoding_hash = batadv_hash_new(128); - if (!bat_priv->nc.decoding_hash) + if (!bat_priv->nc.decoding_hash) { + batadv_hash_destroy(bat_priv->nc.coding_hash); goto err; + } batadv_hash_set_lock_class(bat_priv->nc.decoding_hash, &batadv_nc_decoding_hash_lock_class_key); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 7f209390069ea..504e3cb67bed4 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -200,6 +200,7 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb, int gw_mode; enum batadv_forw_mode forw_mode = BATADV_FORW_SINGLE; struct batadv_orig_node *mcast_single_orig = NULL; + int mcast_is_routable = 0; int network_offset = ETH_HLEN; __be16 proto; @@ -302,7 +303,8 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb, send: if (do_bcast && !is_broadcast_ether_addr(ethhdr->h_dest)) { forw_mode = batadv_mcast_forw_mode(bat_priv, skb, - &mcast_single_orig); + &mcast_single_orig, + &mcast_is_routable); if (forw_mode == BATADV_FORW_NONE) goto dropped; @@ -367,7 +369,8 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb, ret = batadv_mcast_forw_send_orig(bat_priv, skb, vid, mcast_single_orig); } else if (forw_mode == BATADV_FORW_SOME) { - ret = batadv_mcast_forw_send(bat_priv, skb, vid); + ret = batadv_mcast_forw_send(bat_priv, skb, vid, + mcast_is_routable); } else { if (batadv_dat_snoop_outgoing_arp_request(bat_priv, skb)) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index c5271ea4dc832..515205d7b650f 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -4405,8 +4405,10 @@ int batadv_tt_init(struct batadv_priv *bat_priv) return ret; ret = batadv_tt_global_init(bat_priv); - if (ret < 0) + if (ret < 0) { + batadv_tt_local_table_free(bat_priv); return ret; + } batadv_tvlv_handler_register(bat_priv, batadv_tt_tvlv_ogm_handler_v1, batadv_tt_tvlv_unicast_handler_v1, diff --git a/net/bluetooth/cmtp/cmtp.h b/net/bluetooth/cmtp/cmtp.h index c32638dddbf94..f6b9dc4e408f2 100644 --- a/net/bluetooth/cmtp/cmtp.h +++ b/net/bluetooth/cmtp/cmtp.h @@ -26,7 +26,7 @@ #include #include -#define BTNAMSIZ 18 +#define BTNAMSIZ 21 /* CMTP ioctl defines */ #define CMTPCONNADD _IOW('C', 200, int) diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 0a2d78e811cf5..83eb84e8e688f 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -501,9 +501,7 @@ static int __init cmtp_init(void) { BT_INFO("CMTP (CAPI Emulation) ver %s", VERSION); - cmtp_init_sockets(); - - return 0; + return cmtp_init_sockets(); } static void __exit cmtp_exit(void) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 83a07fca9000f..2ebb6480b6ecb 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1297,6 +1297,12 @@ int hci_inquiry(void __user *arg) goto done; } + /* Restrict maximum inquiry length to 60 seconds */ + if (ir.length > 60) { + err = -EINVAL; + goto done; + } + hci_dev_lock(hdev); if (inquiry_cache_age(hdev) > INQUIRY_CACHE_AGE_MAX || inquiry_cache_empty(hdev) || ir.flags & IREQ_CACHE_FLUSH) { @@ -3298,10 +3304,10 @@ int hci_register_dev(struct hci_dev *hdev) */ switch (hdev->dev_type) { case HCI_PRIMARY: - id = ida_simple_get(&hci_index_ida, 0, 0, GFP_KERNEL); + id = ida_simple_get(&hci_index_ida, 0, HCI_MAX_ID, GFP_KERNEL); break; case HCI_AMP: - id = ida_simple_get(&hci_index_ida, 1, 0, GFP_KERNEL); + id = ida_simple_get(&hci_index_ida, 1, HCI_MAX_ID, GFP_KERNEL); break; default: return -EINVAL; @@ -3310,7 +3316,7 @@ int hci_register_dev(struct hci_dev *hdev) if (id < 0) return id; - sprintf(hdev->name, "hci%d", id); + snprintf(hdev->name, sizeof(hdev->name), "hci%d", id); hdev->id = id; BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); @@ -3381,6 +3387,7 @@ int hci_register_dev(struct hci_dev *hdev) return id; err_wqueue: + debugfs_remove_recursive(hdev->debugfs); destroy_workqueue(hdev->workqueue); destroy_workqueue(hdev->req_workqueue); err: diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index e8e7f108b0161..ff6625493c9f8 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4202,6 +4202,21 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, switch (ev->status) { case 0x00: + /* The synchronous connection complete event should only be + * sent once per new connection. Receiving a successful + * complete event when the connection status is already + * BT_CONNECTED means that the device is misbehaving and sent + * multiple complete event packets for the same new connection. + * + * Registering the device more than once can corrupt kernel + * memory, hence upon detecting this invalid event, we report + * an error and ignore the packet. + */ + if (conn->state == BT_CONNECTED) { + bt_dev_err(hdev, "Ignoring connect complete event for existing connection"); + goto unlock; + } + conn->handle = __le16_to_cpu(ev->handle); conn->state = BT_CONNECTED; conn->type = ev->link_type; @@ -4896,8 +4911,9 @@ static void hci_disconn_phylink_complete_evt(struct hci_dev *hdev, hci_dev_lock(hdev); hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle); - if (hcon) { + if (hcon && hcon->type == AMP_LINK) { hcon->state = BT_CLOSED; + hci_disconn_cfm(hcon, ev->reason); hci_conn_del(hcon); } @@ -4905,9 +4921,64 @@ static void hci_disconn_phylink_complete_evt(struct hci_dev *hdev, } #endif +static void le_conn_update_addr(struct hci_conn *conn, bdaddr_t *bdaddr, + u8 bdaddr_type, bdaddr_t *local_rpa) +{ + if (conn->out) { + conn->dst_type = bdaddr_type; + conn->resp_addr_type = bdaddr_type; + bacpy(&conn->resp_addr, bdaddr); + + /* Check if the controller has set a Local RPA then it must be + * used instead or hdev->rpa. + */ + if (local_rpa && bacmp(local_rpa, BDADDR_ANY)) { + conn->init_addr_type = ADDR_LE_DEV_RANDOM; + bacpy(&conn->init_addr, local_rpa); + } else if (hci_dev_test_flag(conn->hdev, HCI_PRIVACY)) { + conn->init_addr_type = ADDR_LE_DEV_RANDOM; + bacpy(&conn->init_addr, &conn->hdev->rpa); + } else { + hci_copy_identity_address(conn->hdev, &conn->init_addr, + &conn->init_addr_type); + } + } else { + conn->resp_addr_type = conn->hdev->adv_addr_type; + /* Check if the controller has set a Local RPA then it must be + * used instead or hdev->rpa. + */ + if (local_rpa && bacmp(local_rpa, BDADDR_ANY)) { + conn->resp_addr_type = ADDR_LE_DEV_RANDOM; + bacpy(&conn->resp_addr, local_rpa); + } else if (conn->hdev->adv_addr_type == ADDR_LE_DEV_RANDOM) { + /* In case of ext adv, resp_addr will be updated in + * Adv Terminated event. + */ + if (!ext_adv_capable(conn->hdev)) + bacpy(&conn->resp_addr, + &conn->hdev->random_addr); + } else { + bacpy(&conn->resp_addr, &conn->hdev->bdaddr); + } + + conn->init_addr_type = bdaddr_type; + bacpy(&conn->init_addr, bdaddr); + + /* For incoming connections, set the default minimum + * and maximum connection interval. They will be used + * to check if the parameters are in range and if not + * trigger the connection update procedure. + */ + conn->le_conn_min_interval = conn->hdev->le_conn_min_interval; + conn->le_conn_max_interval = conn->hdev->le_conn_max_interval; + } +} + static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, - bdaddr_t *bdaddr, u8 bdaddr_type, u8 role, u16 handle, - u16 interval, u16 latency, u16 supervision_timeout) + bdaddr_t *bdaddr, u8 bdaddr_type, + bdaddr_t *local_rpa, u8 role, u16 handle, + u16 interval, u16 latency, + u16 supervision_timeout) { struct hci_conn_params *params; struct hci_conn *conn; @@ -4955,32 +5026,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, cancel_delayed_work(&conn->le_conn_timeout); } - if (!conn->out) { - /* Set the responder (our side) address type based on - * the advertising address type. - */ - conn->resp_addr_type = hdev->adv_addr_type; - if (hdev->adv_addr_type == ADDR_LE_DEV_RANDOM) { - /* In case of ext adv, resp_addr will be updated in - * Adv Terminated event. - */ - if (!ext_adv_capable(hdev)) - bacpy(&conn->resp_addr, &hdev->random_addr); - } else { - bacpy(&conn->resp_addr, &hdev->bdaddr); - } - - conn->init_addr_type = bdaddr_type; - bacpy(&conn->init_addr, bdaddr); - - /* For incoming connections, set the default minimum - * and maximum connection interval. They will be used - * to check if the parameters are in range and if not - * trigger the connection update procedure. - */ - conn->le_conn_min_interval = hdev->le_conn_min_interval; - conn->le_conn_max_interval = hdev->le_conn_max_interval; - } + le_conn_update_addr(conn, bdaddr, bdaddr_type, local_rpa); /* Lookup the identity address from the stored connection * address and address type. @@ -5074,7 +5120,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); le_conn_complete_evt(hdev, ev->status, &ev->bdaddr, ev->bdaddr_type, - ev->role, le16_to_cpu(ev->handle), + NULL, ev->role, le16_to_cpu(ev->handle), le16_to_cpu(ev->interval), le16_to_cpu(ev->latency), le16_to_cpu(ev->supervision_timeout)); @@ -5088,7 +5134,7 @@ static void hci_le_enh_conn_complete_evt(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); le_conn_complete_evt(hdev, ev->status, &ev->bdaddr, ev->bdaddr_type, - ev->role, le16_to_cpu(ev->handle), + &ev->local_rpa, ev->role, le16_to_cpu(ev->handle), le16_to_cpu(ev->interval), le16_to_cpu(ev->latency), le16_to_cpu(ev->supervision_timeout)); @@ -5119,7 +5165,8 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, struct sk_buff *skb) if (conn) { struct adv_info *adv_instance; - if (hdev->adv_addr_type != ADDR_LE_DEV_RANDOM) + if (hdev->adv_addr_type != ADDR_LE_DEV_RANDOM || + bacmp(&conn->resp_addr, BDADDR_ANY)) return; if (!hdev->cur_adv_instance) { @@ -5460,7 +5507,13 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb) struct hci_ev_le_advertising_info *ev = ptr; s8 rssi; - if (ev->length <= HCI_MAX_AD_LENGTH) { + if (ptr > (void *)skb_tail_pointer(skb) - sizeof(*ev)) { + bt_dev_err(hdev, "Malicious advertising data."); + break; + } + + if (ev->length <= HCI_MAX_AD_LENGTH && + ev->data + ev->length <= skb_tail_pointer(skb)) { rssi = ev->data[ev->length]; process_adv_report(hdev, ev->evt_type, &ev->bdaddr, ev->bdaddr_type, NULL, 0, rssi, diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 959a16b133033..286fca6a9ab2a 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -110,7 +110,8 @@ static struct l2cap_chan *__l2cap_get_chan_by_scid(struct l2cap_conn *conn, } /* Find channel with given SCID. - * Returns locked channel. */ + * Returns a reference locked channel. + */ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn, u16 cid) { @@ -118,15 +119,19 @@ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn, mutex_lock(&conn->chan_lock); c = __l2cap_get_chan_by_scid(conn, cid); - if (c) - l2cap_chan_lock(c); + if (c) { + /* Only lock if chan reference is not 0 */ + c = l2cap_chan_hold_unless_zero(c); + if (c) + l2cap_chan_lock(c); + } mutex_unlock(&conn->chan_lock); return c; } /* Find channel with given DCID. - * Returns locked channel. + * Returns a reference locked channel. */ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn, u16 cid) @@ -135,8 +140,12 @@ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn, mutex_lock(&conn->chan_lock); c = __l2cap_get_chan_by_dcid(conn, cid); - if (c) - l2cap_chan_lock(c); + if (c) { + /* Only lock if chan reference is not 0 */ + c = l2cap_chan_hold_unless_zero(c); + if (c) + l2cap_chan_lock(c); + } mutex_unlock(&conn->chan_lock); return c; @@ -161,8 +170,12 @@ static struct l2cap_chan *l2cap_get_chan_by_ident(struct l2cap_conn *conn, mutex_lock(&conn->chan_lock); c = __l2cap_get_chan_by_ident(conn, ident); - if (c) - l2cap_chan_lock(c); + if (c) { + /* Only lock if chan reference is not 0 */ + c = l2cap_chan_hold_unless_zero(c); + if (c) + l2cap_chan_lock(c); + } mutex_unlock(&conn->chan_lock); return c; @@ -496,6 +509,16 @@ void l2cap_chan_hold(struct l2cap_chan *c) kref_get(&c->kref); } +struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c) +{ + BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref)); + + if (!kref_get_unless_zero(&c->kref)) + return NULL; + + return c; +} + void l2cap_chan_put(struct l2cap_chan *c) { BT_DBG("chan %p orig refcnt %d", c, kref_read(&c->kref)); @@ -1812,7 +1835,10 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, src_match = !bacmp(&c->src, src); dst_match = !bacmp(&c->dst, dst); if (src_match && dst_match) { - l2cap_chan_hold(c); + c = l2cap_chan_hold_unless_zero(c); + if (!c) + continue; + read_unlock(&chan_list_lock); return c; } @@ -1827,7 +1853,7 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, } if (c1) - l2cap_chan_hold(c1); + c1 = l2cap_chan_hold_unless_zero(c1); read_unlock(&chan_list_lock); @@ -4221,6 +4247,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, unlock: l2cap_chan_unlock(chan); + l2cap_chan_put(chan); return err; } @@ -4334,6 +4361,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, done: l2cap_chan_unlock(chan); + l2cap_chan_put(chan); return err; } @@ -5062,6 +5090,7 @@ static inline int l2cap_move_channel_req(struct l2cap_conn *conn, l2cap_send_move_chan_rsp(chan, result); l2cap_chan_unlock(chan); + l2cap_chan_put(chan); return 0; } @@ -5154,6 +5183,7 @@ static void l2cap_move_continue(struct l2cap_conn *conn, u16 icid, u16 result) } l2cap_chan_unlock(chan); + l2cap_chan_put(chan); } static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid, @@ -5183,6 +5213,7 @@ static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid, l2cap_send_move_chan_cfm(chan, L2CAP_MC_UNCONFIRMED); l2cap_chan_unlock(chan); + l2cap_chan_put(chan); } static int l2cap_move_channel_rsp(struct l2cap_conn *conn, @@ -5246,6 +5277,7 @@ static int l2cap_move_channel_confirm(struct l2cap_conn *conn, l2cap_send_move_chan_cfm_rsp(conn, cmd->ident, icid); l2cap_chan_unlock(chan); + l2cap_chan_put(chan); return 0; } @@ -5281,6 +5313,7 @@ static inline int l2cap_move_channel_confirm_rsp(struct l2cap_conn *conn, } l2cap_chan_unlock(chan); + l2cap_chan_put(chan); return 0; } @@ -5653,12 +5686,11 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn, if (credits > max_credits) { BT_ERR("LE credits overflow"); l2cap_send_disconn_req(chan, ECONNRESET); - l2cap_chan_unlock(chan); /* Return 0 so that we don't trigger an unnecessary * command reject packet. */ - return 0; + goto unlock; } chan->tx_credits += credits; @@ -5669,7 +5701,9 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn, if (chan->tx_credits) chan->ops->resume(chan); +unlock: l2cap_chan_unlock(chan); + l2cap_chan_put(chan); return 0; } @@ -6983,6 +7017,7 @@ static void l2cap_data_channel(struct l2cap_conn *conn, u16 cid, done: l2cap_chan_unlock(chan); + l2cap_chan_put(chan); } static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, @@ -7386,7 +7421,7 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c, if (src_type != c->src_type) continue; - l2cap_chan_hold(c); + c = l2cap_chan_hold_unless_zero(c); read_unlock(&chan_list_lock); return c; } diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 82e76ff01267a..08e9f332adad3 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1347,6 +1347,9 @@ static void l2cap_sock_close_cb(struct l2cap_chan *chan) { struct sock *sk = chan->data; + if (!sk) + return; + l2cap_sock_kill(sk); } @@ -1355,6 +1358,9 @@ static void l2cap_sock_teardown_cb(struct l2cap_chan *chan, int err) struct sock *sk = chan->data; struct sock *parent; + if (!sk) + return; + BT_DBG("chan %p state %s", chan, state_to_string(chan->state)); /* This callback can be called both for server (BT_LISTEN) @@ -1538,8 +1544,10 @@ static void l2cap_sock_destruct(struct sock *sk) { BT_DBG("sk %p", sk); - if (l2cap_pi(sk)->chan) + if (l2cap_pi(sk)->chan) { + l2cap_pi(sk)->chan->data = NULL; l2cap_chan_put(l2cap_pi(sk)->chan); + } if (l2cap_pi(sk)->rx_busy_skb) { kfree_skb(l2cap_pi(sk)->rx_busy_skb); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 3a9e9d9670be4..83a8c48dfaa8b 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -553,22 +553,58 @@ struct rfcomm_dlc *rfcomm_dlc_exists(bdaddr_t *src, bdaddr_t *dst, u8 channel) return dlc; } +static int rfcomm_dlc_send_frag(struct rfcomm_dlc *d, struct sk_buff *frag) +{ + int len = frag->len; + + BT_DBG("dlc %p mtu %d len %d", d, d->mtu, len); + + if (len > d->mtu) + return -EINVAL; + + rfcomm_make_uih(frag, d->addr); + __skb_queue_tail(&d->tx_queue, frag); + + return len; +} + int rfcomm_dlc_send(struct rfcomm_dlc *d, struct sk_buff *skb) { - int len = skb->len; + unsigned long flags; + struct sk_buff *frag, *next; + int len; if (d->state != BT_CONNECTED) return -ENOTCONN; - BT_DBG("dlc %p mtu %d len %d", d, d->mtu, len); + frag = skb_shinfo(skb)->frag_list; + skb_shinfo(skb)->frag_list = NULL; - if (len > d->mtu) - return -EINVAL; + /* Queue all fragments atomically. */ + spin_lock_irqsave(&d->tx_queue.lock, flags); - rfcomm_make_uih(skb, d->addr); - skb_queue_tail(&d->tx_queue, skb); + len = rfcomm_dlc_send_frag(d, skb); + if (len < 0 || !frag) + goto unlock; + + for (; frag; frag = next) { + int ret; + + next = frag->next; + + ret = rfcomm_dlc_send_frag(d, frag); + if (ret < 0) { + kfree_skb(frag); + goto unlock; + } + + len += ret; + } + +unlock: + spin_unlock_irqrestore(&d->tx_queue.lock, flags); - if (!test_bit(RFCOMM_TX_THROTTLED, &d->flags)) + if (len > 0 && !test_bit(RFCOMM_TX_THROTTLED, &d->flags)) rfcomm_schedule(); return len; } diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 90bb53aa4beed..e67310a749d27 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -578,46 +578,20 @@ static int rfcomm_sock_sendmsg(struct socket *sock, struct msghdr *msg, lock_sock(sk); sent = bt_sock_wait_ready(sk, msg->msg_flags); - if (sent) - goto done; - - while (len) { - size_t size = min_t(size_t, len, d->mtu); - int err; - - skb = sock_alloc_send_skb(sk, size + RFCOMM_SKB_RESERVE, - msg->msg_flags & MSG_DONTWAIT, &err); - if (!skb) { - if (sent == 0) - sent = err; - break; - } - skb_reserve(skb, RFCOMM_SKB_HEAD_RESERVE); - - err = memcpy_from_msg(skb_put(skb, size), msg, size); - if (err) { - kfree_skb(skb); - if (sent == 0) - sent = err; - break; - } - skb->priority = sk->sk_priority; + release_sock(sk); - err = rfcomm_dlc_send(d, skb); - if (err < 0) { - kfree_skb(skb); - if (sent == 0) - sent = err; - break; - } + if (sent) + return sent; - sent += size; - len -= size; - } + skb = bt_skb_sendmmsg(sk, msg, len, d->mtu, RFCOMM_SKB_HEAD_RESERVE, + RFCOMM_SKB_TAIL_RESERVE); + if (IS_ERR(skb)) + return PTR_ERR(skb); -done: - release_sock(sk); + sent = rfcomm_dlc_send(d, skb); + if (sent < 0) + kfree_skb(skb); return sent; } diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index b91d6b440fdf6..78a549e506b12 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -48,6 +48,8 @@ struct sco_conn { spinlock_t lock; struct sock *sk; + struct delayed_work timeout_work; + unsigned int mtu; }; @@ -73,9 +75,20 @@ struct sco_pinfo { #define SCO_CONN_TIMEOUT (HZ * 40) #define SCO_DISCONN_TIMEOUT (HZ * 2) -static void sco_sock_timeout(struct timer_list *t) +static void sco_sock_timeout(struct work_struct *work) { - struct sock *sk = from_timer(sk, t, sk_timer); + struct sco_conn *conn = container_of(work, struct sco_conn, + timeout_work.work); + struct sock *sk; + + sco_conn_lock(conn); + sk = conn->sk; + if (sk) + sock_hold(sk); + sco_conn_unlock(conn); + + if (!sk) + return; BT_DBG("sock %p state %d", sk, sk->sk_state); @@ -84,20 +97,26 @@ static void sco_sock_timeout(struct timer_list *t) sk->sk_state_change(sk); bh_unlock_sock(sk); - sco_sock_kill(sk); sock_put(sk); } static void sco_sock_set_timer(struct sock *sk, long timeout) { + if (!sco_pi(sk)->conn) + return; + BT_DBG("sock %p state %d timeout %ld", sk, sk->sk_state, timeout); - sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout); + cancel_delayed_work(&sco_pi(sk)->conn->timeout_work); + schedule_delayed_work(&sco_pi(sk)->conn->timeout_work, timeout); } static void sco_sock_clear_timer(struct sock *sk) { + if (!sco_pi(sk)->conn) + return; + BT_DBG("sock %p state %d", sk, sk->sk_state); - sk_stop_timer(sk, &sk->sk_timer); + cancel_delayed_work(&sco_pi(sk)->conn->timeout_work); } /* ---- SCO connections ---- */ @@ -114,6 +133,7 @@ static struct sco_conn *sco_conn_add(struct hci_conn *hcon) return NULL; spin_lock_init(&conn->lock); + INIT_DELAYED_WORK(&conn->timeout_work, sco_sock_timeout); hcon->sco_data = conn; conn->hcon = hcon; @@ -176,10 +196,12 @@ static void sco_conn_del(struct hci_conn *hcon, int err) sco_sock_clear_timer(sk); sco_chan_del(sk, err); bh_unlock_sock(sk); - sco_sock_kill(sk); sock_put(sk); } + /* Ensure no more work items will run before freeing conn. */ + cancel_delayed_work_sync(&conn->timeout_work); + hcon->sco_data = NULL; kfree(conn); } @@ -211,44 +233,32 @@ static int sco_chan_add(struct sco_conn *conn, struct sock *sk, return err; } -static int sco_connect(struct sock *sk) +static int sco_connect(struct hci_dev *hdev, struct sock *sk) { struct sco_conn *conn; struct hci_conn *hcon; - struct hci_dev *hdev; int err, type; BT_DBG("%pMR -> %pMR", &sco_pi(sk)->src, &sco_pi(sk)->dst); - hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src, BDADDR_BREDR); - if (!hdev) - return -EHOSTUNREACH; - - hci_dev_lock(hdev); - if (lmp_esco_capable(hdev) && !disable_esco) type = ESCO_LINK; else type = SCO_LINK; if (sco_pi(sk)->setting == BT_VOICE_TRANSPARENT && - (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev))) { - err = -EOPNOTSUPP; - goto done; - } + (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev))) + return -EOPNOTSUPP; hcon = hci_connect_sco(hdev, type, &sco_pi(sk)->dst, sco_pi(sk)->setting); - if (IS_ERR(hcon)) { - err = PTR_ERR(hcon); - goto done; - } + if (IS_ERR(hcon)) + return PTR_ERR(hcon); conn = sco_conn_add(hcon); if (!conn) { hci_conn_drop(hcon); - err = -ENOMEM; - goto done; + return -ENOMEM; } /* Update source addr of the socket */ @@ -256,7 +266,7 @@ static int sco_connect(struct sock *sk) err = sco_chan_add(conn, sk, NULL); if (err) - goto done; + return err; if (hcon->state == BT_CONNECTED) { sco_sock_clear_timer(sk); @@ -266,17 +276,13 @@ static int sco_connect(struct sock *sk) sco_sock_set_timer(sk, sk->sk_sndtimeo); } -done: - hci_dev_unlock(hdev); - hci_dev_put(hdev); return err; } -static int sco_send_frame(struct sock *sk, struct msghdr *msg, int len) +static int sco_send_frame(struct sock *sk, struct sk_buff *skb) { struct sco_conn *conn = sco_pi(sk)->conn; - struct sk_buff *skb; - int err; + int len = skb->len; /* Check outgoing MTU */ if (len > conn->mtu) @@ -284,15 +290,6 @@ static int sco_send_frame(struct sock *sk, struct msghdr *msg, int len) BT_DBG("sk %p len %d", sk, len); - skb = bt_skb_send_alloc(sk, len, msg->msg_flags & MSG_DONTWAIT, &err); - if (!skb) - return err; - - if (memcpy_from_msg(skb_put(skb, len), msg, len)) { - kfree_skb(skb); - return -EFAULT; - } - hci_send_sco(conn->hcon, skb); return len; @@ -393,8 +390,7 @@ static void sco_sock_cleanup_listen(struct sock *parent) */ static void sco_sock_kill(struct sock *sk) { - if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket || - sock_flag(sk, SOCK_DEAD)) + if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket) return; BT_DBG("sk %p state %d", sk, sk->sk_state); @@ -446,7 +442,6 @@ static void sco_sock_close(struct sock *sk) lock_sock(sk); __sco_sock_close(sk); release_sock(sk); - sco_sock_kill(sk); } static void sco_sock_init(struct sock *sk, struct sock *parent) @@ -488,8 +483,6 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, sco_pi(sk)->setting = BT_VOICE_CVSD_16BIT; - timer_setup(&sk->sk_timer, sco_sock_timeout, 0); - bt_sock_link(&sco_sk_list, sk); return sk; } @@ -554,6 +547,7 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen { struct sockaddr_sco *sa = (struct sockaddr_sco *) addr; struct sock *sk = sock->sk; + struct hci_dev *hdev; int err; BT_DBG("sk %p", sk); @@ -562,18 +556,30 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen addr->sa_family != AF_BLUETOOTH) return -EINVAL; - if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) - return -EBADFD; + lock_sock(sk); + if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) { + err = -EBADFD; + goto done; + } - if (sk->sk_type != SOCK_SEQPACKET) - return -EINVAL; + if (sk->sk_type != SOCK_SEQPACKET) { + err = -EINVAL; + goto done; + } - lock_sock(sk); + hdev = hci_get_route(&sa->sco_bdaddr, &sco_pi(sk)->src, BDADDR_BREDR); + if (!hdev) { + err = -EHOSTUNREACH; + goto done; + } + hci_dev_lock(hdev); /* Set destination address and psm */ bacpy(&sco_pi(sk)->dst, &sa->sco_bdaddr); - err = sco_connect(sk); + err = sco_connect(hdev, sk); + hci_dev_unlock(hdev); + hci_dev_put(hdev); if (err) goto done; @@ -702,6 +708,7 @@ static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; + struct sk_buff *skb; int err; BT_DBG("sock %p, sk %p", sock, sk); @@ -713,14 +720,21 @@ static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg, if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; + skb = bt_skb_sendmsg(sk, msg, len, len, 0, 0); + if (IS_ERR(skb)) + return PTR_ERR(skb); + lock_sock(sk); if (sk->sk_state == BT_CONNECTED) - err = sco_send_frame(sk, msg, len); + err = sco_send_frame(sk, skb); else err = -ENOTCONN; release_sock(sk); + + if (err < 0) + kfree_skb(skb); return err; } @@ -761,6 +775,11 @@ static void sco_conn_defer_accept(struct hci_conn *conn, u16 setting) cp.max_latency = cpu_to_le16(0xffff); cp.retrans_effort = 0xff; break; + default: + /* use CVSD settings as fallback */ + cp.max_latency = cpu_to_le16(0xffff); + cp.retrans_effort = 0xff; + break; } hci_send_cmd(hdev, HCI_OP_ACCEPT_SYNC_CONN_REQ, diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 0dd8984a261da..f085b1648e66c 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -33,7 +33,6 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats); const struct nf_br_ops *nf_ops; const unsigned char *dest; - struct ethhdr *eth; u16 vid = 0; rcu_read_lock(); @@ -53,15 +52,14 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) BR_INPUT_SKB_CB(skb)->frag_max_size = 0; skb_reset_mac_header(skb); - eth = eth_hdr(skb); skb_pull(skb, ETH_HLEN); if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid)) goto out; if (IS_ENABLED(CONFIG_INET) && - (eth->h_proto == htons(ETH_P_ARP) || - eth->h_proto == htons(ETH_P_RARP)) && + (eth_hdr(skb)->h_proto == htons(ETH_P_ARP) || + eth_hdr(skb)->h_proto == htons(ETH_P_RARP)) && br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) { br_do_proxy_suppress_arp(skb, br, vid, NULL); } else if (IS_ENABLED(CONFIG_IPV6) && diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 09b1dd8cd8536..464f6a619444d 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -42,6 +42,13 @@ static int br_pass_frame_up(struct sk_buff *skb) u64_stats_update_end(&brstats->syncp); vg = br_vlan_group_rcu(br); + + /* Reset the offload_fwd_mark because there could be a stacked + * bridge above, and it should not think this bridge it doing + * that bridge's work forwarding out its ports. + */ + br_switchdev_frame_unmark(skb); + /* Bridge is just like any other port. Make sure the * packet is allowed except in promisc modue when someone * may be running packet capture. diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 2371b833b2bcd..19726d81025d5 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -743,6 +743,9 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu) mtu = nf_bridge->frag_max_size; + nf_bridge_update_protocol(skb); + nf_bridge_push_encap_header(skb); + if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) { nf_bridge_info_free(skb); return br_dev_queue_push_xmit(net, sk, skb); @@ -760,8 +763,6 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; - nf_bridge_update_protocol(skb); - data = this_cpu_ptr(&brnf_frag_data_storage); if (skb_vlan_tag_present(skb)) { @@ -789,8 +790,6 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; - nf_bridge_update_protocol(skb); - data = this_cpu_ptr(&brnf_frag_data_storage); data->encap_size = nf_bridge_encap_header_len(skb); data->size = ETH_HLEN + data->encap_size; @@ -1013,9 +1012,24 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, return okfn(net, sk, skb); ops = nf_hook_entries_get_hook_ops(e); - for (i = 0; i < e->num_hook_entries && - ops[i]->priority <= NF_BR_PRI_BRNF; i++) - ; + for (i = 0; i < e->num_hook_entries; i++) { + /* These hooks have already been called */ + if (ops[i]->priority < NF_BR_PRI_BRNF) + continue; + + /* These hooks have not been called yet, run them. */ + if (ops[i]->priority > NF_BR_PRI_BRNF) + break; + + /* take a closer look at NF_BR_PRI_BRNF. */ + if (ops[i]->hook == br_nf_pre_routing) { + /* This hook diverted the skb to this function, + * hooks after this have not been run yet. + */ + i++; + break; + } + } nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev, sk, net, okfn); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 8a664148f57aa..cbcbc19efcb34 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1536,7 +1536,7 @@ static size_t br_get_linkxstats_size(const struct net_device *dev, int attr) } return numvls * nla_total_size(sizeof(struct bridge_vlan_xstats)) + - nla_total_size(sizeof(struct br_mcast_stats)) + + nla_total_size_64bit(sizeof(struct br_mcast_stats)) + nla_total_size(0); } diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index a566289628522..910f164dd20cb 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -53,20 +53,6 @@ struct chnl_net { enum caif_states state; }; -static void robust_list_del(struct list_head *delete_node) -{ - struct list_head *list_node; - struct list_head *n; - ASSERT_RTNL(); - list_for_each_safe(list_node, n, &chnl_net_list) { - if (list_node == delete_node) { - list_del(list_node); - return; - } - } - WARN_ON(1); -} - static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt) { struct sk_buff *skb; @@ -368,6 +354,7 @@ static int chnl_net_init(struct net_device *dev) ASSERT_RTNL(); priv = netdev_priv(dev); strncpy(priv->name, dev->name, sizeof(priv->name)); + INIT_LIST_HEAD(&priv->list_field); return 0; } @@ -376,7 +363,7 @@ static void chnl_net_uninit(struct net_device *dev) struct chnl_net *priv; ASSERT_RTNL(); priv = netdev_priv(dev); - robust_list_del(&priv->list_field); + list_del_init(&priv->list_field); } static const struct net_device_ops netdev_ops = { @@ -541,7 +528,7 @@ static void __exit chnl_exit_module(void) rtnl_lock(); list_for_each_safe(list_node, _tmp, &chnl_net_list) { dev = list_entry(list_node, struct chnl_net, list_field); - list_del(list_node); + list_del_init(list_node); delete_device(dev); } rtnl_unlock(); diff --git a/net/can/bcm.c b/net/can/bcm.c index 1e17778d5ceeb..63d81147fb4e3 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -102,6 +102,7 @@ static inline u64 get_u64(const struct canfd_frame *cp, int offset) struct bcm_op { struct list_head list; + struct rcu_head rcu; int ifindex; canid_t can_id; u32 flags; @@ -720,10 +721,9 @@ static struct bcm_op *bcm_find_op(struct list_head *ops, return NULL; } -static void bcm_remove_op(struct bcm_op *op) +static void bcm_free_op_rcu(struct rcu_head *rcu_head) { - hrtimer_cancel(&op->timer); - hrtimer_cancel(&op->thrtimer); + struct bcm_op *op = container_of(rcu_head, struct bcm_op, rcu); if ((op->frames) && (op->frames != &op->sframe)) kfree(op->frames); @@ -734,6 +734,14 @@ static void bcm_remove_op(struct bcm_op *op) kfree(op); } +static void bcm_remove_op(struct bcm_op *op) +{ + hrtimer_cancel(&op->timer); + hrtimer_cancel(&op->thrtimer); + + call_rcu(&op->rcu, bcm_free_op_rcu); +} + static void bcm_rx_unreg(struct net_device *dev, struct bcm_op *op) { if (op->rx_reg_dev == dev) { @@ -759,6 +767,9 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh, if ((op->can_id == mh->can_id) && (op->ifindex == ifindex) && (op->flags & CAN_FD_FRAME) == (mh->flags & CAN_FD_FRAME)) { + /* disable automatic timer on frame reception */ + op->flags |= RX_NO_AUTOTIMER; + /* * Don't care if we're bound or not (due to netdev * problems) can_rx_unregister() is always a save @@ -787,7 +798,6 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh, bcm_rx_handler, op); list_del(&op->list); - synchronize_rcu(); bcm_remove_op(op); return 1; /* done */ } diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h index 12369b604ce95..cea712fb2a9e0 100644 --- a/net/can/j1939/j1939-priv.h +++ b/net/can/j1939/j1939-priv.h @@ -326,6 +326,7 @@ int j1939_session_activate(struct j1939_session *session); void j1939_tp_schedule_txtimer(struct j1939_session *session, int msec); void j1939_session_timers_cancel(struct j1939_session *session); +#define J1939_MIN_TP_PACKET_SIZE 9 #define J1939_MAX_TP_PACKET_SIZE (7 * 0xff) #define J1939_MAX_ETP_PACKET_SIZE (7 * 0x00ffffff) diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c index 6884d18f919c7..ca75d1b8f415c 100644 --- a/net/can/j1939/main.c +++ b/net/can/j1939/main.c @@ -75,6 +75,13 @@ static void j1939_can_recv(struct sk_buff *iskb, void *data) skcb->addr.pgn = (cf->can_id >> 8) & J1939_PGN_MAX; /* set default message type */ skcb->addr.type = J1939_TP; + + if (!j1939_address_is_valid(skcb->addr.sa)) { + netdev_err_once(priv->ndev, "%s: sa is broadcast address, ignoring!\n", + __func__); + goto done; + } + if (j1939_pgn_is_pdu1(skcb->addr.pgn)) { /* Type 1: with destination address */ skcb->addr.da = skcb->addr.pgn; @@ -249,11 +256,14 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev) struct j1939_priv *priv, *priv_new; int ret; - priv = j1939_priv_get_by_ndev(ndev); + spin_lock(&j1939_netdev_lock); + priv = j1939_priv_get_by_ndev_locked(ndev); if (priv) { kref_get(&priv->rx_kref); + spin_unlock(&j1939_netdev_lock); return priv; } + spin_unlock(&j1939_netdev_lock); priv = j1939_priv_create(ndev); if (!priv) @@ -269,10 +279,10 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev) /* Someone was faster than us, use their priv and roll * back our's. */ + kref_get(&priv_new->rx_kref); spin_unlock(&j1939_netdev_lock); dev_put(ndev); kfree(priv); - kref_get(&priv_new->rx_kref); return priv_new; } j1939_priv_set(ndev, priv); diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index 6571895228f01..22f4b798d385b 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1230,12 +1230,11 @@ static enum hrtimer_restart j1939_tp_rxtimer(struct hrtimer *hrtimer) session->err = -ETIME; j1939_session_deactivate(session); } else { - netdev_alert(priv->ndev, "%s: 0x%p: rx timeout, send abort\n", - __func__, session); - j1939_session_list_lock(session->priv); if (session->state >= J1939_SESSION_ACTIVE && session->state < J1939_SESSION_ACTIVE_MAX) { + netdev_alert(priv->ndev, "%s: 0x%p: rx timeout, send abort\n", + __func__, session); j1939_session_get(session); hrtimer_start(&session->rxtimer, ms_to_ktime(J1939_XTP_ABORT_TIMEOUT_MS), @@ -1597,6 +1596,8 @@ j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv, abort = J1939_XTP_ABORT_FAULT; else if (len > priv->tp_max_packet_size) abort = J1939_XTP_ABORT_RESOURCE; + else if (len < J1939_MIN_TP_PACKET_SIZE) + abort = J1939_XTP_ABORT_FAULT; } if (abort != J1939_XTP_NO_ABORT) { @@ -1771,6 +1772,7 @@ static void j1939_xtp_rx_dpo(struct j1939_priv *priv, struct sk_buff *skb, static void j1939_xtp_rx_dat_one(struct j1939_session *session, struct sk_buff *skb) { + enum j1939_xtp_abort abort = J1939_XTP_ABORT_FAULT; struct j1939_priv *priv = session->priv; struct j1939_sk_buff_cb *skcb; struct sk_buff *se_skb = NULL; @@ -1785,9 +1787,11 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session, skcb = j1939_skb_to_cb(skb); dat = skb->data; - if (skb->len <= 1) + if (skb->len != 8) { /* makes no sense */ + abort = J1939_XTP_ABORT_UNEXPECTED_DATA; goto out_session_cancel; + } switch (session->last_cmd) { case 0xff: @@ -1885,7 +1889,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session, out_session_cancel: kfree_skb(se_skb); j1939_session_timers_cancel(session); - j1939_session_cancel(session, J1939_XTP_ABORT_FAULT); + j1939_session_cancel(session, abort); j1939_session_put(session); } @@ -2000,6 +2004,12 @@ static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb) extd = J1939_ETP; /* fall through */ case J1939_TP_CMD_BAM: /* fall through */ + if (cmd == J1939_TP_CMD_BAM && !j1939_cb_is_broadcast(skcb)) { + netdev_err_once(priv->ndev, "%s: BAM to unicast (%02x), ignoring!\n", + __func__, skcb->addr.sa); + return; + } + fallthrough; case J1939_TP_CMD_RTS: /* fall through */ if (skcb->addr.type != extd) return; @@ -2061,6 +2071,12 @@ static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb) break; case J1939_ETP_CMD_ABORT: /* && J1939_TP_CMD_ABORT */ + if (j1939_cb_is_broadcast(skcb)) { + netdev_err_once(priv->ndev, "%s: abort to broadcast (%02x), ignoring!\n", + __func__, skcb->addr.sa); + return; + } + if (j1939_tp_im_transmitter(skcb)) j1939_xtp_rx_abort(priv, skb, true); diff --git a/net/core/dev.c b/net/core/dev.c index 5cc958716862b..db723a9ffa8f8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2589,6 +2589,8 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) if (dev->num_tc) netif_setup_tc(dev, txq); + dev_qdisc_change_real_num_tx(dev, txq); + dev->real_num_tx_queues = txq; if (disabling) { @@ -2787,6 +2789,12 @@ static u16 skb_tx_hash(const struct net_device *dev, qoffset = sb_dev->tc_to_txq[tc].offset; qcount = sb_dev->tc_to_txq[tc].count; + if (unlikely(!qcount)) { + net_warn_ratelimited("%s: invalid qcount, qoffset %u for tc %u\n", + sb_dev->name, qoffset, tc); + qoffset = 0; + qcount = dev->real_num_tx_queues; + } } if (skb_rx_queue_recorded(skb)) { @@ -3481,7 +3489,8 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) skb_reset_mac_header(skb); __skb_pull(skb, skb_network_offset(skb)); skb->pkt_type = PACKET_LOOPBACK; - skb->ip_summed = CHECKSUM_UNNECESSARY; + if (skb->ip_summed == CHECKSUM_NONE) + skb->ip_summed = CHECKSUM_UNNECESSARY; WARN_ON(!skb_dst(skb)); skb_dst_force(skb); netif_rx_ni(skb); @@ -3757,7 +3766,10 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) if (dev->flags & IFF_UP) { int cpu = smp_processor_id(); /* ok because BHs are off */ - if (txq->xmit_lock_owner != cpu) { + /* Other cpus might concurrently change txq->xmit_lock_owner + * to -1 or to their cpu id, but not to our id. + */ + if (READ_ONCE(txq->xmit_lock_owner) != cpu) { if (dev_xmit_recursion()) goto recursion_alert; @@ -3943,6 +3955,256 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, * CPU from the RPS map of the receiving queue for a given skb. * rcu_read_lock must be held on entry. */ +#define DEFAULT_HASH_BUCKET_BIT (8) +#define DEFAULT_QUOTA_PER_CPU (128) +static unsigned int quota_per_cpu = DEFAULT_QUOTA_PER_CPU; +DEFINE_STATIC_KEY_FALSE(rps_flow_queue_enable_key); +static atomic_t last_aging_jiffies; +/* default 5000 ms */ +static unsigned long max_aging_time = 5000; +static DEFINE_HASHTABLE(rps_flow_info_htable, DEFAULT_HASH_BUCKET_BIT); +static DEFINE_RWLOCK(rps_flow_rwlock); + +unsigned long rps_flow_node_aging_time_get(void) +{ + return max_aging_time; +} + +void rps_flow_node_aging_time_set(unsigned long aging_time) +{ + max_aging_time = aging_time; +} + +static inline bool ipv4_flow_match(const struct iphdr *iph, struct rps_flow_info_node *flow) +{ + return iph->saddr == flow->v4_saddr && iph->daddr == flow->v4_daddr && + iph->protocol == flow->v4_protocol; +} + +static inline u32 ipv4_hash_key(const struct iphdr *iph) +{ + return iph->saddr ^ iph->daddr ^ iph->protocol; +} + +static inline bool ipv6_flow_match(const struct ipv6hdr *ip6h, struct rps_flow_info_node *p_flow) +{ + /* flow_lbl maximum probability is different, so compare it first */ + if (memcmp(ip6h->flow_lbl, p_flow->v6_flow_lbl, sizeof(p_flow->v6_flow_lbl))) + return false; + + if (memcmp(ip6h->daddr.s6_addr32, p_flow->v6_daddr, sizeof(p_flow->v6_daddr))) + return false; + + if (memcmp(ip6h->saddr.s6_addr32, p_flow->v6_saddr, sizeof(p_flow->v6_saddr))) + return false; + + return true; +} + +static inline u32 ipv6_hash_key(const struct ipv6hdr *ip6h) +{ + /* saddr hash daddr hash and lbl hash */ + u32 hash = 0; + u8 count; + + for (count = 0; count < sizeof(ip6h->saddr.s6_addr32) / sizeof(u32); count++) + hash ^= ip6h->saddr.s6_addr32[count]; + + for (count = 0; count < sizeof(ip6h->daddr.s6_addr32) / sizeof(u32); count++) + hash ^= ip6h->daddr.s6_addr32[count]; + + for (count = 0; count < sizeof(ip6h->flow_lbl); count++) + hash ^= ip6h->flow_lbl[count]; + + return hash; +} + +static bool rps_flow_node_create_v6(const struct ipv6hdr *ip6h) +{ + struct rps_flow_info_node *p_flow; + u32 hash_key; + + hash_key = ipv6_hash_key(ip6h); + + /* find if the node already exist */ + read_lock(&rps_flow_rwlock); + hash_for_each_possible(rps_flow_info_htable, p_flow, node, hash_key) { + if (ipv6_flow_match(ip6h, p_flow)) { + p_flow->jiffies = jiffies; + read_unlock(&rps_flow_rwlock); + return true; + } + } + read_unlock(&rps_flow_rwlock); + + /* if not exist. create a new one */ + p_flow = kzalloc(sizeof(*p_flow), GFP_KERNEL); + if (!p_flow) + return false; + + memcpy(p_flow->v6_saddr, ip6h->saddr.s6_addr, sizeof(ip6h->saddr.s6_addr)); + memcpy(p_flow->v6_daddr, ip6h->daddr.s6_addr, sizeof(ip6h->daddr.s6_addr)); + memcpy(p_flow->v6_flow_lbl, ip6h->flow_lbl, sizeof(ip6h->flow_lbl)); + + p_flow->jiffies = jiffies; + + /* add the hash node */ + write_lock(&rps_flow_rwlock); + hash_add(rps_flow_info_htable, &p_flow->node, hash_key); + write_unlock(&rps_flow_rwlock); + return true; +} + +static bool rps_flow_node_create_v4(const struct iphdr *iph) +{ + struct rps_flow_info_node *p_flow; + u32 hash_key; + + hash_key = ipv4_hash_key(iph); + + /* find if the node already exist */ + read_lock(&rps_flow_rwlock); + hash_for_each_possible(rps_flow_info_htable, p_flow, node, hash_key) { + if (ipv4_flow_match(iph, p_flow)) { + p_flow->jiffies = jiffies; + read_unlock(&rps_flow_rwlock); + return true; + } + } + read_unlock(&rps_flow_rwlock); + + /* if not exist. create a new one */ + p_flow = kzalloc(sizeof(*p_flow), GFP_KERNEL); + if (!p_flow) + return false; + + p_flow->v4_saddr = iph->saddr; + p_flow->v4_daddr = iph->daddr; + p_flow->v4_protocol = iph->protocol; + p_flow->jiffies = jiffies; + + /* add the hash node */ + write_lock(&rps_flow_rwlock); + hash_add(rps_flow_info_htable, &p_flow->node, hash_key); + write_unlock(&rps_flow_rwlock); + return true; +} + +/* create rps flow node if not exist. + * update Timestamp for rps flow node if exist + */ +bool __rps_flow_node_add(const void *iph, bool ipv6) +{ + struct ipv6hdr *ip6h; + struct iphdr *ip4h; + + if (!ipv6) { + ip4h = (struct iphdr *)iph; + return rps_flow_node_create_v4(ip4h); + } + + ip6h = (struct ipv6hdr *)iph; + return rps_flow_node_create_v6(ip6h); +} + +static void rps_flow_node_clear(void) +{ + struct rps_flow_info_node *p_rps_flow_entry; + struct hlist_node *tmp; + u32 bucket; + + write_lock(&rps_flow_rwlock); + hash_for_each_safe(rps_flow_info_htable, bucket, tmp, p_rps_flow_entry, node) { + hash_del(&p_rps_flow_entry->node); + kfree(p_rps_flow_entry); + } + write_unlock(&rps_flow_rwlock); +} + +void rps_single_flow_enable_set(bool enable) +{ + if (enable) { + static_branch_inc(&rps_flow_queue_enable_key); + } else { + static_branch_dec(&rps_flow_queue_enable_key); + if (static_key_false(&rps_flow_queue_enable_key.key)) + rps_flow_node_clear(); + } +} + +/* compute hash */ +static inline u32 rps_flow_hash_update(void) +{ + static u32 packet_count; + static u32 hash_count; + + packet_count++; + if (packet_count % quota_per_cpu) { + packet_count = 0; + hash_count++; + } + return hash_count; +} + +/* delete aging rps_flow */ +static inline bool rps_flow_node_aging_period(void) +{ + struct rps_flow_info_node *p_rps_flow_entry; + struct hlist_node *tmp; + u32 bucket; + + if (jiffies_to_msecs(jiffies - atomic_read(&last_aging_jiffies)) < max_aging_time) + return false; + + atomic_set(&last_aging_jiffies, jiffies); + write_lock(&rps_flow_rwlock); + hash_for_each_safe(rps_flow_info_htable, bucket, tmp, p_rps_flow_entry, node) { + if (jiffies_to_msecs(jiffies - p_rps_flow_entry->jiffies) >= max_aging_time) { + hash_del(&p_rps_flow_entry->node); + kfree(p_rps_flow_entry); + } + } + write_unlock(&rps_flow_rwlock); + return true; +} + +/* find active rps_flow */ +static inline +struct rps_flow_info_node *rps_flow_find_active_node_v4(const struct iphdr *iph) +{ + struct rps_flow_info_node *p_rps_flow; + u32 hash_key = ipv4_hash_key(iph); + + read_lock(&rps_flow_rwlock); + hash_for_each_possible(rps_flow_info_htable, p_rps_flow, node, hash_key) { + if (ipv4_flow_match(iph, p_rps_flow) && + (jiffies_to_msecs(jiffies - p_rps_flow->jiffies) < max_aging_time)) { + read_unlock(&rps_flow_rwlock); + return p_rps_flow; + } + } + read_unlock(&rps_flow_rwlock); + return NULL; +} + +static inline +struct rps_flow_info_node *rps_flow_find_active_node_v6(const struct ipv6hdr *ip6h) +{ + struct rps_flow_info_node *p_rps_flow; + u32 hash_key = ipv6_hash_key(ip6h); + + read_lock(&rps_flow_rwlock); + hash_for_each_possible(rps_flow_info_htable, p_rps_flow, node, hash_key) { + if (ipv6_flow_match(ip6h, p_rps_flow) && + (jiffies_to_msecs(jiffies - p_rps_flow->jiffies) < max_aging_time)) { + read_unlock(&rps_flow_rwlock); + return p_rps_flow; + } + } + read_unlock(&rps_flow_rwlock); + return NULL; +} + static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_dev_flow **rflowp) { @@ -3975,6 +4237,70 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, goto done; skb_reset_network_header(skb); + /* this is to set rps for single flow */ + if (unlikely(test_bit(RPS_SINGLE_FLOW_ENABLE, &rxqueue->rps_single_flow_flags))) { + u32 hash_single_flow; + + /* clean the old node */ + rps_flow_node_aging_period(); + + /* no rps ,skip it */ + if (!map) + goto origin_rps; + + /* skip vlan first */ + if (skb_vlan_tag_present(skb)) + goto origin_rps; + + switch (skb->protocol) { + /* ipv4 */ + case htons(ETH_P_IP): { + const struct iphdr *iph; + struct rps_flow_info_node *p_flow; + + iph = (struct iphdr *)skb_network_header(skb); + /* hash map to match the src and dest ipaddr */ + p_flow = rps_flow_find_active_node_v4(iph); + /* check if vailed */ + if (p_flow) { + pr_debug("v4 rps, flow info: saddr = %pI4, daddr =%pI4, proto=%d\n", + &p_flow->v4_saddr, + &p_flow->v4_daddr, + p_flow->v4_protocol); + } else { + goto origin_rps; + } + break; + } + case htons(ETH_P_IPV6): { + const struct ipv6hdr *ip6h; + struct rps_flow_info_node *p_flow; + + ip6h = (struct ipv6hdr *)skb_network_header(skb); + p_flow = rps_flow_find_active_node_v6(ip6h); + if (p_flow) { + pr_debug("v6 rps,flow info:saddr = %pI6, daddr = %pI6.\n", + &p_flow->v6_saddr, &p_flow->v6_daddr); + } else { + goto origin_rps; + } + break; + } + default: + goto origin_rps; + } + + /* get the target cpu */ + hash_single_flow = rps_flow_hash_update(); + tcpu = map->cpus[hash_single_flow % map->len]; + if (cpu_online(tcpu)) { + cpu = tcpu; + pr_debug("single flow rps, target cpu id = %d\n", cpu); + return cpu; + } + } + +origin_rps: hash = skb_get_hash(skb); if (!hash) goto done; diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index af0130039f377..e8e8389ddc965 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -277,13 +277,17 @@ static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi, rcu_read_lock(); list_for_each_entry_rcu(new_stat, &hw_stats_list, list) { + struct net_device *dev; + /* * only add a note to our monitor buffer if: * 1) this is the dev we received on * 2) its after the last_rx delta * 3) our rx_dropped count has gone up */ - if ((new_stat->dev == napi->dev) && + /* Paired with WRITE_ONCE() in dropmon_net_event() */ + dev = READ_ONCE(new_stat->dev); + if ((dev == napi->dev) && (time_after(jiffies, new_stat->last_rx + dm_hw_check_delta)) && (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) { trace_drop_common(NULL, NULL); @@ -1497,7 +1501,10 @@ static int dropmon_net_event(struct notifier_block *ev_block, mutex_lock(&net_dm_mutex); list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) { if (new_stat->dev == dev) { - new_stat->dev = NULL; + + /* Paired with READ_ONCE() in trace_napi_poll_hit() */ + WRITE_ONCE(new_stat->dev, NULL); + if (trace_state == TRACE_OFF) { list_del_rcu(&new_stat->list); kfree_rcu(new_stat, rcu); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index cbd1885f24592..fec1b0735b73f 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -2190,7 +2190,7 @@ static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr) return err; } -static int __ethtool_get_module_info(struct net_device *dev, +int ethtool_get_module_info_call(struct net_device *dev, struct ethtool_modinfo *modinfo) { const struct ethtool_ops *ops = dev->ethtool_ops; @@ -2217,7 +2217,7 @@ static int ethtool_get_module_info(struct net_device *dev, if (copy_from_user(&modinfo, useraddr, sizeof(modinfo))) return -EFAULT; - ret = __ethtool_get_module_info(dev, &modinfo); + ret = ethtool_get_module_info_call(dev, &modinfo); if (ret) return ret; @@ -2227,7 +2227,7 @@ static int ethtool_get_module_info(struct net_device *dev, return 0; } -static int __ethtool_get_module_eeprom(struct net_device *dev, +int ethtool_get_module_eeprom_call(struct net_device *dev, struct ethtool_eeprom *ee, u8 *data) { const struct ethtool_ops *ops = dev->ethtool_ops; @@ -2251,12 +2251,12 @@ static int ethtool_get_module_eeprom(struct net_device *dev, int ret; struct ethtool_modinfo modinfo; - ret = __ethtool_get_module_info(dev, &modinfo); + ret = ethtool_get_module_info_call(dev, &modinfo); if (ret) return ret; return ethtool_get_any_eeprom(dev, useraddr, - __ethtool_get_module_eeprom, + ethtool_get_module_eeprom_call, modinfo.eeprom_len); } diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 675f27ef6872f..83299a85480aa 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -300,7 +300,7 @@ int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl, else err = ops->action(rule, fl, flags, arg); - if (!err && ops->suppress && ops->suppress(rule, arg)) + if (!err && ops->suppress && ops->suppress(rule, flags, arg)) continue; if (err != -EAGAIN) { diff --git a/net/core/filter.c b/net/core/filter.c index d62cac6379170..8a26e30425682 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1743,7 +1743,7 @@ BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset, if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH))) return -EINVAL; - if (unlikely(offset > 0xffff)) + if (unlikely(offset > INT_MAX)) return -EFAULT; if (unlikely(bpf_try_make_writable(skb, offset + len))) return -EFAULT; @@ -1778,7 +1778,7 @@ BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset, { void *ptr; - if (unlikely(offset > 0xffff)) + if (unlikely(offset > INT_MAX)) goto err_clear; ptr = skb_header_pointer(skb, offset, len, to); @@ -2752,6 +2752,9 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, if (unlikely(flags)) return -EINVAL; + if (unlikely(len == 0)) + return 0; + /* First find the starting scatterlist element */ i = msg->sg.start; do { @@ -3413,6 +3416,7 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb) BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \ BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \ BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \ + BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \ BPF_F_ADJ_ROOM_ENCAP_L2( \ BPF_ADJ_ROOM_ENCAP_L2_MASK)) @@ -3449,6 +3453,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) return -EINVAL; + if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH && + inner_mac_len < ETH_HLEN) + return -EINVAL; + if (skb->encapsulation) return -EALREADY; @@ -3467,7 +3475,11 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, skb->inner_mac_header = inner_net - inner_mac_len; skb->inner_network_header = inner_net; skb->inner_transport_header = inner_trans; - skb_set_inner_protocol(skb, skb->protocol); + + if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH) + skb_set_inner_protocol(skb, htons(ETH_P_TEB)); + else + skb_set_inner_protocol(skb, skb->protocol); skb->encapsulation = 1; skb_set_network_header(skb, mac_len); @@ -4661,12 +4673,14 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, switch (optname) { case SO_RCVBUF: val = min_t(u32, val, sysctl_rmem_max); + val = min_t(int, val, INT_MAX / 2); sk->sk_userlocks |= SOCK_RCVBUF_LOCK; WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF)); break; case SO_SNDBUF: val = min_t(u32, val, sysctl_wmem_max); + val = min_t(int, val, INT_MAX / 2); sk->sk_userlocks |= SOCK_SNDBUF_LOCK; WRITE_ONCE(sk->sk_sndbuf, max_t(int, val * 2, SOCK_MIN_SNDBUF)); @@ -5521,7 +5535,6 @@ static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len if (err) return err; - ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); return seg6_lookup_nexthop(skb, NULL, 0); @@ -5879,10 +5892,21 @@ __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, ifindex, proto, netns_id, flags); if (sk) { - sk = sk_to_full_sk(sk); - if (!sk_fullsock(sk)) { + struct sock *sk2 = sk_to_full_sk(sk); + + /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk + * sock refcnt is decremented to prevent a request_sock leak. + */ + if (!sk_fullsock(sk2)) + sk2 = NULL; + if (sk2 != sk) { sock_gen_put(sk); - return NULL; + /* Ensure there is no need to bump sk2 refcnt */ + if (unlikely(sk2 && !sock_flag(sk2, SOCK_RCU_FREE))) { + WARN_ONCE(1, "Found non-RCU, unreferenced socket!"); + return NULL; + } + sk = sk2; } } @@ -5916,10 +5940,21 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, flags); if (sk) { - sk = sk_to_full_sk(sk); - if (!sk_fullsock(sk)) { + struct sock *sk2 = sk_to_full_sk(sk); + + /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk + * sock refcnt is decremented to prevent a request_sock leak. + */ + if (!sk_fullsock(sk2)) + sk2 = NULL; + if (sk2 != sk) { sock_gen_put(sk); - return NULL; + /* Ensure there is no need to bump sk2 refcnt */ + if (unlikely(sk2 && !sock_flag(sk2, SOCK_RCU_FREE))) { + WARN_ONCE(1, "Found non-RCU, unreferenced socket!"); + return NULL; + } + sk = sk2; } } @@ -6383,30 +6418,39 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN) return -EINVAL; - if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies)) return -EINVAL; if (!th->ack || th->rst || th->syn) return -ENOENT; + if (unlikely(iph_len < sizeof(struct iphdr))) + return -EINVAL; + if (tcp_synq_no_recent_overflow(sk)) return -ENOENT; cookie = ntohl(th->ack_seq) - 1; - switch (sk->sk_family) { - case AF_INET: - if (unlikely(iph_len < sizeof(struct iphdr))) + /* Both struct iphdr and struct ipv6hdr have the version field at the + * same offset so we can cast to the shorter header (struct iphdr). + */ + switch (((struct iphdr *)iph)->version) { + case 4: + if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk)) return -EINVAL; ret = __cookie_v4_check((struct iphdr *)iph, th, cookie); break; #if IS_BUILTIN(CONFIG_IPV6) - case AF_INET6: + case 6: if (unlikely(iph_len < sizeof(struct ipv6hdr))) return -EINVAL; + if (sk->sk_family != AF_INET6) + return -EINVAL; + ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie); break; #endif /* CONFIG_IPV6 */ @@ -6449,7 +6493,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len, if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN) return -EINVAL; - if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies)) return -ENOENT; if (!th->syn || th->ack || th->fin || th->rst) @@ -7596,6 +7640,7 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { const int size_default = sizeof(__u32); + int field_size; if (off < 0 || off >= sizeof(struct bpf_sock)) return false; @@ -7607,7 +7652,6 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, case offsetof(struct bpf_sock, family): case offsetof(struct bpf_sock, type): case offsetof(struct bpf_sock, protocol): - case offsetof(struct bpf_sock, dst_port): case offsetof(struct bpf_sock, src_port): case bpf_ctx_range(struct bpf_sock, src_ip4): case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]): @@ -7615,6 +7659,14 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]): bpf_ctx_record_field_size(info, size_default); return bpf_ctx_narrow_access_ok(off, size, size_default); + case bpf_ctx_range(struct bpf_sock, dst_port): + field_size = size == size_default ? + size_default : sizeof_field(struct bpf_sock, dst_port); + bpf_ctx_record_field_size(info, field_size); + return bpf_ctx_narrow_access_ok(off, size, field_size); + case offsetofend(struct bpf_sock, dst_port) ... + offsetof(struct bpf_sock, dst_ip4) - 1: + return false; } return size == size_default; @@ -7803,9 +7855,9 @@ void bpf_warn_invalid_xdp_action(u32 act) { const u32 act_max = XDP_REDIRECT; - WARN_ONCE(1, "%s XDP return value %u, expect packet loss!\n", - act > act_max ? "Illegal" : "Driver unsupported", - act); + pr_warn_once("%s XDP return value %u, expect packet loss!\n", + act > act_max ? "Illegal" : "Driver unsupported", + act); } EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); @@ -9315,6 +9367,27 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type, *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg, off); break; + case offsetof(struct __sk_buff, cb[0]) ... + offsetofend(struct __sk_buff, cb[4]) - 1: + BUILD_BUG_ON(sizeof_field(struct sk_skb_cb, data) < 20); + BUILD_BUG_ON((offsetof(struct sk_buff, cb) + + offsetof(struct sk_skb_cb, data)) % + sizeof(__u64)); + + prog->cb_access = 1; + off = si->off; + off -= offsetof(struct __sk_buff, cb[0]); + off += offsetof(struct sk_buff, cb); + off += offsetof(struct sk_skb_cb, data); + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg, + si->src_reg, off); + else + *insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg, + si->src_reg, off); + break; + + default: return bpf_convert_ctx_access(type, si, insn_buf, prog, target_size); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 96957a7c732fa..4dac27c986231 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1025,8 +1025,10 @@ bool __skb_flow_dissect(const struct net *net, FLOW_DISSECTOR_KEY_IPV4_ADDRS, target_container); - memcpy(&key_addrs->v4addrs, &iph->saddr, - sizeof(key_addrs->v4addrs)); + memcpy(&key_addrs->v4addrs.src, &iph->saddr, + sizeof(key_addrs->v4addrs.src)); + memcpy(&key_addrs->v4addrs.dst, &iph->daddr, + sizeof(key_addrs->v4addrs.dst)); key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; } @@ -1070,8 +1072,10 @@ bool __skb_flow_dissect(const struct net *net, FLOW_DISSECTOR_KEY_IPV6_ADDRS, target_container); - memcpy(&key_addrs->v6addrs, &iph->saddr, - sizeof(key_addrs->v6addrs)); + memcpy(&key_addrs->v6addrs.src, &iph->saddr, + sizeof(key_addrs->v6addrs.src)); + memcpy(&key_addrs->v6addrs.dst, &iph->daddr, + sizeof(key_addrs->v6addrs.dst)); key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; } @@ -1145,6 +1149,7 @@ bool __skb_flow_dissect(const struct net *net, VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; } key_vlan->vlan_tpid = saved_vlan_tpid; + key_vlan->vlan_eth_type = proto; } fdret = FLOW_DISSECT_RET_PROTO_AGAIN; diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index a5502c5aa44e7..bf270b6a99b4f 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -158,10 +158,8 @@ static int bpf_output(struct net *net, struct sock *sk, struct sk_buff *skb) return dst->lwtstate->orig_output(net, sk, skb); } -static int xmit_check_hhlen(struct sk_buff *skb) +static int xmit_check_hhlen(struct sk_buff *skb, int hh_len) { - int hh_len = skb_dst(skb)->dev->hard_header_len; - if (skb_headroom(skb) < hh_len) { int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb)); @@ -273,6 +271,7 @@ static int bpf_xmit(struct sk_buff *skb) bpf = bpf_lwt_lwtunnel(dst->lwtstate); if (bpf->xmit.prog) { + int hh_len = dst->dev->hard_header_len; __be16 proto = skb->protocol; int ret; @@ -290,7 +289,7 @@ static int bpf_xmit(struct sk_buff *skb) /* If the header was expanded, headroom might be too * small for L2 header to come, expand as needed. */ - ret = xmit_check_hhlen(skb); + ret = xmit_check_hhlen(skb, hh_len); if (unlikely(ret)) return ret; diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 2f9c0de533c75..0b64f015b3b0b 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -190,6 +190,10 @@ int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining, nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); if (nla_entype) { + if (nla_len(nla_entype) < sizeof(u16)) { + NL_SET_ERR_MSG(extack, "Invalid RTA_ENCAP_TYPE"); + return -EINVAL; + } encap_type = nla_get_u16(nla_entype); if (lwtunnel_valid_encap_type(encap_type, diff --git a/net/core/neighbour.c b/net/core/neighbour.c index f94d405358a21..8b6140e67e7f8 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -380,7 +380,7 @@ EXPORT_SYMBOL(neigh_ifdown); static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev, - bool exempt_from_gc) + u8 flags, bool exempt_from_gc) { struct neighbour *n = NULL; unsigned long now = jiffies; @@ -413,6 +413,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, n->updated = n->used = now; n->nud_state = NUD_NONE; n->output = neigh_blackhole; + n->flags = flags; seqlock_init(&n->hh.hh_lock); n->parms = neigh_parms_clone(&tbl->parms); timer_setup(&n->timer, neigh_timer_handler, 0); @@ -576,19 +577,18 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, } EXPORT_SYMBOL(neigh_lookup_nodev); -static struct neighbour *___neigh_create(struct neigh_table *tbl, - const void *pkey, - struct net_device *dev, - bool exempt_from_gc, bool want_ref) +static struct neighbour * +___neigh_create(struct neigh_table *tbl, const void *pkey, + struct net_device *dev, u8 flags, + bool exempt_from_gc, bool want_ref) { - struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, exempt_from_gc); - u32 hash_val; - unsigned int key_len = tbl->key_len; - int error; + u32 hash_val, key_len = tbl->key_len; + struct neighbour *n1, *rc, *n; struct neigh_hash_table *nht; + int error; + n = neigh_alloc(tbl, dev, flags, exempt_from_gc); trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc); - if (!n) { rc = ERR_PTR(-ENOBUFS); goto out; @@ -675,7 +675,7 @@ static struct neighbour *___neigh_create(struct neigh_table *tbl, struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, struct net_device *dev, bool want_ref) { - return ___neigh_create(tbl, pkey, dev, false, want_ref); + return ___neigh_create(tbl, pkey, dev, 0, false, want_ref); } EXPORT_SYMBOL(__neigh_create); @@ -734,11 +734,10 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, ASSERT_RTNL(); - n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL); + n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL); if (!n) goto out; - n->protocol = 0; write_pnet(&n->net, net); memcpy(n->key, pkey, key_len); n->dev = dev; @@ -1221,7 +1220,7 @@ static void neigh_update_hhs(struct neighbour *neigh) lladdr instead of overriding it if it is different. NEIGH_UPDATE_F_ADMIN means that the change is administrative. - + NEIGH_UPDATE_F_USE means that the entry is user triggered. NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing NTF_ROUTER flag. NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as @@ -1259,6 +1258,12 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, goto out; ext_learn_change = neigh_update_ext_learned(neigh, flags, ¬ify); + if (flags & NEIGH_UPDATE_F_USE) { + new = old & ~NUD_PERMANENT; + neigh->nud_state = new; + err = 0; + goto out; + } if (!(new & NUD_VALID)) { neigh_del_timer(neigh); @@ -1945,7 +1950,9 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, exempt_from_gc = ndm->ndm_state & NUD_PERMANENT || ndm->ndm_flags & NTF_EXT_LEARNED; - neigh = ___neigh_create(tbl, dst, dev, exempt_from_gc, true); + neigh = ___neigh_create(tbl, dst, dev, + ndm->ndm_flags & NTF_EXT_LEARNED, + exempt_from_gc, true); if (IS_ERR(neigh)) { err = PTR_ERR(neigh); goto out; @@ -1964,22 +1971,20 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, if (protocol) neigh->protocol = protocol; - if (ndm->ndm_flags & NTF_EXT_LEARNED) flags |= NEIGH_UPDATE_F_EXT_LEARNED; - if (ndm->ndm_flags & NTF_ROUTER) flags |= NEIGH_UPDATE_F_ISROUTER; + if (ndm->ndm_flags & NTF_USE) + flags |= NEIGH_UPDATE_F_USE; - if (ndm->ndm_flags & NTF_USE) { + err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags, + NETLINK_CB(skb).portid, extack); + if (!err && ndm->ndm_flags & NTF_USE) { neigh_event_send(neigh, NULL); err = 0; - } else - err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags, - NETLINK_CB(skb).portid, extack); - + } neigh_release(neigh); - out: return err; } diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 36347933ec3af..61f5570645e38 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -182,12 +182,23 @@ static const struct seq_operations softnet_seq_ops = { .show = softnet_seq_show, }; -static void *ptype_get_idx(loff_t pos) +static void *ptype_get_idx(struct seq_file *seq, loff_t pos) { + struct list_head *ptype_list = NULL; struct packet_type *pt = NULL; + struct net_device *dev; loff_t i = 0; int t; + for_each_netdev_rcu(seq_file_net(seq), dev) { + ptype_list = &dev->ptype_all; + list_for_each_entry_rcu(pt, ptype_list, list) { + if (i == pos) + return pt; + ++i; + } + } + list_for_each_entry_rcu(pt, &ptype_all, list) { if (i == pos) return pt; @@ -208,22 +219,40 @@ static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { rcu_read_lock(); - return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN; + return *pos ? ptype_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct net_device *dev; struct packet_type *pt; struct list_head *nxt; int hash; ++*pos; if (v == SEQ_START_TOKEN) - return ptype_get_idx(0); + return ptype_get_idx(seq, 0); pt = v; nxt = pt->list.next; + if (pt->dev) { + if (nxt != &pt->dev->ptype_all) + goto found; + + dev = pt->dev; + for_each_netdev_continue_rcu(seq_file_net(seq), dev) { + if (!list_empty(&dev->ptype_all)) { + nxt = dev->ptype_all.next; + goto found; + } + } + + nxt = ptype_all.next; + goto ptype_all; + } + if (pt->type == htons(ETH_P_ALL)) { +ptype_all: if (nxt != &ptype_all) goto found; hash = 0; @@ -252,7 +281,8 @@ static int ptype_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) seq_puts(seq, "Type Device Function\n"); - else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { + else if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) && + (!pt->dev || net_eq(dev_net(pt->dev), seq_file_net(seq)))) { if (pt->type == htons(ETH_P_ALL)) seq_puts(seq, "ALL "); else diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 98474d85fb51f..afa098ca7bcec 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -174,6 +174,14 @@ static int change_carrier(struct net_device *dev, unsigned long new_carrier) static ssize_t carrier_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { + struct net_device *netdev = to_net_dev(dev); + + /* The check is also done in change_carrier; this helps returning early + * without hitting the trylock/restart in netdev_store. + */ + if (!netdev->netdev_ops->ndo_change_carrier) + return -EOPNOTSUPP; + return netdev_store(dev, attr, buf, len, change_carrier); } @@ -195,10 +203,16 @@ static ssize_t speed_show(struct device *dev, struct net_device *netdev = to_net_dev(dev); int ret = -EINVAL; + /* The check is also done in __ethtool_get_link_ksettings; this helps + * returning early without hitting the trylock/restart below. + */ + if (!netdev->ethtool_ops->get_link_ksettings) + return ret; + if (!rtnl_trylock()) return restart_syscall(); - if (netif_running(netdev)) { + if (netif_running(netdev) && netif_device_present(netdev)) { struct ethtool_link_ksettings cmd; if (!__ethtool_get_link_ksettings(netdev, &cmd)) @@ -215,6 +229,12 @@ static ssize_t duplex_show(struct device *dev, struct net_device *netdev = to_net_dev(dev); int ret = -EINVAL; + /* The check is also done in __ethtool_get_link_ksettings; this helps + * returning early without hitting the trylock/restart below. + */ + if (!netdev->ethtool_ops->get_link_ksettings) + return ret; + if (!rtnl_trylock()) return restart_syscall(); @@ -438,6 +458,14 @@ static ssize_t proto_down_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { + struct net_device *netdev = to_net_dev(dev); + + /* The check is also done in change_proto_down; this helps returning + * early without hitting the trylock/restart in netdev_store. + */ + if (!netdev->netdev_ops->ndo_change_proto_down) + return -EOPNOTSUPP; + return netdev_store(dev, attr, buf, len, change_proto_down); } NETDEVICE_SHOW_RW(proto_down, fmt_dec); @@ -448,6 +476,12 @@ static ssize_t phys_port_id_show(struct device *dev, struct net_device *netdev = to_net_dev(dev); ssize_t ret = -EINVAL; + /* The check is also done in dev_get_phys_port_id; this helps returning + * early without hitting the trylock/restart below. + */ + if (!netdev->netdev_ops->ndo_get_phys_port_id) + return -EOPNOTSUPP; + if (!rtnl_trylock()) return restart_syscall(); @@ -470,6 +504,13 @@ static ssize_t phys_port_name_show(struct device *dev, struct net_device *netdev = to_net_dev(dev); ssize_t ret = -EINVAL; + /* The checks are also done in dev_get_phys_port_name; this helps + * returning early without hitting the trylock/restart below. + */ + if (!netdev->netdev_ops->ndo_get_phys_port_name && + !netdev->netdev_ops->ndo_get_devlink_port) + return -EOPNOTSUPP; + if (!rtnl_trylock()) return restart_syscall(); @@ -492,6 +533,14 @@ static ssize_t phys_switch_id_show(struct device *dev, struct net_device *netdev = to_net_dev(dev); ssize_t ret = -EINVAL; + /* The checks are also done in dev_get_phys_port_name; this helps + * returning early without hitting the trylock/restart below. This works + * because recurse is false when calling dev_get_port_parent_id. + */ + if (!netdev->netdev_ops->ndo_get_port_parent_id && + !netdev->netdev_ops->ndo_get_devlink_port) + return -EOPNOTSUPP; + if (!rtnl_trylock()) return restart_syscall(); @@ -844,6 +893,71 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, return len; } +static ssize_t show_rps_flow_aging_time(struct netdev_rx_queue *queue, char *buf) +{ + unsigned long val; + + val = rps_flow_node_aging_time_get(); + return sprintf(buf, "%lu\n", val); +} + +static ssize_t store_rps_flow_aging_time(struct netdev_rx_queue *queue, const char *buf, size_t len) +{ + unsigned long val; + int rc; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + rc = kstrtoul(buf, 0, &val); + if (rc < 0) + return rc; + + /* if changed, store the new one */ + rps_flow_node_aging_time_set(val); + + return len; +} + +static ssize_t show_rps_single_flow(struct netdev_rx_queue *queue, char *buf) +{ + unsigned long val; + + val = test_bit(RPS_SINGLE_FLOW_ENABLE, &queue->rps_single_flow_flags); + return sprintf(buf, "%lu\n", val); +} + +static ssize_t store_rps_single_flow(struct netdev_rx_queue *queue, const char *buf, size_t len) +{ + bool enable; + int rc; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + rc = kstrtobool(buf, &enable); + if (rc < 0) + return rc; + + /* if changed, store the new one */ + if (enable != test_bit(RPS_SINGLE_FLOW_ENABLE, &queue->rps_single_flow_flags)) { + if (enable) + set_bit(RPS_SINGLE_FLOW_ENABLE, &queue->rps_single_flow_flags); + else + clear_bit(RPS_SINGLE_FLOW_ENABLE, &queue->rps_single_flow_flags); + + rps_single_flow_enable_set(enable); + } + + return len; +} + +static struct rx_queue_attribute rps_flow_aging_time_attribute __ro_after_init = + __ATTR(rps_flow_aging_time, 0644, show_rps_flow_aging_time, store_rps_flow_aging_time); + +static struct rx_queue_attribute rps_single_flow_attribute __ro_after_init = + __ATTR(rps_single_flow, 0644, show_rps_single_flow, store_rps_single_flow); + static struct rx_queue_attribute rps_cpus_attribute __ro_after_init = __ATTR(rps_cpus, 0644, show_rps_map, store_rps_map); @@ -854,6 +968,8 @@ static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute __ro_after_ini static struct attribute *rx_queue_default_attrs[] __ro_after_init = { #ifdef CONFIG_RPS + &rps_flow_aging_time_attribute.attr, + &rps_single_flow_attribute.attr, &rps_cpus_attribute.attr, &rps_dev_flow_table_cnt_attribute.attr, #endif @@ -1097,6 +1213,12 @@ static ssize_t tx_maxrate_store(struct netdev_queue *queue, if (!capable(CAP_NET_ADMIN)) return -EPERM; + /* The check is also done later; this helps returning early without + * hitting the trylock/restart below. + */ + if (!dev->netdev_ops->ndo_set_tx_maxrate) + return -EOPNOTSUPP; + err = kstrtou32(buf, 10, &rate); if (err < 0) return err; @@ -1606,6 +1728,9 @@ static void remove_queue_kobjects(struct net_device *dev) net_rx_queue_update_kobjects(dev, real_rx, 0); netdev_queue_update_kobjects(dev, real_tx, 0); + + dev->real_num_rx_queues = 0; + dev->real_num_tx_queues = 0; #ifdef CONFIG_SYSFS kset_unregister(dev->queues_kset); #endif diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index c303873496a34..62a972f04cefb 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -168,8 +168,10 @@ static void ops_exit_list(const struct pernet_operations *ops, { struct net *net; if (ops->exit) { - list_for_each_entry(net, net_exit_list, exit_list) + list_for_each_entry(net, net_exit_list, exit_list) { ops->exit(net); + cond_resched(); + } } if (ops->exit_batch) ops->exit_batch(net_exit_list); @@ -211,9 +213,9 @@ static int net_eq_idr(int id, void *net, void *peer) return 0; } -/* Should be called with nsid_lock held. If a new id is assigned, the bool alloc - * is set to true, thus the caller knows that the new id must be notified via - * rtnl. +/* Must be called from RCU-critical section or with nsid_lock held. If + * a new id is assigned, the bool alloc is set to true, thus the + * caller knows that the new id must be notified via rtnl. */ static int __peernet2id_alloc(struct net *net, struct net *peer, bool *alloc) { @@ -237,7 +239,7 @@ static int __peernet2id_alloc(struct net *net, struct net *peer, bool *alloc) return NETNSA_NSID_NOT_ASSIGNED; } -/* should be called with nsid_lock held */ +/* Must be called from RCU-critical section or with nsid_lock held */ static int __peernet2id(struct net *net, struct net *peer) { bool no = false; @@ -281,9 +283,10 @@ int peernet2id(struct net *net, struct net *peer) { int id; - spin_lock_bh(&net->nsid_lock); + rcu_read_lock(); id = __peernet2id(net, peer); - spin_unlock_bh(&net->nsid_lock); + rcu_read_unlock(); + return id; } EXPORT_SYMBOL(peernet2id); @@ -479,7 +482,9 @@ struct net *copy_net_ns(unsigned long flags, if (rv < 0) { put_userns: +#ifdef CONFIG_KEYS key_remove_domain(net->key_domain); +#endif put_user_ns(user_ns); net_drop_ns(net); dec_ucounts: @@ -611,7 +616,9 @@ static void cleanup_net(struct work_struct *work) list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { list_del_init(&net->exit_list); dec_net_namespaces(net->ucounts); +#ifdef CONFIG_KEYS key_remove_domain(net->key_domain); +#endif put_user_ns(net->user_ns); net_drop_ns(net); } @@ -962,6 +969,7 @@ struct rtnl_net_dump_cb { int s_idx; }; +/* Runs in RCU-critical section. */ static int rtnl_net_dumpid_one(int id, void *peer, void *data) { struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data; @@ -1046,19 +1054,9 @@ static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb) goto end; } - spin_lock_bh(&net_cb.tgt_net->nsid_lock); - if (net_cb.fillargs.add_ref && - !net_eq(net_cb.ref_net, net_cb.tgt_net) && - !spin_trylock_bh(&net_cb.ref_net->nsid_lock)) { - spin_unlock_bh(&net_cb.tgt_net->nsid_lock); - err = -EAGAIN; - goto end; - } + rcu_read_lock(); idr_for_each(&net_cb.tgt_net->netns_ids, rtnl_net_dumpid_one, &net_cb); - if (net_cb.fillargs.add_ref && - !net_eq(net_cb.ref_net, net_cb.tgt_net)) - spin_unlock_bh(&net_cb.ref_net->nsid_lock); - spin_unlock_bh(&net_cb.tgt_net->nsid_lock); + rcu_read_unlock(); cb->args[0] = net_cb.idx; end: diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 0bad5db23129a..dbc9b2f53649d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2414,6 +2414,7 @@ static int do_setlink(const struct sk_buff *skb, return err; if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD] || tb[IFLA_TARGET_NETNSID]) { + const char *pat = ifname && ifname[0] ? ifname : NULL; struct net *net = rtnl_link_get_net_capable(skb, dev_net(dev), tb, CAP_NET_ADMIN); if (IS_ERR(net)) { @@ -2421,7 +2422,7 @@ static int do_setlink(const struct sk_buff *skb, goto errout; } - err = dev_change_net_namespace(dev, net, ifname); + err = dev_change_net_namespace(dev, net, pat); put_net(net); if (err) goto errout; @@ -3021,8 +3022,8 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1]; unsigned char name_assign_type = NET_NAME_USER; struct nlattr *linkinfo[IFLA_INFO_MAX + 1]; - const struct rtnl_link_ops *m_ops = NULL; - struct net_device *master_dev = NULL; + const struct rtnl_link_ops *m_ops; + struct net_device *master_dev; struct net *net = sock_net(skb->sk); const struct rtnl_link_ops *ops; struct nlattr *tb[IFLA_MAX + 1]; @@ -3062,6 +3063,8 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, dev = NULL; } + master_dev = NULL; + m_ops = NULL; if (dev) { master_dev = netdev_master_upper_dev_get(dev); if (master_dev) @@ -3728,7 +3731,7 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, /* Support fdb on master device the net/bridge default case */ if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) && - (dev->priv_flags & IFF_BRIDGE_PORT)) { + netif_is_bridge_port(dev)) { struct net_device *br_dev = netdev_master_upper_dev_get(dev); const struct net_device_ops *ops = br_dev->netdev_ops; @@ -3839,7 +3842,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, /* Support fdb on master device the net/bridge default case */ if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) && - (dev->priv_flags & IFF_BRIDGE_PORT)) { + netif_is_bridge_port(dev)) { struct net_device *br_dev = netdev_master_upper_dev_get(dev); const struct net_device_ops *ops = br_dev->netdev_ops; @@ -4065,13 +4068,13 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; if (!br_idx) { /* user did not specify a specific bridge */ - if (dev->priv_flags & IFF_BRIDGE_PORT) { + if (netif_is_bridge_port(dev)) { br_dev = netdev_master_upper_dev_get(dev); cops = br_dev->netdev_ops; } } else { if (dev != br_dev && - !(dev->priv_flags & IFF_BRIDGE_PORT)) + !netif_is_bridge_port(dev)) continue; if (br_dev != netdev_master_upper_dev_get(dev) && @@ -4083,7 +4086,7 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) if (idx < s_idx) goto cont; - if (dev->priv_flags & IFF_BRIDGE_PORT) { + if (netif_is_bridge_port(dev)) { if (cops && cops->ndo_fdb_dump) { err = cops->ndo_fdb_dump(skb, cb, br_dev, dev, @@ -4233,7 +4236,7 @@ static int rtnl_fdb_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (dev) { if (!ndm_flags || (ndm_flags & NTF_MASTER)) { - if (!(dev->priv_flags & IFF_BRIDGE_PORT)) { + if (!netif_is_bridge_port(dev)) { NL_SET_ERR_MSG(extack, "Device is not a bridge port"); return -EINVAL; } @@ -4949,7 +4952,7 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, static size_t if_nlmsg_stats_size(const struct net_device *dev, u32 filter_mask) { - size_t size = 0; + size_t size = NLMSG_ALIGN(sizeof(struct if_stats_msg)); if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_64, 0)) size += nla_total_size_64bit(sizeof(struct rtnl_link_stats64)); diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 7b6b1d2c3d109..6d86506e315f3 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -23,6 +23,8 @@ static siphash_key_t net_secret __read_mostly; static siphash_key_t ts_secret __read_mostly; +#define EPHEMERAL_PORT_SHUFFLE_PERIOD (10 * HZ) + static __always_inline void net_secret_init(void) { net_get_random_once(&net_secret, sizeof(net_secret)); @@ -63,7 +65,7 @@ u32 secure_tcpv6_ts_off(const struct net *net, .daddr = *(struct in6_addr *)daddr, }; - if (net->ipv4.sysctl_tcp_timestamps != 1) + if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1) return 0; ts_secret_init(); @@ -95,17 +97,19 @@ u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, } EXPORT_SYMBOL(secure_tcpv6_seq); -u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, +u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport) { const struct { struct in6_addr saddr; struct in6_addr daddr; + unsigned int timeseed; __be16 dport; } __aligned(SIPHASH_ALIGNMENT) combined = { .saddr = *(struct in6_addr *)saddr, .daddr = *(struct in6_addr *)daddr, - .dport = dport + .timeseed = jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD, + .dport = dport, }; net_secret_init(); return siphash(&combined, offsetofend(typeof(combined), dport), @@ -117,7 +121,7 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #ifdef CONFIG_INET u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr) { - if (net->ipv4.sysctl_tcp_timestamps != 1) + if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1) return 0; ts_secret_init(); @@ -143,11 +147,13 @@ u32 secure_tcp_seq(__be32 saddr, __be32 daddr, } EXPORT_SYMBOL_GPL(secure_tcp_seq); -u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) +u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) { net_secret_init(); - return siphash_3u32((__force u32)saddr, (__force u32)daddr, - (__force u16)dport, &net_secret); + return siphash_4u32((__force u32)saddr, (__force u32)daddr, + (__force u16)dport, + jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD, + &net_secret); } EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral); #endif diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8f2cf7497e3b2..d565407b7185f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -768,7 +768,7 @@ void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt) ntohs(skb->protocol), skb->pkt_type, skb->skb_iif); if (dev) - printk("%sdev name=%s feat=0x%pNF\n", + printk("%sdev name=%s feat=%pNF\n", level, dev->name, &dev->features); if (sk) printk("%ssk family=%hu type=%u proto=%u\n", @@ -2139,7 +2139,7 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta) /* Free pulled out fragments. */ while ((list = skb_shinfo(skb)->frag_list) != insp) { skb_shinfo(skb)->frag_list = list->next; - kfree_skb(list); + consume_skb(list); } /* And insert new clone at head. */ if (clone) { @@ -5847,7 +5847,7 @@ static int pskb_carve_frag_list(struct sk_buff *skb, /* Free pulled out fragments. */ while ((list = shinfo->frag_list) != insp) { shinfo->frag_list = list->next; - kfree_skb(list); + consume_skb(list); } /* And insert new clone at head. */ if (clone) { diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 17cc1edd149cb..a606ad8e8be25 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -27,6 +27,7 @@ int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len, int elem_first_coalesce) { struct page_frag *pfrag = sk_page_frag(sk); + u32 osize = msg->sg.size; int ret = 0; len -= msg->sg.size; @@ -35,13 +36,17 @@ int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len, u32 orig_offset; int use, i; - if (!sk_page_frag_refill(sk, pfrag)) - return -ENOMEM; + if (!sk_page_frag_refill(sk, pfrag)) { + ret = -ENOMEM; + goto msg_trim; + } orig_offset = pfrag->offset; use = min_t(int, len, pfrag->size - orig_offset); - if (!sk_wmem_schedule(sk, use)) - return -ENOMEM; + if (!sk_wmem_schedule(sk, use)) { + ret = -ENOMEM; + goto msg_trim; + } i = msg->sg.end; sk_msg_iter_var_prev(i); @@ -71,6 +76,10 @@ int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len, } return ret; + +msg_trim: + sk_msg_trim(sk, msg, osize); + return ret; } EXPORT_SYMBOL_GPL(sk_msg_alloc); diff --git a/net/core/sock.c b/net/core/sock.c index 7d22785e4a03e..0e98929972f46 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1181,6 +1181,16 @@ int sock_setsockopt(struct socket *sock, int level, int optname, } EXPORT_SYMBOL(sock_setsockopt); +static const struct cred *sk_get_peer_cred(struct sock *sk) +{ + const struct cred *cred; + + spin_lock(&sk->sk_peer_lock); + cred = get_cred(sk->sk_peer_cred); + spin_unlock(&sk->sk_peer_lock); + + return cred; +} static void cred_to_ucred(struct pid *pid, const struct cred *cred, struct ucred *ucred) @@ -1355,7 +1365,11 @@ int sock_getsockopt(struct socket *sock, int level, int optname, struct ucred peercred; if (len > sizeof(peercred)) len = sizeof(peercred); + + spin_lock(&sk->sk_peer_lock); cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred); + spin_unlock(&sk->sk_peer_lock); + if (copy_to_user(optval, &peercred, len)) return -EFAULT; goto lenout; @@ -1363,20 +1377,23 @@ int sock_getsockopt(struct socket *sock, int level, int optname, case SO_PEERGROUPS: { + const struct cred *cred; int ret, n; - if (!sk->sk_peer_cred) + cred = sk_get_peer_cred(sk); + if (!cred) return -ENODATA; - n = sk->sk_peer_cred->group_info->ngroups; + n = cred->group_info->ngroups; if (len < n * sizeof(gid_t)) { len = n * sizeof(gid_t); + put_cred(cred); return put_user(len, optlen) ? -EFAULT : -ERANGE; } len = n * sizeof(gid_t); - ret = groups_to_user((gid_t __user *)optval, - sk->sk_peer_cred->group_info); + ret = groups_to_user((gid_t __user *)optval, cred->group_info); + put_cred(cred); if (ret) return ret; goto lenout; @@ -1714,9 +1731,10 @@ static void __sk_destruct(struct rcu_head *head) sk->sk_frag.page = NULL; } - if (sk->sk_peer_cred) - put_cred(sk->sk_peer_cred); + /* We do not need to acquire sk->sk_peer_lock, we are the last user. */ + put_cred(sk->sk_peer_cred); put_pid(sk->sk_peer_pid); + if (likely(sk->sk_net_refcnt)) put_net(sock_net(sk)); sk_prot_free(sk->sk_prot_creator, sk); @@ -2350,8 +2368,6 @@ static void sk_leave_memory_pressure(struct sock *sk) } } -/* On 32bit arches, an skb frag is limited to 2^15 */ -#define SKB_FRAG_PAGE_ORDER get_order(32768) DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key); /** @@ -2922,6 +2938,8 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_peer_pid = NULL; sk->sk_peer_cred = NULL; + spin_lock_init(&sk->sk_peer_lock); + sk->sk_write_pending = 0; sk->sk_rcvlowat = 1; sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; @@ -3449,7 +3467,7 @@ int proto_register(struct proto *prot, int alloc_slab) return ret; out_free_timewait_sock_slab: - if (alloc_slab && prot->twsk_prot) + if (alloc_slab) tw_prot_cleanup(prot->twsk_prot); out_free_request_sock_slab: if (alloc_slab) { diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 6002096e64fc5..3255be4a12cf6 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -48,7 +48,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) if (err) goto free_stab; - stab->sks = bpf_map_area_alloc(stab->map.max_entries * + stab->sks = bpf_map_area_alloc((u64) stab->map.max_entries * sizeof(struct sock *), stab->map.numa_node); if (stab->sks) diff --git a/net/core/stream.c b/net/core/stream.c index 4f1d4aa5fb38d..a166a32b411fa 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -195,9 +195,6 @@ void sk_stream_kill_queues(struct sock *sk) /* First the read buffer. */ __skb_queue_purge(&sk->sk_receive_queue); - /* Next, the error queue. */ - __skb_queue_purge(&sk->sk_error_queue); - /* Next, the write queue. */ WARN_ON(!skb_queue_empty(&sk->sk_write_queue)); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 669cbe1609d9e..48041f50ecfb4 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -424,7 +424,7 @@ static struct ctl_table net_core_table[] = { .mode = 0600, .proc_handler = proc_dolongvec_minmax_bpf_restricted, .extra1 = &long_one, - .extra2 = &long_max, + .extra2 = &bpf_jit_limit_max, }, #endif { diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index e9ecbb57df455..b53d5e1d026fe 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -2063,10 +2063,54 @@ u8 dcb_ieee_getapp_default_prio_mask(const struct net_device *dev) } EXPORT_SYMBOL(dcb_ieee_getapp_default_prio_mask); +static void dcbnl_flush_dev(struct net_device *dev) +{ + struct dcb_app_type *itr, *tmp; + + spin_lock_bh(&dcb_lock); + + list_for_each_entry_safe(itr, tmp, &dcb_app_list, list) { + if (itr->ifindex == dev->ifindex) { + list_del(&itr->list); + kfree(itr); + } + } + + spin_unlock_bh(&dcb_lock); +} + +static int dcbnl_netdevice_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + switch (event) { + case NETDEV_UNREGISTER: + if (!dev->dcbnl_ops) + return NOTIFY_DONE; + + dcbnl_flush_dev(dev); + + return NOTIFY_OK; + default: + return NOTIFY_DONE; + } +} + +static struct notifier_block dcbnl_nb __read_mostly = { + .notifier_call = dcbnl_netdevice_event, +}; + static int __init dcbnl_init(void) { + int err; + INIT_LIST_HEAD(&dcb_app_list); + err = register_netdevice_notifier(&dcbnl_nb); + if (err) + return err; + rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, 0); rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, 0); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index d19557c6d04b5..7cf903f9e29a9 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -427,7 +427,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk, if (__inet_inherit_port(sk, newsk) < 0) goto put_and_exit; - *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL); if (*own_req) ireq->ireq_opt = NULL; else diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 9f73ccf46c9b1..7c24927e9c2c2 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -538,7 +538,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, dccp_done(newsk); goto out; } - *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL); /* Clone pktoptions received with SYN, if we own the req */ if (*own_req && ireq->pktopts) { newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC); diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 25187528c308a..1f352d669c944 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -94,6 +94,8 @@ struct sock *dccp_create_openreq_child(const struct sock *sk, newdp->dccps_role = DCCP_ROLE_SERVER; newdp->dccps_hc_rx_ackvec = NULL; newdp->dccps_service_list = NULL; + newdp->dccps_hc_rx_ccid = NULL; + newdp->dccps_hc_tx_ccid = NULL; newdp->dccps_service = dreq->dreq_service; newdp->dccps_timestamp_echo = dreq->dreq_timestamp_echo; newdp->dccps_timestamp_time = dreq->dreq_timestamp_time; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 70e6fc2edd304..1f27641f9cc07 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -669,6 +669,7 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn) struct net_device *master; master = of_find_net_device_by_node(ethernet); + of_node_put(ethernet); if (!master) return -EPROBE_DEFER; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 75b4cd4bcafb9..59759ceb426ac 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1327,13 +1327,11 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev) * use the switch internal MDIO bus instead */ ret = dsa_slave_phy_connect(slave_dev, dp->index); - if (ret) { - netdev_err(slave_dev, - "failed to connect to port %d: %d\n", - dp->index, ret); - phylink_destroy(dp->pl); - return ret; - } + } + if (ret) { + netdev_err(slave_dev, "failed to connect to PHY: %pe\n", + ERR_PTR(ret)); + phylink_destroy(dp->pl); } return ret; diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile new file mode 100644 index 0000000000000..c07b6d552b6b6 --- /dev/null +++ b/net/ethtool/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o + +ethtool_nl-y := netlink.o eeprom.o diff --git a/net/ethtool/common.h b/net/ethtool/common.h new file mode 100644 index 0000000000000..bd5d4c7069e73 --- /dev/null +++ b/net/ethtool/common.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ETHTOOL_COMMON_H +#define _ETHTOOL_COMMON_H + +#include +#include + +extern int ethtool_get_module_info_call(struct net_device *dev, + struct ethtool_modinfo *modinfo); +extern int ethtool_get_module_eeprom_call(struct net_device *dev, + struct ethtool_eeprom *ee, u8 *data); + +#endif /* _ETHTOOL_COMMON_H */ diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c new file mode 100644 index 0000000000000..b5e10fa79bdd1 --- /dev/null +++ b/net/ethtool/eeprom.c @@ -0,0 +1,245 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include "netlink.h" +#include "common.h" + +struct eeprom_req_info { + struct ethnl_req_info base; + u32 offset; + u32 length; + u8 page; + u8 bank; + u8 i2c_address; +}; + +struct eeprom_reply_data { + struct ethnl_reply_data base; + u32 length; + u8 *data; +}; + +#define MODULE_EEPROM_REQINFO(__req_base) \ + container_of(__req_base, struct eeprom_req_info, base) + +#define MODULE_EEPROM_REPDATA(__reply_base) \ + container_of(__reply_base, struct eeprom_reply_data, base) + +static int fallback_set_params(struct eeprom_req_info *request, + struct ethtool_modinfo *modinfo, + struct ethtool_eeprom *eeprom) +{ + u32 offset = request->offset; + u32 length = request->length; + + if (request->page) + offset = request->page * ETH_MODULE_EEPROM_PAGE_LEN + offset; + + if (modinfo->type == ETH_MODULE_SFF_8079 && + request->i2c_address == 0x51) + offset += ETH_MODULE_EEPROM_PAGE_LEN * 2; + + if (offset >= modinfo->eeprom_len) + return -EINVAL; + + eeprom->cmd = ETHTOOL_GMODULEEEPROM; + eeprom->len = length; + eeprom->offset = offset; + + return 0; +} + +static int eeprom_fallback(struct eeprom_req_info *request, + struct eeprom_reply_data *reply, + struct genl_info *info) +{ + struct net_device *dev = reply->base.dev; + struct ethtool_modinfo modinfo = {0}; + struct ethtool_eeprom eeprom = {0}; + u8 *data; + int err; + + modinfo.cmd = ETHTOOL_GMODULEINFO; + err = ethtool_get_module_info_call(dev, &modinfo); + if (err < 0) + return err; + + err = fallback_set_params(request, &modinfo, &eeprom); + if (err < 0) + return err; + + data = kmalloc(eeprom.len, GFP_KERNEL); + if (!data) + return -ENOMEM; + err = ethtool_get_module_eeprom_call(dev, &eeprom, data); + if (err < 0) + goto err_out; + + reply->data = data; + reply->length = eeprom.len; + + return 0; + +err_out: + kfree(data); + return err; +} + +static int get_module_eeprom_by_page(struct net_device *dev, + struct ethtool_module_eeprom *page_data, + struct netlink_ext_ack *extack) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + + if (dev->sfp_bus) + return sfp_get_module_eeprom_by_page(dev->sfp_bus, page_data, extack); + + if (ops->get_module_eeprom_by_page) + return ops->get_module_eeprom_by_page(dev, page_data, extack); + + return -EOPNOTSUPP; +} + +static int eeprom_prepare_data(const struct ethnl_req_info *req_base, + struct ethnl_reply_data *reply_base, + struct genl_info *info) +{ + struct eeprom_reply_data *reply = MODULE_EEPROM_REPDATA(reply_base); + struct eeprom_req_info *request = MODULE_EEPROM_REQINFO(req_base); + struct ethtool_module_eeprom page_data = {0}; + struct net_device *dev = reply_base->dev; + int ret; + + page_data.offset = request->offset; + page_data.length = request->length; + page_data.i2c_address = request->i2c_address; + page_data.page = request->page; + page_data.bank = request->bank; + page_data.data = kmalloc(page_data.length, GFP_KERNEL); + if (!page_data.data) + return -ENOMEM; + + ret = ethnl_ops_begin(dev); + if (ret) + goto err_free; + + ret = get_module_eeprom_by_page(dev, &page_data, info->extack); + if (ret < 0) + goto err_ops; + + reply->length = ret; + reply->data = page_data.data; + + ethnl_ops_complete(dev); + return 0; + +err_ops: + ethnl_ops_complete(dev); +err_free: + kfree(page_data.data); + + if (ret == -EOPNOTSUPP) + return eeprom_fallback(request, reply, info); + return ret; +} + +static int eeprom_parse_request(struct ethnl_req_info *req_info, struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct eeprom_req_info *request = MODULE_EEPROM_REQINFO(req_info); + + if (!tb[ETHTOOL_A_MODULE_EEPROM_OFFSET] || + !tb[ETHTOOL_A_MODULE_EEPROM_LENGTH] || + !tb[ETHTOOL_A_MODULE_EEPROM_PAGE] || + !tb[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS]) + return -EINVAL; + + request->i2c_address = nla_get_u8(tb[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS]); + request->offset = nla_get_u32(tb[ETHTOOL_A_MODULE_EEPROM_OFFSET]); + request->length = nla_get_u32(tb[ETHTOOL_A_MODULE_EEPROM_LENGTH]); + + if (!request->length) + return -EINVAL; + + /* The following set of conditions limit the API to only dump 1/2 + * EEPROM page without crossing low page boundary located at offset 128. + * This means user may only request dumps of length limited to 128 from + * either low 128 bytes or high 128 bytes. + * For pages higher than 0 only high 128 bytes are accessible. + */ + request->page = nla_get_u8(tb[ETHTOOL_A_MODULE_EEPROM_PAGE]); + if (request->page && request->offset < ETH_MODULE_EEPROM_PAGE_LEN) { + NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_MODULE_EEPROM_PAGE], + "reading from lower half page is allowed for page 0 only"); + return -EINVAL; + } + + if (request->offset < ETH_MODULE_EEPROM_PAGE_LEN && + request->offset + request->length > ETH_MODULE_EEPROM_PAGE_LEN) { + NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_MODULE_EEPROM_LENGTH], + "reading cross half page boundary is illegal"); + return -EINVAL; + } else if (request->offset >= ETH_MODULE_EEPROM_PAGE_LEN * 2) { + NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_MODULE_EEPROM_OFFSET], + "offset is out of bounds"); + return -EINVAL; + } else if (request->offset + request->length > ETH_MODULE_EEPROM_PAGE_LEN * 2) { + NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_MODULE_EEPROM_LENGTH], + "reading cross page boundary is illegal"); + return -EINVAL; + } + + if (tb[ETHTOOL_A_MODULE_EEPROM_BANK]) + request->bank = nla_get_u8(tb[ETHTOOL_A_MODULE_EEPROM_BANK]); + + return 0; +} + +static int eeprom_reply_size(const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct eeprom_req_info *request = MODULE_EEPROM_REQINFO(req_base); + + return nla_total_size(sizeof(u8) * request->length); /* _EEPROM_DATA */ +} + +static int eeprom_fill_reply(struct sk_buff *skb, + const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + struct eeprom_reply_data *reply = MODULE_EEPROM_REPDATA(reply_base); + + return nla_put(skb, ETHTOOL_A_MODULE_EEPROM_DATA, reply->length, reply->data); +} + +static void eeprom_cleanup_data(struct ethnl_reply_data *reply_base) +{ + struct eeprom_reply_data *reply = MODULE_EEPROM_REPDATA(reply_base); + + kfree(reply->data); +} + +const struct ethnl_request_ops ethnl_module_eeprom_request_ops = { + .request_cmd = ETHTOOL_MSG_MODULE_EEPROM_GET, + .reply_cmd = ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY, + .hdr_attr = ETHTOOL_A_MODULE_EEPROM_HEADER, + .req_info_size = sizeof(struct eeprom_req_info), + .reply_data_size = sizeof(struct eeprom_reply_data), + + .parse_request = eeprom_parse_request, + .prepare_data = eeprom_prepare_data, + .reply_size = eeprom_reply_size, + .fill_reply = eeprom_fill_reply, + .cleanup_data = eeprom_cleanup_data, +}; + +const struct nla_policy ethnl_module_eeprom_get_policy[] = { + [ETHTOOL_A_MODULE_EEPROM_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), + [ETHTOOL_A_MODULE_EEPROM_OFFSET] = { .type = NLA_U32 }, + [ETHTOOL_A_MODULE_EEPROM_LENGTH] = { .type = NLA_U32 }, + [ETHTOOL_A_MODULE_EEPROM_PAGE] = { .type = NLA_U8 }, + [ETHTOOL_A_MODULE_EEPROM_BANK] = { .type = NLA_U8 }, + [ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS] = + NLA_POLICY_RANGE(NLA_U8, 0, ETH_MODULE_MAX_I2C_ADDRESS), +}; diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c new file mode 100644 index 0000000000000..6d785e38cdf66 --- /dev/null +++ b/net/ethtool/netlink.c @@ -0,0 +1,570 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include "netlink.h" + +static struct genl_family ethtool_genl_family; + +static u32 ethnl_bcast_seq; + +const struct nla_policy ethnl_header_policy[] = { + [ETHTOOL_A_HEADER_DEV_INDEX] = { .type = NLA_U32 }, + [ETHTOOL_A_HEADER_DEV_NAME] = { .type = NLA_NUL_STRING, + .len = ALTIFNAMSIZ - 1 }, + [ETHTOOL_A_HEADER_FLAGS] = { .type = NLA_ENSURE_UINT_TYPE(NLA_U32) }, +}; + +/** + * ethnl_parse_header_dev_get() - parse request header + * @req_info: structure to put results into + * @header: nest attribute with request header + * @net: request netns + * @extack: netlink extack for error reporting + * @require_dev: fail if no device identified in header + * + * Parse request header in nested attribute @nest and puts results into + * the structure pointed to by @req_info. Extack from @info is used for error + * reporting. If req_info->dev is not null on return, reference to it has + * been taken. If error is returned, *req_info is null initialized and no + * reference is held. + * + * Return: 0 on success or negative error code + */ +int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, + const struct nlattr *header, struct net *net, + struct netlink_ext_ack *extack, bool require_dev) +{ + struct nlattr *tb[ARRAY_SIZE(ethnl_header_policy)]; + const struct nlattr *devname_attr; + struct net_device *dev = NULL; + u32 flags = 0; + int ret; + + if (!header) { + NL_SET_ERR_MSG(extack, "request header missing"); + return -EINVAL; + } + /* No validation here, command policy should have a nested policy set + * for the header, therefore validation should have already been done. + */ + ret = nla_parse_nested(tb, ARRAY_SIZE(ethnl_header_policy) - 1, header, + NULL, extack); + if (ret < 0) + return ret; + if (tb[ETHTOOL_A_HEADER_FLAGS]) + flags = nla_get_u32(tb[ETHTOOL_A_HEADER_FLAGS]); + + devname_attr = tb[ETHTOOL_A_HEADER_DEV_NAME]; + if (tb[ETHTOOL_A_HEADER_DEV_INDEX]) { + u32 ifindex = nla_get_u32(tb[ETHTOOL_A_HEADER_DEV_INDEX]); + + dev = dev_get_by_index(net, ifindex); + if (!dev) { + NL_SET_ERR_MSG_ATTR(extack, + tb[ETHTOOL_A_HEADER_DEV_INDEX], + "no device matches ifindex"); + return -ENODEV; + } + /* if both ifindex and ifname are passed, they must match */ + if (devname_attr && + strncmp(dev->name, nla_data(devname_attr), IFNAMSIZ)) { + dev_put(dev); + NL_SET_ERR_MSG_ATTR(extack, header, + "ifindex and name do not match"); + return -ENODEV; + } + } else if (devname_attr) { + dev = dev_get_by_name(net, nla_data(devname_attr)); + if (!dev) { + NL_SET_ERR_MSG_ATTR(extack, devname_attr, + "no device matches name"); + return -ENODEV; + } + } else if (require_dev) { + NL_SET_ERR_MSG_ATTR(extack, header, + "neither ifindex nor name specified"); + return -EINVAL; + } + + if (dev && !netif_device_present(dev)) { + dev_put(dev); + NL_SET_ERR_MSG(extack, "device not present"); + return -ENODEV; + } + + req_info->dev = dev; + req_info->flags = flags; + return 0; +} + +/** + * ethnl_fill_reply_header() - Put common header into a reply message + * @skb: skb with the message + * @dev: network device to describe in header + * @attrtype: attribute type to use for the nest + * + * Create a nested attribute with attributes describing given network device. + * + * Return: 0 on success, error value (-EMSGSIZE only) on error + */ +int ethnl_fill_reply_header(struct sk_buff *skb, struct net_device *dev, + u16 attrtype) +{ + struct nlattr *nest; + + if (!dev) + return 0; + nest = nla_nest_start(skb, attrtype); + if (!nest) + return -EMSGSIZE; + + if (nla_put_u32(skb, ETHTOOL_A_HEADER_DEV_INDEX, (u32)dev->ifindex) || + nla_put_string(skb, ETHTOOL_A_HEADER_DEV_NAME, dev->name)) + goto nla_put_failure; + /* If more attributes are put into reply header, ethnl_header_size() + * must be updated to account for them. + */ + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +/** + * ethnl_reply_init() - Create skb for a reply and fill device identification + * @payload: payload length (without netlink and genetlink header) + * @dev: device the reply is about (may be null) + * @cmd: ETHTOOL_MSG_* message type for reply + * @hdr_attrtype: attribute type for common header + * @info: genetlink info of the received packet we respond to + * @ehdrp: place to store payload pointer returned by genlmsg_new() + * + * Return: pointer to allocated skb on success, NULL on error + */ +struct sk_buff *ethnl_reply_init(size_t payload, struct net_device *dev, u8 cmd, + u16 hdr_attrtype, struct genl_info *info, + void **ehdrp) +{ + struct sk_buff *skb; + + skb = genlmsg_new(payload, GFP_KERNEL); + if (!skb) + goto err; + *ehdrp = genlmsg_put_reply(skb, info, ðtool_genl_family, 0, cmd); + if (!*ehdrp) + goto err_free; + + if (dev) { + int ret; + + ret = ethnl_fill_reply_header(skb, dev, hdr_attrtype); + if (ret < 0) + goto err_free; + } + return skb; + +err_free: + nlmsg_free(skb); +err: + if (info) + GENL_SET_ERR_MSG(info, "failed to setup reply message"); + return NULL; +} + +void *ethnl_dump_put(struct sk_buff *skb, struct netlink_callback *cb, u8 cmd) +{ + return genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + ðtool_genl_family, 0, cmd); +} + +void *ethnl_bcastmsg_put(struct sk_buff *skb, u8 cmd) +{ + return genlmsg_put(skb, 0, ++ethnl_bcast_seq, ðtool_genl_family, 0, + cmd); +} + +int ethnl_multicast(struct sk_buff *skb, struct net_device *dev) +{ + return genlmsg_multicast_netns(ðtool_genl_family, dev_net(dev), skb, + 0, ETHNL_MCGRP_MONITOR, GFP_KERNEL); +} + +/* GET request helpers */ + +/** + * struct ethnl_dump_ctx - context structure for generic dumpit() callback + * @ops: request ops of currently processed message type + * @req_info: parsed request header of processed request + * @reply_data: data needed to compose the reply + * @pos_hash: saved iteration position - hashbucket + * @pos_idx: saved iteration position - index + * + * These parameters are kept in struct netlink_callback as context preserved + * between iterations. They are initialized by ethnl_default_start() and used + * in ethnl_default_dumpit() and ethnl_default_done(). + */ +struct ethnl_dump_ctx { + const struct ethnl_request_ops *ops; + struct ethnl_req_info *req_info; + struct ethnl_reply_data *reply_data; + int pos_hash; + int pos_idx; +}; + +static const struct ethnl_request_ops * +ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = { + [ETHTOOL_MSG_MODULE_EEPROM_GET] = ðnl_module_eeprom_request_ops, +}; + +static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb) +{ + return (struct ethnl_dump_ctx *)cb->ctx; +} + +/** + * ethnl_default_parse() - Parse request message + * @req_info: pointer to structure to put data into + * @tb: parsed attributes + * @net: request netns + * @request_ops: struct request_ops for request type + * @extack: netlink extack for error reporting + * @require_dev: fail if no device identified in header + * + * Parse universal request header and call request specific ->parse_request() + * callback (if defined) to parse the rest of the message. + * + * Return: 0 on success or negative error code + */ +static int ethnl_default_parse(struct ethnl_req_info *req_info, + struct nlattr **tb, struct net *net, + const struct ethnl_request_ops *request_ops, + struct netlink_ext_ack *extack, bool require_dev) +{ + int ret; + + ret = ethnl_parse_header_dev_get(req_info, tb[request_ops->hdr_attr], + net, extack, require_dev); + if (ret < 0) + return ret; + + if (request_ops->parse_request) { + ret = request_ops->parse_request(req_info, tb, extack); + if (ret < 0) + return ret; + } + + return 0; +} + +/** + * ethnl_init_reply_data() - Initialize reply data for GET request + * @reply_data: pointer to embedded struct ethnl_reply_data + * @ops: instance of struct ethnl_request_ops describing the layout + * @dev: network device to initialize the reply for + * + * Fills the reply data part with zeros and sets the dev member. Must be called + * before calling the ->fill_reply() callback (for each iteration when handling + * dump requests). + */ +static void ethnl_init_reply_data(struct ethnl_reply_data *reply_data, + const struct ethnl_request_ops *ops, + struct net_device *dev) +{ + memset(reply_data, 0, ops->reply_data_size); + reply_data->dev = dev; +} + +/* default ->doit() handler for GET type requests */ +static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct ethnl_reply_data *reply_data = NULL; + struct ethnl_req_info *req_info = NULL; + const u8 cmd = info->genlhdr->cmd; + const struct ethnl_request_ops *ops; + struct sk_buff *rskb; + void *reply_payload; + int reply_len; + int ret; + + ops = ethnl_default_requests[cmd]; + if (WARN_ONCE(!ops, "cmd %u has no ethnl_request_ops\n", cmd)) + return -EOPNOTSUPP; + req_info = kzalloc(ops->req_info_size, GFP_KERNEL); + if (!req_info) + return -ENOMEM; + reply_data = kmalloc(ops->reply_data_size, GFP_KERNEL); + if (!reply_data) { + kfree(req_info); + return -ENOMEM; + } + + ret = ethnl_default_parse(req_info, info->attrs, genl_info_net(info), + ops, info->extack, !ops->allow_nodev_do); + if (ret < 0) + goto err_dev; + ethnl_init_reply_data(reply_data, ops, req_info->dev); + + rtnl_lock(); + ret = ops->prepare_data(req_info, reply_data, info); + rtnl_unlock(); + if (ret < 0) + goto err_cleanup; + ret = ops->reply_size(req_info, reply_data); + if (ret < 0) + goto err_cleanup; + reply_len = ret + ethnl_reply_header_size(); + ret = -ENOMEM; + rskb = ethnl_reply_init(reply_len, req_info->dev, ops->reply_cmd, + ops->hdr_attr, info, &reply_payload); + if (!rskb) + goto err_cleanup; + ret = ops->fill_reply(rskb, req_info, reply_data); + if (ret < 0) + goto err_msg; + if (ops->cleanup_data) + ops->cleanup_data(reply_data); + + genlmsg_end(rskb, reply_payload); + if (req_info->dev) + dev_put(req_info->dev); + kfree(reply_data); + kfree(req_info); + return genlmsg_reply(rskb, info); + +err_msg: + WARN_ONCE(ret == -EMSGSIZE, "calculated message payload length (%d) not sufficient\n", reply_len); + nlmsg_free(rskb); +err_cleanup: + if (ops->cleanup_data) + ops->cleanup_data(reply_data); +err_dev: + if (req_info->dev) + dev_put(req_info->dev); + kfree(reply_data); + kfree(req_info); + return ret; +} + +static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev, + const struct ethnl_dump_ctx *ctx, + struct netlink_callback *cb) +{ + void *ehdr; + int ret; + + ehdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + ðtool_genl_family, NLM_F_MULTI, + ctx->ops->reply_cmd); + if (!ehdr) + return -EMSGSIZE; + + ethnl_init_reply_data(ctx->reply_data, ctx->ops, dev); + rtnl_lock(); + ret = ctx->ops->prepare_data(ctx->req_info, ctx->reply_data, NULL); + rtnl_unlock(); + if (ret < 0) + goto out; + ret = ethnl_fill_reply_header(skb, dev, ctx->ops->hdr_attr); + if (ret < 0) + goto out; + ret = ctx->ops->fill_reply(skb, ctx->req_info, ctx->reply_data); + +out: + if (ctx->ops->cleanup_data) + ctx->ops->cleanup_data(ctx->reply_data); + ctx->reply_data->dev = NULL; + if (ret < 0) + genlmsg_cancel(skb, ehdr); + else + genlmsg_end(skb, ehdr); + return ret; +} + +/* Default ->dumpit() handler for GET requests. Device iteration copied from + * rtnl_dump_ifinfo(); we have to be more careful about device hashtable + * persistence as we cannot guarantee to hold RTNL lock through the whole + * function as rtnetnlink does. + */ +static int ethnl_default_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct ethnl_dump_ctx *ctx = ethnl_dump_context(cb); + struct net *net = sock_net(skb->sk); + int s_idx = ctx->pos_idx; + int h, idx = 0; + int ret = 0; + + rtnl_lock(); + for (h = ctx->pos_hash; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + struct hlist_head *head; + struct net_device *dev; + unsigned int seq; + + head = &net->dev_index_head[h]; + +restart_chain: + seq = net->dev_base_seq; + cb->seq = seq; + idx = 0; + hlist_for_each_entry(dev, head, index_hlist) { + if (idx < s_idx) + goto cont; + dev_hold(dev); + rtnl_unlock(); + + ret = ethnl_default_dump_one(skb, dev, ctx, cb); + dev_put(dev); + if (ret < 0) { + if (ret == -EOPNOTSUPP) + goto lock_and_cont; + if (likely(skb->len)) + ret = skb->len; + goto out; + } +lock_and_cont: + rtnl_lock(); + if (net->dev_base_seq != seq) { + s_idx = idx + 1; + goto restart_chain; + } +cont: + idx++; + } + + } + rtnl_unlock(); + +out: + ctx->pos_hash = h; + ctx->pos_idx = idx; + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); + + return ret; +} + +/** + * struct genl_info - info that is available during dumpit op call + * @family: generic netlink family - for internal genl code usage + * @ops: generic netlink ops - for internal genl code usage + * @attrs: netlink attributes + */ +struct genl_dumpit_info { + const struct genl_family *family; + struct genl_ops op; + struct nlattr **attrs; +}; + +/* generic ->start() handler for GET requests */ +static int ethnl_default_start(struct netlink_callback *cb) +{ + const struct genl_dumpit_info *info = cb->data; + struct ethnl_dump_ctx *ctx = ethnl_dump_context(cb); + struct ethnl_reply_data *reply_data; + const struct ethnl_request_ops *ops; + struct ethnl_req_info *req_info; + struct genlmsghdr *ghdr; + int ret; + + BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); + + ghdr = nlmsg_data(cb->nlh); + ops = ethnl_default_requests[ghdr->cmd]; + if (WARN_ONCE(!ops, "cmd %u has no ethnl_request_ops\n", ghdr->cmd)) + return -EOPNOTSUPP; + req_info = kzalloc(ops->req_info_size, GFP_KERNEL); + if (!req_info) + return -ENOMEM; + reply_data = kmalloc(ops->reply_data_size, GFP_KERNEL); + if (!reply_data) { + ret = -ENOMEM; + goto free_req_info; + } + + ret = ethnl_default_parse(req_info, info->attrs, sock_net(cb->skb->sk), + ops, cb->extack, false); + if (req_info->dev) { + /* We ignore device specification in dump requests but as the + * same parser as for non-dump (doit) requests is used, it + * would take reference to the device if it finds one + */ + dev_put(req_info->dev); + req_info->dev = NULL; + } + if (ret < 0) + goto free_reply_data; + + ctx->ops = ops; + ctx->req_info = req_info; + ctx->reply_data = reply_data; + ctx->pos_hash = 0; + ctx->pos_idx = 0; + + return 0; + +free_reply_data: + kfree(reply_data); +free_req_info: + kfree(req_info); + + return ret; +} + +/* default ->done() handler for GET requests */ +static int ethnl_default_done(struct netlink_callback *cb) +{ + struct ethnl_dump_ctx *ctx = ethnl_dump_context(cb); + + kfree(ctx->reply_data); + kfree(ctx->req_info); + + return 0; +} + +/* genetlink setup */ +static const struct genl_ops ethtool_genl_ops[] = { + { + .cmd = ETHTOOL_MSG_MODULE_EEPROM_GET, + .flags = GENL_UNS_ADMIN_PERM, + .doit = ethnl_default_doit, + .start = ethnl_default_start, + .dumpit = ethnl_default_dumpit, + .done = ethnl_default_done, + }, +}; + +static const struct genl_multicast_group ethtool_nl_mcgrps[] = { + [ETHNL_MCGRP_MONITOR] = { .name = ETHTOOL_MCGRP_MONITOR_NAME }, +}; + +static struct genl_family ethtool_genl_family __ro_after_init = { + .name = ETHTOOL_GENL_NAME, + .version = ETHTOOL_GENL_VERSION, + .policy = ethnl_module_eeprom_get_policy, + .maxattr = ARRAY_SIZE(ethnl_module_eeprom_get_policy) - 1, + .netnsok = true, + .parallel_ops = true, + .ops = ethtool_genl_ops, + .n_ops = ARRAY_SIZE(ethtool_genl_ops), + .mcgrps = ethtool_nl_mcgrps, + .n_mcgrps = ARRAY_SIZE(ethtool_nl_mcgrps), + .module = THIS_MODULE, +}; + +/* module setup */ + +static int __init ethnl_init(void) +{ + int ret; + + ret = genl_register_family(ðtool_genl_family); + if (WARN(ret < 0, "ethtool: genetlink family registration failed")) + return ret; + + return ret; +} + +subsys_initcall(ethnl_init); diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h new file mode 100644 index 0000000000000..2be60f5e52466 --- /dev/null +++ b/net/ethtool/netlink.h @@ -0,0 +1,339 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _NET_ETHTOOL_NETLINK_H +#define _NET_ETHTOOL_NETLINK_H + +#include +#include +#include +#include + +struct ethnl_req_info; + +int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, + const struct nlattr *nest, struct net *net, + struct netlink_ext_ack *extack, + bool require_dev); +int ethnl_fill_reply_header(struct sk_buff *skb, struct net_device *dev, + u16 attrtype); +struct sk_buff *ethnl_reply_init(size_t payload, struct net_device *dev, u8 cmd, + u16 hdr_attrtype, struct genl_info *info, + void **ehdrp); +void *ethnl_dump_put(struct sk_buff *skb, struct netlink_callback *cb, u8 cmd); +void *ethnl_bcastmsg_put(struct sk_buff *skb, u8 cmd); +int ethnl_multicast(struct sk_buff *skb, struct net_device *dev); + +/** + * ethnl_strz_size() - calculate attribute length for fixed size string + * @s: ETH_GSTRING_LEN sized string (may not be null terminated) + * + * Return: total length of an attribute with null terminated string from @s + */ +static inline int ethnl_strz_size(const char *s) +{ + return nla_total_size(strnlen(s, ETH_GSTRING_LEN) + 1); +} + +/** + * ethnl_put_strz() - put string attribute with fixed size string + * @skb: skb with the message + * @attrtype: attribute type + * @s: ETH_GSTRING_LEN sized string (may not be null terminated) + * + * Puts an attribute with null terminated string from @s into the message. + * + * Return: 0 on success, negative error code on failure + */ +static inline int ethnl_put_strz(struct sk_buff *skb, u16 attrtype, + const char *s) +{ + unsigned int len = strnlen(s, ETH_GSTRING_LEN); + struct nlattr *attr; + + attr = nla_reserve(skb, attrtype, len + 1); + if (!attr) + return -EMSGSIZE; + + memcpy(nla_data(attr), s, len); + ((char *)nla_data(attr))[len] = '\0'; + return 0; +} + +/** + * ethnl_update_u32() - update u32 value from NLA_U32 attribute + * @dst: value to update + * @attr: netlink attribute with new value or null + * @mod: pointer to bool for modification tracking + * + * Copy the u32 value from NLA_U32 netlink attribute @attr into variable + * pointed to by @dst; do nothing if @attr is null. Bool pointed to by @mod + * is set to true if this function changed the value of *dst, otherwise it + * is left as is. + */ +static inline void ethnl_update_u32(u32 *dst, const struct nlattr *attr, + bool *mod) +{ + u32 val; + + if (!attr) + return; + val = nla_get_u32(attr); + if (*dst == val) + return; + + *dst = val; + *mod = true; +} + +/** + * ethnl_update_u8() - update u8 value from NLA_U8 attribute + * @dst: value to update + * @attr: netlink attribute with new value or null + * @mod: pointer to bool for modification tracking + * + * Copy the u8 value from NLA_U8 netlink attribute @attr into variable + * pointed to by @dst; do nothing if @attr is null. Bool pointed to by @mod + * is set to true if this function changed the value of *dst, otherwise it + * is left as is. + */ +static inline void ethnl_update_u8(u8 *dst, const struct nlattr *attr, + bool *mod) +{ + u8 val; + + if (!attr) + return; + val = nla_get_u8(attr); + if (*dst == val) + return; + + *dst = val; + *mod = true; +} + +/** + * ethnl_update_bool32() - update u32 used as bool from NLA_U8 attribute + * @dst: value to update + * @attr: netlink attribute with new value or null + * @mod: pointer to bool for modification tracking + * + * Use the u8 value from NLA_U8 netlink attribute @attr to set u32 variable + * pointed to by @dst to 0 (if zero) or 1 (if not); do nothing if @attr is + * null. Bool pointed to by @mod is set to true if this function changed the + * logical value of *dst, otherwise it is left as is. + */ +static inline void ethnl_update_bool32(u32 *dst, const struct nlattr *attr, + bool *mod) +{ + u8 val; + + if (!attr) + return; + val = !!nla_get_u8(attr); + if (!!*dst == val) + return; + + *dst = val; + *mod = true; +} + +/** + * ethnl_update_binary() - update binary data from NLA_BINARY atribute + * @dst: value to update + * @len: destination buffer length + * @attr: netlink attribute with new value or null + * @mod: pointer to bool for modification tracking + * + * Use the u8 value from NLA_U8 netlink attribute @attr to rewrite data block + * of length @len at @dst by attribute payload; do nothing if @attr is null. + * Bool pointed to by @mod is set to true if this function changed the logical + * value of *dst, otherwise it is left as is. + */ +static inline void ethnl_update_binary(void *dst, unsigned int len, + const struct nlattr *attr, bool *mod) +{ + if (!attr) + return; + if (nla_len(attr) < len) + len = nla_len(attr); + if (!memcmp(dst, nla_data(attr), len)) + return; + + memcpy(dst, nla_data(attr), len); + *mod = true; +} + +/** + * ethnl_update_bitfield32() - update u32 value from NLA_BITFIELD32 attribute + * @dst: value to update + * @attr: netlink attribute with new value or null + * @mod: pointer to bool for modification tracking + * + * Update bits in u32 value which are set in attribute's mask to values from + * attribute's value. Do nothing if @attr is null or the value wouldn't change; + * otherwise, set bool pointed to by @mod to true. + */ +static inline void ethnl_update_bitfield32(u32 *dst, const struct nlattr *attr, + bool *mod) +{ + struct nla_bitfield32 change; + u32 newval; + + if (!attr) + return; + change = nla_get_bitfield32(attr); + newval = (*dst & ~change.selector) | (change.value & change.selector); + if (*dst == newval) + return; + + *dst = newval; + *mod = true; +} + +/** + * ethnl_reply_header_size() - total size of reply header + * + * This is an upper estimate so that we do not need to hold RTNL lock longer + * than necessary (to prevent rename between size estimate and composing the + * message). Accounts only for device ifindex and name as those are the only + * attributes ethnl_fill_reply_header() puts into the reply header. + */ +static inline unsigned int ethnl_reply_header_size(void) +{ + return nla_total_size(nla_total_size(sizeof(u32)) + + nla_total_size(IFNAMSIZ)); +} + +/* GET request handling */ + +/* Unified processing of GET requests uses two data structures: request info + * and reply data. Request info holds information parsed from client request + * and its stays constant through all request processing. Reply data holds data + * retrieved from ethtool_ops callbacks or other internal sources which is used + * to compose the reply. When processing a dump request, request info is filled + * only once (when the request message is parsed) but reply data is filled for + * each reply message. + * + * Both structures consist of part common for all request types (struct + * ethnl_req_info and struct ethnl_reply_data defined below) and optional + * parts specific for each request type. Common part always starts at offset 0. + */ + +/** + * struct ethnl_req_info - base type of request information for GET requests + * @dev: network device the request is for (may be null) + * @flags: request flags common for all request types + * + * This is a common base for request specific structures holding data from + * parsed userspace request. These always embed struct ethnl_req_info at + * zero offset. + */ +struct ethnl_req_info { + struct net_device *dev; + u32 flags; +}; + +/** + * struct ethnl_reply_data - base type of reply data for GET requests + * @dev: device for current reply message; in single shot requests it is + * equal to ðnl_req_info.dev; in dumps it's different for each + * reply message + * + * This is a common base for request specific structures holding data for + * kernel reply message. These always embed struct ethnl_reply_data at zero + * offset. + */ +struct ethnl_reply_data { + struct net_device *dev; +}; + +static inline int ethnl_ops_begin(struct net_device *dev) +{ + if (dev && dev->ethtool_ops->begin) + return dev->ethtool_ops->begin(dev); + else + return 0; +} + +static inline void ethnl_ops_complete(struct net_device *dev) +{ + if (dev && dev->ethtool_ops->complete) + dev->ethtool_ops->complete(dev); +} + +/** + * struct ethnl_request_ops - unified handling of GET requests + * @request_cmd: command id for request (GET) + * @reply_cmd: command id for reply (GET_REPLY) + * @hdr_attr: attribute type for request header + * @req_info_size: size of request info + * @reply_data_size: size of reply data + * @allow_nodev_do: allow non-dump request with no device identification + * @parse_request: + * Parse request except common header (struct ethnl_req_info). Common + * header is already filled on entry, the rest up to @repdata_offset + * is zero initialized. This callback should only modify type specific + * request info by parsed attributes from request message. + * @prepare_data: + * Retrieve and prepare data needed to compose a reply message. Calls to + * ethtool_ops handlers are limited to this callback. Common reply data + * (struct ethnl_reply_data) is filled on entry, type specific part after + * it is zero initialized. This callback should only modify the type + * specific part of reply data. Device identification from struct + * ethnl_reply_data is to be used as for dump requests, it iterates + * through network devices while dev member of struct ethnl_req_info + * points to the device from client request. + * @reply_size: + * Estimate reply message size. Returned value must be sufficient for + * message payload without common reply header. The callback may returned + * estimate higher than actual message size if exact calculation would + * not be worth the saved memory space. + * @fill_reply: + * Fill reply message payload (except for common header) from reply data. + * The callback must not generate more payload than previously called + * ->reply_size() estimated. + * @cleanup_data: + * Optional cleanup called when reply data is no longer needed. Can be + * used e.g. to free any additional data structures outside the main + * structure which were allocated by ->prepare_data(). When processing + * dump requests, ->cleanup() is called for each message. + * + * Description of variable parts of GET request handling when using the + * unified infrastructure. When used, a pointer to an instance of this + * structure is to be added to ðnl_default_requests array and generic + * handlers ethnl_default_doit(), ethnl_default_dumpit(), + * ethnl_default_start() and ethnl_default_done() used in @ethtool_genl_ops; + * ethnl_default_notify() can be used in @ethnl_notify_handlers to send + * notifications of the corresponding type. + */ +struct ethnl_request_ops { + u8 request_cmd; + u8 reply_cmd; + u16 hdr_attr; + unsigned int req_info_size; + unsigned int reply_data_size; + bool allow_nodev_do; + + int (*parse_request)(struct ethnl_req_info *req_info, + struct nlattr **tb, + struct netlink_ext_ack *extack); + int (*prepare_data)(const struct ethnl_req_info *req_info, + struct ethnl_reply_data *reply_data, + struct genl_info *info); + int (*reply_size)(const struct ethnl_req_info *req_info, + const struct ethnl_reply_data *reply_data); + int (*fill_reply)(struct sk_buff *skb, + const struct ethnl_req_info *req_info, + const struct ethnl_reply_data *reply_data); + void (*cleanup_data)(struct ethnl_reply_data *reply_data); +}; + +extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1]; + +/* request handlers */ + +extern const struct ethnl_request_ops ethnl_module_eeprom_request_ops; + +extern const struct nla_policy ethnl_module_eeprom_get_policy[ETHTOOL_A_MODULE_EEPROM_DATA + 1]; + +#endif /* _NET_ETHTOOL_NETLINK_H */ diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index b2ba1d2556f14..e0df892b5c370 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -1457,7 +1457,7 @@ static int nl802154_send_key(struct sk_buff *msg, u32 cmd, u32 portid, hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); if (!hdr) - return -1; + return -ENOBUFS; if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; @@ -1650,7 +1650,7 @@ static int nl802154_send_device(struct sk_buff *msg, u32 cmd, u32 portid, hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); if (!hdr) - return -1; + return -ENOBUFS; if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; @@ -1828,7 +1828,7 @@ static int nl802154_send_devkey(struct sk_buff *msg, u32 cmd, u32 portid, hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); if (!hdr) - return -1; + return -ENOBUFS; if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; @@ -2004,7 +2004,7 @@ static int nl802154_send_seclevel(struct sk_buff *msg, u32 cmd, u32 portid, hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); if (!hdr) - return -1; + return -ENOBUFS; if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 59c9d197a68b7..26aa40c220abc 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -219,7 +219,7 @@ int inet_listen(struct socket *sock, int backlog) * because the socket was in TCP_LISTEN state previously but * was shutdown() rather than close(). */ - tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen; + tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen); if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) && (tcp_fastopen & TFO_SERVER_ENABLE) && !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) { @@ -337,7 +337,7 @@ static int inet_create(struct net *net, struct socket *sock, int protocol, inet->hdrincl = 1; } - if (net->ipv4.sysctl_ip_no_pmtu_disc) + if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT; @@ -1222,7 +1222,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) if (new_saddr == old_saddr) return 0; - if (sock_net(sk)->ipv4.sysctl_ip_dynaddr > 1) { + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) > 1) { pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n", __func__, &old_saddr, &new_saddr); } @@ -1277,7 +1277,7 @@ int inet_sk_rebuild_header(struct sock *sk) * Other protocols have to map its equivalent state to TCP_SYN_SENT. * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme */ - if (!sock_net(sk)->ipv4.sysctl_ip_dynaddr || + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) || sk->sk_state != TCP_SYN_SENT || (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || (err = inet_sk_reselect_saddr(sk)) != 0) @@ -1351,8 +1351,11 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, } ops = rcu_dereference(inet_offloads[proto]); - if (likely(ops && ops->callbacks.gso_segment)) + if (likely(ops && ops->callbacks.gso_segment)) { segs = ops->callbacks.gso_segment(skb, features); + if (!segs) + skb->network_header = skb_mac_header(skb) + nhoff - skb->head; + } if (IS_ERR_OR_NULL(segs)) goto out; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 7b951992c372b..b8fe943ae89d0 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1116,13 +1116,18 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) return err; } -static int arp_invalidate(struct net_device *dev, __be32 ip) +int arp_invalidate(struct net_device *dev, __be32 ip, bool force) { struct neighbour *neigh = neigh_lookup(&arp_tbl, &ip, dev); int err = -ENXIO; struct neigh_table *tbl = &arp_tbl; if (neigh) { + if ((neigh->nud_state & NUD_VALID) && !force) { + neigh_release(neigh); + return 0; + } + if (neigh->nud_state & ~NUD_NOARP) err = neigh_update(neigh, NULL, NUD_FAILED, NEIGH_UPDATE_F_OVERRIDE| @@ -1169,7 +1174,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r, if (!dev) return -EINVAL; } - return arp_invalidate(dev, ip); + return arp_invalidate(dev, ip, true); } /* diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index c1ac802d6894a..42eaad5e515f8 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -240,7 +240,7 @@ static int cipso_v4_cache_check(const unsigned char *key, struct cipso_v4_map_cache_entry *prev_entry = NULL; u32 hash; - if (!cipso_v4_cache_enabled) + if (!READ_ONCE(cipso_v4_cache_enabled)) return -ENOENT; hash = cipso_v4_map_cache_hash(key, key_len); @@ -297,13 +297,14 @@ static int cipso_v4_cache_check(const unsigned char *key, int cipso_v4_cache_add(const unsigned char *cipso_ptr, const struct netlbl_lsm_secattr *secattr) { + int bkt_size = READ_ONCE(cipso_v4_cache_bucketsize); int ret_val = -EPERM; u32 bkt; struct cipso_v4_map_cache_entry *entry = NULL; struct cipso_v4_map_cache_entry *old_entry = NULL; u32 cipso_ptr_len; - if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0) + if (!READ_ONCE(cipso_v4_cache_enabled) || bkt_size <= 0) return 0; cipso_ptr_len = cipso_ptr[1]; @@ -323,7 +324,7 @@ int cipso_v4_cache_add(const unsigned char *cipso_ptr, bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETS - 1); spin_lock_bh(&cipso_v4_cache[bkt].lock); - if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) { + if (cipso_v4_cache[bkt].size < bkt_size) { list_add(&entry->list, &cipso_v4_cache[bkt].list); cipso_v4_cache[bkt].size += 1; } else { @@ -1200,7 +1201,8 @@ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def, /* This will send packets using the "optimized" format when * possible as specified in section 3.4.2.6 of the * CIPSO draft. */ - if (cipso_v4_rbm_optfmt && ret_val > 0 && ret_val <= 10) + if (READ_ONCE(cipso_v4_rbm_optfmt) && ret_val > 0 && + ret_val <= 10) tag_len = 14; else tag_len = 4 + ret_val; @@ -1603,7 +1605,7 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) * all the CIPSO validations here but it doesn't * really specify _exactly_ what we need to validate * ... so, just make it a sysctl tunable. */ - if (cipso_v4_rbm_strictvalid) { + if (READ_ONCE(cipso_v4_rbm_strictvalid)) { if (cipso_v4_map_lvl_valid(doi_def, tag[3]) < 0) { err_offset = opt_iter + 3; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 603a3495afa62..4a8ad46397c0e 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2585,7 +2585,7 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name, free: kfree(t); out: - return -ENOBUFS; + return -ENOMEM; } static void __devinet_sysctl_unregister(struct net *net, diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 86c836fa21459..f555dd4bac653 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -286,6 +286,10 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * return err; } + if (ALIGN(tailen, L1_CACHE_BYTES) > PAGE_SIZE || + ALIGN(skb->data_len, L1_CACHE_BYTES) > PAGE_SIZE) + goto cow; + if (!skb_cloned(skb)) { if (tailen <= skb_tailroom(skb)) { nfrags = 1; @@ -499,7 +503,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb); u32 padto; - padto = min(x->tfcpad, __xfrm_state_mtu(x, dst->child_mtu_cached)); + padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached)); if (skb->len < padto) esp.tfclen = padto - skb->len; } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index b875b98820ede..ef3e7a3e3a29e 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1122,9 +1122,11 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) return; /* Add broadcast address, if it is explicitly assigned. */ - if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) + if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) { fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim, 0); + arp_invalidate(dev, ifa->ifa_broadcast, false); + } if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) && (prefix != addr || ifa->ifa_prefixlen < 32)) { @@ -1140,6 +1142,7 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) prim, 0); fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask, 32, prim, 0); + arp_invalidate(dev, prefix | ~mask, false); } } } @@ -1588,7 +1591,7 @@ static int __net_init fib_net_init(struct net *net) int error; #ifdef CONFIG_IP_ROUTE_CLASSID - net->ipv4.fib_num_tclassid_users = 0; + atomic_set(&net->ipv4.fib_num_tclassid_users, 0); #endif error = ip_fib_net_init(net); if (error < 0) diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index b43a7ba5c6a42..e9a3cc9e98dfa 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -137,7 +137,7 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, return err; } -static bool fib4_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg) +static bool fib4_rule_suppress(struct fib_rule *rule, int flags, struct fib_lookup_arg *arg) { struct fib_result *result = (struct fib_result *) arg->result; struct net_device *dev = NULL; @@ -258,7 +258,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, if (tb[FRA_FLOW]) { rule4->tclassid = nla_get_u32(tb[FRA_FLOW]); if (rule4->tclassid) - net->ipv4.fib_num_tclassid_users++; + atomic_inc(&net->ipv4.fib_num_tclassid_users); } #endif @@ -290,7 +290,7 @@ static int fib4_rule_delete(struct fib_rule *rule) #ifdef CONFIG_IP_ROUTE_CLASSID if (((struct fib4_rule *)rule)->tclassid) - net->ipv4.fib_num_tclassid_users--; + atomic_dec(&net->ipv4.fib_num_tclassid_users); #endif net->ipv4.fib_has_custom_rules = true; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index b1b3220917ca0..28da0443f3e9e 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -222,7 +223,7 @@ void fib_nh_release(struct net *net, struct fib_nh *fib_nh) { #ifdef CONFIG_IP_ROUTE_CLASSID if (fib_nh->nh_tclassid) - net->ipv4.fib_num_tclassid_users--; + atomic_dec(&net->ipv4.fib_num_tclassid_users); #endif fib_nh_common_release(&fib_nh->nh_common); } @@ -318,11 +319,15 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi) static inline unsigned int fib_devindex_hashfn(unsigned int val) { - unsigned int mask = DEVINDEX_HASHSIZE - 1; + return hash_32(val, DEVINDEX_HASHBITS); +} + +static struct hlist_head * +fib_info_devhash_bucket(const struct net_device *dev) +{ + u32 val = net_hash_mix(dev_net(dev)) ^ dev->ifindex; - return (val ^ - (val >> DEVINDEX_HASHBITS) ^ - (val >> (DEVINDEX_HASHBITS * 2))) & mask; + return &fib_info_devhash[fib_devindex_hashfn(val)]; } static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope, @@ -432,12 +437,11 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev) { struct hlist_head *head; struct fib_nh *nh; - unsigned int hash; spin_lock(&fib_info_lock); - hash = fib_devindex_hashfn(dev->ifindex); - head = &fib_info_devhash[hash]; + head = fib_info_devhash_bucket(dev); + hlist_for_each_entry(nh, head, nh_hash) { if (nh->fib_nh_dev == dev && nh->fib_nh_gw4 == gw && @@ -624,7 +628,7 @@ int fib_nh_init(struct net *net, struct fib_nh *nh, #ifdef CONFIG_IP_ROUTE_CLASSID nh->nh_tclassid = cfg->fc_flow; if (nh->nh_tclassid) - net->ipv4.fib_num_tclassid_users++; + atomic_inc(&net->ipv4.fib_num_tclassid_users); #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH nh->fib_nh_weight = nh_weight; @@ -654,6 +658,19 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining, return nhs; } +static int fib_gw_from_attr(__be32 *gw, struct nlattr *nla, + struct netlink_ext_ack *extack) +{ + if (nla_len(nla) < sizeof(*gw)) { + NL_SET_ERR_MSG(extack, "Invalid IPv4 address in RTA_GATEWAY"); + return -EINVAL; + } + + *gw = nla_get_in_addr(nla); + + return 0; +} + /* only called when fib_nh is integrated into fib_info */ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, int remaining, struct fib_config *cfg, @@ -696,7 +713,11 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, return -EINVAL; } if (nla) { - fib_cfg.fc_gw4 = nla_get_in_addr(nla); + ret = fib_gw_from_attr(&fib_cfg.fc_gw4, nla, + extack); + if (ret) + goto errout; + if (fib_cfg.fc_gw4) fib_cfg.fc_gw_family = AF_INET; } else if (nlav) { @@ -706,10 +727,18 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, } nla = nla_find(attrs, attrlen, RTA_FLOW); - if (nla) + if (nla) { + if (nla_len(nla) < sizeof(u32)) { + NL_SET_ERR_MSG(extack, "Invalid RTA_FLOW"); + return -EINVAL; + } fib_cfg.fc_flow = nla_get_u32(nla); + } fib_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP); + /* RTA_ENCAP_TYPE length checked in + * lwtunnel_valid_encap_type_attr + */ nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); if (nla) fib_cfg.fc_encap_type = nla_get_u16(nla); @@ -847,8 +876,13 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, } if (cfg->fc_oif || cfg->fc_gw_family) { - struct fib_nh *nh = fib_info_nh(fi, 0); + struct fib_nh *nh; + + /* cannot match on nexthop object attributes */ + if (fi->nh) + return 1; + nh = fib_info_nh(fi, 0); if (cfg->fc_encap) { if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap, nh, cfg, extack)) @@ -894,6 +928,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, attrlen = rtnh_attrlen(rtnh); if (attrlen > 0) { struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh); + int err; nla = nla_find(attrs, attrlen, RTA_GATEWAY); nlav = nla_find(attrs, attrlen, RTA_VIA); @@ -904,12 +939,17 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, } if (nla) { + __be32 gw; + + err = fib_gw_from_attr(&gw, nla, extack); + if (err) + return err; + if (nh->fib_nh_gw_family != AF_INET || - nla_get_in_addr(nla) != nh->fib_nh_gw4) + gw != nh->fib_nh_gw4) return 1; } else if (nlav) { struct fib_config cfg2; - int err; err = fib_gw_from_via(&cfg2, nlav, extack); if (err) @@ -932,8 +972,14 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, #ifdef CONFIG_IP_ROUTE_CLASSID nla = nla_find(attrs, attrlen, RTA_FLOW); - if (nla && nla_get_u32(nla) != nh->nh_tclassid) - return 1; + if (nla) { + if (nla_len(nla) < sizeof(u32)) { + NL_SET_ERR_MSG(extack, "Invalid RTA_FLOW"); + return -EINVAL; + } + if (nla_get_u32(nla) != nh->nh_tclassid) + return 1; + } #endif } @@ -1171,7 +1217,7 @@ static int fib_check_nh_nongw(struct net *net, struct fib_nh *nh, nh->fib_nh_dev = in_dev->dev; dev_hold(nh->fib_nh_dev); - nh->fib_nh_scope = RT_SCOPE_HOST; + nh->fib_nh_scope = RT_SCOPE_LINK; if (!netif_carrier_ok(nh->fib_nh_dev)) nh->fib_nh_flags |= RTNH_F_LINKDOWN; err = 0; @@ -1557,12 +1603,10 @@ struct fib_info *fib_create_info(struct fib_config *cfg, } else { change_nexthops(fi) { struct hlist_head *head; - unsigned int hash; if (!nexthop_nh->fib_nh_dev) continue; - hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex); - head = &fib_info_devhash[hash]; + head = fib_info_devhash_bucket(nexthop_nh->fib_nh_dev); hlist_add_head(&nexthop_nh->nh_hash, head); } endfor_nexthops(fi) } @@ -1654,7 +1698,7 @@ EXPORT_SYMBOL_GPL(fib_nexthop_info); #if IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) || IS_ENABLED(CONFIG_IPV6) int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc, - int nh_weight, u8 rt_family) + int nh_weight, u8 rt_family, u32 nh_tclassid) { const struct net_device *dev = nhc->nhc_dev; struct rtnexthop *rtnh; @@ -1672,6 +1716,9 @@ int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc, rtnh->rtnh_flags = flags; + if (nh_tclassid && nla_put_u32(skb, RTA_FLOW, nh_tclassid)) + goto nla_put_failure; + /* length of rtnetlink header + attributes */ rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh; @@ -1699,14 +1746,13 @@ static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi) } for_nexthops(fi) { - if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight, - AF_INET) < 0) - goto nla_put_failure; + u32 nh_tclassid = 0; #ifdef CONFIG_IP_ROUTE_CLASSID - if (nh->nh_tclassid && - nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) - goto nla_put_failure; + nh_tclassid = nh->nh_tclassid; #endif + if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight, + AF_INET, nh_tclassid) < 0) + goto nla_put_failure; } endfor_nexthops(fi); mp_end: @@ -1901,8 +1947,7 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig) void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) { - unsigned int hash = fib_devindex_hashfn(dev->ifindex); - struct hlist_head *head = &fib_info_devhash[hash]; + struct hlist_head *head = fib_info_devhash_bucket(dev); struct fib_nh *nh; hlist_for_each_entry(nh, head, nh_hash) { @@ -1921,12 +1966,11 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) */ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) { - int ret = 0; - int scope = RT_SCOPE_NOWHERE; + struct hlist_head *head = fib_info_devhash_bucket(dev); struct fib_info *prev_fi = NULL; - unsigned int hash = fib_devindex_hashfn(dev->ifindex); - struct hlist_head *head = &fib_info_devhash[hash]; + int scope = RT_SCOPE_NOWHERE; struct fib_nh *nh; + int ret = 0; if (force) scope = -1; @@ -2071,7 +2115,6 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res) int fib_sync_up(struct net_device *dev, unsigned char nh_flags) { struct fib_info *prev_fi; - unsigned int hash; struct hlist_head *head; struct fib_nh *nh; int ret; @@ -2087,8 +2130,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags) } prev_fi = NULL; - hash = fib_devindex_hashfn(dev->ifindex); - head = &fib_info_devhash[hash]; + head = fib_info_devhash_bucket(dev); ret = 0; hlist_for_each_entry(nh, head, nh_hash) { @@ -2167,7 +2209,7 @@ void fib_select_multipath(struct fib_result *res, int hash) } change_nexthops(fi) { - if (net->ipv4.sysctl_fib_multipath_use_neigh) { + if (READ_ONCE(net->ipv4.sysctl_fib_multipath_use_neigh)) { if (!fib_good_nh(nexthop_nh)) continue; if (!first) { diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 51673d00bbeac..a1f830da4ad30 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -500,7 +500,7 @@ static void tnode_free(struct key_vector *tn) tn = container_of(head, struct tnode, rcu)->kv; } - if (tnode_free_size >= sysctl_fib_sync_mem) { + if (tnode_free_size >= READ_ONCE(sysctl_fib_sync_mem)) { tnode_free_size = 0; synchronize_rcu(); } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index c88612242c89f..b44f51e404aee 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -261,11 +261,12 @@ bool icmp_global_allow(void) spin_lock(&icmp_global.lock); delta = min_t(u32, now - icmp_global.stamp, HZ); if (delta >= HZ / 50) { - incr = sysctl_icmp_msgs_per_sec * delta / HZ ; + incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ; if (incr) WRITE_ONCE(icmp_global.stamp, now); } - credit = min_t(u32, icmp_global.credit + incr, sysctl_icmp_msgs_burst); + credit = min_t(u32, icmp_global.credit + incr, + READ_ONCE(sysctl_icmp_msgs_burst)); if (credit) { /* We want to use a credit of one in average, but need to randomize * it for security reasons. @@ -289,7 +290,7 @@ static bool icmpv4_mask_allow(struct net *net, int type, int code) return true; /* Limit if icmp type is enabled in ratemask. */ - if (!((1 << type) & net->ipv4.sysctl_icmp_ratemask)) + if (!((1 << type) & READ_ONCE(net->ipv4.sysctl_icmp_ratemask))) return true; return false; @@ -327,7 +328,8 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, vif = l3mdev_master_ifindex(dst->dev); peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1); - rc = inet_peer_xrlim_allow(peer, net->ipv4.sysctl_icmp_ratelimit); + rc = inet_peer_xrlim_allow(peer, + READ_ONCE(net->ipv4.sysctl_icmp_ratelimit)); if (peer) inet_putpeer(peer); out: @@ -460,6 +462,23 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) local_bh_enable(); } +/* + * The device used for looking up which routing table to use for sending an ICMP + * error is preferably the source whenever it is set, which should ensure the + * icmp error can be sent to the source host, else lookup using the routing + * table of the destination device, else use the main routing table (index 0). + */ +static struct net_device *icmp_get_route_lookup_dev(struct sk_buff *skb) +{ + struct net_device *route_lookup_dev = NULL; + + if (skb->dev) + route_lookup_dev = skb->dev; + else if (skb_dst(skb)) + route_lookup_dev = skb_dst(skb)->dev; + return route_lookup_dev; +} + static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4, struct sk_buff *skb_in, @@ -468,6 +487,7 @@ static struct rtable *icmp_route_lookup(struct net *net, int type, int code, struct icmp_bxm *param) { + struct net_device *route_lookup_dev; struct rtable *rt, *rt2; struct flowi4 fl4_dec; int err; @@ -482,7 +502,8 @@ static struct rtable *icmp_route_lookup(struct net *net, fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; fl4->fl4_icmp_code = code; - fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev); + route_lookup_dev = icmp_get_route_lookup_dev(skb_in); + fl4->flowi4_oif = l3mdev_master_ifindex(route_lookup_dev); security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4)); rt = ip_route_output_key_hash(net, fl4, skb_in); @@ -506,7 +527,7 @@ static struct rtable *icmp_route_lookup(struct net *net, if (err) goto relookup_failed; - if (inet_addr_type_dev_table(net, skb_dst(skb_in)->dev, + if (inet_addr_type_dev_table(net, route_lookup_dev, fl4_dec.saddr) == RTN_LOCAL) { rt2 = __ip_route_output_key(net, &fl4_dec); if (IS_ERR(rt2)) @@ -865,7 +886,7 @@ static bool icmp_unreach(struct sk_buff *skb) * values please see * Documentation/networking/ip-sysctl.txt */ - switch (net->ipv4.sysctl_ip_no_pmtu_disc) { + switch (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) { default: net_dbg_ratelimited("%pI4: fragmentation needed and DF set\n", &iph->daddr); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index b1ecc91955172..1023f881091ef 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -469,7 +469,8 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, if (pmc->multiaddr == IGMP_ALL_HOSTS) return skb; - if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(pmc->multiaddr) && + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return skb; mtu = READ_ONCE(dev->mtu); @@ -595,7 +596,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc) if (pmc->multiaddr == IGMP_ALL_HOSTS) continue; if (ipv4_is_local_multicast(pmc->multiaddr) && - !net->ipv4.sysctl_igmp_llm_reports) + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) continue; spin_lock_bh(&pmc->lock); if (pmc->sfcount[MCAST_EXCLUDE]) @@ -738,7 +739,8 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, if (type == IGMPV3_HOST_MEMBERSHIP_REPORT) return igmpv3_send_report(in_dev, pmc); - if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(group) && + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return 0; if (type == IGMP_HOST_LEAVE_MESSAGE) @@ -827,7 +829,7 @@ static void igmp_ifc_event(struct in_device *in_dev) struct net *net = dev_net(in_dev->dev); if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) return; - WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv); + WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv)); igmp_ifc_start_timer(in_dev, 1); } @@ -922,7 +924,8 @@ static bool igmp_heard_report(struct in_device *in_dev, __be32 group) if (group == IGMP_ALL_HOSTS) return false; - if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(group) && + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return false; rcu_read_lock(); @@ -1008,7 +1011,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, * received value was zero, use the default or statically * configured value. */ - in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv; + in_dev->mr_qrv = ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL; /* RFC3376, 8.3. Query Response Interval: @@ -1047,7 +1050,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, if (im->multiaddr == IGMP_ALL_HOSTS) continue; if (ipv4_is_local_multicast(im->multiaddr) && - !net->ipv4.sysctl_igmp_llm_reports) + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) continue; spin_lock_bh(&im->lock); if (im->tm_running) @@ -1188,7 +1191,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im, pmc->interface = im->interface; in_dev_hold(in_dev); pmc->multiaddr = im->multiaddr; - pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); pmc->sfmode = im->sfmode; if (pmc->sfmode == MCAST_INCLUDE) { struct ip_sf_list *psf; @@ -1239,9 +1242,11 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im) swap(im->tomb, pmc->tomb); swap(im->sources, pmc->sources); for (psf = im->sources; psf; psf = psf->sf_next) - psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + psf->sf_crcount = in_dev->mr_qrv ?: + READ_ONCE(net->ipv4.sysctl_igmp_qrv); } else { - im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + im->crcount = in_dev->mr_qrv ?: + READ_ONCE(net->ipv4.sysctl_igmp_qrv); } in_dev_put(pmc->interface); kfree_pmc(pmc); @@ -1298,7 +1303,8 @@ static void __igmp_group_dropped(struct ip_mc_list *im, gfp_t gfp) #ifdef CONFIG_IP_MULTICAST if (im->multiaddr == IGMP_ALL_HOSTS) return; - if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(im->multiaddr) && + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return; reporter = im->reporter; @@ -1340,13 +1346,14 @@ static void igmp_group_added(struct ip_mc_list *im) #ifdef CONFIG_IP_MULTICAST if (im->multiaddr == IGMP_ALL_HOSTS) return; - if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(im->multiaddr) && + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return; if (in_dev->dead) return; - im->unsolicit_count = net->ipv4.sysctl_igmp_qrv; + im->unsolicit_count = READ_ONCE(net->ipv4.sysctl_igmp_qrv); if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) { spin_lock_bh(&im->lock); igmp_start_timer(im, IGMP_INITIAL_REPORT_DELAY); @@ -1360,7 +1367,7 @@ static void igmp_group_added(struct ip_mc_list *im) * IN() to IN(A). */ if (im->sfmode == MCAST_EXCLUDE) - im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + im->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); igmp_ifc_event(in_dev); #endif @@ -1644,7 +1651,7 @@ static void ip_mc_rejoin_groups(struct in_device *in_dev) if (im->multiaddr == IGMP_ALL_HOSTS) continue; if (ipv4_is_local_multicast(im->multiaddr) && - !net->ipv4.sysctl_igmp_llm_reports) + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) continue; /* a failover is happening and switches @@ -1751,7 +1758,7 @@ static void ip_mc_reset(struct in_device *in_dev) in_dev->mr_qi = IGMP_QUERY_INTERVAL; in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL; - in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv; + in_dev->mr_qrv = READ_ONCE(net->ipv4.sysctl_igmp_qrv); } #else static void ip_mc_reset(struct in_device *in_dev) @@ -1885,7 +1892,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, #ifdef CONFIG_IP_MULTICAST if (psf->sf_oldin && !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) { - psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + psf->sf_crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); psf->sf_next = pmc->tomb; pmc->tomb = psf; rv = 1; @@ -1949,7 +1956,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, /* filter mode change */ pmc->sfmode = MCAST_INCLUDE; #ifdef CONFIG_IP_MULTICAST - pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount); for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; @@ -2128,7 +2135,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, #ifdef CONFIG_IP_MULTICAST /* else no filters; keep old mode for reports */ - pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount); for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; @@ -2194,7 +2201,7 @@ static int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr, count++; } err = -ENOBUFS; - if (count >= net->ipv4.sysctl_igmp_max_memberships) + if (count >= READ_ONCE(net->ipv4.sysctl_igmp_max_memberships)) goto done; iml = sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL); if (!iml) @@ -2403,9 +2410,10 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct newpsl->sl_addr[i] = psl->sl_addr[i]; /* decrease mem now to avoid the memleak warning */ atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); - kfree_rcu(psl, rcu); } rcu_assign_pointer(pmc->sflist, newpsl); + if (psl) + kfree_rcu(psl, rcu); psl = newpsl; } rv = 1; /* > 0 for insert logic below if sl_count is 0 */ @@ -2503,11 +2511,13 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) psl->sl_count, psl->sl_addr, 0); /* decrease mem now to avoid the memleak warning */ atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); - kfree_rcu(psl, rcu); - } else + } else { (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, 0, NULL, 0); + } rcu_assign_pointer(pmc->sflist, newpsl); + if (psl) + kfree_rcu(psl, rcu); pmc->sfmode = msf->imsf_fmode; err = 0; done: diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 9ee79f7d3f5fc..3c608f6ceaa93 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -799,7 +799,7 @@ static void reqsk_queue_hash_req(struct request_sock *req, timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED); mod_timer(&req->rsk_timer, jiffies + timeout); - inet_ehash_insert(req_to_sk(req), NULL); + inet_ehash_insert(req_to_sk(req), NULL, NULL); /* before letting lookups find us, make sure all req fields * are committed to memory and refcnt initialized. */ diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 4f71aca156662..f8f79672cc5f3 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -200,6 +200,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->idiag_state = sk->sk_state; r->idiag_timer = 0; r->idiag_retrans = 0; + r->idiag_expires = 0; if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin)) goto errout; @@ -240,20 +241,17 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->idiag_timer = 1; r->idiag_retrans = icsk->icsk_retransmits; r->idiag_expires = - jiffies_to_msecs(icsk->icsk_timeout - jiffies); + jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies); } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { r->idiag_timer = 4; r->idiag_retrans = icsk->icsk_probes_out; r->idiag_expires = - jiffies_to_msecs(icsk->icsk_timeout - jiffies); + jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies); } else if (timer_pending(&sk->sk_timer)) { r->idiag_timer = 2; r->idiag_retrans = icsk->icsk_probes_out; r->idiag_expires = - jiffies_to_msecs(sk->sk_timer.expires - jiffies); - } else { - r->idiag_timer = 0; - r->idiag_expires = 0; + jiffies_delta_to_msecs(sk->sk_timer.expires - jiffies); } if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) { @@ -338,16 +336,13 @@ static int inet_twsk_diag_fill(struct sock *sk, r = nlmsg_data(nlh); BUG_ON(tw->tw_state != TCP_TIME_WAIT); - tmo = tw->tw_timer.expires - jiffies; - if (tmo < 0) - tmo = 0; - inet_diag_msg_common_fill(r, sk); r->idiag_retrans = 0; r->idiag_state = tw->tw_substate; r->idiag_timer = 3; - r->idiag_expires = jiffies_to_msecs(tmo); + tmo = tw->tw_timer.expires - jiffies; + r->idiag_expires = jiffies_delta_to_msecs(tmo); r->idiag_rqueue = 0; r->idiag_wqueue = 0; r->idiag_uid = 0; @@ -381,7 +376,7 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb, offsetof(struct sock, sk_cookie)); tmo = inet_reqsk(sk)->rsk_timer.expires - jiffies; - r->idiag_expires = (tmo >= 0) ? jiffies_to_msecs(tmo) : 0; + r->idiag_expires = jiffies_delta_to_msecs(tmo); r->idiag_rqueue = 0; r->idiag_wqueue = 0; r->idiag_uid = 0; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 10d31733297d7..e0e8a65d561ec 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -204,9 +204,9 @@ void inet_frag_kill(struct inet_frag_queue *fq) /* The RCU read lock provides a memory barrier * guaranteeing that if fqdir->dead is false then * the hash table destruction will not start until - * after we unlock. Paired with inet_frags_exit_net(). + * after we unlock. Paired with fqdir_pre_exit(). */ - if (!fqdir->dead) { + if (!READ_ONCE(fqdir->dead)) { rhashtable_remove_fast(&fqdir->rhashtable, &fq->node, fqdir->f->rhash_params); refcount_dec(&fq->refcnt); @@ -321,9 +321,11 @@ static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir, /* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */ struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key) { + /* This pairs with WRITE_ONCE() in fqdir_pre_exit(). */ + long high_thresh = READ_ONCE(fqdir->high_thresh); struct inet_frag_queue *fq = NULL, *prev; - if (!fqdir->high_thresh || frag_mem_limit(fqdir) > fqdir->high_thresh) + if (!high_thresh || frag_mem_limit(fqdir) > high_thresh) return NULL; rcu_read_lock(); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 006a34b185378..6e65063572a96 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -20,6 +20,9 @@ #include #include #include +#if IS_ENABLED(CONFIG_IPV6) +#include +#endif #include #include #include @@ -239,8 +242,10 @@ static inline int compute_score(struct sock *sk, struct net *net, if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif)) return -1; + score = sk->sk_bound_dev_if ? 2 : 1; - score = sk->sk_family == PF_INET ? 2 : 1; + if (sk->sk_family == PF_INET) + score++; if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; } @@ -459,7 +464,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, return -EADDRNOTAVAIL; } -static u32 inet_sk_port_offset(const struct sock *sk) +static u64 inet_sk_port_offset(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); @@ -468,10 +473,52 @@ static u32 inet_sk_port_offset(const struct sock *sk) inet->inet_dport); } -/* insert a socket into ehash, and eventually remove another one - * (The another one can be a SYN_RECV or TIMEWAIT +/* Searches for an exsiting socket in the ehash bucket list. + * Returns true if found, false otherwise. + */ +static bool inet_ehash_lookup_by_sk(struct sock *sk, + struct hlist_nulls_head *list) +{ + const __portpair ports = INET_COMBINED_PORTS(sk->sk_dport, sk->sk_num); + const int sdif = sk->sk_bound_dev_if; + const int dif = sk->sk_bound_dev_if; + const struct hlist_nulls_node *node; + struct net *net = sock_net(sk); + struct sock *esk; + + INET_ADDR_COOKIE(acookie, sk->sk_daddr, sk->sk_rcv_saddr); + + sk_nulls_for_each_rcu(esk, node, list) { + if (esk->sk_hash != sk->sk_hash) + continue; + if (sk->sk_family == AF_INET) { + if (unlikely(INET_MATCH(esk, net, acookie, + sk->sk_daddr, + sk->sk_rcv_saddr, + ports, dif, sdif))) { + return true; + } + } +#if IS_ENABLED(CONFIG_IPV6) + else if (sk->sk_family == AF_INET6) { + if (unlikely(INET6_MATCH(esk, net, + &sk->sk_v6_daddr, + &sk->sk_v6_rcv_saddr, + ports, dif, sdif))) { + return true; + } + } +#endif + } + return false; +} + +/* Insert a socket into ehash, and eventually remove another one + * (The another one can be a SYN_RECV or TIMEWAIT) + * If an existing socket already exists, socket sk is not inserted, + * and sets found_dup_sk parameter to true. */ -bool inet_ehash_insert(struct sock *sk, struct sock *osk) +bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct hlist_nulls_head *list; @@ -490,16 +537,23 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk) if (osk) { WARN_ON_ONCE(sk->sk_hash != osk->sk_hash); ret = sk_nulls_del_node_init_rcu(osk); + } else if (found_dup_sk) { + *found_dup_sk = inet_ehash_lookup_by_sk(sk, list); + if (*found_dup_sk) + ret = false; } + if (ret) __sk_nulls_add_node_rcu(sk, list); + spin_unlock(lock); + return ret; } -bool inet_ehash_nolisten(struct sock *sk, struct sock *osk) +bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk) { - bool ok = inet_ehash_insert(sk, osk); + bool ok = inet_ehash_insert(sk, osk, found_dup_sk); if (ok) { sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -543,7 +597,7 @@ int __inet_hash(struct sock *sk, struct sock *osk) int err = 0; if (sk->sk_state != TCP_LISTEN) { - inet_ehash_nolisten(sk, osk); + inet_ehash_nolisten(sk, osk, NULL); return 0; } WARN_ON(!sk_unhashed(sk)); @@ -617,8 +671,21 @@ void inet_unhash(struct sock *sk) } EXPORT_SYMBOL_GPL(inet_unhash); +/* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm + * Note that we use 32bit integers (vs RFC 'short integers') + * because 2^16 is not a multiple of num_ephemeral and this + * property might be used by clever attacker. + * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, though + * attacks were since demonstrated, thus we use 65536 instead to really + * give more isolation and privacy, at the expense of 256kB of kernel + * memory. + */ +#define INET_TABLE_PERTURB_SHIFT 16 +#define INET_TABLE_PERTURB_SIZE (1 << INET_TABLE_PERTURB_SHIFT) +static u32 *table_perturb; + int __inet_hash_connect(struct inet_timewait_death_row *death_row, - struct sock *sk, u32 port_offset, + struct sock *sk, u64 port_offset, int (*check_established)(struct inet_timewait_death_row *, struct sock *, __u16, struct inet_timewait_sock **)) { @@ -630,8 +697,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct inet_bind_bucket *tb; u32 remaining, offset; int ret, i, low, high; - static u32 hint; int l3mdev; + u32 index; if (port) { head = &hinfo->bhash[inet_bhashfn(net, port, @@ -639,7 +706,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, tb = inet_csk(sk)->icsk_bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { - inet_ehash_nolisten(sk, NULL); + inet_ehash_nolisten(sk, NULL, NULL); spin_unlock_bh(&head->lock); return 0; } @@ -658,7 +725,13 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, if (likely(remaining > 1)) remaining &= ~1U; - offset = (hint + port_offset) % remaining; + net_get_random_once(table_perturb, + INET_TABLE_PERTURB_SIZE * sizeof(*table_perturb)); + index = port_offset & (INET_TABLE_PERTURB_SIZE - 1); + + offset = READ_ONCE(table_perturb[index]) + (port_offset >> 32); + offset %= remaining; + /* In first pass we try ports of @low parity. * inet_csk_get_port() does the opposite choice. */ @@ -712,13 +785,19 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, return -EADDRNOTAVAIL; ok: - hint += i + 2; + /* Here we want to add a little bit of randomness to the next source + * port that will be chosen. We use a max() with a random here so that + * on low contention the randomness is maximal and on high contention + * it may be inexistent. + */ + i = max_t(int, i, (prandom_u32() & 7) * 2); + WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2); /* Head lock still held and bh's disabled */ inet_bind_hash(sk, tb, port); if (sk_unhashed(sk)) { inet_sk(sk)->inet_sport = htons(port); - inet_ehash_nolisten(sk, (struct sock *)tw); + inet_ehash_nolisten(sk, (struct sock *)tw, NULL); } if (tw) inet_twsk_bind_unhash(tw, hinfo); @@ -735,7 +814,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, int inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { - u32 port_offset = 0; + u64 port_offset = 0; if (!inet_sk(sk)->inet_num) port_offset = inet_sk_port_offset(sk); @@ -785,6 +864,14 @@ void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, low_limit, high_limit); init_hashinfo_lhash2(h); + + /* this one is used for source ports of outgoing connections */ + table_perturb = alloc_large_system_hash("Table-perturb", + sizeof(*table_perturb), + INET_TABLE_PERTURB_SIZE, + 0, 0, NULL, NULL, + INET_TABLE_PERTURB_SIZE, + INET_TABLE_PERTURB_SIZE); } int inet_hashinfo2_init_mod(struct inet_hashinfo *h) diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index ff327a62c9ce9..a18668552d33d 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -148,16 +148,20 @@ static void inet_peer_gc(struct inet_peer_base *base, struct inet_peer *gc_stack[], unsigned int gc_cnt) { + int peer_threshold, peer_maxttl, peer_minttl; struct inet_peer *p; __u32 delta, ttl; int i; - if (base->total >= inet_peer_threshold) + peer_threshold = READ_ONCE(inet_peer_threshold); + peer_maxttl = READ_ONCE(inet_peer_maxttl); + peer_minttl = READ_ONCE(inet_peer_minttl); + + if (base->total >= peer_threshold) ttl = 0; /* be aggressive */ else - ttl = inet_peer_maxttl - - (inet_peer_maxttl - inet_peer_minttl) / HZ * - base->total / inet_peer_threshold * HZ; + ttl = peer_maxttl - (peer_maxttl - peer_minttl) / HZ * + base->total / peer_threshold * HZ; for (i = 0; i < gc_cnt; i++) { p = gc_stack[i]; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index cfeb8890f94ee..fad803d2d711e 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -144,7 +144,8 @@ static void ip_expire(struct timer_list *t) rcu_read_lock(); - if (qp->q.fqdir->dead) + /* Paired with WRITE_ONCE() in fqdir_pre_exit(). */ + if (READ_ONCE(qp->q.fqdir->dead)) goto out_rcu_unlock; spin_lock(&qp->q.lock); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index fedad3a3e61b8..52dbffb7bc2fd 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -432,14 +432,12 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, __be16 proto) { struct ip_tunnel *tunnel = netdev_priv(dev); - - if (tunnel->parms.o_flags & TUNNEL_SEQ) - tunnel->o_seqno++; + __be16 flags = tunnel->parms.o_flags; /* Push GRE header. */ gre_build_header(skb, tunnel->tun_hlen, - tunnel->parms.o_flags, proto, tunnel->parms.o_key, - htonl(tunnel->o_seqno)); + flags, proto, tunnel->parms.o_key, + (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0); ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); } @@ -499,7 +497,6 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) int tunnel_hlen; int version; int nhoff; - int thoff; tun_info = skb_tunnel_info(skb); if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || @@ -533,10 +530,16 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff)) truncate = true; - thoff = skb_transport_header(skb) - skb_mac_header(skb); - if (skb->protocol == htons(ETH_P_IPV6) && - (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)) - truncate = true; + if (skb->protocol == htons(ETH_P_IPV6)) { + int thoff; + + if (skb_transport_header_was_set(skb)) + thoff = skb_transport_header(skb) - skb_mac_header(skb); + else + thoff = nhoff + sizeof(struct ipv6hdr); + if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff) + truncate = true; + } if (version == 1) { erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)), @@ -577,8 +580,9 @@ static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) key = &info->key; ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src, - tunnel_id_to_key32(key->tun_id), key->tos, 0, - skb->mark, skb_get_hash(skb)); + tunnel_id_to_key32(key->tun_id), + key->tos & ~INET_ECN_MASK, 0, skb->mark, + skb_get_hash(skb)); rt = ip_route_output_key(dev_net(dev), &fl4); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -613,6 +617,10 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, */ skb_pull(skb, tunnel->hlen + sizeof(struct iphdr)); skb_reset_mac_header(skb); + + if (skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_start(skb) < skb->data) + goto free_skb; } else { if (skb_cow_head(skb, dev->needed_headroom)) goto free_skb; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index f52bc9c22e5b8..418e939878004 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -161,12 +161,19 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk, iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr); iph->saddr = saddr; iph->protocol = sk->sk_protocol; - if (ip_dont_fragment(sk, &rt->dst)) { + /* Do not bother generating IPID for small packets (eg SYNACK) */ + if (skb->len <= IPV4_MIN_MTU || ip_dont_fragment(sk, &rt->dst)) { iph->frag_off = htons(IP_DF); iph->id = 0; } else { iph->frag_off = 0; - __ip_select_ident(net, iph, 1); + /* TCP packets here are SYNACK with fat IPv4/TCP options. + * Avoid using the hashed IP ident generator. + */ + if (sk->sk_protocol == IPPROTO_TCP) + iph->id = (__force __be16)prandom_u32(); + else + __ip_select_ident(net, iph, 1); } if (opt && opt->opt.optlen) { @@ -446,8 +453,9 @@ static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4) { BUILD_BUG_ON(offsetof(typeof(*fl4), daddr) != offsetof(typeof(*fl4), saddr) + sizeof(fl4->saddr)); - memcpy(&iph->saddr, &fl4->saddr, - sizeof(fl4->saddr) + sizeof(fl4->daddr)); + + iph->saddr = fl4->saddr; + iph->daddr = fl4->daddr; } /* Note: skb->sk can be different from sk, in case of tunnels */ diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index d71935618871e..2da6896080363 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -259,7 +259,9 @@ static int __net_init ipmr_rules_init(struct net *net) return 0; err2: + rtnl_lock(); ipmr_free_table(mrt); + rtnl_unlock(); err1: fib_rules_unregister(ops); return err; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 6bdb1ab8af617..63ebb87d85331 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -505,8 +505,11 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) if (IS_ERR(config)) return PTR_ERR(config); } - } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN)) + } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN)) { + clusterip_config_entry_put(config); + clusterip_config_put(config); return -EINVAL; + } ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) { diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index f5f4369c131c9..4d69b3de980a6 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -839,15 +839,36 @@ static void remove_nexthop(struct net *net, struct nexthop *nh, /* if any FIB entries reference this nexthop, any dst entries * need to be regenerated */ -static void nh_rt_cache_flush(struct net *net, struct nexthop *nh) +static void nh_rt_cache_flush(struct net *net, struct nexthop *nh, + struct nexthop *replaced_nh) { struct fib6_info *f6i; + struct nh_group *nhg; + int i; if (!list_empty(&nh->fi_list)) rt_cache_flush(net); list_for_each_entry(f6i, &nh->f6i_list, nh_list) ipv6_stub->fib6_update_sernum(net, f6i); + + /* if an IPv6 group was replaced, we have to release all old + * dsts to make sure all refcounts are released + */ + if (!replaced_nh->is_group) + return; + + /* new dsts must use only the new nexthop group */ + synchronize_net(); + + nhg = rtnl_dereference(replaced_nh->nh_grp); + for (i = 0; i < nhg->num_nh; i++) { + struct nh_grp_entry *nhge = &nhg->nh_entries[i]; + struct nh_info *nhi = rtnl_dereference(nhge->nh->nh_info); + + if (nhi->family == AF_INET6) + ipv6_stub->fib6_nh_release_dsts(&nhi->fib6_nh); + } } static int replace_nexthop_grp(struct net *net, struct nexthop *old, @@ -994,7 +1015,7 @@ static int replace_nexthop(struct net *net, struct nexthop *old, err = replace_nexthop_single(net, old, new, extack); if (!err) { - nh_rt_cache_flush(net, old); + nh_rt_cache_flush(net, old, new); __remove_nexthop(net, new, NULL); nexthop_put(new); @@ -1183,6 +1204,7 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh, .fc_gw4 = cfg->gw.ipv4, .fc_gw_family = cfg->gw.ipv4 ? AF_INET : 0, .fc_flags = cfg->nh_flags, + .fc_nlinfo = cfg->nlinfo, .fc_encap = cfg->nh_encap, .fc_encap_type = cfg->nh_encap_type, }; @@ -1218,6 +1240,7 @@ static int nh_create_ipv6(struct net *net, struct nexthop *nh, .fc_ifindex = cfg->nh_ifindex, .fc_gateway = cfg->gw.ipv6, .fc_flags = cfg->nh_flags, + .fc_nlinfo = cfg->nlinfo, .fc_encap = cfg->nh_encap, .fc_encap_type = cfg->nh_encap_type, }; @@ -1229,11 +1252,15 @@ static int nh_create_ipv6(struct net *net, struct nexthop *nh, /* sets nh_dev if successful */ err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL, extack); - if (err) + if (err) { + /* IPv6 is not enabled, don't call fib6_nh_release */ + if (err == -EAFNOSUPPORT) + goto out; ipv6_stub->fib6_nh_release(fib6_nh); - else + } else { nh->nh_flags = fib6_nh->fib_nh_flags; - + } +out: return err; } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 1c3d5d3702a10..3ff65c9ab8ee6 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -172,16 +172,22 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) struct sock *sk = NULL; struct inet_sock *isk; struct hlist_nulls_node *hnode; - int dif = skb->dev->ifindex; + int dif, sdif; if (skb->protocol == htons(ETH_P_IP)) { + dif = inet_iif(skb); + sdif = inet_sdif(skb); pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n", (int)ident, &ip_hdr(skb)->daddr, dif); #if IS_ENABLED(CONFIG_IPV6) } else if (skb->protocol == htons(ETH_P_IPV6)) { + dif = inet6_iif(skb); + sdif = inet6_sdif(skb); pr_debug("try to find: num = %d, daddr = %pI6c, dif = %d\n", (int)ident, &ipv6_hdr(skb)->daddr, dif); #endif + } else { + return NULL; } read_lock_bh(&ping_table.lock); @@ -220,7 +226,8 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) continue; } - if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) + if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif && + sk->sk_bound_dev_if != sdif) continue; sock_hold(sk); @@ -297,6 +304,7 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, struct net *net = sock_net(sk); if (sk->sk_family == AF_INET) { struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; + u32 tb_id = RT_TABLE_LOCAL; int chk_addr_ret; if (addr_len < sizeof(*addr)) @@ -310,7 +318,8 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n", sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port)); - chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr); + tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id; + chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id); if (addr->sin_addr.s_addr == htonl(INADDR_ANY)) chk_addr_ret = RTN_LOCAL; @@ -351,6 +360,14 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, return -ENODEV; } } + + if (!dev && sk->sk_bound_dev_if) { + dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); + if (!dev) { + rcu_read_unlock(); + return -ENODEV; + } + } has_addr = pingv6_ops.ipv6_chk_addr(net, &addr->sin6_addr, dev, scoped); rcu_read_unlock(); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 3183413ebc6c2..ddc24e57dc555 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -720,6 +720,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) int ret = -EINVAL; int chk_addr_ret; + lock_sock(sk); if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in)) goto out; @@ -739,7 +740,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_saddr = 0; /* Use device */ sk_dst_reset(sk); ret = 0; -out: return ret; +out: + release_sock(sk); + return ret; } /* diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 0e976848d4bb9..7004e379c325f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -610,28 +610,35 @@ static void fnhe_flush_routes(struct fib_nh_exception *fnhe) } } -static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) +static void fnhe_remove_oldest(struct fnhe_hash_bucket *hash) { - struct fib_nh_exception *fnhe, *oldest; + struct fib_nh_exception __rcu **fnhe_p, **oldest_p; + struct fib_nh_exception *fnhe, *oldest = NULL; - oldest = rcu_dereference(hash->chain); - for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe; - fnhe = rcu_dereference(fnhe->fnhe_next)) { - if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) + for (fnhe_p = &hash->chain; ; fnhe_p = &fnhe->fnhe_next) { + fnhe = rcu_dereference_protected(*fnhe_p, + lockdep_is_held(&fnhe_lock)); + if (!fnhe) + break; + if (!oldest || + time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) { oldest = fnhe; + oldest_p = fnhe_p; + } } fnhe_flush_routes(oldest); - return oldest; + *oldest_p = oldest->fnhe_next; + kfree_rcu(oldest, rcu); } -static inline u32 fnhe_hashfun(__be32 daddr) +static u32 fnhe_hashfun(__be32 daddr) { - static u32 fnhe_hashrnd __read_mostly; - u32 hval; + static siphash_key_t fnhe_hash_key __read_mostly; + u64 hval; - net_get_random_once(&fnhe_hashrnd, sizeof(fnhe_hashrnd)); - hval = jhash_1word((__force u32) daddr, fnhe_hashrnd); - return hash_32(hval, FNHE_HASH_SHIFT); + net_get_random_once(&fnhe_hash_key, sizeof(fnhe_hash_key)); + hval = siphash_1u32((__force u32)daddr, &fnhe_hash_key); + return hash_64(hval, FNHE_HASH_SHIFT); } static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe) @@ -700,16 +707,21 @@ static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr, if (rt) fill_route_from_fnhe(rt, fnhe); } else { - if (depth > FNHE_RECLAIM_DEPTH) - fnhe = fnhe_oldest(hash); - else { - fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC); - if (!fnhe) - goto out_unlock; - - fnhe->fnhe_next = hash->chain; - rcu_assign_pointer(hash->chain, fnhe); + /* Randomize max depth to avoid some side channels attacks. */ + int max_depth = FNHE_RECLAIM_DEPTH + + prandom_u32_max(FNHE_RECLAIM_DEPTH); + + while (depth > max_depth) { + fnhe_remove_oldest(hash); + depth--; } + + fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC); + if (!fnhe) + goto out_unlock; + + fnhe->fnhe_next = hash->chain; + fnhe->fnhe_genid = genid; fnhe->fnhe_daddr = daddr; fnhe->fnhe_gw = gw; @@ -717,6 +729,8 @@ static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr, fnhe->fnhe_mtu_locked = lock; fnhe->fnhe_expires = max(1UL, expires); + rcu_assign_pointer(hash->chain, fnhe); + /* Exception created; mark the cached routes for the nexthop * stale, so anyone caching it rechecks if this exception * applies to them. @@ -1409,7 +1423,7 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr) struct fib_info *fi = res->fi; u32 mtu = 0; - if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu || + if (READ_ONCE(dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu) || fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU)) mtu = fi->fib_mtu; @@ -1761,6 +1775,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, #endif RT_CACHE_STAT_INC(in_slow_mc); + skb_dst_drop(skb); skb_dst_set(skb, &rth->dst); return 0; } @@ -2990,7 +3005,7 @@ static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst, udph = skb_put_zero(skb, sizeof(struct udphdr)); udph->source = sport; udph->dest = dport; - udph->len = sizeof(struct udphdr); + udph->len = htons(sizeof(struct udphdr)); udph->check = 0; break; } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index a13e687172d78..8d9bf049fc250 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -243,12 +243,12 @@ bool cookie_timestamp_decode(const struct net *net, return true; } - if (!net->ipv4.sysctl_tcp_timestamps) + if (!READ_ONCE(net->ipv4.sysctl_tcp_timestamps)) return false; tcp_opt->sack_ok = (options & TS_OPT_SACK) ? TCP_SACK_SEEN : 0; - if (tcp_opt->sack_ok && !net->ipv4.sysctl_tcp_sack) + if (tcp_opt->sack_ok && !READ_ONCE(net->ipv4.sysctl_tcp_sack)) return false; if ((options & TS_OPT_WSCALE_MASK) == TS_OPT_WSCALE_MASK) @@ -257,7 +257,7 @@ bool cookie_timestamp_decode(const struct net *net, tcp_opt->wscale_ok = 1; tcp_opt->snd_wscale = options & TS_OPT_WSCALE_MASK; - return net->ipv4.sysctl_tcp_window_scaling != 0; + return READ_ONCE(net->ipv4.sysctl_tcp_window_scaling) != 0; } EXPORT_SYMBOL(cookie_timestamp_decode); @@ -297,7 +297,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) struct flowi4 fl4; u32 tsoff = 0; - if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) || + !th->ack || th->rst) goto out; if (tcp_synq_no_recent_overflow(sk)) @@ -333,6 +334,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); treq = tcp_rsk(req); treq->tfo_info = 0; + treq->af_specific = &tcp_request_sock_ipv4_ops; treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; treq->ts_off = 0; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1e5830db71f74..b5ac427776295 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -443,7 +443,7 @@ void tcp_init_sock(struct sock *sk) tp->snd_cwnd_clamp = ~0; tp->mss_cache = TCP_MSS_DEFAULT; - tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering; + tp->reordering = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering); tcp_assign_congestion_control(sk); tp->tsoffset = 0; @@ -712,7 +712,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb, int size_goal) { return skb->len < size_goal && - sock_net(sk)->ipv4.sysctl_tcp_autocorking && + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_autocorking) && !tcp_rtx_queue_empty(sk) && refcount_read(&sk->sk_wmem_alloc) > skb->truesize; } @@ -970,7 +970,7 @@ static int tcp_send_mss(struct sock *sk, int *size_goal, int flags) */ static void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb) { - if (skb && !skb->len) { + if (skb && TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { tcp_unlink_write_queue(skb, sk); if (tcp_write_queue_empty(sk)) tcp_chrono_stop(sk, TCP_CHRONO_BUSY); @@ -1203,7 +1203,8 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, struct sockaddr *uaddr = msg->msg_name; int err, flags; - if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) || + if (!(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) & + TFO_CLIENT_ENABLE) || (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) && uaddr->sa_family == AF_UNSPEC)) return -EOPNOTSUPP; @@ -1730,11 +1731,13 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, if (!copied) copied = used; break; - } else if (used <= len) { - seq += used; - copied += used; - offset += used; } + if (WARN_ON_ONCE(used > len)) + used = len; + seq += used; + copied += used; + offset += used; + /* If recv_actor drops the lock (e.g. TCP splice * receive) the skb pointer might be invalid when * getting here: tcp_collapse might have deleted it @@ -3266,7 +3269,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, case TCP_FASTOPEN_CONNECT: if (val > 1 || val < 0) { err = -EINVAL; - } else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) { + } else if (READ_ONCE(net->ipv4.sysctl_tcp_fastopen) & + TFO_CLIENT_ENABLE) { if (sk->sk_state == TCP_CLOSE) tp->fastopen_connect = val; else @@ -3633,7 +3637,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, case TCP_LINGER2: val = tp->linger2; if (val >= 0) - val = (val ? : net->ipv4.sysctl_tcp_fin_timeout) / HZ; + val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ; break; case TCP_DEFER_ACCEPT: val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept, diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 6a0c4326d9cf2..bcc13368c8363 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -296,10 +296,9 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, struct sk_psock *psock = sk_psock_get(sk); int ret; - if (unlikely(!psock)) { - sk_msg_free(sk, msg); - return 0; - } + if (unlikely(!psock)) + return -EPIPE; + ret = ingress ? bpf_tcp_ingress(sk, psock, msg, bytes, flags) : tcp_bpf_push_locked(sk, msg, bytes, flags, false); sk_psock_put(sk, psock); @@ -313,6 +312,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, bool cork = false, enospc = sk_msg_full(msg); struct sock *sk_redir; u32 tosend, delta = 0; + u32 eval = __SK_NONE; int ret; more_data: @@ -356,13 +356,24 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, case __SK_REDIRECT: sk_redir = psock->sk_redir; sk_msg_apply_bytes(psock, tosend); + if (!psock->apply_bytes) { + /* Clean up before releasing the sock lock. */ + eval = psock->eval; + psock->eval = __SK_NONE; + psock->sk_redir = NULL; + } if (psock->cork) { cork = true; psock->cork = NULL; } - sk_msg_return(sk, msg, tosend); + sk_msg_return(sk, msg, msg->sg.size); release_sock(sk); + ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags); + + if (eval == __SK_REDIRECT) + sock_put(sk_redir); + lock_sock(sk); if (unlikely(ret < 0)) { int free = sk_msg_free_nocharge(sk, msg); @@ -395,8 +406,11 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, } if (msg && msg->sg.data[msg->sg.start].page_link && - msg->sg.data[msg->sg.start].length) + msg->sg.data[msg->sg.start].length) { + if (eval == __SK_REDIRECT) + sk_mem_charge(sk, msg->sg.size); goto more_data; + } } return ret; } diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index ee6c38a73325d..44be7a5a13911 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -341,8 +341,6 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) return; if (tcp_in_slow_start(tp)) { - if (hystart && after(ack, ca->end_seq)) - bictcp_hystart_reset(sk); acked = tcp_slow_start(tp, acked); if (!acked) return; @@ -384,6 +382,9 @@ static void hystart_update(struct sock *sk, u32 delay) if (ca->found & hystart_detect) return; + if (after(tp->snd_una, ca->end_seq)) + bictcp_hystart_reset(sk); + if (hystart_detect & HYSTART_ACK_TRAIN) { u32 now = bictcp_clock(); diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 9934cea2b020f..ffa5bb6f9c6e9 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -362,7 +362,7 @@ static bool tcp_fastopen_no_cookie(const struct sock *sk, const struct dst_entry *dst, int flag) { - return (sock_net(sk)->ipv4.sysctl_tcp_fastopen & flag) || + return (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) & flag) || tcp_sk(sk)->fastopen_no_cookie || (dst && dst_metric(dst, RTAX_FASTOPEN_NO_COOKIE)); } @@ -377,7 +377,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, const struct dst_entry *dst) { bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1; - int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen; + int tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen); struct tcp_fastopen_cookie valid_foc = { .len = -1 }; struct tcp_request_sock *treq=tcp_rsk(req); struct sock *child; @@ -393,8 +393,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, return NULL; } - if (syn_data && - tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD)) + if (tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD)) goto fastopen; if (foc->len == 0) { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index be3d96c7750b5..14abf30987804 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -527,7 +527,7 @@ EXPORT_SYMBOL(tcp_fixup_rcvbuf); */ void tcp_init_buffer_space(struct sock *sk) { - int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win; + int tcp_app_win = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_app_win); struct tcp_sock *tp = tcp_sk(sk); int maxwin; @@ -1000,7 +1000,7 @@ void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq, tp->undo_marker ? tp->undo_retrans : 0); #endif tp->reordering = min_t(u32, (metric + mss - 1) / mss, - sock_net(sk)->ipv4.sysctl_tcp_max_reordering); + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering)); } /* This exciting event is worth to be remembered. 8) */ @@ -1302,7 +1302,7 @@ static u8 tcp_sacktag_one(struct sock *sk, if (dup_sack && (sacked & TCPCB_RETRANS)) { if (tp->undo_marker && tp->undo_retrans > 0 && after(end_seq, tp->undo_marker)) - tp->undo_retrans--; + tp->undo_retrans = max_t(int, 0, tp->undo_retrans - pcount); if ((sacked & TCPCB_SACKED_ACKED) && before(start_seq, state->reord)) state->reord = start_seq; @@ -1998,7 +1998,7 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend) return; tp->reordering = min_t(u32, tp->packets_out + addend, - sock_net(sk)->ipv4.sysctl_tcp_max_reordering); + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering)); tp->reord_seen++; NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER); } @@ -2066,7 +2066,8 @@ EXPORT_SYMBOL(tcp_init_undo); bool tcp_is_rack(const struct sock *sk) { - return sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION; + return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) & + TCP_RACK_LOSS_DETECTION; } EXPORT_SYMBOL(tcp_is_rack); @@ -2112,6 +2113,7 @@ void tcp_enter_loss(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery; + u8 reordering; tcp_timeout_mark_lost(sk); @@ -2132,10 +2134,12 @@ void tcp_enter_loss(struct sock *sk) /* Timeout in disordered state after receiving substantial DUPACKs * suggests that the degree of reordering is over-estimated. */ + reordering = READ_ONCE(net->ipv4.sysctl_tcp_reordering); if (icsk->icsk_ca_state <= TCP_CA_Disorder && - tp->sacked_out >= net->ipv4.sysctl_tcp_reordering) + tp->sacked_out >= reordering) tp->reordering = min_t(unsigned int, tp->reordering, - net->ipv4.sysctl_tcp_reordering); + reordering); + tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; tcp_ecn_queue_cwr(tp); @@ -2145,7 +2149,7 @@ void tcp_enter_loss(struct sock *sk) * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing */ tp->frto = !tp->app_disable_frto && - net->ipv4.sysctl_tcp_frto && + READ_ONCE(net->ipv4.sysctl_tcp_frto) && (new_recovery || icsk->icsk_retransmits) && !inet_csk(sk)->icsk_mtup.probe_size; } @@ -2708,12 +2712,15 @@ void tcp_mtup_probe_success(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + u64 val; - /* FIXME: breaks with very large cwnd */ tp->prior_ssthresh = tcp_current_ssthresh(sk); - tp->snd_cwnd = tp->snd_cwnd * - tcp_mss_to_mtu(sk, tp->mss_cache) / - icsk->icsk_mtup.probe_size; + + val = (u64)tp->snd_cwnd * tcp_mss_to_mtu(sk, tp->mss_cache); + do_div(val, icsk->icsk_mtup.probe_size); + WARN_ON_ONCE((u32)val != val); + tp->snd_cwnd = max_t(u32, 1U, val); + tp->snd_cwnd_cnt = 0; tp->snd_cwnd_stamp = tcp_jiffies32; tp->snd_ssthresh = tcp_current_ssthresh(sk); @@ -3053,7 +3060,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag) { - u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ; + u32 wlen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen) * HZ; struct tcp_sock *tp = tcp_sk(sk); if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) { @@ -3468,7 +3475,8 @@ bool tcp_may_raise_cwnd(const struct sock *sk, const int flag) * new SACK or ECE mark may first advance cwnd here and later reduce * cwnd in tcp_fastretrans_alert() based on more states. */ - if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering) + if (tcp_sk(sk)->reordering > + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering)) return flag & FLAG_FORWARD_PROGRESS; return flag & FLAG_DATA_ACKED; @@ -3586,7 +3594,8 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx, if (*last_oow_ack_time) { s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time); - if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) { + if (0 <= elapsed && + elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) { NET_INC_STATS(net, mib_idx); return true; /* rate-limited: don't send yet! */ } @@ -3634,7 +3643,7 @@ void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb) /* Then check host-wide RFC 5961 rate limit. */ now = jiffies / HZ; if (now != challenge_timestamp) { - u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit; + u32 ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit); u32 half = (ack_limit + 1) >> 1; challenge_timestamp = now; @@ -3898,7 +3907,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_process_tlp_ack(sk, ack, flag); if (tcp_ack_is_dubious(sk, flag)) { - if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) { + if (!(flag & (FLAG_SND_UNA_ADVANCED | + FLAG_NOT_DUP | FLAG_DSACKING_ACK))) { num_dupack = 1; /* Consider if pure acks were aggregated in tcp_add_backlog() */ if (!(flag & FLAG_DATA)) @@ -4084,7 +4094,7 @@ void tcp_parse_options(const struct net *net, break; case TCPOPT_WINDOW: if (opsize == TCPOLEN_WINDOW && th->syn && - !estab && net->ipv4.sysctl_tcp_window_scaling) { + !estab && READ_ONCE(net->ipv4.sysctl_tcp_window_scaling)) { __u8 snd_wscale = *(__u8 *)ptr; opt_rx->wscale_ok = 1; if (snd_wscale > TCP_MAX_WSCALE) { @@ -4100,7 +4110,7 @@ void tcp_parse_options(const struct net *net, case TCPOPT_TIMESTAMP: if ((opsize == TCPOLEN_TIMESTAMP) && ((estab && opt_rx->tstamp_ok) || - (!estab && net->ipv4.sysctl_tcp_timestamps))) { + (!estab && READ_ONCE(net->ipv4.sysctl_tcp_timestamps)))) { opt_rx->saw_tstamp = 1; opt_rx->rcv_tsval = get_unaligned_be32(ptr); opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4); @@ -4108,7 +4118,7 @@ void tcp_parse_options(const struct net *net, break; case TCPOPT_SACK_PERM: if (opsize == TCPOLEN_SACK_PERM && th->syn && - !estab && net->ipv4.sysctl_tcp_sack) { + !estab && READ_ONCE(net->ipv4.sysctl_tcp_sack)) { opt_rx->sack_ok = TCP_SACK_SEEN; tcp_sack_reset(opt_rx); } @@ -4444,7 +4454,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) { struct tcp_sock *tp = tcp_sk(sk); - if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) { int mib_idx; if (before(seq, tp->rcv_nxt)) @@ -4490,7 +4500,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS); - if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) { u32 end_seq = TCP_SKB_CB(skb)->end_seq; tcp_rcv_spurious_retrans(sk, skb); @@ -5427,6 +5437,16 @@ static void tcp_new_space(struct sock *sk) sk->sk_write_space(sk); } +/* Caller made space either from: + * 1) Freeing skbs in rtx queues (after tp->snd_una has advanced) + * 2) Sent skbs from output queue (and thus advancing tp->snd_nxt) + * + * We might be able to generate EPOLLOUT to the application if: + * 1) Space consumed in output/rtx queues is below sk->sk_sndbuf/2 + * 2) notsent amount (tp->write_seq - tp->snd_nxt) became + * small enough that tcp_stream_memory_free() decides it + * is time to generate EPOLLOUT. + */ void tcp_check_space(struct sock *sk) { if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) { @@ -5490,7 +5510,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) } if (!tcp_is_sack(tp) || - tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr) + tp->compressed_ack >= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)) goto send_now; if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) { @@ -5513,7 +5533,8 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) if (tp->srtt_us && tp->srtt_us < rtt) rtt = tp->srtt_us; - delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns, + delay = min_t(unsigned long, + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns), rtt * (NSEC_PER_USEC >> 3)/20); sock_hold(sk); hrtimer_start(&tp->compressed_ack_timer, ns_to_ktime(delay), @@ -5545,7 +5566,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th) struct tcp_sock *tp = tcp_sk(sk); u32 ptr = ntohs(th->urg_ptr); - if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg) + if (ptr && !READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_stdurg)) ptr--; ptr += ntohl(th->seq); @@ -6734,11 +6755,14 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto) { struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; const char *msg = "Dropping request"; - bool want_cookie = false; struct net *net = sock_net(sk); + bool want_cookie = false; + u8 syncookies; + + syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies); #ifdef CONFIG_SYN_COOKIES - if (net->ipv4.sysctl_tcp_syncookies) { + if (syncookies) { msg = "Sending cookies"; want_cookie = true; __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); @@ -6746,8 +6770,7 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto) #endif __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); - if (!queue->synflood_warned && - net->ipv4.sysctl_tcp_syncookies != 2 && + if (!queue->synflood_warned && syncookies != 2 && xchg(&queue->synflood_warned, 1) == 0) net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", proto, sk->sk_num, msg); @@ -6796,7 +6819,7 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops, struct tcp_sock *tp = tcp_sk(sk); u16 mss; - if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 && + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) != 2 && !inet_csk_reqsk_queue_is_full(sk)) return 0; @@ -6831,13 +6854,15 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, struct dst_entry *dst; struct flowi fl; u32 timeout_init; + u8 syncookies; + + syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies); /* TW buckets are converted to open requests without * limitations, they conserve resources and peer is * evidently real one. */ - if ((net->ipv4.sysctl_tcp_syncookies == 2 || - inet_csk_reqsk_queue_is_full(sk)) && !isn) { + if ((syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) { want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name); if (!want_cookie) goto drop; @@ -6891,10 +6916,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, goto drop_and_free; if (!want_cookie && !isn) { + int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog); + /* Kill the following clause, if you dislike this way. */ - if (!net->ipv4.sysctl_tcp_syncookies && - (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < - (net->ipv4.sysctl_max_syn_backlog >> 2)) && + if (!syncookies && + (max_syn_backlog - inet_csk_reqsk_queue_len(sk) < + (max_syn_backlog >> 2)) && !tcp_peer_is_proven(req, dst)) { /* Without syncookies last quarter of * backlog is filled with destinations, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 46708ed228989..e5026ea901ef5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -105,10 +105,10 @@ static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb) int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) { + int reuse = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tw_reuse); const struct inet_timewait_sock *tw = inet_twsk(sktw); const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw); struct tcp_sock *tp = tcp_sk(sk); - int reuse = sock_net(sk)->ipv4.sysctl_tcp_tw_reuse; if (reuse == 2) { /* Still does not detect *everything* that goes through @@ -1399,7 +1399,7 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = { .syn_ack_timeout = tcp_syn_ack_timeout, }; -static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { +const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { .mss_clamp = TCP_MSS_DEFAULT, #ifdef CONFIG_TCP_MD5SIG .req_md5_lookup = tcp_v4_md5_lookup, @@ -1442,6 +1442,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, bool *own_req) { struct inet_request_sock *ireq; + bool found_dup_sk = false; struct inet_sock *newinet; struct tcp_sock *newtp; struct sock *newsk; @@ -1512,12 +1513,22 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, if (__inet_inherit_port(sk, newsk) < 0) goto put_and_exit; - *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), + &found_dup_sk); if (likely(*own_req)) { tcp_move_syn(newtp, req); ireq->ireq_opt = NULL; } else { newinet->inet_opt = NULL; + + if (!req_unhash && found_dup_sk) { + /* This code path should only be executed in the + * syncookie case only + */ + bh_unlock_sock(newsk); + sock_put(newsk); + newsk = NULL; + } } return newsk; @@ -2326,6 +2337,7 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos) static void *tcp_seek_last_pos(struct seq_file *seq) { struct tcp_iter_state *st = seq->private; + int bucket = st->bucket; int offset = st->offset; int orig_num = st->num; void *rc = NULL; @@ -2336,7 +2348,7 @@ static void *tcp_seek_last_pos(struct seq_file *seq) break; st->state = TCP_SEQ_STATE_LISTENING; rc = listening_get_next(seq, NULL); - while (offset-- && rc) + while (offset-- && rc && bucket == st->bucket) rc = listening_get_next(seq, rc); if (rc) break; @@ -2347,7 +2359,7 @@ static void *tcp_seek_last_pos(struct seq_file *seq) if (st->bucket > tcp_hashinfo.ehash_mask) break; rc = established_get_first(seq); - while (offset-- && rc) + while (offset-- && rc && bucket == st->bucket) rc = established_get_next(seq, rc); } diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index c4848e7a0aad1..0af6249a993af 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -329,7 +329,7 @@ void tcp_update_metrics(struct sock *sk) int m; sk_dst_confirm(sk); - if (net->ipv4.sysctl_tcp_nometrics_save || !dst) + if (READ_ONCE(net->ipv4.sysctl_tcp_nometrics_save) || !dst) return; rcu_read_lock(); @@ -425,7 +425,8 @@ void tcp_update_metrics(struct sock *sk) if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) { val = tcp_metric_get(tm, TCP_METRIC_REORDERING); if (val < tp->reordering && - tp->reordering != net->ipv4.sysctl_tcp_reordering) + tp->reordering != + READ_ONCE(net->ipv4.sysctl_tcp_reordering)) tcp_metric_set(tm, TCP_METRIC_REORDERING, tp->reordering); } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index afa563a440a0e..3d7cccb095ced 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -181,7 +181,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, * Oh well... nobody has a sufficient solution to this * protocol bug yet. */ - if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) { + if (!READ_ONCE(twsk_net(tw)->ipv4.sysctl_tcp_rfc1337)) { kill: inet_twsk_deschedule_put(tw); return TCP_TW_SUCCESS; @@ -548,7 +548,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->tsoffset = treq->ts_off; #ifdef CONFIG_TCP_MD5SIG newtp->md5sig_info = NULL; /*XXX*/ - if (newtp->af_specific->md5_lookup(sk, newsk)) + if (treq->af_specific->req_md5_lookup(sk, req_to_sk(req))) newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED; #endif if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a170bcf6d6cd0..3997749f44f35 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -82,6 +82,7 @@ void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT, tcp_skb_pcount(skb)); + tcp_check_space(sk); } EXPORT_SYMBOL(tcp_event_new_data_sent); @@ -802,18 +803,18 @@ unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, opts->mss = tcp_advertise_mss(sk); remaining -= TCPOLEN_MSS_ALIGNED; - if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !*md5)) { + if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps) && !*md5)) { opts->options |= OPTION_TS; opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset; opts->tsecr = tp->rx_opt.ts_recent; remaining -= TCPOLEN_TSTAMP_ALIGNED; } - if (likely(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) { + if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling))) { opts->ws = tp->rx_opt.rcv_wscale; opts->options |= OPTION_WSCALE; remaining -= TCPOLEN_WSCALE_ALIGNED; } - if (likely(sock_net(sk)->ipv4.sysctl_tcp_sack)) { + if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_sack))) { opts->options |= OPTION_SACK_ADVERTISE; if (unlikely(!(OPTION_TS & opts->options))) remaining -= TCPOLEN_SACKPERM_ALIGNED; @@ -1820,7 +1821,8 @@ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu) mss_now -= icsk->icsk_ext_hdr_len; /* Then reserve room for full set of TCP options and 8 bytes of data */ - mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss); + mss_now = max(mss_now, + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss)); return mss_now; } @@ -1863,10 +1865,10 @@ void tcp_mtup_init(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); struct net *net = sock_net(sk); - icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1; + icsk->icsk_mtup.enabled = READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing) > 1; icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) + icsk->icsk_af_ops->net_header_len; - icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss); + icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, READ_ONCE(net->ipv4.sysctl_tcp_base_mss)); icsk->icsk_mtup.probe_size = 0; if (icsk->icsk_mtup.enabled) icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; @@ -1999,7 +2001,7 @@ void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) if (tp->packets_out > tp->snd_cwnd_used) tp->snd_cwnd_used = tp->packets_out; - if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle && + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) && (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto && !ca_ops->cong_control) tcp_cwnd_application_limited(sk); @@ -2090,7 +2092,7 @@ u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) tso_segs = ca_ops->tso_segs ? ca_ops->tso_segs(sk, mss_now) : tcp_tso_autosize(sk, mss_now, - sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs)); return min_t(u32, tso_segs, sk->sk_gso_max_segs); } EXPORT_SYMBOL(tcp_tso_segs); @@ -2396,7 +2398,7 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk) u32 interval; s32 delta; - interval = net->ipv4.sysctl_tcp_probe_interval; + interval = READ_ONCE(net->ipv4.sysctl_tcp_probe_interval); delta = tcp_jiffies32 - icsk->icsk_mtup.probe_timestamp; if (unlikely(delta >= interval * HZ)) { int mss = tcp_current_mss(sk); @@ -2478,7 +2480,7 @@ int tcp_mtu_probe(struct sock *sk) * probing process by not resetting search range to its orignal. */ if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) || - interval < net->ipv4.sysctl_tcp_probe_threshold) { + interval < READ_ONCE(net->ipv4.sysctl_tcp_probe_threshold)) { /* Check whether enough time has elaplased for * another round of probing. */ @@ -2629,7 +2631,7 @@ bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift)); if (sk->sk_pacing_status == SK_PACING_NONE) limit = min_t(unsigned long, limit, - sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes); + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes)); limit <<= factor; if (static_branch_unlikely(&tcp_tx_delay_enabled) && @@ -2866,7 +2868,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto) if (rcu_access_pointer(tp->fastopen_rsk)) return false; - early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans; + early_retrans = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_early_retrans); /* Schedule a loss probe in 2*RTT for SACK capable connections * not in loss recovery, that are either limited by cwnd or application. */ @@ -3230,7 +3232,7 @@ void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, struct sk_buff *skb = to, *tmp; bool first = true; - if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)) return; if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) return; @@ -3780,7 +3782,7 @@ static void tcp_connect_init(struct sock *sk) * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT. */ tp->tcp_header_len = sizeof(struct tcphdr); - if (sock_net(sk)->ipv4.sysctl_tcp_timestamps) + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps)) tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED; #ifdef CONFIG_TCP_MD5SIG @@ -3816,7 +3818,7 @@ static void tcp_connect_init(struct sock *sk) tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), &tp->rcv_wnd, &tp->window_clamp, - sock_net(sk)->ipv4.sysctl_tcp_window_scaling, + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling), &rcv_wscale, rcv_wnd); @@ -3867,6 +3869,7 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb) */ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct tcp_fastopen_request *fo = tp->fastopen_req; int space, err = 0; @@ -3881,8 +3884,10 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) * private TCP options. The cost is reduced data space in SYN :( */ tp->rx_opt.mss_clamp = tcp_mss_clamp(tp, tp->rx_opt.mss_clamp); + /* Sync mss_cache after updating the mss_clamp */ + tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); - space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) - + space = __tcp_mtu_to_mss(sk, icsk->icsk_pmtu_cookie) - MAX_TCP_OPTION_SPACE; space = min_t(size_t, space, fo->size); @@ -4231,7 +4236,7 @@ void tcp_send_probe0(struct sock *sk) icsk->icsk_probes_out++; if (err <= 0) { - if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2) + if (icsk->icsk_backoff < READ_ONCE(net->ipv4.sysctl_tcp_retries2)) icsk->icsk_backoff++; timeout = tcp_probe0_when(sk, TCP_RTO_MAX); } else { @@ -4255,8 +4260,8 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL, NULL); if (!res) { - __TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); if (unlikely(tcp_passive_fastopen(sk))) tcp_sk(sk)->total_retrans++; trace_tcp_retransmit_synack(sk, req); diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index 666b514c900cb..940dcfda92b64 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -97,19 +97,23 @@ EXPORT_SYMBOL(tcp_rate_skb_sent); * * If an ACK (s)acks multiple skbs (e.g., stretched-acks), this function is * called multiple times. We favor the information from the most recently - * sent skb, i.e., the skb with the highest prior_delivered count. + * sent skb, i.e., the skb with the most recently sent time and the highest + * sequence. */ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_skb_cb *scb = TCP_SKB_CB(skb); + u64 tx_tstamp; if (!scb->tx.delivered_mstamp) return; + tx_tstamp = tcp_skb_timestamp_us(skb); if (!rs->prior_delivered || - after(scb->tx.delivered, rs->prior_delivered)) { + tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp, + scb->end_seq, rs->last_end_seq)) { rs->prior_lost = scb->tx.lost; rs->prior_delivered_ce = scb->tx.delivered_ce; rs->prior_delivered = scb->tx.delivered; @@ -117,9 +121,10 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, rs->is_app_limited = scb->tx.is_app_limited; rs->is_retrans = scb->sacked & TCPCB_RETRANS; rs->tx_in_flight = scb->tx.in_flight; + rs->last_end_seq = scb->end_seq; /* Record send time of most recently ACKed packet: */ - tp->first_tx_mstamp = tcp_skb_timestamp_us(skb); + tp->first_tx_mstamp = tx_tstamp; /* Find the duration of the "send phase" of this window: */ rs->interval_us = tcp_stamp32_us_delta( tp->first_tx_mstamp, diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 1ef85db3588a0..eda8e63d8ab88 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -33,7 +33,8 @@ static u32 tcp_rack_reo_wnd(const struct sock *sk) return 0; if (tp->sacked_out >= tp->reordering && - !(sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_NO_DUPTHRESH)) + !(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) & + TCP_RACK_NO_DUPTHRESH)) return 0; } @@ -207,7 +208,8 @@ void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); - if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_STATIC_REO_WND || + if ((READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) & + TCP_RACK_STATIC_REO_WND) || !rs->prior_delivered) return; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 798544cf01751..aeedae8c309ae 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -158,7 +158,7 @@ EXPORT_SYMBOL(tcp_out_of_resources); */ static int tcp_orphan_retries(struct sock *sk, bool alive) { - int retries = sock_net(sk)->ipv4.sysctl_tcp_orphan_retries; /* May be zero. */ + int retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_orphan_retries); /* May be zero. */ /* We know from an ICMP that something is wrong. */ if (sk->sk_err_soft && !alive) @@ -178,7 +178,7 @@ void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) int mss; /* Black hole detection */ - if (!net->ipv4.sysctl_tcp_mtu_probing) + if (!READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing)) return; if (!icsk->icsk_mtup.enabled) { @@ -186,9 +186,9 @@ void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; } else { mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; - mss = min(net->ipv4.sysctl_tcp_base_mss, mss); - mss = max(mss, net->ipv4.sysctl_tcp_mtu_probe_floor); - mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss); + mss = min(READ_ONCE(net->ipv4.sysctl_tcp_base_mss), mss); + mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_mtu_probe_floor)); + mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_min_snd_mss)); icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); } tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); @@ -263,7 +263,7 @@ static int tcp_write_timeout(struct sock *sk) retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; expired = icsk->icsk_retransmits >= retry_until; } else { - if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0)) { + if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1), 0)) { /* Black hole detection */ tcp_mtu_probing(icsk, sk); @@ -272,7 +272,7 @@ static int tcp_write_timeout(struct sock *sk) sk_rethink_txhash(sk); } - retry_until = net->ipv4.sysctl_tcp_retries2; + retry_until = READ_ONCE(net->ipv4.sysctl_tcp_retries2); if (sock_flag(sk, SOCK_DEAD)) { const bool alive = icsk->icsk_rto < TCP_RTO_MAX; @@ -409,7 +409,7 @@ static void tcp_probe_timer(struct sock *sk) msecs_to_jiffies(icsk->icsk_user_timeout)) goto abort; - max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2; + max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2); if (sock_flag(sk, SOCK_DEAD)) { const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX; @@ -605,7 +605,7 @@ void tcp_retransmit_timer(struct sock *sk) * linear-timeout retransmissions into a black hole */ if (sk->sk_state == TCP_ESTABLISHED && - (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) && + (tp->thin_lto || READ_ONCE(net->ipv4.sysctl_tcp_thin_linear_timeouts)) && tcp_stream_is_thin(tp) && icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) { icsk->icsk_backoff = 0; @@ -617,7 +617,7 @@ void tcp_retransmit_timer(struct sock *sk) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, tcp_clamp_rto_to_user_timeout(sk), min(tp->rto_max_thresh, TCP_RTO_MAX)); - if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0)) + if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1) + 1, 0)) __sk_dst_reset(sk); out:; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 63c45e665d248..9d3c356c91ce7 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -386,7 +386,8 @@ static int compute_score(struct sock *sk, struct net *net, dif, sdif); if (!dev_match) return -1; - score += 4; + if (sk->sk_bound_dev_if) + score += 4; if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; @@ -543,6 +544,12 @@ void udp_encap_enable(void) } EXPORT_SYMBOL(udp_encap_enable); +void udp_encap_disable(void) +{ + static_branch_dec(&udp_encap_needed_key); +} +EXPORT_SYMBOL(udp_encap_disable); + /* Handler for tunnels with arbitrary destination ports: no socket lookup, go * through error handlers in encapsulations looking for a match. */ @@ -844,7 +851,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, kfree_skb(skb); return -EINVAL; } - if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) { + if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); return -EINVAL; } @@ -981,7 +988,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) __be16 dport; u8 tos; int err, is_udplite = IS_UDPLITE(sk); - int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; + int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE; int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); struct sk_buff *skb; struct ip_options_data opt_copy; @@ -1292,7 +1299,7 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset, } up->len += size; - if (!(up->corkflag || (flags&MSG_MORE))) + if (!(READ_ONCE(up->corkflag) || (flags&MSG_MORE))) ret = udp_push_pending_frames(sk); if (!ret) ret = size; @@ -2562,9 +2569,9 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, switch (optname) { case UDP_CORK: if (val != 0) { - up->corkflag = 1; + WRITE_ONCE(up->corkflag, 1); } else { - up->corkflag = 0; + WRITE_ONCE(up->corkflag, 0); lock_sock(sk); push_pending_frames(sk); release_sock(sk); @@ -2687,7 +2694,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, switch (optname) { case UDP_CORK: - val = up->corkflag; + val = READ_ONCE(up->corkflag); break; case UDP_ENCAP: @@ -2953,7 +2960,7 @@ int udp4_seq_show(struct seq_file *seq, void *v) { seq_setwidth(seq, 127); if (v == SEQ_START_TOKEN) - seq_puts(seq, " sl local_address rem_address st tx_queue " + seq_puts(seq, " sl local_address rem_address st tx_queue " "rx_queue tr tm->when retrnsmt uid timeout " "inode ref pointer drops"); else { diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c index 8a4285712808e..9031b7732fece 100644 --- a/net/ipv4/xfrm4_protocol.c +++ b/net/ipv4/xfrm4_protocol.c @@ -298,4 +298,3 @@ void __init xfrm4_protocol_init(void) { xfrm_input_register_afinfo(&xfrm4_input_afinfo); } -EXPORT_SYMBOL(xfrm4_protocol_init); diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index f8ed3c3bb928f..a53716c7d93b4 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -14,6 +14,7 @@ #include #include #include +#include int xfrm4_extract_header(struct sk_buff *skb) { @@ -28,6 +29,8 @@ int xfrm4_extract_header(struct sk_buff *skb) memset(XFRM_MODE_SKB_CB(skb)->flow_lbl, 0, sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); + /* for single flow rps for ipsec */ + rps_flow_node_add(iph, false); return 0; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 366c3792b8604..a0123760fb2c7 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -542,7 +542,7 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, #ifdef CONFIG_IPV6_MROUTE if ((all || type == NETCONFA_MC_FORWARDING) && nla_put_s32(skb, NETCONFA_MC_FORWARDING, - devconf->mc_forwarding) < 0) + atomic_read(&devconf->mc_forwarding)) < 0) goto nla_put_failure; #endif if ((all || type == NETCONFA_PROXY_NEIGH) && @@ -789,6 +789,7 @@ static void dev_forward_change(struct inet6_dev *idev) { struct net_device *dev; struct inet6_ifaddr *ifa; + LIST_HEAD(tmp_addr_list); if (!idev) return; @@ -807,14 +808,24 @@ static void dev_forward_change(struct inet6_dev *idev) } } + read_lock_bh(&idev->lock); list_for_each_entry(ifa, &idev->addr_list, if_list) { if (ifa->flags&IFA_F_TENTATIVE) continue; + list_add_tail(&ifa->if_list_aux, &tmp_addr_list); + } + read_unlock_bh(&idev->lock); + + while (!list_empty(&tmp_addr_list)) { + ifa = list_first_entry(&tmp_addr_list, + struct inet6_ifaddr, if_list_aux); + list_del(&ifa->if_list_aux); if (idev->cnf.forwarding) addrconf_join_anycast(ifa); else addrconf_leave_anycast(ifa); } + inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_FORWARDING, dev->ifindex, &idev->cnf); @@ -1091,10 +1102,6 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg, goto out; } - if (net->ipv6.devconf_all->disable_policy || - idev->cnf.disable_policy) - f6i->dst_nopolicy = true; - neigh_parms_data_state_setall(idev->nd_parms); ifa->addr = *cfg->pfx; @@ -3111,6 +3118,9 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4); if (idev->dev->flags&IFF_POINTOPOINT) { + if (idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_NONE) + return; + addr.s6_addr32[0] = htonl(0xfe800000); scope = IFA_LINK; plen = 64; @@ -3710,8 +3720,10 @@ static int addrconf_ifdown(struct net_device *dev, int how) unsigned long event = how ? NETDEV_UNREGISTER : NETDEV_DOWN; struct net *net = dev_net(dev); struct inet6_dev *idev; - struct inet6_ifaddr *ifa, *tmp; + struct inet6_ifaddr *ifa; + LIST_HEAD(tmp_addr_list); bool keep_addr = false; + bool was_ready; int state, i; ASSERT_RTNL(); @@ -3777,7 +3789,10 @@ static int addrconf_ifdown(struct net_device *dev, int how) addrconf_del_rs_timer(idev); - /* Step 2: clear flags for stateless addrconf */ + /* Step 2: clear flags for stateless addrconf, repeated down + * detection + */ + was_ready = idev->if_flags & IF_READY; if (!how) idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY); @@ -3798,16 +3813,23 @@ static int addrconf_ifdown(struct net_device *dev, int how) write_lock_bh(&idev->lock); } - list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) { + list_for_each_entry(ifa, &idev->addr_list, if_list) + list_add_tail(&ifa->if_list_aux, &tmp_addr_list); + write_unlock_bh(&idev->lock); + + while (!list_empty(&tmp_addr_list)) { struct fib6_info *rt = NULL; bool keep; + ifa = list_first_entry(&tmp_addr_list, + struct inet6_ifaddr, if_list_aux); + list_del(&ifa->if_list_aux); + addrconf_del_dad_work(ifa); keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) && !addr_is_local(&ifa->addr); - write_unlock_bh(&idev->lock); spin_lock_bh(&ifa->lock); if (keep) { @@ -3838,20 +3860,19 @@ static int addrconf_ifdown(struct net_device *dev, int how) addrconf_leave_solict(ifa->idev, &ifa->addr); } - write_lock_bh(&idev->lock); if (!keep) { + write_lock_bh(&idev->lock); list_del_rcu(&ifa->if_list); + write_unlock_bh(&idev->lock); in6_ifa_put(ifa); } } - write_unlock_bh(&idev->lock); - /* Step 5: Discard anycast and multicast list */ if (how) { ipv6_ac_destroy_dev(idev); ipv6_mc_destroy_dev(idev); - } else { + } else if (was_ready) { ipv6_mc_down(idev); } @@ -4177,7 +4198,8 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id, send_rs = send_mld && ipv6_accept_ra(ifp->idev) && ifp->idev->cnf.rtr_solicits != 0 && - (dev->flags&IFF_LOOPBACK) == 0; + (dev->flags & IFF_LOOPBACK) == 0 && + (dev->type != ARPHRD_TUNNEL); read_unlock_bh(&ifp->idev->lock); /* While dad is in progress mld report's source address is in6_addrany. @@ -4921,6 +4943,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) goto error; + spin_lock_bh(&ifa->lock); if (!((ifa->flags&IFA_F_PERMANENT) && (ifa->prefered_lft == INFINITY_LIFE_TIME))) { preferred = ifa->prefered_lft; @@ -4942,6 +4965,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, preferred = INFINITY_LIFE_TIME; valid = INFINITY_LIFE_TIME; } + spin_unlock_bh(&ifa->lock); if (!ipv6_addr_any(&ifa->peer_addr)) { if (nla_put_in6_addr(skb, IFA_LOCAL, &ifa->addr) < 0 || @@ -5455,7 +5479,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_USE_OPTIMISTIC] = cnf->use_optimistic; #endif #ifdef CONFIG_IPV6_MROUTE - array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding; + array[DEVCONF_MC_FORWARDING] = atomic_read(&cnf->mc_forwarding); #endif array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6; array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index be83c829f1169..dfceb83b16898 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -222,7 +222,7 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol, inet->mc_list = NULL; inet->rcv_tos = 0; - if (net->ipv4.sysctl_ip_no_pmtu_disc) + if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT; @@ -958,6 +958,7 @@ static const struct ipv6_stub ipv6_stub_impl = { .ip6_mtu_from_fib6 = ip6_mtu_from_fib6, .fib6_nh_init = fib6_nh_init, .fib6_nh_release = fib6_nh_release, + .fib6_nh_release_dsts = fib6_nh_release_dsts, .fib6_update_sernum = fib6_update_sernum_stub, .fib6_rt_update = fib6_rt_update, .ip6_del_rt = ip6_del_rt, diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 12570a73def80..b64791d3b0f81 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -231,6 +231,10 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info struct sk_buff *trailer; int tailen = esp->tailen; + if (ALIGN(tailen, L1_CACHE_BYTES) > PAGE_SIZE || + ALIGN(skb->data_len, L1_CACHE_BYTES) > PAGE_SIZE) + goto cow; + if (!skb_cloned(skb)) { if (tailen <= skb_tailroom(skb)) { nfrags = 1; @@ -440,7 +444,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb); u32 padto; - padto = min(x->tfcpad, __xfrm_state_mtu(x, dst->child_mtu_cached)); + padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached)); if (skb->len < padto) esp.tfclen = padto - skb->len; } diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index f9e8fe3ff0c5b..1727269396523 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -260,7 +260,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, return __fib6_rule_action(rule, flp, flags, arg); } -static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg) +static bool fib6_rule_suppress(struct fib_rule *rule, int flags, struct fib_lookup_arg *arg) { struct fib6_result *res = arg->result; struct rt6_info *rt = res->rt6; @@ -287,8 +287,7 @@ static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg return false; suppress_route: - if (!(arg->flags & FIB_LOOKUP_NOREF)) - ip6_rt_put(rt); + ip6_rt_put_flags(rt, flags); return true; } diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index fbe9d4295eac3..528c78bc920e0 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -104,7 +104,7 @@ static inline int compute_score(struct sock *sk, struct net *net, if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif)) return -1; - score = 1; + score = sk->sk_bound_dev_if ? 2 : 1; if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; } @@ -262,7 +262,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, return -EADDRNOTAVAIL; } -static u32 inet6_sk_port_offset(const struct sock *sk) +static u64 inet6_sk_port_offset(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); @@ -274,7 +274,7 @@ static u32 inet6_sk_port_offset(const struct sock *sk) int inet6_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { - u32 port_offset = 0; + u64 port_offset = 0; if (!inet_sk(sk)->inet_num) port_offset = inet6_sk_port_offset(sk); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index bb68290ad68d8..ef55489651f87 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -110,7 +110,7 @@ void fib6_update_sernum(struct net *net, struct fib6_info *f6i) fn = rcu_dereference_protected(f6i->fib6_node, lockdep_is_held(&f6i->fib6_table->tb6_lock)); if (fn) - fn->fn_sernum = fib6_new_sernum(net); + WRITE_ONCE(fn->fn_sernum, fib6_new_sernum(net)); } /* @@ -535,12 +535,13 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, spin_unlock_bh(&table->tb6_lock); if (res > 0) { cb->args[4] = 1; - cb->args[5] = w->root->fn_sernum; + cb->args[5] = READ_ONCE(w->root->fn_sernum); } } else { - if (cb->args[5] != w->root->fn_sernum) { + int sernum = READ_ONCE(w->root->fn_sernum); + if (cb->args[5] != sernum) { /* Begin at the root if the tree changed */ - cb->args[5] = w->root->fn_sernum; + cb->args[5] = sernum; w->state = FWS_INIT; w->node = w->root; w->skip = w->count; @@ -1276,7 +1277,7 @@ static void __fib6_update_sernum_upto_root(struct fib6_info *rt, /* paired with smp_rmb() in rt6_get_cookie_safe() */ smp_wmb(); while (fn) { - fn->fn_sernum = sernum; + WRITE_ONCE(fn->fn_sernum, sernum); fn = rcu_dereference_protected(fn->parent, lockdep_is_held(&rt->fib6_table->tb6_lock)); } @@ -1310,7 +1311,6 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, int err = -ENOMEM; int allow_create = 1; int replace_required = 0; - int sernum = fib6_new_sernum(info->nl_net); if (info->nlh) { if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) @@ -1410,7 +1410,7 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, if (!err) { if (rt->nh) list_add(&rt->nh_list, &rt->nh->f6i_list); - __fib6_update_sernum_upto_root(rt, sernum); + __fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net)); fib6_start_gc(info->nl_net, rt); } @@ -2069,8 +2069,8 @@ static int fib6_clean_node(struct fib6_walker *w) }; if (c->sernum != FIB6_NO_SERNUM_CHANGE && - w->node->fn_sernum != c->sernum) - w->node->fn_sernum = c->sernum; + READ_ONCE(w->node->fn_sernum) != c->sernum) + WRITE_ONCE(w->node->fn_sernum, c->sernum); if (!c->func) { WARN_ON_ONCE(c->sernum == FIB6_NO_SERNUM_CHANGE); @@ -2434,7 +2434,7 @@ static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter, iter->w.state = FWS_INIT; iter->w.node = iter->w.root; iter->w.args = iter; - iter->sernum = iter->w.root->fn_sernum; + iter->sernum = READ_ONCE(iter->w.root->fn_sernum); INIT_LIST_HEAD(&iter->w.lh); fib6_walker_link(net, &iter->w); } @@ -2462,8 +2462,10 @@ static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl, static void ipv6_route_check_sernum(struct ipv6_route_iter *iter) { - if (iter->sernum != iter->w.root->fn_sernum) { - iter->sernum = iter->w.root->fn_sernum; + int sernum = READ_ONCE(iter->w.root->fn_sernum); + + if (iter->sernum != sernum) { + iter->sernum = sernum; iter->w.state = FWS_INIT; iter->w.node = iter->w.root; WARN_ON(iter->w.skip); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 0cb8056d98003..da0652d86fd0a 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -720,9 +720,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, else fl6->daddr = tunnel->parms.raddr; - if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen)) - return -ENOMEM; - /* Push GRE header. */ protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto; @@ -730,6 +727,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; __be16 flags; + int tun_hlen; tun_info = skb_tunnel_info(skb); if (unlikely(!tun_info || @@ -743,13 +741,17 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, fl6->daddr = key->u.ipv6.dst; fl6->flowlabel = key->label; fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL); + fl6->fl6_gre_key = tunnel_id_to_key32(key->tun_id); dsfield = key->tos; flags = key->tun_flags & (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ); - tunnel->tun_hlen = gre_calc_hlen(flags); + tun_hlen = gre_calc_hlen(flags); - gre_build_header(skb, tunnel->tun_hlen, + if (skb_cow_head(skb, dev->needed_headroom ?: tun_hlen + tunnel->encap_hlen)) + return -ENOMEM; + + gre_build_header(skb, tun_hlen, flags, protocol, tunnel_id_to_key32(tun_info->key.tun_id), (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) @@ -759,6 +761,9 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, if (tunnel->parms.o_flags & TUNNEL_SEQ) tunnel->o_seqno++; + if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen)) + return -ENOMEM; + gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno)); @@ -925,7 +930,6 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, __be16 proto; __u32 mtu; int nhoff; - int thoff; if (!pskb_inet_may_pull(skb)) goto tx_err; @@ -946,10 +950,16 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff)) truncate = true; - thoff = skb_transport_header(skb) - skb_mac_header(skb); - if (skb->protocol == htons(ETH_P_IPV6) && - (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)) - truncate = true; + if (skb->protocol == htons(ETH_P_IPV6)) { + int thoff; + + if (skb_transport_header_was_set(skb)) + thoff = skb_transport_header(skb) - skb_mac_header(skb); + else + thoff = nhoff + sizeof(struct ipv6hdr); + if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff) + truncate = true; + } if (skb_cow_head(skb, dev->needed_headroom ?: t->hlen)) goto tx_err; @@ -978,6 +988,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, fl6.daddr = key->u.ipv6.dst; fl6.flowlabel = key->label; fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + fl6.fl6_gre_key = tunnel_id_to_key32(key->tun_id); dsfield = key->tos; if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT)) @@ -1085,6 +1096,7 @@ static void ip6gre_tnl_link_config_common(struct ip6_tnl *t) fl6->flowi6_oif = p->link; fl6->flowlabel = 0; fl6->flowi6_proto = IPPROTO_GRE; + fl6->fl6_gre_key = t->parms.o_key; if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS)) fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo; @@ -1530,7 +1542,7 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev) static struct inet6_protocol ip6gre_protocol __read_mostly = { .handler = gre_rcv, .err_handler = ip6gre_err, - .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, + .flags = INET6_PROTO_FINAL, }; static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head) diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 7aa900f8a64a7..99da86a83c346 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -486,7 +486,7 @@ int ip6_mc_input(struct sk_buff *skb) /* * IPv6 multicast router mode is now supported ;) */ - if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding && + if (atomic_read(&dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding) && !(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL)) && likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) { diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 7fbb44736a34b..b7b4ba68f3a20 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -111,6 +111,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, if (likely(ops && ops->callbacks.gso_segment)) { skb_reset_transport_header(skb); segs = ops->callbacks.gso_segment(skb, features); + if (!segs) + skb->network_header = skb_mac_header(skb) + nhoff - skb->head; } if (IS_ERR_OR_NULL(segs)) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index fc913f09606db..5585e3a94f3ca 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -192,7 +192,7 @@ static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) /* Policy lookup after SNAT yielded a new policy */ if (skb_dst(skb)->xfrm) { - IPCB(skb)->flags |= IPSKB_REROUTED; + IP6CB(skb)->flags |= IP6SKB_REROUTED; return dst_output(net, sk, skb); } #endif @@ -506,7 +506,7 @@ int ip6_forward(struct sk_buff *skb) goto drop; if (!net->ipv6.devconf_all->disable_policy && - !idev->cnf.disable_policy && + (!idev || !idev->cnf.disable_policy) && !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); goto drop; @@ -1361,8 +1361,6 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, if (np->frag_size) mtu = np->frag_size; } - if (mtu < IPV6_MIN_MTU) - return -EINVAL; cork->base.fragsize = mtu; cork->base.gso_size = ipc6->gso_size; cork->base.tx_flags = 0; @@ -1424,8 +1422,6 @@ static int __ip6_append_data(struct sock *sk, fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + (opt ? opt->opt_nflen : 0); - maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - - sizeof(struct frag_hdr); headersize = sizeof(struct ipv6hdr) + (opt ? opt->opt_flen + opt->opt_nflen : 0) + @@ -1433,6 +1429,13 @@ static int __ip6_append_data(struct sock *sk, sizeof(struct frag_hdr) : 0) + rt->rt6i_nfheader_len; + if (mtu <= fragheaderlen || + ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr)) + goto emsgsize; + + maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - + sizeof(struct frag_hdr); + /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit * the first fragment */ diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index fd0d1cee2d3f5..878a08c40fffd 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1000,14 +1000,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t, if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false, 0, IFA_F_TENTATIVE))) - pr_warn("%s xmit: Local address not yet configured!\n", - p->name); + pr_warn_ratelimited("%s xmit: Local address not yet configured!\n", + p->name); else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) && !ipv6_addr_is_multicast(raddr) && unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev, true, 0, IFA_F_TENTATIVE))) - pr_warn("%s xmit: Routing loop! Remote address found on this node!\n", - p->name); + pr_warn_ratelimited("%s xmit: Routing loop! Remote address found on this node!\n", + p->name); else ret = 1; rcu_read_unlock(); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 12ab6605d9617..8b44d3b53844e 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -795,6 +795,8 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) struct net *net = dev_net(dev); struct vti6_net *ip6n = net_generic(net, vti6_net_id); + memset(&p1, 0, sizeof(p1)); + switch (cmd) { case SIOCGETTUNNEL: if (dev == ip6n->fb_tnl_dev) { diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index dd41313d7fa50..6248e00c2bf72 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -246,7 +246,9 @@ static int __net_init ip6mr_rules_init(struct net *net) return 0; err2: + rtnl_lock(); ip6mr_free_table(mrt); + rtnl_unlock(); err1: fib_rules_unregister(ops); return err; @@ -734,7 +736,7 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify, in6_dev = __in6_dev_get(dev); if (in6_dev) { - in6_dev->cnf.mc_forwarding--; + atomic_dec(&in6_dev->cnf.mc_forwarding); inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, dev->ifindex, &in6_dev->cnf); @@ -902,7 +904,7 @@ static int mif6_add(struct net *net, struct mr_table *mrt, in6_dev = __in6_dev_get(dev); if (in6_dev) { - in6_dev->cnf.mc_forwarding++; + atomic_inc(&in6_dev->cnf.mc_forwarding); inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, dev->ifindex, &in6_dev->cnf); @@ -1551,7 +1553,7 @@ static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk) } else { rcu_assign_pointer(mrt->mroute_sk, sk); sock_set_flag(sk, SOCK_RCU_FREE); - net->ipv6.devconf_all->mc_forwarding++; + atomic_inc(&net->ipv6.devconf_all->mc_forwarding); } write_unlock_bh(&mrt_lock); @@ -1584,7 +1586,7 @@ int ip6mr_sk_done(struct sock *sk) * so the RCU grace period before sk freeing * is guaranteed by sk_destruct() */ - net->ipv6.devconf_all->mc_forwarding--; + atomic_dec(&net->ipv6.devconf_all->mc_forwarding); write_unlock_bh(&mrt_lock); inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 8bb543b0e775e..41268612bdd4e 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -273,6 +273,7 @@ ip6t_do_table(struct sk_buff *skb, * things we don't know, ie. tcp syn flag or ports). If the * rule is also a fragment-specific rule, non-fragments won't * match it. */ + acpar.fragoff = 0; acpar.hotdrop = false; acpar.state = state; diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c index b9df879c48d3f..69c021704abd7 100644 --- a/net/ipv6/netfilter/nf_socket_ipv6.c +++ b/net/ipv6/netfilter/nf_socket_ipv6.c @@ -99,7 +99,7 @@ struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb, { __be16 uninitialized_var(dport), uninitialized_var(sport); const struct in6_addr *daddr = NULL, *saddr = NULL; - struct ipv6hdr *iph = ipv6_hdr(skb); + struct ipv6hdr *iph = ipv6_hdr(skb), ipv6_var; struct sk_buff *data_skb = NULL; int doff = 0; int thoff = 0, tproto; @@ -129,8 +129,6 @@ struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb, thoff + sizeof(*hp); } else if (tproto == IPPROTO_ICMPV6) { - struct ipv6hdr ipv6_var; - if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr, &sport, &dport, &ipv6_var)) return NULL; diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 98ac32b49d8c9..051bbd0726dff 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -22,6 +22,11 @@ #include #include +static void ping_v6_destroy(struct sock *sk) +{ + inet6_destroy_sock(sk); +} + /* Compatibility glue so we can support IPv6 when it's compiled as a module */ static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) @@ -165,6 +170,7 @@ struct proto pingv6_prot = { .owner = THIS_MODULE, .init = ping_init_sock, .close = ping_close, + .destroy = ping_v6_destroy, .connect = ip6_datagram_connect_v6_only, .disconnect = __udp_disconnect, .setsockopt = ipv6_setsockopt, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d6fc22f7d7a67..00732ee6bbd8a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -1502,17 +1503,24 @@ static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket) static u32 rt6_exception_hash(const struct in6_addr *dst, const struct in6_addr *src) { - static u32 seed __read_mostly; - u32 val; + static siphash_key_t rt6_exception_key __read_mostly; + struct { + struct in6_addr dst; + struct in6_addr src; + } __aligned(SIPHASH_ALIGNMENT) combined = { + .dst = *dst, + }; + u64 val; - net_get_random_once(&seed, sizeof(seed)); - val = jhash(dst, sizeof(*dst), seed); + net_get_random_once(&rt6_exception_key, sizeof(rt6_exception_key)); #ifdef CONFIG_IPV6_SUBTREES if (src) - val = jhash(src, sizeof(*src), val); + combined.src = *src; #endif - return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT); + val = siphash(&combined, sizeof(combined), &rt6_exception_key); + + return hash_64(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT); } /* Helper function to find the cached rt in the hash table @@ -1667,6 +1675,7 @@ static int rt6_insert_exception(struct rt6_info *nrt, struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; struct fib6_nh *nh = res->nh; + int max_depth; int err = 0; spin_lock_bh(&rt6_exception_lock); @@ -1721,7 +1730,9 @@ static int rt6_insert_exception(struct rt6_info *nrt, bucket->depth++; net->ipv6.rt6_stats->fib_rt_cache++; - if (bucket->depth > FIB6_MAX_DEPTH) + /* Randomize max depth to avoid some side channels attacks. */ + max_depth = FIB6_MAX_DEPTH + prandom_u32_max(FIB6_MAX_DEPTH); + while (bucket->depth > max_depth) rt6_exception_remove_oldest(bucket); out: @@ -2686,7 +2697,7 @@ static void ip6_link_failure(struct sk_buff *skb) if (from) { fn = rcu_dereference(from->fib6_node); if (fn && (rt->rt6i_flags & RTF_DEFAULT)) - fn->fn_sernum = -1; + WRITE_ONCE(fn->fn_sernum, -1); } } rcu_read_unlock(); @@ -3574,6 +3585,25 @@ void fib6_nh_release(struct fib6_nh *fib6_nh) fib_nh_common_release(&fib6_nh->nh_common); } +void fib6_nh_release_dsts(struct fib6_nh *fib6_nh) +{ + int cpu; + + if (!fib6_nh->rt6i_pcpu) + return; + + for_each_possible_cpu(cpu) { + struct rt6_info *pcpu_rt, **ppcpu_rt; + + ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu); + pcpu_rt = xchg(ppcpu_rt, NULL); + if (pcpu_rt) { + dst_dev_put(&pcpu_rt->dst); + dst_release(&pcpu_rt->dst); + } + } +} + static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack) @@ -4373,7 +4403,7 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) struct inet6_dev *idev; int type; - if (netif_is_l3_master(skb->dev) && + if (netif_is_l3_master(skb->dev) || dst->dev == net->loopback_dev) idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif)); else @@ -4453,8 +4483,15 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net, } f6i = ip6_route_info_create(&cfg, gfp_flags, NULL); - if (!IS_ERR(f6i)) + if (!IS_ERR(f6i)) { f6i->dst_nocount = true; + + if (!anycast && + (net->ipv6.devconf_all->disable_policy || + idev->cnf.disable_policy)) + f6i->dst_nopolicy = true; + } + return f6i; } @@ -5062,6 +5099,19 @@ static void ip6_route_mpath_notify(struct fib6_info *rt, inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); } +static int fib6_gw_from_attr(struct in6_addr *gw, struct nlattr *nla, + struct netlink_ext_ack *extack) +{ + if (nla_len(nla) < sizeof(*gw)) { + NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_GATEWAY"); + return -EINVAL; + } + + *gw = nla_get_in6_addr(nla); + + return 0; +} + static int ip6_route_multipath_add(struct fib6_config *cfg, struct netlink_ext_ack *extack) { @@ -5103,10 +5153,18 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, nla = nla_find(attrs, attrlen, RTA_GATEWAY); if (nla) { - r_cfg.fc_gateway = nla_get_in6_addr(nla); + err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla, + extack); + if (err) + goto cleanup; + r_cfg.fc_flags |= RTF_GATEWAY; } r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP); + + /* RTA_ENCAP_TYPE length checked in + * lwtunnel_valid_encap_type_attr + */ nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); if (nla) r_cfg.fc_encap_type = nla_get_u16(nla); @@ -5258,7 +5316,13 @@ static int ip6_route_multipath_del(struct fib6_config *cfg, nla = nla_find(attrs, attrlen, RTA_GATEWAY); if (nla) { - nla_memcpy(&r_cfg.fc_gateway, nla, 16); + err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla, + extack); + if (err) { + last_err = err; + goto next_rtnh; + } + r_cfg.fc_flags |= RTF_GATEWAY; } } @@ -5266,6 +5330,7 @@ static int ip6_route_multipath_del(struct fib6_config *cfg, if (err) last_err = err; +next_rtnh: rtnh = rtnh_next(rtnh, &remaining); } @@ -5520,14 +5585,15 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, goto nla_put_failure; if (fib_add_nexthop(skb, &rt->fib6_nh->nh_common, - rt->fib6_nh->fib_nh_weight, AF_INET6) < 0) + rt->fib6_nh->fib_nh_weight, AF_INET6, + 0) < 0) goto nla_put_failure; list_for_each_entry_safe(sibling, next_sibling, &rt->fib6_siblings, fib6_siblings) { if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common, sibling->fib6_nh->fib_nh_weight, - AF_INET6) < 0) + AF_INET6, 0) < 0) goto nla_put_failure; } diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index ffcfcd2b128f3..65394abf4736b 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -401,7 +401,6 @@ int __init seg6_hmac_init(void) { return seg6_hmac_init_algo(); } -EXPORT_SYMBOL(seg6_hmac_init); int __net_init seg6_hmac_net_init(struct net *net) { @@ -411,7 +410,6 @@ int __net_init seg6_hmac_net_init(struct net *net) return 0; } -EXPORT_SYMBOL(seg6_hmac_net_init); void seg6_hmac_exit(void) { diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index ab7f124ff5d7e..b626e0b62a549 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -143,6 +143,14 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb)); memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + + /* the control block has been erased, so we have to set the + * iif once again. + * We read the receiving interface index directly from the + * skb->skb_iif as it is done in the IPv4 receiving path (i.e.: + * ip_rcv_core(...)). + */ + IP6CB(skb)->iif = skb->skb_iif; } hdr->nexthdr = NEXTHDR_ROUTING; @@ -163,6 +171,8 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) } #endif + hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + skb_postpush_rcsum(skb, hdr, tot_len); return 0; @@ -215,6 +225,8 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) } #endif + hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen); return 0; @@ -276,7 +288,6 @@ static int seg6_do_srh(struct sk_buff *skb) break; } - ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); return 0; diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 802eebf8ac4b0..ef5b5cee22017 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -421,7 +421,6 @@ static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt) if (err) goto drop; - ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); seg6_lookup_nexthop(skb, NULL, 0); @@ -453,7 +452,6 @@ static int input_action_end_b6_encap(struct sk_buff *skb, if (err) goto drop; - ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); seg6_lookup_nexthop(skb, NULL, 0); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 7f9cae4c49e7e..117d374695fe6 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -310,9 +310,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t, kcalloc(cmax, sizeof(*kp), GFP_KERNEL | __GFP_NOWARN) : NULL; - rcu_read_lock(); - - ca = t->prl_count < cmax ? t->prl_count : cmax; + ca = min(t->prl_count, cmax); if (!kp) { /* We don't try hard to allocate much memory for @@ -327,7 +325,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t, } } - c = 0; + rcu_read_lock(); for_each_prl_rcu(t->prl) { if (c >= cmax) break; @@ -339,7 +337,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t, if (kprl.addr != htonl(INADDR_ANY)) break; } -out: + rcu_read_unlock(); len = sizeof(*kp) * c; @@ -348,7 +346,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t, ret = -EFAULT; kfree(kp); - +out: return ret; } @@ -1876,7 +1874,6 @@ static int __net_init sit_init_net(struct net *net) return 0; err_reg_dev: - ipip6_dev_free(sitn->fb_tunnel_dev); free_netdev(sitn->fb_tunnel_dev); err_alloc_dev: return err; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 3285dccdd037d..91374ebffa724 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -141,7 +141,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) __u8 rcv_wscale; u32 tsoff = 0; - if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) || + !th->ack || th->rst) goto out; if (tcp_synq_no_recent_overflow(sk)) @@ -176,6 +177,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); treq = tcp_rsk(req); + treq->af_specific = &tcp_request_sock_ipv6_ops; treq->tfo_listener = false; treq->tfo_info = 0; tcp_reqsock_ts_init(treq); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1479c5348f090..18edb84bc655a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -811,7 +811,7 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .syn_ack_timeout = tcp_syn_ack_timeout, }; -static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { +const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr), #ifdef CONFIG_TCP_MD5SIG @@ -1153,6 +1153,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * const struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct ipv6_txoptions *opt; struct inet_sock *newinet; + bool found_dup_sk = false; struct tcp_sock *newtp; struct sock *newsk; #ifdef CONFIG_TCP_MD5SIG @@ -1319,7 +1320,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * tcp_done(newsk); goto out; } - *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), + &found_dup_sk); if (*own_req) { tcp_move_syn(newtp, req); @@ -1334,6 +1336,15 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * skb_set_owner_r(newnp->pktoptions, newsk); } } + } else { + if (!req_unhash && found_dup_sk) { + /* This code path should only be executed in the + * syncookie case only + */ + bh_unlock_sock(newsk); + sock_put(newsk); + newsk = NULL; + } } return newsk; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1a2f4e1a000ac..bab45fd838544 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -133,7 +133,8 @@ static int compute_score(struct sock *sk, struct net *net, dev_match = udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif); if (!dev_match) return -1; - score++; + if (sk->sk_bound_dev_if) + score++; if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; @@ -1134,7 +1135,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, kfree_skb(skb); return -EINVAL; } - if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) { + if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); return -EINVAL; } @@ -1234,7 +1235,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int addr_len = msg->msg_namelen; bool connected = false; int ulen = len; - int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; + int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE; int err; int is_udplite = IS_UDPLITE(sk); int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); @@ -1365,7 +1366,6 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (!fl6.flowi6_oif) fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; - fl6.flowi6_mark = ipc6.sockc.mark; fl6.flowi6_uid = sk->sk_uid; if (msg->msg_controllen) { @@ -1401,6 +1401,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc6.opt = opt; fl6.flowi6_proto = sk->sk_protocol; + fl6.flowi6_mark = ipc6.sockc.mark; fl6.daddr = *daddr; if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr)) fl6.saddr = np->saddr; @@ -1555,8 +1556,10 @@ void udpv6_destroy_sock(struct sock *sk) if (encap_destroy) encap_destroy(sk); } - if (up->encap_enabled) + if (up->encap_enabled) { static_branch_dec(&udpv6_encap_needed_key); + udp_encap_disable(); + } } inet6_destroy_sock(sk); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 6b0ed6c593e2b..a6f13fab963f7 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -140,6 +140,19 @@ static int __xfrm6_output_finish(struct net *net, struct sock *sk, struct sk_buf return __xfrm6_output_state_finish(x, sk, skb); } +static int xfrm6_noneed_fragment(struct sk_buff *skb) +{ + struct frag_hdr *fh; + u8 prevhdr = ipv6_hdr(skb)->nexthdr; + + if (prevhdr != NEXTHDR_FRAGMENT) + return 0; + fh = (struct frag_hdr *)(skb->data + sizeof(struct ipv6hdr)); + if (fh->nexthdr == NEXTHDR_ESP || fh->nexthdr == NEXTHDR_AUTH) + return 1; + return 0; +} + static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -168,6 +181,9 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) xfrm6_local_rxpmtu(skb, mtu); kfree_skb(skb); return -EMSGSIZE; + } else if (toobig && xfrm6_noneed_fragment(skb)) { + skb->ignore_df = 1; + goto skip_frag; } else if (!skb->ignore_df && toobig && skb->sk) { xfrm_local_error(skb, mtu); kfree_skb(skb); diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 78daadecbdef5..07f9a6163ad83 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -20,6 +20,7 @@ #include #include #include +#include int xfrm6_extract_header(struct sk_buff *skb) { @@ -34,6 +35,7 @@ int xfrm6_extract_header(struct sk_buff *skb) memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl, sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); + rps_flow_node_add(iph, true); return 0; } diff --git a/net/key/af_key.c b/net/key/af_key.c index 907d04a474597..32fe99cd01fc8 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1703,7 +1703,7 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sad xfrm_probe_algs(); - supp_skb = compose_sadb_supported(hdr, GFP_KERNEL); + supp_skb = compose_sadb_supported(hdr, GFP_KERNEL | __GFP_ZERO); if (!supp_skb) { if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC) pfk->registered &= ~(1<sadb_msg_satype); @@ -2627,7 +2627,7 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, } return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i, - kma ? &k : NULL, net, NULL); + kma ? &k : NULL, net, NULL, 0); out: return err; @@ -2830,6 +2830,10 @@ static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb void *ext_hdrs[SADB_EXT_MAX]; int err; + /* Non-zero return value of pfkey_broadcast() does not always signal + * an error and even on an actual error we may still want to process + * the message so rather ignore the return value. + */ pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, BROADCAST_PROMISC_ONLY, NULL, sock_net(sk)); @@ -2902,7 +2906,7 @@ static int count_ah_combs(const struct xfrm_tmpl *t) break; if (!aalg->pfkey_supported) continue; - if (aalg_tmpl_set(t, aalg)) + if (aalg_tmpl_set(t, aalg) && aalg->available) sz += sizeof(struct sadb_comb); } return sz + sizeof(struct sadb_prop); @@ -2920,7 +2924,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!ealg->pfkey_supported) continue; - if (!(ealg_tmpl_set(t, ealg))) + if (!(ealg_tmpl_set(t, ealg) && ealg->available)) continue; for (k = 1; ; k++) { @@ -2931,7 +2935,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!aalg->pfkey_supported) continue; - if (aalg_tmpl_set(t, aalg)) + if (aalg_tmpl_set(t, aalg) && aalg->available) sz += sizeof(struct sadb_comb); } } diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 95805a6331be2..421b2c89ce12a 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -886,8 +886,10 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) } if (tunnel->version == L2TP_HDR_VER_3 && - l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) + l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) { + l2tp_session_dec_refcount(session); goto error; + } l2tp_recv_common(session, skb, ptr, optr, hdrflags, length); l2tp_session_dec_refcount(session); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 0fa694bd3f6a9..23e9ce985ed6a 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -515,14 +515,15 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) struct ipcm6_cookie ipc6; int addr_len = msg->msg_namelen; int transhdrlen = 4; /* zero session-id */ - int ulen = len + transhdrlen; + int ulen; int err; /* Rough check on arithmetic overflow, better check is made in ip6_append_data(). */ - if (len > INT_MAX) + if (len > INT_MAX - transhdrlen) return -EMSGSIZE; + ulen = len + transhdrlen; /* Mirror BSD error message compatibility */ if (msg->msg_flags & MSG_OOB) diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index f35899d45a9af..ff4352f6d168a 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -54,7 +54,7 @@ int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex) dev = dev_get_by_index_rcu(net, ifindex); while (dev && !netif_is_l3_master(dev)) - dev = netdev_master_upper_dev_get(dev); + dev = netdev_master_upper_dev_get_rcu(dev); return dev ? dev->ifindex : 0; } diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 0b3adf7594ffe..3b1ea89a340e3 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -276,6 +276,7 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) { struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); + struct net_device *dev = NULL; struct llc_sap *sap; int rc = -EINVAL; @@ -287,14 +288,14 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) goto out; rc = -ENODEV; if (sk->sk_bound_dev_if) { - llc->dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); - if (llc->dev && addr->sllc_arphrd != llc->dev->type) { - dev_put(llc->dev); - llc->dev = NULL; + dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); + if (dev && addr->sllc_arphrd != dev->type) { + dev_put(dev); + dev = NULL; } } else - llc->dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd); - if (!llc->dev) + dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd); + if (!dev) goto out; rc = -EUSERS; llc->laddr.lsap = llc_ui_autoport(); @@ -304,6 +305,11 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) sap = llc_sap_open(llc->laddr.lsap, NULL); if (!sap) goto out; + + /* Note: We do not expect errors from this point. */ + llc->dev = dev; + dev = NULL; + memcpy(llc->laddr.mac, llc->dev->dev_addr, IFHWADDRLEN); memcpy(&llc->addr, addr, sizeof(llc->addr)); /* assign new connection to its SAP */ @@ -311,6 +317,7 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) sock_reset_flag(sk, SOCK_ZAPPED); rc = 0; out: + dev_put(dev); return rc; } @@ -333,6 +340,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) struct sockaddr_llc *addr = (struct sockaddr_llc *)uaddr; struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); + struct net_device *dev = NULL; struct llc_sap *sap; int rc = -EINVAL; @@ -348,25 +356,26 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) rc = -ENODEV; rcu_read_lock(); if (sk->sk_bound_dev_if) { - llc->dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if); - if (llc->dev) { + dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if); + if (dev) { if (is_zero_ether_addr(addr->sllc_mac)) - memcpy(addr->sllc_mac, llc->dev->dev_addr, + memcpy(addr->sllc_mac, dev->dev_addr, IFHWADDRLEN); - if (addr->sllc_arphrd != llc->dev->type || + if (addr->sllc_arphrd != dev->type || !ether_addr_equal(addr->sllc_mac, - llc->dev->dev_addr)) { + dev->dev_addr)) { rc = -EINVAL; - llc->dev = NULL; + dev = NULL; } } - } else - llc->dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd, + } else { + dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd, addr->sllc_mac); - if (llc->dev) - dev_hold(llc->dev); + } + if (dev) + dev_hold(dev); rcu_read_unlock(); - if (!llc->dev) + if (!dev) goto out; if (!addr->sllc_sap) { rc = -EUSERS; @@ -399,6 +408,11 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) goto out_put; } } + + /* Note: We do not expect errors from this point. */ + llc->dev = dev; + dev = NULL; + llc->laddr.lsap = addr->sllc_sap; memcpy(llc->laddr.mac, addr->sllc_mac, IFHWADDRLEN); memcpy(&llc->addr, addr, sizeof(llc->addr)); @@ -409,6 +423,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) out_put: llc_sap_put(sap); out: + dev_put(dev); release_sock(sk); return rc; } diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 4d1c335e06e57..49ec9bfb6c8e6 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation */ /** @@ -191,7 +191,8 @@ static void ieee80211_add_addbaext(struct ieee80211_sub_if_data *sdata, sband = ieee80211_get_sband(sdata); if (!sband) return; - he_cap = ieee80211_get_he_iftype_cap(sband, sdata->vif.type); + he_cap = ieee80211_get_he_iftype_cap(sband, + ieee80211_vif_type_p2p(&sdata->vif)); if (!he_cap) return; @@ -292,7 +293,8 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta, goto end; } - if (!sta->sta.ht_cap.ht_supported) { + if (!sta->sta.ht_cap.ht_supported && + sta->sdata->vif.bss_conf.chandef.chan->band != NL80211_BAND_6GHZ) { ht_dbg(sta->sdata, "STA %pM erroneously requests BA session on tid %d w/o QoS\n", sta->sta.addr, tid); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index b11883d268759..f30cdd7f3a73a 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2019 Intel Corporation + * Copyright (C) 2018 - 2022 Intel Corporation */ #include @@ -106,7 +106,7 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, mgmt->u.action.u.addba_req.start_seq_num = cpu_to_le16(start_seq_num << 4); - ieee80211_tx_skb(sdata, skb); + ieee80211_tx_skb_tid(sdata, skb, tid); } void ieee80211_send_bar(struct ieee80211_vif *vif, u8 *ra, u16 tid, u16 ssn) @@ -213,6 +213,8 @@ ieee80211_agg_start_txq(struct sta_info *sta, int tid, bool enable) struct ieee80211_txq *txq = sta->sta.txq[tid]; struct txq_info *txqi; + lockdep_assert_held(&sta->ampdu_mlme.mtx); + if (!txq) return; @@ -290,7 +292,6 @@ static void ieee80211_remove_tid_tx(struct sta_info *sta, int tid) ieee80211_assign_tid_tx(sta, tid, NULL); ieee80211_agg_splice_finish(sta->sdata, tid); - ieee80211_agg_start_txq(sta, tid, false); kfree_rcu(tid_tx, rcu_head); } @@ -448,6 +449,42 @@ static void sta_addba_resp_timer_expired(struct timer_list *t) ieee80211_stop_tx_ba_session(&sta->sta, tid); } +static void ieee80211_send_addba_with_timeout(struct sta_info *sta, + struct tid_ampdu_tx *tid_tx) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sta->local; + u8 tid = tid_tx->tid; + u16 buf_size; + + /* activate the timer for the recipient's addBA response */ + mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL); + ht_dbg(sdata, "activated addBA response timer on %pM tid %d\n", + sta->sta.addr, tid); + + spin_lock_bh(&sta->lock); + sta->ampdu_mlme.last_addba_req_time[tid] = jiffies; + sta->ampdu_mlme.addba_req_num[tid]++; + spin_unlock_bh(&sta->lock); + + if (sta->sta.he_cap.has_he) { + buf_size = local->hw.max_tx_aggregation_subframes; + } else { + /* + * We really should use what the driver told us it will + * transmit as the maximum, but certain APs (e.g. the + * LinkSys WRT120N with FW v1.0.07 build 002 Jun 18 2012) + * will crash when we use a lower number. + */ + buf_size = IEEE80211_MAX_AMPDU_BUF_HT; + } + + /* send AddBA request */ + ieee80211_send_addba_request(sdata, sta->sta.addr, tid, + tid_tx->dialog_token, tid_tx->ssn, + buf_size, tid_tx->timeout); +} + void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) { struct tid_ampdu_tx *tid_tx; @@ -462,7 +499,6 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) .timeout = 0, }; int ret; - u16 buf_size; tid_tx = rcu_dereference_protected_tid_tx(sta, tid); @@ -485,6 +521,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) params.ssn = sta->tid_seq[tid] >> 4; ret = drv_ampdu_action(local, sdata, ¶ms); + tid_tx->ssn = params.ssn; if (ret) { ht_dbg(sdata, "BA request denied - HW unavailable for %pM tid %d\n", @@ -501,32 +538,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) return; } - /* activate the timer for the recipient's addBA response */ - mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL); - ht_dbg(sdata, "activated addBA response timer on %pM tid %d\n", - sta->sta.addr, tid); - - spin_lock_bh(&sta->lock); - sta->ampdu_mlme.last_addba_req_time[tid] = jiffies; - sta->ampdu_mlme.addba_req_num[tid]++; - spin_unlock_bh(&sta->lock); - - if (sta->sta.he_cap.has_he) { - buf_size = local->hw.max_tx_aggregation_subframes; - } else { - /* - * We really should use what the driver told us it will - * transmit as the maximum, but certain APs (e.g. the - * LinkSys WRT120N with FW v1.0.07 build 002 Jun 18 2012) - * will crash when we use a lower number. - */ - buf_size = IEEE80211_MAX_AMPDU_BUF_HT; - } - - /* send AddBA request */ - ieee80211_send_addba_request(sdata, sta->sta.addr, tid, - tid_tx->dialog_token, params.ssn, - buf_size, tid_tx->timeout); + ieee80211_send_addba_with_timeout(sta, tid_tx); } /* @@ -571,7 +583,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, "Requested to start BA session on reserved tid=%d", tid)) return -EINVAL; - if (!pubsta->ht_cap.ht_supported) + if (!pubsta->ht_cap.ht_supported && + sta->sdata->vif.bss_conf.chandef.chan->band != NL80211_BAND_6GHZ) return -EINVAL; if (WARN_ON_ONCE(!local->ops->ampdu_action)) @@ -602,6 +615,14 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, return -EINVAL; } + if (test_sta_flag(sta, WLAN_STA_MFP) && + !test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { + ht_dbg(sdata, + "MFP STA not authorized - deny BA session request %pM tid %d\n", + sta->sta.addr, tid); + return -EINVAL; + } + /* * 802.11n-2009 11.5.1.1: If the initiating STA is an HT STA, is a * member of an IBSS, and has no other existing Block Ack agreement @@ -860,6 +881,7 @@ void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid, { struct ieee80211_sub_if_data *sdata = sta->sdata; bool send_delba = false; + bool start_txq = false; ht_dbg(sdata, "Stopping Tx BA session for %pM tid %d\n", sta->sta.addr, tid); @@ -877,10 +899,14 @@ void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid, send_delba = true; ieee80211_remove_tid_tx(sta, tid); + start_txq = true; unlock_sta: spin_unlock_bh(&sta->lock); + if (start_txq) + ieee80211_agg_start_txq(sta, tid, false); + if (send_delba) ieee80211_send_delba(sdata, sta->sta.addr, tid, WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 1b50bbf030ed8..16f37fd0ac0e5 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1949,13 +1949,11 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, const struct mesh_setup *setup) { u8 *new_ie; - const u8 *old_ie; struct ieee80211_sub_if_data *sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh); /* allocate information elements */ new_ie = NULL; - old_ie = ifmsh->ie; if (setup->ie_len) { new_ie = kmemdup(setup->ie, setup->ie_len, @@ -1965,7 +1963,6 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, } ifmsh->ie_len = setup->ie_len; ifmsh->ie = new_ie; - kfree(old_ie); /* now copy the rest of the setup parameters */ ifmsh->mesh_id_len = setup->mesh_id_len; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 9c94baaf693cb..15f47918cbacd 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -1639,12 +1639,9 @@ int ieee80211_vif_use_reserved_context(struct ieee80211_sub_if_data *sdata) if (new_ctx->replace_state == IEEE80211_CHANCTX_REPLACE_NONE) { if (old_ctx) - err = ieee80211_vif_use_reserved_reassign(sdata); - else - err = ieee80211_vif_use_reserved_assign(sdata); + return ieee80211_vif_use_reserved_reassign(sdata); - if (err) - return err; + return ieee80211_vif_use_reserved_assign(sdata); } /* diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 2c9b3eb8b6525..f4c7e0af896b1 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1202,8 +1202,11 @@ static inline void drv_wake_tx_queue(struct ieee80211_local *local, { struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->txq.vif); - if (local->in_reconfig) + /* In reconfig don't transmit now, but mark for waking later */ + if (local->in_reconfig) { + set_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txq->flags); return; + } if (!check_sdata_in_driver(sdata)) return; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e574fbf6745a4..7747a6f46d299 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1082,6 +1082,9 @@ struct tpt_led_trigger { * a scan complete for an aborted scan. * @SCAN_HW_CANCELLED: Set for our scan work function when the scan is being * cancelled. + * @SCAN_BEACON_WAIT: Set whenever we're passive scanning because of radar/no-IR + * and could send a probe request after receiving a beacon. + * @SCAN_BEACON_DONE: Beacon received, we can now send a probe request */ enum { SCAN_SW_SCANNING, @@ -1090,6 +1093,8 @@ enum { SCAN_COMPLETED, SCAN_ABORTED, SCAN_HW_CANCELLED, + SCAN_BEACON_WAIT, + SCAN_BEACON_DONE, }; /** diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 6f576306a4d74..ddc001ad90555 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1875,9 +1875,16 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, netdev_set_default_ethtool_ops(ndev, &ieee80211_ethtool_ops); - /* MTU range: 256 - 2304 */ + /* MTU range is normally 256 - 2304, where the upper limit is + * the maximum MSDU size. Monitor interfaces send and receive + * MPDU and A-MSDU frames which may be much larger so we do + * not impose an upper limit in that case. + */ ndev->min_mtu = 256; - ndev->max_mtu = local->hw.max_mtu; + if (type == NL80211_IFTYPE_MONITOR) + ndev->max_mtu = 0; + else + ndev->max_mtu = local->hw.max_mtu; ret = register_netdevice(ndev); if (ret) { diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 1708b64d41094..d7ae7415d54d0 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -60,7 +60,10 @@ static struct mesh_table *mesh_table_alloc(void) atomic_set(&newtbl->entries, 0); spin_lock_init(&newtbl->gates_lock); spin_lock_init(&newtbl->walk_lock); - rhashtable_init(&newtbl->rhead, &mesh_rht_params); + if (rhashtable_init(&newtbl->rhead, &mesh_rht_params)) { + kfree(newtbl); + return NULL; + } return newtbl; } diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c index 031e905f684a2..bf83f512f748b 100644 --- a/net/mac80211/mesh_ps.c +++ b/net/mac80211/mesh_ps.c @@ -2,6 +2,7 @@ /* * Copyright 2012-2013, Marco Porsch * Copyright 2012-2013, cozybit Inc. + * Copyright (C) 2021 Intel Corporation */ #include "mesh.h" @@ -584,7 +585,7 @@ void ieee80211_mps_frame_release(struct sta_info *sta, /* only transmit to PS STA with announced, non-zero awake window */ if (test_sta_flag(sta, WLAN_STA_PS_STA) && - (!elems->awake_window || !le16_to_cpu(*elems->awake_window))) + (!elems->awake_window || !get_unaligned_le16(elems->awake_window))) return; if (!test_sta_flag(sta, WLAN_STA_MPSP_OWNER)) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ccaf2389ccc1d..5415e566e09d8 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2418,11 +2418,18 @@ static void ieee80211_sta_tx_wmm_ac_notify(struct ieee80211_sub_if_data *sdata, u16 tx_time) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - u16 tid = ieee80211_get_tid(hdr); - int ac = ieee80211_ac_from_tid(tid); - struct ieee80211_sta_tx_tspec *tx_tspec = &ifmgd->tx_tspec[ac]; + u16 tid; + int ac; + struct ieee80211_sta_tx_tspec *tx_tspec; unsigned long now = jiffies; + if (!ieee80211_is_data_qos(hdr->frame_control)) + return; + + tid = ieee80211_get_tid(hdr); + ac = ieee80211_ac_from_tid(tid); + tx_tspec = &ifmgd->tx_tspec[ac]; + if (likely(!tx_tspec->admitted_time)) return; @@ -3399,6 +3406,12 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, cbss->transmitted_bss->bssid); bss_conf->bssid_indicator = cbss->max_bssid_indicator; bss_conf->bssid_index = cbss->bssid_index; + } else { + bss_conf->nontransmitted = false; + memset(bss_conf->transmitter_bssid, 0, + sizeof(bss_conf->transmitter_bssid)); + bss_conf->bssid_indicator = 0; + bss_conf->bssid_index = 0; } /* @@ -4946,7 +4959,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, */ if (new_sta) { u32 rates = 0, basic_rates = 0; - bool have_higher_than_11mbit; + bool have_higher_than_11mbit = false; int min_rate = INT_MAX, min_rate_index = -1; const struct cfg80211_bss_ies *ies; int shift = ieee80211_vif_get_shift(&sdata->vif); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 670d84e54db73..99d5f8b58e92e 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1400,8 +1400,7 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, goto dont_reorder; /* not part of a BA session */ - if (ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_BLOCKACK && - ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_NORMAL) + if (ack_policy == IEEE80211_QOS_CTL_ACK_POLICY_NOACK) goto dont_reorder; /* new, potentially un-ordered, ampdu frame - process it */ @@ -1918,7 +1917,8 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) int keyid = rx->sta->ptk_idx; sta_ptk = rcu_dereference(rx->sta->ptk[keyid]); - if (ieee80211_has_protected(fc)) { + if (ieee80211_has_protected(fc) && + !(status->flag & RX_FLAG_IV_STRIPPED)) { cs = rx->sta->cipher_scheme; keyid = ieee80211_get_keyid(rx->skb, cs); @@ -2851,13 +2851,13 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) ether_addr_equal(sdata->vif.addr, hdr->addr3)) return RX_CONTINUE; - ac = ieee80211_select_queue_80211(sdata, skb, hdr); + ac = ieee802_1d_to_ac[skb->priority]; q = sdata->vif.hw_queue[ac]; if (ieee80211_queue_stopped(&local->hw, q)) { IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_congestion); return RX_DROP_MONITOR; } - skb_set_queue_mapping(skb, q); + skb_set_queue_mapping(skb, ac); if (!--mesh_hdr->ttl) { if (!is_multicast_ether_addr(hdr->addr1)) @@ -3952,7 +3952,8 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) if (!bssid) return false; if (ether_addr_equal(sdata->vif.addr, hdr->addr2) || - ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2)) + ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2) || + !is_valid_ether_addr(hdr->addr2)) return false; if (ieee80211_is_beacon(hdr->frame_control)) return true; @@ -4691,7 +4692,7 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, goto drop; break; case RX_ENC_VHT: - if (WARN_ONCE(status->rate_idx > 9 || + if (WARN_ONCE(status->rate_idx > 11 || !status->nss || status->nss > 8, "Rate marked as a VHT rate but data is invalid: MCS: %d, NSS: %d\n", diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 4d31d9688dc23..344b2c22e75b5 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -252,6 +252,16 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) if (likely(!sdata1 && !sdata2)) return; + if (test_and_clear_bit(SCAN_BEACON_WAIT, &local->scanning)) { + /* + * we were passive scanning because of radar/no-IR, but + * the beacon/proberesp rx gives us an opportunity to upgrade + * to active scan + */ + set_bit(SCAN_BEACON_DONE, &local->scanning); + ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0); + } + if (ieee80211_is_probe_resp(mgmt->frame_control)) { struct cfg80211_scan_request *scan_req; struct cfg80211_sched_scan_request *sched_scan_req; @@ -753,6 +763,8 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, IEEE80211_CHAN_RADAR)) || !req->n_ssids) { next_delay = IEEE80211_PASSIVE_CHANNEL_TIME; + if (req->n_ssids) + set_bit(SCAN_BEACON_WAIT, &local->scanning); } else { ieee80211_scan_state_send_probe(local, &next_delay); next_delay = IEEE80211_CHANNEL_TIME; @@ -945,6 +957,8 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local, !scan_req->n_ssids) { *next_delay = IEEE80211_PASSIVE_CHANNEL_TIME; local->next_scan_state = SCAN_DECISION; + if (scan_req->n_ssids) + set_bit(SCAN_BEACON_WAIT, &local->scanning); return; } @@ -1037,6 +1051,8 @@ void ieee80211_scan_work(struct work_struct *work) goto out; } + clear_bit(SCAN_BEACON_WAIT, &local->scanning); + /* * as long as no delay is required advance immediately * without scheduling a new work @@ -1047,6 +1063,10 @@ void ieee80211_scan_work(struct work_struct *work) goto out_complete; } + if (test_and_clear_bit(SCAN_BEACON_DONE, &local->scanning) && + local->next_scan_state == SCAN_DECISION) + local->next_scan_state = SCAN_SEND_PROBE; + switch (local->next_scan_state) { case SCAN_DECISION: /* if no more bands/channels left, complete scan */ diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 2eb73be9b9865..be0df78d4a799 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -180,6 +180,7 @@ struct tid_ampdu_tx { u8 stop_initiator; bool tx_stop; u16 buf_size; + u16 ssn; u16 failed_bar_ssn; bool bar_pending; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 538722522ffe9..d82d22b6a2a94 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2156,7 +2156,11 @@ static bool ieee80211_parse_tx_radiotap(struct ieee80211_local *local, } vht_mcs = iterator.this_arg[4] >> 4; + if (vht_mcs > 11) + vht_mcs = 0; vht_nss = iterator.this_arg[4] & 0xF; + if (!vht_nss || vht_nss > 8) + vht_nss = 1; break; /* @@ -3189,7 +3193,9 @@ static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata, if (info->control.flags & IEEE80211_TX_CTRL_AMSDU) return true; - if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(*amsdu_hdr))) + if (!ieee80211_amsdu_realloc_pad(local, skb, + sizeof(*amsdu_hdr) + + local->hw.extra_tx_headroom)) return false; data = skb_push(skb, sizeof(*amsdu_hdr)); @@ -3323,6 +3329,14 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, if (!ieee80211_amsdu_prepare_head(sdata, fast_tx, head)) goto out; + /* If n == 2, the "while (*frag_tail)" loop above didn't execute + * and frag_tail should be &skb_shinfo(head)->frag_list. + * However, ieee80211_amsdu_prepare_head() can reallocate it. + * Reload frag_tail to have it pointing to the correct place. + */ + if (n == 2) + frag_tail = &skb_shinfo(head)->frag_list; + /* * Pad out the previous subframe to a multiple of 4 by adding the * padding to the next one, that's being added. Note that head->len diff --git a/net/mac80211/util.c b/net/mac80211/util.c index decd46b383938..c1c117fdf3184 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1227,6 +1227,8 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, elems->max_idle_period_ie = (void *)pos; break; case WLAN_EID_EXTENSION: + if (!elen) + break; if (pos[0] == WLAN_EID_EXT_HE_MU_EDCA && elen >= (sizeof(*elems->mu_edca_param_set) + 1)) { elems->mu_edca_param_set = (void *)&pos[1]; diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 72920d82928c4..ace44ff966358 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -145,8 +145,8 @@ u16 __ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, bool qos; /* all mesh/ocb stations are required to support WME */ - if (sdata->vif.type == NL80211_IFTYPE_MESH_POINT || - sdata->vif.type == NL80211_IFTYPE_OCB) + if (sta && (sdata->vif.type == NL80211_IFTYPE_MESH_POINT || + sdata->vif.type == NL80211_IFTYPE_OCB)) qos = true; else if (sta) qos = sta->sta.wme; diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index bca47fad5a162..4eed23e276104 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -520,6 +520,9 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx, return RX_DROP_UNUSABLE; } + /* reload hdr - skb might have been reallocated */ + hdr = (void *)rx->skb->data; + data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN - mic_len; if (!rx->sta || data_len < 0) return RX_DROP_UNUSABLE; @@ -749,6 +752,9 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx) return RX_DROP_UNUSABLE; } + /* reload hdr - skb might have been reallocated */ + hdr = (void *)rx->skb->data; + data_len = skb->len - hdrlen - IEEE80211_GCMP_HDR_LEN - mic_len; if (!rx->sta || data_len < 0) return RX_DROP_UNUSABLE; diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 4701edffb1f7d..d5e3656fc67ca 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1491,22 +1491,52 @@ static void mpls_dev_destroy_rcu(struct rcu_head *head) kfree(mdev); } -static void mpls_ifdown(struct net_device *dev, int event) +static int mpls_ifdown(struct net_device *dev, int event) { struct mpls_route __rcu **platform_label; struct net *net = dev_net(dev); - u8 alive, deleted; unsigned index; platform_label = rtnl_dereference(net->mpls.platform_label); for (index = 0; index < net->mpls.platform_labels; index++) { struct mpls_route *rt = rtnl_dereference(platform_label[index]); + bool nh_del = false; + u8 alive = 0; if (!rt) continue; - alive = 0; - deleted = 0; + if (event == NETDEV_UNREGISTER) { + u8 deleted = 0; + + for_nexthops(rt) { + struct net_device *nh_dev = + rtnl_dereference(nh->nh_dev); + + if (!nh_dev || nh_dev == dev) + deleted++; + if (nh_dev == dev) + nh_del = true; + } endfor_nexthops(rt); + + /* if there are no more nexthops, delete the route */ + if (deleted == rt->rt_nhn) { + mpls_route_update(net, index, NULL, NULL); + continue; + } + + if (nh_del) { + size_t size = sizeof(*rt) + rt->rt_nhn * + rt->rt_nh_size; + struct mpls_route *orig = rt; + + rt = kmalloc(size, GFP_KERNEL); + if (!rt) + return -ENOMEM; + memcpy(rt, orig, size); + } + } + change_nexthops(rt) { unsigned int nh_flags = nh->nh_flags; @@ -1530,16 +1560,15 @@ static void mpls_ifdown(struct net_device *dev, int event) next: if (!(nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))) alive++; - if (!rtnl_dereference(nh->nh_dev)) - deleted++; } endfor_nexthops(rt); WRITE_ONCE(rt->rt_nhn_alive, alive); - /* if there are no more nexthops, delete the route */ - if (event == NETDEV_UNREGISTER && deleted == rt->rt_nhn) - mpls_route_update(net, index, NULL, NULL); + if (nh_del) + mpls_route_update(net, index, rt, NULL); } + + return 0; } static void mpls_ifup(struct net_device *dev, unsigned int flags) @@ -1607,8 +1636,12 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, return NOTIFY_OK; switch (event) { + int err; + case NETDEV_DOWN: - mpls_ifdown(dev, event); + err = mpls_ifdown(dev, event); + if (err) + return notifier_from_errno(err); break; case NETDEV_UP: flags = dev_get_flags(dev); @@ -1619,13 +1652,18 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, break; case NETDEV_CHANGE: flags = dev_get_flags(dev); - if (flags & (IFF_RUNNING | IFF_LOWER_UP)) + if (flags & (IFF_RUNNING | IFF_LOWER_UP)) { mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN); - else - mpls_ifdown(dev, event); + } else { + err = mpls_ifdown(dev, event); + if (err) + return notifier_from_errno(err); + } break; case NETDEV_UNREGISTER: - mpls_ifdown(dev, event); + err = mpls_ifdown(dev, event); + if (err) + return notifier_from_errno(err); mdev = mpls_dev_get(dev); if (mdev) { mpls_dev_sysctl_unregister(dev, mdev); @@ -1636,8 +1674,6 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, case NETDEV_CHANGENAME: mdev = mpls_dev_get(dev); if (mdev) { - int err; - mpls_dev_sysctl_unregister(dev, mdev); err = mpls_dev_sysctl_register(dev, mdev); if (err) diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c index 0187e65176c05..114ef47db76d3 100644 --- a/net/ncsi/ncsi-cmd.c +++ b/net/ncsi/ncsi-cmd.c @@ -18,6 +18,8 @@ #include "internal.h" #include "ncsi-pkt.h" +static const int padding_bytes = 26; + u32 ncsi_calculate_checksum(unsigned char *data, int len) { u32 checksum = 0; @@ -213,12 +215,17 @@ static int ncsi_cmd_handler_oem(struct sk_buff *skb, { struct ncsi_cmd_oem_pkt *cmd; unsigned int len; + int payload; + /* NC-SI spec DSP_0222_1.2.0, section 8.2.2.2 + * requires payload to be padded with 0 to + * 32-bit boundary before the checksum field. + * Ensure the padding bytes are accounted for in + * skb allocation + */ + payload = ALIGN(nca->payload, 4); len = sizeof(struct ncsi_cmd_pkt_hdr) + 4; - if (nca->payload < 26) - len += 26; - else - len += nca->payload; + len += max(payload, padding_bytes); cmd = skb_put_zero(skb, len); memcpy(&cmd->mfr_id, nca->data, nca->payload); @@ -272,6 +279,7 @@ static struct ncsi_request *ncsi_alloc_command(struct ncsi_cmd_arg *nca) struct net_device *dev = nd->dev; int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; + int payload; int len = hlen + tlen; struct sk_buff *skb; struct ncsi_request *nr; @@ -281,14 +289,14 @@ static struct ncsi_request *ncsi_alloc_command(struct ncsi_cmd_arg *nca) return NULL; /* NCSI command packet has 16-bytes header, payload, 4 bytes checksum. + * Payload needs padding so that the checksum field following payload is + * aligned to 32-bit boundary. * The packet needs padding if its payload is less than 26 bytes to * meet 64 bytes minimal ethernet frame length. */ len += sizeof(struct ncsi_cmd_pkt_hdr) + 4; - if (nca->payload < 26) - len += 26; - else - len += nca->payload; + payload = ALIGN(nca->payload, 4); + len += max(payload, padding_bytes); /* Allocate skb */ skb = alloc_skb(len, GFP_ATOMIC); diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index a33ea45dec054..27700887c3217 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -112,7 +112,11 @@ static int ncsi_write_package_info(struct sk_buff *skb, pnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR); if (!pnest) return -ENOMEM; - nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id); + rc = nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id); + if (rc) { + nla_nest_cancel(skb, pnest); + return rc; + } if ((0x1 << np->id) == ndp->package_whitelist) nla_put_flag(skb, NCSI_PKG_ATTR_FORCED); cnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR_CHANNEL_LIST); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 91efae88e8c2a..ef72819d9d315 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -94,7 +94,7 @@ config NF_CONNTRACK_MARK config NF_CONNTRACK_SECMARK bool 'Connection tracking security mark support' depends on NETWORK_SECMARK - default m if NETFILTER_ADVANCED=n + default y if NETFILTER_ADVANCED=n help This option enables security markings to be applied to connections. Typically they are copied to connections from diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 5d5bdf4500916..451b2df998ea7 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -336,14 +336,15 @@ static int __nf_register_net_hook(struct net *net, int pf, p = nf_entry_dereference(*pp); new_hooks = nf_hook_entries_grow(p, reg); - if (!IS_ERR(new_hooks)) + if (!IS_ERR(new_hooks)) { + hooks_validate(new_hooks); rcu_assign_pointer(*pp, new_hooks); + } mutex_unlock(&nf_hook_mutex); if (IS_ERR(new_hooks)) return PTR_ERR(new_hooks); - hooks_validate(new_hooks); #ifdef CONFIG_NETFILTER_INGRESS if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) net_inc_ingress_queue(); diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 500de37858ac8..1b44dfa7ba856 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -132,11 +132,11 @@ htable_size(u8 hbits) { size_t hsize; - /* We must fit both into u32 in jhash and size_t */ + /* We must fit both into u32 in jhash and INT_MAX in kvmalloc_node() */ if (hbits > 31) return 0; hsize = jhash_size(hbits); - if ((((size_t)-1) - sizeof(struct htable)) / sizeof(struct hbucket *) + if ((INT_MAX - sizeof(struct htable)) / sizeof(struct hbucket *) < hsize) return 0; diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 02f2f636798d1..24d9be1471897 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -807,6 +807,31 @@ static void ip_vs_conn_rcu_free(struct rcu_head *head) kmem_cache_free(ip_vs_conn_cachep, cp); } +/* Try to delete connection while not holding reference */ +static void ip_vs_conn_del(struct ip_vs_conn *cp) +{ + if (del_timer(&cp->timer)) { + /* Drop cp->control chain too */ + if (cp->control) + cp->timeout = 0; + ip_vs_conn_expire(&cp->timer); + } +} + +/* Try to delete connection while holding reference */ +static void ip_vs_conn_del_put(struct ip_vs_conn *cp) +{ + if (del_timer(&cp->timer)) { + /* Drop cp->control chain too */ + if (cp->control) + cp->timeout = 0; + __ip_vs_conn_put(cp); + ip_vs_conn_expire(&cp->timer); + } else { + __ip_vs_conn_put(cp); + } +} + static void ip_vs_conn_expire(struct timer_list *t) { struct ip_vs_conn *cp = from_timer(cp, t, timer); @@ -827,14 +852,17 @@ static void ip_vs_conn_expire(struct timer_list *t) /* does anybody control me? */ if (ct) { + bool has_ref = !cp->timeout && __ip_vs_conn_get(ct); + ip_vs_control_del(cp); /* Drop CTL or non-assured TPL if not used anymore */ - if (!cp->timeout && !atomic_read(&ct->n_control) && + if (has_ref && !atomic_read(&ct->n_control) && (!(ct->flags & IP_VS_CONN_F_TEMPLATE) || !(ct->state & IP_VS_CTPL_S_ASSURED))) { IP_VS_DBG(4, "drop controlling connection\n"); - ct->timeout = 0; - ip_vs_conn_expire_now(ct); + ip_vs_conn_del_put(ct); + } else if (has_ref) { + __ip_vs_conn_put(ct); } } @@ -1317,8 +1345,7 @@ void ip_vs_random_dropentry(struct netns_ipvs *ipvs) drop: IP_VS_DBG(4, "drop connection\n"); - cp->timeout = 0; - ip_vs_conn_expire_now(cp); + ip_vs_conn_del(cp); } cond_resched_rcu(); } @@ -1341,19 +1368,15 @@ static void ip_vs_conn_flush(struct netns_ipvs *ipvs) hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { if (cp->ipvs != ipvs) continue; - /* As timers are expired in LIFO order, restart - * the timer of controlling connection first, so - * that it is expired after us. - */ + if (atomic_read(&cp->n_control)) + continue; cp_c = cp->control; - /* cp->control is valid only with reference to cp */ - if (cp_c && __ip_vs_conn_get(cp)) { + IP_VS_DBG(4, "del connection\n"); + ip_vs_conn_del(cp); + if (cp_c && !atomic_read(&cp_c->n_control)) { IP_VS_DBG(4, "del controlling connection\n"); - ip_vs_conn_expire_now(cp_c); - __ip_vs_conn_put(cp); + ip_vs_conn_del(cp_c); } - IP_VS_DBG(4, "del connection\n"); - ip_vs_conn_expire_now(cp); } cond_resched_rcu(); } @@ -1366,6 +1389,45 @@ static void ip_vs_conn_flush(struct netns_ipvs *ipvs) goto flush_again; } } + +#ifdef CONFIG_SYSCTL +void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs) +{ + int idx; + struct ip_vs_conn *cp, *cp_c; + struct ip_vs_dest *dest; + + rcu_read_lock(); + for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { + if (cp->ipvs != ipvs) + continue; + + dest = cp->dest; + if (!dest || (dest->flags & IP_VS_DEST_F_AVAILABLE)) + continue; + + if (atomic_read(&cp->n_control)) + continue; + + cp_c = cp->control; + IP_VS_DBG(4, "del connection\n"); + ip_vs_conn_del(cp); + if (cp_c && !atomic_read(&cp_c->n_control)) { + IP_VS_DBG(4, "del controlling connection\n"); + ip_vs_conn_del(cp_c); + } + } + cond_resched_rcu(); + + /* netns clean up started, abort delayed work */ + if (!ipvs->enable) + break; + } + rcu_read_unlock(); +} +#endif + /* * per netns init and exit */ @@ -1394,6 +1456,10 @@ int __init ip_vs_conn_init(void) int idx; /* Compute size and mask */ + if (ip_vs_conn_tab_bits < 8 || ip_vs_conn_tab_bits > 20) { + pr_info("conn_tab_bits not in [8, 20]. Using default value\n"); + ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS; + } ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits; ip_vs_conn_tab_mask = ip_vs_conn_tab_size - 1; @@ -1417,7 +1483,7 @@ int __init ip_vs_conn_init(void) pr_info("Connection hash table configured " "(size=%d, memory=%ldKbytes)\n", ip_vs_conn_tab_size, - (long)(ip_vs_conn_tab_size*sizeof(struct list_head))/1024); + (long)(ip_vs_conn_tab_size*sizeof(*ip_vs_conn_tab))/1024); IP_VS_DBG(0, "Each connection entry needs %zd bytes at least\n", sizeof(struct ip_vs_conn)); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 89aa1fc334b19..df26529f81899 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -694,16 +694,10 @@ static int sysctl_nat_icmp_send(struct netns_ipvs *ipvs) return ipvs->sysctl_nat_icmp_send; } -static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) -{ - return ipvs->sysctl_expire_nodest_conn; -} - #else static int sysctl_snat_reroute(struct netns_ipvs *ipvs) { return 0; } static int sysctl_nat_icmp_send(struct netns_ipvs *ipvs) { return 0; } -static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) { return 0; } #endif @@ -1982,7 +1976,6 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int struct ip_vs_proto_data *pd; struct ip_vs_conn *cp; int ret, pkts; - int conn_reuse_mode; struct sock *sk; /* Already marked as IPVS request or reply? */ @@ -2059,15 +2052,16 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto, ipvs, af, skb, &iph); - conn_reuse_mode = sysctl_conn_reuse_mode(ipvs); - if (conn_reuse_mode && !iph.fragoffs && is_new_conn(skb, &iph) && cp) { + if (!iph.fragoffs && is_new_conn(skb, &iph) && cp) { + int conn_reuse_mode = sysctl_conn_reuse_mode(ipvs); bool old_ct = false, resched = false; if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest && unlikely(!atomic_read(&cp->dest->weight))) { resched = true; old_ct = ip_vs_conn_uses_old_conntrack(cp, skb); - } else if (is_new_conn_expected(cp, conn_reuse_mode)) { + } else if (conn_reuse_mode && + is_new_conn_expected(cp, conn_reuse_mode)) { old_ct = ip_vs_conn_uses_old_conntrack(cp, skb); if (!atomic_read(&cp->n_control)) { resched = true; @@ -2092,36 +2086,35 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int } } - if (unlikely(!cp)) { - int v; - - if (!ip_vs_try_to_schedule(ipvs, af, skb, pd, &v, &cp, &iph)) - return v; - } - - IP_VS_DBG_PKT(11, af, pp, skb, iph.off, "Incoming packet"); - /* Check the server status */ - if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { + if (cp && cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { /* the destination server is not available */ + if (sysctl_expire_nodest_conn(ipvs)) { + bool old_ct = ip_vs_conn_uses_old_conntrack(cp, skb); - __u32 flags = cp->flags; - - /* when timer already started, silently drop the packet.*/ - if (timer_pending(&cp->timer)) - __ip_vs_conn_put(cp); - else - ip_vs_conn_put(cp); + if (!old_ct) + cp->flags &= ~IP_VS_CONN_F_NFCT; - if (sysctl_expire_nodest_conn(ipvs) && - !(flags & IP_VS_CONN_F_ONE_PACKET)) { - /* try to expire the connection immediately */ ip_vs_conn_expire_now(cp); + __ip_vs_conn_put(cp); + if (old_ct) + return NF_DROP; + cp = NULL; + } else { + __ip_vs_conn_put(cp); + return NF_DROP; } + } - return NF_DROP; + if (unlikely(!cp)) { + int v; + + if (!ip_vs_try_to_schedule(ipvs, af, skb, pd, &v, &cp, &iph)) + return v; } + IP_VS_DBG_PKT(11, af, pp, skb, iph.off, "Incoming packet"); + ip_vs_in_stats(cp, skb); ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); if (cp->packet_xmit) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index f93fa0e210979..7a607ab5aebda 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -210,6 +210,17 @@ static void update_defense_level(struct netns_ipvs *ipvs) local_bh_enable(); } +/* Handler for delayed work for expiring no + * destination connections + */ +static void expire_nodest_conn_handler(struct work_struct *work) +{ + struct netns_ipvs *ipvs; + + ipvs = container_of(work, struct netns_ipvs, + expire_nodest_conn_work.work); + ip_vs_expire_nodest_conn_flush(ipvs); +} /* * Timer for checking the defense @@ -224,7 +235,8 @@ static void defense_work_handler(struct work_struct *work) update_defense_level(ipvs); if (atomic_read(&ipvs->dropentry)) ip_vs_random_dropentry(ipvs); - schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); + queue_delayed_work(system_long_wq, &ipvs->defense_work, + DEFENSE_TIMER_PERIOD); } #endif @@ -1163,6 +1175,12 @@ static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest, list_add(&dest->t_list, &ipvs->dest_trash); dest->idle_start = 0; spin_unlock_bh(&ipvs->dest_trash_lock); + + /* Queue up delayed work to expire all no destination connections. + * No-op when CONFIG_SYSCTL is disabled. + */ + if (!cleanup) + ip_vs_enqueue_expire_nodest_conns(ipvs); } @@ -4047,6 +4065,11 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode; tbl[idx++].data = &ipvs->sysctl_schedule_icmp; tbl[idx++].data = &ipvs->sysctl_ignore_tunneled; +#ifdef CONFIG_IP_VS_DEBUG + /* Global sysctls must be ro in non-init netns */ + if (!net_eq(net, &init_net)) + tbl[idx++].mode = 0444; +#endif ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); if (ipvs->sysctl_hdr == NULL) { @@ -4058,7 +4081,12 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) ipvs->sysctl_tbl = tbl; /* Schedule defense work */ INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler); - schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); + queue_delayed_work(system_long_wq, &ipvs->defense_work, + DEFENSE_TIMER_PERIOD); + + /* Init delayed work for expiring no dest conn */ + INIT_DELAYED_WORK(&ipvs->expire_nodest_conn_work, + expire_nodest_conn_handler); return 0; } @@ -4067,6 +4095,7 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) { struct net *net = ipvs->net; + cancel_delayed_work_sync(&ipvs->expire_nodest_conn_work); cancel_delayed_work_sync(&ipvs->defense_work); cancel_work_sync(&ipvs->defense_work.work); unregister_net_sysctl_table(ipvs->sysctl_hdr); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 4a988ce4264cb..d9b6f2001d006 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -66,22 +66,17 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash); struct conntrack_gc_work { struct delayed_work dwork; - u32 last_bucket; + u32 next_bucket; bool exiting; bool early_drop; - long next_gc_run; }; static __read_mostly struct kmem_cache *nf_conntrack_cachep; static DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); static __read_mostly bool nf_conntrack_locks_all; -/* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */ -#define GC_MAX_BUCKETS_DIV 128u -/* upper bound of full table scan */ -#define GC_MAX_SCAN_JIFFIES (16u * HZ) -/* desired ratio of entries found to be expired */ -#define GC_EVICT_RATIO 50u +#define GC_SCAN_INTERVAL (120u * HZ) +#define GC_SCAN_MAX_DURATION msecs_to_jiffies(10) static struct conntrack_gc_work conntrack_gc_work; @@ -1226,17 +1221,13 @@ static void nf_ct_offload_timeout(struct nf_conn *ct) static void gc_worker(struct work_struct *work) { - unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u); - unsigned int i, goal, buckets = 0, expired_count = 0; - unsigned int nf_conntrack_max95 = 0; + unsigned long end_time = jiffies + GC_SCAN_MAX_DURATION; + unsigned int i, hashsz, nf_conntrack_max95 = 0; + unsigned long next_run = GC_SCAN_INTERVAL; struct conntrack_gc_work *gc_work; - unsigned int ratio, scanned = 0; - unsigned long next_run; - gc_work = container_of(work, struct conntrack_gc_work, dwork.work); - goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV; - i = gc_work->last_bucket; + i = gc_work->next_bucket; if (gc_work->early_drop) nf_conntrack_max95 = nf_conntrack_max / 100u * 95u; @@ -1244,22 +1235,21 @@ static void gc_worker(struct work_struct *work) struct nf_conntrack_tuple_hash *h; struct hlist_nulls_head *ct_hash; struct hlist_nulls_node *n; - unsigned int hashsz; struct nf_conn *tmp; - i++; rcu_read_lock(); nf_conntrack_get_ht(&ct_hash, &hashsz); - if (i >= hashsz) - i = 0; + if (i >= hashsz) { + rcu_read_unlock(); + break; + } hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) { struct net *net; tmp = nf_ct_tuplehash_to_ctrack(h); - scanned++; if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) { nf_ct_offload_timeout(tmp); continue; @@ -1267,7 +1257,6 @@ static void gc_worker(struct work_struct *work) if (nf_ct_is_expired(tmp)) { nf_ct_gc_expired(tmp); - expired_count++; continue; } @@ -1299,7 +1288,14 @@ static void gc_worker(struct work_struct *work) */ rcu_read_unlock(); cond_resched(); - } while (++buckets < goal); + i++; + + if (time_after(jiffies, end_time) && i < hashsz) { + gc_work->next_bucket = i; + next_run = 0; + break; + } + } while (i < hashsz); if (gc_work->exiting) return; @@ -1310,40 +1306,17 @@ static void gc_worker(struct work_struct *work) * * This worker is only here to reap expired entries when system went * idle after a busy period. - * - * The heuristics below are supposed to balance conflicting goals: - * - * 1. Minimize time until we notice a stale entry - * 2. Maximize scan intervals to not waste cycles - * - * Normally, expire ratio will be close to 0. - * - * As soon as a sizeable fraction of the entries have expired - * increase scan frequency. */ - ratio = scanned ? expired_count * 100 / scanned : 0; - if (ratio > GC_EVICT_RATIO) { - gc_work->next_gc_run = min_interval; - } else { - unsigned int max = GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV; - - BUILD_BUG_ON((GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV) == 0); - - gc_work->next_gc_run += min_interval; - if (gc_work->next_gc_run > max) - gc_work->next_gc_run = max; + if (next_run) { + gc_work->early_drop = false; + gc_work->next_bucket = 0; } - - next_run = gc_work->next_gc_run; - gc_work->last_bucket = i; - gc_work->early_drop = false; queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run); } static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work) { INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker); - gc_work->next_gc_run = HZ; gc_work->exiting = false; } @@ -1736,15 +1709,17 @@ nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state) pr_debug("nf_conntrack_in: Can't track with proto module\n"); nf_conntrack_put(&ct->ct_general); skb->_nfct = 0; - NF_CT_STAT_INC_ATOMIC(state->net, invalid); - if (ret == -NF_DROP) - NF_CT_STAT_INC_ATOMIC(state->net, drop); /* Special case: TCP tracker reports an attempt to reopen a * closed/aborted connection. We have to go back and create a * fresh conntrack. */ if (ret == -NF_REPEAT) goto repeat; + + NF_CT_STAT_INC_ATOMIC(state->net, invalid); + if (ret == -NF_DROP) + NF_CT_STAT_INC_ATOMIC(state->net, drop); + ret = -ret; goto out; } diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 810cca24b3990..7626f3e1c70a7 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -489,6 +489,15 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir); ct->proto.sctp.vtag[!dir] = ih->init_tag; + + /* don't renew timeout on init retransmit so + * port reuse by client or NAT middlebox cannot + * keep entry alive indefinitely (incl. nat info). + */ + if (new_state == SCTP_CONNTRACK_CLOSED && + old_state == SCTP_CONNTRACK_CLOSED && + nf_ct_is_confirmed(ct)) + ignore = true; } ct->proto.sctp.state = new_state; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 848b137151c26..b8cc3339a2495 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -354,8 +354,8 @@ static void tcp_options(const struct sk_buff *skb, length, buff); BUG_ON(ptr == NULL); - state->td_scale = - state->flags = 0; + state->td_scale = 0; + state->flags &= IP_CT_TCP_FLAG_BE_LIBERAL; while (length > 0) { int opcode=*ptr++; @@ -840,6 +840,16 @@ static bool nf_conntrack_tcp_established(const struct nf_conn *ct) test_bit(IPS_ASSURED_BIT, &ct->status); } +static void nf_ct_tcp_state_reset(struct ip_ct_tcp_state *state) +{ + state->td_end = 0; + state->td_maxend = 0; + state->td_maxwin = 0; + state->td_maxack = 0; + state->td_scale = 0; + state->flags &= IP_CT_TCP_FLAG_BE_LIBERAL; +} + /* Returns verdict for packet, or -1 for invalid. */ int nf_conntrack_tcp_packet(struct nf_conn *ct, struct sk_buff *skb, @@ -946,8 +956,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK; ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags = ct->proto.tcp.last_flags; - memset(&ct->proto.tcp.seen[dir], 0, - sizeof(struct ip_ct_tcp_state)); + nf_ct_tcp_state_reset(&ct->proto.tcp.seen[dir]); break; } ct->proto.tcp.last_index = index; diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 7365b43f8f980..e3a2d018f4ec5 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -105,15 +105,18 @@ int nf_conntrack_udp_packet(struct nf_conn *ct, */ if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { unsigned long extra = timeouts[UDP_CT_UNREPLIED]; + bool stream = false; /* Still active after two seconds? Extend timeout. */ - if (time_after(jiffies, ct->proto.udp.stream_ts)) + if (time_after(jiffies, ct->proto.udp.stream_ts)) { extra = timeouts[UDP_CT_REPLIED]; + stream = true; + } nf_ct_refresh_acct(ct, ctinfo, skb, extra); /* Also, more likely to be important, and not a probe */ - if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) + if (stream && !test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_ASSURED, ct); } else { nf_ct_refresh_acct(ct, ctinfo, skb, diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c index 8e8a65d46345b..acd73f717a088 100644 --- a/net/netfilter/nf_nat_masquerade.c +++ b/net/netfilter/nf_nat_masquerade.c @@ -9,8 +9,19 @@ #include +struct masq_dev_work { + struct work_struct work; + struct net *net; + union nf_inet_addr addr; + int ifindex; + int (*iter)(struct nf_conn *i, void *data); +}; + +#define MAX_MASQ_WORKER_COUNT 16 + static DEFINE_MUTEX(masq_mutex); static unsigned int masq_refcnt __read_mostly; +static atomic_t masq_worker_count __read_mostly; unsigned int nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, @@ -63,13 +74,71 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, } EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4); -static int device_cmp(struct nf_conn *i, void *ifindex) +static void iterate_cleanup_work(struct work_struct *work) +{ + struct masq_dev_work *w; + + w = container_of(work, struct masq_dev_work, work); + + nf_ct_iterate_cleanup_net(w->net, w->iter, (void *)w, 0, 0); + + put_net(w->net); + kfree(w); + atomic_dec(&masq_worker_count); + module_put(THIS_MODULE); +} + +/* Iterate conntrack table in the background and remove conntrack entries + * that use the device/address being removed. + * + * In case too many work items have been queued already or memory allocation + * fails iteration is skipped, conntrack entries will time out eventually. + */ +static void nf_nat_masq_schedule(struct net *net, union nf_inet_addr *addr, + int ifindex, + int (*iter)(struct nf_conn *i, void *data), + gfp_t gfp_flags) +{ + struct masq_dev_work *w; + + if (atomic_read(&masq_worker_count) > MAX_MASQ_WORKER_COUNT) + return; + + net = maybe_get_net(net); + if (!net) + return; + + if (!try_module_get(THIS_MODULE)) + goto err_module; + + w = kzalloc(sizeof(*w), gfp_flags); + if (w) { + /* We can overshoot MAX_MASQ_WORKER_COUNT, no big deal */ + atomic_inc(&masq_worker_count); + + INIT_WORK(&w->work, iterate_cleanup_work); + w->ifindex = ifindex; + w->net = net; + w->iter = iter; + if (addr) + w->addr = *addr; + schedule_work(&w->work); + return; + } + + module_put(THIS_MODULE); + err_module: + put_net(net); +} + +static int device_cmp(struct nf_conn *i, void *arg) { const struct nf_conn_nat *nat = nfct_nat(i); + const struct masq_dev_work *w = arg; if (!nat) return 0; - return nat->masq_index == (int)(long)ifindex; + return nat->masq_index == w->ifindex; } static int masq_device_event(struct notifier_block *this, @@ -85,8 +154,8 @@ static int masq_device_event(struct notifier_block *this, * and forget them. */ - nf_ct_iterate_cleanup_net(net, device_cmp, - (void *)(long)dev->ifindex, 0, 0); + nf_nat_masq_schedule(net, NULL, dev->ifindex, + device_cmp, GFP_KERNEL); } return NOTIFY_DONE; @@ -94,35 +163,45 @@ static int masq_device_event(struct notifier_block *this, static int inet_cmp(struct nf_conn *ct, void *ptr) { - struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; - struct net_device *dev = ifa->ifa_dev->dev; struct nf_conntrack_tuple *tuple; + struct masq_dev_work *w = ptr; - if (!device_cmp(ct, (void *)(long)dev->ifindex)) + if (!device_cmp(ct, ptr)) return 0; tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; - return ifa->ifa_address == tuple->dst.u3.ip; + return nf_inet_addr_cmp(&w->addr, &tuple->dst.u3); } static int masq_inet_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev; - struct net *net = dev_net(idev->dev); + const struct in_ifaddr *ifa = ptr; + const struct in_device *idev; + const struct net_device *dev; + union nf_inet_addr addr; + + if (event != NETDEV_DOWN) + return NOTIFY_DONE; /* The masq_dev_notifier will catch the case of the device going * down. So if the inetdev is dead and being destroyed we have * no work to do. Otherwise this is an individual address removal * and we have to perform the flush. */ + idev = ifa->ifa_dev; if (idev->dead) return NOTIFY_DONE; - if (event == NETDEV_DOWN) - nf_ct_iterate_cleanup_net(net, inet_cmp, ptr, 0, 0); + memset(&addr, 0, sizeof(addr)); + + addr.ip = ifa->ifa_address; + + dev = idev->dev; + nf_nat_masq_schedule(dev_net(idev->dev), &addr, dev->ifindex, + inet_cmp, GFP_KERNEL); return NOTIFY_DONE; } @@ -136,8 +215,6 @@ static struct notifier_block masq_inet_notifier = { }; #if IS_ENABLED(CONFIG_IPV6) -static atomic_t v6_worker_count __read_mostly; - static int nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev, const struct in6_addr *daddr, unsigned int srcprefs, @@ -187,40 +264,6 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, } EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6); -struct masq_dev_work { - struct work_struct work; - struct net *net; - struct in6_addr addr; - int ifindex; -}; - -static int inet6_cmp(struct nf_conn *ct, void *work) -{ - struct masq_dev_work *w = (struct masq_dev_work *)work; - struct nf_conntrack_tuple *tuple; - - if (!device_cmp(ct, (void *)(long)w->ifindex)) - return 0; - - tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; - - return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6); -} - -static void iterate_cleanup_work(struct work_struct *work) -{ - struct masq_dev_work *w; - - w = container_of(work, struct masq_dev_work, work); - - nf_ct_iterate_cleanup_net(w->net, inet6_cmp, (void *)w, 0, 0); - - put_net(w->net); - kfree(w); - atomic_dec(&v6_worker_count); - module_put(THIS_MODULE); -} - /* atomic notifier; can't call nf_ct_iterate_cleanup_net (it can sleep). * * Defer it to the system workqueue. @@ -233,36 +276,19 @@ static int masq_inet6_event(struct notifier_block *this, { struct inet6_ifaddr *ifa = ptr; const struct net_device *dev; - struct masq_dev_work *w; - struct net *net; + union nf_inet_addr addr; - if (event != NETDEV_DOWN || atomic_read(&v6_worker_count) >= 16) + if (event != NETDEV_DOWN) return NOTIFY_DONE; dev = ifa->idev->dev; - net = maybe_get_net(dev_net(dev)); - if (!net) - return NOTIFY_DONE; - if (!try_module_get(THIS_MODULE)) - goto err_module; + memset(&addr, 0, sizeof(addr)); - w = kmalloc(sizeof(*w), GFP_ATOMIC); - if (w) { - atomic_inc(&v6_worker_count); - - INIT_WORK(&w->work, iterate_cleanup_work); - w->ifindex = dev->ifindex; - w->net = net; - w->addr = ifa->addr; - schedule_work(&w->work); + addr.in6 = ifa->addr; - return NOTIFY_DONE; - } - - module_put(THIS_MODULE); - err_module: - put_net(net); + nf_nat_masq_schedule(dev_net(dev), &addr, dev->ifindex, inet_cmp, + GFP_ATOMIC); return NOTIFY_DONE; } diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index f8f52ff99cfb0..79a393044863b 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -64,6 +64,15 @@ static void nf_queue_entry_release_br_nf_refs(struct sk_buff *skb) #endif } +static void nf_queue_sock_put(struct sock *sk) +{ +#ifdef CONFIG_INET + sock_gen_put(sk); +#else + sock_put(sk); +#endif +} + void nf_queue_entry_release_refs(struct nf_queue_entry *entry) { struct nf_hook_state *state = &entry->state; @@ -74,7 +83,7 @@ void nf_queue_entry_release_refs(struct nf_queue_entry *entry) if (state->out) dev_put(state->out); if (state->sk) - sock_put(state->sk); + nf_queue_sock_put(state->sk); nf_queue_entry_release_br_nf_refs(entry->skb); } @@ -99,18 +108,20 @@ static void nf_queue_entry_get_br_nf_refs(struct sk_buff *skb) } /* Bump dev refs so they don't vanish while packet is out */ -void nf_queue_entry_get_refs(struct nf_queue_entry *entry) +bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) { struct nf_hook_state *state = &entry->state; + if (state->sk && !refcount_inc_not_zero(&state->sk->sk_refcnt)) + return false; + if (state->in) dev_hold(state->in); if (state->out) dev_hold(state->out); - if (state->sk) - sock_hold(state->sk); nf_queue_entry_get_br_nf_refs(entry->skb); + return true; } EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); @@ -183,6 +194,18 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, break; } + if (skb_sk_is_prefetched(skb)) { + struct sock *sk = skb->sk; + + if (!sk_is_refcounted(sk)) { + if (!refcount_inc_not_zero(&sk->sk_refcnt)) + return -ENOTCONN; + + /* drop refcount on skb_orphan */ + skb->destructor = sock_edemux; + } + } + entry = kmalloc(sizeof(*entry) + route_key_size, GFP_ATOMIC); if (!entry) { status = -ENOMEM; @@ -201,7 +224,10 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, .size = sizeof(*entry) + route_key_size, }; - nf_queue_entry_get_refs(entry); + if (!nf_queue_entry_get_refs(entry)) { + kfree(entry); + return -ENOTCONN; + } switch (entry->state.pf) { case AF_INET: diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 373ea0e49f12d..58a7d89719b1d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2267,27 +2267,31 @@ struct nft_expr *nft_expr_init(const struct nft_ctx *ctx, err = nf_tables_expr_parse(ctx, nla, &info); if (err < 0) - goto err1; + goto err_expr_parse; + + err = -EOPNOTSUPP; + if (!(info.ops->type->flags & NFT_EXPR_STATEFUL)) + goto err_expr_stateful; err = -ENOMEM; expr = kzalloc(info.ops->size, GFP_KERNEL); if (expr == NULL) - goto err2; + goto err_expr_stateful; err = nf_tables_newexpr(ctx, &info, expr); if (err < 0) - goto err3; + goto err_expr_new; return expr; -err3: +err_expr_new: kfree(expr); -err2: +err_expr_stateful: owner = info.ops->type->owner; if (info.ops->type->release_ops) info.ops->type->release_ops(info.ops); module_put(owner); -err1: +err_expr_parse: return ERR_PTR(err); } @@ -5184,12 +5188,15 @@ static int nf_tables_updobj(const struct nft_ctx *ctx, { struct nft_object *newobj; struct nft_trans *trans; - int err; + int err = -ENOMEM; + + if (!try_module_get(type->owner)) + return -ENOENT; trans = nft_trans_alloc(ctx, NFT_MSG_NEWOBJ, sizeof(struct nft_trans_obj)); if (!trans) - return -ENOMEM; + goto err_trans; newobj = nft_obj_init(ctx, type, attr); if (IS_ERR(newobj)) { @@ -5206,6 +5213,8 @@ static int nf_tables_updobj(const struct nft_ctx *ctx, err_free_trans: kfree(trans); +err_trans: + module_put(type->owner); return err; } @@ -6544,7 +6553,7 @@ static void nft_obj_commit_update(struct nft_trans *trans) if (obj->ops->update) obj->ops->update(obj, newobj); - kfree(newobj); + nft_obj_destroy(&trans->ctx, newobj); } static void nft_commit_release(struct nft_trans *trans) @@ -6561,6 +6570,9 @@ static void nft_commit_release(struct nft_trans *trans) nf_tables_chain_destroy(&trans->ctx); break; case NFT_MSG_DELRULE: + if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) + nft_flow_rule_destroy(nft_trans_flow_rule(trans)); + nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); break; case NFT_MSG_DELSET: @@ -6882,6 +6894,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_rule_notify(&trans->ctx, nft_trans_rule(trans), NFT_MSG_NEWRULE); + if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) + nft_flow_rule_destroy(nft_trans_flow_rule(trans)); + nft_trans_destroy(trans); break; case NFT_MSG_DELRULE: @@ -7109,7 +7124,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_NEWOBJ: if (nft_trans_obj_update(trans)) { - kfree(nft_trans_obj_newobj(trans)); + nft_obj_destroy(&trans->ctx, nft_trans_obj_newobj(trans)); nft_trans_destroy(trans); } else { trans->ctx.table->use--; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 96c74c4c71762..ceb0ef437e23e 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -153,7 +153,7 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) struct nft_rule *const *rules; const struct nft_rule *rule; const struct nft_expr *expr, *last; - struct nft_regs regs; + struct nft_regs regs = {}; unsigned int stackptr = 0; struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; bool genbit = READ_ONCE(net->nft.gencursor); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index ca21f8f4a47c1..581bd1353a447 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -712,9 +712,15 @@ static struct nf_queue_entry * nf_queue_entry_dup(struct nf_queue_entry *e) { struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC); - if (entry) - nf_queue_entry_get_refs(entry); - return entry; + + if (!entry) + return NULL; + + if (nf_queue_entry_get_refs(entry)) + return entry; + + kfree(entry); + return NULL; } #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) @@ -840,11 +846,16 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) } static int -nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff) +nfqnl_mangle(void *data, unsigned int data_len, struct nf_queue_entry *e, int diff) { struct sk_buff *nskb; if (diff < 0) { + unsigned int min_len = skb_transport_offset(e->skb); + + if (data_len < min_len) + return -EINVAL; + if (pskb_trim(e->skb, data_len)) return -ENOMEM; } else if (diff > 0) { diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 95415d2b81c93..6bcc18124e5bd 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -164,17 +164,8 @@ static int nft_dynset_init(const struct nft_ctx *ctx, return -EBUSY; priv->op = ntohl(nla_get_be32(tb[NFTA_DYNSET_OP])); - switch (priv->op) { - case NFT_DYNSET_OP_ADD: - case NFT_DYNSET_OP_DELETE: - break; - case NFT_DYNSET_OP_UPDATE: - if (!(set->flags & NFT_SET_TIMEOUT)) - return -EOPNOTSUPP; - break; - default: + if (priv->op > NFT_DYNSET_OP_DELETE) return -EOPNOTSUPP; - } timeout = 0; if (tb[NFTA_DYNSET_TIMEOUT] != NULL) { @@ -213,9 +204,6 @@ static int nft_dynset_init(const struct nft_ctx *ctx, return PTR_ERR(priv->expr); err = -EOPNOTSUPP; - if (!(priv->expr->ops->type->flags & NFT_EXPR_STATEFUL)) - goto err1; - if (priv->expr->ops->type->flags & NFT_EXPR_GC) { if (set->flags & NFT_SET_TIMEOUT) goto err1; diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 17c0f75dfcdb7..0c5bc3c37ecf4 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -283,7 +283,8 @@ static void nft_nat_inet_eval(const struct nft_expr *expr, { const struct nft_nat *priv = nft_expr_priv(expr); - if (priv->family == nft_pf(pkt)) + if (priv->family == nft_pf(pkt) || + priv->family == NFPROTO_INET) nft_nat_eval(expr, regs, pkt); } diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 921f8f45b17f4..cf0512fc648e7 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -420,6 +420,9 @@ static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt, struct sk_buff *skb, unsigned int *l4csum_offset) { + if (pkt->xt.fragoff) + return -1; + switch (pkt->tprot) { case IPPROTO_TCP: *l4csum_offset = offsetof(struct tcphdr, check); diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 9de0eb20e9544..e7eb56b4b89e5 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -142,6 +142,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key, /* Another cpu may race to insert the element with the same key */ if (prev) { nft_set_elem_destroy(set, he, true); + atomic_dec(&set->nelems); he = prev; } @@ -151,6 +152,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key, err2: nft_set_elem_destroy(set, he, true); + atomic_dec(&set->nelems); err1: return false; } diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index 637ce3e8c575c..4026ec38526f6 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -14,6 +14,32 @@ struct nft_socket { }; }; +static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt) +{ + const struct net_device *indev = nft_in(pkt); + const struct sk_buff *skb = pkt->skb; + struct sock *sk = NULL; + + if (!indev) + return NULL; + + switch (nft_pf(pkt)) { + case NFPROTO_IPV4: + sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, indev); + break; +#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) + case NFPROTO_IPV6: + sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, indev); + break; +#endif + default: + WARN_ON_ONCE(1); + break; + } + + return sk; +} + static void nft_socket_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -27,20 +53,7 @@ static void nft_socket_eval(const struct nft_expr *expr, sk = NULL; if (!sk) - switch(nft_pf(pkt)) { - case NFPROTO_IPV4: - sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, nft_in(pkt)); - break; -#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) - case NFPROTO_IPV6: - sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, nft_in(pkt)); - break; -#endif - default: - WARN_ON_ONCE(1); - regs->verdict.code = NFT_BREAK; - return; - } + sk = nft_socket_do_lookup(pkt); if (!sk) { regs->verdict.code = NFT_BREAK; @@ -123,6 +136,16 @@ static int nft_socket_dump(struct sk_buff *skb, return 0; } +static int nft_socket_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + return nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT)); +} + static struct nft_expr_type nft_socket_type; static const struct nft_expr_ops nft_socket_ops = { .type = &nft_socket_type, @@ -130,6 +153,7 @@ static const struct nft_expr_ops nft_socket_ops = { .eval = nft_socket_eval, .init = nft_socket_init, .dump = nft_socket_dump, + .validate = nft_socket_validate, }; static struct nft_expr_type nft_socket_type __read_mostly = { diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c index e2c1fc6088412..15abb0e496034 100644 --- a/net/netfilter/nft_synproxy.c +++ b/net/netfilter/nft_synproxy.c @@ -191,8 +191,10 @@ static int nft_synproxy_do_init(const struct nft_ctx *ctx, if (err) goto nf_ct_failure; err = nf_synproxy_ipv6_init(snet, ctx->net); - if (err) + if (err) { + nf_synproxy_ipv4_fini(snet, ctx->net); goto nf_ct_failure; + } break; } diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 8cd3daf0e3db6..1778e4e8ce247 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -144,8 +144,8 @@ static int netlbl_cipsov4_add_std(struct genl_info *info, return -ENOMEM; doi_def->map.std = kzalloc(sizeof(*doi_def->map.std), GFP_KERNEL); if (doi_def->map.std == NULL) { - ret_val = -ENOMEM; - goto add_std_failure; + kfree(doi_def); + return -ENOMEM; } doi_def->type = CIPSO_V4_MAP_TRANS; diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 12aa803b2f689..1ad55f27c582b 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -611,9 +611,8 @@ int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry, audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL, audit_info); if (audit_buf != NULL) { audit_log_format(audit_buf, - " nlbl_domain=%s res=%u", - entry->domain ? entry->domain : "(default)", - ret_val == 0 ? 1 : 0); + " nlbl_domain=%s res=1", + entry->domain ? entry->domain : "(default)"); audit_log_end(audit_buf); } diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 5e1239cef0005..91b35b7c80d82 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -885,6 +885,8 @@ int netlbl_bitmap_walk(const unsigned char *bitmap, u32 bitmap_len, unsigned char bitmask; unsigned char byte; + if (offset >= bitmap_len) + return -1; byte_offset = offset / 8; byte = bitmap[byte_offset]; bit_spot = offset; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 9d993b4cf1aff..86b70385dce3b 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -148,6 +148,8 @@ static const struct rhashtable_params netlink_rhashtable_params; static inline u32 netlink_group_mask(u32 group) { + if (group > 32) + return 0; return group ? 1 << (group - 1) : 0; } @@ -585,7 +587,10 @@ static int netlink_insert(struct sock *sk, u32 portid) /* We need to ensure that the socket is hashed and visible. */ smp_wmb(); - nlk_sk(sk)->bound = portid; + /* Paired with lockless reads from netlink_bind(), + * netlink_connect() and netlink_sendmsg(). + */ + WRITE_ONCE(nlk_sk(sk)->bound, portid); err: release_sock(sk); @@ -1003,7 +1008,8 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (nlk->ngroups < BITS_PER_LONG) groups &= (1UL << nlk->ngroups) - 1; - bound = nlk->bound; + /* Paired with WRITE_ONCE() in netlink_insert() */ + bound = READ_ONCE(nlk->bound); if (bound) { /* Ensure nlk->portid is up-to-date. */ smp_rmb(); @@ -1089,8 +1095,9 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, /* No need for barriers here as we return to user-space without * using any of the bound attributes. + * Paired with WRITE_ONCE() in netlink_insert(). */ - if (!nlk->bound) + if (!READ_ONCE(nlk->bound)) err = netlink_autobind(sock); if (err == 0) { @@ -1857,6 +1864,11 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (msg->msg_flags&MSG_OOB) return -EOPNOTSUPP; + if (len == 0) { + pr_warn_once("Zero length message leads to an empty skb\n"); + return -ENODATA; + } + err = scm_send(sock, msg, &scm, true); if (err < 0) return err; @@ -1879,7 +1891,8 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) dst_group = nlk->dst_group; } - if (!nlk->bound) { + /* Paired with WRITE_ONCE() in netlink_insert() */ + if (!READ_ONCE(nlk->bound)) { err = netlink_autobind(sock); if (err) goto out; @@ -1974,7 +1987,6 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, copied = len; } - skb_reset_transport_header(data_skb); err = skb_copy_datagram_msg(data_skb, 0, msg, copied); if (msg->msg_name) { @@ -2240,6 +2252,13 @@ static int netlink_dump(struct sock *sk) * single netdev. The outcome is MSG_TRUNC error. */ skb_reserve(skb, skb_tailroom(skb) - alloc_size); + + /* Make sure malicious BPF programs can not read unitialized memory + * from skb->head -> skb->data + */ + skb_reset_network_header(skb); + skb_reset_mac_header(skb); + netlink_skb_set_owner_r(skb, sk); if (nlk->dump_done_errno > 0) { @@ -2521,13 +2540,15 @@ int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid, /* errors reported via destination sk->sk_err, but propagate * delivery errors if NETLINK_BROADCAST_ERROR flag is set */ err = nlmsg_multicast(sk, skb, exclude_portid, group, flags); + if (err == -ESRCH) + err = 0; } if (report) { int err2; err2 = nlmsg_unicast(sk, skb, portid); - if (!err || err == -ESRCH) + if (!err) err = err2; } diff --git a/net/nfc/af_nfc.c b/net/nfc/af_nfc.c index 4a9e72073564a..581358dcbdf8d 100644 --- a/net/nfc/af_nfc.c +++ b/net/nfc/af_nfc.c @@ -60,6 +60,9 @@ int nfc_proto_register(const struct nfc_protocol *nfc_proto) proto_tab[nfc_proto->id] = nfc_proto; write_unlock(&proto_tab_lock); + if (rc) + proto_unregister(nfc_proto->proto); + return rc; } EXPORT_SYMBOL(nfc_proto_register); diff --git a/net/nfc/core.c b/net/nfc/core.c index c5f9c3ee82f8e..2d4729d1f0eb9 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -38,7 +38,7 @@ int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -94,13 +94,13 @@ int nfc_dev_up(struct nfc_dev *dev) device_lock(&dev->dev); - if (dev->rfkill && rfkill_blocked(dev->rfkill)) { - rc = -ERFKILL; + if (dev->shutting_down) { + rc = -ENODEV; goto error; } - if (!device_is_registered(&dev->dev)) { - rc = -ENODEV; + if (dev->rfkill && rfkill_blocked(dev->rfkill)) { + rc = -ERFKILL; goto error; } @@ -142,7 +142,7 @@ int nfc_dev_down(struct nfc_dev *dev) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -206,7 +206,7 @@ int nfc_start_poll(struct nfc_dev *dev, u32 im_protocols, u32 tm_protocols) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -245,7 +245,7 @@ int nfc_stop_poll(struct nfc_dev *dev) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -290,7 +290,7 @@ int nfc_dep_link_up(struct nfc_dev *dev, int target_index, u8 comm_mode) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -334,7 +334,7 @@ int nfc_dep_link_down(struct nfc_dev *dev) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -400,7 +400,7 @@ int nfc_activate_target(struct nfc_dev *dev, u32 target_idx, u32 protocol) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -446,7 +446,7 @@ int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx, u8 mode) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -493,7 +493,7 @@ int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, struct sk_buff *skb, device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; kfree_skb(skb); goto error; @@ -550,7 +550,7 @@ int nfc_enable_se(struct nfc_dev *dev, u32 se_idx) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -599,7 +599,7 @@ int nfc_disable_se(struct nfc_dev *dev, u32 se_idx) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -1118,11 +1118,7 @@ int nfc_register_device(struct nfc_dev *dev) if (rc) pr_err("Could not register llcp device\n"); - rc = nfc_genl_device_added(dev); - if (rc) - pr_debug("The userspace won't be notified that the device %s was added\n", - dev_name(&dev->dev)); - + device_lock(&dev->dev); dev->rfkill = rfkill_alloc(dev_name(&dev->dev), &dev->dev, RFKILL_TYPE_NFC, &nfc_rfkill_ops, dev); if (dev->rfkill) { @@ -1131,6 +1127,13 @@ int nfc_register_device(struct nfc_dev *dev) dev->rfkill = NULL; } } + dev->shutting_down = false; + device_unlock(&dev->dev); + + rc = nfc_genl_device_added(dev); + if (rc) + pr_debug("The userspace won't be notified that the device %s was added\n", + dev_name(&dev->dev)); return 0; } @@ -1147,24 +1150,25 @@ void nfc_unregister_device(struct nfc_dev *dev) pr_debug("dev_name=%s\n", dev_name(&dev->dev)); + rc = nfc_genl_device_removed(dev); + if (rc) + pr_debug("The userspace won't be notified that the device %s " + "was removed\n", dev_name(&dev->dev)); + + device_lock(&dev->dev); if (dev->rfkill) { rfkill_unregister(dev->rfkill); rfkill_destroy(dev->rfkill); + dev->rfkill = NULL; } + dev->shutting_down = true; + device_unlock(&dev->dev); if (dev->ops->check_presence) { - device_lock(&dev->dev); - dev->shutting_down = true; - device_unlock(&dev->dev); del_timer_sync(&dev->check_pres_timer); cancel_work_sync(&dev->check_pres_work); } - rc = nfc_genl_device_removed(dev); - if (rc) - pr_debug("The userspace won't be notified that the device %s " - "was removed\n", dev_name(&dev->dev)); - nfc_llcp_unregister_device(dev); mutex_lock(&nfc_devlist_mutex); diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c index e3599ed4a7a87..9c9caa307cf16 100644 --- a/net/nfc/digital_core.c +++ b/net/nfc/digital_core.c @@ -277,6 +277,7 @@ int digital_tg_configure_hw(struct nfc_digital_dev *ddev, int type, int param) static int digital_tg_listen_mdaa(struct nfc_digital_dev *ddev, u8 rf_tech) { struct digital_tg_mdaa_params *params; + int rc; params = kzalloc(sizeof(*params), GFP_KERNEL); if (!params) @@ -291,8 +292,12 @@ static int digital_tg_listen_mdaa(struct nfc_digital_dev *ddev, u8 rf_tech) get_random_bytes(params->nfcid2 + 2, NFC_NFCID2_MAXSIZE - 2); params->sc = DIGITAL_SENSF_FELICA_SC; - return digital_send_cmd(ddev, DIGITAL_CMD_TG_LISTEN_MDAA, NULL, params, - 500, digital_tg_recv_atr_req, NULL); + rc = digital_send_cmd(ddev, DIGITAL_CMD_TG_LISTEN_MDAA, NULL, params, + 500, digital_tg_recv_atr_req, NULL); + if (rc) + kfree(params); + + return rc; } static int digital_tg_listen_md(struct nfc_digital_dev *ddev, u8 rf_tech) diff --git a/net/nfc/digital_technology.c b/net/nfc/digital_technology.c index 84d2345c75a3f..3adf4589852af 100644 --- a/net/nfc/digital_technology.c +++ b/net/nfc/digital_technology.c @@ -465,8 +465,12 @@ static int digital_in_send_sdd_req(struct nfc_digital_dev *ddev, skb_put_u8(skb, sel_cmd); skb_put_u8(skb, DIGITAL_SDD_REQ_SEL_PAR); - return digital_in_send_cmd(ddev, skb, 30, digital_in_recv_sdd_res, - target); + rc = digital_in_send_cmd(ddev, skb, 30, digital_in_recv_sdd_res, + target); + if (rc) + kfree_skb(skb); + + return rc; } static void digital_in_recv_sens_res(struct nfc_digital_dev *ddev, void *arg, diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index a7e861eede2d9..bd2174699af97 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -789,6 +789,11 @@ static int llcp_sock_sendmsg(struct socket *sock, struct msghdr *msg, lock_sock(sk); + if (!llcp_sock->local) { + release_sock(sk); + return -ENODEV; + } + if (sk->sk_type == SOCK_DGRAM) { DECLARE_SOCKADDR(struct sockaddr_nfc_llcp *, addr, msg->msg_name); diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 6a34a0a786eaa..b2e922fcc70da 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -144,12 +144,15 @@ inline int nci_request(struct nci_dev *ndev, { int rc; - if (!test_bit(NCI_UP, &ndev->flags)) - return -ENETDOWN; - /* Serialize all requests */ mutex_lock(&ndev->req_lock); - rc = __nci_request(ndev, req, opt, timeout); + /* check the state after obtaing the lock against any races + * from nci_close_device when the device gets removed. + */ + if (test_bit(NCI_UP, &ndev->flags)) + rc = __nci_request(ndev, req, opt, timeout); + else + rc = -ENETDOWN; mutex_unlock(&ndev->req_lock); return rc; @@ -470,6 +473,11 @@ static int nci_open_device(struct nci_dev *ndev) mutex_lock(&ndev->req_lock); + if (test_bit(NCI_UNREG, &ndev->flags)) { + rc = -ENODEV; + goto done; + } + if (test_bit(NCI_UP, &ndev->flags)) { rc = -EALREADY; goto done; @@ -533,9 +541,17 @@ static int nci_open_device(struct nci_dev *ndev) static int nci_close_device(struct nci_dev *ndev) { nci_req_cancel(ndev, ENODEV); + + /* This mutex needs to be held as a barrier for + * caller nci_unregister_device + */ mutex_lock(&ndev->req_lock); if (!test_and_clear_bit(NCI_UP, &ndev->flags)) { + /* Need to flush the cmd wq in case + * there is a queued/running cmd_work + */ + flush_workqueue(ndev->cmd_wq); del_timer_sync(&ndev->cmd_timer); del_timer_sync(&ndev->data_timer); mutex_unlock(&ndev->req_lock); @@ -570,8 +586,8 @@ static int nci_close_device(struct nci_dev *ndev) /* Flush cmd wq */ flush_workqueue(ndev->cmd_wq); - /* Clear flags */ - ndev->flags = 0; + /* Clear flags except NCI_UNREG */ + ndev->flags &= BIT(NCI_UNREG); mutex_unlock(&ndev->req_lock); @@ -1253,6 +1269,12 @@ void nci_unregister_device(struct nci_dev *ndev) { struct nci_conn_info *conn_info, *n; + /* This set_bit is not protected with specialized barrier, + * However, it is fine because the mutex_lock(&ndev->req_lock); + * in nci_close_device() will help to emit one. + */ + set_bit(NCI_UNREG, &ndev->flags); + nci_close_device(ndev); destroy_workqueue(ndev->cmd_wq); diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c index ce3382be937ff..b002e18f38c81 100644 --- a/net/nfc/nci/data.c +++ b/net/nfc/nci/data.c @@ -118,7 +118,7 @@ static int nci_queue_tx_data_frags(struct nci_dev *ndev, skb_frag = nci_skb_alloc(ndev, (NCI_DATA_HDR_SIZE + frag_len), - GFP_KERNEL); + GFP_ATOMIC); if (skb_frag == NULL) { rc = -ENOMEM; goto free_exit; diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index 04e55ccb33836..4fe336ff2bfa1 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -153,7 +153,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, i = 0; skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len + - NCI_DATA_HDR_SIZE, GFP_KERNEL); + NCI_DATA_HDR_SIZE, GFP_ATOMIC); if (!skb) return -ENOMEM; @@ -186,7 +186,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, if (i < data_len) { skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len + - NCI_DATA_HDR_SIZE, GFP_KERNEL); + NCI_DATA_HDR_SIZE, GFP_ATOMIC); if (!skb) return -ENOMEM; diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c index a48297b79f34f..b0ed2b47ac437 100644 --- a/net/nfc/nci/rsp.c +++ b/net/nfc/nci/rsp.c @@ -277,6 +277,8 @@ static void nci_core_conn_close_rsp_packet(struct nci_dev *ndev, ndev->cur_conn_id); if (conn_info) { list_del(&conn_info->list); + if (conn_info == ndev->rf_conn_info) + ndev->rf_conn_info = NULL; devm_kfree(&ndev->nfc_dev->dev, conn_info); } } diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 99b06a16b8086..9e94f732e717c 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -644,8 +644,10 @@ static int nfc_genl_dump_devices_done(struct netlink_callback *cb) { struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0]; - nfc_device_iter_exit(iter); - kfree(iter); + if (iter) { + nfc_device_iter_exit(iter); + kfree(iter); + } return 0; } @@ -1250,7 +1252,7 @@ int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name, struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!msg) return -ENOMEM; @@ -1266,7 +1268,7 @@ int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name, genlmsg_end(msg, hdr); - genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); + genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC); return 0; @@ -1400,8 +1402,10 @@ static int nfc_genl_dump_ses_done(struct netlink_callback *cb) { struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0]; - nfc_device_iter_exit(iter); - kfree(iter); + if (iter) { + nfc_device_iter_exit(iter); + kfree(iter); + } return 0; } diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 5c68f9ea98810..9e8a5c4862d04 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -377,6 +377,7 @@ static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh, update_ip_l4_checksum(skb, nh, *addr, new_addr); csum_replace4(&nh->check, *addr, new_addr); skb_clear_hash(skb); + ovs_ct_clear(skb, NULL); *addr = new_addr; } @@ -424,15 +425,47 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto, update_ipv6_checksum(skb, l4_proto, addr, new_addr); skb_clear_hash(skb); + ovs_ct_clear(skb, NULL); memcpy(addr, new_addr, sizeof(__be32[4])); } -static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask) +static void set_ipv6_dsfield(struct sk_buff *skb, struct ipv6hdr *nh, u8 ipv6_tclass, u8 mask) { + u8 old_ipv6_tclass = ipv6_get_dsfield(nh); + + ipv6_tclass = OVS_MASKED(old_ipv6_tclass, ipv6_tclass, mask); + + if (skb->ip_summed == CHECKSUM_COMPLETE) + csum_replace(&skb->csum, (__force __wsum)(old_ipv6_tclass << 12), + (__force __wsum)(ipv6_tclass << 12)); + + ipv6_change_dsfield(nh, ~mask, ipv6_tclass); +} + +static void set_ipv6_fl(struct sk_buff *skb, struct ipv6hdr *nh, u32 fl, u32 mask) +{ + u32 ofl; + + ofl = nh->flow_lbl[0] << 16 | nh->flow_lbl[1] << 8 | nh->flow_lbl[2]; + fl = OVS_MASKED(ofl, fl, mask); + /* Bits 21-24 are always unmasked, so this retains their values. */ - OVS_SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16)); - OVS_SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8)); - OVS_SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask); + nh->flow_lbl[0] = (u8)(fl >> 16); + nh->flow_lbl[1] = (u8)(fl >> 8); + nh->flow_lbl[2] = (u8)fl; + + if (skb->ip_summed == CHECKSUM_COMPLETE) + csum_replace(&skb->csum, (__force __wsum)htonl(ofl), (__force __wsum)htonl(fl)); +} + +static void set_ipv6_ttl(struct sk_buff *skb, struct ipv6hdr *nh, u8 new_ttl, u8 mask) +{ + new_ttl = OVS_MASKED(nh->hop_limit, new_ttl, mask); + + if (skb->ip_summed == CHECKSUM_COMPLETE) + csum_replace(&skb->csum, (__force __wsum)(nh->hop_limit << 8), + (__force __wsum)(new_ttl << 8)); + nh->hop_limit = new_ttl; } static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl, @@ -550,18 +583,17 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, } } if (mask->ipv6_tclass) { - ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass); + set_ipv6_dsfield(skb, nh, key->ipv6_tclass, mask->ipv6_tclass); flow_key->ip.tos = ipv6_get_dsfield(nh); } if (mask->ipv6_label) { - set_ipv6_fl(nh, ntohl(key->ipv6_label), + set_ipv6_fl(skb, nh, ntohl(key->ipv6_label), ntohl(mask->ipv6_label)); flow_key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL); } if (mask->ipv6_hlimit) { - OVS_SET_MASKED(nh->hop_limit, key->ipv6_hlimit, - mask->ipv6_hlimit); + set_ipv6_ttl(skb, nh, key->ipv6_hlimit, mask->ipv6_hlimit); flow_key->ip.ttl = nh->hop_limit; } return 0; @@ -634,6 +666,7 @@ static int set_nsh(struct sk_buff *skb, struct sw_flow_key *flow_key, static void set_tp_port(struct sk_buff *skb, __be16 *port, __be16 new_port, __sum16 *check) { + ovs_ct_clear(skb, NULL); inet_proto_csum_replace2(check, skb, *port, new_port, false); *port = new_port; } @@ -673,6 +706,7 @@ static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key, uh->dest = dst; flow_key->tp.src = src; flow_key->tp.dst = dst; + ovs_ct_clear(skb, NULL); } skb_clear_hash(skb); @@ -735,6 +769,8 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key, sh->checksum = old_csum ^ old_correct_csum ^ new_csum; skb_clear_hash(skb); + ovs_ct_clear(skb, NULL); + flow_key->tp.src = sh->source; flow_key->tp.dst = sh->dest; @@ -1007,7 +1043,7 @@ static int clone(struct datapath *dp, struct sk_buff *skb, int rem = nla_len(attr); bool dont_clone_flow_key; - /* The first action is always 'OVS_CLONE_ATTR_ARG'. */ + /* The first action is always 'OVS_CLONE_ATTR_EXEC'. */ clone_arg = nla_data(attr); dont_clone_flow_key = nla_get_u32(clone_arg); actions = nla_next(clone_arg, &rem); diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index b6f98eba71f1b..78448b6888ddc 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -730,6 +730,57 @@ static bool skb_nfct_cached(struct net *net, } #if IS_ENABLED(CONFIG_NF_NAT) +static void ovs_nat_update_key(struct sw_flow_key *key, + const struct sk_buff *skb, + enum nf_nat_manip_type maniptype) +{ + if (maniptype == NF_NAT_MANIP_SRC) { + __be16 src; + + key->ct_state |= OVS_CS_F_SRC_NAT; + if (key->eth.type == htons(ETH_P_IP)) + key->ipv4.addr.src = ip_hdr(skb)->saddr; + else if (key->eth.type == htons(ETH_P_IPV6)) + memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr, + sizeof(key->ipv6.addr.src)); + else + return; + + if (key->ip.proto == IPPROTO_UDP) + src = udp_hdr(skb)->source; + else if (key->ip.proto == IPPROTO_TCP) + src = tcp_hdr(skb)->source; + else if (key->ip.proto == IPPROTO_SCTP) + src = sctp_hdr(skb)->source; + else + return; + + key->tp.src = src; + } else { + __be16 dst; + + key->ct_state |= OVS_CS_F_DST_NAT; + if (key->eth.type == htons(ETH_P_IP)) + key->ipv4.addr.dst = ip_hdr(skb)->daddr; + else if (key->eth.type == htons(ETH_P_IPV6)) + memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr, + sizeof(key->ipv6.addr.dst)); + else + return; + + if (key->ip.proto == IPPROTO_UDP) + dst = udp_hdr(skb)->dest; + else if (key->ip.proto == IPPROTO_TCP) + dst = tcp_hdr(skb)->dest; + else if (key->ip.proto == IPPROTO_SCTP) + dst = sctp_hdr(skb)->dest; + else + return; + + key->tp.dst = dst; + } +} + /* Modelled after nf_nat_ipv[46]_fn(). * range is only used for new, uninitialized NAT state. * Returns either NF_ACCEPT or NF_DROP. @@ -737,7 +788,7 @@ static bool skb_nfct_cached(struct net *net, static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype) + enum nf_nat_manip_type maniptype, struct sw_flow_key *key) { int hooknum, nh_off, err = NF_ACCEPT; @@ -810,58 +861,11 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, skb_push(skb, nh_off); skb_postpush_rcsum(skb, skb->data, nh_off); - return err; -} - -static void ovs_nat_update_key(struct sw_flow_key *key, - const struct sk_buff *skb, - enum nf_nat_manip_type maniptype) -{ - if (maniptype == NF_NAT_MANIP_SRC) { - __be16 src; - - key->ct_state |= OVS_CS_F_SRC_NAT; - if (key->eth.type == htons(ETH_P_IP)) - key->ipv4.addr.src = ip_hdr(skb)->saddr; - else if (key->eth.type == htons(ETH_P_IPV6)) - memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr, - sizeof(key->ipv6.addr.src)); - else - return; - - if (key->ip.proto == IPPROTO_UDP) - src = udp_hdr(skb)->source; - else if (key->ip.proto == IPPROTO_TCP) - src = tcp_hdr(skb)->source; - else if (key->ip.proto == IPPROTO_SCTP) - src = sctp_hdr(skb)->source; - else - return; - - key->tp.src = src; - } else { - __be16 dst; - - key->ct_state |= OVS_CS_F_DST_NAT; - if (key->eth.type == htons(ETH_P_IP)) - key->ipv4.addr.dst = ip_hdr(skb)->daddr; - else if (key->eth.type == htons(ETH_P_IPV6)) - memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr, - sizeof(key->ipv6.addr.dst)); - else - return; - - if (key->ip.proto == IPPROTO_UDP) - dst = udp_hdr(skb)->dest; - else if (key->ip.proto == IPPROTO_TCP) - dst = tcp_hdr(skb)->dest; - else if (key->ip.proto == IPPROTO_SCTP) - dst = sctp_hdr(skb)->dest; - else - return; + /* Update the flow key if NAT successful. */ + if (err == NF_ACCEPT) + ovs_nat_update_key(key, skb, maniptype); - key->tp.dst = dst; - } + return err; } /* Returns NF_DROP if the packet should be dropped, NF_ACCEPT otherwise. */ @@ -903,7 +907,7 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, } else { return NF_ACCEPT; /* Connection is not NATed. */ } - err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype); + err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype, key); if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) { if (ct->status & IPS_SRC_NAT) { @@ -913,17 +917,13 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, maniptype = NF_NAT_MANIP_SRC; err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, - maniptype); + maniptype, key); } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { err = ovs_ct_nat_execute(skb, ct, ctinfo, NULL, - NF_NAT_MANIP_SRC); + NF_NAT_MANIP_SRC, key); } } - /* Mark NAT done if successful and update the flow key. */ - if (err == NF_ACCEPT) - ovs_nat_update_key(key, skb, maniptype); - return err; } #else /* !CONFIG_NF_NAT */ @@ -1319,7 +1319,8 @@ int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key) if (skb_nfct(skb)) { nf_conntrack_put(skb_nfct(skb)); nf_ct_set(skb, NULL, IP_CT_UNTRACKED); - ovs_ct_fill_key(skb, key); + if (key) + ovs_ct_fill_key(skb, key); } return 0; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 38147e6a20f53..989175ce81d4d 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -265,7 +265,7 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key) if (flags & IP6_FH_F_FRAG) { if (frag_off) { key->ip.frag = OVS_FRAG_TYPE_LATER; - key->ip.proto = nexthdr; + key->ip.proto = NEXTHDR_FRAGMENT; return 0; } key->ip.frag = OVS_FRAG_TYPE_FIRST; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index d7559c64795dc..67125939d7eee 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2179,8 +2179,8 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, icmpv6_key->icmpv6_type = ntohs(output->tp.src); icmpv6_key->icmpv6_code = ntohs(output->tp.dst); - if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || - icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { + if (swkey->tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) || + swkey->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { struct ovs_key_nd *nd_key; nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key)); @@ -2266,6 +2266,51 @@ static struct sw_flow_actions *nla_alloc_flow_actions(int size) return sfa; } +static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len); + +static void ovs_nla_free_check_pkt_len_action(const struct nlattr *action) +{ + const struct nlattr *a; + int rem; + + nla_for_each_nested(a, action, rem) { + switch (nla_type(a)) { + case OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL: + case OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER: + ovs_nla_free_nested_actions(nla_data(a), nla_len(a)); + break; + } + } +} + +static void ovs_nla_free_clone_action(const struct nlattr *action) +{ + const struct nlattr *a = nla_data(action); + int rem = nla_len(action); + + switch (nla_type(a)) { + case OVS_CLONE_ATTR_EXEC: + /* The real list of actions follows this attribute. */ + a = nla_next(a, &rem); + ovs_nla_free_nested_actions(a, rem); + break; + } +} + +static void ovs_nla_free_sample_action(const struct nlattr *action) +{ + const struct nlattr *a = nla_data(action); + int rem = nla_len(action); + + switch (nla_type(a)) { + case OVS_SAMPLE_ATTR_ARG: + /* The real list of actions follows this attribute. */ + a = nla_next(a, &rem); + ovs_nla_free_nested_actions(a, rem); + break; + } +} + static void ovs_nla_free_set_action(const struct nlattr *a) { const struct nlattr *ovs_key = nla_data(a); @@ -2279,25 +2324,50 @@ static void ovs_nla_free_set_action(const struct nlattr *a) } } -void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts) +static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len) { const struct nlattr *a; int rem; - if (!sf_acts) + /* Whenever new actions are added, the need to update this + * function should be considered. + */ + BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 21); + + if (!actions) return; - nla_for_each_attr(a, sf_acts->actions, sf_acts->actions_len, rem) { + nla_for_each_attr(a, actions, len, rem) { switch (nla_type(a)) { - case OVS_ACTION_ATTR_SET: - ovs_nla_free_set_action(a); + case OVS_ACTION_ATTR_CHECK_PKT_LEN: + ovs_nla_free_check_pkt_len_action(a); break; + + case OVS_ACTION_ATTR_CLONE: + ovs_nla_free_clone_action(a); + break; + case OVS_ACTION_ATTR_CT: ovs_ct_free_action(a); break; + + case OVS_ACTION_ATTR_SAMPLE: + ovs_nla_free_sample_action(a); + break; + + case OVS_ACTION_ATTR_SET: + ovs_nla_free_set_action(a); + break; } } +} + +void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts) +{ + if (!sf_acts) + return; + ovs_nla_free_nested_actions(sf_acts->actions, sf_acts->actions_len); kfree(sf_acts); } @@ -2329,7 +2399,7 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2); if (new_acts_size > MAX_ACTIONS_BUFSIZE) { - if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) { + if ((next_offset + req_size) > MAX_ACTIONS_BUFSIZE) { OVS_NLERR(log, "Flow action size exceeds max %u", MAX_ACTIONS_BUFSIZE); return ERR_PTR(-EMSGSIZE); @@ -3284,7 +3354,9 @@ static int clone_action_to_attr(const struct nlattr *attr, if (!start) return -EMSGSIZE; - err = ovs_nla_put_actions(nla_data(attr), rem, skb); + /* Skipping the OVS_CLONE_ATTR_EXEC that is always the first attribute. */ + attr = nla_next(nla_data(attr), &rem); + err = ovs_nla_put_actions(attr, rem, skb); if (err) nla_nest_cancel(skb, start); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 0ffbf3d17911a..a2696acbcd9d2 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1715,6 +1715,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) match->prot_hook.dev = po->prot_hook.dev; match->prot_hook.func = packet_rcv_fanout; match->prot_hook.af_packet_priv = match; + match->prot_hook.af_packet_net = read_pnet(&match->net); match->prot_hook.id_match = match_fanout_group; list_add(&match->list, &fanout_list); } @@ -1728,7 +1729,10 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) err = -ENOSPC; if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) { __dev_remove_pack(&po->prot_hook); - po->fanout = match; + + /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */ + WRITE_ONCE(po->fanout, match); + po->rollover = rollover; rollover = NULL; refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1); @@ -2253,8 +2257,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, copy_skb = skb_get(skb); skb_head = skb->data; } - if (copy_skb) + if (copy_skb) { + memset(&PACKET_SKB_CB(copy_skb)->sa.ll, 0, + sizeof(PACKET_SKB_CB(copy_skb)->sa.ll)); skb_set_owner_r(copy_skb, sk); + } } snaplen = po->rx_ring.frame_size - macoff; if ((int)snaplen < 0) { @@ -2784,8 +2791,9 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) status = TP_STATUS_SEND_REQUEST; err = po->xmit(skb); - if (unlikely(err > 0)) { - err = net_xmit_errno(err); + if (unlikely(err != 0)) { + if (err > 0) + err = net_xmit_errno(err); if (err && __packet_get_status(po, ph) == TP_STATUS_AVAILABLE) { /* skb was destructed already */ @@ -2986,8 +2994,12 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) skb->no_fcs = 1; err = po->xmit(skb); - if (err > 0 && (err = net_xmit_errno(err)) != 0) - goto out_unlock; + if (unlikely(err != 0)) { + if (err > 0) + err = net_xmit_errno(err); + if (err) + goto out_unlock; + } dev_put(dev); @@ -3294,6 +3306,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, po->prot_hook.func = packet_rcv_spkt; po->prot_hook.af_packet_priv = sk; + po->prot_hook.af_packet_net = sock_net(sk); if (proto) { po->prot_hook.type = proto; @@ -3400,6 +3413,8 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, sock_recv_ts_and_drops(msg, sk, skb); if (msg->msg_name) { + const size_t max_len = min(sizeof(skb->cb), + sizeof(struct sockaddr_storage)); int copy_len; /* If the address length field is there to be filled @@ -3422,6 +3437,10 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, msg->msg_namelen = sizeof(struct sockaddr_ll); } } + if (WARN_ON_ONCE(copy_len > max_len)) { + copy_len = max_len; + msg->msg_namelen = copy_len; + } memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len); } @@ -3874,7 +3893,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv } case PACKET_FANOUT_DATA: { - if (!po->fanout) + /* Paired with the WRITE_ONCE() in fanout_add() */ + if (!READ_ONCE(po->fanout)) return -EINVAL; return fanout_set_data(po, optval, optlen); @@ -4453,9 +4473,10 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, } out_free_pg_vec: - bitmap_free(rx_owner_map); - if (pg_vec) + if (pg_vec) { + bitmap_free(rx_owner_map); free_pg_vec(pg_vec, order, req->tp_block_nr); + } out: return err; } diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 4577e43cb7778..0c5d0f7b8b4bb 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -868,6 +868,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp, err = pep_accept_conn(newsk, skb); if (err) { + __sock_put(sk); sock_put(newsk); newsk = NULL; goto drop; @@ -946,6 +947,8 @@ static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) ret = -EBUSY; else if (sk->sk_state == TCP_ESTABLISHED) ret = -EISCONN; + else if (!pn->pn_sk.sobject) + ret = -EADDRNOTAVAIL; else ret = pep_sock_enable(sk, NULL, 0); release_sock(sk); diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c index 06ecf9d2d4bf1..ef6acd7211180 100644 --- a/net/rds/ib_frmr.c +++ b/net/rds/ib_frmr.c @@ -131,9 +131,9 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr) cpu_relax(); } - ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_len, + ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_dma_len, &off, PAGE_SIZE); - if (unlikely(ret != ibmr->sg_len)) + if (unlikely(ret != ibmr->sg_dma_len)) return ret < 0 ? ret : -EINVAL; if (cmpxchg(&frmr->fr_state, diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 1402e9166a7eb..d55d81b01d372 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -510,7 +510,7 @@ void rds_tcp_tune(struct socket *sock) sk->sk_userlocks |= SOCK_SNDBUF_LOCK; } if (rtn->rcvbuf_size > 0) { - sk->sk_sndbuf = rtn->rcvbuf_size; + sk->sk_rcvbuf = rtn->rcvbuf_size; sk->sk_userlocks |= SOCK_RCVBUF_LOCK; } release_sock(sk); diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index c533076232361..5f32113c9bbda 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -227,8 +227,8 @@ static void rose_remove_neigh(struct rose_neigh *rose_neigh) { struct rose_neigh *s; - rose_stop_ftimer(rose_neigh); - rose_stop_t0timer(rose_neigh); + del_timer_sync(&rose_neigh->ftimer); + del_timer_sync(&rose_neigh->t0timer); skb_queue_purge(&rose_neigh->queue); diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c index b3138fc2e552e..f06ddbed3fed6 100644 --- a/net/rose/rose_timer.c +++ b/net/rose/rose_timer.c @@ -31,89 +31,89 @@ static void rose_idletimer_expiry(struct timer_list *); void rose_start_heartbeat(struct sock *sk) { - del_timer(&sk->sk_timer); + sk_stop_timer(sk, &sk->sk_timer); sk->sk_timer.function = rose_heartbeat_expiry; sk->sk_timer.expires = jiffies + 5 * HZ; - add_timer(&sk->sk_timer); + sk_reset_timer(sk, &sk->sk_timer, sk->sk_timer.expires); } void rose_start_t1timer(struct sock *sk) { struct rose_sock *rose = rose_sk(sk); - del_timer(&rose->timer); + sk_stop_timer(sk, &rose->timer); rose->timer.function = rose_timer_expiry; rose->timer.expires = jiffies + rose->t1; - add_timer(&rose->timer); + sk_reset_timer(sk, &rose->timer, rose->timer.expires); } void rose_start_t2timer(struct sock *sk) { struct rose_sock *rose = rose_sk(sk); - del_timer(&rose->timer); + sk_stop_timer(sk, &rose->timer); rose->timer.function = rose_timer_expiry; rose->timer.expires = jiffies + rose->t2; - add_timer(&rose->timer); + sk_reset_timer(sk, &rose->timer, rose->timer.expires); } void rose_start_t3timer(struct sock *sk) { struct rose_sock *rose = rose_sk(sk); - del_timer(&rose->timer); + sk_stop_timer(sk, &rose->timer); rose->timer.function = rose_timer_expiry; rose->timer.expires = jiffies + rose->t3; - add_timer(&rose->timer); + sk_reset_timer(sk, &rose->timer, rose->timer.expires); } void rose_start_hbtimer(struct sock *sk) { struct rose_sock *rose = rose_sk(sk); - del_timer(&rose->timer); + sk_stop_timer(sk, &rose->timer); rose->timer.function = rose_timer_expiry; rose->timer.expires = jiffies + rose->hb; - add_timer(&rose->timer); + sk_reset_timer(sk, &rose->timer, rose->timer.expires); } void rose_start_idletimer(struct sock *sk) { struct rose_sock *rose = rose_sk(sk); - del_timer(&rose->idletimer); + sk_stop_timer(sk, &rose->idletimer); if (rose->idle > 0) { rose->idletimer.function = rose_idletimer_expiry; rose->idletimer.expires = jiffies + rose->idle; - add_timer(&rose->idletimer); + sk_reset_timer(sk, &rose->idletimer, rose->idletimer.expires); } } void rose_stop_heartbeat(struct sock *sk) { - del_timer(&sk->sk_timer); + sk_stop_timer(sk, &sk->sk_timer); } void rose_stop_timer(struct sock *sk) { - del_timer(&rose_sk(sk)->timer); + sk_stop_timer(sk, &rose_sk(sk)->timer); } void rose_stop_idletimer(struct sock *sk) { - del_timer(&rose_sk(sk)->idletimer); + sk_stop_timer(sk, &rose_sk(sk)->idletimer); } static void rose_heartbeat_expiry(struct timer_list *t) @@ -130,6 +130,7 @@ static void rose_heartbeat_expiry(struct timer_list *t) (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) { bh_unlock_sock(sk); rose_destroy_socket(sk); + sock_put(sk); return; } break; @@ -152,6 +153,7 @@ static void rose_heartbeat_expiry(struct timer_list *t) rose_start_heartbeat(sk); bh_unlock_sock(sk); + sock_put(sk); } static void rose_timer_expiry(struct timer_list *t) @@ -181,6 +183,7 @@ static void rose_timer_expiry(struct timer_list *t) break; } bh_unlock_sock(sk); + sock_put(sk); } static void rose_idletimer_expiry(struct timer_list *t) @@ -205,4 +208,5 @@ static void rose_idletimer_expiry(struct timer_list *t) sock_set_flag(sk, SOCK_DEAD); } bh_unlock_sock(sk); + sock_put(sk); } diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 9fe264bec70ce..cb174f699665b 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -665,20 +665,21 @@ struct rxrpc_call { spinlock_t input_lock; /* Lock for packet input to this call */ - /* receive-phase ACK management */ + /* Receive-phase ACK management (ACKs we send). */ u8 ackr_reason; /* reason to ACK */ rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ - rxrpc_serial_t ackr_first_seq; /* first sequence number received */ - rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ - rxrpc_seq_t ackr_consumed; /* Highest packet shown consumed */ - rxrpc_seq_t ackr_seen; /* Highest packet shown seen */ + rxrpc_seq_t ackr_highest_seq; /* Higest sequence number received */ + atomic_t ackr_nr_unacked; /* Number of unacked packets */ + atomic_t ackr_nr_consumed; /* Number of packets needing hard ACK */ /* ping management */ rxrpc_serial_t ping_serial; /* Last ping sent */ ktime_t ping_time; /* Time last ping sent */ - /* transmission-phase ACK management */ + /* Transmission-phase ACK management (ACKs we've received). */ ktime_t acks_latest_ts; /* Timestamp of latest ACK received */ + rxrpc_seq_t acks_first_seq; /* first sequence number received */ + rxrpc_seq_t acks_prev_seq; /* Highest previousPacket received */ rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */ rxrpc_seq_t acks_lost_top; /* tx_top at the time lost-ack ping sent */ rxrpc_serial_t acks_lost_ping; /* Serial number of probe ACK */ diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 9ff85ee8337cd..8574e7066d94c 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -157,7 +157,7 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call) static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) { struct sk_buff *skb; - unsigned long resend_at, rto_j; + unsigned long resend_at; rxrpc_seq_t cursor, seq, top; ktime_t now, max_age, oldest, ack_ts; int ix; @@ -165,10 +165,8 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); - rto_j = call->peer->rto_j; - now = ktime_get_real(); - max_age = ktime_sub(now, jiffies_to_usecs(rto_j)); + max_age = ktime_sub(now, jiffies_to_usecs(call->peer->rto_j)); spin_lock_bh(&call->lock); @@ -213,7 +211,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) } resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest))); - resend_at += jiffies + rto_j; + resend_at += jiffies + rxrpc_get_rto_backoff(call->peer, retrans); WRITE_ONCE(call->resend_at, resend_at); if (unacked) @@ -409,7 +407,8 @@ void rxrpc_process_call(struct work_struct *work) goto recheck_state; } - if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events)) { + if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events) && + call->state != RXRPC_CALL_CLIENT_RECV_REPLY) { rxrpc_resend(call, now); goto recheck_state; } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 916d1f455b218..5cf64cf8debf7 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -413,8 +413,8 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); enum rxrpc_call_state state; - unsigned int j, nr_subpackets; - rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0; + unsigned int j, nr_subpackets, nr_unacked = 0; + rxrpc_serial_t serial = sp->hdr.serial, ack_serial = serial; rxrpc_seq_t seq0 = sp->hdr.seq, hard_ack; bool immediate_ack = false, jumbo_bad = false; u8 ack = 0; @@ -454,7 +454,6 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) !rxrpc_receiving_reply(call)) goto unlock; - call->ackr_prev_seq = seq0; hard_ack = READ_ONCE(call->rx_hard_ack); nr_subpackets = sp->nr_subpackets; @@ -535,6 +534,9 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) ack_serial = serial; } + if (after(seq0, call->ackr_highest_seq)) + call->ackr_highest_seq = seq0; + /* Queue the packet. We use a couple of memory barriers here as need * to make sure that rx_top is perceived to be set after the buffer * pointer and that the buffer pointer is set after the annotation and @@ -568,6 +570,8 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) sp = NULL; } + nr_unacked++; + if (last) { set_bit(RXRPC_CALL_RX_LAST, &call->flags); if (!ack) { @@ -587,9 +591,14 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) } call->rx_expect_next = seq + 1; } + if (!ack) + ack_serial = serial; } ack: + if (atomic_add_return(nr_unacked, &call->ackr_nr_unacked) > 2 && !ack) + ack = RXRPC_ACK_IDLE; + if (ack) rxrpc_propose_ACK(call, ack, ack_serial, immediate_ack, true, @@ -808,7 +817,7 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, static bool rxrpc_is_ack_valid(struct rxrpc_call *call, rxrpc_seq_t first_pkt, rxrpc_seq_t prev_pkt) { - rxrpc_seq_t base = READ_ONCE(call->ackr_first_seq); + rxrpc_seq_t base = READ_ONCE(call->acks_first_seq); if (after(first_pkt, base)) return true; /* The window advanced */ @@ -816,7 +825,7 @@ static bool rxrpc_is_ack_valid(struct rxrpc_call *call, if (before(first_pkt, base)) return false; /* firstPacket regressed */ - if (after_eq(prev_pkt, call->ackr_prev_seq)) + if (after_eq(prev_pkt, call->acks_prev_seq)) return true; /* previousPacket hasn't regressed. */ /* Some rx implementations put a serial number in previousPacket. */ @@ -891,8 +900,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) /* Discard any out-of-order or duplicate ACKs (outside lock). */ if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) { trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial, - first_soft_ack, call->ackr_first_seq, - prev_pkt, call->ackr_prev_seq); + first_soft_ack, call->acks_first_seq, + prev_pkt, call->acks_prev_seq); return; } @@ -907,14 +916,14 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) /* Discard any out-of-order or duplicate ACKs (inside lock). */ if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) { trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial, - first_soft_ack, call->ackr_first_seq, - prev_pkt, call->ackr_prev_seq); + first_soft_ack, call->acks_first_seq, + prev_pkt, call->acks_prev_seq); goto out; } call->acks_latest_ts = skb->tstamp; - call->ackr_first_seq = first_soft_ack; - call->ackr_prev_seq = prev_pkt; + call->acks_first_seq = first_soft_ack; + call->acks_prev_seq = prev_pkt; /* Parse rwind and mtu sizes if provided. */ if (buf.info.rxMTU) diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c index b312aab80fed6..91a503871116b 100644 --- a/net/rxrpc/net_ns.c +++ b/net/rxrpc/net_ns.c @@ -118,6 +118,8 @@ static __net_exit void rxrpc_exit_net(struct net *net) rxnet->live = false; del_timer_sync(&rxnet->peer_keepalive_timer); cancel_work_sync(&rxnet->peer_keepalive_work); + /* Remove the timer again as the worker may have restarted it. */ + del_timer_sync(&rxnet->peer_keepalive_timer); rxrpc_destroy_all_calls(rxnet); rxrpc_destroy_all_connections(rxnet); rxrpc_destroy_all_peers(rxnet); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index f8b632a5c6197..6202d2e32914a 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -74,11 +74,18 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, u8 reason) { rxrpc_serial_t serial; + unsigned int tmp; rxrpc_seq_t hard_ack, top, seq; int ix; u32 mtu, jmax; u8 *ackp = pkt->acks; + tmp = atomic_xchg(&call->ackr_nr_unacked, 0); + tmp |= atomic_xchg(&call->ackr_nr_consumed, 0); + if (!tmp && (reason == RXRPC_ACK_DELAY || + reason == RXRPC_ACK_IDLE)) + return 0; + /* Barrier against rxrpc_input_data(). */ serial = call->ackr_serial; hard_ack = READ_ONCE(call->rx_hard_ack); @@ -89,7 +96,7 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, pkt->ack.bufferSpace = htons(8); pkt->ack.maxSkew = htons(0); pkt->ack.firstPacket = htonl(hard_ack + 1); - pkt->ack.previousPacket = htonl(call->ackr_prev_seq); + pkt->ack.previousPacket = htonl(call->ackr_highest_seq); pkt->ack.serial = htonl(serial); pkt->ack.reason = reason; pkt->ack.nAcks = top - hard_ack; @@ -180,6 +187,10 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, n = rxrpc_fill_out_ack(conn, call, pkt, &hard_ack, &top, reason); spin_unlock_bh(&call->lock); + if (n == 0) { + kfree(pkt); + return 0; + } iov[0].iov_base = pkt; iov[0].iov_len = sizeof(pkt->whdr) + sizeof(pkt->ack) + n; @@ -227,13 +238,6 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, ntohl(pkt->ack.serial), false, true, rxrpc_propose_ack_retry_tx); - } else { - spin_lock_bh(&call->lock); - if (after(hard_ack, call->ackr_consumed)) - call->ackr_consumed = hard_ack; - if (after(top, call->ackr_seen)) - call->ackr_seen = top; - spin_unlock_bh(&call->lock); } rxrpc_set_keepalive(call); @@ -426,7 +430,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, if (call->peer->rtt_count > 1) { unsigned long nowj = jiffies, ack_lost_at; - ack_lost_at = rxrpc_get_rto_backoff(call->peer, retrans); + ack_lost_at = rxrpc_get_rto_backoff(call->peer, false); ack_lost_at += nowj; WRITE_ONCE(call->ack_lost_at, ack_lost_at); rxrpc_reduce_call_timer(call, ack_lost_at, nowj, diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index e011594adcd13..23d0bc4ca3196 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -297,6 +297,12 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx, return peer; } +static void rxrpc_free_peer(struct rxrpc_peer *peer) +{ + rxrpc_put_local(peer->local); + kfree_rcu(peer, rcu); +} + /* * Set up a new incoming peer. There shouldn't be any other matching peers * since we've already done a search in the list from the non-reentrant context @@ -363,7 +369,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, spin_unlock_bh(&rxnet->peer_hash_lock); if (peer) - kfree(candidate); + rxrpc_free_peer(candidate); else peer = candidate; } @@ -418,8 +424,7 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer) list_del_init(&peer->keepalive_link); spin_unlock_bh(&rxnet->peer_hash_lock); - rxrpc_put_local(peer->local); - kfree_rcu(peer, rcu); + rxrpc_free_peer(peer); } /* @@ -455,8 +460,7 @@ void rxrpc_put_peer_locked(struct rxrpc_peer *peer) if (n == 0) { hash_del_rcu(&peer->hash_link); list_del_init(&peer->keepalive_link); - rxrpc_put_local(peer->local); - kfree_rcu(peer, rcu); + rxrpc_free_peer(peer); } } diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 4f48e3bdd4b4b..c75789ebc514d 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -212,11 +212,9 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) rxrpc_end_rx_phase(call, serial); } else { /* Check to see if there's an ACK that needs sending. */ - if (after_eq(hard_ack, call->ackr_consumed + 2) || - after_eq(top, call->ackr_seen + 2) || - (hard_ack == top && after(hard_ack, call->ackr_consumed))) - rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial, - true, true, + if (atomic_inc_return(&call->ackr_nr_consumed) > 2) + rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, serial, + true, false, rxrpc_propose_ack_rotate_rx); if (call->ackr_reason && call->ackr_reason != RXRPC_ACK_DELAY) rxrpc_send_ack_packet(call, false, NULL); diff --git a/net/rxrpc/rtt.c b/net/rxrpc/rtt.c index 928d8b34a3eee..f3f87c9f0209d 100644 --- a/net/rxrpc/rtt.c +++ b/net/rxrpc/rtt.c @@ -23,7 +23,7 @@ static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer) static u32 __rxrpc_set_rto(const struct rxrpc_peer *peer) { - return _usecs_to_jiffies((peer->srtt_us >> 3) + peer->rttvar_us); + return usecs_to_jiffies((peer->srtt_us >> 3) + peer->rttvar_us); } static u32 rxrpc_bound_rto(u32 rto) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 1a340eb0abf7c..22f020099214d 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -463,6 +463,12 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, success: ret = copied; + if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE) { + read_lock_bh(&call->state_lock); + if (call->error < 0) + ret = call->error; + read_unlock_bh(&call->state_lock); + } out: call->tx_pending = skb; _leave(" = %d", ret); diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index 18dade4e6f9a0..8fc4190725058 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -12,7 +12,7 @@ static struct ctl_table_header *rxrpc_sysctl_reg_table; static const unsigned int four = 4; -static const unsigned int thirtytwo = 32; +static const unsigned int max_backlog = RXRPC_BACKLOG_MAX - 1; static const unsigned int n_65535 = 65535; static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1; static const unsigned long one_jiffy = 1; @@ -97,7 +97,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = (void *)&four, - .extra2 = (void *)&thirtytwo, + .extra2 = (void *)&max_backlog, }, { .procname = "rx_window_size", diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 75132d0ca8870..db1c0139f99c6 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -287,7 +287,8 @@ static int tcf_idr_release_unsafe(struct tc_action *p) } static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct nlattr *nest; int n_i = 0; @@ -303,20 +304,25 @@ static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, if (nla_put_string(skb, TCA_KIND, ops->kind)) goto nla_put_failure; + ret = 0; mutex_lock(&idrinfo->lock); idr_for_each_entry_ul(idr, p, tmp, id) { if (IS_ERR(p)) continue; ret = tcf_idr_release_unsafe(p); - if (ret == ACT_P_DELETED) { + if (ret == ACT_P_DELETED) module_put(ops->owner); - n_i++; - } else if (ret < 0) { - mutex_unlock(&idrinfo->lock); - goto nla_put_failure; - } + else if (ret < 0) + break; + n_i++; } mutex_unlock(&idrinfo->lock); + if (ret < 0) { + if (n_i) + NL_SET_ERR_MSG(extack, "Unable to flush all TC actions"); + else + goto nla_put_failure; + } ret = nla_put_u32(skb, TCA_FCNT, n_i); if (ret) @@ -337,7 +343,7 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, struct tcf_idrinfo *idrinfo = tn->idrinfo; if (type == RTM_DELACTION) { - return tcf_del_walker(idrinfo, skb, ops); + return tcf_del_walker(idrinfo, skb, ops, extack); } else if (type == RTM_GETACTION) { return tcf_dump_walker(idrinfo, skb, cb); } else { @@ -652,15 +658,24 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, restart_act_graph: for (i = 0; i < nr_actions; i++) { const struct tc_action *a = actions[i]; + int repeat_ttl; if (jmp_prgcnt > 0) { jmp_prgcnt -= 1; continue; } + + repeat_ttl = 32; repeat: ret = a->ops->act(skb, a, res); - if (ret == TC_ACT_REPEAT) - goto repeat; /* we need a ttl - JHS */ + + if (unlikely(ret == TC_ACT_REPEAT)) { + if (--repeat_ttl != 0) + goto repeat; + /* suspicious opcode, stop pipeline */ + net_warn_ratelimited("TC_ACT_REPEAT abuse ?\n"); + return TC_ACT_OK; + } if (TC_ACT_EXT_CMP(ret, TC_ACT_JUMP)) { jmp_prgcnt = ret & TCA_ACT_MAX_PRIO_MASK; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index e3ff884a48c56..b87d2a1ee0b16 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -218,6 +219,7 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, bool want_ingress; bool is_redirect; bool expects_nh; + bool at_ingress; int m_eaction; int mac_len; bool at_nh; @@ -253,7 +255,8 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, * ingress - that covers the TC S/W datapath. */ is_redirect = tcf_mirred_is_act_redirect(m_eaction); - use_reinsert = skb_at_tc_ingress(skb) && is_redirect && + at_ingress = skb_at_tc_ingress(skb); + use_reinsert = at_ingress && is_redirect && tcf_mirred_can_reinsert(retval); if (!use_reinsert) { skb2 = skb_clone(skb, GFP_ATOMIC); @@ -261,10 +264,12 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, goto out; } + want_ingress = tcf_mirred_act_wants_ingress(m_eaction); + /* All mirred/redirected skbs should clear previous ct info */ nf_reset_ct(skb2); - - want_ingress = tcf_mirred_act_wants_ingress(m_eaction); + if (want_ingress && !at_ingress) /* drop dst for egress -> ingress */ + skb_dst_drop(skb2); expects_nh = want_ingress || !m_mac_header_xmit; at_nh = skb->data == skb_network_header(skb); diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index ff4f2437b5925..f095a0fb75c6d 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -148,7 +148,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, struct nlattr *pattr; struct tcf_pedit *p; int ret = 0, err; - int ksize; + int i, ksize; u32 index; if (!nla) { @@ -227,6 +227,22 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, p->tcfp_nkeys = parm->nkeys; } memcpy(p->tcfp_keys, parm->keys, ksize); + p->tcfp_off_max_hint = 0; + for (i = 0; i < p->tcfp_nkeys; ++i) { + u32 cur = p->tcfp_keys[i].off; + + /* sanitize the shift value for any later use */ + p->tcfp_keys[i].shift = min_t(size_t, BITS_PER_TYPE(int) - 1, + p->tcfp_keys[i].shift); + + /* The AT option can read a single byte, we can bound the actual + * value with uchar max. + */ + cur += (0xff & p->tcfp_keys[i].offmask) >> p->tcfp_keys[i].shift; + + /* Each key touches 4 bytes starting from the computed offset */ + p->tcfp_off_max_hint = max(p->tcfp_off_max_hint, cur + 4); + } p->tcfp_flags = parm->flags; goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); @@ -307,13 +323,18 @@ static int tcf_pedit_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_pedit *p = to_pedit(a); + u32 max_offset; int i; - if (skb_unclone(skb, GFP_ATOMIC)) - return p->tcf_action; - spin_lock(&p->tcf_lock); + max_offset = (skb_transport_header_was_set(skb) ? + skb_transport_offset(skb) : + skb_network_offset(skb)) + + p->tcfp_off_max_hint; + if (skb_ensure_writable(skb, min(skb->len, max_offset))) + goto unlock; + tcf_lastuse_update(&p->tcf_tm); if (p->tcfp_nkeys > 0) { @@ -402,6 +423,7 @@ static int tcf_pedit_act(struct sk_buff *skb, const struct tc_action *a, p->tcf_qstats.overlimits++; done: bstats_update(&p->tcf_bstats, skb); +unlock: spin_unlock(&p->tcf_lock); return p->tcf_action; } diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 8fd23a8b88a5e..a7660b602237d 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -213,6 +213,20 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, return err; } +static bool tcf_police_mtu_check(struct sk_buff *skb, u32 limit) +{ + u32 len; + + if (skb_is_gso(skb)) + return skb_gso_validate_mac_len(skb, limit); + + len = qdisc_pkt_len(skb); + if (skb_at_tc_ingress(skb)) + len += skb->mac_len; + + return len <= limit; +} + static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { @@ -235,7 +249,7 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a, goto inc_overlimits; } - if (qdisc_pkt_len(skb) <= p->tcfp_mtu) { + if (tcf_police_mtu_check(skb, p->tcfp_mtu)) { if (!p->rate_present) { ret = p->tcfp_result; goto end; diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 74450b0f69fc5..214f4efdd9920 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -265,14 +265,12 @@ tcf_sample_get_group(const struct tc_action *a, struct tcf_sample *s = to_sample(a); struct psample_group *group; - spin_lock_bh(&s->tcf_lock); group = rcu_dereference_protected(s->psample_group, lockdep_is_held(&s->tcf_lock)); if (group) { psample_group_take(group); *destructor = tcf_psample_group_put; } - spin_unlock_bh(&s->tcf_lock); return group; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 7f20fd37e01e0..919c7fa5f02d6 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1639,10 +1639,10 @@ static int tcf_chain_tp_insert(struct tcf_chain *chain, if (chain->flushing) return -EAGAIN; + RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info)); if (*chain_info->pprev == chain->filter_chain) tcf_chain0_head_change(chain, tp); tcf_proto_get(tp); - RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info)); rcu_assign_pointer(*chain_info->pprev, tp); return 0; @@ -1928,9 +1928,9 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, bool prio_allocate; u32 parent; u32 chain_index; - struct Qdisc *q = NULL; + struct Qdisc *q; struct tcf_chain_info chain_info; - struct tcf_chain *chain = NULL; + struct tcf_chain *chain; struct tcf_block *block; struct tcf_proto *tp; unsigned long cl; @@ -1958,6 +1958,8 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, tp = NULL; cl = 0; block = NULL; + q = NULL; + chain = NULL; if (prio == 0) { /* If no priority is provided by the user, @@ -2764,8 +2766,8 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, struct tcmsg *t; u32 parent; u32 chain_index; - struct Qdisc *q = NULL; - struct tcf_chain *chain = NULL; + struct Qdisc *q; + struct tcf_chain *chain; struct tcf_block *block; unsigned long cl; int err; @@ -2775,6 +2777,7 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, return -EPERM; replay: + q = NULL; err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) @@ -3436,7 +3439,7 @@ static void tcf_sample_get_group(struct flow_action_entry *entry, int tc_setup_flow_action(struct flow_action *flow_action, const struct tcf_exts *exts, bool rtnl_held) { - const struct tc_action *act; + struct tc_action *act; int i, j, k, err = 0; if (!exts) @@ -3450,6 +3453,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, struct flow_action_entry *entry; entry = &flow_action->entries[j]; + spin_lock_bh(&act->tcfa_lock); if (is_tcf_gact_ok(act)) { entry->id = FLOW_ACTION_ACCEPT; } else if (is_tcf_gact_shot(act)) { @@ -3490,13 +3494,13 @@ int tc_setup_flow_action(struct flow_action *flow_action, break; default: err = -EOPNOTSUPP; - goto err_out; + goto err_out_locked; } } else if (is_tcf_tunnel_set(act)) { entry->id = FLOW_ACTION_TUNNEL_ENCAP; err = tcf_tunnel_encap_get_tunnel(entry, act); if (err) - goto err_out; + goto err_out_locked; } else if (is_tcf_tunnel_release(act)) { entry->id = FLOW_ACTION_TUNNEL_DECAP; } else if (is_tcf_pedit(act)) { @@ -3510,7 +3514,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, break; default: err = -EOPNOTSUPP; - goto err_out; + goto err_out_locked; } entry->mangle.htype = tcf_pedit_htype(act, k); entry->mangle.mask = tcf_pedit_mask(act, k); @@ -3561,15 +3565,17 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->mpls_mangle.ttl = tcf_mpls_ttl(act); break; default: - goto err_out; + err = -EOPNOTSUPP; + goto err_out_locked; } } else if (is_tcf_skbedit_ptype(act)) { entry->id = FLOW_ACTION_PTYPE; entry->ptype = tcf_skbedit_ptype(act); } else { err = -EOPNOTSUPP; - goto err_out; + goto err_out_locked; } + spin_unlock_bh(&act->tcfa_lock); if (!is_tcf_pedit(act)) j++; @@ -3583,6 +3589,9 @@ int tc_setup_flow_action(struct flow_action *flow_action, tc_cleanup_flow_action(flow_action); return err; +err_out_locked: + spin_unlock_bh(&act->tcfa_lock); + goto err_out; } EXPORT_SYMBOL(tc_setup_flow_action); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 26979b4853bdb..007fbc1993522 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -784,6 +784,7 @@ static int fl_set_key_mpls(struct nlattr **tb, static void fl_set_key_vlan(struct nlattr **tb, __be16 ethertype, int vlan_id_key, int vlan_prio_key, + int vlan_next_eth_type_key, struct flow_dissector_key_vlan *key_val, struct flow_dissector_key_vlan *key_mask) { @@ -802,6 +803,11 @@ static void fl_set_key_vlan(struct nlattr **tb, } key_val->vlan_tpid = ethertype; key_mask->vlan_tpid = cpu_to_be16(~0); + if (tb[vlan_next_eth_type_key]) { + key_val->vlan_eth_type = + nla_get_be16(tb[vlan_next_eth_type_key]); + key_mask->vlan_eth_type = cpu_to_be16(~0); + } } static void fl_set_key_flag(u32 flower_key, u32 flower_mask, @@ -1076,8 +1082,9 @@ static int fl_set_key(struct net *net, struct nlattr **tb, if (eth_type_vlan(ethertype)) { fl_set_key_vlan(tb, ethertype, TCA_FLOWER_KEY_VLAN_ID, - TCA_FLOWER_KEY_VLAN_PRIO, &key->vlan, - &mask->vlan); + TCA_FLOWER_KEY_VLAN_PRIO, + TCA_FLOWER_KEY_VLAN_ETH_TYPE, + &key->vlan, &mask->vlan); if (tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]) { ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]); @@ -1085,6 +1092,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb, fl_set_key_vlan(tb, ethertype, TCA_FLOWER_KEY_CVLAN_ID, TCA_FLOWER_KEY_CVLAN_PRIO, + TCA_FLOWER_KEY_CVLAN_ETH_TYPE, &key->cvlan, &mask->cvlan); fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_CVLAN_ETH_TYPE, @@ -2272,13 +2280,13 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net, goto nla_put_failure; if (mask->basic.n_proto) { - if (mask->cvlan.vlan_tpid) { + if (mask->cvlan.vlan_eth_type) { if (nla_put_be16(skb, TCA_FLOWER_KEY_CVLAN_ETH_TYPE, key->basic.n_proto)) goto nla_put_failure; - } else if (mask->vlan.vlan_tpid) { + } else if (mask->vlan.vlan_eth_type) { if (nla_put_be16(skb, TCA_FLOWER_KEY_VLAN_ETH_TYPE, - key->basic.n_proto)) + key->vlan.vlan_eth_type)) goto nla_put_failure; } } diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 5efa3e7ace152..315ca2b7e2ed0 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -526,7 +526,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, rcu_assign_pointer(f->next, f1); rcu_assign_pointer(*fp, f); - if (fold && fold->handle && f->handle != fold->handle) { + if (fold) { th = to_hash(fold->handle); h = from_hash(fold->handle >> 16); b = rtnl_dereference(head->table[th]); diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index e15ff335953de..ed8d26e6468ca 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -386,14 +386,19 @@ static int u32_init(struct tcf_proto *tp) return 0; } -static int u32_destroy_key(struct tc_u_knode *n, bool free_pf) +static void __u32_destroy_key(struct tc_u_knode *n) { struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); tcf_exts_destroy(&n->exts); - tcf_exts_put_net(&n->exts); if (ht && --ht->refcnt == 0) kfree(ht); + kfree(n); +} + +static void u32_destroy_key(struct tc_u_knode *n, bool free_pf) +{ + tcf_exts_put_net(&n->exts); #ifdef CONFIG_CLS_U32_PERF if (free_pf) free_percpu(n->pf); @@ -402,8 +407,7 @@ static int u32_destroy_key(struct tc_u_knode *n, bool free_pf) if (free_pf) free_percpu(n->pcpu_success); #endif - kfree(n); - return 0; + __u32_destroy_key(n); } /* u32_delete_key_rcu should be called when free'ing a copied @@ -812,10 +816,6 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp, new->flags = n->flags; RCU_INIT_POINTER(new->ht_down, ht); - /* bump reference count as long as we hold pointer to structure */ - if (ht) - ht->refcnt++; - #ifdef CONFIG_CLS_U32_PERF /* Statistics may be incremented by readers during update * so we must keep them in tact. When the node is later destroyed @@ -837,6 +837,10 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp, return NULL; } + /* bump reference count as long as we hold pointer to structure */ + if (ht) + ht->refcnt++; + return new; } @@ -903,13 +907,13 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, tca[TCA_RATE], ovr, extack); if (err) { - u32_destroy_key(new, false); + __u32_destroy_key(new); return err; } err = u32_replace_hw_knode(tp, new, flags, extack); if (err) { - u32_destroy_key(new, false); + __u32_destroy_key(new); return err; } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 3b1b5ee521379..6f36df85d23d8 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -510,6 +510,12 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt, return stab; } + if (s->size_log > STAB_SIZE_LOG_MAX || + s->cell_log > STAB_SIZE_LOG_MAX) { + NL_SET_ERR_MSG(extack, "Invalid logarithmic size of size table"); + return ERR_PTR(-EINVAL); + } + stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL); if (!stab) return ERR_PTR(-ENOMEM); @@ -1189,7 +1195,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, err = -ENOENT; if (!ops) { - NL_SET_ERR_MSG(extack, "Specified qdisc not found"); + NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown"); goto err_out; } diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index e8eebe40e0ae9..0eb4d4a568f77 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -2724,7 +2724,7 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt, q->tins = kvcalloc(CAKE_MAX_TINS, sizeof(struct cake_tin_data), GFP_KERNEL); if (!q->tins) - goto nomem; + return -ENOMEM; for (i = 0; i < CAKE_MAX_TINS; i++) { struct cake_tin_data *b = q->tins + i; @@ -2754,10 +2754,6 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt, q->min_netlen = ~0; q->min_adjlen = ~0; return 0; - -nomem: - cake_destroy(sch); - return -ENOMEM; } static int cake_dump(struct Qdisc *sch, struct sk_buff *skb) diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 39b427dc75128..e5972889cd81c 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1614,7 +1614,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); if (err) { kfree(cl); - return err; + goto failure; } if (tca[TCA_RATE]) { diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 37c8aa75d70c5..56f4c1621e444 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -148,6 +148,9 @@ int fifo_set_limit(struct Qdisc *q, unsigned int limit) if (strncmp(q->ops->id + 1, "fifo", 4) != 0) return 0; + if (!q->ops->change) + return 0; + nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), GFP_KERNEL); if (nla) { nla->nla_type = RTM_NEWQDISC; diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 76d72c3f52eda..86fb2f953bd5b 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -370,6 +370,7 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt, { struct fq_codel_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_FQ_CODEL_MAX + 1]; + u32 quantum = 0; int err; if (!opt) @@ -387,6 +388,13 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt, q->flows_cnt > 65536) return -EINVAL; } + if (tb[TCA_FQ_CODEL_QUANTUM]) { + quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM])); + if (quantum > FQ_CODEL_QUANTUM_MAX) { + NL_SET_ERR_MSG(extack, "Invalid quantum"); + return -EINVAL; + } + } sch_tree_lock(sch); if (tb[TCA_FQ_CODEL_TARGET]) { @@ -413,8 +421,8 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt, if (tb[TCA_FQ_CODEL_ECN]) q->cparams.ecn = !!nla_get_u32(tb[TCA_FQ_CODEL_ECN]); - if (tb[TCA_FQ_CODEL_QUANTUM]) - q->quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM])); + if (quantum) + q->quantum = quantum; if (tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]) q->drop_batch_size = max(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index ccf67c3e0596c..fc2ab0edbf74f 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1313,6 +1313,15 @@ static int qdisc_change_tx_queue_len(struct net_device *dev, return 0; } +void dev_qdisc_change_real_num_tx(struct net_device *dev, + unsigned int new_real_tx) +{ + struct Qdisc *qdisc = dev->qdisc; + + if (qdisc->ops->change_real_num_tx) + qdisc->ops->change_real_num_tx(qdisc, new_real_tx); +} + int dev_qdisc_change_tx_queue_len(struct net_device *dev) { bool up = dev->flags & IFF_UP; @@ -1387,6 +1396,7 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r, { memset(r, 0, sizeof(*r)); r->overhead = conf->overhead; + r->mpu = conf->mpu; r->rate_bytes_ps = max_t(u64, conf->rate, rate64); r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK); r->mult = 1; diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index e79f1afe0cfd6..db18d8a860f9c 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -125,6 +125,29 @@ static void mq_attach(struct Qdisc *sch) priv->qdiscs = NULL; } +static void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx) +{ +#ifdef CONFIG_NET_SCHED + struct net_device *dev = qdisc_dev(sch); + struct Qdisc *qdisc; + unsigned int i; + + for (i = new_real_tx; i < dev->real_num_tx_queues; i++) { + qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping; + /* Only update the default qdiscs we created, + * qdiscs with handles are always hashed. + */ + if (qdisc != &noop_qdisc && !qdisc->handle) + qdisc_hash_del(qdisc); + } + for (i = dev->real_num_tx_queues; i < new_real_tx; i++) { + qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping; + if (qdisc != &noop_qdisc && !qdisc->handle) + qdisc_hash_add(qdisc, false); + } +#endif +} + static int mq_dump(struct Qdisc *sch, struct sk_buff *skb) { struct net_device *dev = qdisc_dev(sch); @@ -288,6 +311,7 @@ struct Qdisc_ops mq_qdisc_ops __read_mostly = { .init = mq_init, .destroy = mq_destroy, .attach = mq_attach, + .change_real_num_tx = mq_change_real_num_tx, .dump = mq_dump, .owner = THIS_MODULE, }; diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 8766ab5b87880..50e15add6068f 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -306,6 +306,28 @@ static void mqprio_attach(struct Qdisc *sch) priv->qdiscs = NULL; } +static void mqprio_change_real_num_tx(struct Qdisc *sch, + unsigned int new_real_tx) +{ + struct net_device *dev = qdisc_dev(sch); + struct Qdisc *qdisc; + unsigned int i; + + for (i = new_real_tx; i < dev->real_num_tx_queues; i++) { + qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping; + /* Only update the default qdiscs we created, + * qdiscs with handles are always hashed. + */ + if (qdisc != &noop_qdisc && !qdisc->handle) + qdisc_hash_del(qdisc); + } + for (i = dev->real_num_tx_queues; i < new_real_tx; i++) { + qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping; + if (qdisc != &noop_qdisc && !qdisc->handle) + qdisc_hash_add(qdisc, false); + } +} + static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch, unsigned long cl) { @@ -529,22 +551,28 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, for (i = tc.offset; i < tc.offset + tc.count; i++) { struct netdev_queue *q = netdev_get_tx_queue(dev, i); struct Qdisc *qdisc = rtnl_dereference(q->qdisc); - struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL; - struct gnet_stats_queue __percpu *cpu_qstats = NULL; spin_lock_bh(qdisc_lock(qdisc)); + if (qdisc_is_percpu_stats(qdisc)) { - cpu_bstats = qdisc->cpu_bstats; - cpu_qstats = qdisc->cpu_qstats; + qlen = qdisc_qlen_sum(qdisc); + + __gnet_stats_copy_basic(NULL, &bstats, + qdisc->cpu_bstats, + &qdisc->bstats); + __gnet_stats_copy_queue(&qstats, + qdisc->cpu_qstats, + &qdisc->qstats, + qlen); + } else { + qlen += qdisc->q.qlen; + bstats.bytes += qdisc->bstats.bytes; + bstats.packets += qdisc->bstats.packets; + qstats.backlog += qdisc->qstats.backlog; + qstats.drops += qdisc->qstats.drops; + qstats.requeues += qdisc->qstats.requeues; + qstats.overlimits += qdisc->qstats.overlimits; } - - qlen = qdisc_qlen_sum(qdisc); - __gnet_stats_copy_basic(NULL, &sch->bstats, - cpu_bstats, &qdisc->bstats); - __gnet_stats_copy_queue(&sch->qstats, - cpu_qstats, - &qdisc->qstats, - qlen); spin_unlock_bh(qdisc_lock(qdisc)); } @@ -623,6 +651,7 @@ static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = { .init = mqprio_init, .destroy = mqprio_destroy, .attach = mqprio_attach, + .change_real_num_tx = mqprio_change_real_num_tx, .dump = mqprio_dump, .owner = THIS_MODULE, }; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index f4101a920d1f9..1802f134aa407 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -1146,9 +1146,9 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) struct tc_netem_rate rate; struct tc_netem_slot slot; - qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency), + qopt.latency = min_t(psched_time_t, PSCHED_NS2TICKS(q->latency), UINT_MAX); - qopt.jitter = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->jitter), + qopt.jitter = min_t(psched_time_t, PSCHED_NS2TICKS(q->jitter), UINT_MAX); qopt.limit = q->limit; qopt.loss = q->loss; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index b046fd3cac2cf..1eb339d224ae5 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1421,10 +1421,8 @@ static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt, if (err < 0) return err; - if (qdisc_dev(sch)->tx_queue_len + 1 > QFQ_MAX_AGG_CLASSES) - max_classes = QFQ_MAX_AGG_CLASSES; - else - max_classes = qdisc_dev(sch)->tx_queue_len + 1; + max_classes = min_t(u64, (u64)qdisc_dev(sch)->tx_queue_len + 1, + QFQ_MAX_AGG_CLASSES); /* max_cl_shift = floor(log_2(max_classes)) */ max_cl_shift = __fls(max_classes); q->max_agg_classes = 1<base_time); } -static ktime_t taprio_get_time(struct taprio_sched *q) +static ktime_t taprio_mono_to_any(const struct taprio_sched *q, ktime_t mono) { - ktime_t mono = ktime_get(); + /* This pairs with WRITE_ONCE() in taprio_parse_clockid() */ + enum tk_offsets tk_offset = READ_ONCE(q->tk_offset); - switch (q->tk_offset) { + switch (tk_offset) { case TK_OFFS_MAX: return mono; default: - return ktime_mono_to_any(mono, q->tk_offset); + return ktime_mono_to_any(mono, tk_offset); } +} - return KTIME_MAX; +static ktime_t taprio_get_time(const struct taprio_sched *q) +{ + return taprio_mono_to_any(q, ktime_get()); } static void taprio_free_sched_cb(struct rcu_head *head) @@ -321,7 +325,7 @@ static ktime_t get_tcp_tstamp(struct taprio_sched *q, struct sk_buff *skb) return 0; } - return ktime_mono_to_any(skb->skb_mstamp_ns, q->tk_offset); + return taprio_mono_to_any(q, skb->skb_mstamp_ns); } /* There are a few scenarios where we will have to modify the txtime from @@ -423,7 +427,8 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (unlikely(!child)) return qdisc_drop(skb, sch, to_free); - if (skb->sk && sock_flag(skb->sk, SOCK_TXTIME)) { + /* sk_flags are only safe to use on full sockets. */ + if (skb->sk && sk_fullsock(skb->sk) && sock_flag(skb->sk, SOCK_TXTIME)) { if (!is_valid_interval(skb, sch)) return qdisc_drop(skb, sch, to_free); } else if (TXTIME_ASSIST_IS_ENABLED(q->flags)) { @@ -1342,6 +1347,7 @@ static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb, } } else if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) { int clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]); + enum tk_offsets tk_offset; /* We only support static clockids and we don't allow * for it to be modified after the first init. @@ -1356,22 +1362,24 @@ static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb, switch (clockid) { case CLOCK_REALTIME: - q->tk_offset = TK_OFFS_REAL; + tk_offset = TK_OFFS_REAL; break; case CLOCK_MONOTONIC: - q->tk_offset = TK_OFFS_MAX; + tk_offset = TK_OFFS_MAX; break; case CLOCK_BOOTTIME: - q->tk_offset = TK_OFFS_BOOT; + tk_offset = TK_OFFS_BOOT; break; case CLOCK_TAI: - q->tk_offset = TK_OFFS_TAI; + tk_offset = TK_OFFS_TAI; break; default: NL_SET_ERR_MSG(extack, "Invalid 'clockid'"); err = -EINVAL; goto out; } + /* This pairs with READ_ONCE() in taprio_mono_to_any */ + WRITE_ONCE(q->tk_offset, tk_offset); q->clockid = clockid; } else { @@ -1503,7 +1511,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, taprio_set_picos_per_byte(dev, q); if (mqprio) { - netdev_set_num_tc(dev, mqprio->num_tc); + err = netdev_set_num_tc(dev, mqprio->num_tc); + if (err) + goto free_sched; for (i = 0; i < mqprio->num_tc; i++) netdev_set_tc_queue(dev, i, mqprio->count[i], @@ -1628,6 +1638,10 @@ static void taprio_destroy(struct Qdisc *sch) list_del(&q->taprio_list); spin_unlock(&taprio_list_lock); + /* Note that taprio_reset() might not be called if an error + * happens in qdisc_create(), after taprio_init() has been called. + */ + hrtimer_cancel(&q->advance_timer); taprio_disable_offload(dev, q, NULL); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index fb6f62264e874..f960b0e1e552c 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -224,9 +224,8 @@ static struct sctp_association *sctp_association_init( if (!sctp_ulpq_init(&asoc->ulpq, asoc)) goto fail_init; - if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, - 0, gfp)) - goto fail_init; + if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, 0, gfp)) + goto stream_free; /* Initialize default path MTU. */ asoc->pathmtu = sp->pathmtu; diff --git a/net/sctp/diag.c b/net/sctp/diag.c index ba9f64fdfd238..5a918e74bb82b 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -61,10 +61,6 @@ static void inet_diag_msg_sctpasoc_fill(struct inet_diag_msg *r, r->idiag_timer = SCTP_EVENT_TIMEOUT_T3_RTX; r->idiag_retrans = asoc->rtx_data_chunks; r->idiag_expires = jiffies_to_msecs(t3_rtx->expires - jiffies); - } else { - r->idiag_timer = 0; - r->idiag_retrans = 0; - r->idiag_expires = 0; } } @@ -144,13 +140,14 @@ static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc, r = nlmsg_data(nlh); BUG_ON(!sk_fullsock(sk)); + r->idiag_timer = 0; + r->idiag_retrans = 0; + r->idiag_expires = 0; if (asoc) { inet_diag_msg_sctpasoc_fill(r, sk, asoc); } else { inet_diag_msg_common_fill(r, sk); r->idiag_state = sk->sk_state; - r->idiag_timer = 0; - r->idiag_retrans = 0; } if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin)) @@ -292,9 +289,8 @@ static int sctp_tsp_dump_one(struct sctp_transport *tsp, void *p) return err; } -static int sctp_sock_dump(struct sctp_transport *tsp, void *p) +static int sctp_sock_dump(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p) { - struct sctp_endpoint *ep = tsp->asoc->ep; struct sctp_comm_param *commp = p; struct sock *sk = ep->base.sk; struct sk_buff *skb = commp->skb; @@ -304,6 +300,8 @@ static int sctp_sock_dump(struct sctp_transport *tsp, void *p) int err = 0; lock_sock(sk); + if (ep != tsp->asoc->ep) + goto release; list_for_each_entry(assoc, &ep->asocs, asocs) { if (cb->args[4] < cb->args[1]) goto next; @@ -346,9 +344,8 @@ static int sctp_sock_dump(struct sctp_transport *tsp, void *p) return err; } -static int sctp_sock_filter(struct sctp_transport *tsp, void *p) +static int sctp_sock_filter(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p) { - struct sctp_endpoint *ep = tsp->asoc->ep; struct sctp_comm_param *commp = p; struct sock *sk = ep->base.sk; const struct inet_diag_req_v2 *r = commp->r; @@ -506,8 +503,8 @@ static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE))) goto done; - sctp_for_each_transport(sctp_sock_filter, sctp_sock_dump, - net, &pos, &commp); + sctp_transport_traverse_process(sctp_sock_filter, sctp_sock_dump, + net, &pos, &commp); cb->args[2] = pos; done: diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 3067deb0fbec1..665a22d5c725b 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -184,6 +184,18 @@ void sctp_endpoint_free(struct sctp_endpoint *ep) } /* Final destructor for endpoint. */ +static void sctp_endpoint_destroy_rcu(struct rcu_head *head) +{ + struct sctp_endpoint *ep = container_of(head, struct sctp_endpoint, rcu); + struct sock *sk = ep->base.sk; + + sctp_sk(sk)->ep = NULL; + sock_put(sk); + + kfree(ep); + SCTP_DBG_OBJCNT_DEC(ep); +} + static void sctp_endpoint_destroy(struct sctp_endpoint *ep) { struct sock *sk; @@ -213,18 +225,13 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) if (sctp_sk(sk)->bind_hash) sctp_put_port(sk); - sctp_sk(sk)->ep = NULL; - /* Give up our hold on the sock */ - sock_put(sk); - - kfree(ep); - SCTP_DBG_OBJCNT_DEC(ep); + call_rcu(&ep->rcu, sctp_endpoint_destroy_rcu); } /* Hold a reference to an endpoint. */ -void sctp_endpoint_hold(struct sctp_endpoint *ep) +int sctp_endpoint_hold(struct sctp_endpoint *ep) { - refcount_inc(&ep->base.refcnt); + return refcount_inc_not_zero(&ep->base.refcnt); } /* Release a reference to an endpoint and clean up if there are diff --git a/net/sctp/input.c b/net/sctp/input.c index db4f917aafd90..c306cb25f5246 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -92,6 +92,7 @@ int sctp_rcv(struct sk_buff *skb) struct sctp_chunk *chunk; union sctp_addr src; union sctp_addr dest; + int bound_dev_if; int family; struct sctp_af *af; struct net *net = dev_net(skb->dev); @@ -169,7 +170,8 @@ int sctp_rcv(struct sk_buff *skb) * If a frame arrives on an interface and the receiving socket is * bound to another interface, via SO_BINDTODEVICE, treat it as OOTB */ - if (sk->sk_bound_dev_if && (sk->sk_bound_dev_if != af->skb_iif(skb))) { + bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); + if (bound_dev_if && (bound_dev_if != af->skb_iif(skb))) { if (transport) { sctp_transport_put(transport); asoc = NULL; @@ -676,7 +678,7 @@ static int sctp_rcv_ootb(struct sk_buff *skb) ch = skb_header_pointer(skb, offset, sizeof(*ch), &_ch); /* Break out if chunk length is less then minimal. */ - if (ntohs(ch->length) < sizeof(_ch)) + if (!ch || ntohs(ch->length) < sizeof(_ch)) break; ch_end = offset + SCTP_PAD4(ntohs(ch->length)); @@ -1168,6 +1170,9 @@ static struct sctp_association *__sctp_rcv_asconf_lookup( union sctp_addr_param *param; union sctp_addr paddr; + if (ntohs(ch->length) < sizeof(*asconf) + sizeof(struct sctp_paramhdr)) + return NULL; + /* Skip over the ADDIP header and find the Address parameter */ param = (union sctp_addr_param *)(asconf + 1); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index bb370a7948f42..363a64c124144 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -358,7 +358,7 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp) if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) && ret != RTN_LOCAL && !sp->inet.freebind && - !net->ipv4.sysctl_ip_nonlocal_bind) + !READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind)) return 0; if (ipv6_only_sock(sctp_opt2sk(sp))) diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 38ca7ce8a44ed..4eebe708c8e4e 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2157,9 +2157,16 @@ static enum sctp_ierror sctp_verify_param(struct net *net, break; case SCTP_PARAM_SET_PRIMARY: - if (ep->asconf_enable) - break; - goto unhandled; + if (!ep->asconf_enable) + goto unhandled; + + if (ntohs(param.p->length) < sizeof(struct sctp_addip_param) + + sizeof(struct sctp_paramhdr)) { + sctp_process_inv_paramlength(asoc, param.p, + chunk, err_chunk); + retval = SCTP_IERROR_ABORT; + } + break; case SCTP_PARAM_HOST_NAME_ADDRESS: /* Tell the peer, we won't support this param. */ @@ -3652,7 +3659,7 @@ struct sctp_chunk *sctp_make_strreset_req( outlen = (sizeof(outreq) + stream_len) * out; inlen = (sizeof(inreq) + stream_len) * in; - retval = sctp_make_reconf(asoc, outlen + inlen); + retval = sctp_make_reconf(asoc, SCTP_PAD4(outlen) + SCTP_PAD4(inlen)); if (!retval) return NULL; diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 0d225f891b61b..8d32229199b96 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -458,6 +458,10 @@ void sctp_generate_reconf_event(struct timer_list *t) goto out_unlock; } + /* This happens when the response arrives after the timer is triggered. */ + if (!asoc->strreset_chunk) + goto out_unlock; + error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT, SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_RECONF), asoc->state, asoc->ep, asoc, diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 82a202d71a31e..1d2f633c6c7c3 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -149,6 +149,12 @@ static enum sctp_disposition __sctp_sf_do_9_1_abort( void *arg, struct sctp_cmd_seq *commands); +static enum sctp_disposition +__sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, void *arg, + struct sctp_cmd_seq *commands); + /* Small helper function that checks if the chunk length * is of the appropriate length. The 'required_length' argument * is set to be the size of a specific chunk we are testing. @@ -330,6 +336,14 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net, if (!chunk->singleton) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Make sure that the INIT chunk has a valid length. + * Normally, this would cause an ABORT with a Protocol Violation + * error, but since we don't have an association, we'll + * just discard the packet. + */ + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* If the packet is an OOTB packet which is temporarily on the * control endpoint, respond with an ABORT. */ @@ -344,14 +358,6 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net, if (chunk->sctp_hdr->vtag != 0) return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); - /* Make sure that the INIT chunk has a valid length. - * Normally, this would cause an ABORT with a Protocol Violation - * error, but since we don't have an association, we'll - * just discard the packet. - */ - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) - return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); - /* If the INIT is coming toward a closing socket, we'll send back * and ABORT. Essentially, this catches the race of INIT being * backloged to the socket at the same time as the user isses close(). @@ -697,6 +703,9 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net, struct sock *sk; int error = 0; + if (asoc && !sctp_vtag_verify(chunk, asoc)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* If the packet is an OOTB packet which is temporarily on the * control endpoint, respond with an ABORT. */ @@ -711,7 +720,8 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net, * in sctp_unpack_cookie(). */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) - return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, + commands); /* If the endpoint is not listening or if the number of associations * on the TCP-style socket exceed the max backlog, respond with an @@ -1480,19 +1490,16 @@ static enum sctp_disposition sctp_sf_do_unexpected_init( if (!chunk->singleton) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Make sure that the INIT chunk has a valid length. */ + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* 3.1 A packet containing an INIT chunk MUST have a zero Verification * Tag. */ if (chunk->sctp_hdr->vtag != 0) return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); - /* Make sure that the INIT chunk has a valid length. - * In this case, we generate a protocol violation since we have - * an association established. - */ - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) - return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, - commands); /* Grab the INIT header. */ chunk->subh.init_hdr = (struct sctp_inithdr *)chunk->skb->data; @@ -1810,9 +1817,9 @@ static enum sctp_disposition sctp_sf_do_dupcook_a( * its peer. */ if (sctp_state(asoc, SHUTDOWN_ACK_SENT)) { - disposition = sctp_sf_do_9_2_reshutack(net, ep, asoc, - SCTP_ST_CHUNK(chunk->chunk_hdr->type), - chunk, commands); + disposition = __sctp_sf_do_9_2_reshutack(net, ep, asoc, + SCTP_ST_CHUNK(chunk->chunk_hdr->type), + chunk, commands); if (SCTP_DISPOSITION_NOMEM == disposition) goto nomem; @@ -2141,9 +2148,11 @@ enum sctp_disposition sctp_sf_do_5_2_4_dupcook( * enough for the chunk header. Cookie length verification is * done later. */ - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) - return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, - commands); + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) { + if (!sctp_vtag_verify(chunk, asoc)) + asoc = NULL; + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); + } /* "Decode" the chunk. We have no optional parameters so we * are in good shape. @@ -2280,7 +2289,7 @@ enum sctp_disposition sctp_sf_shutdown_pending_abort( */ if (SCTP_ADDR_DEL == sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) - return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); if (!sctp_err_chunk_valid(chunk)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); @@ -2326,7 +2335,7 @@ enum sctp_disposition sctp_sf_shutdown_sent_abort( */ if (SCTP_ADDR_DEL == sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) - return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); if (!sctp_err_chunk_valid(chunk)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); @@ -2596,7 +2605,7 @@ enum sctp_disposition sctp_sf_do_9_1_abort( */ if (SCTP_ADDR_DEL == sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) - return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); if (!sctp_err_chunk_valid(chunk)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); @@ -2909,13 +2918,11 @@ enum sctp_disposition sctp_sf_do_9_2_shut_ctsn( * that belong to this association, it should discard the INIT chunk and * retransmit the SHUTDOWN ACK chunk. */ -enum sctp_disposition sctp_sf_do_9_2_reshutack( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const union sctp_subtype type, - void *arg, - struct sctp_cmd_seq *commands) +static enum sctp_disposition +__sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, void *arg, + struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = arg; struct sctp_chunk *reply; @@ -2949,6 +2956,26 @@ enum sctp_disposition sctp_sf_do_9_2_reshutack( return SCTP_DISPOSITION_NOMEM; } +enum sctp_disposition +sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, void *arg, + struct sctp_cmd_seq *commands) +{ + struct sctp_chunk *chunk = arg; + + if (!chunk->singleton) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + + if (chunk->sctp_hdr->vtag != 0) + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); + + return __sctp_sf_do_9_2_reshutack(net, ep, asoc, type, arg, commands); +} + /* * sctp_sf_do_ecn_cwr * @@ -3562,6 +3589,9 @@ enum sctp_disposition sctp_sf_ootb(struct net *net, SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); + if (asoc && !sctp_vtag_verify(chunk, asoc)) + asoc = NULL; + ch = (struct sctp_chunkhdr *)chunk->chunk_hdr; do { /* Report violation if the chunk is less then minimal */ @@ -3677,12 +3707,6 @@ static enum sctp_disposition sctp_sf_shut_8_4_5( SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); - /* If the chunk length is invalid, we don't want to process - * the reset of the packet. - */ - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) - return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); - /* We need to discard the rest of the packet to prevent * potential bomming attacks from additional bundled chunks. * This is documented in SCTP Threats ID. @@ -3710,6 +3734,9 @@ enum sctp_disposition sctp_sf_do_8_5_1_E_sa(struct net *net, { struct sctp_chunk *chunk = arg; + if (!sctp_vtag_verify(chunk, asoc)) + asoc = NULL; + /* Make sure that the SHUTDOWN_ACK chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, @@ -3745,6 +3772,11 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net, return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } + /* Make sure that the ASCONF ADDIP chunk has a valid length. */ + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_addip_chunk))) + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, + commands); + /* ADD-IP: Section 4.1.1 * This chunk MUST be sent in an authenticated way by using * the mechanism defined in [I-D.ietf-tsvwg-sctp-auth]. If this chunk @@ -3753,13 +3785,7 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net, */ if (!asoc->peer.asconf_capable || (!net->sctp.addip_noauth && !chunk->auth)) - return sctp_sf_discard_chunk(net, ep, asoc, type, arg, - commands); - - /* Make sure that the ASCONF ADDIP chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_addip_chunk))) - return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, - commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); hdr = (struct sctp_addiphdr *)chunk->skb->data; serial = ntohl(hdr->serial); @@ -3888,6 +3914,12 @@ enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net, return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } + /* Make sure that the ADDIP chunk has a valid length. */ + if (!sctp_chunk_length_valid(asconf_ack, + sizeof(struct sctp_addip_chunk))) + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, + commands); + /* ADD-IP, Section 4.1.2: * This chunk MUST be sent in an authenticated way by using * the mechanism defined in [I-D.ietf-tsvwg-sctp-auth]. If this chunk @@ -3896,14 +3928,7 @@ enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net, */ if (!asoc->peer.asconf_capable || (!net->sctp.addip_noauth && !asconf_ack->auth)) - return sctp_sf_discard_chunk(net, ep, asoc, type, arg, - commands); - - /* Make sure that the ADDIP chunk has a valid length. */ - if (!sctp_chunk_length_valid(asconf_ack, - sizeof(struct sctp_addip_chunk))) - return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, - commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); addip_hdr = (struct sctp_addiphdr *)asconf_ack->skb->data; rcvd_serial = ntohl(addip_hdr->serial); @@ -4475,6 +4500,9 @@ enum sctp_disposition sctp_sf_discard_chunk(struct net *net, { struct sctp_chunk *chunk = arg; + if (asoc && !sctp_vtag_verify(chunk, asoc)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Make sure that the chunk has a valid length. * Since we don't know the chunk type, we use a general * chunkhdr structure to make a comparison. @@ -4542,6 +4570,9 @@ enum sctp_disposition sctp_sf_violation(struct net *net, { struct sctp_chunk *chunk = arg; + if (!sctp_vtag_verify(chunk, asoc)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Make sure that the chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, @@ -6248,6 +6279,7 @@ static struct sctp_packet *sctp_ootb_pkt_new( * yet. */ switch (chunk->chunk_hdr->type) { + case SCTP_CID_INIT: case SCTP_CID_INIT_ACK: { struct sctp_initack_chunk *initack; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 2146372adff43..c76b40322ac7d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5395,11 +5395,12 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), } EXPORT_SYMBOL_GPL(sctp_transport_lookup_process); -int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), - int (*cb_done)(struct sctp_transport *, void *), - struct net *net, int *pos, void *p) { +int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done, + struct net *net, int *pos, void *p) +{ struct rhashtable_iter hti; struct sctp_transport *tsp; + struct sctp_endpoint *ep; int ret; again: @@ -5408,26 +5409,32 @@ int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), tsp = sctp_transport_get_idx(net, &hti, *pos + 1); for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) { - ret = cb(tsp, p); - if (ret) - break; + ep = tsp->asoc->ep; + if (sctp_endpoint_hold(ep)) { /* asoc can be peeled off */ + ret = cb(ep, tsp, p); + if (ret) + break; + sctp_endpoint_put(ep); + } (*pos)++; sctp_transport_put(tsp); } sctp_transport_walk_stop(&hti); if (ret) { - if (cb_done && !cb_done(tsp, p)) { + if (cb_done && !cb_done(ep, tsp, p)) { (*pos)++; + sctp_endpoint_put(ep); sctp_transport_put(tsp); goto again; } + sctp_endpoint_put(ep); sctp_transport_put(tsp); } return ret; } -EXPORT_SYMBOL_GPL(sctp_for_each_transport); +EXPORT_SYMBOL_GPL(sctp_transport_traverse_process); /* 7.2.1 Association Status (SCTP_STATUS) @@ -5675,7 +5682,7 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp) * Set the daddr and initialize id to something more random and also * copy over any ip options. */ - sp->pf->to_sk_daddr(&asoc->peer.primary_addr, sk); + sp->pf->to_sk_daddr(&asoc->peer.primary_addr, sock->sk); sp->pf->copy_ip_options(sk, sock->sk); /* Populate the fields of the newsk from the oldsk and migrate the diff --git a/net/sctp/stream.c b/net/sctp/stream.c index cd20638b61514..56762745d6e4e 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -137,7 +137,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, ret = sctp_stream_alloc_out(stream, outcnt, gfp); if (ret) - goto out_err; + return ret; for (i = 0; i < stream->outcnt; i++) SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; @@ -145,22 +145,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, handle_in: sctp_stream_interleave_init(stream); if (!incnt) - goto out; - - ret = sctp_stream_alloc_in(stream, incnt, gfp); - if (ret) - goto in_err; - - goto out; + return 0; -in_err: - sched->free(stream); - genradix_free(&stream->in); -out_err: - genradix_free(&stream->out); - stream->outcnt = 0; -out: - return ret; + return sctp_stream_alloc_in(stream, incnt, gfp); } int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid) diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c index 99e5f69fbb742..a2e1d34f52c5b 100644 --- a/net/sctp/stream_sched.c +++ b/net/sctp/stream_sched.c @@ -163,7 +163,7 @@ int sctp_sched_set_sched(struct sctp_association *asoc, if (!SCTP_SO(&asoc->stream, i)->ext) continue; - ret = n->init_sid(&asoc->stream, i, GFP_KERNEL); + ret = n->init_sid(&asoc->stream, i, GFP_ATOMIC); if (ret) goto err; } diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index d2886a04ed7fc..394491692a078 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -183,7 +183,9 @@ static int smc_release(struct socket *sock) /* cleanup for a dangling non-blocking connect */ if (smc->connect_nonblock && sk->sk_state == SMC_INIT) tcp_abort(smc->clcsock->sk, ECONNABORTED); - flush_work(&smc->connect_work); + + if (cancel_work_sync(&smc->connect_work)) + sock_put(&smc->sk); /* sock_hold in smc_connect for passive closing */ if (sk->sk_state == SMC_LISTEN) /* smc_close_non_accepted() is called and acquires @@ -467,12 +469,26 @@ static void smc_link_save_peer_info(struct smc_link *link, static void smc_switch_to_fallback(struct smc_sock *smc) { + wait_queue_head_t *smc_wait = sk_sleep(&smc->sk); + wait_queue_head_t *clc_wait = sk_sleep(smc->clcsock->sk); + unsigned long flags; + smc->use_fallback = true; if (smc->sk.sk_socket && smc->sk.sk_socket->file) { smc->clcsock->file = smc->sk.sk_socket->file; smc->clcsock->file->private_data = smc->clcsock; smc->clcsock->wq.fasync_list = smc->sk.sk_socket->wq.fasync_list; + + /* There may be some entries remaining in + * smc socket->wq, which should be removed + * to clcsocket->wq during the fallback. + */ + spin_lock_irqsave(&smc_wait->lock, flags); + spin_lock_nested(&clc_wait->lock, SINGLE_DEPTH_NESTING); + list_splice_init(&smc_wait->head, &clc_wait->head); + spin_unlock(&clc_wait->lock); + spin_unlock_irqrestore(&smc_wait->lock, flags); } } @@ -787,7 +803,7 @@ static void smc_connect_work(struct work_struct *work) if (smc->clcsock->sk->sk_err) { smc->sk.sk_err = smc->clcsock->sk->sk_err; } else if ((1 << smc->clcsock->sk->sk_state) & - (TCPF_SYN_SENT | TCP_SYN_RECV)) { + (TCPF_SYN_SENT | TCPF_SYN_RECV)) { rc = sk_stream_wait_connect(smc->clcsock->sk, &timeo); if ((rc == -EPIPE) && ((1 << smc->clcsock->sk->sk_state) & @@ -800,6 +816,8 @@ static void smc_connect_work(struct work_struct *work) smc->sk.sk_state = SMC_CLOSED; if (rc == -EPIPE || rc == -EAGAIN) smc->sk.sk_err = EPIPE; + else if (rc == -ECONNREFUSED) + smc->sk.sk_err = ECONNREFUSED; else if (signal_pending(current)) smc->sk.sk_err = -sock_intr_errno(timeo); sock_put(&smc->sk); /* passive closing */ @@ -859,9 +877,9 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr, if (rc && rc != -EINPROGRESS) goto out; - sock_hold(&smc->sk); /* sock put in passive closing */ if (smc->use_fallback) goto out; + sock_hold(&smc->sk); /* sock put in passive closing */ if (flags & O_NONBLOCK) { if (schedule_work(&smc->connect_work)) smc->connect_nonblock = 1; @@ -1658,8 +1676,10 @@ static __poll_t smc_poll(struct file *file, struct socket *sock, static int smc_shutdown(struct socket *sock, int how) { struct sock *sk = sock->sk; + bool do_shutdown = true; struct smc_sock *smc; int rc = -EINVAL; + int old_state; int rc1 = 0; smc = smc_sk(sk); @@ -1680,13 +1700,19 @@ static int smc_shutdown(struct socket *sock, int how) if (smc->use_fallback) { rc = kernel_sock_shutdown(smc->clcsock, how); sk->sk_shutdown = smc->clcsock->sk->sk_shutdown; - if (sk->sk_shutdown == SHUTDOWN_MASK) + if (sk->sk_shutdown == SHUTDOWN_MASK) { sk->sk_state = SMC_CLOSED; + sock_put(sk); + } goto out; } switch (how) { case SHUT_RDWR: /* shutdown in both directions */ + old_state = sk->sk_state; rc = smc_close_active(smc); + if (old_state == SMC_ACTIVE && + sk->sk_state == SMC_PEERCLOSEWAIT1) + do_shutdown = false; break; case SHUT_WR: rc = smc_close_shutdown_write(smc); @@ -1696,7 +1722,7 @@ static int smc_shutdown(struct socket *sock, int how) /* nothing more to do because peer is not involved */ break; } - if (smc->clcsock) + if (do_shutdown && smc->clcsock) rc1 = kernel_sock_shutdown(smc->clcsock, how); /* map sock_shutdown_cmd constants to sk_shutdown value range */ sk->sk_shutdown |= how + 1; diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index aee9ccfa99c22..ade1232699bbf 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -164,7 +164,8 @@ static int smc_clc_prfx_set(struct socket *clcsock, goto out_rel; } /* get address to which the internal TCP socket is bound */ - kernel_getsockname(clcsock, (struct sockaddr *)&addrs); + if (kernel_getsockname(clcsock, (struct sockaddr *)&addrs) < 0) + goto out_rel; /* analyze IP specific data of net_device belonging to TCP socket */ addr6 = (struct sockaddr_in6 *)&addrs; rcu_read_lock(); diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index fc06720b53c14..543948d970c52 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -183,6 +183,7 @@ int smc_close_active(struct smc_sock *smc) int old_state; long timeout; int rc = 0; + int rc1 = 0; timeout = current->flags & PF_EXITING ? 0 : sock_flag(sk, SOCK_LINGER) ? @@ -218,6 +219,15 @@ int smc_close_active(struct smc_sock *smc) if (rc) break; sk->sk_state = SMC_PEERCLOSEWAIT1; + + /* actively shutdown clcsock before peer close it, + * prevent peer from entering TIME_WAIT state. + */ + if (smc->clcsock && smc->clcsock->sk) { + rc1 = kernel_sock_shutdown(smc->clcsock, + SHUT_RDWR); + rc = rc ? rc : rc1; + } } else { /* peer event has changed the state */ goto again; diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index aeea67f908415..66cdfd5725acf 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -342,8 +342,8 @@ void smc_conn_free(struct smc_connection *conn) } else { smc_cdc_tx_dismiss_slots(conn); } - smc_lgr_unregister_conn(conn); smc_buf_unuse(conn, lgr); /* allow buffer reuse */ + smc_lgr_unregister_conn(conn); conn->lgr = NULL; if (!lgr->conns_num) @@ -632,7 +632,8 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) !lgr->sync_err && lgr->vlan_id == ini->vlan_id && (role == SMC_CLNT || - lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) { + (lgr->conns_num < SMC_RMBS_PER_LGR_MAX && + !bitmap_full(lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX)))) { /* link group found */ ini->cln_first_contact = SMC_REUSE_CONTACT; conn->lgr = lgr; @@ -733,7 +734,7 @@ static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize, */ static inline int smc_rmb_wnd_update_limit(int rmbe_size) { - return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2); + return max_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2); } static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr, diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 571e6d84da3ba..660608202f284 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -295,8 +295,9 @@ static struct smc_ib_device *smc_pnet_find_ib(char *ib_name) list_for_each_entry(ibdev, &smc_ib_devices.list, list) { if (!strncmp(ibdev->ibdev->name, ib_name, sizeof(ibdev->ibdev->name)) || - !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name, - IB_DEVICE_NAME_MAX - 1)) { + (ibdev->ibdev->dev.parent && + !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name, + IB_DEVICE_NAME_MAX - 1))) { goto out; } } diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index 97e8369002d71..9e89bd1f706b5 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -348,12 +348,12 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, } break; } + if (!timeo) + return -EAGAIN; if (signal_pending(current)) { read_done = sock_intr_errno(timeo); break; } - if (!timeo) - return -EAGAIN; } if (!smc_rx_data_available(conn)) { diff --git a/net/socket.c b/net/socket.c index 0ae4ee7223dd5..5e0b7c8de1ef8 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1054,7 +1054,7 @@ static long sock_do_ioctl(struct net *net, struct socket *sock, rtnl_unlock(); if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf))) err = -EFAULT; - } else { + } else if (is_socket_ioctl_cmd(cmd)) { struct ifreq ifr; bool need_copyout; if (copy_from_user(&ifr, argp, sizeof(struct ifreq))) @@ -1063,6 +1063,8 @@ static long sock_do_ioctl(struct net *net, struct socket *sock, if (!err && need_copyout) if (copy_to_user(argp, &ifr, sizeof(struct ifreq))) return -EFAULT; + } else { + err = -ENOTTY; } return err; } @@ -3266,6 +3268,8 @@ static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd, struct ifreq ifreq; u32 data32; + if (!is_socket_ioctl_cmd(cmd)) + return -ENOTTY; if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ)) return -EFAULT; if (get_user(data32, &u_ifreq32->ifr_data)) diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index b3815c1e8f2ea..cd9954c4ad808 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -27,18 +27,10 @@ static struct workqueue_struct *strp_wq; -struct _strp_msg { - /* Internal cb structure. struct strp_msg must be first for passing - * to upper layer. - */ - struct strp_msg strp; - int accum_len; -}; - static inline struct _strp_msg *_strp_msg(struct sk_buff *skb) { return (struct _strp_msg *)((void *)skb->cb + - offsetof(struct qdisc_skb_cb, data)); + offsetof(struct sk_skb_cb, strp)); } /* Lower lock held */ diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index d5470c7fe8792..c0016473a255a 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1937,7 +1937,7 @@ gss_svc_init_net(struct net *net) goto out2; return 0; out2: - destroy_use_gss_proxy_proc_entry(net); + rsi_cache_destroy_net(net); out1: rsc_cache_destroy_net(net); return rv; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b6039642df67e..08e1ccc01e983 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2223,6 +2223,7 @@ call_transmit_status(struct rpc_task *task) * socket just returned a connection error, * then hold onto the transport lock. */ + case -ENOMEM: case -ENOBUFS: rpc_delay(task, HZ>>2); /* fall through */ @@ -2308,6 +2309,7 @@ call_bc_transmit_status(struct rpc_task *task) case -ENOTCONN: case -EPIPE: break; + case -ENOMEM: case -ENOBUFS: rpc_delay(task, HZ>>2); /* fall through */ @@ -2392,6 +2394,11 @@ call_status(struct rpc_task *task) case -EPIPE: case -EAGAIN: break; + case -ENFILE: + case -ENOBUFS: + case -ENOMEM: + rpc_delay(task, HZ>>2); + break; case -EIO: /* shutdown or soft timeout */ goto out_exit; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 8fc4a6b3422f4..32ffa801a5b97 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -1039,8 +1039,10 @@ int rpc_malloc(struct rpc_task *task) struct rpc_buffer *buf; gfp_t gfp = GFP_NOFS; + if (RPC_IS_ASYNC(task)) + gfp = GFP_NOWAIT | __GFP_NOWARN; if (RPC_IS_SWAPPER(task)) - gfp = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; + gfp |= __GFP_MEMALLOC; size += sizeof(struct rpc_buffer); if (size <= RPC_BUFFER_MAXSIZE) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 7ef37054071f8..daa4165f11794 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -608,7 +608,11 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, */ xdr->p = (void *)p + frag2bytes; space_left = xdr->buf->buflen - xdr->buf->len; - xdr->end = (void *)p + min_t(int, space_left, PAGE_SIZE); + if (space_left - frag1bytes >= PAGE_SIZE) + xdr->end = (void *)p + PAGE_SIZE; + else + xdr->end = (void *)p + space_left - frag1bytes; + xdr->buf->page_len += frag2bytes; xdr->buf->len += nbytes; return p; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 639837b3a5d90..94ae95c57f78a 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -716,6 +716,21 @@ void xprt_disconnect_done(struct rpc_xprt *xprt) } EXPORT_SYMBOL_GPL(xprt_disconnect_done); +/** + * xprt_schedule_autoclose_locked - Try to schedule an autoclose RPC call + * @xprt: transport to disconnect + */ +static void xprt_schedule_autoclose_locked(struct rpc_xprt *xprt) +{ + if (test_and_set_bit(XPRT_CLOSE_WAIT, &xprt->state)) + return; + if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) + queue_work(xprtiod_workqueue, &xprt->task_cleanup); + else if (xprt->snd_task && !test_bit(XPRT_SND_IS_COOKIE, &xprt->state)) + rpc_wake_up_queued_task_set_status(&xprt->pending, + xprt->snd_task, -ENOTCONN); +} + /** * xprt_force_disconnect - force a transport to disconnect * @xprt: transport to disconnect @@ -725,13 +740,7 @@ void xprt_force_disconnect(struct rpc_xprt *xprt) { /* Don't race with the test_bit() in xprt_clear_locked() */ spin_lock(&xprt->transport_lock); - set_bit(XPRT_CLOSE_WAIT, &xprt->state); - /* Try to schedule an autoclose RPC call */ - if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) - queue_work(xprtiod_workqueue, &xprt->task_cleanup); - else if (xprt->snd_task) - rpc_wake_up_queued_task_set_status(&xprt->pending, - xprt->snd_task, -ENOTCONN); + xprt_schedule_autoclose_locked(xprt); spin_unlock(&xprt->transport_lock); } EXPORT_SYMBOL_GPL(xprt_force_disconnect); @@ -771,11 +780,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie) goto out; if (test_bit(XPRT_CLOSING, &xprt->state)) goto out; - set_bit(XPRT_CLOSE_WAIT, &xprt->state); - /* Try to schedule an autoclose RPC call */ - if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) - queue_work(xprtiod_workqueue, &xprt->task_cleanup); - xprt_wake_pending_tasks(xprt, -EAGAIN); + xprt_schedule_autoclose_locked(xprt); out: spin_unlock(&xprt->transport_lock); } @@ -820,6 +825,7 @@ bool xprt_lock_connect(struct rpc_xprt *xprt, goto out; if (xprt->snd_task != task) goto out; + set_bit(XPRT_SND_IS_COOKIE, &xprt->state); xprt->snd_task = cookie; ret = true; out: @@ -835,6 +841,7 @@ void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie) if (!test_bit(XPRT_LOCKED, &xprt->state)) goto out; xprt->snd_task =NULL; + clear_bit(XPRT_SND_IS_COOKIE, &xprt->state); xprt->ops->release_xprt(xprt, NULL); xprt_schedule_autodisconnect(xprt); out: @@ -861,10 +868,7 @@ void xprt_connect(struct rpc_task *task) if (!xprt_lock_write(xprt, task)) return; - if (test_and_clear_bit(XPRT_CLOSE_WAIT, &xprt->state)) - xprt->ops->close(xprt); - - if (!xprt_connected(xprt)) { + if (!xprt_connected(xprt) && !test_bit(XPRT_CLOSE_WAIT, &xprt->state)) { task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie; rpc_sleep_on_timeout(&xprt->pending, task, NULL, xprt_request_timeout(task->tk_rqstp)); @@ -1536,15 +1540,14 @@ xprt_transmit(struct rpc_task *task) { struct rpc_rqst *next, *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; - int counter, status; + int status; spin_lock(&xprt->queue_lock); - counter = 0; - while (!list_empty(&xprt->xmit_queue)) { - if (++counter == 20) + for (;;) { + next = list_first_entry_or_null(&xprt->xmit_queue, + struct rpc_rqst, rq_xmit); + if (!next) break; - next = list_first_entry(&xprt->xmit_queue, - struct rpc_rqst, rq_xmit); xprt_pin_rqst(next); spin_unlock(&xprt->queue_lock); status = xprt_request_transmit(next, task); @@ -1552,13 +1555,16 @@ xprt_transmit(struct rpc_task *task) status = 0; spin_lock(&xprt->queue_lock); xprt_unpin_rqst(next); - if (status == 0) { - if (!xprt_request_data_received(task) || - test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) - continue; - } else if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) - task->tk_status = status; - break; + if (status < 0) { + if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) + task->tk_status = status; + break; + } + /* Was @task transmitted, and has it received a reply? */ + if (xprt_request_data_received(task) && + !test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) + break; + cond_resched_lock(&xprt->queue_lock); } spin_unlock(&xprt->queue_lock); } @@ -2002,7 +2008,14 @@ static void xprt_destroy(struct rpc_xprt *xprt) */ wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_UNINTERRUPTIBLE); + /* + * xprt_schedule_autodisconnect() can run after XPRT_LOCKED + * is cleared. We use ->transport_lock to ensure the mod_timer() + * can only run *before* del_time_sync(), never after. + */ + spin_lock(&xprt->transport_lock); del_timer_sync(&xprt->timer); + spin_unlock(&xprt->transport_lock); /* * Destroy sockets etc from the system workqueue so they can diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index c091417bd799e..60aaed9457e44 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -1042,6 +1042,7 @@ static bool rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep) #if defined(CONFIG_SUNRPC_BACKCHANNEL) { + struct rpc_xprt *xprt = &r_xprt->rx_xprt; struct xdr_stream *xdr = &rep->rr_stream; __be32 *p; @@ -1065,6 +1066,10 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep) if (*p != cpu_to_be32(RPC_CALL)) return false; + /* No bc service. */ + if (xprt->bc_serv == NULL) + return false; + /* Now that we are sure this is a backchannel call, * advance to the RPC header. */ diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 2f21e3c52bfc1..866bcd99bdc0e 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -626,8 +626,10 @@ xprt_rdma_allocate(struct rpc_task *task) gfp_t flags; flags = RPCRDMA_DEF_GFP; + if (RPC_IS_ASYNC(task)) + flags = GFP_NOWAIT | __GFP_NOWARN; if (RPC_IS_SWAPPER(task)) - flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; + flags |= __GFP_MEMALLOC; if (!rpcrdma_check_regbuf(r_xprt, req->rl_sendbuf, rqst->rq_callsize, flags)) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 8ffc54b6661f8..81f0e03b71b6d 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -872,12 +872,12 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, /** * xs_nospace - handle transmit was incomplete * @req: pointer to RPC request + * @transport: pointer to struct sock_xprt * */ -static int xs_nospace(struct rpc_rqst *req) +static int xs_nospace(struct rpc_rqst *req, struct sock_xprt *transport) { - struct rpc_xprt *xprt = req->rq_xprt; - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + struct rpc_xprt *xprt = &transport->xprt; struct sock *sk = transport->inet; int ret = -EAGAIN; @@ -891,25 +891,49 @@ static int xs_nospace(struct rpc_rqst *req) /* Don't race with disconnect */ if (xprt_connected(xprt)) { + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags); + rcu_read_unlock(); + /* wait for more buffer space */ + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); sk->sk_write_pending++; xprt_wait_for_buffer_space(xprt); } else ret = -ENOTCONN; spin_unlock(&xprt->transport_lock); + return ret; +} - /* Race breaker in case memory is freed before above code is called */ - if (ret == -EAGAIN) { - struct socket_wq *wq; +static int xs_sock_nospace(struct rpc_rqst *req) +{ + struct sock_xprt *transport = + container_of(req->rq_xprt, struct sock_xprt, xprt); + struct sock *sk = transport->inet; + int ret = -EAGAIN; - rcu_read_lock(); - wq = rcu_dereference(sk->sk_wq); - set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags); - rcu_read_unlock(); + lock_sock(sk); + if (!sock_writeable(sk)) + ret = xs_nospace(req, transport); + release_sock(sk); + return ret; +} - sk->sk_write_space(sk); - } +static int xs_stream_nospace(struct rpc_rqst *req) +{ + struct sock_xprt *transport = + container_of(req->rq_xprt, struct sock_xprt, xprt); + struct sock *sk = transport->inet; + int ret = -EAGAIN; + + lock_sock(sk); + if (!sk_stream_memory_free(sk)) + ret = xs_nospace(req, transport); + release_sock(sk); return ret; } @@ -965,7 +989,7 @@ static int xs_local_send_request(struct rpc_rqst *req) /* Close the stream if the previous transmission was incomplete */ if (xs_send_request_was_aborted(transport, req)) { - xs_close(xprt); + xprt_force_disconnect(xprt); return -ENOTCONN; } @@ -996,14 +1020,14 @@ static int xs_local_send_request(struct rpc_rqst *req) case -ENOBUFS: break; case -EAGAIN: - status = xs_nospace(req); + status = xs_stream_nospace(req); break; default: dprintk("RPC: sendmsg returned unrecognized error %d\n", -status); /* fall through */ case -EPIPE: - xs_close(xprt); + xprt_force_disconnect(xprt); status = -ENOTCONN; } @@ -1068,7 +1092,7 @@ static int xs_udp_send_request(struct rpc_rqst *req) /* Should we call xs_close() here? */ break; case -EAGAIN: - status = xs_nospace(req); + status = xs_sock_nospace(req); break; case -ENETUNREACH: case -ENOBUFS: @@ -1181,7 +1205,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req) /* Should we call xs_close() here? */ break; case -EAGAIN: - status = xs_nospace(req); + status = xs_stream_nospace(req); break; case -ECONNRESET: case -ECONNREFUSED: @@ -1279,6 +1303,16 @@ static void xs_reset_transport(struct sock_xprt *transport) if (sk == NULL) return; + /* + * Make sure we're calling this in a context from which it is safe + * to call __fput_sync(). In practice that means rpciod and the + * system workqueue. + */ + if (!(current->flags & PF_WQ_WORKER)) { + WARN_ON_ONCE(1); + set_bit(XPRT_CLOSE_WAIT, &xprt->state); + return; + } if (atomic_read(&transport->xprt.swapper)) sk_clear_memalloc(sk); @@ -1302,7 +1336,7 @@ static void xs_reset_transport(struct sock_xprt *transport) mutex_unlock(&transport->recv_mutex); trace_rpc_socket_close(xprt, sock); - fput(filp); + __fput_sync(filp); xprt_disconnect_done(xprt); } @@ -2360,10 +2394,14 @@ static void xs_tcp_setup_socket(struct work_struct *work) struct rpc_xprt *xprt = &transport->xprt; int status = -EIO; - if (!sock) { - sock = xs_create_sock(xprt, transport, - xs_addr(xprt)->sa_family, SOCK_STREAM, - IPPROTO_TCP, true); + if (xprt_connected(xprt)) + goto out; + if (test_and_clear_bit(XPRT_SOCK_CONNECT_SENT, + &transport->sock_state) || + !sock) { + xs_reset_transport(transport); + sock = xs_create_sock(xprt, transport, xs_addr(xprt)->sa_family, + SOCK_STREAM, IPPROTO_TCP, true); if (IS_ERR(sock)) { status = PTR_ERR(sock); goto out; @@ -2394,6 +2432,8 @@ static void xs_tcp_setup_socket(struct work_struct *work) break; case 0: case -EINPROGRESS: + set_bit(XPRT_SOCK_CONNECT_SENT, &transport->sock_state); + fallthrough; case -EALREADY: xprt_unlock_connect(xprt, transport); return; @@ -2447,11 +2487,7 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task) if (transport->sock != NULL) { dprintk("RPC: xs_connect delayed xprt %p for %lu " - "seconds\n", - xprt, xprt->reestablish_timeout / HZ); - - /* Start by resetting any existing state */ - xs_reset_transport(transport); + "seconds\n", xprt, xprt->reestablish_timeout / HZ); delay = xprt_reconnect_delay(xprt); xprt_reconnect_backoff(xprt, XS_TCP_INIT_REEST_TO); @@ -2939,9 +2975,6 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args) } xprt_set_bound(xprt); xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL); - ret = ERR_PTR(xs_local_setup_socket(transport)); - if (ret) - goto out_err; break; default: ret = ERR_PTR(-EAFNOSUPPORT); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index ea9ddea35a886..96773555af5ca 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -476,6 +476,9 @@ static int __switchdev_handle_port_obj_add(struct net_device *dev, * necessary to go through this helper. */ netdev_for_each_lower_dev(dev, lower_dev, iter) { + if (netif_is_bridge_master(lower_dev)) + continue; + err = __switchdev_handle_port_obj_add(lower_dev, port_obj_info, check_cb, add_cb); if (err && err != -EOPNOTSUPP) @@ -528,6 +531,9 @@ static int __switchdev_handle_port_obj_del(struct net_device *dev, * necessary to go through this helper. */ netdev_for_each_lower_dev(dev, lower_dev, iter) { + if (netif_is_bridge_master(lower_dev)) + continue; + err = __switchdev_handle_port_obj_del(lower_dev, port_obj_info, check_cb, del_cb); if (err && err != -EOPNOTSUPP) @@ -579,6 +585,9 @@ static int __switchdev_handle_port_attr_set(struct net_device *dev, * necessary to go through this helper. */ netdev_for_each_lower_dev(dev, lower_dev, iter) { + if (netif_is_bridge_master(lower_dev)) + continue; + err = __switchdev_handle_port_attr_set(lower_dev, port_attr_info, check_cb, set_cb); if (err && err != -EOPNOTSUPP) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 8bd2454cc89dc..577f71dd63fb4 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -248,9 +248,8 @@ static int tipc_enable_bearer(struct net *net, const char *name, u32 i; if (!bearer_name_validate(name, &b_names)) { - errstr = "illegal name"; NL_SET_ERR_MSG(extack, "Illegal name"); - goto rejected; + return res; } if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) { diff --git a/net/tipc/link.c b/net/tipc/link.c index f25010261a9e0..8f2ee71c63c6d 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1953,15 +1953,18 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, u16 peers_tol = msg_link_tolerance(hdr); u16 peers_prio = msg_linkprio(hdr); u16 rcv_nxt = l->rcv_nxt; - u16 dlen = msg_data_sz(hdr); + u32 dlen = msg_data_sz(hdr), glen = 0; int mtyp = msg_type(hdr); bool reply = msg_probe(hdr); - u16 glen = 0; void *data; char *if_name; int rc = 0; trace_tipc_proto_rcv(skb, false, l->name); + + if (dlen > U16_MAX) + goto exit; + if (tipc_link_is_blocked(l) || !xmitq) goto exit; @@ -2063,7 +2066,8 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, if (glen != tipc_gap_ack_blks_sz(ga->gack_cnt)) ga = NULL; } - + if(glen > dlen) + break; tipc_mon_rcv(l->net, data + glen, dlen - glen, l->addr, &l->mon_state, l->bearer_id); diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index 58708b4c7719f..e7155a7743001 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -457,6 +457,8 @@ void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr, state->probing = false; /* Sanity check received domain record */ + if (new_member_cnt > MAX_MON_DOMAIN) + return; if (dlen < dom_rec_len(arrv_dom, 0)) return; if (dlen != dom_rec_len(arrv_dom, new_member_cnt)) diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 836e629e8f4ab..661bc2551a0a2 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -290,7 +290,7 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i, pr_warn_ratelimited("Failed to remove binding %u,%u from %x\n", type, lower, node); } else { - pr_warn("Unrecognized name table message received\n"); + pr_warn_ratelimited("Unknown name table message received\n"); } return false; } diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 66a65c2cdb23c..c52083522b28e 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -812,7 +812,7 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg, list_for_each_entry(p, &sr->all_publ, all_publ) if (p->key == *last_key) break; - if (p->key != *last_key) + if (list_entry_is_head(p, &sr->all_publ, all_publ)) return -EPIPE; } else { p = list_first_entry(&sr->all_publ, diff --git a/net/tipc/socket.c b/net/tipc/socket.c index a5922ce9109cf..58c4d61d603f6 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -455,6 +455,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, sock_init_data(sock, sk); tipc_set_sk_state(sk, TIPC_OPEN); if (tipc_sk_insert(tsk)) { + sk_free(sk); pr_warn("Socket create failed; port number exhausted\n"); return -EINVAL; } @@ -1756,6 +1757,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, bool connected = !tipc_sk_type_connectionless(sk); struct tipc_sock *tsk = tipc_sk(sk); int rc, err, hlen, dlen, copy; + struct tipc_skb_cb *skb_cb; struct sk_buff_head xmitq; struct tipc_msg *hdr; struct sk_buff *skb; @@ -1779,6 +1781,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, if (unlikely(rc)) goto exit; skb = skb_peek(&sk->sk_receive_queue); + skb_cb = TIPC_SKB_CB(skb); hdr = buf_msg(skb); dlen = msg_data_sz(hdr); hlen = msg_hdr_sz(hdr); @@ -1798,18 +1801,33 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, /* Capture data if non-error msg, otherwise just set return value */ if (likely(!err)) { - copy = min_t(int, dlen, buflen); - if (unlikely(copy != dlen)) - m->msg_flags |= MSG_TRUNC; - rc = skb_copy_datagram_msg(skb, hlen, m, copy); + int offset = skb_cb->bytes_read; + + copy = min_t(int, dlen - offset, buflen); + rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy); + if (unlikely(rc)) + goto exit; + if (unlikely(offset + copy < dlen)) { + if (flags & MSG_EOR) { + if (!(flags & MSG_PEEK)) + skb_cb->bytes_read = offset + copy; + } else { + m->msg_flags |= MSG_TRUNC; + skb_cb->bytes_read = 0; + } + } else { + if (flags & MSG_EOR) + m->msg_flags |= MSG_EOR; + skb_cb->bytes_read = 0; + } } else { copy = 0; rc = 0; - if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) + if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) { rc = -ECONNRESET; + goto exit; + } } - if (unlikely(rc)) - goto exit; /* Mark message as group event if applicable */ if (unlikely(grp_evt)) { @@ -1832,6 +1850,9 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, tipc_node_distr_xmit(sock_net(sk), &xmitq); } + if (skb_cb->bytes_read) + goto exit; + tsk_advance_rx_queue(sk); if (likely(!connected)) @@ -2255,7 +2276,7 @@ static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, u32 dport, struct sk_buff_head *xmitq) { - unsigned long time_limit = jiffies + 2; + unsigned long time_limit = jiffies + usecs_to_jiffies(20000); struct sk_buff *skb; unsigned int lim; atomic_t *dcnt; @@ -2678,7 +2699,8 @@ static void tipc_sk_retry_connect(struct sock *sk, struct sk_buff_head *list) /* Try again later if dest link is congested */ if (tsk->cong_link_cnt) { - sk_reset_timer(sk, &sk->sk_timer, msecs_to_jiffies(100)); + sk_reset_timer(sk, &sk->sk_timer, + jiffies + msecs_to_jiffies(100)); return; } /* Prepare SYN for retransmit */ @@ -3570,7 +3592,7 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb, if (p->key == *last_publ) break; } - if (p->key != *last_publ) { + if (list_entry_is_head(p, &tsk->publications, binding_sock)) { /* We never set seq or call nl_dump_check_consistent() * this means that setting prev_seq here will cause the * consistence check to fail in the netlink callback diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 0f034c3bc37d7..2c3cf47d730bb 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -94,13 +94,16 @@ static void tls_device_queue_ctx_destruction(struct tls_context *ctx) unsigned long flags; spin_lock_irqsave(&tls_device_lock, flags); + if (unlikely(!refcount_dec_and_test(&ctx->refcount))) + goto unlock; + list_move_tail(&ctx->list, &tls_device_gc_list); /* schedule_work inside the spinlock * to make sure tls_device_down waits for that work. */ schedule_work(&tls_device_gc_work); - +unlock: spin_unlock_irqrestore(&tls_device_lock, flags); } @@ -191,8 +194,7 @@ static void tls_device_sk_destruct(struct sock *sk) clean_acked_data_disable(inet_csk(sk)); } - if (refcount_dec_and_test(&tls_ctx->refcount)) - tls_device_queue_ctx_destruction(tls_ctx); + tls_device_queue_ctx_destruction(tls_ctx); } void tls_device_free_resources_tx(struct sock *sk) @@ -470,11 +472,13 @@ static int tls_push_data(struct sock *sk, copy = min_t(size_t, size, (pfrag->size - pfrag->offset)); copy = min_t(size_t, copy, (max_open_record_len - record->len)); - rc = tls_device_copy_data(page_address(pfrag->page) + - pfrag->offset, copy, msg_iter); - if (rc) - goto handle_error; - tls_append_frag(record, pfrag, copy); + if (copy) { + rc = tls_device_copy_data(page_address(pfrag->page) + + pfrag->offset, copy, msg_iter); + if (rc) + goto handle_error; + tls_append_frag(record, pfrag, copy); + } size -= copy; if (!size) { diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 7fb5c067f4293..af3be9a29d6db 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -35,6 +35,7 @@ * SOFTWARE. */ +#include #include #include #include @@ -43,6 +44,14 @@ #include #include +noinline void tls_err_abort(struct sock *sk, int err) +{ + WARN_ON_ONCE(err >= 0); + /* sk->sk_err should contain a positive error code. */ + sk->sk_err = -err; + sk->sk_error_report(sk); +} + static int __skb_nsg(struct sk_buff *skb, int offset, int len, unsigned int recursion_level) { @@ -416,7 +425,7 @@ int tls_tx_records(struct sock *sk, int flags) tx_err: if (rc < 0 && rc != -EAGAIN) - tls_err_abort(sk, EBADMSG); + tls_err_abort(sk, -EBADMSG); return rc; } @@ -447,7 +456,7 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err) /* If err is already set on socket, return the same code */ if (sk->sk_err) { - ctx->async_wait.err = sk->sk_err; + ctx->async_wait.err = -sk->sk_err; } else { ctx->async_wait.err = err; tls_err_abort(sk, err); @@ -503,7 +512,7 @@ static int tls_do_encryption(struct sock *sk, memcpy(&rec->iv_data[iv_offset], tls_ctx->tx.iv, prot->iv_size + prot->salt_size); - xor_iv_with_seq(prot->version, rec->iv_data, tls_ctx->tx.rec_seq); + xor_iv_with_seq(prot->version, rec->iv_data + iv_offset, tls_ctx->tx.rec_seq); sge->offset += prot->prepend_size; sge->length -= prot->prepend_size; @@ -761,7 +770,7 @@ static int tls_push_record(struct sock *sk, int flags, msg_pl->sg.size + prot->tail_size, i); if (rc < 0) { if (rc != -EINPROGRESS) { - tls_err_abort(sk, EBADMSG); + tls_err_abort(sk, -EBADMSG); if (split) { tls_ctx->pending_open_record_frags = true; tls_merge_open_record(sk, rec, tmp, orig_end); @@ -1470,11 +1479,11 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, } if (prot->version == TLS_1_3_VERSION) memcpy(iv + iv_offset, tls_ctx->rx.iv, - crypto_aead_ivsize(ctx->aead_recv)); + prot->iv_size + prot->salt_size); else memcpy(iv + iv_offset, tls_ctx->rx.iv, prot->salt_size); - xor_iv_with_seq(prot->version, iv, tls_ctx->rx.rec_seq); + xor_iv_with_seq(prot->version, iv + iv_offset, tls_ctx->rx.rec_seq); /* Prepare AAD */ tls_make_aad(aad, rxm->full_len - prot->overhead_size + @@ -1822,7 +1831,7 @@ int tls_sw_recvmsg(struct sock *sk, err = decrypt_skb_update(sk, skb, &msg->msg_iter, &chunk, &zc, async_capable); if (err < 0 && err != -EINPROGRESS) { - tls_err_abort(sk, EBADMSG); + tls_err_abort(sk, -EBADMSG); goto recv_end; } @@ -2002,7 +2011,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, } if (err < 0) { - tls_err_abort(sk, EBADMSG); + tls_err_abort(sk, -EBADMSG); goto splice_read_end; } ctx->decrypted = true; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 52ee3a9bb7093..f33e90bd0683b 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -440,7 +440,7 @@ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) * -ECONNREFUSED. Otherwise, if we haven't queued any skbs * to other and its full, we will hang waiting for POLLOUT. */ - if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD)) + if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD)) return 1; if (connected) @@ -595,20 +595,42 @@ static void unix_release_sock(struct sock *sk, int embrion) static void init_peercred(struct sock *sk) { - put_pid(sk->sk_peer_pid); - if (sk->sk_peer_cred) - put_cred(sk->sk_peer_cred); + const struct cred *old_cred; + struct pid *old_pid; + + spin_lock(&sk->sk_peer_lock); + old_pid = sk->sk_peer_pid; + old_cred = sk->sk_peer_cred; sk->sk_peer_pid = get_pid(task_tgid(current)); sk->sk_peer_cred = get_current_cred(); + spin_unlock(&sk->sk_peer_lock); + + put_pid(old_pid); + put_cred(old_cred); } static void copy_peercred(struct sock *sk, struct sock *peersk) { - put_pid(sk->sk_peer_pid); - if (sk->sk_peer_cred) - put_cred(sk->sk_peer_cred); + const struct cred *old_cred; + struct pid *old_pid; + + if (sk < peersk) { + spin_lock(&sk->sk_peer_lock); + spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING); + } else { + spin_lock(&peersk->sk_peer_lock); + spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING); + } + old_pid = sk->sk_peer_pid; + old_cred = sk->sk_peer_cred; sk->sk_peer_pid = get_pid(peersk->sk_peer_pid); sk->sk_peer_cred = get_cred(peersk->sk_peer_cred); + + spin_unlock(&sk->sk_peer_lock); + spin_unlock(&peersk->sk_peer_lock); + + put_pid(old_pid); + put_cred(old_cred); } static int unix_listen(struct socket *sock, int backlog) @@ -2734,7 +2756,7 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, other = unix_peer(sk); if (other && unix_peer(other) != sk && - unix_recvq_full(other) && + unix_recvq_full_lockless(other) && unix_dgram_peer_wake_me(sk, other)) writable = 0; diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 12e2ddaf887f2..d45d5366115a7 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -192,8 +192,11 @@ void wait_for_unix_gc(void) { /* If number of inflight sockets is insane, * force a garbage collect right now. + * Paired with the WRITE_ONCE() in unix_inflight(), + * unix_notinflight() and gc_in_progress(). */ - if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress) + if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC && + !READ_ONCE(gc_in_progress)) unix_gc(); wait_event(unix_gc_wait, gc_in_progress == false); } @@ -213,7 +216,9 @@ void unix_gc(void) if (gc_in_progress) goto out; - gc_in_progress = true; + /* Paired with READ_ONCE() in wait_for_unix_gc(). */ + WRITE_ONCE(gc_in_progress, true); + /* First, select candidates for garbage collection. Only * in-flight sockets are considered, and from those only ones * which don't have any external reference. @@ -299,7 +304,10 @@ void unix_gc(void) /* All candidates should have been detached by now. */ BUG_ON(!list_empty(&gc_candidates)); - gc_in_progress = false; + + /* Paired with READ_ONCE() in wait_for_unix_gc(). */ + WRITE_ONCE(gc_in_progress, false); + wake_up(&unix_gc_wait); out: diff --git a/net/unix/scm.c b/net/unix/scm.c index 052ae709ce289..aa27a02478dc1 100644 --- a/net/unix/scm.c +++ b/net/unix/scm.c @@ -60,7 +60,8 @@ void unix_inflight(struct user_struct *user, struct file *fp) } else { BUG_ON(list_empty(&u->link)); } - unix_tot_inflight++; + /* Paired with READ_ONCE() in wait_for_unix_gc() */ + WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1); } user->unix_inflight++; spin_unlock(&unix_gc_lock); @@ -80,7 +81,8 @@ void unix_notinflight(struct user_struct *user, struct file *fp) if (atomic_long_dec_and_test(&u->inflight)) list_del_init(&u->link); - unix_tot_inflight--; + /* Paired with READ_ONCE() in wait_for_unix_gc() */ + WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1); } user->unix_inflight--; spin_unlock(&unix_gc_lock); diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index d4104144bab1b..d60d7caacbf5d 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1151,6 +1151,8 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, * non-blocking call. */ err = -EALREADY; + if (flags & O_NONBLOCK) + goto out; break; default: if ((sk->sk_state == TCP_LISTEN) || @@ -1220,6 +1222,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, sk->sk_state = sk->sk_state == TCP_ESTABLISHED ? TCP_CLOSING : TCP_CLOSE; sock->state = SS_UNCONNECTED; vsock_transport_cancel_pkt(vsk); + vsock_remove_connected(vsk); goto out_wait; } else if (timeout == 0) { err = -ETIMEDOUT; diff --git a/net/wireless/core.c b/net/wireless/core.c index 5d151e8f89320..f7228afd81ebd 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -5,7 +5,7 @@ * Copyright 2006-2010 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2019 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -891,9 +891,6 @@ int wiphy_register(struct wiphy *wiphy) return res; } - /* set up regulatory info */ - wiphy_regulatory_register(wiphy); - list_add_rcu(&rdev->list, &cfg80211_rdev_list); cfg80211_rdev_list_generation++; @@ -904,6 +901,9 @@ int wiphy_register(struct wiphy *wiphy) cfg80211_debugfs_rdev_add(rdev); nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY); + /* set up regulatory info */ + wiphy_regulatory_register(wiphy); + if (wiphy->regulatory_flags & REGULATORY_CUSTOM_REG) { struct regulatory_request request; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7b170ed6923e7..8459f5b6002e1 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3240,6 +3240,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag wdev_lock(wdev); switch (wdev->iftype) { case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: if (wdev->ssid_len && nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid)) goto nla_put_failure_locked; @@ -3480,7 +3481,7 @@ static int nl80211_valid_4addr(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype) { if (!use_4addr) { - if (netdev && (netdev->priv_flags & IFF_BRIDGE_PORT)) + if (netdev && netif_is_bridge_port(netdev)) return -EBUSY; return 0; } @@ -12320,6 +12321,9 @@ static int handle_nan_filter(struct nlattr *attr_filter, i = 0; nla_for_each_nested(attr, attr_filter, rem) { filter[i].filter = nla_memdup(attr, GFP_KERNEL); + if (!filter[i].filter) + goto err; + filter[i].len = nla_len(attr); i++; } @@ -12332,6 +12336,15 @@ static int handle_nan_filter(struct nlattr *attr_filter, } return 0; + +err: + i = 0; + nla_for_each_nested(attr, attr_filter, rem) { + kfree(filter[i].filter); + i++; + } + kfree(filter); + return -ENOMEM; } static int nl80211_nan_add_func(struct sk_buff *skb, @@ -16302,7 +16315,8 @@ void cfg80211_ch_switch_notify(struct net_device *dev, wdev->chandef = *chandef; wdev->preset_chandef = *chandef; - if (wdev->iftype == NL80211_IFTYPE_STATION && + if ((wdev->iftype == NL80211_IFTYPE_STATION || + wdev->iftype == NL80211_IFTYPE_P2P_CLIENT) && !WARN_ON(!wdev->current_bss)) cfg80211_update_assoc_bss_entry(wdev, chandef->chan); diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 0f3b57a73670b..74caece779633 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -3790,6 +3790,7 @@ void wiphy_regulatory_register(struct wiphy *wiphy) wiphy_update_regulatory(wiphy, lr->initiator); wiphy_all_share_dfs_chan_state(wiphy); + reg_process_self_managed_hints(); } void wiphy_regulatory_deregister(struct wiphy *wiphy) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 1580535d53f86..6bb9437af28bf 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -379,14 +379,17 @@ cfg80211_add_nontrans_list(struct cfg80211_bss *trans_bss, } ssid_len = ssid[1]; ssid = ssid + 2; - rcu_read_unlock(); /* check if nontrans_bss is in the list */ list_for_each_entry(bss, &trans_bss->nontrans_list, nontrans_list) { - if (is_bss(bss, nontrans_bss->bssid, ssid, ssid_len)) + if (is_bss(bss, nontrans_bss->bssid, ssid, ssid_len)) { + rcu_read_unlock(); return 0; + } } + rcu_read_unlock(); + /* add to the list */ list_add_tail(&nontrans_bss->nontrans_list, &trans_bss->nontrans_list); return 0; @@ -1454,11 +1457,13 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, /* this is a nontransmitting bss, we need to add it to * transmitting bss' list if it is not there */ + spin_lock_bh(&rdev->bss_lock); if (cfg80211_add_nontrans_list(non_tx_data->tx_bss, &res->pub)) { if (__cfg80211_unlink_bss(rdev, res)) rdev->bss_generation++; } + spin_unlock_bh(&rdev->bss_lock); } trace_cfg80211_return_bss(&res->pub); diff --git a/net/wireless/util.c b/net/wireless/util.c index f0247eab5bc94..95533732f9d6f 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -975,14 +975,14 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, !(rdev->wiphy.interface_modes & (1 << ntype))) return -EOPNOTSUPP; - /* if it's part of a bridge, reject changing type to station/ibss */ - if ((dev->priv_flags & IFF_BRIDGE_PORT) && - (ntype == NL80211_IFTYPE_ADHOC || - ntype == NL80211_IFTYPE_STATION || - ntype == NL80211_IFTYPE_P2P_CLIENT)) - return -EBUSY; - if (ntype != otype) { + /* if it's part of a bridge, reject changing type to station/ibss */ + if (netif_is_bridge_port(dev) && + (ntype == NL80211_IFTYPE_ADHOC || + ntype == NL80211_IFTYPE_STATION || + ntype == NL80211_IFTYPE_P2P_CLIENT)) + return -EBUSY; + dev->ieee80211_ptr->use_4addr = false; dev->ieee80211_ptr->mesh_id_up_len = 0; wdev_lock(dev->ieee80211_ptr); @@ -991,6 +991,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, switch (otype) { case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: cfg80211_stop_ap(rdev, dev, true); break; case NL80211_IFTYPE_ADHOC: diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index d8d603aa48875..c94aa587e0c92 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1767,10 +1767,15 @@ void x25_kill_by_neigh(struct x25_neigh *nb) write_lock_bh(&x25_list_lock); - sk_for_each(s, &x25_list) - if (x25_sk(s)->neighbour == nb) + sk_for_each(s, &x25_list) { + if (x25_sk(s)->neighbour == nb) { + write_unlock_bh(&x25_list_lock); + lock_sock(s); x25_disconnect(s, ENETUNREACH, 0, 0); - + release_sock(s); + write_lock_bh(&x25_list_lock); + } + } write_unlock_bh(&x25_list_lock); /* Remove any related forwards */ diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 32a378e7011ff..1b138a9d2369a 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -341,6 +341,26 @@ static struct xfrm_algo_desc aalg_list[] = { .pfkey_supported = 0, }, +{ + .name = "hmac(sm3)", + .compat = "sm3", + + .uinfo = { + .auth = { + .icv_truncbits = 256, + .icv_fullbits = 256, + } + }, + + .pfkey_supported = 1, + + .desc = { + .sadb_alg_id = SADB_X_AALG_SM3_256HMAC, + .sadb_alg_ivlen = 0, + .sadb_alg_minbits = 256, + .sadb_alg_maxbits = 256 + } +}, }; static struct xfrm_algo_desc ealg_list[] = { @@ -552,6 +572,27 @@ static struct xfrm_algo_desc ealg_list[] = { .sadb_alg_maxbits = 288 } }, +{ + .name = "cbc(sm4)", + .compat = "sm4", + + .uinfo = { + .encr = { + .geniv = "echainiv", + .blockbits = 128, + .defkeybits = 128, + } + }, + + .pfkey_supported = 1, + + .desc = { + .sadb_alg_id = SADB_X_EALG_SM4CBC, + .sadb_alg_ivlen = 16, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 256 + } +}, }; static struct xfrm_algo_desc calg_list[] = { diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index bb2292b5260c2..d758e9ec3d008 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -206,6 +206,9 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, if (x->encap || x->tfcpad) return -EINVAL; + if (xuo->flags & ~(XFRM_OFFLOAD_IPV6 | XFRM_OFFLOAD_INBOUND)) + return -EINVAL; + dev = dev_get_by_index(net, xuo->ifindex); if (!dev) { if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) { @@ -243,7 +246,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, xso->dev = dev; xso->num_exthdrs = 1; - xso->flags = xuo->flags; + /* Don't forward bit that is not implemented */ + xso->flags = xuo->flags & ~XFRM_OFFLOAD_IPV6; err = dev->xfrmdev_ops->xdo_dev_state_add(x); if (err) { diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 74e90d78c3b46..4cfa79e04e3d1 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -300,7 +300,10 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + if (skb->len > 1280) + icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + else + goto xmit; } else { if (!(ip_hdr(skb)->frag_off & htons(IP_DF))) goto xmit; @@ -659,11 +662,16 @@ static int xfrmi_newlink(struct net *src_net, struct net_device *dev, struct netlink_ext_ack *extack) { struct net *net = dev_net(dev); - struct xfrm_if_parms p; + struct xfrm_if_parms p = {}; struct xfrm_if *xi; int err; xfrmi_netlink_parms(data, &p); + if (!p.if_id) { + NL_SET_ERR_MSG(extack, "if_id must be non zero"); + return -EINVAL; + } + xi = xfrmi_locate(net, &p); if (xi) return -EEXIST; @@ -688,9 +696,14 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], { struct xfrm_if *xi = netdev_priv(dev); struct net *net = xi->net; - struct xfrm_if_parms p; + struct xfrm_if_parms p = {}; xfrmi_netlink_parms(data, &p); + if (!p.if_id) { + NL_SET_ERR_MSG(extack, "if_id must be non zero"); + return -EINVAL; + } + xi = xfrmi_locate(net, &p); if (!xi) { xi = netdev_priv(dev); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 32c8163427970..28a8cdef8e51f 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -31,8 +31,10 @@ #include #include #include +#include #include #include +#include #if IS_ENABLED(CONFIG_IPV6_MIP6) #include #endif @@ -2677,8 +2679,10 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family, *num_xfrms = 0; return 0; } - if (IS_ERR(pols[0])) + if (IS_ERR(pols[0])) { + *num_pols = 0; return PTR_ERR(pols[0]); + } *num_xfrms = pols[0]->xfrm_nr; @@ -2693,6 +2697,7 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family, if (pols[1]) { if (IS_ERR(pols[1])) { xfrm_pols_put(pols, *num_pols); + *num_pols = 0; return PTR_ERR(pols[1]); } (*num_pols)++; @@ -3281,7 +3286,7 @@ decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse) fl4->flowi4_proto = iph->protocol; fl4->daddr = reverse ? iph->saddr : iph->daddr; fl4->saddr = reverse ? iph->daddr : iph->saddr; - fl4->flowi4_tos = iph->tos; + fl4->flowi4_tos = iph->tos & ~INET_ECN_MASK; if (!ip_is_fragment(iph)) { switch (iph->protocol) { @@ -3443,6 +3448,26 @@ decode_session6(struct sk_buff *skb, struct flowi *fl, bool reverse) } fl6->flowi6_proto = nexthdr; return; + case IPPROTO_GRE: + if (!onlyproto && + (nh + offset + 12 < skb->data || + pskb_may_pull(skb, nh + offset + 12 - skb->data))) { + struct gre_base_hdr *gre_hdr; + __be32 *gre_key; + + nh = skb_network_header(skb); + gre_hdr = (struct gre_base_hdr *)(nh + offset); + gre_key = (__be32 *)(gre_hdr + 1); + + if (gre_hdr->flags & GRE_KEY) { + if (gre_hdr->flags & GRE_CSUM) + gre_key++; + fl6->fl6_gre_key = *gre_key; + } + } + fl6->flowi6_proto = nexthdr; + return; + #if IS_ENABLED(CONFIG_IPV6_MIP6) case IPPROTO_MH: offset += ipv6_optlen(exthdr); @@ -4249,7 +4274,7 @@ static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp, } static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel, - u8 dir, u8 type, struct net *net) + u8 dir, u8 type, struct net *net, u32 if_id) { struct xfrm_policy *pol, *ret = NULL; struct hlist_head *chain; @@ -4258,7 +4283,8 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector * spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir); hlist_for_each_entry(pol, chain, bydst) { - if (xfrm_migrate_selector_match(sel, &pol->selector) && + if ((if_id == 0 || pol->if_id == if_id) && + xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type) { ret = pol; priority = ret->priority; @@ -4270,7 +4296,8 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector * if ((pol->priority >= priority) && ret) break; - if (xfrm_migrate_selector_match(sel, &pol->selector) && + if ((if_id == 0 || pol->if_id == if_id) && + xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type) { ret = pol; break; @@ -4386,7 +4413,7 @@ static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate) int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_migrate, struct xfrm_kmaddress *k, struct net *net, - struct xfrm_encap_tmpl *encap) + struct xfrm_encap_tmpl *encap, u32 if_id) { int i, err, nx_cur = 0, nx_new = 0; struct xfrm_policy *pol = NULL; @@ -4405,14 +4432,14 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, } /* Stage 1 - find policy */ - if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) { + if ((pol = xfrm_migrate_policy_find(sel, dir, type, net, if_id)) == NULL) { err = -ENOENT; goto out; } /* Stage 2 - find and update state(s) */ for (i = 0, mp = m; i < num_migrate; i++, mp++) { - if ((x = xfrm_migrate_state_find(mp, net))) { + if ((x = xfrm_migrate_state_find(mp, net, if_id))) { x_cur[nx_cur] = x; nx_cur++; xc = xfrm_state_migrate(x, mp, encap); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index c6b2c99b501b9..bee1a8143d75f 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1539,9 +1539,6 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, memcpy(&x->mark, &orig->mark, sizeof(x->mark)); memcpy(&x->props.smark, &orig->props.smark, sizeof(x->props.smark)); - if (xfrm_init_state(x) < 0) - goto error; - x->props.flags = orig->props.flags; x->props.extra_flags = orig->props.extra_flags; @@ -1563,7 +1560,8 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, return NULL; } -struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net) +struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net, + u32 if_id) { unsigned int h; struct xfrm_state *x = NULL; @@ -1579,6 +1577,8 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n continue; if (m->reqid && x->props.reqid != m->reqid) continue; + if (if_id != 0 && x->if_id != if_id) + continue; if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr, m->old_family) || !xfrm_addr_equal(&x->props.saddr, &m->old_saddr, @@ -1594,6 +1594,8 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n if (x->props.mode != m->mode || x->id.proto != m->proto) continue; + if (if_id != 0 && x->if_id != if_id) + continue; if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr, m->old_family) || !xfrm_addr_equal(&x->props.saddr, &m->old_saddr, @@ -1620,6 +1622,11 @@ struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, if (!xc) return NULL; + xc->props.family = m->new_family; + + if (xfrm_init_state(xc) < 0) + goto error; + memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr)); memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr)); @@ -2440,7 +2447,7 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_delete_tunnel); -u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu) +u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) { const struct xfrm_type *type = READ_ONCE(x->type); struct crypto_aead *aead; @@ -2471,17 +2478,7 @@ u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu) return ((mtu - x->props.header_len - crypto_aead_authsize(aead) - net_adj) & ~(blksize - 1)) + net_adj - 2; } -EXPORT_SYMBOL_GPL(__xfrm_state_mtu); - -u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) -{ - mtu = __xfrm_state_mtu(x, mtu); - - if (x->props.family == AF_INET6 && mtu < IPV6_MIN_MTU) - return IPV6_MIN_MTU; - - return mtu; -} +EXPORT_SYMBOL_GPL(xfrm_state_mtu); int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) { @@ -2491,7 +2488,7 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) int err; if (family == AF_INET && - xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc) + READ_ONCE(xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc)) x->props.flags |= XFRM_STATE_NOPMTUDISC; err = -EPROTONOSUPPORT; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 0cee2d3c6e452..bd44a800e7db7 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2374,6 +2374,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, int n = 0; struct net *net = sock_net(skb->sk); struct xfrm_encap_tmpl *encap = NULL; + u32 if_id = 0; if (attrs[XFRMA_MIGRATE] == NULL) return -EINVAL; @@ -2398,7 +2399,10 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; } - err = xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net, encap); + if (attrs[XFRMA_IF_ID]) + if_id = nla_get_u32(attrs[XFRMA_IF_ID]); + + err = xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net, encap, if_id); kfree(encap); @@ -2816,7 +2820,7 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x) if (x->props.extra_flags) l += nla_total_size(sizeof(x->props.extra_flags)); if (x->xso.dev) - l += nla_total_size(sizeof(x->xso)); + l += nla_total_size(sizeof(struct xfrm_user_offload)); if (x->props.smark.v | x->props.smark.m) { l += nla_total_size(sizeof(x->props.smark.v)); l += nla_total_size(sizeof(x->props.smark.m)); diff --git a/samples/bpf/test_override_return.sh b/samples/bpf/test_override_return.sh index e68b9ee6814b8..35db26f736b9d 100755 --- a/samples/bpf/test_override_return.sh +++ b/samples/bpf/test_override_return.sh @@ -1,5 +1,6 @@ #!/bin/bash +rm -r tmpmnt rm -f testfile.img dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1 DEVICE=$(losetup --show -f testfile.img) diff --git a/samples/bpf/tracex7_user.c b/samples/bpf/tracex7_user.c index ea6dae78f0dff..2ed13e9f3fcb0 100644 --- a/samples/bpf/tracex7_user.c +++ b/samples/bpf/tracex7_user.c @@ -13,6 +13,11 @@ int main(int argc, char **argv) char command[256]; int ret; + if (!argv[1]) { + fprintf(stderr, "ERROR: Run with the btrfs device argument!\n"); + return 0; + } + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); if (load_bpf_file(filename)) { diff --git a/samples/kprobes/kretprobe_example.c b/samples/kprobes/kretprobe_example.c index 186315ca88b3f..2701549ee7b3a 100644 --- a/samples/kprobes/kretprobe_example.c +++ b/samples/kprobes/kretprobe_example.c @@ -84,7 +84,7 @@ static int __init kretprobe_init(void) ret = register_kretprobe(&my_kretprobe); if (ret < 0) { pr_err("register_kretprobe failed, returned %d\n", ret); - return -1; + return ret; } pr_info("Planted return probe at %s: %p\n", my_kretprobe.kp.symbol_name, my_kretprobe.kp.addr); diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index ca08f2fe7c344..854e2ba9daa29 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -49,6 +49,7 @@ KBUILD_CFLAGS += -Wno-format KBUILD_CFLAGS += -Wno-sign-compare KBUILD_CFLAGS += -Wno-format-zero-length KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast) +KBUILD_CFLAGS += $(call cc-disable-warning, unaligned-access) endif endif diff --git a/scripts/Makefile.gcc-plugins b/scripts/Makefile.gcc-plugins index 5f7df50cfe7aa..63d21489216b0 100644 --- a/scripts/Makefile.gcc-plugins +++ b/scripts/Makefile.gcc-plugins @@ -19,6 +19,10 @@ gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF) \ += -fplugin-arg-structleak_plugin-byref gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL) \ += -fplugin-arg-structleak_plugin-byref-all +ifdef CONFIG_GCC_PLUGIN_STRUCTLEAK + DISABLE_STRUCTLEAK_PLUGIN += -fplugin-arg-structleak_plugin-disable +endif +export DISABLE_STRUCTLEAK_PLUGIN gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STRUCTLEAK) \ += -DSTRUCTLEAK_PLUGIN diff --git a/scripts/dtc/dtx_diff b/scripts/dtc/dtx_diff index 00fd4738a5877..ae6c766a56ccc 100755 --- a/scripts/dtc/dtx_diff +++ b/scripts/dtc/dtx_diff @@ -56,12 +56,8 @@ Otherwise DTx is treated as a dts source file (aka .dts). or '/include/' to be processed. If DTx_1 and DTx_2 are in different architectures, then this script - may not work since \${ARCH} is part of the include path. Two possible - workarounds: - - `basename $0` \\ - <(ARCH=arch_of_dtx_1 `basename $0` DTx_1) \\ - <(ARCH=arch_of_dtx_2 `basename $0` DTx_2) + may not work since \${ARCH} is part of the include path. The following + workaround can be used: `basename $0` ARCH=arch_of_dtx_1 DTx_1 >tmp_dtx_1.dts `basename $0` ARCH=arch_of_dtx_2 DTx_2 >tmp_dtx_2.dts diff --git a/scripts/faddr2line b/scripts/faddr2line index 6c6439f69a725..94ed98dd899f3 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line @@ -44,17 +44,6 @@ set -o errexit set -o nounset -READELF="${CROSS_COMPILE:-}readelf" -ADDR2LINE="${CROSS_COMPILE:-}addr2line" -SIZE="${CROSS_COMPILE:-}size" -NM="${CROSS_COMPILE:-}nm" - -command -v awk >/dev/null 2>&1 || die "awk isn't installed" -command -v ${READELF} >/dev/null 2>&1 || die "readelf isn't installed" -command -v ${ADDR2LINE} >/dev/null 2>&1 || die "addr2line isn't installed" -command -v ${SIZE} >/dev/null 2>&1 || die "size isn't installed" -command -v ${NM} >/dev/null 2>&1 || die "nm isn't installed" - usage() { echo "usage: faddr2line [--list] ..." >&2 exit 1 @@ -69,6 +58,14 @@ die() { exit 1 } +READELF="${CROSS_COMPILE:-}readelf" +ADDR2LINE="${CROSS_COMPILE:-}addr2line" +AWK="awk" + +command -v ${AWK} >/dev/null 2>&1 || die "${AWK} isn't installed" +command -v ${READELF} >/dev/null 2>&1 || die "${READELF} isn't installed" +command -v ${ADDR2LINE} >/dev/null 2>&1 || die "${ADDR2LINE} isn't installed" + # Try to figure out the source directory prefix so we can remove it from the # addr2line output. HACK ALERT: This assumes that start_kernel() is in # init/main.c! This only works for vmlinux. Otherwise it falls back to @@ -76,7 +73,7 @@ die() { find_dir_prefix() { local objfile=$1 - local start_kernel_addr=$(${READELF} -sW $objfile | awk '$8 == "start_kernel" {printf "0x%s", $2}') + local start_kernel_addr=$(${READELF} --symbols --wide $objfile | ${AWK} '$8 == "start_kernel" {printf "0x%s", $2}') [[ -z $start_kernel_addr ]] && return local file_line=$(${ADDR2LINE} -e $objfile $start_kernel_addr) @@ -97,86 +94,156 @@ __faddr2line() { local dir_prefix=$3 local print_warnings=$4 - local func=${func_addr%+*} - local offset=${func_addr#*+} - offset=${offset%/*} - local size= - [[ $func_addr =~ "/" ]] && size=${func_addr#*/} + local sym_name=${func_addr%+*} + local func_offset=${func_addr#*+} + func_offset=${func_offset%/*} + local user_size= + local file_type + local is_vmlinux=0 + [[ $func_addr =~ "/" ]] && user_size=${func_addr#*/} - if [[ -z $func ]] || [[ -z $offset ]] || [[ $func = $func_addr ]]; then + if [[ -z $sym_name ]] || [[ -z $func_offset ]] || [[ $sym_name = $func_addr ]]; then warn "bad func+offset $func_addr" DONE=1 return fi + # vmlinux uses absolute addresses in the section table rather than + # section offsets. + local file_type=$(${READELF} --file-header $objfile | + ${AWK} '$1 == "Type:" { print $2; exit }') + [[ $file_type = "EXEC" ]] && is_vmlinux=1 + # Go through each of the object's symbols which match the func name. - # In rare cases there might be duplicates. - file_end=$(${SIZE} -Ax $objfile | awk '$1 == ".text" {print $2}') - while read symbol; do - local fields=($symbol) - local sym_base=0x${fields[0]} - local sym_type=${fields[1]} - local sym_end=${fields[3]} - - # calculate the size - local sym_size=$(($sym_end - $sym_base)) + # In rare cases there might be duplicates, in which case we print all + # matches. + while read line; do + local fields=($line) + local sym_addr=0x${fields[1]} + local sym_elf_size=${fields[2]} + local sym_sec=${fields[6]} + local sec_size + local sec_name + + # Get the section size: + sec_size=$(${READELF} --section-headers --wide $objfile | + sed 's/\[ /\[/' | + ${AWK} -v sec=$sym_sec '$1 == "[" sec "]" { print "0x" $6; exit }') + + if [[ -z $sec_size ]]; then + warn "bad section size: section: $sym_sec" + DONE=1 + return + fi + + # Get the section name: + sec_name=$(${READELF} --section-headers --wide $objfile | + sed 's/\[ /\[/' | + ${AWK} -v sec=$sym_sec '$1 == "[" sec "]" { print $2; exit }') + + if [[ -z $sec_name ]]; then + warn "bad section name: section: $sym_sec" + DONE=1 + return + fi + + # Calculate the symbol size. + # + # Unfortunately we can't use the ELF size, because kallsyms + # also includes the padding bytes in its size calculation. For + # kallsyms, the size calculation is the distance between the + # symbol and the next symbol in a sorted list. + local sym_size + local cur_sym_addr + local found=0 + while read line; do + local fields=($line) + cur_sym_addr=0x${fields[1]} + local cur_sym_elf_size=${fields[2]} + local cur_sym_name=${fields[7]:-} + + if [[ $cur_sym_addr = $sym_addr ]] && + [[ $cur_sym_elf_size = $sym_elf_size ]] && + [[ $cur_sym_name = $sym_name ]]; then + found=1 + continue + fi + + if [[ $found = 1 ]]; then + sym_size=$(($cur_sym_addr - $sym_addr)) + [[ $sym_size -lt $sym_elf_size ]] && continue; + found=2 + break + fi + done < <(${READELF} --symbols --wide $objfile | ${AWK} -v sec=$sym_sec '$7 == sec' | sort --key=2) + + if [[ $found = 0 ]]; then + warn "can't find symbol: sym_name: $sym_name sym_sec: $sym_sec sym_addr: $sym_addr sym_elf_size: $sym_elf_size" + DONE=1 + return + fi + + # If nothing was found after the symbol, assume it's the last + # symbol in the section. + [[ $found = 1 ]] && sym_size=$(($sec_size - $sym_addr)) + if [[ -z $sym_size ]] || [[ $sym_size -le 0 ]]; then - warn "bad symbol size: base: $sym_base end: $sym_end" + warn "bad symbol size: sym_addr: $sym_addr cur_sym_addr: $cur_sym_addr" DONE=1 return fi + sym_size=0x$(printf %x $sym_size) - # calculate the address - local addr=$(($sym_base + $offset)) + # Calculate the address from user-supplied offset: + local addr=$(($sym_addr + $func_offset)) if [[ -z $addr ]] || [[ $addr = 0 ]]; then - warn "bad address: $sym_base + $offset" + warn "bad address: $sym_addr + $func_offset" DONE=1 return fi addr=0x$(printf %x $addr) - # weed out non-function symbols - if [[ $sym_type != t ]] && [[ $sym_type != T ]]; then + # If the user provided a size, make sure it matches the symbol's size: + if [[ -n $user_size ]] && [[ $user_size -ne $sym_size ]]; then [[ $print_warnings = 1 ]] && - echo "skipping $func address at $addr due to non-function symbol of type '$sym_type'" - continue - fi - - # if the user provided a size, make sure it matches the symbol's size - if [[ -n $size ]] && [[ $size -ne $sym_size ]]; then - [[ $print_warnings = 1 ]] && - echo "skipping $func address at $addr due to size mismatch ($size != $sym_size)" + echo "skipping $sym_name address at $addr due to size mismatch ($user_size != $sym_size)" continue; fi - # make sure the provided offset is within the symbol's range - if [[ $offset -gt $sym_size ]]; then + # Make sure the provided offset is within the symbol's range: + if [[ $func_offset -gt $sym_size ]]; then [[ $print_warnings = 1 ]] && - echo "skipping $func address at $addr due to size mismatch ($offset > $sym_size)" + echo "skipping $sym_name address at $addr due to size mismatch ($func_offset > $sym_size)" continue fi - # separate multiple entries with a blank line + # In case of duplicates or multiple addresses specified on the + # cmdline, separate multiple entries with a blank line: [[ $FIRST = 0 ]] && echo FIRST=0 - # pass real address to addr2line - echo "$func+$offset/$sym_size:" - local file_lines=$(${ADDR2LINE} -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;") - [[ -z $file_lines ]] && return + echo "$sym_name+$func_offset/$sym_size:" + # Pass section address to addr2line and strip absolute paths + # from the output: + local args="--functions --pretty-print --inlines --exe=$objfile" + [[ $is_vmlinux = 0 ]] && args="$args --section=$sec_name" + local output=$(${ADDR2LINE} $args $addr | sed "s; $dir_prefix\(\./\)*; ;") + [[ -z $output ]] && continue + + # Default output (non --list): if [[ $LIST = 0 ]]; then - echo "$file_lines" | while read -r line + echo "$output" | while read -r line do echo $line done DONE=1; - return + continue fi - # show each line with context - echo "$file_lines" | while read -r line + # For --list, show each line with its corresponding source code: + echo "$output" | while read -r line do echo echo $line @@ -184,12 +251,12 @@ __faddr2line() { n1=$[$n-5] n2=$[$n+5] f=$(echo $line | sed 's/.*at \(.\+\):.*/\1/g') - awk 'NR>=strtonum("'$n1'") && NR<=strtonum("'$n2'") { if (NR=='$n') printf(">%d<", NR); else printf(" %d ", NR); printf("\t%s\n", $0)}' $f + ${AWK} 'NR>=strtonum("'$n1'") && NR<=strtonum("'$n2'") { if (NR=='$n') printf(">%d<", NR); else printf(" %d ", NR); printf("\t%s\n", $0)}' $f done DONE=1 - done < <(${NM} -n $objfile | awk -v fn=$func -v end=$file_end '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, "0x"$1 } END {if (found == 1) print line, end; }') + done < <(${READELF} --symbols --wide $objfile | ${AWK} -v fn=$sym_name '$4 == "FUNC" && $8 == fn') } [[ $# -lt 2 ]] && usage diff --git a/scripts/gcc-plugins/latent_entropy_plugin.c b/scripts/gcc-plugins/latent_entropy_plugin.c index cbe1d6c4b1a51..c84bef1d28955 100644 --- a/scripts/gcc-plugins/latent_entropy_plugin.c +++ b/scripts/gcc-plugins/latent_entropy_plugin.c @@ -86,25 +86,31 @@ static struct plugin_info latent_entropy_plugin_info = { .help = "disable\tturn off latent entropy instrumentation\n", }; -static unsigned HOST_WIDE_INT seed; -/* - * get_random_seed() (this is a GCC function) generates the seed. - * This is a simple random generator without any cryptographic security because - * the entropy doesn't come from here. - */ +static unsigned HOST_WIDE_INT deterministic_seed; +static unsigned HOST_WIDE_INT rnd_buf[32]; +static size_t rnd_idx = ARRAY_SIZE(rnd_buf); +static int urandom_fd = -1; + static unsigned HOST_WIDE_INT get_random_const(void) { - unsigned int i; - unsigned HOST_WIDE_INT ret = 0; - - for (i = 0; i < 8 * sizeof(ret); i++) { - ret = (ret << 1) | (seed & 1); - seed >>= 1; - if (ret & 1) - seed ^= 0xD800000000000000ULL; + if (deterministic_seed) { + unsigned HOST_WIDE_INT w = deterministic_seed; + w ^= w << 13; + w ^= w >> 7; + w ^= w << 17; + deterministic_seed = w; + return deterministic_seed; } - return ret; + if (urandom_fd < 0) { + urandom_fd = open("/dev/urandom", O_RDONLY); + gcc_assert(urandom_fd >= 0); + } + if (rnd_idx >= ARRAY_SIZE(rnd_buf)) { + gcc_assert(read(urandom_fd, rnd_buf, sizeof(rnd_buf)) == sizeof(rnd_buf)); + rnd_idx = 0; + } + return rnd_buf[rnd_idx++]; } static tree tree_get_random_const(tree type) @@ -549,8 +555,6 @@ static void latent_entropy_start_unit(void *gcc_data __unused, tree type, id; int quals; - seed = get_random_seed(false); - if (in_lto_p) return; @@ -585,6 +589,12 @@ __visible int plugin_init(struct plugin_name_args *plugin_info, const struct plugin_argument * const argv = plugin_info->argv; int i; + /* + * Call get_random_seed() with noinit=true, so that this returns + * 0 in the case where no seed has been passed via -frandom-seed. + */ + deterministic_seed = get_random_seed(true); + static const struct ggc_root_tab gt_ggc_r_gt_latent_entropy[] = { { .base = &latent_entropy_decl, diff --git a/scripts/gcc-plugins/stackleak_plugin.c b/scripts/gcc-plugins/stackleak_plugin.c index dbd37460c573e..f46abb315010d 100644 --- a/scripts/gcc-plugins/stackleak_plugin.c +++ b/scripts/gcc-plugins/stackleak_plugin.c @@ -262,6 +262,23 @@ static unsigned int stackleak_cleanup_execute(void) return 0; } +/* + * STRING_CST may or may not be NUL terminated: + * https://gcc.gnu.org/onlinedocs/gccint/Constant-expressions.html + */ +static inline bool string_equal(tree node, const char *string, int length) +{ + if (TREE_STRING_LENGTH(node) < length) + return false; + if (TREE_STRING_LENGTH(node) > length + 1) + return false; + if (TREE_STRING_LENGTH(node) == length + 1 && + TREE_STRING_POINTER(node)[length] != '\0') + return false; + return !memcmp(TREE_STRING_POINTER(node), string, length); +} +#define STRING_EQUAL(node, str) string_equal(node, str, strlen(str)) + static bool stackleak_gate(void) { tree section; @@ -271,13 +288,13 @@ static bool stackleak_gate(void) if (section && TREE_VALUE(section)) { section = TREE_VALUE(TREE_VALUE(section)); - if (!strncmp(TREE_STRING_POINTER(section), ".init.text", 10)) + if (STRING_EQUAL(section, ".init.text")) return false; - if (!strncmp(TREE_STRING_POINTER(section), ".devinit.text", 13)) + if (STRING_EQUAL(section, ".devinit.text")) return false; - if (!strncmp(TREE_STRING_POINTER(section), ".cpuinit.text", 13)) + if (STRING_EQUAL(section, ".cpuinit.text")) return false; - if (!strncmp(TREE_STRING_POINTER(section), ".meminit.text", 13)) + if (STRING_EQUAL(section, ".meminit.text")) return false; } diff --git a/scripts/gdb/linux/config.py b/scripts/gdb/linux/config.py index 90e1565b19671..8843ab3cbaddc 100644 --- a/scripts/gdb/linux/config.py +++ b/scripts/gdb/linux/config.py @@ -24,9 +24,9 @@ def invoke(self, arg, from_tty): filename = arg try: - py_config_ptr = gdb.parse_and_eval("kernel_config_data + 8") - py_config_size = gdb.parse_and_eval( - "sizeof(kernel_config_data) - 1 - 8 * 2") + py_config_ptr = gdb.parse_and_eval("&kernel_config_data") + py_config_ptr_end = gdb.parse_and_eval("&kernel_config_data_end") + py_config_size = py_config_ptr_end - py_config_ptr except gdb.error as e: raise gdb.GdbError("Can't find config, enable CONFIG_IKCONFIG?") diff --git a/scripts/gen_ksymdeps.sh b/scripts/gen_ksymdeps.sh index 1324986e1362c..725e8c9c1b53f 100755 --- a/scripts/gen_ksymdeps.sh +++ b/scripts/gen_ksymdeps.sh @@ -4,7 +4,13 @@ set -e # List of exported symbols -ksyms=$($NM $1 | sed -n 's/.*__ksym_marker_\(.*\)/\1/p' | tr A-Z a-z) +# +# If the object has no symbol, $NM warns 'no symbols'. +# Suppress the stderr. +# TODO: +# Use -q instead of 2>/dev/null when we upgrade the minimum version of +# binutils to 2.37, llvm to 13.0.0. +ksyms=$($NM $1 2>/dev/null | sed -n 's/.*__ksym_marker_\(.*\)/\1/p' | tr A-Z a-z) if [ -z "$ksyms" ]; then exit 0 diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index 17298239e3633..5c2493c8e9de8 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -987,14 +987,19 @@ static int conf_write_dep(const char *name) static int conf_touch_deps(void) { - const char *name; + const char *name, *tmp; struct symbol *sym; int res, i; - strcpy(depfile_path, "include/config/"); - depfile_prefix_len = strlen(depfile_path); - name = conf_get_autoconfig_name(); + tmp = strrchr(name, '/'); + depfile_prefix_len = tmp ? tmp - name + 1 : 0; + if (depfile_prefix_len + 1 > sizeof(depfile_path)) + return -1; + + strncpy(depfile_path, name, depfile_prefix_len); + depfile_path[depfile_prefix_len] = 0; + conf_read_simple(name, S_DEF_AUTO); sym_calc_value(modules_sym); diff --git a/scripts/kconfig/preprocess.c b/scripts/kconfig/preprocess.c index 0590f86df6e40..748da578b418c 100644 --- a/scripts/kconfig/preprocess.c +++ b/scripts/kconfig/preprocess.c @@ -141,7 +141,7 @@ static char *do_lineno(int argc, char *argv[]) static char *do_shell(int argc, char *argv[]) { FILE *p; - char buf[256]; + char buf[4096]; char *cmd; size_t nread; int i; diff --git a/scripts/leaking_addresses.pl b/scripts/leaking_addresses.pl index b2d8b8aa2d99e..8f636a23bc3f2 100755 --- a/scripts/leaking_addresses.pl +++ b/scripts/leaking_addresses.pl @@ -455,8 +455,9 @@ sub parse_file open my $fh, "<", $file or return; while ( <$fh> ) { + chomp; if (may_leak_address($_)) { - print $file . ': ' . $_; + printf("$file: $_\n"); } } close $fh; diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 13cda6aa26880..e5aeaf72dcdb8 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1131,7 +1131,7 @@ static const struct sectioncheck sectioncheck[] = { }, /* Do not export init/exit functions or data */ { - .fromsec = { "__ksymtab*", NULL }, + .fromsec = { "___ksymtab*", NULL }, .bad_tosec = { INIT_SECTIONS, EXIT_SECTIONS, NULL }, .mismatch = EXPORT_TO_INIT_EXIT, .symbol_white_list = { DEFAULT_SYMBOL_WHITE_LIST, NULL }, @@ -1283,7 +1283,8 @@ static int secref_whitelist(const struct sectioncheck *mismatch, static inline int is_arm_mapping_symbol(const char *str) { - return str[0] == '$' && strchr("axtd", str[1]) + return str[0] == '$' && + (str[1] == 'a' || str[1] == 'd' || str[1] == 't' || str[1] == 'x') && (str[2] == '\0' || str[2] == '.'); } @@ -1998,7 +1999,7 @@ static char *remove_dot(char *s) if (n && s[n]) { size_t m = strspn(s + n + 1, "0123456789"); - if (m && (s[n + m] == '.' || s[n + m] == 0)) + if (m && (s[n + m + 1] == '.' || s[n + m + 1] == 0)) s[n] = 0; } return s; diff --git a/scripts/package/builddeb b/scripts/package/builddeb index 3c322f1a301e5..d2845183120dc 100755 --- a/scripts/package/builddeb +++ b/scripts/package/builddeb @@ -218,7 +218,7 @@ then mkdir -p $output/tools/perf output="O=$output/tools/perf" fi - $MAKE -C $srctree/tools/perf $output LDFLAGS= srctree=$KBUILD_SRC prefix=$tools_dest/usr install + $MAKE -C $srctree/tools/perf $output LDFLAGS= srctree=$KBUILD_SRC DESTDIR=$tools_dest prefix=/usr install $MAKE -C $srctree/tools/bpf/bpftool $output srctree=$(pwd) mkdir -p $tools_dir/usr/sbin @@ -226,7 +226,13 @@ then cp $srctree/tools/bpf/bpftool/bpftool $tools_dir/usr/sbin cp $srctree/tools/bpf/bpftool/bash-completion/bpftool $tools_dir/usr/share/bash-completion/completions - dpkg-shlibdeps $tools_dest/usr/bin/* $tools_dest/usr/lib64/traceevent/plugins/* + if [ "$(uname -m)" = "x86_64" ]; then + LIB=lib64 + else + LIB=lib + fi + + dpkg-shlibdeps $tools_dest/usr/bin/* $tools_dest/usr/$LIB/traceevent/plugins/* create_package "$tools_packagename" "$tools_dir" fi diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 4f84657f55c23..dc1d3696af6b8 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -222,7 +222,7 @@ sub check_objcopy $local_regex = "^[0-9a-fA-F]+\\s+t\\s+(\\S+)"; $weak_regex = "^[0-9a-fA-F]+\\s+([wW])\\s+(\\S+)"; $section_regex = "Disassembly of section\\s+(\\S+):"; -$function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:"; +$function_regex = "^([0-9a-fA-F]+)\\s+<([^^]*?)>:"; $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s(mcount|__fentry__)\$"; $section_type = '@progbits'; $mcount_adjust = 0; @@ -252,7 +252,7 @@ sub check_objcopy } elsif ($arch eq "s390" && $bits == 64) { if ($cc =~ /-DCC_USING_HOTPATCH/) { - $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*c0 04 00 00 00 00\\s*brcl\\s*0,[0-9a-f]+ <([^\+]*)>\$"; + $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*c0 04 00 00 00 00\\s*(brcl\\s*0,|jgnop\\s*)[0-9a-f]+ <([^\+]*)>\$"; $mcount_adjust = 0; } else { $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_390_(PC|PLT)32DBL\\s+_mcount\\+0x2\$"; diff --git a/security/Kconfig b/security/Kconfig index 2a1a2d3962281..52e5109f2c1b6 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -191,6 +191,9 @@ config HARDENED_USERCOPY_PAGESPAN config FORTIFY_SOURCE bool "Harden common str/mem functions against buffer overflows" depends on ARCH_HAS_FORTIFY_SOURCE + # https://bugs.llvm.org/show_bug.cgi?id=50322 + # https://bugs.llvm.org/show_bug.cgi?id=41459 + depends on !CC_IS_CLANG help Detect overflows of buffers in common string and memory functions where the compiler can determine and validate the buffer sizes. diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 7c4238a040732..95da9f8a7516e 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -1960,9 +1960,6 @@ int __aafs_ns_mkdir(struct aa_ns *ns, struct dentry *parent, const char *name, return error; } - -#define list_entry_is_head(pos, head, member) (&pos->member == (head)) - /** * __next_ns - find the next namespace to list * @root: root namespace to stop search at (NOT NULL) diff --git a/security/apparmor/label.c b/security/apparmor/label.c index 5f324d63ceaa3..747a734a08246 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c @@ -1459,7 +1459,7 @@ bool aa_update_label_name(struct aa_ns *ns, struct aa_label *label, gfp_t gfp) if (label->hname || labels_ns(label) != ns) return res; - if (aa_label_acntsxprint(&name, ns, label, FLAGS_NONE, gfp) == -1) + if (aa_label_acntsxprint(&name, ns, label, FLAGS_NONE, gfp) < 0) return res; ls = labels_set(label); @@ -1709,7 +1709,7 @@ int aa_label_asxprint(char **strp, struct aa_ns *ns, struct aa_label *label, /** * aa_label_acntsxprint - allocate a __counted string buffer and print label - * @strp: buffer to write to. (MAY BE NULL if @size == 0) + * @strp: buffer to write to. * @ns: namespace profile is being viewed from * @label: label to view (NOT NULL) * @flags: flags controlling what label info is printed diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c index 25dac691491b1..ee6bd945f3d6a 100644 --- a/security/integrity/evm/evm_crypto.c +++ b/security/integrity/evm/evm_crypto.c @@ -75,7 +75,7 @@ static struct shash_desc *init_desc(char type, uint8_t hash_algo) { long rc; const char *algo; - struct crypto_shash **tfm, *tmp_tfm = NULL; + struct crypto_shash **tfm, *tmp_tfm; struct shash_desc *desc; if (type == EVM_XATTR_HMAC) { @@ -120,16 +120,13 @@ static struct shash_desc *init_desc(char type, uint8_t hash_algo) alloc: desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(*tfm), GFP_KERNEL); - if (!desc) { - crypto_free_shash(tmp_tfm); + if (!desc) return ERR_PTR(-ENOMEM); - } desc->tfm = *tfm; rc = crypto_shash_init(desc); if (rc) { - crypto_free_shash(tmp_tfm); kfree(desc); return ERR_PTR(rc); } diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index 81e3245aec86c..b82291d10e730 100644 --- a/security/integrity/evm/evm_main.c +++ b/security/integrity/evm/evm_main.c @@ -56,7 +56,7 @@ static struct xattr_list evm_config_default_xattrnames[] = { LIST_HEAD(evm_config_xattrnames); -static int evm_fixmode; +static int evm_fixmode __ro_after_init; static int __init evm_set_fixmode(char *str) { if (strncmp(str, "fix", 3) == 0) diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig index d2054bec49094..44b3315f32352 100644 --- a/security/integrity/ima/Kconfig +++ b/security/integrity/ima/Kconfig @@ -6,7 +6,6 @@ config IMA select SECURITYFS select CRYPTO select CRYPTO_HMAC - select CRYPTO_MD5 select CRYPTO_SHA1 select CRYPTO_HASH_INFO select TCG_TPM if HAS_IOMEM && !UML @@ -70,10 +69,9 @@ choice hash, defined as 20 bytes, and a null terminated pathname, limited to 255 characters. The 'ima-ng' measurement list template permits both larger hash digests and longer - pathnames. + pathnames. The configured default template can be replaced + by specifying "ima_template=" on the boot command line. - config IMA_TEMPLATE - bool "ima" config IMA_NG_TEMPLATE bool "ima-ng (default)" config IMA_SIG_TEMPLATE @@ -83,7 +81,6 @@ endchoice config IMA_DEFAULT_TEMPLATE string depends on IMA - default "ima" if IMA_TEMPLATE default "ima-ng" if IMA_NG_TEMPLATE default "ima-sig" if IMA_SIG_TEMPLATE @@ -103,15 +100,15 @@ choice config IMA_DEFAULT_HASH_SHA256 bool "SHA256" - depends on CRYPTO_SHA256=y && !IMA_TEMPLATE + depends on CRYPTO_SHA256=y config IMA_DEFAULT_HASH_SHA512 bool "SHA512" - depends on CRYPTO_SHA512=y && !IMA_TEMPLATE + depends on CRYPTO_SHA512=y config IMA_DEFAULT_HASH_WP512 bool "WP512" - depends on CRYPTO_WP512=y && !IMA_TEMPLATE + depends on CRYPTO_WP512=y endchoice config IMA_DEFAULT_HASH diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c index 23b04c6521b25..9368688449b01 100644 --- a/security/integrity/ima/ima_appraise.c +++ b/security/integrity/ima/ima_appraise.c @@ -352,7 +352,8 @@ int ima_appraise_measurement(enum ima_hooks func, goto out; } - status = evm_verifyxattr(dentry, XATTR_NAME_IMA, xattr_value, rc, iint); + status = evm_verifyxattr(dentry, XATTR_NAME_IMA, xattr_value, + rc < 0 ? 0 : rc, iint); switch (status) { case INTEGRITY_PASS: case INTEGRITY_PASS_IMMUTABLE: diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c index 68571c40d61f6..a3e6fccff7123 100644 --- a/security/integrity/ima/ima_fs.c +++ b/security/integrity/ima/ima_fs.c @@ -494,12 +494,12 @@ int __init ima_fs_init(void) return 0; out: + securityfs_remove(ima_policy); securityfs_remove(violations); securityfs_remove(runtime_measurements_count); securityfs_remove(ascii_runtime_measurements); securityfs_remove(binary_runtime_measurements); securityfs_remove(ima_symlink); securityfs_remove(ima_dir); - securityfs_remove(ima_policy); return -1; } diff --git a/security/integrity/ima/ima_mok.c b/security/integrity/ima/ima_mok.c index 1e5c019161738..95cc31525c573 100644 --- a/security/integrity/ima/ima_mok.c +++ b/security/integrity/ima/ima_mok.c @@ -21,7 +21,7 @@ struct key *ima_blacklist_keyring; /* * Allocate the IMA blacklist keyring */ -__init int ima_mok_init(void) +static __init int ima_mok_init(void) { struct key_restriction *restriction; diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index e725d41872713..14aef74d3588a 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -1382,6 +1382,14 @@ int ima_policy_show(struct seq_file *m, void *v) rcu_read_lock(); + /* Do not print rules with inactive LSM labels */ + for (i = 0; i < MAX_LSM_RULES; i++) { + if (entry->lsm[i].args_p && !entry->lsm[i].rule) { + rcu_read_unlock(); + return 0; + } + } + if (entry->action & MEASURE) seq_puts(m, pt(Opt_measure)); if (entry->action & DONT_MEASURE) @@ -1534,6 +1542,10 @@ bool ima_appraise_signature(enum kernel_read_file_id id) if (id >= READING_MAX_ID) return false; + if (id == READING_KEXEC_IMAGE && !(ima_appraise & IMA_APPRAISE_ENFORCE) + && security_locked_down(LOCKDOWN_KEXEC)) + return false; + func = read_idmap[id] ?: FILE_CHECK; rcu_read_lock(); diff --git a/security/integrity/ima/ima_template.c b/security/integrity/ima/ima_template.c index 6aa6408603e33..2283051d063bc 100644 --- a/security/integrity/ima/ima_template.c +++ b/security/integrity/ima/ima_template.c @@ -29,6 +29,7 @@ static struct ima_template_desc builtin_templates[] = { static LIST_HEAD(defined_templates); static DEFINE_SPINLOCK(template_list); +static int template_setup_done; static const struct ima_template_field supported_fields[] = { {.field_id = "d", .field_init = ima_eventdigest_init, @@ -82,10 +83,11 @@ static int __init ima_template_setup(char *str) struct ima_template_desc *template_desc; int template_len = strlen(str); - if (ima_template) + if (template_setup_done) return 1; - ima_init_template_list(); + if (!ima_template) + ima_init_template_list(); /* * Verify that a template with the supplied name exists. @@ -109,6 +111,7 @@ static int __init ima_template_setup(char *str) } ima_template = template_desc; + template_setup_done = 1; return 1; } __setup("ima_template=", ima_template_setup); @@ -117,7 +120,7 @@ static int __init ima_template_fmt_setup(char *str) { int num_templates = ARRAY_SIZE(builtin_templates); - if (ima_template) + if (template_setup_done) return 1; if (template_desc_init_fields(str, NULL, NULL) < 0) { @@ -128,6 +131,7 @@ static int __init ima_template_fmt_setup(char *str) builtin_templates[num_templates - 1].fmt = str; ima_template = builtin_templates + num_templates - 1; + template_setup_done = 1; return 1; } diff --git a/security/integrity/integrity_audit.c b/security/integrity/integrity_audit.c index 5109173839cc5..c7f0f82a9a110 100644 --- a/security/integrity/integrity_audit.c +++ b/security/integrity/integrity_audit.c @@ -36,6 +36,8 @@ void integrity_audit_msg(int audit_msgno, struct inode *inode, return; ab = audit_log_start(audit_context(), GFP_KERNEL, audit_msgno); + if (!ab) + return; audit_log_format(ab, "pid=%d uid=%u auid=%u ses=%u", task_pid_nr(current), from_kuid(&init_user_ns, current_cred()->uid), diff --git a/security/integrity/platform_certs/keyring_handler.h b/security/integrity/platform_certs/keyring_handler.h index 2462bfa08fe34..cd06bd6072be2 100644 --- a/security/integrity/platform_certs/keyring_handler.h +++ b/security/integrity/platform_certs/keyring_handler.h @@ -30,3 +30,11 @@ efi_element_handler_t get_handler_for_db(const efi_guid_t *sig_type); efi_element_handler_t get_handler_for_dbx(const efi_guid_t *sig_type); #endif + +#ifndef UEFI_QUIRK_SKIP_CERT +#define UEFI_QUIRK_SKIP_CERT(vendor, product) \ + .matches = { \ + DMI_MATCH(DMI_BOARD_VENDOR, vendor), \ + DMI_MATCH(DMI_PRODUCT_NAME, product), \ + }, +#endif diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c index f0c908241966a..452011428d119 100644 --- a/security/integrity/platform_certs/load_uefi.c +++ b/security/integrity/platform_certs/load_uefi.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -11,6 +12,31 @@ #include "../integrity.h" #include "keyring_handler.h" +/* + * On T2 Macs reading the db and dbx efi variables to load UEFI Secure Boot + * certificates causes occurrence of a page fault in Apple's firmware and + * a crash disabling EFI runtime services. The following quirk skips reading + * these variables. + */ +static const struct dmi_system_id uefi_skip_cert[] = { + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro15,1") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro15,2") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro15,3") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro15,4") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro16,1") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro16,2") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro16,3") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro16,4") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookAir8,1") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookAir8,2") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookAir9,1") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacMini8,1") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacPro7,1") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "iMac20,1") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "iMac20,2") }, + { } +}; + /* * Look to see if a UEFI variable called MokIgnoreDB exists and return true if * it does. @@ -78,6 +104,13 @@ static int __init load_uefi_certs(void) unsigned long dbsize = 0, dbxsize = 0, moksize = 0; efi_status_t status; int rc = 0; + const struct dmi_system_id *dmi_id; + + dmi_id = dmi_first_match(uefi_skip_cert); + if (dmi_id) { + pr_err("Reading UEFI Secure Boot Certs is not supported on T2 Macs.\n"); + return false; + } if (!efi.get_variable) return false; diff --git a/security/keys/keyctl_pkey.c b/security/keys/keyctl_pkey.c index 931d8dfb4a7f4..63e5c646f7620 100644 --- a/security/keys/keyctl_pkey.c +++ b/security/keys/keyctl_pkey.c @@ -135,15 +135,23 @@ static int keyctl_pkey_params_get_2(const struct keyctl_pkey_params __user *_par switch (op) { case KEYCTL_PKEY_ENCRYPT: + if (uparams.in_len > info.max_dec_size || + uparams.out_len > info.max_enc_size) + return -EINVAL; + break; case KEYCTL_PKEY_DECRYPT: if (uparams.in_len > info.max_enc_size || uparams.out_len > info.max_dec_size) return -EINVAL; break; case KEYCTL_PKEY_SIGN: + if (uparams.in_len > info.max_data_size || + uparams.out_len > info.max_sig_size) + return -EINVAL; + break; case KEYCTL_PKEY_VERIFY: - if (uparams.in_len > info.max_sig_size || - uparams.out_len > info.max_data_size) + if (uparams.in_len > info.max_data_size || + uparams.in2_len > info.max_sig_size) return -EINVAL; break; default: @@ -151,7 +159,7 @@ static int keyctl_pkey_params_get_2(const struct keyctl_pkey_params __user *_par } params->in_len = uparams.in_len; - params->out_len = uparams.out_len; + params->out_len = uparams.out_len; /* Note: same as in2_len */ return 0; } diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c index 3f38583bed06f..655a6edb5d7f9 100644 --- a/security/lockdown/lockdown.c +++ b/security/lockdown/lockdown.c @@ -33,10 +33,12 @@ static const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = { [LOCKDOWN_MMIOTRACE] = "unsafe mmio", [LOCKDOWN_DEBUGFS] = "debugfs access", [LOCKDOWN_XMON_WR] = "xmon write access", + [LOCKDOWN_DBG_WRITE_KERNEL] = "use of kgdb/kdb to write kernel RAM", [LOCKDOWN_INTEGRITY_MAX] = "integrity", [LOCKDOWN_KCORE] = "/proc/kcore access", [LOCKDOWN_KPROBES] = "use of kprobes", [LOCKDOWN_BPF_READ] = "use of bpf to read kernel RAM", + [LOCKDOWN_DBG_READ_KERNEL] = "use of kgdb/kdb to read kernel RAM", [LOCKDOWN_PERF] = "unsafe use of perf", [LOCKDOWN_TRACEFS] = "use of tracefs", [LOCKDOWN_XMON_RW] = "xmon read and write access", diff --git a/security/security.c b/security/security.c index 1bc000f834e2d..f633717311a34 100644 --- a/security/security.c +++ b/security/security.c @@ -670,25 +670,25 @@ static void __init lsm_early_task(struct task_struct *task) /* Security operations */ -int security_binder_set_context_mgr(struct task_struct *mgr) +int security_binder_set_context_mgr(const struct cred *mgr) { return call_int_hook(binder_set_context_mgr, 0, mgr); } -int security_binder_transaction(struct task_struct *from, - struct task_struct *to) +int security_binder_transaction(const struct cred *from, + const struct cred *to) { return call_int_hook(binder_transaction, 0, from, to); } -int security_binder_transfer_binder(struct task_struct *from, - struct task_struct *to) +int security_binder_transfer_binder(const struct cred *from, + const struct cred *to) { return call_int_hook(binder_transfer_binder, 0, from, to); } -int security_binder_transfer_file(struct task_struct *from, - struct task_struct *to, struct file *file) +int security_binder_transfer_file(const struct cred *from, + const struct cred *to, struct file *file) { return call_int_hook(binder_transfer_file, 0, from, to, file); } @@ -802,9 +802,22 @@ int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc) return call_int_hook(fs_context_dup, 0, fc, src_fc); } -int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param) +int security_fs_context_parse_param(struct fs_context *fc, + struct fs_parameter *param) { - return call_int_hook(fs_context_parse_param, -ENOPARAM, fc, param); + struct security_hook_list *hp; + int trc; + int rc = -ENOPARAM; + + hlist_for_each_entry(hp, &security_hook_heads.fs_context_parse_param, + list) { + trc = hp->hook.fs_context_parse_param(fc, param); + if (trc == 0) + rc = 0; + else if (trc != -ENOPARAM) + return trc; + } + return rc; } int security_sb_alloc(struct super_block *sb) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 717a398ef4d05..d9f15c84aab7d 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -995,18 +995,22 @@ static int selinux_sb_clone_mnt_opts(const struct super_block *oldsb, static int selinux_add_opt(int token, const char *s, void **mnt_opts) { struct selinux_mnt_opts *opts = *mnt_opts; + bool is_alloc_opts = false; if (token == Opt_seclabel) /* eaten and completely ignored */ return 0; + if (!s) + return -ENOMEM; + if (!opts) { opts = kzalloc(sizeof(struct selinux_mnt_opts), GFP_KERNEL); if (!opts) return -ENOMEM; *mnt_opts = opts; + is_alloc_opts = true; } - if (!s) - return -ENOMEM; + switch (token) { case Opt_context: if (opts->context || opts->defcontext) @@ -1031,6 +1035,10 @@ static int selinux_add_opt(int token, const char *s, void **mnt_opts) } return 0; Einval: + if (is_alloc_opts) { + kfree(opts); + *mnt_opts = NULL; + } pr_warn(SEL_MOUNT_FAIL_MSG); return -EINVAL; } @@ -2050,22 +2058,19 @@ static inline u32 open_file_to_av(struct file *file) /* Hook functions begin here. */ -static int selinux_binder_set_context_mgr(struct task_struct *mgr) +static int selinux_binder_set_context_mgr(const struct cred *mgr) { - u32 mysid = current_sid(); - u32 mgrsid = task_sid(mgr); - return avc_has_perm(&selinux_state, - mysid, mgrsid, SECCLASS_BINDER, + current_sid(), cred_sid(mgr), SECCLASS_BINDER, BINDER__SET_CONTEXT_MGR, NULL); } -static int selinux_binder_transaction(struct task_struct *from, - struct task_struct *to) +static int selinux_binder_transaction(const struct cred *from, + const struct cred *to) { u32 mysid = current_sid(); - u32 fromsid = task_sid(from); - u32 tosid = task_sid(to); + u32 fromsid = cred_sid(from); + u32 tosid = cred_sid(to); int rc; if (mysid != fromsid) { @@ -2076,27 +2081,24 @@ static int selinux_binder_transaction(struct task_struct *from, return rc; } - return avc_has_perm(&selinux_state, - fromsid, tosid, SECCLASS_BINDER, BINDER__CALL, - NULL); + return avc_has_perm(&selinux_state, fromsid, tosid, + SECCLASS_BINDER, BINDER__CALL, NULL); } -static int selinux_binder_transfer_binder(struct task_struct *from, - struct task_struct *to) +static int selinux_binder_transfer_binder(const struct cred *from, + const struct cred *to) { - u32 fromsid = task_sid(from); - u32 tosid = task_sid(to); - return avc_has_perm(&selinux_state, - fromsid, tosid, SECCLASS_BINDER, BINDER__TRANSFER, + cred_sid(from), cred_sid(to), + SECCLASS_BINDER, BINDER__TRANSFER, NULL); } -static int selinux_binder_transfer_file(struct task_struct *from, - struct task_struct *to, +static int selinux_binder_transfer_file(const struct cred *from, + const struct cred *to, struct file *file) { - u32 sid = task_sid(to); + u32 sid = cred_sid(to); struct file_security_struct *fsec = selinux_file(file); struct dentry *dentry = file->f_path.dentry; struct inode_security_struct *isec; @@ -2853,10 +2855,9 @@ static int selinux_fs_context_parse_param(struct fs_context *fc, return opt; rc = selinux_add_opt(opt, param->string, &fc->security); - if (!rc) { + if (!rc) param->string = NULL; - rc = 1; - } + return rc; } @@ -5740,7 +5741,7 @@ static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb, struct common_audit_data ad; struct lsm_network_audit net = {0,}; char *addrp; - u8 proto; + u8 proto = 0; if (sk == NULL) return NF_ACCEPT; diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index f62adf3cfce89..a0afe49309c88 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -2250,6 +2250,43 @@ size_t security_policydb_len(struct selinux_state *state) return len; } +/** + * ocontext_to_sid - Helper to safely get sid for an ocontext + * @sidtab: SID table + * @c: ocontext structure + * @index: index of the context entry (0 or 1) + * @out_sid: pointer to the resulting SID value + * + * For all ocontexts except OCON_ISID the SID fields are populated + * on-demand when needed. Since updating the SID value is an SMP-sensitive + * operation, this helper must be used to do that safely. + * + * WARNING: This function may return -ESTALE, indicating that the caller + * must retry the operation after re-acquiring the policy pointer! + */ +static int ocontext_to_sid(struct sidtab *sidtab, struct ocontext *c, + size_t index, u32 *out_sid) +{ + int rc; + u32 sid; + + /* Ensure the associated sidtab entry is visible to this thread. */ + sid = smp_load_acquire(&c->sid[index]); + if (!sid) { + rc = sidtab_context_to_sid(sidtab, &c->context[index], &sid); + if (rc) + return rc; + + /* + * Ensure the new sidtab entry is visible to other threads + * when they see the SID. + */ + smp_store_release(&c->sid[index], sid); + } + *out_sid = sid; + return 0; +} + /** * security_port_sid - Obtain the SID for a port. * @protocol: protocol number @@ -2262,10 +2299,12 @@ int security_port_sid(struct selinux_state *state, struct policydb *policydb; struct sidtab *sidtab; struct ocontext *c; - int rc = 0; + int rc; read_lock(&state->ss->policy_rwlock); +retry: + rc = 0; policydb = &state->ss->policydb; sidtab = state->ss->sidtab; @@ -2279,14 +2318,11 @@ int security_port_sid(struct selinux_state *state, } if (c) { - if (!c->sid[0]) { - rc = sidtab_context_to_sid(sidtab, - &c->context[0], - &c->sid[0]); - if (rc) - goto out; - } - *out_sid = c->sid[0]; + rc = ocontext_to_sid(sidtab, c, 0, out_sid); + if (rc == -ESTALE) + goto retry; + if (rc) + goto out; } else { *out_sid = SECINITSID_PORT; } @@ -2308,10 +2344,12 @@ int security_ib_pkey_sid(struct selinux_state *state, struct policydb *policydb; struct sidtab *sidtab; struct ocontext *c; - int rc = 0; + int rc; read_lock(&state->ss->policy_rwlock); +retry: + rc = 0; policydb = &state->ss->policydb; sidtab = state->ss->sidtab; @@ -2326,14 +2364,11 @@ int security_ib_pkey_sid(struct selinux_state *state, } if (c) { - if (!c->sid[0]) { - rc = sidtab_context_to_sid(sidtab, - &c->context[0], - &c->sid[0]); - if (rc) - goto out; - } - *out_sid = c->sid[0]; + rc = ocontext_to_sid(sidtab, c, 0, out_sid); + if (rc == -ESTALE) + goto retry; + if (rc) + goto out; } else *out_sid = SECINITSID_UNLABELED; @@ -2354,10 +2389,12 @@ int security_ib_endport_sid(struct selinux_state *state, struct policydb *policydb; struct sidtab *sidtab; struct ocontext *c; - int rc = 0; + int rc; read_lock(&state->ss->policy_rwlock); +retry: + rc = 0; policydb = &state->ss->policydb; sidtab = state->ss->sidtab; @@ -2373,14 +2410,11 @@ int security_ib_endport_sid(struct selinux_state *state, } if (c) { - if (!c->sid[0]) { - rc = sidtab_context_to_sid(sidtab, - &c->context[0], - &c->sid[0]); - if (rc) - goto out; - } - *out_sid = c->sid[0]; + rc = ocontext_to_sid(sidtab, c, 0, out_sid); + if (rc == -ESTALE) + goto retry; + if (rc) + goto out; } else *out_sid = SECINITSID_UNLABELED; @@ -2399,11 +2433,13 @@ int security_netif_sid(struct selinux_state *state, { struct policydb *policydb; struct sidtab *sidtab; - int rc = 0; + int rc; struct ocontext *c; read_lock(&state->ss->policy_rwlock); +retry: + rc = 0; policydb = &state->ss->policydb; sidtab = state->ss->sidtab; @@ -2415,19 +2451,11 @@ int security_netif_sid(struct selinux_state *state, } if (c) { - if (!c->sid[0] || !c->sid[1]) { - rc = sidtab_context_to_sid(sidtab, - &c->context[0], - &c->sid[0]); - if (rc) - goto out; - rc = sidtab_context_to_sid(sidtab, - &c->context[1], - &c->sid[1]); - if (rc) - goto out; - } - *if_sid = c->sid[0]; + rc = ocontext_to_sid(sidtab, c, 0, if_sid); + if (rc == -ESTALE) + goto retry; + if (rc) + goto out; } else *if_sid = SECINITSID_NETIF; @@ -2469,6 +2497,7 @@ int security_node_sid(struct selinux_state *state, read_lock(&state->ss->policy_rwlock); +retry: policydb = &state->ss->policydb; sidtab = state->ss->sidtab; @@ -2511,14 +2540,11 @@ int security_node_sid(struct selinux_state *state, } if (c) { - if (!c->sid[0]) { - rc = sidtab_context_to_sid(sidtab, - &c->context[0], - &c->sid[0]); - if (rc) - goto out; - } - *out_sid = c->sid[0]; + rc = ocontext_to_sid(sidtab, c, 0, out_sid); + if (rc == -ESTALE) + goto retry; + if (rc) + goto out; } else { *out_sid = SECINITSID_NODE; } @@ -2677,7 +2703,7 @@ static inline int __security_genfs_sid(struct selinux_state *state, u16 sclass; struct genfs *genfs; struct ocontext *c; - int rc, cmp = 0; + int cmp = 0; while (path[0] == '/' && path[1] == '/') path++; @@ -2691,9 +2717,8 @@ static inline int __security_genfs_sid(struct selinux_state *state, break; } - rc = -ENOENT; if (!genfs || cmp) - goto out; + return -ENOENT; for (c = genfs->head; c; c = c->next) { len = strlen(c->u.name); @@ -2702,20 +2727,10 @@ static inline int __security_genfs_sid(struct selinux_state *state, break; } - rc = -ENOENT; if (!c) - goto out; - - if (!c->sid[0]) { - rc = sidtab_context_to_sid(sidtab, &c->context[0], &c->sid[0]); - if (rc) - goto out; - } + return -ENOENT; - *sid = c->sid[0]; - rc = 0; -out: - return rc; + return ocontext_to_sid(sidtab, c, 0, sid); } /** @@ -2750,13 +2765,15 @@ int security_fs_use(struct selinux_state *state, struct super_block *sb) { struct policydb *policydb; struct sidtab *sidtab; - int rc = 0; + int rc; struct ocontext *c; struct superblock_security_struct *sbsec = sb->s_security; const char *fstype = sb->s_type->name; read_lock(&state->ss->policy_rwlock); +retry: + rc = 0; policydb = &state->ss->policydb; sidtab = state->ss->sidtab; @@ -2769,13 +2786,11 @@ int security_fs_use(struct selinux_state *state, struct super_block *sb) if (c) { sbsec->behavior = c->v.behavior; - if (!c->sid[0]) { - rc = sidtab_context_to_sid(sidtab, &c->context[0], - &c->sid[0]); - if (rc) - goto out; - } - sbsec->sid = c->sid[0]; + rc = ocontext_to_sid(sidtab, c, 0, &sbsec->sid); + if (rc == -ESTALE) + goto retry; + if (rc) + goto out; } else { rc = __security_genfs_sid(state, fstype, "/", SECCLASS_DIR, &sbsec->sid); diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 7314196185d15..00e95f8bd7c73 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -346,7 +346,7 @@ int selinux_xfrm_state_alloc_acquire(struct xfrm_state *x, int rc; struct xfrm_sec_ctx *ctx; char *ctx_str = NULL; - int str_len; + u32 str_len; if (!polsec) return 0; diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c index 38ac3da4e791e..beeba1a9be170 100644 --- a/security/smack/smack_access.c +++ b/security/smack/smack_access.c @@ -81,23 +81,22 @@ int log_policy = SMACK_AUDIT_DENIED; int smk_access_entry(char *subject_label, char *object_label, struct list_head *rule_list) { - int may = -ENOENT; struct smack_rule *srp; list_for_each_entry_rcu(srp, rule_list, list) { if (srp->smk_object->smk_known == object_label && srp->smk_subject->smk_known == subject_label) { - may = srp->smk_access; - break; + int may = srp->smk_access; + /* + * MAY_WRITE implies MAY_LOCK. + */ + if ((may & MAY_WRITE) == MAY_WRITE) + may |= MAY_LOCK; + return may; } } - /* - * MAY_WRITE implies MAY_LOCK. - */ - if ((may & MAY_WRITE) == MAY_WRITE) - may |= MAY_LOCK; - return may; + return -ENOENT; } /** diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 12c0fa85d9f82..0253cd2e2358a 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -2501,7 +2501,7 @@ static int smk_ipv6_check(struct smack_known *subject, #ifdef CONFIG_AUDIT smk_ad_init_net(&ad, __func__, LSM_AUDIT_DATA_NET, &net); ad.a.u.net->family = PF_INET6; - ad.a.u.net->dport = ntohs(address->sin6_port); + ad.a.u.net->dport = address->sin6_port; if (act == SMK_RECEIVING) ad.a.u.net->v6info.saddr = address->sin6_addr; else diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index 3823ab2c4e4be..6b6fec04c412b 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -693,9 +693,7 @@ static void smk_cipso_doi(void) printk(KERN_WARNING "%s:%d remove rc = %d\n", __func__, __LINE__, rc); - doip = kmalloc(sizeof(struct cipso_v4_doi), GFP_KERNEL); - if (doip == NULL) - panic("smack: Failed to initialize cipso DOI.\n"); + doip = kmalloc(sizeof(struct cipso_v4_doi), GFP_KERNEL | __GFP_NOFAIL); doip->map.std = NULL; doip->doi = smk_cipso_doi_value; doip->type = CIPSO_V4_MAP_PASS; @@ -714,7 +712,7 @@ static void smk_cipso_doi(void) if (rc != 0) { printk(KERN_WARNING "%s:%d map add rc = %d\n", __func__, __LINE__, rc); - kfree(doip); + netlbl_cfg_cipsov4_del(doip->doi, &nai); return; } } @@ -831,6 +829,7 @@ static int smk_open_cipso(struct inode *inode, struct file *file) static ssize_t smk_set_cipso(struct file *file, const char __user *buf, size_t count, loff_t *ppos, int format) { + struct netlbl_lsm_catmap *old_cat; struct smack_known *skp; struct netlbl_lsm_secattr ncats; char mapcatset[SMK_CIPSOLEN]; @@ -920,9 +919,11 @@ static ssize_t smk_set_cipso(struct file *file, const char __user *buf, rc = smk_netlbl_mls(maplevel, mapcatset, &ncats, SMK_CIPSOLEN); if (rc >= 0) { - netlbl_catmap_free(skp->smk_netlabel.attr.mls.cat); + old_cat = skp->smk_netlabel.attr.mls.cat; skp->smk_netlabel.attr.mls.cat = ncats.attr.mls.cat; skp->smk_netlabel.attr.mls.lvl = ncats.attr.mls.lvl; + synchronize_rcu(); + netlbl_catmap_free(old_cat); rc = count; } diff --git a/security/tomoyo/load_policy.c b/security/tomoyo/load_policy.c index 3445ae6fd4794..363b65be87ab7 100644 --- a/security/tomoyo/load_policy.c +++ b/security/tomoyo/load_policy.c @@ -24,7 +24,7 @@ static const char *tomoyo_loader; static int __init tomoyo_loader_setup(char *str) { tomoyo_loader = str; - return 0; + return 1; } __setup("TOMOYO_loader=", tomoyo_loader_setup); @@ -64,7 +64,7 @@ static const char *tomoyo_trigger; static int __init tomoyo_trigger_setup(char *str) { tomoyo_trigger = str; - return 0; + return 1; } __setup("TOMOYO_trigger=", tomoyo_trigger_setup); diff --git a/security/tomoyo/util.c b/security/tomoyo/util.c index eba0b3395851e..861fc6f4ebfb7 100644 --- a/security/tomoyo/util.c +++ b/security/tomoyo/util.c @@ -1029,6 +1029,8 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r) return false; if (!domain) return true; + if (READ_ONCE(domain->flags[TOMOYO_DIF_QUOTA_WARNED])) + return false; list_for_each_entry_rcu(ptr, &domain->acl_info_list, list, srcu_read_lock_held(&tomoyo_ss)) { u16 perm; @@ -1074,14 +1076,12 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r) if (count < tomoyo_profile(domain->ns, domain->profile)-> pref[TOMOYO_PREF_MAX_LEARNING_ENTRY]) return true; - if (!domain->flags[TOMOYO_DIF_QUOTA_WARNED]) { - domain->flags[TOMOYO_DIF_QUOTA_WARNED] = true; - /* r->granted = false; */ - tomoyo_write_log(r, "%s", tomoyo_dif[TOMOYO_DIF_QUOTA_WARNED]); + WRITE_ONCE(domain->flags[TOMOYO_DIF_QUOTA_WARNED], true); + /* r->granted = false; */ + tomoyo_write_log(r, "%s", tomoyo_dif[TOMOYO_DIF_QUOTA_WARNED]); #ifndef CONFIG_SECURITY_TOMOYO_INSECURE_BUILTIN_SETTING - pr_warn("WARNING: Domain '%s' has too many ACLs to hold. Stopped learning mode.\n", - domain->domainname->name); + pr_warn("WARNING: Domain '%s' has too many ACLs to hold. Stopped learning mode.\n", + domain->domainname->name); #endif - } return false; } diff --git a/sound/core/Makefile b/sound/core/Makefile index ee4a4a6b99ba7..d123587c0fd8f 100644 --- a/sound/core/Makefile +++ b/sound/core/Makefile @@ -9,7 +9,9 @@ ifneq ($(CONFIG_SND_PROC_FS),) snd-y += info.o snd-$(CONFIG_SND_OSSEMUL) += info_oss.o endif +ifneq ($(CONFIG_M68K),y) snd-$(CONFIG_ISA_DMA_API) += isadma.o +endif snd-$(CONFIG_SND_OSSEMUL) += sound_oss.o snd-$(CONFIG_SND_VMASTER) += vmaster.o snd-$(CONFIG_SND_JACK) += ctljack.o jack.o diff --git a/sound/core/control_compat.c b/sound/core/control_compat.c index d55be1db1a8a5..cca3ed9b06294 100644 --- a/sound/core/control_compat.c +++ b/sound/core/control_compat.c @@ -266,6 +266,7 @@ static int copy_ctl_value_to_user(void __user *userdata, struct snd_ctl_elem_value *data, int type, int count) { + struct snd_ctl_elem_value32 __user *data32 = userdata; int i, size; if (type == SNDRV_CTL_ELEM_TYPE_BOOLEAN || @@ -282,6 +283,8 @@ static int copy_ctl_value_to_user(void __user *userdata, if (copy_to_user(valuep, data->value.bytes.data, size)) return -EFAULT; } + if (copy_to_user(&data32->id, &data->id, sizeof(data32->id))) + return -EFAULT; return 0; } diff --git a/sound/core/jack.c b/sound/core/jack.c index fb26196571a7c..e7ac82d468216 100644 --- a/sound/core/jack.c +++ b/sound/core/jack.c @@ -34,8 +34,11 @@ static int snd_jack_dev_disconnect(struct snd_device *device) #ifdef CONFIG_SND_JACK_INPUT_DEV struct snd_jack *jack = device->device_data; - if (!jack->input_dev) + mutex_lock(&jack->input_dev_lock); + if (!jack->input_dev) { + mutex_unlock(&jack->input_dev_lock); return 0; + } /* If the input device is registered with the input subsystem * then we need to use a different deallocator. */ @@ -44,6 +47,7 @@ static int snd_jack_dev_disconnect(struct snd_device *device) else input_free_device(jack->input_dev); jack->input_dev = NULL; + mutex_unlock(&jack->input_dev_lock); #endif /* CONFIG_SND_JACK_INPUT_DEV */ return 0; } @@ -54,10 +58,13 @@ static int snd_jack_dev_free(struct snd_device *device) struct snd_card *card = device->card; struct snd_jack_kctl *jack_kctl, *tmp_jack_kctl; + down_write(&card->controls_rwsem); list_for_each_entry_safe(jack_kctl, tmp_jack_kctl, &jack->kctl_list, list) { list_del_init(&jack_kctl->list); snd_ctl_remove(card, jack_kctl->kctl); } + up_write(&card->controls_rwsem); + if (jack->private_free) jack->private_free(jack); @@ -79,8 +86,11 @@ static int snd_jack_dev_register(struct snd_device *device) snprintf(jack->name, sizeof(jack->name), "%s %s", card->shortname, jack->id); - if (!jack->input_dev) + mutex_lock(&jack->input_dev_lock); + if (!jack->input_dev) { + mutex_unlock(&jack->input_dev_lock); return 0; + } jack->input_dev->name = jack->name; @@ -105,6 +115,7 @@ static int snd_jack_dev_register(struct snd_device *device) if (err == 0) jack->registered = 1; + mutex_unlock(&jack->input_dev_lock); return err; } #endif /* CONFIG_SND_JACK_INPUT_DEV */ @@ -220,10 +231,16 @@ int snd_jack_new(struct snd_card *card, const char *id, int type, return -ENOMEM; jack->id = kstrdup(id, GFP_KERNEL); + if (jack->id == NULL) { + kfree(jack); + return -ENOMEM; + } - /* don't creat input device for phantom jack */ - if (!phantom_jack) { #ifdef CONFIG_SND_JACK_INPUT_DEV + mutex_init(&jack->input_dev_lock); + + /* don't create input device for phantom jack */ + if (!phantom_jack) { int i; jack->input_dev = input_allocate_device(); @@ -241,8 +258,8 @@ int snd_jack_new(struct snd_card *card, const char *id, int type, input_set_capability(jack->input_dev, EV_SW, jack_switch_types[i]); -#endif /* CONFIG_SND_JACK_INPUT_DEV */ } +#endif /* CONFIG_SND_JACK_INPUT_DEV */ err = snd_device_new(card, SNDRV_DEV_JACK, jack, &ops); if (err < 0) @@ -282,10 +299,14 @@ EXPORT_SYMBOL(snd_jack_new); void snd_jack_set_parent(struct snd_jack *jack, struct device *parent) { WARN_ON(jack->registered); - if (!jack->input_dev) + mutex_lock(&jack->input_dev_lock); + if (!jack->input_dev) { + mutex_unlock(&jack->input_dev_lock); return; + } jack->input_dev->dev.parent = parent; + mutex_unlock(&jack->input_dev_lock); } EXPORT_SYMBOL(snd_jack_set_parent); @@ -333,6 +354,8 @@ EXPORT_SYMBOL(snd_jack_set_key); /** * snd_jack_report - Report the current status of a jack + * Note: This function uses mutexes and should be called from a + * context which can sleep (such as a workqueue). * * @jack: The jack to report status for * @status: The current status of the jack @@ -352,8 +375,11 @@ void snd_jack_report(struct snd_jack *jack, int status) status & jack_kctl->mask_bits); #ifdef CONFIG_SND_JACK_INPUT_DEV - if (!jack->input_dev) + mutex_lock(&jack->input_dev_lock); + if (!jack->input_dev) { + mutex_unlock(&jack->input_dev_lock); return; + } for (i = 0; i < ARRAY_SIZE(jack->key); i++) { int testbit = SND_JACK_BTN_0 >> i; @@ -372,6 +398,7 @@ void snd_jack_report(struct snd_jack *jack, int status) } input_sync(jack->input_dev); + mutex_unlock(&jack->input_dev_lock); #endif /* CONFIG_SND_JACK_INPUT_DEV */ } EXPORT_SYMBOL(snd_jack_report); diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index fe1ea03582cbb..9fc7c81ec6ae5 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -124,6 +124,7 @@ int snd_dma_alloc_pages(int type, struct device *device, size_t size, if (WARN_ON(!device)) return -EINVAL; + size = PAGE_ALIGN(size); dmab->dev.type = type; dmab->dev.dev = device; dmab->bytes = 0; diff --git a/sound/core/oss/mixer_oss.c b/sound/core/oss/mixer_oss.c index 7eb54df5556df..50ec8b8ff68c9 100644 --- a/sound/core/oss/mixer_oss.c +++ b/sound/core/oss/mixer_oss.c @@ -130,11 +130,13 @@ static int snd_mixer_oss_devmask(struct snd_mixer_oss_file *fmixer) if (mixer == NULL) return -EIO; + mutex_lock(&mixer->reg_mutex); for (chn = 0; chn < 31; chn++) { pslot = &mixer->slots[chn]; if (pslot->put_volume || pslot->put_recsrc) result |= 1 << chn; } + mutex_unlock(&mixer->reg_mutex); return result; } @@ -146,11 +148,13 @@ static int snd_mixer_oss_stereodevs(struct snd_mixer_oss_file *fmixer) if (mixer == NULL) return -EIO; + mutex_lock(&mixer->reg_mutex); for (chn = 0; chn < 31; chn++) { pslot = &mixer->slots[chn]; if (pslot->put_volume && pslot->stereo) result |= 1 << chn; } + mutex_unlock(&mixer->reg_mutex); return result; } @@ -161,6 +165,7 @@ static int snd_mixer_oss_recmask(struct snd_mixer_oss_file *fmixer) if (mixer == NULL) return -EIO; + mutex_lock(&mixer->reg_mutex); if (mixer->put_recsrc && mixer->get_recsrc) { /* exclusive */ result = mixer->mask_recsrc; } else { @@ -172,6 +177,7 @@ static int snd_mixer_oss_recmask(struct snd_mixer_oss_file *fmixer) result |= 1 << chn; } } + mutex_unlock(&mixer->reg_mutex); return result; } @@ -182,11 +188,12 @@ static int snd_mixer_oss_get_recsrc(struct snd_mixer_oss_file *fmixer) if (mixer == NULL) return -EIO; + mutex_lock(&mixer->reg_mutex); if (mixer->put_recsrc && mixer->get_recsrc) { /* exclusive */ - int err; unsigned int index; - if ((err = mixer->get_recsrc(fmixer, &index)) < 0) - return err; + result = mixer->get_recsrc(fmixer, &index); + if (result < 0) + goto unlock; result = 1 << index; } else { struct snd_mixer_oss_slot *pslot; @@ -201,7 +208,10 @@ static int snd_mixer_oss_get_recsrc(struct snd_mixer_oss_file *fmixer) } } } - return mixer->oss_recsrc = result; + mixer->oss_recsrc = result; + unlock: + mutex_unlock(&mixer->reg_mutex); + return result; } static int snd_mixer_oss_set_recsrc(struct snd_mixer_oss_file *fmixer, int recsrc) @@ -214,6 +224,7 @@ static int snd_mixer_oss_set_recsrc(struct snd_mixer_oss_file *fmixer, int recsr if (mixer == NULL) return -EIO; + mutex_lock(&mixer->reg_mutex); if (mixer->get_recsrc && mixer->put_recsrc) { /* exclusive input */ if (recsrc & ~mixer->oss_recsrc) recsrc &= ~mixer->oss_recsrc; @@ -239,6 +250,7 @@ static int snd_mixer_oss_set_recsrc(struct snd_mixer_oss_file *fmixer, int recsr } } } + mutex_unlock(&mixer->reg_mutex); return result; } @@ -250,6 +262,7 @@ static int snd_mixer_oss_get_volume(struct snd_mixer_oss_file *fmixer, int slot) if (mixer == NULL || slot > 30) return -EIO; + mutex_lock(&mixer->reg_mutex); pslot = &mixer->slots[slot]; left = pslot->volume[0]; right = pslot->volume[1]; @@ -257,15 +270,21 @@ static int snd_mixer_oss_get_volume(struct snd_mixer_oss_file *fmixer, int slot) result = pslot->get_volume(fmixer, pslot, &left, &right); if (!pslot->stereo) right = left; - if (snd_BUG_ON(left < 0 || left > 100)) - return -EIO; - if (snd_BUG_ON(right < 0 || right > 100)) - return -EIO; + if (snd_BUG_ON(left < 0 || left > 100)) { + result = -EIO; + goto unlock; + } + if (snd_BUG_ON(right < 0 || right > 100)) { + result = -EIO; + goto unlock; + } if (result >= 0) { pslot->volume[0] = left; pslot->volume[1] = right; result = (left & 0xff) | ((right & 0xff) << 8); } + unlock: + mutex_unlock(&mixer->reg_mutex); return result; } @@ -278,6 +297,7 @@ static int snd_mixer_oss_set_volume(struct snd_mixer_oss_file *fmixer, if (mixer == NULL || slot > 30) return -EIO; + mutex_lock(&mixer->reg_mutex); pslot = &mixer->slots[slot]; if (left > 100) left = 100; @@ -288,10 +308,13 @@ static int snd_mixer_oss_set_volume(struct snd_mixer_oss_file *fmixer, if (pslot->put_volume) result = pslot->put_volume(fmixer, pslot, left, right); if (result < 0) - return result; + goto unlock; pslot->volume[0] = left; pslot->volume[1] = right; - return (left & 0xff) | ((right & 0xff) << 8); + result = (left & 0xff) | ((right & 0xff) << 8); + unlock: + mutex_unlock(&mixer->reg_mutex); + return result; } static int snd_mixer_oss_ioctl1(struct snd_mixer_oss_file *fmixer, unsigned int cmd, unsigned long arg) diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index 0b03777d01116..ad4e0af2d0d03 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -147,7 +147,7 @@ snd_pcm_hw_param_value_min(const struct snd_pcm_hw_params *params, * * Return the maximum value for field PAR. */ -static unsigned int +static int snd_pcm_hw_param_value_max(const struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var, int *dir) { @@ -682,18 +682,24 @@ static int snd_pcm_oss_period_size(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *oss_params, struct snd_pcm_hw_params *slave_params) { - size_t s; - size_t oss_buffer_size, oss_period_size, oss_periods; - size_t min_period_size, max_period_size; + ssize_t s; + ssize_t oss_buffer_size; + ssize_t oss_period_size, oss_periods; + ssize_t min_period_size, max_period_size; struct snd_pcm_runtime *runtime = substream->runtime; size_t oss_frame_size; oss_frame_size = snd_pcm_format_physical_width(params_format(oss_params)) * params_channels(oss_params) / 8; + oss_buffer_size = snd_pcm_hw_param_value_max(slave_params, + SNDRV_PCM_HW_PARAM_BUFFER_SIZE, + NULL); + if (oss_buffer_size <= 0) + return -EINVAL; oss_buffer_size = snd_pcm_plug_client_size(substream, - snd_pcm_hw_param_value_max(slave_params, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, NULL)) * oss_frame_size; - if (!oss_buffer_size) + oss_buffer_size * oss_frame_size); + if (oss_buffer_size <= 0) return -EINVAL; oss_buffer_size = rounddown_pow_of_two(oss_buffer_size); if (atomic_read(&substream->mmap_count)) { @@ -730,7 +736,7 @@ static int snd_pcm_oss_period_size(struct snd_pcm_substream *substream, min_period_size = snd_pcm_plug_client_size(substream, snd_pcm_hw_param_value_min(slave_params, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, NULL)); - if (min_period_size) { + if (min_period_size > 0) { min_period_size *= oss_frame_size; min_period_size = roundup_pow_of_two(min_period_size); if (oss_period_size < min_period_size) @@ -739,7 +745,7 @@ static int snd_pcm_oss_period_size(struct snd_pcm_substream *substream, max_period_size = snd_pcm_plug_client_size(substream, snd_pcm_hw_param_value_max(slave_params, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, NULL)); - if (max_period_size) { + if (max_period_size > 0) { max_period_size *= oss_frame_size; max_period_size = rounddown_pow_of_two(max_period_size); if (oss_period_size > max_period_size) @@ -752,7 +758,7 @@ static int snd_pcm_oss_period_size(struct snd_pcm_substream *substream, oss_periods = substream->oss.setup.periods; s = snd_pcm_hw_param_value_max(slave_params, SNDRV_PCM_HW_PARAM_PERIODS, NULL); - if (runtime->oss.maxfrags && s > runtime->oss.maxfrags) + if (s > 0 && runtime->oss.maxfrags && s > runtime->oss.maxfrags) s = runtime->oss.maxfrags; if (oss_periods > s) oss_periods = s; @@ -768,6 +774,11 @@ static int snd_pcm_oss_period_size(struct snd_pcm_substream *substream, if (oss_period_size < 16) return -EINVAL; + + /* don't allocate too large period; 1MB period must be enough */ + if (oss_period_size > 1024 * 1024) + return -ENOMEM; + runtime->oss.period_bytes = oss_period_size; runtime->oss.period_frames = 1; runtime->oss.periods = oss_periods; @@ -878,8 +889,15 @@ static int snd_pcm_oss_change_params_locked(struct snd_pcm_substream *substream) err = -EINVAL; goto failure; } - choose_rate(substream, sparams, runtime->oss.rate); - snd_pcm_hw_param_near(substream, sparams, SNDRV_PCM_HW_PARAM_CHANNELS, runtime->oss.channels, NULL); + + err = choose_rate(substream, sparams, runtime->oss.rate); + if (err < 0) + goto failure; + err = snd_pcm_hw_param_near(substream, sparams, + SNDRV_PCM_HW_PARAM_CHANNELS, + runtime->oss.channels, NULL); + if (err < 0) + goto failure; format = snd_pcm_oss_format_from(runtime->oss.format); @@ -1032,10 +1050,9 @@ static int snd_pcm_oss_change_params_locked(struct snd_pcm_substream *substream) goto failure; } #endif - oss_period_size *= oss_frame_size; - - oss_buffer_size = oss_period_size * runtime->oss.periods; - if (oss_buffer_size < 0) { + oss_period_size = array_size(oss_period_size, oss_frame_size); + oss_buffer_size = array_size(oss_period_size, runtime->oss.periods); + if (oss_buffer_size <= 0) { err = -EINVAL; goto failure; } @@ -1946,7 +1963,7 @@ static int snd_pcm_oss_set_fragment1(struct snd_pcm_substream *substream, unsign if (runtime->oss.subdivision || runtime->oss.fragshift) return -EINVAL; fragshift = val & 0xffff; - if (fragshift >= 31) + if (fragshift >= 25) /* should be large enough */ return -EINVAL; runtime->oss.fragshift = fragshift; runtime->oss.maxfrags = (val >> 16) & 0xffff; @@ -2042,7 +2059,7 @@ static int snd_pcm_oss_set_trigger(struct snd_pcm_oss_file *pcm_oss_file, int tr int err, cmd; #ifdef OSS_DEBUG - pcm_dbg(substream->pcm, "pcm_oss: trigger = 0x%x\n", trigger); + pr_debug("pcm_oss: trigger = 0x%x\n", trigger); #endif psubstream = pcm_oss_file->streams[SNDRV_PCM_STREAM_PLAYBACK]; diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c index da400da1fafe6..8b7bbabeea24b 100644 --- a/sound/core/oss/pcm_plugin.c +++ b/sound/core/oss/pcm_plugin.c @@ -61,7 +61,10 @@ static int snd_pcm_plugin_alloc(struct snd_pcm_plugin *plugin, snd_pcm_uframes_t } if ((width = snd_pcm_format_physical_width(format->format)) < 0) return width; - size = frames * format->channels * width; + size = array3_size(frames, format->channels, width); + /* check for too large period size once again */ + if (size > 1024 * 1024) + return -ENOMEM; if (snd_BUG_ON(size % 8)) return -ENXIO; size /= 8; diff --git a/sound/core/pcm.c b/sound/core/pcm.c index 9a72d641743d9..3561cdceaadc2 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -810,7 +810,11 @@ EXPORT_SYMBOL(snd_pcm_new_internal); static void free_chmap(struct snd_pcm_str *pstr) { if (pstr->chmap_kctl) { - snd_ctl_remove(pstr->pcm->card, pstr->chmap_kctl); + struct snd_card *card = pstr->pcm->card; + + down_write(&card->controls_rwsem); + snd_ctl_remove(card, pstr->chmap_kctl); + up_write(&card->controls_rwsem); pstr->chmap_kctl = NULL; } } @@ -965,6 +969,8 @@ int snd_pcm_attach_substream(struct snd_pcm *pcm, int stream, init_waitqueue_head(&runtime->tsleep); runtime->status->state = SNDRV_PCM_STATE_OPEN; + mutex_init(&runtime->buffer_mutex); + atomic_set(&runtime->buffer_accessing, 0); substream->runtime = runtime; substream->private_data = pcm->private_data; @@ -996,6 +1002,7 @@ void snd_pcm_detach_substream(struct snd_pcm_substream *substream) substream->runtime = NULL; if (substream->timer) spin_unlock_irq(&substream->timer->lock); + mutex_destroy(&runtime->buffer_mutex); kfree(runtime); put_pid(substream->pid); substream->pid = NULL; diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index 1662573a40309..1bce55533519d 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -1736,7 +1736,7 @@ static int snd_pcm_lib_ioctl_fifo_size(struct snd_pcm_substream *substream, channels = params_channels(params); frame_size = snd_pcm_format_size(format, channels); if (frame_size > 0) - params->fifo_size /= (unsigned)frame_size; + params->fifo_size /= frame_size; } return 0; } @@ -2211,10 +2211,15 @@ snd_pcm_sframes_t __snd_pcm_lib_xfer(struct snd_pcm_substream *substream, err = -EINVAL; goto _end_unlock; } + if (!atomic_inc_unless_negative(&runtime->buffer_accessing)) { + err = -EBUSY; + goto _end_unlock; + } snd_pcm_stream_unlock_irq(substream); err = writer(substream, appl_ofs, data, offset, frames, transfer); snd_pcm_stream_lock_irq(substream); + atomic_dec(&runtime->buffer_accessing); if (err < 0) goto _end_unlock; err = pcm_accessible_state(runtime); diff --git a/sound/core/pcm_memory.c b/sound/core/pcm_memory.c index 7600dcdf5fd4d..9aea1d6fb0547 100644 --- a/sound/core/pcm_memory.c +++ b/sound/core/pcm_memory.c @@ -133,19 +133,20 @@ static void snd_pcm_lib_preallocate_proc_write(struct snd_info_entry *entry, size_t size; struct snd_dma_buffer new_dmab; + mutex_lock(&substream->pcm->open_mutex); if (substream->runtime) { buffer->error = -EBUSY; - return; + goto unlock; } if (!snd_info_get_line(buffer, line, sizeof(line))) { snd_info_get_str(str, line, sizeof(str)); size = simple_strtoul(str, NULL, 10) * 1024; if ((size != 0 && size < 8192) || size > substream->dma_max) { buffer->error = -EINVAL; - return; + goto unlock; } if (substream->dma_buffer.bytes == size) - return; + goto unlock; memset(&new_dmab, 0, sizeof(new_dmab)); new_dmab.dev = substream->dma_buffer.dev; if (size > 0) { @@ -153,7 +154,7 @@ static void snd_pcm_lib_preallocate_proc_write(struct snd_info_entry *entry, substream->dma_buffer.dev.dev, size, &new_dmab) < 0) { buffer->error = -ENOMEM; - return; + goto unlock; } substream->buffer_bytes_max = size; } else { @@ -165,6 +166,8 @@ static void snd_pcm_lib_preallocate_proc_write(struct snd_info_entry *entry, } else { buffer->error = -EINVAL; } + unlock: + mutex_unlock(&substream->pcm->open_mutex); } static inline void preallocate_info_init(struct snd_pcm_substream *substream) diff --git a/sound/core/pcm_misc.c b/sound/core/pcm_misc.c index c4eb561d20086..0956be39b0355 100644 --- a/sound/core/pcm_misc.c +++ b/sound/core/pcm_misc.c @@ -423,7 +423,7 @@ int snd_pcm_format_set_silence(snd_pcm_format_t format, void *data, unsigned int return 0; width = pcm_formats[(INT)format].phys; /* physical width */ pat = pcm_formats[(INT)format].silence; - if (! width) + if (!width || !pat) return -EINVAL; /* signed or 1 byte data */ if (pcm_formats[(INT)format].signd == 1 || width <= 8) { diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 0c5b7a54ca81c..57a4991fa0f36 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -630,6 +630,30 @@ static int snd_pcm_hw_params_choose(struct snd_pcm_substream *pcm, return 0; } +/* acquire buffer_mutex; if it's in r/w operation, return -EBUSY, otherwise + * block the further r/w operations + */ +static int snd_pcm_buffer_access_lock(struct snd_pcm_runtime *runtime) +{ + if (!atomic_dec_unless_positive(&runtime->buffer_accessing)) + return -EBUSY; + mutex_lock(&runtime->buffer_mutex); + return 0; /* keep buffer_mutex, unlocked by below */ +} + +/* release buffer_mutex and clear r/w access flag */ +static void snd_pcm_buffer_access_unlock(struct snd_pcm_runtime *runtime) +{ + mutex_unlock(&runtime->buffer_mutex); + atomic_inc(&runtime->buffer_accessing); +} + +#if IS_ENABLED(CONFIG_SND_PCM_OSS) +#define is_oss_stream(substream) ((substream)->oss.oss) +#else +#define is_oss_stream(substream) false +#endif + static int snd_pcm_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { @@ -641,22 +665,25 @@ static int snd_pcm_hw_params(struct snd_pcm_substream *substream, if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; runtime = substream->runtime; + err = snd_pcm_buffer_access_lock(runtime); + if (err < 0) + return err; snd_pcm_stream_lock_irq(substream); switch (runtime->status->state) { case SNDRV_PCM_STATE_OPEN: case SNDRV_PCM_STATE_SETUP: case SNDRV_PCM_STATE_PREPARED: + if (!is_oss_stream(substream) && + atomic_read(&substream->mmap_count)) + err = -EBADFD; break; default: - snd_pcm_stream_unlock_irq(substream); - return -EBADFD; + err = -EBADFD; + break; } snd_pcm_stream_unlock_irq(substream); -#if IS_ENABLED(CONFIG_SND_PCM_OSS) - if (!substream->oss.oss) -#endif - if (atomic_read(&substream->mmap_count)) - return -EBADFD; + if (err) + goto unlock; params->rmask = ~0U; err = snd_pcm_hw_refine(substream, params); @@ -733,14 +760,19 @@ static int snd_pcm_hw_params(struct snd_pcm_substream *substream, if ((usecs = period_to_usecs(runtime)) >= 0) pm_qos_add_request(&substream->latency_pm_qos_req, PM_QOS_CPU_DMA_LATENCY, usecs); - return 0; + err = 0; _error: - /* hardware might be unusable from this time, - so we force application to retry to set - the correct hardware parameter settings */ - snd_pcm_set_state(substream, SNDRV_PCM_STATE_OPEN); - if (substream->ops->hw_free != NULL) - substream->ops->hw_free(substream); + if (err) { + /* hardware might be unusable from this time, + * so we force application to retry to set + * the correct hardware parameter settings + */ + snd_pcm_set_state(substream, SNDRV_PCM_STATE_OPEN); + if (substream->ops->hw_free != NULL) + substream->ops->hw_free(substream); + } + unlock: + snd_pcm_buffer_access_unlock(runtime); return err; } @@ -773,22 +805,29 @@ static int snd_pcm_hw_free(struct snd_pcm_substream *substream) if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; runtime = substream->runtime; + result = snd_pcm_buffer_access_lock(runtime); + if (result < 0) + return result; snd_pcm_stream_lock_irq(substream); switch (runtime->status->state) { case SNDRV_PCM_STATE_SETUP: case SNDRV_PCM_STATE_PREPARED: + if (atomic_read(&substream->mmap_count)) + result = -EBADFD; break; default: - snd_pcm_stream_unlock_irq(substream); - return -EBADFD; + result = -EBADFD; + break; } snd_pcm_stream_unlock_irq(substream); - if (atomic_read(&substream->mmap_count)) - return -EBADFD; + if (result) + goto unlock; if (substream->ops->hw_free) result = substream->ops->hw_free(substream); snd_pcm_set_state(substream, SNDRV_PCM_STATE_OPEN); pm_qos_remove_request(&substream->latency_pm_qos_req); + unlock: + snd_pcm_buffer_access_unlock(runtime); return result; } @@ -1025,15 +1064,17 @@ struct action_ops { */ static int snd_pcm_action_group(const struct action_ops *ops, struct snd_pcm_substream *substream, - int state, int do_lock) + int state, int stream_lock) { struct snd_pcm_substream *s = NULL; struct snd_pcm_substream *s1; int res = 0, depth = 1; snd_pcm_group_for_each_entry(s, substream) { - if (do_lock && s != substream) { - if (s->pcm->nonatomic) + if (s != substream) { + if (!stream_lock) + mutex_lock_nested(&s->runtime->buffer_mutex, depth); + else if (s->pcm->nonatomic) mutex_lock_nested(&s->self_group.mutex, depth); else spin_lock_nested(&s->self_group.lock, depth); @@ -1061,18 +1102,18 @@ static int snd_pcm_action_group(const struct action_ops *ops, ops->post_action(s, state); } _unlock: - if (do_lock) { - /* unlock streams */ - snd_pcm_group_for_each_entry(s1, substream) { - if (s1 != substream) { - if (s1->pcm->nonatomic) - mutex_unlock(&s1->self_group.mutex); - else - spin_unlock(&s1->self_group.lock); - } - if (s1 == s) /* end */ - break; + /* unlock streams */ + snd_pcm_group_for_each_entry(s1, substream) { + if (s1 != substream) { + if (!stream_lock) + mutex_unlock(&s1->runtime->buffer_mutex); + else if (s1->pcm->nonatomic) + mutex_unlock(&s1->self_group.mutex); + else + spin_unlock(&s1->self_group.lock); } + if (s1 == s) /* end */ + break; } return res; } @@ -1202,10 +1243,15 @@ static int snd_pcm_action_nonatomic(const struct action_ops *ops, /* Guarantee the group members won't change during non-atomic action */ down_read(&snd_pcm_link_rwsem); + res = snd_pcm_buffer_access_lock(substream->runtime); + if (res < 0) + goto unlock; if (snd_pcm_stream_linked(substream)) res = snd_pcm_action_group(ops, substream, state, 0); else res = snd_pcm_action_single(ops, substream, state); + snd_pcm_buffer_access_unlock(substream->runtime); + unlock: up_read(&snd_pcm_link_rwsem); return res; } @@ -1656,21 +1702,25 @@ static int snd_pcm_do_reset(struct snd_pcm_substream *substream, int state) int err = substream->ops->ioctl(substream, SNDRV_PCM_IOCTL1_RESET, NULL); if (err < 0) return err; + snd_pcm_stream_lock_irq(substream); runtime->hw_ptr_base = 0; runtime->hw_ptr_interrupt = runtime->status->hw_ptr - runtime->status->hw_ptr % runtime->period_size; runtime->silence_start = runtime->status->hw_ptr; runtime->silence_filled = 0; + snd_pcm_stream_unlock_irq(substream); return 0; } static void snd_pcm_post_reset(struct snd_pcm_substream *substream, int state) { struct snd_pcm_runtime *runtime = substream->runtime; + snd_pcm_stream_lock_irq(substream); runtime->control->appl_ptr = runtime->status->hw_ptr; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && runtime->silence_size > 0) snd_pcm_playback_silence(substream, ULONG_MAX); + snd_pcm_stream_unlock_irq(substream); } static const struct action_ops snd_pcm_action_reset = { diff --git a/sound/core/seq/seq_queue.c b/sound/core/seq/seq_queue.c index 71a6ea62c3be7..4ff0b927230c2 100644 --- a/sound/core/seq/seq_queue.c +++ b/sound/core/seq/seq_queue.c @@ -234,12 +234,15 @@ struct snd_seq_queue *snd_seq_queue_find_name(char *name) /* -------------------------------------------------------- */ +#define MAX_CELL_PROCESSES_IN_QUEUE 1000 + void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop) { unsigned long flags; struct snd_seq_event_cell *cell; snd_seq_tick_time_t cur_tick; snd_seq_real_time_t cur_time; + int processed = 0; if (q == NULL) return; @@ -262,6 +265,8 @@ void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop) if (!cell) break; snd_seq_dispatch_event(cell, atomic, hop); + if (++processed >= MAX_CELL_PROCESSES_IN_QUEUE) + goto out; /* the rest processed at the next batch */ } /* Process time queue... */ @@ -271,14 +276,19 @@ void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop) if (!cell) break; snd_seq_dispatch_event(cell, atomic, hop); + if (++processed >= MAX_CELL_PROCESSES_IN_QUEUE) + goto out; /* the rest processed at the next batch */ } + out: /* free lock */ spin_lock_irqsave(&q->check_lock, flags); if (q->check_again) { q->check_again = 0; - spin_unlock_irqrestore(&q->check_lock, flags); - goto __again; + if (processed < MAX_CELL_PROCESSES_IN_QUEUE) { + spin_unlock_irqrestore(&q->check_lock, flags); + goto __again; + } } q->check_blocked = 0; spin_unlock_irqrestore(&q->check_lock, flags); diff --git a/sound/core/seq_device.c b/sound/core/seq_device.c index e9dbad93f9d09..c9223049551c4 100644 --- a/sound/core/seq_device.c +++ b/sound/core/seq_device.c @@ -147,6 +147,8 @@ static int snd_seq_device_dev_free(struct snd_device *device) struct snd_seq_device *dev = device->device_data; cancel_autoload_drivers(); + if (dev->private_free) + dev->private_free(dev); put_device(&dev->dev); return 0; } @@ -174,11 +176,7 @@ static int snd_seq_device_dev_disconnect(struct snd_device *device) static void snd_seq_dev_release(struct device *dev) { - struct snd_seq_device *sdev = to_seq_dev(dev); - - if (sdev->private_free) - sdev->private_free(sdev); - kfree(sdev); + kfree(to_seq_dev(dev)); } /* diff --git a/sound/core/timer.c b/sound/core/timer.c index b5a0ba79bf746..d684aa4150aad 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -595,13 +595,13 @@ static int snd_timer_stop1(struct snd_timer_instance *timeri, bool stop) if (!timer) return -EINVAL; spin_lock_irqsave(&timer->lock, flags); + list_del_init(&timeri->ack_list); + list_del_init(&timeri->active_list); if (!(timeri->flags & (SNDRV_TIMER_IFLG_RUNNING | SNDRV_TIMER_IFLG_START))) { result = -EBUSY; goto unlock; } - list_del_init(&timeri->ack_list); - list_del_init(&timeri->active_list); if (timer->card && timer->card->shutdown) goto unlock; if (stop) { @@ -636,23 +636,22 @@ static int snd_timer_stop1(struct snd_timer_instance *timeri, bool stop) static int snd_timer_stop_slave(struct snd_timer_instance *timeri, bool stop) { unsigned long flags; + bool running; spin_lock_irqsave(&slave_active_lock, flags); - if (!(timeri->flags & SNDRV_TIMER_IFLG_RUNNING)) { - spin_unlock_irqrestore(&slave_active_lock, flags); - return -EBUSY; - } + running = timeri->flags & SNDRV_TIMER_IFLG_RUNNING; timeri->flags &= ~SNDRV_TIMER_IFLG_RUNNING; if (timeri->timer) { spin_lock(&timeri->timer->lock); list_del_init(&timeri->ack_list); list_del_init(&timeri->active_list); - snd_timer_notify1(timeri, stop ? SNDRV_TIMER_EVENT_STOP : - SNDRV_TIMER_EVENT_PAUSE); + if (running) + snd_timer_notify1(timeri, stop ? SNDRV_TIMER_EVENT_STOP : + SNDRV_TIMER_EVENT_PAUSE); spin_unlock(&timeri->timer->lock); } spin_unlock_irqrestore(&slave_active_lock, flags); - return 0; + return running ? 0 : -EBUSY; } /* diff --git a/sound/drivers/opl3/opl3_midi.c b/sound/drivers/opl3/opl3_midi.c index 280cc79870cf8..ce38ec09d4087 100644 --- a/sound/drivers/opl3/opl3_midi.c +++ b/sound/drivers/opl3/opl3_midi.c @@ -398,7 +398,7 @@ void snd_opl3_note_on(void *p, int note, int vel, struct snd_midi_channel *chan) } if (instr_4op) { vp2 = &opl3->voices[voice + 3]; - if (vp->state > 0) { + if (vp2->state > 0) { opl3_reg = reg_side | (OPL3_REG_KEYON_BLOCK + voice_offset + 3); reg_val = vp->keyon_reg & ~OPL3_KEYON_BIT; diff --git a/sound/firewire/fcp.c b/sound/firewire/fcp.c index bbfbebf4affbc..df44dd5dc4b22 100644 --- a/sound/firewire/fcp.c +++ b/sound/firewire/fcp.c @@ -240,9 +240,7 @@ int fcp_avc_transaction(struct fw_unit *unit, t.response_match_bytes = response_match_bytes; t.state = STATE_PENDING; init_waitqueue_head(&t.wait); - - if (*(const u8 *)command == 0x00 || *(const u8 *)command == 0x03) - t.deferrable = true; + t.deferrable = (*(const u8 *)command == 0x00 || *(const u8 *)command == 0x03); spin_lock_irq(&transactions_lock); list_add_tail(&t.list, &transactions); diff --git a/sound/firewire/fireworks/fireworks_hwdep.c b/sound/firewire/fireworks/fireworks_hwdep.c index e93eb4616c5f4..c739173c668f3 100644 --- a/sound/firewire/fireworks/fireworks_hwdep.c +++ b/sound/firewire/fireworks/fireworks_hwdep.c @@ -34,6 +34,7 @@ hwdep_read_resp_buf(struct snd_efw *efw, char __user *buf, long remained, type = SNDRV_FIREWIRE_EVENT_EFW_RESPONSE; if (copy_to_user(buf, &type, sizeof(type))) return -EFAULT; + count += sizeof(type); remained -= sizeof(type); buf += sizeof(type); diff --git a/sound/hda/ext/hdac_ext_stream.c b/sound/hda/ext/hdac_ext_stream.c index 6b1b4b834baef..04f4070fbf366 100644 --- a/sound/hda/ext/hdac_ext_stream.c +++ b/sound/hda/ext/hdac_ext_stream.c @@ -106,20 +106,14 @@ void snd_hdac_stream_free_all(struct hdac_bus *bus) } EXPORT_SYMBOL_GPL(snd_hdac_stream_free_all); -/** - * snd_hdac_ext_stream_decouple - decouple the hdac stream - * @bus: HD-audio core bus - * @stream: HD-audio ext core stream object to initialize - * @decouple: flag to decouple - */ -void snd_hdac_ext_stream_decouple(struct hdac_bus *bus, - struct hdac_ext_stream *stream, bool decouple) +void snd_hdac_ext_stream_decouple_locked(struct hdac_bus *bus, + struct hdac_ext_stream *stream, + bool decouple) { struct hdac_stream *hstream = &stream->hstream; u32 val; int mask = AZX_PPCTL_PROCEN(hstream->index); - spin_lock_irq(&bus->reg_lock); val = readw(bus->ppcap + AZX_REG_PP_PPCTL) & mask; if (decouple && !val) @@ -128,6 +122,20 @@ void snd_hdac_ext_stream_decouple(struct hdac_bus *bus, snd_hdac_updatel(bus->ppcap, AZX_REG_PP_PPCTL, mask, 0); stream->decoupled = decouple; +} +EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_decouple_locked); + +/** + * snd_hdac_ext_stream_decouple - decouple the hdac stream + * @bus: HD-audio core bus + * @stream: HD-audio ext core stream object to initialize + * @decouple: flag to decouple + */ +void snd_hdac_ext_stream_decouple(struct hdac_bus *bus, + struct hdac_ext_stream *stream, bool decouple) +{ + spin_lock_irq(&bus->reg_lock); + snd_hdac_ext_stream_decouple_locked(bus, stream, decouple); spin_unlock_irq(&bus->reg_lock); } EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_decouple); @@ -252,6 +260,7 @@ hdac_ext_link_stream_assign(struct hdac_bus *bus, return NULL; } + spin_lock_irq(&bus->reg_lock); list_for_each_entry(stream, &bus->stream_list, list) { struct hdac_ext_stream *hstream = container_of(stream, struct hdac_ext_stream, @@ -266,17 +275,16 @@ hdac_ext_link_stream_assign(struct hdac_bus *bus, } if (!hstream->link_locked) { - snd_hdac_ext_stream_decouple(bus, hstream, true); + snd_hdac_ext_stream_decouple_locked(bus, hstream, true); res = hstream; break; } } if (res) { - spin_lock_irq(&bus->reg_lock); res->link_locked = 1; res->link_substream = substream; - spin_unlock_irq(&bus->reg_lock); } + spin_unlock_irq(&bus->reg_lock); return res; } @@ -292,6 +300,7 @@ hdac_ext_host_stream_assign(struct hdac_bus *bus, return NULL; } + spin_lock_irq(&bus->reg_lock); list_for_each_entry(stream, &bus->stream_list, list) { struct hdac_ext_stream *hstream = container_of(stream, struct hdac_ext_stream, @@ -301,18 +310,17 @@ hdac_ext_host_stream_assign(struct hdac_bus *bus, if (!stream->opened) { if (!hstream->decoupled) - snd_hdac_ext_stream_decouple(bus, hstream, true); + snd_hdac_ext_stream_decouple_locked(bus, hstream, true); res = hstream; break; } } if (res) { - spin_lock_irq(&bus->reg_lock); res->hstream.opened = 1; res->hstream.running = 0; res->hstream.substream = substream; - spin_unlock_irq(&bus->reg_lock); } + spin_unlock_irq(&bus->reg_lock); return res; } @@ -378,15 +386,17 @@ void snd_hdac_ext_stream_release(struct hdac_ext_stream *stream, int type) break; case HDAC_EXT_STREAM_TYPE_HOST: + spin_lock_irq(&bus->reg_lock); if (stream->decoupled && !stream->link_locked) - snd_hdac_ext_stream_decouple(bus, stream, false); + snd_hdac_ext_stream_decouple_locked(bus, stream, false); + spin_unlock_irq(&bus->reg_lock); snd_hdac_stream_release(&stream->hstream); break; case HDAC_EXT_STREAM_TYPE_LINK: - if (stream->decoupled && !stream->hstream.opened) - snd_hdac_ext_stream_decouple(bus, stream, false); spin_lock_irq(&bus->reg_lock); + if (stream->decoupled && !stream->hstream.opened) + snd_hdac_ext_stream_decouple_locked(bus, stream, false); stream->link_locked = 0; stream->link_substream = NULL; spin_unlock_irq(&bus->reg_lock); diff --git a/sound/hda/hdac_controller.c b/sound/hda/hdac_controller.c index 7e7be8e4dcf9c..87ba66dcfd478 100644 --- a/sound/hda/hdac_controller.c +++ b/sound/hda/hdac_controller.c @@ -395,8 +395,9 @@ int snd_hdac_bus_reset_link(struct hdac_bus *bus, bool full_reset) if (!full_reset) goto skip_reset; - /* clear STATESTS */ - snd_hdac_chip_writew(bus, STATESTS, STATESTS_INT_MASK); + /* clear STATESTS if not in reset */ + if (snd_hdac_chip_readb(bus, GCTL) & AZX_GCTL_RESET) + snd_hdac_chip_writew(bus, STATESTS, STATESTS_INT_MASK); /* reset controller */ snd_hdac_bus_enter_link_reset(bus); diff --git a/sound/hda/hdac_device.c b/sound/hda/hdac_device.c index b84e12f4f8046..489f996d86bcb 100644 --- a/sound/hda/hdac_device.c +++ b/sound/hda/hdac_device.c @@ -656,6 +656,7 @@ static struct hda_vendor_id hda_vendor_ids[] = { { 0x14f1, "Conexant" }, { 0x17e8, "Chrontel" }, { 0x1854, "LG" }, + { 0x19e5, "Huawei" }, { 0x1aec, "Wolfson Microelectronics" }, { 0x1af4, "QEMU" }, { 0x434d, "C-Media" }, diff --git a/sound/hda/hdac_stream.c b/sound/hda/hdac_stream.c index 682ed39f79b01..b299b8b7f871a 100644 --- a/sound/hda/hdac_stream.c +++ b/sound/hda/hdac_stream.c @@ -289,6 +289,7 @@ struct hdac_stream *snd_hdac_stream_assign(struct hdac_bus *bus, int key = (substream->pcm->device << 16) | (substream->number << 2) | (substream->stream + 1); + spin_lock_irq(&bus->reg_lock); list_for_each_entry(azx_dev, &bus->stream_list, list) { if (azx_dev->direction != substream->stream) continue; @@ -302,13 +303,12 @@ struct hdac_stream *snd_hdac_stream_assign(struct hdac_bus *bus, res = azx_dev; } if (res) { - spin_lock_irq(&bus->reg_lock); res->opened = 1; res->running = 0; res->assigned_key = key; res->substream = substream; - spin_unlock_irq(&bus->reg_lock); } + spin_unlock_irq(&bus->reg_lock); return res; } EXPORT_SYMBOL_GPL(snd_hdac_stream_assign); diff --git a/sound/isa/Kconfig b/sound/isa/Kconfig index b690ed937cbe8..df2e45c8814e9 100644 --- a/sound/isa/Kconfig +++ b/sound/isa/Kconfig @@ -22,7 +22,7 @@ config SND_SB16_DSP menuconfig SND_ISA bool "ISA sound devices" depends on ISA || COMPILE_TEST - depends on ISA_DMA_API + depends on ISA_DMA_API && !M68K default y help Support for sound devices connected via the ISA bus. diff --git a/sound/isa/cs423x/cs4236.c b/sound/isa/cs423x/cs4236.c index fa3c39cff5f85..9ee3a312c6793 100644 --- a/sound/isa/cs423x/cs4236.c +++ b/sound/isa/cs423x/cs4236.c @@ -544,7 +544,7 @@ static int snd_cs423x_pnpbios_detect(struct pnp_dev *pdev, static int dev; int err; struct snd_card *card; - struct pnp_dev *cdev; + struct pnp_dev *cdev, *iter; char cid[PNP_ID_LEN]; if (pnp_device_is_isapnp(pdev)) @@ -560,9 +560,11 @@ static int snd_cs423x_pnpbios_detect(struct pnp_dev *pdev, strcpy(cid, pdev->id[0].id); cid[5] = '1'; cdev = NULL; - list_for_each_entry(cdev, &(pdev->protocol->devices), protocol_list) { - if (!strcmp(cdev->id[0].id, cid)) + list_for_each_entry(iter, &(pdev->protocol->devices), protocol_list) { + if (!strcmp(iter->id[0].id, cid)) { + cdev = iter; break; + } } err = snd_cs423x_card_new(&pdev->dev, dev, &card); if (err < 0) diff --git a/sound/isa/gus/gus_dma.c b/sound/isa/gus/gus_dma.c index a1c770d826dda..6d664dd8dde0b 100644 --- a/sound/isa/gus/gus_dma.c +++ b/sound/isa/gus/gus_dma.c @@ -126,6 +126,8 @@ static void snd_gf1_dma_interrupt(struct snd_gus_card * gus) } block = snd_gf1_dma_next_block(gus); spin_unlock(&gus->dma_lock); + if (!block) + return; snd_gf1_dma_program(gus, block->addr, block->buf_addr, block->count, (unsigned short) block->cmd); kfree(block); #if 0 diff --git a/sound/isa/wavefront/wavefront_synth.c b/sound/isa/wavefront/wavefront_synth.c index d6420d224d097..09b368761cc00 100644 --- a/sound/isa/wavefront/wavefront_synth.c +++ b/sound/isa/wavefront/wavefront_synth.c @@ -1088,7 +1088,8 @@ wavefront_send_sample (snd_wavefront_t *dev, if (dataptr < data_end) { - __get_user (sample_short, dataptr); + if (get_user(sample_short, dataptr)) + return -EFAULT; dataptr += skip; if (data_is_unsigned) { /* GUS ? */ diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig index 7630f808d087c..6edde2f145025 100644 --- a/sound/pci/Kconfig +++ b/sound/pci/Kconfig @@ -279,6 +279,7 @@ config SND_CS46XX_NEW_DSP config SND_CS5530 tristate "CS5530 Audio" depends on ISA_DMA_API && (X86_32 || COMPILE_TEST) + depends on !M68K select SND_SB16_DSP help Say Y here to include support for audio on Cyrix/NatSemi CS5530 chips. diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c index 66f6c3bf08e31..6fb192a94762f 100644 --- a/sound/pci/ac97/ac97_codec.c +++ b/sound/pci/ac97/ac97_codec.c @@ -938,8 +938,8 @@ static int snd_ac97_ad18xx_pcm_get_volume(struct snd_kcontrol *kcontrol, struct int codec = kcontrol->private_value & 3; mutex_lock(&ac97->page_mutex); - ucontrol->value.integer.value[0] = 31 - ((ac97->spec.ad18xx.pcmreg[codec] >> 0) & 31); - ucontrol->value.integer.value[1] = 31 - ((ac97->spec.ad18xx.pcmreg[codec] >> 8) & 31); + ucontrol->value.integer.value[0] = 31 - ((ac97->spec.ad18xx.pcmreg[codec] >> 8) & 31); + ucontrol->value.integer.value[1] = 31 - ((ac97->spec.ad18xx.pcmreg[codec] >> 0) & 31); mutex_unlock(&ac97->page_mutex); return 0; } diff --git a/sound/pci/cmipci.c b/sound/pci/cmipci.c index df720881eb991..db9d89ba36587 100644 --- a/sound/pci/cmipci.c +++ b/sound/pci/cmipci.c @@ -302,7 +302,6 @@ MODULE_PARM_DESC(joystick_port, "Joystick port address."); #define CM_MICGAINZ 0x01 /* mic boost */ #define CM_MICGAINZ_SHIFT 0 -#define CM_REG_MIXER3 0x24 #define CM_REG_AUX_VOL 0x26 #define CM_VAUXL_MASK 0xf0 #define CM_VAUXR_MASK 0x0f @@ -3310,7 +3309,7 @@ static void snd_cmipci_remove(struct pci_dev *pci) */ static unsigned char saved_regs[] = { CM_REG_FUNCTRL1, CM_REG_CHFORMAT, CM_REG_LEGACY_CTRL, CM_REG_MISC_CTRL, - CM_REG_MIXER0, CM_REG_MIXER1, CM_REG_MIXER2, CM_REG_MIXER3, CM_REG_PLL, + CM_REG_MIXER0, CM_REG_MIXER1, CM_REG_MIXER2, CM_REG_AUX_VOL, CM_REG_PLL, CM_REG_CH0_FRAME1, CM_REG_CH0_FRAME2, CM_REG_CH1_FRAME1, CM_REG_CH1_FRAME2, CM_REG_EXT_MISC, CM_REG_INT_STATUS, CM_REG_INT_HLDCLR, CM_REG_FUNCTRL0, diff --git a/sound/pci/ctxfi/ctamixer.c b/sound/pci/ctxfi/ctamixer.c index d4ff377eb3a34..6d636bdcaa5a3 100644 --- a/sound/pci/ctxfi/ctamixer.c +++ b/sound/pci/ctxfi/ctamixer.c @@ -23,16 +23,15 @@ #define BLANK_SLOT 4094 -static int amixer_master(struct rsc *rsc) +static void amixer_master(struct rsc *rsc) { rsc->conj = 0; - return rsc->idx = container_of(rsc, struct amixer, rsc)->idx[0]; + rsc->idx = container_of(rsc, struct amixer, rsc)->idx[0]; } -static int amixer_next_conj(struct rsc *rsc) +static void amixer_next_conj(struct rsc *rsc) { rsc->conj++; - return container_of(rsc, struct amixer, rsc)->idx[rsc->conj]; } static int amixer_index(const struct rsc *rsc) @@ -331,16 +330,15 @@ int amixer_mgr_destroy(struct amixer_mgr *amixer_mgr) /* SUM resource management */ -static int sum_master(struct rsc *rsc) +static void sum_master(struct rsc *rsc) { rsc->conj = 0; - return rsc->idx = container_of(rsc, struct sum, rsc)->idx[0]; + rsc->idx = container_of(rsc, struct sum, rsc)->idx[0]; } -static int sum_next_conj(struct rsc *rsc) +static void sum_next_conj(struct rsc *rsc) { rsc->conj++; - return container_of(rsc, struct sum, rsc)->idx[rsc->conj]; } static int sum_index(const struct rsc *rsc) diff --git a/sound/pci/ctxfi/ctdaio.c b/sound/pci/ctxfi/ctdaio.c index 27441d498968d..b5e1296af09ee 100644 --- a/sound/pci/ctxfi/ctdaio.c +++ b/sound/pci/ctxfi/ctdaio.c @@ -51,12 +51,12 @@ static struct daio_rsc_idx idx_20k2[NUM_DAIOTYP] = { [SPDIFIO] = {.left = 0x05, .right = 0x85}, }; -static int daio_master(struct rsc *rsc) +static void daio_master(struct rsc *rsc) { /* Actually, this is not the resource index of DAIO. * For DAO, it is the input mapper index. And, for DAI, * it is the output time-slot index. */ - return rsc->conj = rsc->idx; + rsc->conj = rsc->idx; } static int daio_index(const struct rsc *rsc) @@ -64,19 +64,19 @@ static int daio_index(const struct rsc *rsc) return rsc->conj; } -static int daio_out_next_conj(struct rsc *rsc) +static void daio_out_next_conj(struct rsc *rsc) { - return rsc->conj += 2; + rsc->conj += 2; } -static int daio_in_next_conj_20k1(struct rsc *rsc) +static void daio_in_next_conj_20k1(struct rsc *rsc) { - return rsc->conj += 0x200; + rsc->conj += 0x200; } -static int daio_in_next_conj_20k2(struct rsc *rsc) +static void daio_in_next_conj_20k2(struct rsc *rsc) { - return rsc->conj += 0x100; + rsc->conj += 0x100; } static const struct rsc_ops daio_out_rsc_ops = { diff --git a/sound/pci/ctxfi/ctresource.c b/sound/pci/ctxfi/ctresource.c index 0bb5696e44b37..ec5f597b580ad 100644 --- a/sound/pci/ctxfi/ctresource.c +++ b/sound/pci/ctxfi/ctresource.c @@ -109,18 +109,17 @@ static int audio_ring_slot(const struct rsc *rsc) return (rsc->conj << 4) + offset_in_audio_slot_block[rsc->type]; } -static int rsc_next_conj(struct rsc *rsc) +static void rsc_next_conj(struct rsc *rsc) { unsigned int i; for (i = 0; (i < 8) && (!(rsc->msr & (0x1 << i))); ) i++; rsc->conj += (AUDIO_SLOT_BLOCK_NUM >> i); - return rsc->conj; } -static int rsc_master(struct rsc *rsc) +static void rsc_master(struct rsc *rsc) { - return rsc->conj = rsc->idx; + rsc->conj = rsc->idx; } static const struct rsc_ops rsc_generic_ops = { diff --git a/sound/pci/ctxfi/ctresource.h b/sound/pci/ctxfi/ctresource.h index 93e47488a1c1c..92146054af582 100644 --- a/sound/pci/ctxfi/ctresource.h +++ b/sound/pci/ctxfi/ctresource.h @@ -39,8 +39,8 @@ struct rsc { }; struct rsc_ops { - int (*master)(struct rsc *rsc); /* Move to master resource */ - int (*next_conj)(struct rsc *rsc); /* Move to next conjugate resource */ + void (*master)(struct rsc *rsc); /* Move to master resource */ + void (*next_conj)(struct rsc *rsc); /* Move to next conjugate resource */ int (*index)(const struct rsc *rsc); /* Return the index of resource */ /* Return the output slot number */ int (*output_slot)(const struct rsc *rsc); diff --git a/sound/pci/ctxfi/ctsrc.c b/sound/pci/ctxfi/ctsrc.c index 37c18ce84974a..7d2bda0c3d3de 100644 --- a/sound/pci/ctxfi/ctsrc.c +++ b/sound/pci/ctxfi/ctsrc.c @@ -590,16 +590,15 @@ int src_mgr_destroy(struct src_mgr *src_mgr) /* SRCIMP resource manager operations */ -static int srcimp_master(struct rsc *rsc) +static void srcimp_master(struct rsc *rsc) { rsc->conj = 0; - return rsc->idx = container_of(rsc, struct srcimp, rsc)->idx[0]; + rsc->idx = container_of(rsc, struct srcimp, rsc)->idx[0]; } -static int srcimp_next_conj(struct rsc *rsc) +static void srcimp_next_conj(struct rsc *rsc) { rsc->conj++; - return container_of(rsc, struct srcimp, rsc)->idx[rsc->conj]; } static int srcimp_index(const struct rsc *rsc) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 326f95ce5ceb1..c8847de8388f0 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -1721,8 +1721,11 @@ void snd_hda_ctls_clear(struct hda_codec *codec) { int i; struct hda_nid_item *items = codec->mixers.list; + + down_write(&codec->card->controls_rwsem); for (i = 0; i < codec->mixers.used; i++) snd_ctl_remove(codec->card, items[i].kctl); + up_write(&codec->card->controls_rwsem); snd_array_free(&codec->mixers); snd_array_free(&codec->nids); } diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index ebb1ee69dd0c3..b8fe0ec5d6247 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -671,13 +671,17 @@ static int azx_position_check(struct azx *chip, struct azx_dev *azx_dev) * the update-IRQ timing. The IRQ is issued before actually the * data is processed. So, we need to process it afterwords in a * workqueue. + * + * Returns 1 if OK to proceed, 0 for delay handling, -1 for skipping update */ static int azx_position_ok(struct azx *chip, struct azx_dev *azx_dev) { struct snd_pcm_substream *substream = azx_dev->core.substream; + struct snd_pcm_runtime *runtime = substream->runtime; int stream = substream->stream; u32 wallclk; unsigned int pos; + snd_pcm_uframes_t hwptr, target; wallclk = azx_readl(chip, WALLCLK) - azx_dev->core.start_wallclk; if (wallclk < (azx_dev->core.period_wallclk * 2) / 3) @@ -714,6 +718,24 @@ static int azx_position_ok(struct azx *chip, struct azx_dev *azx_dev) /* NG - it's below the first next period boundary */ return chip->bdl_pos_adj ? 0 : -1; azx_dev->core.start_wallclk += wallclk; + + if (azx_dev->core.no_period_wakeup) + return 1; /* OK, no need to check period boundary */ + + if (runtime->hw_ptr_base != runtime->hw_ptr_interrupt) + return 1; /* OK, already in hwptr updating process */ + + /* check whether the period gets really elapsed */ + pos = bytes_to_frames(runtime, pos); + hwptr = runtime->hw_ptr_base + pos; + if (hwptr < runtime->status->hw_ptr) + hwptr += runtime->buffer_size; + target = runtime->hw_ptr_interrupt + runtime->period_size; + if (hwptr < target) { + /* too early wakeup, process it later */ + return chip->bdl_pos_adj ? 0 : -1; + } + return 1; /* OK, it's fine */ } @@ -907,11 +929,7 @@ static unsigned int azx_get_pos_skl(struct azx *chip, struct azx_dev *azx_dev) if (azx_dev->core.substream->stream == SNDRV_PCM_STREAM_PLAYBACK) return azx_skl_get_dpib_pos(chip, azx_dev); - /* For capture, we need to read posbuf, but it requires a delay - * for the possible boundary overlap; the read of DPIB fetches the - * actual posbuf - */ - udelay(20); + /* read of DPIB fetches the actual posbuf */ azx_skl_get_dpib_pos(chip, azx_dev); return azx_get_pos_posbuf(chip, azx_dev); } @@ -1590,6 +1608,7 @@ static struct snd_pci_quirk probe_mask_list[] = { /* forced codec slots */ SND_PCI_QUIRK(0x1043, 0x1262, "ASUS W5Fm", 0x103), SND_PCI_QUIRK(0x1046, 0x1262, "ASUS W5F", 0x103), + SND_PCI_QUIRK(0x1558, 0x0351, "Schenker Dock 15", 0x105), /* WinFast VP200 H (Teradici) user reported broken communication */ SND_PCI_QUIRK(0x3a21, 0x040d, "WinFast VP200 H", 0x101), {} @@ -1775,8 +1794,6 @@ static int azx_create(struct snd_card *card, struct pci_dev *pci, assign_position_fix(chip, check_position_fix(chip, position_fix[dev])); - check_probe_mask(chip, dev); - if (single_cmd < 0) /* allow fallback to single_cmd at errors */ chip->fallback_to_single_cmd = 1; else /* explicitly set to single_cmd or not */ @@ -1808,6 +1825,8 @@ static int azx_create(struct snd_card *card, struct pci_dev *pci, chip->bus.needs_damn_long_delay = 1; } + check_probe_mask(chip, dev); + err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops); if (err < 0) { dev_err(card->dev, "Error creating device [card]!\n"); diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 5e2fadb264e4d..d4bfb11493270 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -905,6 +905,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x828c, "HP EliteBook 840 G4", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x103c, 0x82b4, "HP ProDesk 600 G3", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x836e, "HP ProBook 455 G5", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x837f, "HP ProBook 470 G5", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x83b2, "HP EliteBook 840 G5", CXT_FIXUP_HP_DOCK), @@ -1012,6 +1013,13 @@ static int patch_conexant_auto(struct hda_codec *codec) snd_hda_pick_fixup(codec, cxt5051_fixup_models, cxt5051_fixups, cxt_fixups); break; + case 0x14f15098: + codec->pin_amp_workaround = 1; + spec->gen.mixer_nid = 0x22; + spec->gen.add_stereo_mix_input = HDA_HINT_STEREO_MIX_AUTO; + snd_hda_pick_fixup(codec, cxt5066_fixup_models, + cxt5066_fixups, cxt_fixups); + break; case 0x14f150f2: codec->power_save_node = 1; /* Fall through */ @@ -1041,11 +1049,11 @@ static int patch_conexant_auto(struct hda_codec *codec) if (err < 0) goto error; - err = snd_hda_gen_parse_auto_config(codec, &spec->gen.autocfg); + err = cx_auto_parse_beep(codec); if (err < 0) goto error; - err = cx_auto_parse_beep(codec); + err = snd_hda_gen_parse_auto_config(codec, &spec->gen.autocfg); if (err < 0) goto error; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f486e680aed1d..145e494f776a3 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -382,6 +382,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0245: case 0x10ec0255: case 0x10ec0256: + case 0x19e58326: case 0x10ec0257: case 0x10ec0282: case 0x10ec0283: @@ -517,6 +518,9 @@ static void alc_shutup_pins(struct hda_codec *codec) struct alc_spec *spec = codec->spec; switch (codec->core.vendor_id) { + case 0x10ec0236: + case 0x10ec0256: + case 0x19e58326: case 0x10ec0283: case 0x10ec0286: case 0x10ec0288: @@ -1924,11 +1928,13 @@ enum { ALC887_FIXUP_ASUS_BASS, ALC887_FIXUP_BASS_CHMAP, ALC1220_FIXUP_GB_DUAL_CODECS, + ALC1220_FIXUP_GB_X570, ALC1220_FIXUP_CLEVO_P950, ALC1220_FIXUP_CLEVO_PB51ED, ALC1220_FIXUP_CLEVO_PB51ED_PINS, ALC887_FIXUP_ASUS_AUDIO, ALC887_FIXUP_ASUS_HMIC, + ALCS1200A_FIXUP_MIC_VREF, }; static void alc889_fixup_coef(struct hda_codec *codec, @@ -2113,6 +2119,30 @@ static void alc1220_fixup_gb_dual_codecs(struct hda_codec *codec, } } +static void alc1220_fixup_gb_x570(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + static const hda_nid_t conn1[] = { 0x0c }; + static const struct coef_fw gb_x570_coefs[] = { + WRITE_COEF(0x07, 0x03c0), + WRITE_COEF(0x1a, 0x01c1), + WRITE_COEF(0x1b, 0x0202), + WRITE_COEF(0x43, 0x3005), + {} + }; + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + snd_hda_override_conn_list(codec, 0x14, ARRAY_SIZE(conn1), conn1); + snd_hda_override_conn_list(codec, 0x1b, ARRAY_SIZE(conn1), conn1); + break; + case HDA_FIXUP_ACT_INIT: + alc_process_coef_fw(codec, gb_x570_coefs); + break; + } +} + static void alc1220_fixup_clevo_p950(struct hda_codec *codec, const struct hda_fixup *fix, int action) @@ -2415,6 +2445,10 @@ static const struct hda_fixup alc882_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc1220_fixup_gb_dual_codecs, }, + [ALC1220_FIXUP_GB_X570] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc1220_fixup_gb_x570, + }, [ALC1220_FIXUP_CLEVO_P950] = { .type = HDA_FIXUP_FUNC, .v.func = alc1220_fixup_clevo_p950, @@ -2446,6 +2480,14 @@ static const struct hda_fixup alc882_fixups[] = { .chained = true, .chain_id = ALC887_FIXUP_ASUS_AUDIO, }, + [ALCS1200A_FIXUP_MIC_VREF] = { + .type = HDA_FIXUP_PINCTLS, + .v.pins = (const struct hda_pintbl[]) { + { 0x18, PIN_VREF50 }, /* rear mic */ + { 0x19, PIN_VREF50 }, /* front mic */ + {} + } + }, }; static const struct snd_pci_quirk alc882_fixup_tbl[] = { @@ -2483,6 +2525,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x835f, "Asus Eee 1601", ALC888_FIXUP_EEE1601), SND_PCI_QUIRK(0x1043, 0x84bc, "ASUS ET2700", ALC887_FIXUP_ASUS_BASS), SND_PCI_QUIRK(0x1043, 0x8691, "ASUS ROG Ranger VIII", ALC882_FIXUP_GPIO3), + SND_PCI_QUIRK(0x1043, 0x8797, "ASUS TUF B550M-PLUS", ALCS1200A_FIXUP_MIC_VREF), SND_PCI_QUIRK(0x104d, 0x9043, "Sony Vaio VGC-LN51JGB", ALC882_FIXUP_NO_PRIMARY_HP), SND_PCI_QUIRK(0x104d, 0x9044, "Sony VAIO AiO", ALC882_FIXUP_NO_PRIMARY_HP), SND_PCI_QUIRK(0x104d, 0x9047, "Sony Vaio TT", ALC889_FIXUP_VAIO_TT), @@ -2517,8 +2560,9 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x13fe, 0x1009, "Advantech MIT-W101", ALC886_FIXUP_EAPD), SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte EP45-DS3/Z87X-UD3H", ALC889_FIXUP_FRONT_HP_NO_PRESENCE), SND_PCI_QUIRK(0x1458, 0xa0b8, "Gigabyte AZ370-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), - SND_PCI_QUIRK(0x1458, 0xa0cd, "Gigabyte X570 Aorus Master", ALC1220_FIXUP_CLEVO_P950), - SND_PCI_QUIRK(0x1458, 0xa0ce, "Gigabyte X570 Aorus Xtreme", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1458, 0xa0cd, "Gigabyte X570 Aorus Master", ALC1220_FIXUP_GB_X570), + SND_PCI_QUIRK(0x1458, 0xa0ce, "Gigabyte X570 Aorus Xtreme", ALC1220_FIXUP_GB_X570), + SND_PCI_QUIRK(0x1458, 0xa0d5, "Gigabyte X570S Aorus Master", ALC1220_FIXUP_GB_X570), SND_PCI_QUIRK(0x1462, 0x11f7, "MSI-GE63", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1228, "MSI-GP63", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1229, "MSI-GP73", ALC1220_FIXUP_CLEVO_P950), @@ -2535,11 +2579,16 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x65d2, "Clevo PB51R[CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x65e1, "Clevo PB51[ED][DF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x65e5, "Clevo PC50D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x65f1, "Clevo PC50HS", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x65f5, "Clevo PD50PN[NRT]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x67d1, "Clevo PB71[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x67e1, "Clevo PB71[DE][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x67e5, "Clevo PC70D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x67f1, "Clevo PC70H[PRS]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x67f5, "Clevo PD70PN[NRT]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x70d1, "Clevo PC70[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), - SND_PCI_QUIRK(0x1558, 0x7714, "Clevo X170", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x7714, "Clevo X170SM", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x7715, "Clevo X170KM-G", ALC1220_FIXUP_CLEVO_PB51ED), SND_PCI_QUIRK(0x1558, 0x9501, "Clevo P950HR", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x9506, "Clevo P955HQ", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x950a, "Clevo P955H[PR]", ALC1220_FIXUP_CLEVO_P950), @@ -2590,6 +2639,7 @@ static const struct hda_model_fixup alc882_fixup_models[] = { {.id = ALC882_FIXUP_NO_PRIMARY_HP, .name = "no-primary-hp"}, {.id = ALC887_FIXUP_ASUS_BASS, .name = "asus-bass"}, {.id = ALC1220_FIXUP_GB_DUAL_CODECS, .name = "dual-codecs"}, + {.id = ALC1220_FIXUP_GB_X570, .name = "gb-x570"}, {.id = ALC1220_FIXUP_CLEVO_P950, .name = "clevo-p950"}, {} }; @@ -3147,6 +3197,7 @@ static void alc_disable_headset_jack_key(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: + case 0x19e58326: alc_write_coef_idx(codec, 0x48, 0x0); alc_update_coef_idx(codec, 0x49, 0x0045, 0x0); break; @@ -3175,6 +3226,7 @@ static void alc_enable_headset_jack_key(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: + case 0x19e58326: alc_write_coef_idx(codec, 0x48, 0xd011); alc_update_coef_idx(codec, 0x49, 0x007f, 0x0045); break; @@ -3521,7 +3573,8 @@ static void alc256_shutup(struct hda_codec *codec) /* If disable 3k pulldown control for alc257, the Mic detection will not work correctly * when booting with headset plugged. So skip setting it for the codec alc257 */ - if (codec->core.vendor_id != 0x10ec0257) + if (codec->core.vendor_id != 0x10ec0236 && + codec->core.vendor_id != 0x10ec0257) alc_update_coef_idx(codec, 0x46, 0, 3 << 12); if (!spec->no_shutup_pins) @@ -4244,6 +4297,12 @@ static void alc_fixup_hp_gpio_led(struct hda_codec *codec, } } +static void alc236_fixup_hp_gpio_led(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + alc_fixup_hp_gpio_led(codec, action, 0x02, 0x01); +} + static void alc269_fixup_hp_gpio_led(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -4636,6 +4695,7 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: + case 0x19e58326: alc_process_coef_fw(codec, coef0256); break; case 0x10ec0234: @@ -4751,6 +4811,7 @@ static void alc_headset_mode_mic_in(struct hda_codec *codec, hda_nid_t hp_pin, case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: + case 0x19e58326: alc_write_coef_idx(codec, 0x45, 0xc489); snd_hda_set_pin_ctl_cache(codec, hp_pin, 0); alc_process_coef_fw(codec, coef0256); @@ -4901,6 +4962,7 @@ static void alc_headset_mode_default(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: + case 0x19e58326: alc_write_coef_idx(codec, 0x1b, 0x0e4b); alc_write_coef_idx(codec, 0x45, 0xc089); msleep(50); @@ -5000,6 +5062,7 @@ static void alc_headset_mode_ctia(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: + case 0x19e58326: alc_process_coef_fw(codec, coef0256); break; case 0x10ec0234: @@ -5114,6 +5177,7 @@ static void alc_headset_mode_omtp(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: + case 0x19e58326: alc_process_coef_fw(codec, coef0256); break; case 0x10ec0234: @@ -5210,6 +5274,7 @@ static void alc_determine_headset_type(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: + case 0x19e58326: alc_write_coef_idx(codec, 0x1b, 0x0e4b); alc_write_coef_idx(codec, 0x06, 0x6104); alc_write_coefex_idx(codec, 0x57, 0x3, 0x09a3); @@ -5504,6 +5569,7 @@ static void alc255_set_default_jack_type(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: + case 0x19e58326: alc_process_coef_fw(codec, alc256fw); break; } @@ -6107,6 +6173,7 @@ static void alc_combo_jack_hp_jd_restart(struct hda_codec *codec) case 0x10ec0236: case 0x10ec0255: case 0x10ec0256: + case 0x19e58326: alc_update_coef_idx(codec, 0x1b, 0x8000, 1 << 15); /* Reset HP JD */ alc_update_coef_idx(codec, 0x1b, 0x8000, 0 << 15); break; @@ -6360,6 +6427,7 @@ enum { ALC298_FIXUP_LENOVO_SPK_VOLUME, ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER, ALC269_FIXUP_ATIV_BOOK_8, + ALC221_FIXUP_HP_288PRO_MIC_NO_PRESENCE, ALC221_FIXUP_HP_MIC_NO_PRESENCE, ALC256_FIXUP_ASUS_HEADSET_MODE, ALC256_FIXUP_ASUS_MIC, @@ -6417,8 +6485,10 @@ enum { ALC294_FIXUP_ASUS_GU502_VERBS, ALC285_FIXUP_HP_GPIO_LED, ALC285_FIXUP_HP_MUTE_LED, + ALC236_FIXUP_HP_GPIO_LED, ALC236_FIXUP_HP_MUTE_LED, ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, + ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, ALC295_FIXUP_ASUS_MIC_NO_PRESENCE, ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS, ALC269VC_FIXUP_ACER_HEADSET_MIC, @@ -7236,6 +7306,16 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_NO_SHUTUP }, + [ALC221_FIXUP_HP_288PRO_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + { 0x1a, 0x01813030 }, /* use as headphone mic, without its own jack detect */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE + }, [ALC221_FIXUP_HP_MIC_NO_PRESENCE] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -7692,6 +7772,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_hp_mute_led, }, + [ALC236_FIXUP_HP_GPIO_LED] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc236_fixup_hp_gpio_led, + }, [ALC236_FIXUP_HP_MUTE_LED] = { .type = HDA_FIXUP_FUNC, .v.func = alc236_fixup_hp_mute_led, @@ -7703,6 +7787,14 @@ static const struct hda_fixup alc269_fixups[] = { { } }, }, + [ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + { 0x20, AC_VERB_SET_COEF_INDEX, 0x08}, + { 0x20, AC_VERB_SET_PROC_COEF, 0x2fcf}, + { } + }, + }, [ALC295_FIXUP_ASUS_MIC_NO_PRESENCE] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -7971,9 +8063,11 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x1308, "Acer Aspire Z24-890", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x132a, "Acer TravelMate B114-21", ALC233_FIXUP_ACER_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1330, "Acer TravelMate X514-51T", ALC255_FIXUP_ACER_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x141f, "Acer Spin SP513-54N", ALC255_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x142b, "Acer Swift SF314-42", ALC255_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1430, "Acer TravelMate B311R-31", ALC256_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z), + SND_PCI_QUIRK(0x1028, 0x053c, "Dell Latitude E5430", ALC292_FIXUP_DELL_E7X), SND_PCI_QUIRK(0x1028, 0x054b, "Dell XPS one 2710", ALC275_FIXUP_DELL_XPS), SND_PCI_QUIRK(0x1028, 0x05bd, "Dell Latitude E6440", ALC292_FIXUP_DELL_E7X), SND_PCI_QUIRK(0x1028, 0x05be, "Dell Latitude E6540", ALC292_FIXUP_DELL_E7X), @@ -8080,6 +8174,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x2335, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2336, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2337, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x2b5e, "HP 288 Pro G2 MT", ALC221_FIXUP_HP_288PRO_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x802e, "HP Z240 SFF", ALC221_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x802f, "HP Z240", ALC221_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x820d, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3), @@ -8094,14 +8189,18 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x84da, "HP OMEN dc0019-ur", ALC295_FIXUP_HP_OMEN), SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x8519, "HP Spectre x360 15-df0xxx", ALC285_FIXUP_HP_SPECTRE_X360), + SND_PCI_QUIRK(0x103c, 0x860f, "HP ZBook 15 G6", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x861f, "HP Elite Dragonfly G1", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8724, "HP EliteBook 850 G7", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8728, "HP EliteBook 840 G7", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8729, "HP", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8736, "HP", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8760, "HP", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x877a, "HP", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x877d, "HP", ALC236_FIXUP_HP_MUTE_LED), + SND_PCI_QUIRK(0x103c, 0x87e5, "HP ProBook 440 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x89aa, "HP EliteBook 630 G9", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), @@ -8127,6 +8226,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x18f1, "Asus FX505DT", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x194e, "ASUS UX563FD", ALC294_FIXUP_ASUS_HPE), + SND_PCI_QUIRK(0x1043, 0x1970, "ASUS UX550VE", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1982, "ASUS B1400CEPE", ALC256_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x19ce, "ASUS B9450FA", ALC294_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x19e1, "ASUS UX581LV", ALC295_FIXUP_ASUS_MIC_NO_PRESENCE), @@ -8142,6 +8242,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS), SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1f11, "ASUS Zephyrus G14", ALC289_FIXUP_ASUS_GA401), + SND_PCI_QUIRK(0x1043, 0x1d42, "ASUS Zephyrus G14 2022", ALC289_FIXUP_ASUS_GA401), + SND_PCI_QUIRK(0x1043, 0x16b2, "ASUS GU603", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2), SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1043, 0x834a, "ASUS S101", ALC269_FIXUP_STEREO_DMIC), @@ -8174,6 +8276,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc740, "Samsung Ativ book 8 (NP870Z5G)", ALC269_FIXUP_ATIV_BOOK_8), SND_PCI_QUIRK(0x144d, 0xc812, "Samsung Notebook Pen S (NT950SBE-X58)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), SND_PCI_QUIRK(0x144d, 0xc830, "Samsung Galaxy Book Ion (NT950XCJ-X716A)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), + SND_PCI_QUIRK(0x144d, 0xc832, "Samsung Galaxy Book Flex Alpha (NP730QCJ)", ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC), @@ -8320,6 +8423,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC), + SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC), SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED), @@ -8497,6 +8601,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC298_FIXUP_HUAWEI_MBX_STEREO, .name = "huawei-mbx-stereo"}, {.id = ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE, .name = "alc256-medion-headset"}, {.id = ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, .name = "alc298-samsung-headphone"}, + {.id = ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, .name = "alc256-samsung-headphone"}, {.id = ALC255_FIXUP_XIAOMI_HEADSET_MIC, .name = "alc255-xiaomi-headset"}, {.id = ALC274_FIXUP_HP_MIC, .name = "alc274-hp-mic-detect"}, {.id = ALC295_FIXUP_HP_OMEN, .name = "alc295-hp-omen"}, @@ -9095,6 +9200,7 @@ static int patch_alc269(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: + case 0x19e58326: spec->codec_variant = ALC269_TYPE_ALC256; spec->shutup = alc256_shutup; spec->init_hook = alc256_init; @@ -9160,6 +9266,16 @@ static int patch_alc269(struct hda_codec *codec) snd_hda_pick_fixup(codec, alc269_fixup_models, alc269_fixup_tbl, alc269_fixups); + /* FIXME: both TX300 and ROG Strix G17 have the same SSID, and + * the quirk breaks the latter (bko#214101). + * Clear the wrong entry. + */ + if (codec->fixup_id == ALC282_FIXUP_ASUS_TX300 && + codec->core.vendor_id == 0x10ec0294) { + codec_dbg(codec, "Clear wrong fixup for ASUS ROG Strix G17\n"); + codec->fixup_id = HDA_FIXUP_ID_NOT_SET; + } + snd_hda_pick_pin_fixup(codec, alc269_pin_fixup_tbl, alc269_fixups, true); snd_hda_pick_pin_fixup(codec, alc269_fallback_pin_fixup_tbl, alc269_fixups, false); snd_hda_pick_fixup(codec, NULL, alc269_fixup_vendor_tbl, @@ -9601,6 +9717,27 @@ static void alc671_fixup_hp_headset_mic2(struct hda_codec *codec, } } +static void alc897_hp_automute_hook(struct hda_codec *codec, + struct hda_jack_callback *jack) +{ + struct alc_spec *spec = codec->spec; + int vref; + + snd_hda_gen_hp_automute(codec, jack); + vref = spec->gen.hp_jack_present ? (PIN_HP | AC_PINCTL_VREF_100) : PIN_HP; + snd_hda_codec_write(codec, 0x1b, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, + vref); +} + +static void alc897_fixup_lenovo_headset_mic(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + spec->gen.hp_automute_hook = alc897_hp_automute_hook; + } +} + static const struct coef_fw alc668_coefs[] = { WRITE_COEF(0x01, 0xbebe), WRITE_COEF(0x02, 0xaaaa), WRITE_COEF(0x03, 0x0), WRITE_COEF(0x04, 0x0180), WRITE_COEF(0x06, 0x0), WRITE_COEF(0x07, 0x0f80), @@ -9678,6 +9815,12 @@ enum { ALC671_FIXUP_HP_HEADSET_MIC2, ALC662_FIXUP_ACER_X2660G_HEADSET_MODE, ALC662_FIXUP_ACER_NITRO_HEADSET_MODE, + ALC668_FIXUP_ASUS_NO_HEADSET_MIC, + ALC668_FIXUP_HEADSET_MIC, + ALC668_FIXUP_MIC_DET_COEF, + ALC897_FIXUP_LENOVO_HEADSET_MIC, + ALC897_FIXUP_HEADSET_MIC_PIN, + ALC897_FIXUP_HP_HSMIC_VERB, }; static const struct hda_fixup alc662_fixups[] = { @@ -10061,6 +10204,49 @@ static const struct hda_fixup alc662_fixups[] = { .chained = true, .chain_id = ALC662_FIXUP_USI_FUNC }, + [ALC668_FIXUP_ASUS_NO_HEADSET_MIC] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1b, 0x04a1112c }, + { } + }, + .chained = true, + .chain_id = ALC668_FIXUP_HEADSET_MIC + }, + [ALC668_FIXUP_HEADSET_MIC] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc269_fixup_headset_mic, + .chained = true, + .chain_id = ALC668_FIXUP_MIC_DET_COEF + }, + [ALC668_FIXUP_MIC_DET_COEF] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + { 0x20, AC_VERB_SET_COEF_INDEX, 0x15 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x0d60 }, + {} + }, + }, + [ALC897_FIXUP_LENOVO_HEADSET_MIC] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc897_fixup_lenovo_headset_mic, + }, + [ALC897_FIXUP_HEADSET_MIC_PIN] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1a, 0x03a11050 }, + { } + }, + .chained = true, + .chain_id = ALC897_FIXUP_LENOVO_HEADSET_MIC + }, + [ALC897_FIXUP_HP_HSMIC_VERB] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + { } + }, + }, }; static const struct snd_pci_quirk alc662_fixup_tbl[] = { @@ -10086,7 +10272,10 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0698, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x069f, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800), + SND_PCI_QUIRK(0x103c, 0x8719, "HP", ALC897_FIXUP_HP_HSMIC_VERB), SND_PCI_QUIRK(0x103c, 0x873e, "HP", ALC671_FIXUP_HP_HEADSET_MIC2), + SND_PCI_QUIRK(0x103c, 0x877e, "HP 288 Pro G6", ALC671_FIXUP_HP_HEADSET_MIC2), + SND_PCI_QUIRK(0x103c, 0x885f, "HP 288 Pro G8", ALC671_FIXUP_HP_HEADSET_MIC2), SND_PCI_QUIRK(0x1043, 0x1080, "Asus UX501VW", ALC668_FIXUP_HEADSET_MODE), SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_ASUS_Nx50), SND_PCI_QUIRK(0x1043, 0x129d, "Asus N750", ALC662_FIXUP_ASUS_Nx50), @@ -10096,6 +10285,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x15a7, "ASUS UX51VZH", ALC662_FIXUP_BASS_16), SND_PCI_QUIRK(0x1043, 0x177d, "ASUS N551", ALC668_FIXUP_ASUS_Nx51), SND_PCI_QUIRK(0x1043, 0x17bd, "ASUS N751", ALC668_FIXUP_ASUS_Nx51), + SND_PCI_QUIRK(0x1043, 0x185d, "ASUS G551JW", ALC668_FIXUP_ASUS_NO_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1963, "ASUS X71SL", ALC662_FIXUP_ASUS_MODE8), SND_PCI_QUIRK(0x1043, 0x1b73, "ASUS N55SF", ALC662_FIXUP_BASS_16), SND_PCI_QUIRK(0x1043, 0x1bf3, "ASUS N76VZ", ALC662_FIXUP_BASS_MODE4_CHMAP), @@ -10104,6 +10294,11 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc051, "Samsung R720", ALC662_FIXUP_IDEAPAD), SND_PCI_QUIRK(0x14cd, 0x5003, "USI", ALC662_FIXUP_USI_HEADSET_MODE), SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC662_FIXUP_LENOVO_MULTI_CODECS), + SND_PCI_QUIRK(0x17aa, 0x1057, "Lenovo P360", ALC897_FIXUP_HEADSET_MIC_PIN), + SND_PCI_QUIRK(0x17aa, 0x32ca, "Lenovo ThinkCentre M80", ALC897_FIXUP_HEADSET_MIC_PIN), + SND_PCI_QUIRK(0x17aa, 0x32cb, "Lenovo ThinkCentre M70", ALC897_FIXUP_HEADSET_MIC_PIN), + SND_PCI_QUIRK(0x17aa, 0x32cf, "Lenovo ThinkCentre M950", ALC897_FIXUP_HEADSET_MIC_PIN), + SND_PCI_QUIRK(0x17aa, 0x32f7, "Lenovo ThinkCentre M90", ALC897_FIXUP_HEADSET_MIC_PIN), SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo Ideapad Y550P", ALC662_FIXUP_IDEAPAD), SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Ideapad Y550", ALC662_FIXUP_IDEAPAD), SND_PCI_QUIRK(0x1849, 0x5892, "ASRock B150M", ALC892_FIXUP_ASROCK_MOBO), @@ -10458,6 +10653,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = { HDA_CODEC_ENTRY(0x10ec0b00, "ALCS1200A", patch_alc882), HDA_CODEC_ENTRY(0x10ec1168, "ALC1220", patch_alc882), HDA_CODEC_ENTRY(0x10ec1220, "ALC1220", patch_alc882), + HDA_CODEC_ENTRY(0x19e58326, "HW8326", patch_alc269), {} /* terminator */ }; MODULE_DEVICE_TABLE(hdaudio, snd_hda_id_realtek); diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c index a5c1a2c4eae4e..3edb4e25797de 100644 --- a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -520,11 +520,11 @@ static int via_parse_auto_config(struct hda_codec *codec) if (err < 0) return err; - err = snd_hda_gen_parse_auto_config(codec, &spec->gen.autocfg); + err = auto_parse_beep(codec); if (err < 0) return err; - err = auto_parse_beep(codec); + err = snd_hda_gen_parse_auto_config(codec, &spec->gen.autocfg); if (err < 0) return err; diff --git a/sound/soc/atmel/Kconfig b/sound/soc/atmel/Kconfig index 71f2d42188c46..51e75b7819682 100644 --- a/sound/soc/atmel/Kconfig +++ b/sound/soc/atmel/Kconfig @@ -11,7 +11,6 @@ if SND_ATMEL_SOC config SND_ATMEL_SOC_PDC bool - depends on HAS_DMA config SND_ATMEL_SOC_DMA bool diff --git a/sound/soc/atmel/atmel_ssc_dai.c b/sound/soc/atmel/atmel_ssc_dai.c index ca603397651c0..1e0973322cd00 100644 --- a/sound/soc/atmel/atmel_ssc_dai.c +++ b/sound/soc/atmel/atmel_ssc_dai.c @@ -280,7 +280,10 @@ static int atmel_ssc_startup(struct snd_pcm_substream *substream, /* Enable PMC peripheral clock for this SSC */ pr_debug("atmel_ssc_dai: Starting clock\n"); - clk_enable(ssc_p->ssc->clk); + ret = clk_enable(ssc_p->ssc->clk); + if (ret) + return ret; + ssc_p->mck_rate = clk_get_rate(ssc_p->ssc->clk); /* Reset the SSC unless initialized to keep it in a clean state */ diff --git a/sound/soc/atmel/sam9g20_wm8731.c b/sound/soc/atmel/sam9g20_wm8731.c index b1bef2bf142dc..d1579896f3a11 100644 --- a/sound/soc/atmel/sam9g20_wm8731.c +++ b/sound/soc/atmel/sam9g20_wm8731.c @@ -46,35 +46,6 @@ */ #undef ENABLE_MIC_INPUT -static struct clk *mclk; - -static int at91sam9g20ek_set_bias_level(struct snd_soc_card *card, - struct snd_soc_dapm_context *dapm, - enum snd_soc_bias_level level) -{ - static int mclk_on; - int ret = 0; - - switch (level) { - case SND_SOC_BIAS_ON: - case SND_SOC_BIAS_PREPARE: - if (!mclk_on) - ret = clk_enable(mclk); - if (ret == 0) - mclk_on = 1; - break; - - case SND_SOC_BIAS_OFF: - case SND_SOC_BIAS_STANDBY: - if (mclk_on) - clk_disable(mclk); - mclk_on = 0; - break; - } - - return ret; -} - static const struct snd_soc_dapm_widget at91sam9g20ek_dapm_widgets[] = { SND_SOC_DAPM_MIC("Int Mic", NULL), SND_SOC_DAPM_SPK("Ext Spk", NULL), @@ -135,7 +106,6 @@ static struct snd_soc_card snd_soc_at91sam9g20ek = { .owner = THIS_MODULE, .dai_link = &at91sam9g20ek_dai, .num_links = 1, - .set_bias_level = at91sam9g20ek_set_bias_level, .dapm_widgets = at91sam9g20ek_dapm_widgets, .num_dapm_widgets = ARRAY_SIZE(at91sam9g20ek_dapm_widgets), @@ -148,7 +118,6 @@ static int at91sam9g20ek_audio_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; struct device_node *codec_np, *cpu_np; - struct clk *pllb; struct snd_soc_card *card = &snd_soc_at91sam9g20ek; int ret; @@ -162,31 +131,6 @@ static int at91sam9g20ek_audio_probe(struct platform_device *pdev) return -EINVAL; } - /* - * Codec MCLK is supplied by PCK0 - set it up. - */ - mclk = clk_get(NULL, "pck0"); - if (IS_ERR(mclk)) { - dev_err(&pdev->dev, "Failed to get MCLK\n"); - ret = PTR_ERR(mclk); - goto err; - } - - pllb = clk_get(NULL, "pllb"); - if (IS_ERR(pllb)) { - dev_err(&pdev->dev, "Failed to get PLLB\n"); - ret = PTR_ERR(pllb); - goto err_mclk; - } - ret = clk_set_parent(mclk, pllb); - clk_put(pllb); - if (ret != 0) { - dev_err(&pdev->dev, "Failed to set MCLK parent\n"); - goto err_mclk; - } - - clk_set_rate(mclk, MCLK_RATE); - card->dev = &pdev->dev; /* Parse device node info */ @@ -214,6 +158,7 @@ static int at91sam9g20ek_audio_probe(struct platform_device *pdev) cpu_np = of_parse_phandle(np, "atmel,ssc-controller", 0); if (!cpu_np) { dev_err(&pdev->dev, "dai and pcm info missing\n"); + of_node_put(codec_np); return -EINVAL; } at91sam9g20ek_dai.cpus->of_node = cpu_np; @@ -229,9 +174,6 @@ static int at91sam9g20ek_audio_probe(struct platform_device *pdev) return ret; -err_mclk: - clk_put(mclk); - mclk = NULL; err: atmel_ssc_put_audio(0); return ret; @@ -241,8 +183,6 @@ static int at91sam9g20ek_audio_remove(struct platform_device *pdev) { struct snd_soc_card *card = platform_get_drvdata(pdev); - clk_disable(mclk); - mclk = NULL; snd_soc_unregister_card(card); atmel_ssc_put_audio(0); diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 229cc89f8c5a5..dfc536cd9d2fc 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -586,21 +586,26 @@ config SND_SOC_CS4349 config SND_SOC_CS47L15 tristate + depends on MFD_CS47L15 config SND_SOC_CS47L24 tristate config SND_SOC_CS47L35 tristate + depends on MFD_CS47L35 config SND_SOC_CS47L85 tristate + depends on MFD_CS47L85 config SND_SOC_CS47L90 tristate + depends on MFD_CS47L90 config SND_SOC_CS47L92 tristate + depends on MFD_CS47L92 # Cirrus Logic Quad-Channel ADC config SND_SOC_CS53L30 @@ -754,7 +759,6 @@ config SND_SOC_MAX98095 config SND_SOC_MAX98357A tristate "Maxim MAX98357A CODEC" - depends on GPIOLIB config SND_SOC_MAX98371 tristate diff --git a/sound/soc/codecs/cpcap.c b/sound/soc/codecs/cpcap.c index 1902689c5ea2c..acd88fe38cd4c 100644 --- a/sound/soc/codecs/cpcap.c +++ b/sound/soc/codecs/cpcap.c @@ -1541,6 +1541,8 @@ static int cpcap_codec_probe(struct platform_device *pdev) { struct device_node *codec_node = of_get_child_by_name(pdev->dev.parent->of_node, "audio-codec"); + if (!codec_node) + return -ENODEV; pdev->dev.of_node = codec_node; diff --git a/sound/soc/codecs/cs35l36.c b/sound/soc/codecs/cs35l36.c index e9b5f76f27a86..aa32b8c26578a 100644 --- a/sound/soc/codecs/cs35l36.c +++ b/sound/soc/codecs/cs35l36.c @@ -444,7 +444,8 @@ static bool cs35l36_volatile_reg(struct device *dev, unsigned int reg) } } -static DECLARE_TLV_DB_SCALE(dig_vol_tlv, -10200, 25, 0); +static const DECLARE_TLV_DB_RANGE(dig_vol_tlv, 0, 912, + TLV_DB_MINMAX_ITEM(-10200, 1200)); static DECLARE_TLV_DB_SCALE(amp_gain_tlv, 0, 1, 1); static const char * const cs35l36_pcm_sftramp_text[] = { diff --git a/sound/soc/codecs/cs4265.c b/sound/soc/codecs/cs4265.c index 2fb65f246b0cf..77af5b67b9bb4 100644 --- a/sound/soc/codecs/cs4265.c +++ b/sound/soc/codecs/cs4265.c @@ -150,7 +150,6 @@ static const struct snd_kcontrol_new cs4265_snd_controls[] = { SOC_SINGLE("E to F Buffer Disable Switch", CS4265_SPDIF_CTL1, 6, 1, 0), SOC_ENUM("C Data Access", cam_mode_enum), - SOC_SINGLE("SPDIF Switch", CS4265_SPDIF_CTL2, 5, 1, 1), SOC_SINGLE("Validity Bit Control Switch", CS4265_SPDIF_CTL2, 3, 1, 0), SOC_ENUM("SPDIF Mono/Stereo", spdif_mono_stereo_enum), @@ -186,7 +185,7 @@ static const struct snd_soc_dapm_widget cs4265_dapm_widgets[] = { SND_SOC_DAPM_SWITCH("Loopback", SND_SOC_NOPM, 0, 0, &loopback_ctl), - SND_SOC_DAPM_SWITCH("SPDIF", SND_SOC_NOPM, 0, 0, + SND_SOC_DAPM_SWITCH("SPDIF", CS4265_SPDIF_CTL2, 5, 1, &spdif_switch), SND_SOC_DAPM_SWITCH("DAC", CS4265_PWRCTL, 1, 1, &dac_switch), diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index 6825e874785f2..ebee58eca4d51 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -91,7 +91,7 @@ static const struct reg_default cs42l42_reg_defaults[] = { { CS42L42_ASP_RX_INT_MASK, 0x1F }, { CS42L42_ASP_TX_INT_MASK, 0x0F }, { CS42L42_CODEC_INT_MASK, 0x03 }, - { CS42L42_SRCPL_INT_MASK, 0xFF }, + { CS42L42_SRCPL_INT_MASK, 0x7F }, { CS42L42_VPMON_INT_MASK, 0x01 }, { CS42L42_PLL_LOCK_INT_MASK, 0x01 }, { CS42L42_TSRS_PLUG_INT_MASK, 0x0F }, @@ -128,7 +128,7 @@ static const struct reg_default cs42l42_reg_defaults[] = { { CS42L42_MIXER_CHA_VOL, 0x3F }, { CS42L42_MIXER_ADC_VOL, 0x3F }, { CS42L42_MIXER_CHB_VOL, 0x3F }, - { CS42L42_EQ_COEF_IN0, 0x22 }, + { CS42L42_EQ_COEF_IN0, 0x00 }, { CS42L42_EQ_COEF_IN1, 0x00 }, { CS42L42_EQ_COEF_IN2, 0x00 }, { CS42L42_EQ_COEF_IN3, 0x00 }, @@ -1798,8 +1798,9 @@ static int cs42l42_i2c_probe(struct i2c_client *i2c_client, NULL, cs42l42_irq_thread, IRQF_ONESHOT | IRQF_TRIGGER_LOW, "cs42l42", cs42l42); - - if (ret != 0) + if (ret == -EPROBE_DEFER) + goto err_disable; + else if (ret != 0) dev_err(&i2c_client->dev, "Failed to request IRQ: %d\n", ret); diff --git a/sound/soc/codecs/cs42l52.c b/sound/soc/codecs/cs42l52.c index 2ea4cba3be2ad..6c054b357205b 100644 --- a/sound/soc/codecs/cs42l52.c +++ b/sound/soc/codecs/cs42l52.c @@ -137,7 +137,9 @@ static DECLARE_TLV_DB_SCALE(mic_tlv, 1600, 100, 0); static DECLARE_TLV_DB_SCALE(pga_tlv, -600, 50, 0); -static DECLARE_TLV_DB_SCALE(mix_tlv, -50, 50, 0); +static DECLARE_TLV_DB_SCALE(pass_tlv, -6000, 50, 0); + +static DECLARE_TLV_DB_SCALE(mix_tlv, -5150, 50, 0); static DECLARE_TLV_DB_SCALE(beep_tlv, -56, 200, 0); @@ -351,7 +353,7 @@ static const struct snd_kcontrol_new cs42l52_snd_controls[] = { CS42L52_SPKB_VOL, 0, 0x40, 0xC0, hl_tlv), SOC_DOUBLE_R_SX_TLV("Bypass Volume", CS42L52_PASSTHRUA_VOL, - CS42L52_PASSTHRUB_VOL, 0, 0x88, 0x90, pga_tlv), + CS42L52_PASSTHRUB_VOL, 0, 0x88, 0x90, pass_tlv), SOC_DOUBLE("Bypass Mute", CS42L52_MISC_CTL, 4, 5, 1, 0), @@ -364,7 +366,7 @@ static const struct snd_kcontrol_new cs42l52_snd_controls[] = { CS42L52_ADCB_VOL, 0, 0xA0, 0x78, ipd_tlv), SOC_DOUBLE_R_SX_TLV("ADC Mixer Volume", CS42L52_ADCA_MIXER_VOL, CS42L52_ADCB_MIXER_VOL, - 0, 0x19, 0x7F, ipd_tlv), + 0, 0x19, 0x7F, mix_tlv), SOC_DOUBLE("ADC Switch", CS42L52_ADC_MISC_CTL, 0, 1, 1, 0), diff --git a/sound/soc/codecs/cs42l56.c b/sound/soc/codecs/cs42l56.c index 51d7a87ab4c3b..8be7d83f0ce9a 100644 --- a/sound/soc/codecs/cs42l56.c +++ b/sound/soc/codecs/cs42l56.c @@ -391,9 +391,9 @@ static const struct snd_kcontrol_new cs42l56_snd_controls[] = { SOC_DOUBLE("ADC Boost Switch", CS42L56_GAIN_BIAS_CTL, 3, 2, 1, 1), SOC_DOUBLE_R_SX_TLV("Headphone Volume", CS42L56_HPA_VOLUME, - CS42L56_HPB_VOLUME, 0, 0x84, 0x48, hl_tlv), + CS42L56_HPB_VOLUME, 0, 0x44, 0x48, hl_tlv), SOC_DOUBLE_R_SX_TLV("LineOut Volume", CS42L56_LOA_VOLUME, - CS42L56_LOB_VOLUME, 0, 0x84, 0x48, hl_tlv), + CS42L56_LOB_VOLUME, 0, 0x44, 0x48, hl_tlv), SOC_SINGLE_TLV("Bass Shelving Volume", CS42L56_TONE_CTL, 0, 0x00, 1, tone_tlv), diff --git a/sound/soc/codecs/cs47l15.c b/sound/soc/codecs/cs47l15.c index ece1276f38eb9..1f7148794a5a9 100644 --- a/sound/soc/codecs/cs47l15.c +++ b/sound/soc/codecs/cs47l15.c @@ -122,6 +122,9 @@ static int cs47l15_in1_adc_put(struct snd_kcontrol *kcontrol, snd_soc_kcontrol_component(kcontrol); struct cs47l15 *cs47l15 = snd_soc_component_get_drvdata(component); + if (!!ucontrol->value.integer.value[0] == cs47l15->in1_lp_mode) + return 0; + switch (ucontrol->value.integer.value[0]) { case 0: /* Set IN1 to normal mode */ @@ -150,7 +153,7 @@ static int cs47l15_in1_adc_put(struct snd_kcontrol *kcontrol, break; } - return 0; + return 1; } static const struct snd_kcontrol_new cs47l15_snd_controls[] = { diff --git a/sound/soc/codecs/cs53l30.c b/sound/soc/codecs/cs53l30.c index ed22361b35c14..a5a383b923054 100644 --- a/sound/soc/codecs/cs53l30.c +++ b/sound/soc/codecs/cs53l30.c @@ -347,22 +347,22 @@ static const struct snd_kcontrol_new cs53l30_snd_controls[] = { SOC_ENUM("ADC2 NG Delay", adc2_ng_delay_enum), SOC_SINGLE_SX_TLV("ADC1A PGA Volume", - CS53L30_ADC1A_AFE_CTL, 0, 0x34, 0x18, pga_tlv), + CS53L30_ADC1A_AFE_CTL, 0, 0x34, 0x24, pga_tlv), SOC_SINGLE_SX_TLV("ADC1B PGA Volume", - CS53L30_ADC1B_AFE_CTL, 0, 0x34, 0x18, pga_tlv), + CS53L30_ADC1B_AFE_CTL, 0, 0x34, 0x24, pga_tlv), SOC_SINGLE_SX_TLV("ADC2A PGA Volume", - CS53L30_ADC2A_AFE_CTL, 0, 0x34, 0x18, pga_tlv), + CS53L30_ADC2A_AFE_CTL, 0, 0x34, 0x24, pga_tlv), SOC_SINGLE_SX_TLV("ADC2B PGA Volume", - CS53L30_ADC2B_AFE_CTL, 0, 0x34, 0x18, pga_tlv), + CS53L30_ADC2B_AFE_CTL, 0, 0x34, 0x24, pga_tlv), SOC_SINGLE_SX_TLV("ADC1A Digital Volume", - CS53L30_ADC1A_DIG_VOL, 0, 0xA0, 0x0C, dig_tlv), + CS53L30_ADC1A_DIG_VOL, 0, 0xA0, 0x6C, dig_tlv), SOC_SINGLE_SX_TLV("ADC1B Digital Volume", - CS53L30_ADC1B_DIG_VOL, 0, 0xA0, 0x0C, dig_tlv), + CS53L30_ADC1B_DIG_VOL, 0, 0xA0, 0x6C, dig_tlv), SOC_SINGLE_SX_TLV("ADC2A Digital Volume", - CS53L30_ADC2A_DIG_VOL, 0, 0xA0, 0x0C, dig_tlv), + CS53L30_ADC2A_DIG_VOL, 0, 0xA0, 0x6C, dig_tlv), SOC_SINGLE_SX_TLV("ADC2B Digital Volume", - CS53L30_ADC2B_DIG_VOL, 0, 0xA0, 0x0C, dig_tlv), + CS53L30_ADC2B_DIG_VOL, 0, 0xA0, 0x6C, dig_tlv), }; static const struct snd_soc_dapm_widget cs53l30_dapm_widgets[] = { diff --git a/sound/soc/codecs/da7219.c b/sound/soc/codecs/da7219.c index f83a6eaba12cb..ef8bd9e046374 100644 --- a/sound/soc/codecs/da7219.c +++ b/sound/soc/codecs/da7219.c @@ -446,7 +446,7 @@ static int da7219_tonegen_freq_put(struct snd_kcontrol *kcontrol, struct soc_mixer_control *mixer_ctrl = (struct soc_mixer_control *) kcontrol->private_value; unsigned int reg = mixer_ctrl->reg; - __le16 val; + __le16 val_new, val_old; int ret; /* @@ -454,13 +454,19 @@ static int da7219_tonegen_freq_put(struct snd_kcontrol *kcontrol, * Therefore we need to convert to little endian here to align with * HW registers. */ - val = cpu_to_le16(ucontrol->value.integer.value[0]); + val_new = cpu_to_le16(ucontrol->value.integer.value[0]); mutex_lock(&da7219->ctrl_lock); - ret = regmap_raw_write(da7219->regmap, reg, &val, sizeof(val)); + ret = regmap_raw_read(da7219->regmap, reg, &val_old, sizeof(val_old)); + if (ret == 0 && (val_old != val_new)) + ret = regmap_raw_write(da7219->regmap, reg, + &val_new, sizeof(val_new)); mutex_unlock(&da7219->ctrl_lock); - return ret; + if (ret < 0) + return ret; + + return val_old != val_new; } diff --git a/sound/soc/codecs/es8328.c b/sound/soc/codecs/es8328.c index fdf64c29f563d..4117ab6e9b6ff 100644 --- a/sound/soc/codecs/es8328.c +++ b/sound/soc/codecs/es8328.c @@ -161,13 +161,16 @@ static int es8328_put_deemph(struct snd_kcontrol *kcontrol, if (deemph > 1) return -EINVAL; + if (es8328->deemph == deemph) + return 0; + ret = es8328_set_deemph(component); if (ret < 0) return ret; es8328->deemph = deemph; - return 0; + return 1; } diff --git a/sound/soc/codecs/madera.c b/sound/soc/codecs/madera.c index 52639811cc525..4a56082a4c43a 100644 --- a/sound/soc/codecs/madera.c +++ b/sound/soc/codecs/madera.c @@ -568,7 +568,13 @@ int madera_out1_demux_put(struct snd_kcontrol *kcontrol, end: snd_soc_dapm_mutex_unlock(dapm); - return snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL); + ret = snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL); + if (ret < 0) { + dev_err(madera->dev, "Failed to update demux power state: %d\n", ret); + return ret; + } + + return change; } EXPORT_SYMBOL_GPL(madera_out1_demux_put); @@ -847,7 +853,7 @@ static int madera_adsp_rate_put(struct snd_kcontrol *kcontrol, struct soc_enum *e = (struct soc_enum *)kcontrol->private_value; const int adsp_num = e->shift_l; const unsigned int item = ucontrol->value.enumerated.item[0]; - int ret; + int ret = 0; if (item >= e->items) return -EINVAL; @@ -864,10 +870,10 @@ static int madera_adsp_rate_put(struct snd_kcontrol *kcontrol, "Cannot change '%s' while in use by active audio paths\n", kcontrol->id.name); ret = -EBUSY; - } else { + } else if (priv->adsp_rate_cache[adsp_num] != e->values[item]) { /* Volatile register so defer until the codec is powered up */ priv->adsp_rate_cache[adsp_num] = e->values[item]; - ret = 0; + ret = 1; } mutex_unlock(&priv->rate_lock); diff --git a/sound/soc/codecs/max9759.c b/sound/soc/codecs/max9759.c index 00e9d4fd1651f..0c261335c8a16 100644 --- a/sound/soc/codecs/max9759.c +++ b/sound/soc/codecs/max9759.c @@ -64,7 +64,8 @@ static int speaker_gain_control_put(struct snd_kcontrol *kcontrol, struct snd_soc_component *c = snd_soc_kcontrol_component(kcontrol); struct max9759 *priv = snd_soc_component_get_drvdata(c); - if (ucontrol->value.integer.value[0] > 3) + if (ucontrol->value.integer.value[0] < 0 || + ucontrol->value.integer.value[0] > 3) return -EINVAL; priv->gain = ucontrol->value.integer.value[0]; diff --git a/sound/soc/codecs/max98090.c b/sound/soc/codecs/max98090.c index 6b9d326e11b07..ce9f99dd3e87d 100644 --- a/sound/soc/codecs/max98090.c +++ b/sound/soc/codecs/max98090.c @@ -413,6 +413,9 @@ static int max98090_put_enab_tlv(struct snd_kcontrol *kcontrol, val = (val >> mc->shift) & mask; + if (sel < 0 || sel > mc->max) + return -EINVAL; + *select = sel; /* Setting a volume is only valid if it is already On */ @@ -427,7 +430,7 @@ static int max98090_put_enab_tlv(struct snd_kcontrol *kcontrol, mask << mc->shift, sel << mc->shift); - return 0; + return *select != val; } static const char *max98090_perf_pwr_text[] = diff --git a/sound/soc/codecs/msm8916-wcd-analog.c b/sound/soc/codecs/msm8916-wcd-analog.c index 337bddb7c2a49..5a8eedea6be03 100644 --- a/sound/soc/codecs/msm8916-wcd-analog.c +++ b/sound/soc/codecs/msm8916-wcd-analog.c @@ -1195,8 +1195,10 @@ static int pm8916_wcd_analog_spmi_probe(struct platform_device *pdev) } irq = platform_get_irq_byname(pdev, "mbhc_switch_int"); - if (irq < 0) - return irq; + if (irq < 0) { + ret = irq; + goto err_disable_clk; + } ret = devm_request_threaded_irq(dev, irq, NULL, pm8916_mbhc_switch_irq_handler, @@ -1208,8 +1210,10 @@ static int pm8916_wcd_analog_spmi_probe(struct platform_device *pdev) if (priv->mbhc_btn_enabled) { irq = platform_get_irq_byname(pdev, "mbhc_but_press_det"); - if (irq < 0) - return irq; + if (irq < 0) { + ret = irq; + goto err_disable_clk; + } ret = devm_request_threaded_irq(dev, irq, NULL, mbhc_btn_press_irq_handler, @@ -1220,8 +1224,10 @@ static int pm8916_wcd_analog_spmi_probe(struct platform_device *pdev) dev_err(dev, "cannot request mbhc button press irq\n"); irq = platform_get_irq_byname(pdev, "mbhc_but_rel_det"); - if (irq < 0) - return irq; + if (irq < 0) { + ret = irq; + goto err_disable_clk; + } ret = devm_request_threaded_irq(dev, irq, NULL, mbhc_btn_release_irq_handler, @@ -1238,6 +1244,10 @@ static int pm8916_wcd_analog_spmi_probe(struct platform_device *pdev) return devm_snd_soc_register_component(dev, &pm8916_wcd_analog, pm8916_wcd_analog_dai, ARRAY_SIZE(pm8916_wcd_analog_dai)); + +err_disable_clk: + clk_disable_unprepare(priv->mclk); + return ret; } static int pm8916_wcd_analog_spmi_remove(struct platform_device *pdev) diff --git a/sound/soc/codecs/msm8916-wcd-digital.c b/sound/soc/codecs/msm8916-wcd-digital.c index 09fccacadd6b1..e4cde214b7b2d 100644 --- a/sound/soc/codecs/msm8916-wcd-digital.c +++ b/sound/soc/codecs/msm8916-wcd-digital.c @@ -1201,14 +1201,24 @@ static int msm8916_wcd_digital_probe(struct platform_device *pdev) ret = clk_prepare_enable(priv->mclk); if (ret < 0) { dev_err(dev, "failed to enable mclk %d\n", ret); - return ret; + goto err_clk; } dev_set_drvdata(dev, priv); - return devm_snd_soc_register_component(dev, &msm8916_wcd_digital, + ret = devm_snd_soc_register_component(dev, &msm8916_wcd_digital, msm8916_wcd_digital_dai, ARRAY_SIZE(msm8916_wcd_digital_dai)); + if (ret) + goto err_mclk; + + return 0; + +err_mclk: + clk_disable_unprepare(priv->mclk); +err_clk: + clk_disable_unprepare(priv->ahbclk); + return ret; } static int msm8916_wcd_digital_remove(struct platform_device *pdev) diff --git a/sound/soc/codecs/mt6358.c b/sound/soc/codecs/mt6358.c index bb737fd678cc6..494ba0eeb4336 100644 --- a/sound/soc/codecs/mt6358.c +++ b/sound/soc/codecs/mt6358.c @@ -103,6 +103,7 @@ int mt6358_set_mtkaif_protocol(struct snd_soc_component *cmpnt, priv->mtkaif_protocol = mtkaif_protocol; return 0; } +EXPORT_SYMBOL_GPL(mt6358_set_mtkaif_protocol); static void playback_gpio_set(struct mt6358_priv *priv) { @@ -269,6 +270,7 @@ int mt6358_mtkaif_calibration_enable(struct snd_soc_component *cmpnt) 1 << RG_AUD_PAD_TOP_DAT_MISO_LOOPBACK_SFT); return 0; } +EXPORT_SYMBOL_GPL(mt6358_mtkaif_calibration_enable); int mt6358_mtkaif_calibration_disable(struct snd_soc_component *cmpnt) { @@ -292,6 +294,7 @@ int mt6358_mtkaif_calibration_disable(struct snd_soc_component *cmpnt) capture_gpio_reset(priv); return 0; } +EXPORT_SYMBOL_GPL(mt6358_mtkaif_calibration_disable); int mt6358_set_mtkaif_calibration_phase(struct snd_soc_component *cmpnt, int phase_1, int phase_2) @@ -306,6 +309,7 @@ int mt6358_set_mtkaif_calibration_phase(struct snd_soc_component *cmpnt, phase_2 << RG_AUD_PAD_TOP_PHASE_MODE2_SFT); return 0; } +EXPORT_SYMBOL_GPL(mt6358_set_mtkaif_calibration_phase); /* dl pga gain */ enum { diff --git a/sound/soc/codecs/nau8822.c b/sound/soc/codecs/nau8822.c index 78db3bd0b3bcb..cd163978792e9 100644 --- a/sound/soc/codecs/nau8822.c +++ b/sound/soc/codecs/nau8822.c @@ -740,6 +740,8 @@ static int nau8822_set_pll(struct snd_soc_dai *dai, int pll_id, int source, pll_param->pll_int, pll_param->pll_frac, pll_param->mclk_scaler, pll_param->pre_factor); + snd_soc_component_update_bits(component, + NAU8822_REG_POWER_MANAGEMENT_1, NAU8822_PLL_EN_MASK, NAU8822_PLL_OFF); snd_soc_component_update_bits(component, NAU8822_REG_PLL_N, NAU8822_PLLMCLK_DIV2 | NAU8822_PLLN_MASK, (pll_param->pre_factor ? NAU8822_PLLMCLK_DIV2 : 0) | @@ -757,6 +759,8 @@ static int nau8822_set_pll(struct snd_soc_dai *dai, int pll_id, int source, pll_param->mclk_scaler << NAU8822_MCLKSEL_SFT); snd_soc_component_update_bits(component, NAU8822_REG_CLOCKING, NAU8822_CLKM_MASK, NAU8822_CLKM_PLL); + snd_soc_component_update_bits(component, + NAU8822_REG_POWER_MANAGEMENT_1, NAU8822_PLL_EN_MASK, NAU8822_PLL_ON); return 0; } diff --git a/sound/soc/codecs/nau8822.h b/sound/soc/codecs/nau8822.h index 489191ff187ec..b45d42c15de6b 100644 --- a/sound/soc/codecs/nau8822.h +++ b/sound/soc/codecs/nau8822.h @@ -90,6 +90,9 @@ #define NAU8822_REFIMP_3K 0x3 #define NAU8822_IOBUF_EN (0x1 << 2) #define NAU8822_ABIAS_EN (0x1 << 3) +#define NAU8822_PLL_EN_MASK (0x1 << 5) +#define NAU8822_PLL_ON (0x1 << 5) +#define NAU8822_PLL_OFF (0x0 << 5) /* NAU8822_REG_AUDIO_INTERFACE (0x4) */ #define NAU8822_AIFMT_MASK (0x3 << 3) diff --git a/sound/soc/codecs/nau8824.c b/sound/soc/codecs/nau8824.c index 15bd8335f6678..c8ccfa2fff848 100644 --- a/sound/soc/codecs/nau8824.c +++ b/sound/soc/codecs/nau8824.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -27,6 +28,12 @@ #include "nau8824.h" +#define NAU8824_JD_ACTIVE_HIGH BIT(0) + +static int nau8824_quirk; +static int quirk_override = -1; +module_param_named(quirk, quirk_override, uint, 0444); +MODULE_PARM_DESC(quirk, "Board-specific quirk override"); static int nau8824_config_sysclk(struct nau8824 *nau8824, int clk_id, unsigned int freq); @@ -1875,6 +1882,34 @@ static int nau8824_read_device_properties(struct device *dev, return 0; } +/* Please keep this list alphabetically sorted */ +static const struct dmi_system_id nau8824_quirk_table[] = { + { + /* Cyberbook T116 rugged tablet */ + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Default string"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"), + DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "20170531"), + }, + .driver_data = (void *)(NAU8824_JD_ACTIVE_HIGH), + }, + {} +}; + +static void nau8824_check_quirks(void) +{ + const struct dmi_system_id *dmi_id; + + if (quirk_override != -1) { + nau8824_quirk = quirk_override; + return; + } + + dmi_id = dmi_first_match(nau8824_quirk_table); + if (dmi_id) + nau8824_quirk = (unsigned long)dmi_id->driver_data; +} + static int nau8824_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id) { @@ -1899,6 +1934,11 @@ static int nau8824_i2c_probe(struct i2c_client *i2c, nau8824->irq = i2c->irq; sema_init(&nau8824->jd_sem, 1); + nau8824_check_quirks(); + + if (nau8824_quirk & NAU8824_JD_ACTIVE_HIGH) + nau8824->jkdet_polarity = 0; + nau8824_print_device_properties(nau8824); ret = regmap_read(nau8824->regmap, NAU8824_REG_I2C_DEVICE_ID, &value); diff --git a/sound/soc/codecs/rk3328_codec.c b/sound/soc/codecs/rk3328_codec.c index 514ebe16bbfad..4e71ecf54af7b 100644 --- a/sound/soc/codecs/rk3328_codec.c +++ b/sound/soc/codecs/rk3328_codec.c @@ -479,7 +479,7 @@ static int rk3328_platform_probe(struct platform_device *pdev) ret = clk_prepare_enable(rk3328->pclk); if (ret < 0) { dev_err(&pdev->dev, "failed to enable acodec pclk\n"); - return ret; + goto err_unprepare_mclk; } base = devm_platform_ioremap_resource(pdev, 0); diff --git a/sound/soc/codecs/rt5514.c b/sound/soc/codecs/rt5514.c index 7081142a355e1..c444a56df95ba 100644 --- a/sound/soc/codecs/rt5514.c +++ b/sound/soc/codecs/rt5514.c @@ -419,7 +419,7 @@ static int rt5514_dsp_voice_wake_up_put(struct snd_kcontrol *kcontrol, } } - return 0; + return 1; } static const struct snd_kcontrol_new rt5514_snd_controls[] = { diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index c83f7f5da96b7..a66e93a3af745 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -4074,9 +4074,14 @@ static int rt5645_i2c_remove(struct i2c_client *i2c) if (i2c->irq) free_irq(i2c->irq, rt5645); + /* + * Since the rt5645_btn_check_callback() can queue jack_detect_work, + * the timer need to be delted first + */ + del_timer_sync(&rt5645->btn_check_timer); + cancel_delayed_work_sync(&rt5645->jack_detect_work); cancel_delayed_work_sync(&rt5645->rcclock_work); - del_timer_sync(&rt5645->btn_check_timer); regulator_bulk_disable(ARRAY_SIZE(rt5645->supplies), rt5645->supplies); diff --git a/sound/soc/codecs/rt5663.c b/sound/soc/codecs/rt5663.c index 2943692f66edd..19e2f622718d6 100644 --- a/sound/soc/codecs/rt5663.c +++ b/sound/soc/codecs/rt5663.c @@ -3461,6 +3461,7 @@ static void rt5663_calibrate(struct rt5663_priv *rt5663) static int rt5663_parse_dp(struct rt5663_priv *rt5663, struct device *dev) { int table_size; + int ret; device_property_read_u32(dev, "realtek,dc_offset_l_manual", &rt5663->pdata.dc_offset_l_manual); @@ -3477,9 +3478,13 @@ static int rt5663_parse_dp(struct rt5663_priv *rt5663, struct device *dev) table_size = sizeof(struct impedance_mapping_table) * rt5663->pdata.impedance_sensing_num; rt5663->imp_table = devm_kzalloc(dev, table_size, GFP_KERNEL); - device_property_read_u32_array(dev, + if (!rt5663->imp_table) + return -ENOMEM; + ret = device_property_read_u32_array(dev, "realtek,impedance_sensing_table", (u32 *)rt5663->imp_table, table_size); + if (ret) + return ret; } return 0; @@ -3504,8 +3509,11 @@ static int rt5663_i2c_probe(struct i2c_client *i2c, if (pdata) rt5663->pdata = *pdata; - else - rt5663_parse_dp(rt5663, &i2c->dev); + else { + ret = rt5663_parse_dp(rt5663, &i2c->dev); + if (ret) + return ret; + } for (i = 0; i < ARRAY_SIZE(rt5663->supplies); i++) rt5663->supplies[i].supply = rt5663_supply_names[i]; diff --git a/sound/soc/codecs/rt5668.c b/sound/soc/codecs/rt5668.c index 5716cede99cb4..acc2b34ca334a 100644 --- a/sound/soc/codecs/rt5668.c +++ b/sound/soc/codecs/rt5668.c @@ -1022,11 +1022,13 @@ static void rt5668_jack_detect_handler(struct work_struct *work) container_of(work, struct rt5668_priv, jack_detect_work.work); int val, btn_type; - while (!rt5668->component) - usleep_range(10000, 15000); - - while (!rt5668->component->card->instantiated) - usleep_range(10000, 15000); + if (!rt5668->component || !rt5668->component->card || + !rt5668->component->card->instantiated) { + /* card not yet ready, try later */ + mod_delayed_work(system_power_efficient_wq, + &rt5668->jack_detect_work, msecs_to_jiffies(15)); + return; + } mutex_lock(&rt5668->calibrate_mutex); diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index 05e883a65d7a7..a8cf4c7451304 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -1052,11 +1052,13 @@ static void rt5682_jack_detect_handler(struct work_struct *work) container_of(work, struct rt5682_priv, jack_detect_work.work); int val, btn_type; - while (!rt5682->component) - usleep_range(10000, 15000); - - while (!rt5682->component->card->instantiated) - usleep_range(10000, 15000); + if (!rt5682->component || !rt5682->component->card || + !rt5682->component->card->instantiated) { + /* card not yet ready, try later */ + mod_delayed_work(system_power_efficient_wq, + &rt5682->jack_detect_work, msecs_to_jiffies(15)); + return; + } mutex_lock(&rt5682->calibrate_mutex); diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index 8a1e485982d81..76d3c0681f37e 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -1788,6 +1788,9 @@ static int sgtl5000_i2c_remove(struct i2c_client *client) { struct sgtl5000_priv *sgtl5000 = i2c_get_clientdata(client); + regmap_write(sgtl5000->regmap, SGTL5000_CHIP_DIG_POWER, SGTL5000_DIG_POWER_DEFAULT); + regmap_write(sgtl5000->regmap, SGTL5000_CHIP_ANA_POWER, SGTL5000_ANA_POWER_DEFAULT); + clk_disable_unprepare(sgtl5000->mclk); regulator_bulk_disable(sgtl5000->num_supplies, sgtl5000->supplies); regulator_bulk_free(sgtl5000->num_supplies, sgtl5000->supplies); @@ -1795,6 +1798,11 @@ static int sgtl5000_i2c_remove(struct i2c_client *client) return 0; } +static void sgtl5000_i2c_shutdown(struct i2c_client *client) +{ + sgtl5000_i2c_remove(client); +} + static const struct i2c_device_id sgtl5000_id[] = { {"sgtl5000", 0}, {}, @@ -1815,6 +1823,7 @@ static struct i2c_driver sgtl5000_i2c_driver = { }, .probe = sgtl5000_i2c_probe, .remove = sgtl5000_i2c_remove, + .shutdown = sgtl5000_i2c_shutdown, .id_table = sgtl5000_id, }; diff --git a/sound/soc/codecs/sgtl5000.h b/sound/soc/codecs/sgtl5000.h index 56ec5863f2507..3a808c762299e 100644 --- a/sound/soc/codecs/sgtl5000.h +++ b/sound/soc/codecs/sgtl5000.h @@ -80,6 +80,7 @@ /* * SGTL5000_CHIP_DIG_POWER */ +#define SGTL5000_DIG_POWER_DEFAULT 0x0000 #define SGTL5000_ADC_EN 0x0040 #define SGTL5000_DAC_EN 0x0020 #define SGTL5000_DAP_POWERUP 0x0010 diff --git a/sound/soc/codecs/tscs454.c b/sound/soc/codecs/tscs454.c index c3587af9985c0..3d981441b8d1a 100644 --- a/sound/soc/codecs/tscs454.c +++ b/sound/soc/codecs/tscs454.c @@ -3128,18 +3128,17 @@ static int set_aif_sample_format(struct snd_soc_component *component, unsigned int width; int ret; - switch (format) { - case SNDRV_PCM_FORMAT_S16_LE: + switch (snd_pcm_format_width(format)) { + case 16: width = FV_WL_16; break; - case SNDRV_PCM_FORMAT_S20_3LE: + case 20: width = FV_WL_20; break; - case SNDRV_PCM_FORMAT_S24_3LE: + case 24: width = FV_WL_24; break; - case SNDRV_PCM_FORMAT_S24_LE: - case SNDRV_PCM_FORMAT_S32_LE: + case 32: width = FV_WL_32; break; default: @@ -3337,6 +3336,7 @@ static const struct snd_soc_component_driver soc_component_dev_tscs454 = { .num_dapm_routes = ARRAY_SIZE(tscs454_intercon), .controls = tscs454_snd_controls, .num_controls = ARRAY_SIZE(tscs454_snd_controls), + .endianness = 1, }; #define TSCS454_RATES SNDRV_PCM_RATE_8000_96000 diff --git a/sound/soc/codecs/wcd9335.c b/sound/soc/codecs/wcd9335.c index 81906c25e4a87..016aff97e2fb2 100644 --- a/sound/soc/codecs/wcd9335.c +++ b/sound/soc/codecs/wcd9335.c @@ -4076,6 +4076,16 @@ static int wcd9335_setup_irqs(struct wcd9335_codec *wcd) return ret; } +static void wcd9335_teardown_irqs(struct wcd9335_codec *wcd) +{ + int i; + + /* disable interrupts on all slave ports */ + for (i = 0; i < WCD9335_SLIM_NUM_PORT_REG; i++) + regmap_write(wcd->if_regmap, WCD9335_SLIM_PGD_PORT_INT_EN0 + i, + 0x00); +} + static void wcd9335_cdc_sido_ccl_enable(struct wcd9335_codec *wcd, bool ccl_flag) { @@ -4844,6 +4854,7 @@ static void wcd9335_codec_init(struct snd_soc_component *component) static int wcd9335_codec_probe(struct snd_soc_component *component) { struct wcd9335_codec *wcd = dev_get_drvdata(component->dev); + int ret; int i; snd_soc_component_init_regmap(component, wcd->regmap); @@ -4861,7 +4872,15 @@ static int wcd9335_codec_probe(struct snd_soc_component *component) for (i = 0; i < NUM_CODEC_DAIS; i++) INIT_LIST_HEAD(&wcd->dai[i].slim_ch_list); - return wcd9335_setup_irqs(wcd); + ret = wcd9335_setup_irqs(wcd); + if (ret) + goto free_clsh_ctrl; + + return 0; + +free_clsh_ctrl: + wcd_clsh_ctrl_free(wcd->clsh_ctrl); + return ret; } static void wcd9335_codec_remove(struct snd_soc_component *comp) @@ -4869,7 +4888,7 @@ static void wcd9335_codec_remove(struct snd_soc_component *comp) struct wcd9335_codec *wcd = dev_get_drvdata(comp->dev); wcd_clsh_ctrl_free(wcd->clsh_ctrl); - free_irq(regmap_irq_get_virq(wcd->irq_data, WCD9335_IRQ_SLIMBUS), wcd); + wcd9335_teardown_irqs(wcd); } static int wcd9335_codec_set_sysclk(struct snd_soc_component *comp, diff --git a/sound/soc/codecs/wm2000.c b/sound/soc/codecs/wm2000.c index 72e165cc64439..97ece3114b3dc 100644 --- a/sound/soc/codecs/wm2000.c +++ b/sound/soc/codecs/wm2000.c @@ -536,7 +536,7 @@ static int wm2000_anc_transition(struct wm2000_priv *wm2000, { struct i2c_client *i2c = wm2000->i2c; int i, j; - int ret; + int ret = 0; if (wm2000->anc_mode == mode) return 0; @@ -566,13 +566,13 @@ static int wm2000_anc_transition(struct wm2000_priv *wm2000, ret = anc_transitions[i].step[j](i2c, anc_transitions[i].analogue); if (ret != 0) - return ret; + break; } if (anc_transitions[i].dest == ANC_OFF) clk_disable_unprepare(wm2000->mclk); - return 0; + return ret; } static int wm2000_anc_set_mode(struct wm2000_priv *wm2000) diff --git a/sound/soc/codecs/wm5110.c b/sound/soc/codecs/wm5110.c index 9dc215b5c5045..06ec3f48c808a 100644 --- a/sound/soc/codecs/wm5110.c +++ b/sound/soc/codecs/wm5110.c @@ -413,6 +413,7 @@ static int wm5110_put_dre(struct snd_kcontrol *kcontrol, unsigned int rnew = (!!ucontrol->value.integer.value[1]) << mc->rshift; unsigned int lold, rold; unsigned int lena, rena; + bool change = false; int ret; snd_soc_dapm_mutex_lock(dapm); @@ -440,8 +441,8 @@ static int wm5110_put_dre(struct snd_kcontrol *kcontrol, goto err; } - ret = regmap_update_bits(arizona->regmap, ARIZONA_DRE_ENABLE, - mask, lnew | rnew); + ret = regmap_update_bits_check(arizona->regmap, ARIZONA_DRE_ENABLE, + mask, lnew | rnew, &change); if (ret) { dev_err(arizona->dev, "Failed to set DRE: %d\n", ret); goto err; @@ -454,6 +455,9 @@ static int wm5110_put_dre(struct snd_kcontrol *kcontrol, if (!rnew && rold) wm5110_clear_pga_volume(arizona, mc->rshift); + if (change) + ret = 1; + err: snd_soc_dapm_mutex_unlock(dapm); diff --git a/sound/soc/codecs/wm8350.c b/sound/soc/codecs/wm8350.c index fe99584c917f8..9cd91bb0a9022 100644 --- a/sound/soc/codecs/wm8350.c +++ b/sound/soc/codecs/wm8350.c @@ -1535,18 +1535,38 @@ static int wm8350_component_probe(struct snd_soc_component *component) wm8350_clear_bits(wm8350, WM8350_JACK_DETECT, WM8350_JDL_ENA | WM8350_JDR_ENA); - wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_L, + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_L, wm8350_hpl_jack_handler, 0, "Left jack detect", priv); - wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_R, + if (ret != 0) + goto err; + + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_R, wm8350_hpr_jack_handler, 0, "Right jack detect", priv); - wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_MICSCD, + if (ret != 0) + goto free_jck_det_l; + + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_MICSCD, wm8350_mic_handler, 0, "Microphone short", priv); - wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_MICD, + if (ret != 0) + goto free_jck_det_r; + + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_MICD, wm8350_mic_handler, 0, "Microphone detect", priv); + if (ret != 0) + goto free_micscd; return 0; + +free_micscd: + wm8350_free_irq(wm8350, WM8350_IRQ_CODEC_MICSCD, priv); +free_jck_det_r: + wm8350_free_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_R, priv); +free_jck_det_l: + wm8350_free_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_L, priv); +err: + return ret; } static void wm8350_component_remove(struct snd_soc_component *component) diff --git a/sound/soc/codecs/wm8731.c b/sound/soc/codecs/wm8731.c index 6fd1bef848ed9..fa55d79b39b60 100644 --- a/sound/soc/codecs/wm8731.c +++ b/sound/soc/codecs/wm8731.c @@ -601,7 +601,7 @@ static int wm8731_hw_init(struct device *dev, struct wm8731_priv *wm8731) ret = wm8731_reset(wm8731->regmap); if (ret < 0) { dev_err(dev, "Failed to issue reset: %d\n", ret); - goto err_regulator_enable; + goto err; } /* Clear POWEROFF, keep everything else disabled */ @@ -618,10 +618,7 @@ static int wm8731_hw_init(struct device *dev, struct wm8731_priv *wm8731) regcache_mark_dirty(wm8731->regmap); -err_regulator_enable: - /* Regulators will be enabled by bias management */ - regulator_bulk_disable(ARRAY_SIZE(wm8731->supplies), wm8731->supplies); - +err: return ret; } @@ -765,21 +762,27 @@ static int wm8731_i2c_probe(struct i2c_client *i2c, ret = PTR_ERR(wm8731->regmap); dev_err(&i2c->dev, "Failed to allocate register map: %d\n", ret); - return ret; + goto err_regulator_enable; } ret = wm8731_hw_init(&i2c->dev, wm8731); if (ret != 0) - return ret; + goto err_regulator_enable; ret = devm_snd_soc_register_component(&i2c->dev, &soc_component_dev_wm8731, &wm8731_dai, 1); if (ret != 0) { dev_err(&i2c->dev, "Failed to register CODEC: %d\n", ret); - return ret; + goto err_regulator_enable; } return 0; + +err_regulator_enable: + /* Regulators will be enabled by bias management */ + regulator_bulk_disable(ARRAY_SIZE(wm8731->supplies), wm8731->supplies); + + return ret; } static int wm8731_i2c_remove(struct i2c_client *client) diff --git a/sound/soc/codecs/wm8958-dsp2.c b/sound/soc/codecs/wm8958-dsp2.c index 04f23477039a5..c677c068b05ec 100644 --- a/sound/soc/codecs/wm8958-dsp2.c +++ b/sound/soc/codecs/wm8958-dsp2.c @@ -534,7 +534,7 @@ static int wm8958_mbc_put(struct snd_kcontrol *kcontrol, wm8958_dsp_apply(component, mbc, wm8994->mbc_ena[mbc]); - return 0; + return 1; } #define WM8958_MBC_SWITCH(xname, xval) {\ @@ -660,7 +660,7 @@ static int wm8958_vss_put(struct snd_kcontrol *kcontrol, wm8958_dsp_apply(component, vss, wm8994->vss_ena[vss]); - return 0; + return 1; } @@ -734,7 +734,7 @@ static int wm8958_hpf_put(struct snd_kcontrol *kcontrol, wm8958_dsp_apply(component, hpf % 3, ucontrol->value.integer.value[0]); - return 0; + return 1; } #define WM8958_HPF_SWITCH(xname, xval) {\ @@ -828,7 +828,7 @@ static int wm8958_enh_eq_put(struct snd_kcontrol *kcontrol, wm8958_dsp_apply(component, eq, ucontrol->value.integer.value[0]); - return 0; + return 1; } #define WM8958_ENH_EQ_SWITCH(xname, xval) {\ diff --git a/sound/soc/codecs/wm8960.c b/sound/soc/codecs/wm8960.c index 708fc4ed54eda..512f8899dcbbd 100644 --- a/sound/soc/codecs/wm8960.c +++ b/sound/soc/codecs/wm8960.c @@ -752,9 +752,16 @@ static int wm8960_configure_clocking(struct snd_soc_component *component) int i, j, k; int ret; - if (!(iface1 & (1<<6))) { - dev_dbg(component->dev, - "Codec is slave mode, no need to configure clock\n"); + /* + * For Slave mode clocking should still be configured, + * so this if statement should be removed, but some platform + * may not work if the sysclk is not configured, to avoid such + * compatible issue, just add '!wm8960->sysclk' condition in + * this if statement. + */ + if (!(iface1 & (1 << 6)) && !wm8960->sysclk) { + dev_warn(component->dev, + "slave mode, but proceeding with no clock configuration\n"); return 0; } diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index d9d59f45833fd..ebaee468057b8 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -3854,6 +3854,7 @@ static int wm8962_runtime_suspend(struct device *dev) #endif static const struct dev_pm_ops wm8962_pm = { + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) SET_RUNTIME_PM_OPS(wm8962_runtime_suspend, wm8962_runtime_resume, NULL) }; diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 13672928da997..aedfa6b2895bd 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -791,7 +791,7 @@ int wm_adsp_fw_put(struct snd_kcontrol *kcontrol, struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol); struct soc_enum *e = (struct soc_enum *)kcontrol->private_value; struct wm_adsp *dsp = snd_soc_component_get_drvdata(component); - int ret = 0; + int ret = 1; if (ucontrol->value.enumerated.item[0] == dsp[e->shift_l].fw) return 0; diff --git a/sound/soc/fsl/fsl_sai.h b/sound/soc/fsl/fsl_sai.h index 677ecfc1ec68f..afaef20272342 100644 --- a/sound/soc/fsl/fsl_sai.h +++ b/sound/soc/fsl/fsl_sai.h @@ -67,8 +67,8 @@ #define FSL_SAI_xCR3(tx, ofs) (tx ? FSL_SAI_TCR3(ofs) : FSL_SAI_RCR3(ofs)) #define FSL_SAI_xCR4(tx, ofs) (tx ? FSL_SAI_TCR4(ofs) : FSL_SAI_RCR4(ofs)) #define FSL_SAI_xCR5(tx, ofs) (tx ? FSL_SAI_TCR5(ofs) : FSL_SAI_RCR5(ofs)) -#define FSL_SAI_xDR(tx, ofs) (tx ? FSL_SAI_TDR(ofs) : FSL_SAI_RDR(ofs)) -#define FSL_SAI_xFR(tx, ofs) (tx ? FSL_SAI_TFR(ofs) : FSL_SAI_RFR(ofs)) +#define FSL_SAI_xDR0(tx) (tx ? FSL_SAI_TDR0 : FSL_SAI_RDR0) +#define FSL_SAI_xFR0(tx) (tx ? FSL_SAI_TFR0 : FSL_SAI_RFR0) #define FSL_SAI_xMR(tx) (tx ? FSL_SAI_TMR : FSL_SAI_RMR) /* SAI Transmit/Receive Control Register */ diff --git a/sound/soc/fsl/imx-es8328.c b/sound/soc/fsl/imx-es8328.c index fad1eb6253d53..9e602c3456196 100644 --- a/sound/soc/fsl/imx-es8328.c +++ b/sound/soc/fsl/imx-es8328.c @@ -87,6 +87,7 @@ static int imx_es8328_probe(struct platform_device *pdev) if (int_port > MUX_PORT_MAX || int_port == 0) { dev_err(dev, "mux-int-port: hardware only has %d mux ports\n", MUX_PORT_MAX); + ret = -EINVAL; goto fail; } diff --git a/sound/soc/fsl/imx-sgtl5000.c b/sound/soc/fsl/imx-sgtl5000.c index 15e8b9343c354..7106d56a3346c 100644 --- a/sound/soc/fsl/imx-sgtl5000.c +++ b/sound/soc/fsl/imx-sgtl5000.c @@ -120,19 +120,19 @@ static int imx_sgtl5000_probe(struct platform_device *pdev) data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); if (!data) { ret = -ENOMEM; - goto fail; + goto put_device; } comp = devm_kzalloc(&pdev->dev, 3 * sizeof(*comp), GFP_KERNEL); if (!comp) { ret = -ENOMEM; - goto fail; + goto put_device; } data->codec_clk = clk_get(&codec_dev->dev, NULL); if (IS_ERR(data->codec_clk)) { ret = PTR_ERR(data->codec_clk); - goto fail; + goto put_device; } data->clk_frequency = clk_get_rate(data->codec_clk); @@ -158,10 +158,10 @@ static int imx_sgtl5000_probe(struct platform_device *pdev) data->card.dev = &pdev->dev; ret = snd_soc_of_parse_card_name(&data->card, "model"); if (ret) - goto fail; + goto put_device; ret = snd_soc_of_parse_audio_routing(&data->card, "audio-routing"); if (ret) - goto fail; + goto put_device; data->card.num_links = 1; data->card.owner = THIS_MODULE; data->card.dai_link = &data->dai; @@ -176,7 +176,7 @@ static int imx_sgtl5000_probe(struct platform_device *pdev) if (ret != -EPROBE_DEFER) dev_err(&pdev->dev, "snd_soc_register_card failed (%d)\n", ret); - goto fail; + goto put_device; } of_node_put(ssi_np); @@ -184,6 +184,8 @@ static int imx_sgtl5000_probe(struct platform_device *pdev) return 0; +put_device: + put_device(&codec_dev->dev); fail: if (data && !IS_ERR(data->codec_clk)) clk_put(data->codec_clk); diff --git a/sound/soc/fsl/pcm030-audio-fabric.c b/sound/soc/fsl/pcm030-audio-fabric.c index af3c3b90c0aca..83b4a22bf15ac 100644 --- a/sound/soc/fsl/pcm030-audio-fabric.c +++ b/sound/soc/fsl/pcm030-audio-fabric.c @@ -93,16 +93,21 @@ static int pcm030_fabric_probe(struct platform_device *op) dev_err(&op->dev, "platform_device_alloc() failed\n"); ret = platform_device_add(pdata->codec_device); - if (ret) + if (ret) { dev_err(&op->dev, "platform_device_add() failed: %d\n", ret); + platform_device_put(pdata->codec_device); + } ret = snd_soc_register_card(card); - if (ret) + if (ret) { dev_err(&op->dev, "snd_soc_register_card() failed: %d\n", ret); + platform_device_del(pdata->codec_device); + platform_device_put(pdata->codec_device); + } platform_set_drvdata(op, pdata); - return ret; + } static int pcm030_fabric_remove(struct platform_device *op) diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index c67b86e2d0c0a..7830d014d9247 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -284,9 +284,6 @@ static const struct snd_soc_dapm_widget byt_rt5640_widgets[] = { static const struct snd_soc_dapm_route byt_rt5640_audio_map[] = { {"Headphone", NULL, "Platform Clock"}, {"Headset Mic", NULL, "Platform Clock"}, - {"Internal Mic", NULL, "Platform Clock"}, - {"Speaker", NULL, "Platform Clock"}, - {"Headset Mic", NULL, "MICBIAS1"}, {"IN2P", NULL, "Headset Mic"}, {"Headphone", NULL, "HPOL"}, @@ -294,19 +291,23 @@ static const struct snd_soc_dapm_route byt_rt5640_audio_map[] = { }; static const struct snd_soc_dapm_route byt_rt5640_intmic_dmic1_map[] = { + {"Internal Mic", NULL, "Platform Clock"}, {"DMIC1", NULL, "Internal Mic"}, }; static const struct snd_soc_dapm_route byt_rt5640_intmic_dmic2_map[] = { + {"Internal Mic", NULL, "Platform Clock"}, {"DMIC2", NULL, "Internal Mic"}, }; static const struct snd_soc_dapm_route byt_rt5640_intmic_in1_map[] = { + {"Internal Mic", NULL, "Platform Clock"}, {"Internal Mic", NULL, "MICBIAS1"}, {"IN1P", NULL, "Internal Mic"}, }; static const struct snd_soc_dapm_route byt_rt5640_intmic_in3_map[] = { + {"Internal Mic", NULL, "Platform Clock"}, {"Internal Mic", NULL, "MICBIAS1"}, {"IN3P", NULL, "Internal Mic"}, }; @@ -348,6 +349,7 @@ static const struct snd_soc_dapm_route byt_rt5640_ssp0_aif2_map[] = { }; static const struct snd_soc_dapm_route byt_rt5640_stereo_spk_map[] = { + {"Speaker", NULL, "Platform Clock"}, {"Speaker", NULL, "SPOLP"}, {"Speaker", NULL, "SPOLN"}, {"Speaker", NULL, "SPORP"}, @@ -355,6 +357,7 @@ static const struct snd_soc_dapm_route byt_rt5640_stereo_spk_map[] = { }; static const struct snd_soc_dapm_route byt_rt5640_mono_spk_map[] = { + {"Speaker", NULL, "Platform Clock"}, {"Speaker", NULL, "SPOLP"}, {"Speaker", NULL, "SPOLN"}, }; diff --git a/sound/soc/intel/skylake/skl-messages.c b/sound/soc/intel/skylake/skl-messages.c index 476ef1897961d..79c6cf2c14bfb 100644 --- a/sound/soc/intel/skylake/skl-messages.c +++ b/sound/soc/intel/skylake/skl-messages.c @@ -802,9 +802,12 @@ static u16 skl_get_module_param_size(struct skl_dev *skl, case SKL_MODULE_TYPE_BASE_OUTFMT: case SKL_MODULE_TYPE_MIC_SELECT: - case SKL_MODULE_TYPE_KPB: return sizeof(struct skl_base_outfmt_cfg); + case SKL_MODULE_TYPE_MIXER: + case SKL_MODULE_TYPE_KPB: + return sizeof(struct skl_base_cfg); + default: /* * return only base cfg when no specific module type is @@ -857,10 +860,14 @@ static int skl_set_module_format(struct skl_dev *skl, case SKL_MODULE_TYPE_BASE_OUTFMT: case SKL_MODULE_TYPE_MIC_SELECT: - case SKL_MODULE_TYPE_KPB: skl_set_base_outfmt_format(skl, module_config, *param_data); break; + case SKL_MODULE_TYPE_MIXER: + case SKL_MODULE_TYPE_KPB: + skl_set_base_module_format(skl, module_config, *param_data); + break; + default: skl_set_base_module_format(skl, module_config, *param_data); break; diff --git a/sound/soc/intel/skylake/skl-pcm.c b/sound/soc/intel/skylake/skl-pcm.c index 7f287424af9b7..439dd4ba690c4 100644 --- a/sound/soc/intel/skylake/skl-pcm.c +++ b/sound/soc/intel/skylake/skl-pcm.c @@ -1333,21 +1333,6 @@ static int skl_get_module_info(struct skl_dev *skl, return -EIO; } - list_for_each_entry(module, &skl->uuid_list, list) { - if (guid_equal(uuid_mod, &module->uuid)) { - mconfig->id.module_id = module->id; - if (mconfig->module) - mconfig->module->loadable = module->is_loadable; - ret = 0; - break; - } - } - - if (ret) - return ret; - - uuid_mod = &module->uuid; - ret = -EIO; for (i = 0; i < skl->nr_modules; i++) { skl_module = skl->modules[i]; uuid_tplg = &skl_module->uuid; @@ -1357,10 +1342,18 @@ static int skl_get_module_info(struct skl_dev *skl, break; } } + if (skl->nr_modules && ret) return ret; + ret = -EIO; list_for_each_entry(module, &skl->uuid_list, list) { + if (guid_equal(uuid_mod, &module->uuid)) { + mconfig->id.module_id = module->id; + mconfig->module->loadable = module->is_loadable; + ret = 0; + } + for (i = 0; i < MAX_IN_QUEUE; i++) { pin_id = &mconfig->m_in_pin[i].id; if (guid_equal(&pin_id->mod_uuid, &module->uuid)) @@ -1374,7 +1367,7 @@ static int skl_get_module_info(struct skl_dev *skl, } } - return 0; + return ret; } static int skl_populate_modules(struct skl_dev *skl) diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c index 1940b17f27efa..254b796e635d1 100644 --- a/sound/soc/intel/skylake/skl-topology.c +++ b/sound/soc/intel/skylake/skl-topology.c @@ -113,7 +113,7 @@ static int is_skl_dsp_widget_type(struct snd_soc_dapm_widget *w, static void skl_dump_mconfig(struct skl_dev *skl, struct skl_module_cfg *mcfg) { - struct skl_module_iface *iface = &mcfg->module->formats[0]; + struct skl_module_iface *iface = &mcfg->module->formats[mcfg->fmt_idx]; dev_dbg(skl->dev, "Dumping config\n"); dev_dbg(skl->dev, "Input Format:\n"); @@ -195,8 +195,8 @@ static void skl_tplg_update_params_fixup(struct skl_module_cfg *m_cfg, struct skl_module_fmt *in_fmt, *out_fmt; /* Fixups will be applied to pin 0 only */ - in_fmt = &m_cfg->module->formats[0].inputs[0].fmt; - out_fmt = &m_cfg->module->formats[0].outputs[0].fmt; + in_fmt = &m_cfg->module->formats[m_cfg->fmt_idx].inputs[0].fmt; + out_fmt = &m_cfg->module->formats[m_cfg->fmt_idx].outputs[0].fmt; if (params->stream == SNDRV_PCM_STREAM_PLAYBACK) { if (is_fe) { @@ -239,9 +239,9 @@ static void skl_tplg_update_buffer_size(struct skl_dev *skl, /* Since fixups is applied to pin 0 only, ibs, obs needs * change for pin 0 only */ - res = &mcfg->module->resources[0]; - in_fmt = &mcfg->module->formats[0].inputs[0].fmt; - out_fmt = &mcfg->module->formats[0].outputs[0].fmt; + res = &mcfg->module->resources[mcfg->res_idx]; + in_fmt = &mcfg->module->formats[mcfg->fmt_idx].inputs[0].fmt; + out_fmt = &mcfg->module->formats[mcfg->fmt_idx].outputs[0].fmt; if (mcfg->m_type == SKL_MODULE_TYPE_SRCINT) multiplier = 5; @@ -1463,12 +1463,6 @@ static int skl_tplg_tlv_control_set(struct snd_kcontrol *kcontrol, struct skl_dev *skl = get_skl_ctx(w->dapm->dev); if (ac->params) { - /* - * Widget data is expected to be stripped of T and L - */ - size -= 2 * sizeof(unsigned int); - data += 2; - if (size > ac->max) return -EINVAL; ac->size = size; @@ -1637,11 +1631,12 @@ int skl_tplg_update_pipe_params(struct device *dev, struct skl_module_cfg *mconfig, struct skl_pipe_params *params) { - struct skl_module_res *res = &mconfig->module->resources[0]; + struct skl_module_res *res; struct skl_dev *skl = get_skl_ctx(dev); struct skl_module_fmt *format = NULL; u8 cfg_idx = mconfig->pipe->cur_config_idx; + res = &mconfig->module->resources[mconfig->res_idx]; skl_tplg_fill_dma_id(mconfig, params); mconfig->fmt_idx = mconfig->mod_cfg[cfg_idx].fmt_idx; mconfig->res_idx = mconfig->mod_cfg[cfg_idx].res_idx; @@ -1650,9 +1645,9 @@ int skl_tplg_update_pipe_params(struct device *dev, return 0; if (params->stream == SNDRV_PCM_STREAM_PLAYBACK) - format = &mconfig->module->formats[0].inputs[0].fmt; + format = &mconfig->module->formats[mconfig->fmt_idx].inputs[0].fmt; else - format = &mconfig->module->formats[0].outputs[0].fmt; + format = &mconfig->module->formats[mconfig->fmt_idx].outputs[0].fmt; /* set the hw_params */ format->s_freq = params->s_freq; diff --git a/sound/soc/mediatek/mt2701/mt2701-wm8960.c b/sound/soc/mediatek/mt2701/mt2701-wm8960.c index 8c4c89e4c616f..b9ad42112ea18 100644 --- a/sound/soc/mediatek/mt2701/mt2701-wm8960.c +++ b/sound/soc/mediatek/mt2701/mt2701-wm8960.c @@ -129,7 +129,8 @@ static int mt2701_wm8960_machine_probe(struct platform_device *pdev) if (!codec_node) { dev_err(&pdev->dev, "Property 'audio-codec' missing or invalid\n"); - return -EINVAL; + ret = -EINVAL; + goto put_platform_node; } for_each_card_prelinks(card, i, dai_link) { if (dai_link->codecs->name) @@ -140,7 +141,7 @@ static int mt2701_wm8960_machine_probe(struct platform_device *pdev) ret = snd_soc_of_parse_audio_routing(card, "audio-routing"); if (ret) { dev_err(&pdev->dev, "failed to parse audio-routing: %d\n", ret); - return ret; + goto put_codec_node; } ret = devm_snd_soc_register_card(&pdev->dev, card); @@ -148,6 +149,10 @@ static int mt2701_wm8960_machine_probe(struct platform_device *pdev) dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n", __func__, ret); +put_codec_node: + of_node_put(codec_node); +put_platform_node: + of_node_put(platform_node); return ret; } diff --git a/sound/soc/mediatek/mt8173/mt8173-max98090.c b/sound/soc/mediatek/mt8173/mt8173-max98090.c index 22c00600c999f..32df181801146 100644 --- a/sound/soc/mediatek/mt8173/mt8173-max98090.c +++ b/sound/soc/mediatek/mt8173/mt8173-max98090.c @@ -167,7 +167,8 @@ static int mt8173_max98090_dev_probe(struct platform_device *pdev) if (!codec_node) { dev_err(&pdev->dev, "Property 'audio-codec' missing or invalid\n"); - return -EINVAL; + ret = -EINVAL; + goto put_platform_node; } for_each_card_prelinks(card, i, dai_link) { if (dai_link->codecs->name) @@ -180,6 +181,11 @@ static int mt8173_max98090_dev_probe(struct platform_device *pdev) if (ret) dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n", __func__, ret); + + of_node_put(codec_node); + +put_platform_node: + of_node_put(platform_node); return ret; } diff --git a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c index 8717e87bfe264..6f8542329bab9 100644 --- a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c +++ b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c @@ -218,6 +218,8 @@ static int mt8173_rt5650_rt5514_dev_probe(struct platform_device *pdev) if (ret) dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n", __func__, ret); + + of_node_put(platform_node); return ret; } diff --git a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c index 9d4dd97211548..727ff0f7f20b1 100644 --- a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c +++ b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c @@ -285,6 +285,8 @@ static int mt8173_rt5650_rt5676_dev_probe(struct platform_device *pdev) if (ret) dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n", __func__, ret); + + of_node_put(platform_node); return ret; } diff --git a/sound/soc/mediatek/mt8173/mt8173-rt5650.c b/sound/soc/mediatek/mt8173/mt8173-rt5650.c index ef6f236752867..21e7d4d3ded5a 100644 --- a/sound/soc/mediatek/mt8173/mt8173-rt5650.c +++ b/sound/soc/mediatek/mt8173/mt8173-rt5650.c @@ -309,6 +309,8 @@ static int mt8173_rt5650_dev_probe(struct platform_device *pdev) if (ret) dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n", __func__, ret); + + of_node_put(platform_node); return ret; } diff --git a/sound/soc/meson/g12a-tohdmitx.c b/sound/soc/meson/g12a-tohdmitx.c index 9cfbd343a00c8..cbe47e0cae426 100644 --- a/sound/soc/meson/g12a-tohdmitx.c +++ b/sound/soc/meson/g12a-tohdmitx.c @@ -127,7 +127,7 @@ static int g12a_tohdmitx_i2s_mux_put_enum(struct snd_kcontrol *kcontrol, snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL); - return 0; + return 1; } static const struct snd_kcontrol_new g12a_tohdmitx_i2s_mux = diff --git a/sound/soc/mxs/mxs-saif.c b/sound/soc/mxs/mxs-saif.c index 1e38ce8583262..c899a05e896f3 100644 --- a/sound/soc/mxs/mxs-saif.c +++ b/sound/soc/mxs/mxs-saif.c @@ -455,7 +455,10 @@ static int mxs_saif_hw_params(struct snd_pcm_substream *substream, * basic clock which should be fast enough for the internal * logic. */ - clk_enable(saif->clk); + ret = clk_enable(saif->clk); + if (ret) + return ret; + ret = clk_set_rate(saif->clk, 24000000); clk_disable(saif->clk); if (ret) @@ -764,6 +767,7 @@ static int mxs_saif_probe(struct platform_device *pdev) saif->master_id = saif->id; } else { ret = of_alias_get_id(master, "saif"); + of_node_put(master); if (ret < 0) return ret; else diff --git a/sound/soc/mxs/mxs-sgtl5000.c b/sound/soc/mxs/mxs-sgtl5000.c index 9841e1da97826..8282fe6d00dd4 100644 --- a/sound/soc/mxs/mxs-sgtl5000.c +++ b/sound/soc/mxs/mxs-sgtl5000.c @@ -118,6 +118,9 @@ static int mxs_sgtl5000_probe(struct platform_device *pdev) codec_np = of_parse_phandle(np, "audio-codec", 0); if (!saif_np[0] || !saif_np[1] || !codec_np) { dev_err(&pdev->dev, "phandle missing or invalid\n"); + of_node_put(codec_np); + of_node_put(saif_np[0]); + of_node_put(saif_np[1]); return -EINVAL; } diff --git a/sound/soc/qcom/qdsp6/q6routing.c b/sound/soc/qcom/qdsp6/q6routing.c index 745cc9dd14f38..bc65009be875c 100644 --- a/sound/soc/qcom/qdsp6/q6routing.c +++ b/sound/soc/qcom/qdsp6/q6routing.c @@ -440,9 +440,15 @@ static int msm_routing_put_audio_mixer(struct snd_kcontrol *kcontrol, struct session_data *session = &data->sessions[session_id]; if (ucontrol->value.integer.value[0]) { + if (session->port_id == be_id) + return 0; + session->port_id = be_id; snd_soc_dapm_mixer_update_power(dapm, kcontrol, 1, update); } else { + if (session->port_id == -1 || session->port_id != be_id) + return 0; + session->port_id = -1; snd_soc_dapm_mixer_update_power(dapm, kcontrol, 0, update); } diff --git a/sound/soc/rockchip/rockchip_i2s.c b/sound/soc/rockchip/rockchip_i2s.c index 61c984f10d8e6..086c90e095770 100644 --- a/sound/soc/rockchip/rockchip_i2s.c +++ b/sound/soc/rockchip/rockchip_i2s.c @@ -186,7 +186,9 @@ static int rockchip_i2s_set_fmt(struct snd_soc_dai *cpu_dai, { struct rk_i2s_dev *i2s = to_info(cpu_dai); unsigned int mask = 0, val = 0; + int ret = 0; + pm_runtime_get_sync(cpu_dai->dev); mask = I2S_CKR_MSS_MASK; switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { case SND_SOC_DAIFMT_CBS_CFS: @@ -199,7 +201,8 @@ static int rockchip_i2s_set_fmt(struct snd_soc_dai *cpu_dai, i2s->is_master_mode = false; break; default: - return -EINVAL; + ret = -EINVAL; + goto err_pm_put; } regmap_update_bits(i2s->regmap, I2S_CKR, mask, val); @@ -213,7 +216,8 @@ static int rockchip_i2s_set_fmt(struct snd_soc_dai *cpu_dai, val = I2S_CKR_CKP_POS; break; default: - return -EINVAL; + ret = -EINVAL; + goto err_pm_put; } regmap_update_bits(i2s->regmap, I2S_CKR, mask, val); @@ -229,14 +233,15 @@ static int rockchip_i2s_set_fmt(struct snd_soc_dai *cpu_dai, case SND_SOC_DAIFMT_I2S: val = I2S_TXCR_IBM_NORMAL; break; - case SND_SOC_DAIFMT_DSP_A: /* PCM no delay mode */ - val = I2S_TXCR_TFS_PCM; - break; - case SND_SOC_DAIFMT_DSP_B: /* PCM delay 1 mode */ + case SND_SOC_DAIFMT_DSP_A: /* PCM delay 1 bit mode */ val = I2S_TXCR_TFS_PCM | I2S_TXCR_PBM_MODE(1); break; + case SND_SOC_DAIFMT_DSP_B: /* PCM no delay mode */ + val = I2S_TXCR_TFS_PCM; + break; default: - return -EINVAL; + ret = -EINVAL; + goto err_pm_put; } regmap_update_bits(i2s->regmap, I2S_TXCR, mask, val); @@ -252,19 +257,23 @@ static int rockchip_i2s_set_fmt(struct snd_soc_dai *cpu_dai, case SND_SOC_DAIFMT_I2S: val = I2S_RXCR_IBM_NORMAL; break; - case SND_SOC_DAIFMT_DSP_A: /* PCM no delay mode */ - val = I2S_RXCR_TFS_PCM; - break; - case SND_SOC_DAIFMT_DSP_B: /* PCM delay 1 mode */ + case SND_SOC_DAIFMT_DSP_A: /* PCM delay 1 bit mode */ val = I2S_RXCR_TFS_PCM | I2S_RXCR_PBM_MODE(1); break; + case SND_SOC_DAIFMT_DSP_B: /* PCM no delay mode */ + val = I2S_RXCR_TFS_PCM; + break; default: - return -EINVAL; + ret = -EINVAL; + goto err_pm_put; } regmap_update_bits(i2s->regmap, I2S_RXCR, mask, val); - return 0; +err_pm_put: + pm_runtime_put(cpu_dai->dev); + + return ret; } static int rockchip_i2s_hw_params(struct snd_pcm_substream *substream, diff --git a/sound/soc/samsung/idma.c b/sound/soc/samsung/idma.c index 65497cd477a50..47f6f5d70853d 100644 --- a/sound/soc/samsung/idma.c +++ b/sound/soc/samsung/idma.c @@ -363,6 +363,8 @@ static int preallocate_idma_buffer(struct snd_pcm *pcm, int stream) buf->addr = idma.lp_tx_addr; buf->bytes = idma_hardware.buffer_bytes_max; buf->area = (unsigned char * __force)ioremap(buf->addr, buf->bytes); + if (!buf->area) + return -ENOMEM; return 0; } diff --git a/sound/soc/sh/fsi.c b/sound/soc/sh/fsi.c index 3447dbdba1f17..6ac7df30a2890 100644 --- a/sound/soc/sh/fsi.c +++ b/sound/soc/sh/fsi.c @@ -816,14 +816,27 @@ static int fsi_clk_enable(struct device *dev, return ret; } - clk_enable(clock->xck); - clk_enable(clock->ick); - clk_enable(clock->div); + ret = clk_enable(clock->xck); + if (ret) + goto err; + ret = clk_enable(clock->ick); + if (ret) + goto disable_xck; + ret = clk_enable(clock->div); + if (ret) + goto disable_ick; clock->count++; } return ret; + +disable_ick: + clk_disable(clock->ick); +disable_xck: + clk_disable(clock->xck); +err: + return ret; } static int fsi_clk_disable(struct device *dev, diff --git a/sound/soc/soc-compress.c b/sound/soc/soc-compress.c index 9e54d8ae6d2cf..da6e40aef7b6e 100644 --- a/sound/soc/soc-compress.c +++ b/sound/soc/soc-compress.c @@ -871,6 +871,11 @@ int snd_soc_new_compress(struct snd_soc_pcm_runtime *rtd, int num) return -EINVAL; } + if (!codec_dai) { + dev_err(rtd->card->dev, "Missing codec\n"); + return -EINVAL; + } + /* check client and interface hw capabilities */ if (snd_soc_dai_stream_valid(codec_dai, SNDRV_PCM_STREAM_PLAYBACK) && snd_soc_dai_stream_valid(cpu_dai, SNDRV_PCM_STREAM_PLAYBACK)) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index c0e03cc8ea822..093ab32ea2c3a 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -3362,7 +3362,7 @@ int snd_soc_get_dai_name(struct of_phandle_args *args, for_each_component(pos) { component_of_node = soc_component_to_node(pos); - if (component_of_node != args->np) + if (component_of_node != args->np || !pos->num_dai) continue; ret = snd_soc_component_of_xlate_dai_name(pos, args, dai_name); diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 7c4d5963692dd..56c9c4189f269 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -1676,8 +1676,7 @@ static void dapm_seq_run(struct snd_soc_card *card, switch (w->id) { case snd_soc_dapm_pre: if (!w->event) - list_for_each_entry_safe_continue(w, n, list, - power_list); + continue; if (event == SND_SOC_DAPM_STREAM_START) ret = w->event(w, @@ -1689,8 +1688,7 @@ static void dapm_seq_run(struct snd_soc_card *card, case snd_soc_dapm_post: if (!w->event) - list_for_each_entry_safe_continue(w, n, list, - power_list); + continue; if (event == SND_SOC_DAPM_STREAM_START) ret = w->event(w, @@ -2542,10 +2540,16 @@ static struct snd_soc_dapm_widget *dapm_find_widget( return NULL; } -static int snd_soc_dapm_set_pin(struct snd_soc_dapm_context *dapm, - const char *pin, int status) +/* + * set the DAPM pin status: + * returns 1 when the value has been updated, 0 when unchanged, or a negative + * error code; called from kcontrol put callback + */ +static int __snd_soc_dapm_set_pin(struct snd_soc_dapm_context *dapm, + const char *pin, int status) { struct snd_soc_dapm_widget *w = dapm_find_widget(dapm, pin, true); + int ret = 0; dapm_assert_locked(dapm); @@ -2558,13 +2562,26 @@ static int snd_soc_dapm_set_pin(struct snd_soc_dapm_context *dapm, dapm_mark_dirty(w, "pin configuration"); dapm_widget_invalidate_input_paths(w); dapm_widget_invalidate_output_paths(w); + ret = 1; } w->connected = status; if (status == 0) w->force = 0; - return 0; + return ret; +} + +/* + * similar as __snd_soc_dapm_set_pin(), but returns 0 when successful; + * called from several API functions below + */ +static int snd_soc_dapm_set_pin(struct snd_soc_dapm_context *dapm, + const char *pin, int status) +{ + int ret = __snd_soc_dapm_set_pin(dapm, pin, status); + + return ret < 0 ? ret : 0; } /** @@ -3404,7 +3421,6 @@ int snd_soc_dapm_put_volsw(struct snd_kcontrol *kcontrol, update.val = val; card->update = &update; } - change |= reg_change; ret = soc_dapm_mixer_update_power(card, kcontrol, connect, rconnect); @@ -3510,7 +3526,6 @@ int snd_soc_dapm_put_enum_double(struct snd_kcontrol *kcontrol, update.val = val; card->update = &update; } - change |= reg_change; ret = soc_dapm_mux_update_power(card, kcontrol, item[0], e); @@ -3580,14 +3595,15 @@ int snd_soc_dapm_put_pin_switch(struct snd_kcontrol *kcontrol, { struct snd_soc_card *card = snd_kcontrol_chip(kcontrol); const char *pin = (const char *)kcontrol->private_value; + int ret; - if (ucontrol->value.integer.value[0]) - snd_soc_dapm_enable_pin(&card->dapm, pin); - else - snd_soc_dapm_disable_pin(&card->dapm, pin); + mutex_lock_nested(&card->dapm_mutex, SND_SOC_DAPM_CLASS_RUNTIME); + ret = __snd_soc_dapm_set_pin(&card->dapm, pin, + !!ucontrol->value.integer.value[0]); + mutex_unlock(&card->dapm_mutex); snd_soc_dapm_sync(&card->dapm); - return 0; + return ret; } EXPORT_SYMBOL_GPL(snd_soc_dapm_put_pin_switch); @@ -4029,7 +4045,7 @@ static int snd_soc_dapm_dai_link_put(struct snd_kcontrol *kcontrol, rtd->params_select = ucontrol->value.enumerated.item[0]; - return 0; + return 1; } static void diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index 95fc24580f85f..453b61b42dd9e 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -314,7 +314,7 @@ int snd_soc_put_volsw(struct snd_kcontrol *kcontrol, unsigned int sign_bit = mc->sign_bit; unsigned int mask = (1 << fls(max)) - 1; unsigned int invert = mc->invert; - int err; + int err, ret; bool type_2r = false; unsigned int val2 = 0; unsigned int val, val_mask; @@ -322,13 +322,27 @@ int snd_soc_put_volsw(struct snd_kcontrol *kcontrol, if (sign_bit) mask = BIT(sign_bit + 1) - 1; - val = ((ucontrol->value.integer.value[0] + min) & mask); + val = ucontrol->value.integer.value[0]; + if (mc->platform_max && ((int)val + min) > mc->platform_max) + return -EINVAL; + if (val > max - min) + return -EINVAL; + if (val < 0) + return -EINVAL; + val = (val + min) & mask; if (invert) val = max - val; val_mask = mask << shift; val = val << shift; if (snd_soc_volsw_is_stereo(mc)) { - val2 = ((ucontrol->value.integer.value[1] + min) & mask); + val2 = ucontrol->value.integer.value[1]; + if (mc->platform_max && ((int)val2 + min) > mc->platform_max) + return -EINVAL; + if (val2 > max - min) + return -EINVAL; + if (val2 < 0) + return -EINVAL; + val2 = (val2 + min) & mask; if (invert) val2 = max - val2; if (reg == reg2) { @@ -342,12 +356,18 @@ int snd_soc_put_volsw(struct snd_kcontrol *kcontrol, err = snd_soc_component_update_bits(component, reg, val_mask, val); if (err < 0) return err; + ret = err; - if (type_2r) + if (type_2r) { err = snd_soc_component_update_bits(component, reg2, val_mask, - val2); + val2); + /* Don't discard any error code or drop change flag */ + if (ret == 0 || err < 0) { + ret = err; + } + } - return err; + return ret; } EXPORT_SYMBOL_GPL(snd_soc_put_volsw); @@ -422,8 +442,15 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, int err = 0; unsigned int val, val_mask, val2 = 0; + val = ucontrol->value.integer.value[0]; + if (mc->platform_max && val > mc->platform_max) + return -EINVAL; + if (val > max - min) + return -EINVAL; + if (val < 0) + return -EINVAL; val_mask = mask << shift; - val = (ucontrol->value.integer.value[0] + min) & mask; + val = (val + min) & mask; val = val << shift; err = snd_soc_component_update_bits(component, reg, val_mask, val); @@ -496,7 +523,15 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, unsigned int mask = (1 << fls(max)) - 1; unsigned int invert = mc->invert; unsigned int val, val_mask; - int ret; + int err, ret, tmp; + + tmp = ucontrol->value.integer.value[0]; + if (tmp < 0) + return -EINVAL; + if (mc->platform_max && tmp > mc->platform_max) + return -EINVAL; + if (tmp > mc->max - mc->min) + return -EINVAL; if (invert) val = (max - ucontrol->value.integer.value[0]) & mask; @@ -505,11 +540,20 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, val_mask = mask << shift; val = val << shift; - ret = snd_soc_component_update_bits(component, reg, val_mask, val); - if (ret < 0) - return ret; + err = snd_soc_component_update_bits(component, reg, val_mask, val); + if (err < 0) + return err; + ret = err; if (snd_soc_volsw_is_stereo(mc)) { + tmp = ucontrol->value.integer.value[1]; + if (tmp < 0) + return -EINVAL; + if (mc->platform_max && tmp > mc->platform_max) + return -EINVAL; + if (tmp > mc->max - mc->min) + return -EINVAL; + if (invert) val = (max - ucontrol->value.integer.value[1]) & mask; else @@ -517,8 +561,12 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, val_mask = mask << shift; val = val << shift; - ret = snd_soc_component_update_bits(component, rreg, val_mask, + err = snd_soc_component_update_bits(component, rreg, val_mask, val); + /* Don't discard any error code or drop change flag */ + if (ret == 0 || err < 0) { + ret = err; + } } return ret; @@ -889,6 +937,8 @@ int snd_soc_put_xr_sx(struct snd_kcontrol *kcontrol, unsigned int i, regval, regmask; int err; + if (val < mc->min || val > mc->max) + return -EINVAL; if (invert) val = max - val; val &= mask; diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index c367609433bfc..870b002293535 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -587,7 +587,8 @@ static int soc_tplg_kcontrol_bind_io(struct snd_soc_tplg_ctl_hdr *hdr, if (le32_to_cpu(hdr->ops.info) == SND_SOC_TPLG_CTL_BYTES && k->iface & SNDRV_CTL_ELEM_IFACE_MIXER - && k->access & SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE + && (k->access & SNDRV_CTL_ELEM_ACCESS_TLV_READ + || k->access & SNDRV_CTL_ELEM_ACCESS_TLV_WRITE) && k->access & SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK) { struct soc_bytes_ext *sbe; struct snd_soc_tplg_bytes_control *be; @@ -2777,6 +2778,7 @@ EXPORT_SYMBOL_GPL(snd_soc_tplg_widget_remove_all); /* remove dynamic controls from the component driver */ int snd_soc_tplg_component_remove(struct snd_soc_component *comp, u32 index) { + struct snd_card *card = comp->card->snd_card; struct snd_soc_dobj *dobj, *next_dobj; int pass = SOC_TPLG_PASS_END; @@ -2784,6 +2786,7 @@ int snd_soc_tplg_component_remove(struct snd_soc_component *comp, u32 index) while (pass >= SOC_TPLG_PASS_START) { /* remove mixer controls */ + down_write(&card->controls_rwsem); list_for_each_entry_safe(dobj, next_dobj, &comp->dobj_list, list) { @@ -2827,6 +2830,7 @@ int snd_soc_tplg_component_remove(struct snd_soc_component *comp, u32 index) break; } } + up_write(&card->controls_rwsem); pass--; } diff --git a/sound/soc/sof/intel/hda-dai.c b/sound/soc/sof/intel/hda-dai.c index 3f645200d3a5c..b3cdd10c83ae1 100644 --- a/sound/soc/sof/intel/hda-dai.c +++ b/sound/soc/sof/intel/hda-dai.c @@ -67,6 +67,7 @@ static struct hdac_ext_stream * return NULL; } + spin_lock_irq(&bus->reg_lock); list_for_each_entry(stream, &bus->stream_list, list) { struct hdac_ext_stream *hstream = stream_to_hdac_ext_stream(stream); @@ -106,12 +107,12 @@ static struct hdac_ext_stream * * is updated in snd_hdac_ext_stream_decouple(). */ if (!res->decoupled) - snd_hdac_ext_stream_decouple(bus, res, true); - spin_lock_irq(&bus->reg_lock); + snd_hdac_ext_stream_decouple_locked(bus, res, true); + res->link_locked = 1; res->link_substream = substream; - spin_unlock_irq(&bus->reg_lock); } + spin_unlock_irq(&bus->reg_lock); return res; } diff --git a/sound/soc/sof/intel/hda-loader.c b/sound/soc/sof/intel/hda-loader.c index 356bb134ae93a..7573f3f9f0f21 100644 --- a/sound/soc/sof/intel/hda-loader.c +++ b/sound/soc/sof/intel/hda-loader.c @@ -50,7 +50,7 @@ static int cl_stream_prepare(struct snd_sof_dev *sdev, unsigned int format, ret = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV_SG, &pci->dev, size, dmab); if (ret < 0) { dev_err(sdev->dev, "error: memory alloc failed: %x\n", ret); - goto error; + goto out_put; } hstream->period_bytes = 0;/* initialize period_bytes */ @@ -60,16 +60,17 @@ static int cl_stream_prepare(struct snd_sof_dev *sdev, unsigned int format, ret = hda_dsp_stream_hw_params(sdev, dsp_stream, dmab, NULL); if (ret < 0) { dev_err(sdev->dev, "error: hdac prepare failed: %x\n", ret); - goto error; + goto out_free; } hda_dsp_stream_spib_config(sdev, dsp_stream, HDA_DSP_SPIB_ENABLE, size); return hstream->stream_tag; -error: - hda_dsp_stream_put(sdev, direction, hstream->stream_tag); +out_free: snd_dma_free_pages(dmab); +out_put: + hda_dsp_stream_put(sdev, direction, hstream->stream_tag); return ret; } diff --git a/sound/soc/sti/uniperif_player.c b/sound/soc/sti/uniperif_player.c index 2ed92c990b97c..dd9013c476649 100644 --- a/sound/soc/sti/uniperif_player.c +++ b/sound/soc/sti/uniperif_player.c @@ -91,7 +91,7 @@ static irqreturn_t uni_player_irq_handler(int irq, void *dev_id) SET_UNIPERIF_ITM_BCLR_FIFO_ERROR(player); /* Stop the player */ - snd_pcm_stop_xrun(player->substream); + snd_pcm_stop(player->substream, SNDRV_PCM_STATE_XRUN); } ret = IRQ_HANDLED; @@ -105,7 +105,7 @@ static irqreturn_t uni_player_irq_handler(int irq, void *dev_id) SET_UNIPERIF_ITM_BCLR_DMA_ERROR(player); /* Stop the player */ - snd_pcm_stop_xrun(player->substream); + snd_pcm_stop(player->substream, SNDRV_PCM_STATE_XRUN); ret = IRQ_HANDLED; } @@ -138,7 +138,7 @@ static irqreturn_t uni_player_irq_handler(int irq, void *dev_id) dev_err(player->dev, "Underflow recovery failed\n"); /* Stop the player */ - snd_pcm_stop_xrun(player->substream); + snd_pcm_stop(player->substream, SNDRV_PCM_STATE_XRUN); ret = IRQ_HANDLED; } diff --git a/sound/soc/sti/uniperif_reader.c b/sound/soc/sti/uniperif_reader.c index 136059331211d..065c5f0d1f5f0 100644 --- a/sound/soc/sti/uniperif_reader.c +++ b/sound/soc/sti/uniperif_reader.c @@ -65,7 +65,7 @@ static irqreturn_t uni_reader_irq_handler(int irq, void *dev_id) if (unlikely(status & UNIPERIF_ITS_FIFO_ERROR_MASK(reader))) { dev_err(reader->dev, "FIFO error detected\n"); - snd_pcm_stop_xrun(reader->substream); + snd_pcm_stop(reader->substream, SNDRV_PCM_STATE_XRUN); ret = IRQ_HANDLED; } diff --git a/sound/soc/ti/davinci-i2s.c b/sound/soc/ti/davinci-i2s.c index d89b5c928c4d7..b2b2dcdb05d4c 100644 --- a/sound/soc/ti/davinci-i2s.c +++ b/sound/soc/ti/davinci-i2s.c @@ -708,7 +708,9 @@ static int davinci_i2s_probe(struct platform_device *pdev) dev->clk = clk_get(&pdev->dev, NULL); if (IS_ERR(dev->clk)) return -ENODEV; - clk_enable(dev->clk); + ret = clk_enable(dev->clk); + if (ret) + goto err_put_clk; dev->dev = &pdev->dev; dev_set_drvdata(&pdev->dev, dev); @@ -730,6 +732,7 @@ static int davinci_i2s_probe(struct platform_device *pdev) snd_soc_unregister_component(&pdev->dev); err_release_clk: clk_disable(dev->clk); +err_put_clk: clk_put(dev->clk); return ret; } diff --git a/sound/soc/uniphier/Kconfig b/sound/soc/uniphier/Kconfig index aa3592ee1358b..ddfa6424c656b 100644 --- a/sound/soc/uniphier/Kconfig +++ b/sound/soc/uniphier/Kconfig @@ -23,7 +23,6 @@ config SND_SOC_UNIPHIER_LD11 tristate "UniPhier LD11/LD20 Device Driver" depends on SND_SOC_UNIPHIER select SND_SOC_UNIPHIER_AIO - select SND_SOC_UNIPHIER_AIO_DMA help This adds ASoC driver for Socionext UniPhier LD11/LD20 input and output that can be used with other codecs. @@ -34,7 +33,6 @@ config SND_SOC_UNIPHIER_PXS2 tristate "UniPhier PXs2 Device Driver" depends on SND_SOC_UNIPHIER select SND_SOC_UNIPHIER_AIO - select SND_SOC_UNIPHIER_AIO_DMA help This adds ASoC driver for Socionext UniPhier PXs2 input and output that can be used with other codecs. diff --git a/sound/soc/xilinx/xlnx_formatter_pcm.c b/sound/soc/xilinx/xlnx_formatter_pcm.c index 1f15c11782ec4..4caee57114b88 100644 --- a/sound/soc/xilinx/xlnx_formatter_pcm.c +++ b/sound/soc/xilinx/xlnx_formatter_pcm.c @@ -37,6 +37,7 @@ #define XLNX_AUD_XFER_COUNT 0x28 #define XLNX_AUD_CH_STS_START 0x2C #define XLNX_BYTES_PER_CH 0x44 +#define XLNX_AUD_ALIGN_BYTES 64 #define AUD_STS_IOC_IRQ_MASK BIT(31) #define AUD_STS_CH_STS_MASK BIT(29) @@ -370,12 +371,32 @@ static int xlnx_formatter_pcm_open(struct snd_pcm_substream *substream) snd_soc_set_runtime_hwparams(substream, &xlnx_pcm_hardware); runtime->private_data = stream_data; - /* Resize the period size divisible by 64 */ + /* Resize the period bytes as divisible by 64 */ err = snd_pcm_hw_constraint_step(runtime, 0, - SNDRV_PCM_HW_PARAM_PERIOD_BYTES, 64); + SNDRV_PCM_HW_PARAM_PERIOD_BYTES, + XLNX_AUD_ALIGN_BYTES); if (err) { dev_err(component->dev, - "unable to set constraint on period bytes\n"); + "Unable to set constraint on period bytes\n"); + return err; + } + + /* Resize the buffer bytes as divisible by 64 */ + err = snd_pcm_hw_constraint_step(runtime, 0, + SNDRV_PCM_HW_PARAM_BUFFER_BYTES, + XLNX_AUD_ALIGN_BYTES); + if (err) { + dev_err(component->dev, + "Unable to set constraint on buffer bytes\n"); + return err; + } + + /* Set periods as integer multiple */ + err = snd_pcm_hw_constraint_integer(runtime, + SNDRV_PCM_HW_PARAM_PERIODS); + if (err < 0) { + dev_err(component->dev, + "Unable to set constraint on periods to be integer\n"); return err; } diff --git a/sound/spi/at73c213.c b/sound/spi/at73c213.c index 4de1ba9a418d9..6e5d315bab59b 100644 --- a/sound/spi/at73c213.c +++ b/sound/spi/at73c213.c @@ -218,7 +218,9 @@ static int snd_at73c213_pcm_open(struct snd_pcm_substream *substream) runtime->hw = snd_at73c213_playback_hw; chip->substream = substream; - clk_enable(chip->ssc->clk); + err = clk_enable(chip->ssc->clk); + if (err) + return err; return 0; } @@ -784,7 +786,9 @@ static int snd_at73c213_chip_init(struct snd_at73c213 *chip) goto out; /* Enable DAC master clock. */ - clk_enable(chip->board->dac_clk); + retval = clk_enable(chip->board->dac_clk); + if (retval) + goto out; /* Initialize at73c213 on SPI bus. */ retval = snd_at73c213_write_reg(chip, DAC_RST, 0x04); @@ -897,7 +901,9 @@ static int snd_at73c213_dev_init(struct snd_card *card, chip->card = card; chip->irq = -1; - clk_enable(chip->ssc->clk); + retval = clk_enable(chip->ssc->clk); + if (retval) + return retval; retval = request_irq(irq, snd_at73c213_interrupt, 0, "at73c213", chip); if (retval) { @@ -1016,7 +1022,9 @@ static int snd_at73c213_remove(struct spi_device *spi) int retval; /* Stop playback. */ - clk_enable(chip->ssc->clk); + retval = clk_enable(chip->ssc->clk); + if (retval) + goto out; ssc_writel(chip->ssc->regs, CR, SSC_BIT(CR_TXDIS)); clk_disable(chip->ssc->clk); @@ -1096,9 +1104,16 @@ static int snd_at73c213_resume(struct device *dev) { struct snd_card *card = dev_get_drvdata(dev); struct snd_at73c213 *chip = card->private_data; + int retval; - clk_enable(chip->board->dac_clk); - clk_enable(chip->ssc->clk); + retval = clk_enable(chip->board->dac_clk); + if (retval) + return retval; + retval = clk_enable(chip->ssc->clk); + if (retval) { + clk_disable(chip->board->dac_clk); + return retval; + } ssc_writel(chip->ssc->regs, CR, SSC_BIT(CR_TXEN)); return 0; diff --git a/sound/synth/emux/emux.c b/sound/synth/emux/emux.c index f65e6c7b139f5..6695530bba9b3 100644 --- a/sound/synth/emux/emux.c +++ b/sound/synth/emux/emux.c @@ -88,7 +88,7 @@ int snd_emux_register(struct snd_emux *emu, struct snd_card *card, int index, ch emu->name = kstrdup(name, GFP_KERNEL); emu->voices = kcalloc(emu->max_voices, sizeof(struct snd_emux_voice), GFP_KERNEL); - if (emu->voices == NULL) + if (emu->name == NULL || emu->voices == NULL) return -ENOMEM; /* create soundfont list */ diff --git a/sound/usb/6fire/comm.c b/sound/usb/6fire/comm.c index 43a2a62d66f7e..49629d4bb327a 100644 --- a/sound/usb/6fire/comm.c +++ b/sound/usb/6fire/comm.c @@ -95,7 +95,7 @@ static int usb6fire_comm_send_buffer(u8 *buffer, struct usb_device *dev) int actual_len; ret = usb_interrupt_msg(dev, usb_sndintpipe(dev, COMM_EP), - buffer, buffer[1] + 2, &actual_len, HZ); + buffer, buffer[1] + 2, &actual_len, 1000); if (ret < 0) return ret; else if (actual_len != buffer[1] + 2) diff --git a/sound/usb/6fire/firmware.c b/sound/usb/6fire/firmware.c index 69137c14d0dcf..2333e8ff3411a 100644 --- a/sound/usb/6fire/firmware.c +++ b/sound/usb/6fire/firmware.c @@ -162,7 +162,7 @@ static int usb6fire_fw_ezusb_write(struct usb_device *device, ret = usb_control_msg(device, usb_sndctrlpipe(device, 0), type, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - value, 0, data, len, HZ); + value, 0, data, len, 1000); if (ret < 0) return ret; else if (ret != len) @@ -175,7 +175,7 @@ static int usb6fire_fw_ezusb_read(struct usb_device *device, { int ret = usb_control_msg(device, usb_rcvctrlpipe(device, 0), type, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, - 0, data, len, HZ); + 0, data, len, 1000); if (ret < 0) return ret; else if (ret != len) @@ -190,7 +190,7 @@ static int usb6fire_fw_fpga_write(struct usb_device *device, int ret; ret = usb_bulk_msg(device, usb_sndbulkpipe(device, FPGA_EP), data, len, - &actual_len, HZ); + &actual_len, 1000); if (ret < 0) return ret; else if (actual_len != len) diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c index 1e38cdda2af69..8ca56ba600cf4 100644 --- a/sound/usb/line6/driver.c +++ b/sound/usb/line6/driver.c @@ -113,12 +113,12 @@ static int line6_send_raw_message(struct usb_line6 *line6, const char *buffer, retval = usb_interrupt_msg(line6->usbdev, usb_sndintpipe(line6->usbdev, properties->ep_ctrl_w), (char *)frag_buf, frag_size, - &partial, LINE6_TIMEOUT * HZ); + &partial, LINE6_TIMEOUT); } else { retval = usb_bulk_msg(line6->usbdev, usb_sndbulkpipe(line6->usbdev, properties->ep_ctrl_w), (char *)frag_buf, frag_size, - &partial, LINE6_TIMEOUT * HZ); + &partial, LINE6_TIMEOUT); } if (retval) { @@ -350,7 +350,7 @@ int line6_read_data(struct usb_line6 *line6, unsigned address, void *data, ret = usb_control_msg(usbdev, usb_sndctrlpipe(usbdev, 0), 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, (datalen << 8) | 0x21, address, - NULL, 0, LINE6_TIMEOUT * HZ); + NULL, 0, LINE6_TIMEOUT); if (ret < 0) { dev_err(line6->ifcdev, "read request failed (error %d)\n", ret); @@ -365,7 +365,7 @@ int line6_read_data(struct usb_line6 *line6, unsigned address, void *data, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, 0x0012, 0x0000, len, 1, - LINE6_TIMEOUT * HZ); + LINE6_TIMEOUT); if (ret < 0) { dev_err(line6->ifcdev, "receive length failed (error %d)\n", ret); @@ -393,7 +393,7 @@ int line6_read_data(struct usb_line6 *line6, unsigned address, void *data, ret = usb_control_msg(usbdev, usb_rcvctrlpipe(usbdev, 0), 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, 0x0013, 0x0000, data, datalen, - LINE6_TIMEOUT * HZ); + LINE6_TIMEOUT); if (ret < 0) dev_err(line6->ifcdev, "read failed (error %d)\n", ret); @@ -425,7 +425,7 @@ int line6_write_data(struct usb_line6 *line6, unsigned address, void *data, ret = usb_control_msg(usbdev, usb_sndctrlpipe(usbdev, 0), 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, 0x0022, address, data, datalen, - LINE6_TIMEOUT * HZ); + LINE6_TIMEOUT); if (ret < 0) { dev_err(line6->ifcdev, @@ -441,7 +441,7 @@ int line6_write_data(struct usb_line6 *line6, unsigned address, void *data, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, 0x0012, 0x0000, - status, 1, LINE6_TIMEOUT * HZ); + status, 1, LINE6_TIMEOUT); if (ret < 0) { dev_err(line6->ifcdev, diff --git a/sound/usb/line6/driver.h b/sound/usb/line6/driver.h index e5e572ed5f304..890c239e3fc01 100644 --- a/sound/usb/line6/driver.h +++ b/sound/usb/line6/driver.h @@ -27,7 +27,7 @@ #define LINE6_FALLBACK_INTERVAL 10 #define LINE6_FALLBACK_MAXPACKETSIZE 16 -#define LINE6_TIMEOUT 1 +#define LINE6_TIMEOUT 1000 #define LINE6_BUFSIZE_LISTEN 64 #define LINE6_MIDI_MESSAGE_MAXLEN 256 diff --git a/sound/usb/line6/podhd.c b/sound/usb/line6/podhd.c index 5d9954a2d05e6..8b1610bdb8d5c 100644 --- a/sound/usb/line6/podhd.c +++ b/sound/usb/line6/podhd.c @@ -190,7 +190,7 @@ static int podhd_dev_start(struct usb_line6_podhd *pod) ret = usb_control_msg(usbdev, usb_sndctrlpipe(usbdev, 0), 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, 0x11, 0, - NULL, 0, LINE6_TIMEOUT * HZ); + NULL, 0, LINE6_TIMEOUT); if (ret < 0) { dev_err(pod->line6.ifcdev, "read request failed (error %d)\n", ret); goto exit; @@ -200,7 +200,7 @@ static int podhd_dev_start(struct usb_line6_podhd *pod) ret = usb_control_msg(usbdev, usb_rcvctrlpipe(usbdev, 0), 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, 0x11, 0x0, - init_bytes, 3, LINE6_TIMEOUT * HZ); + init_bytes, 3, LINE6_TIMEOUT); if (ret < 0) { dev_err(pod->line6.ifcdev, "receive length failed (error %d)\n", ret); @@ -220,7 +220,7 @@ static int podhd_dev_start(struct usb_line6_podhd *pod) USB_REQ_SET_FEATURE, USB_TYPE_STANDARD | USB_RECIP_DEVICE | USB_DIR_OUT, 1, 0, - NULL, 0, LINE6_TIMEOUT * HZ); + NULL, 0, LINE6_TIMEOUT); exit: kfree(init_bytes); return ret; diff --git a/sound/usb/line6/toneport.c b/sound/usb/line6/toneport.c index d0a555dbe324f..21f86c71dad7f 100644 --- a/sound/usb/line6/toneport.c +++ b/sound/usb/line6/toneport.c @@ -128,7 +128,7 @@ static int toneport_send_cmd(struct usb_device *usbdev, int cmd1, int cmd2) ret = usb_control_msg(usbdev, usb_sndctrlpipe(usbdev, 0), 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, - cmd1, cmd2, NULL, 0, LINE6_TIMEOUT * HZ); + cmd1, cmd2, NULL, 0, LINE6_TIMEOUT); if (ret < 0) { dev_err(&usbdev->dev, "send failed (error %d)\n", ret); diff --git a/sound/usb/midi.c b/sound/usb/midi.c index 33e9a7f6246f7..ce501200e592f 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -1210,6 +1210,7 @@ static void snd_usbmidi_output_drain(struct snd_rawmidi_substream *substream) } while (drain_urbs && timeout); finish_wait(&ep->drain_wait, &wait); } + port->active = 0; spin_unlock_irq(&ep->buffer_lock); } diff --git a/sound/usb/misc/ua101.c b/sound/usb/misc/ua101.c index 307b72d5fffa9..77304a29a61d2 100644 --- a/sound/usb/misc/ua101.c +++ b/sound/usb/misc/ua101.c @@ -1020,7 +1020,7 @@ static int detect_usb_format(struct ua101 *ua) fmt_playback->bSubframeSize * ua->playback.channels; epd = &ua->intf[INTF_CAPTURE]->altsetting[1].endpoint[0].desc; - if (!usb_endpoint_is_isoc_in(epd)) { + if (!usb_endpoint_is_isoc_in(epd) || usb_endpoint_maxp(epd) == 0) { dev_err(&ua->dev->dev, "invalid capture endpoint\n"); return -ENXIO; } @@ -1028,7 +1028,7 @@ static int detect_usb_format(struct ua101 *ua) ua->capture.max_packet_bytes = usb_endpoint_maxp(epd); epd = &ua->intf[INTF_PLAYBACK]->altsetting[1].endpoint[0].desc; - if (!usb_endpoint_is_isoc_out(epd)) { + if (!usb_endpoint_is_isoc_out(epd) || usb_endpoint_maxp(epd) == 0) { dev_err(&ua->dev->dev, "invalid playback endpoint\n"); return -ENXIO; } diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index d926869c031b1..1f7c80541d03b 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -2370,9 +2370,10 @@ void snd_usb_mixer_fu_apply_quirk(struct usb_mixer_interface *mixer, if (unitid == 7 && cval->control == UAC_FU_VOLUME) snd_dragonfly_quirk_db_scale(mixer, cval, kctl); break; - /* lowest playback value is muted on C-Media devices */ - case USB_ID(0x0d8c, 0x000c): - case USB_ID(0x0d8c, 0x0014): + /* lowest playback value is muted on some devices */ + case USB_ID(0x0d8c, 0x000c): /* C-Media */ + case USB_ID(0x0d8c, 0x0014): /* C-Media */ + case USB_ID(0x19f7, 0x0003): /* RODE NT-USB */ if (strstr(kctl->id.name, "Playback")) cval->min_mute = 1; break; diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 441335abb4018..c29ccdf9e8bca 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -25,6 +25,16 @@ .idProduct = prod, \ .bInterfaceClass = USB_CLASS_VENDOR_SPEC +/* A standard entry matching with vid/pid and the audio class/subclass */ +#define USB_AUDIO_DEVICE(vend, prod) \ + .match_flags = USB_DEVICE_ID_MATCH_DEVICE | \ + USB_DEVICE_ID_MATCH_INT_CLASS | \ + USB_DEVICE_ID_MATCH_INT_SUBCLASS, \ + .idVendor = vend, \ + .idProduct = prod, \ + .bInterfaceClass = USB_CLASS_AUDIO, \ + .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL + /* HP Thunderbolt Dock Audio Headset */ { USB_DEVICE(0x03f0, 0x0269), @@ -125,6 +135,48 @@ .bInterfaceClass = USB_CLASS_AUDIO, }, +/* + * Creative Technology, Ltd Live! Cam Sync HD [VF0770] + * The device advertises 8 formats, but only a rate of 48kHz is honored by the + * hardware and 24 bits give chopped audio, so only report the one working + * combination. + */ +{ + USB_DEVICE(0x041e, 0x4095), + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = &(const struct snd_usb_audio_quirk[]) { + { + .ifnum = 2, + .type = QUIRK_AUDIO_STANDARD_MIXER, + }, + { + .ifnum = 3, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S16_LE, + .channels = 2, + .fmt_bits = 16, + .iface = 3, + .altsetting = 4, + .altset_idx = 4, + .endpoint = 0x82, + .ep_attr = 0x05, + .rates = SNDRV_PCM_RATE_48000, + .rate_min = 48000, + .rate_max = 48000, + .nr_rates = 1, + .rate_table = (unsigned int[]) { 48000 }, + }, + }, + { + .ifnum = -1 + }, + }, + }, +}, + /* * HP Wireless Audio * When not ignored, causes instability issues for some users, forcing them to @@ -3764,5 +3816,37 @@ ALC1220_VB_DESKTOP(0x26ce, 0x0a01), /* Asrock TRX40 Creator */ } } }, +{ + /* + * Sennheiser GSP670 + * Change order of interfaces loaded + */ + USB_DEVICE(0x1395, 0x0300), + .bInterfaceClass = USB_CLASS_PER_INTERFACE, + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = &(const struct snd_usb_audio_quirk[]) { + // Communication + { + .ifnum = 3, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + // Recording + { + .ifnum = 4, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + // Main + { + .ifnum = 1, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + { + .ifnum = -1 + } + } + } +}, #undef USB_DEVICE_VENDOR_SPEC diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 4d20f3f45f7e2..72223545abfd8 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1841,6 +1841,8 @@ static const struct registration_quirk registration_quirks[] = { REG_QUIRK_ENTRY(0x0951, 0x16ed, 2), /* Kingston HyperX Cloud Alpha S */ REG_QUIRK_ENTRY(0x0951, 0x16ea, 2), /* Kingston HyperX Cloud Flight S */ REG_QUIRK_ENTRY(0x0ecb, 0x1f46, 2), /* JBL Quantum 600 */ + REG_QUIRK_ENTRY(0x0ecb, 0x1f47, 2), /* JBL Quantum 800 */ + REG_QUIRK_ENTRY(0x0ecb, 0x1f4c, 2), /* JBL Quantum 400 */ REG_QUIRK_ENTRY(0x0ecb, 0x2039, 2), /* JBL Quantum 400 */ REG_QUIRK_ENTRY(0x0ecb, 0x203c, 2), /* JBL Quantum 600 */ REG_QUIRK_ENTRY(0x0ecb, 0x203e, 2), /* JBL Quantum 800 */ diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h index ff97fdcf63bd5..b1959e04cbb14 100644 --- a/sound/usb/usbaudio.h +++ b/sound/usb/usbaudio.h @@ -8,7 +8,7 @@ */ /* handling of USB vendor/product ID pairs as 32-bit numbers */ -#define USB_ID(vendor, product) (((vendor) << 16) | (product)) +#define USB_ID(vendor, product) (((unsigned int)(vendor) << 16) | (product)) #define USB_ID_VENDOR(id) ((id) >> 16) #define USB_ID_PRODUCT(id) ((u16)(id)) diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c index 5fd4e32247a6d..a314f13e3292e 100644 --- a/sound/x86/intel_hdmi_audio.c +++ b/sound/x86/intel_hdmi_audio.c @@ -1279,7 +1279,7 @@ static int had_pcm_mmap(struct snd_pcm_substream *substream, { vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); return remap_pfn_range(vma, vma->vm_start, - substream->dma_buffer.addr >> PAGE_SHIFT, + substream->runtime->dma_addr >> PAGE_SHIFT, vma->vm_end - vma->vm_start, vma->vm_page_prot); } diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index e33f83ffe6b06..5ef6ecca3b82e 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -284,6 +284,7 @@ #define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */ #define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */ #define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ +#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+ 6) /* "" Fill RSB on VM-Exit when EIBRS is enabled */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index a5ea841cc6d22..f0f935f8d9174 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -84,6 +84,7 @@ #define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP) #define DISABLED_MASK17 0 #define DISABLED_MASK18 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) +#define DISABLED_MASK19 0 +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 20) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/tools/arch/x86/include/asm/emulate_prefix.h b/tools/arch/x86/include/asm/emulate_prefix.h new file mode 100644 index 0000000000000..70f5b98a52869 --- /dev/null +++ b/tools/arch/x86/include/asm/emulate_prefix.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_EMULATE_PREFIX_H +#define _ASM_X86_EMULATE_PREFIX_H + +/* + * Virt escape sequences to trigger instruction emulation; + * ideally these would decode to 'whole' instruction and not destroy + * the instruction stream; sadly this is not true for the 'kvm' one :/ + */ + +#define __XEN_EMULATE_PREFIX 0x0f,0x0b,0x78,0x65,0x6e /* ud2 ; .ascii "xen" */ +#define __KVM_EMULATE_PREFIX 0x0f,0x0b,0x6b,0x76,0x6d /* ud2 ; .ascii "kvm" */ + +#endif diff --git a/tools/arch/x86/include/asm/insn.h b/tools/arch/x86/include/asm/insn.h index d7f0ae8f3c442..52c6262e6bfd1 100644 --- a/tools/arch/x86/include/asm/insn.h +++ b/tools/arch/x86/include/asm/insn.h @@ -45,6 +45,7 @@ struct insn { struct insn_field immediate2; /* for 64bit imm or seg16 */ }; + int emulate_prefix_size; insn_attr_t attr; unsigned char opnd_bytes; unsigned char addr_bytes; @@ -128,6 +129,11 @@ static inline int insn_is_evex(struct insn *insn) return (insn->vex_prefix.nbytes == 4); } +static inline int insn_has_emulate_prefix(struct insn *insn) +{ + return !!insn->emulate_prefix_size; +} + /* Ensure this instruction is decoded completely */ static inline int insn_complete(struct insn *insn) { diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h index 6847d85400a8b..fa5700097f647 100644 --- a/tools/arch/x86/include/asm/required-features.h +++ b/tools/arch/x86/include/asm/required-features.h @@ -101,6 +101,7 @@ #define REQUIRED_MASK16 0 #define REQUIRED_MASK17 0 #define REQUIRED_MASK18 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) +#define REQUIRED_MASK19 0 +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 20) #endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/tools/arch/x86/lib/insn.c b/tools/arch/x86/lib/insn.c index 79e048f1d9028..0151dfc6da616 100644 --- a/tools/arch/x86/lib/insn.c +++ b/tools/arch/x86/lib/insn.c @@ -13,6 +13,8 @@ #include "../include/asm/inat.h" #include "../include/asm/insn.h" +#include "../include/asm/emulate_prefix.h" + /* Verify next sizeof(t) bytes can be on the same instruction */ #define validate_next(t, insn, n) \ ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) @@ -58,6 +60,36 @@ void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64) insn->addr_bytes = 4; } +static const insn_byte_t xen_prefix[] = { __XEN_EMULATE_PREFIX }; +static const insn_byte_t kvm_prefix[] = { __KVM_EMULATE_PREFIX }; + +static int __insn_get_emulate_prefix(struct insn *insn, + const insn_byte_t *prefix, size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) { + if (peek_nbyte_next(insn_byte_t, insn, i) != prefix[i]) + goto err_out; + } + + insn->emulate_prefix_size = len; + insn->next_byte += len; + + return 1; + +err_out: + return 0; +} + +static void insn_get_emulate_prefix(struct insn *insn) +{ + if (__insn_get_emulate_prefix(insn, xen_prefix, sizeof(xen_prefix))) + return; + + __insn_get_emulate_prefix(insn, kvm_prefix, sizeof(kvm_prefix)); +} + /** * insn_get_prefixes - scan x86 instruction prefix bytes * @insn: &struct insn containing instruction @@ -76,6 +108,8 @@ void insn_get_prefixes(struct insn *insn) if (prefixes->got) return; + insn_get_emulate_prefix(insn); + nb = 0; lb = 0; b = peek_next(insn_byte_t, insn); diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile index 815ac9804aee9..f79bba931cbfb 100644 --- a/tools/bpf/bpftool/Documentation/Makefile +++ b/tools/bpf/bpftool/Documentation/Makefile @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only include ../../../scripts/Makefile.include -include ../../../scripts/utilities.mak INSTALL ?= install RM ?= rm -f diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 67e9faaa4e773..adea596a10a2a 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only include ../../scripts/Makefile.include -include ../../scripts/utilities.mak ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(CURDIR))) diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 7d3cfb0ccbe61..4b03983acbefe 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -362,6 +362,8 @@ int main(int argc, char **argv) }; int opt, ret; + setlinebuf(stdout); + last_do_help = do_help; pretty_output = false; json_output = false; diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 8c6e1ea67f213..1ea26bb8c5791 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -52,7 +52,6 @@ FEATURE_TESTS_BASIC := \ numa_num_possible_cpus \ libperl \ libpython \ - libpython-version \ libslang \ libslang-include-subdir \ libcrypto \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 054e09ab4a9e4..88392219d425e 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -30,7 +30,6 @@ FILES= \ test-numa_num_possible_cpus.bin \ test-libperl.bin \ test-libpython.bin \ - test-libpython-version.bin \ test-libslang.bin \ test-libslang-include-subdir.bin \ test-libcrypto.bin \ @@ -205,18 +204,22 @@ strip-libs = $(filter-out -l%,$(1)) PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null) PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS)) PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS)) -PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null` +PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null) FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) +ifeq ($(CC_NO_CLANG), 0) + PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS)) + PERL_EMBED_CCOPTS := $(filter-out -flto=auto -ffat-lto-objects, $(PERL_EMBED_CCOPTS)) + PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS)) + FLAGS_PERL_EMBED += -Wno-compound-token-split-by-macro +endif + $(OUTPUT)test-libperl.bin: $(BUILD) $(FLAGS_PERL_EMBED) $(OUTPUT)test-libpython.bin: $(BUILD) $(FLAGS_PYTHON_EMBED) -$(OUTPUT)test-libpython-version.bin: - $(BUILD) - $(OUTPUT)test-libbfd.bin: $(BUILD) -DPACKAGE='"perf"' -lbfd -ldl diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 88145e8cde1a6..6eaeaf2da36ea 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -14,10 +14,6 @@ # include "test-libpython.c" #undef main -#define main main_test_libpython_version -# include "test-libpython-version.c" -#undef main - #define main main_test_libperl # include "test-libperl.c" #undef main @@ -193,7 +189,6 @@ int main(int argc, char *argv[]) { main_test_libpython(); - main_test_libpython_version(); main_test_libperl(); main_test_hello(); main_test_libelf(); diff --git a/tools/build/feature/test-libpython-version.c b/tools/build/feature/test-libpython-version.c deleted file mode 100644 index 47714b942d4d3..0000000000000 --- a/tools/build/feature/test-libpython-version.c +++ /dev/null @@ -1,11 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include - -#if PY_VERSION_HEX >= 0x03000000 - #error -#endif - -int main(void) -{ - return 0; -} diff --git a/tools/cgroup/iocost_monitor.py b/tools/cgroup/iocost_monitor.py index b8c082c9fd7d5..c4ff907c078b5 100644 --- a/tools/cgroup/iocost_monitor.py +++ b/tools/cgroup/iocost_monitor.py @@ -28,7 +28,8 @@ parser.add_argument('--cgroup', action='append', metavar='REGEX', help='Regex for target cgroups, ') parser.add_argument('--interval', '-i', metavar='SECONDS', type=float, default=1, - help='Monitoring interval in seconds') + help='Monitoring interval in seconds (0 exits immediately ' + 'after checking requirements)') parser.add_argument('--json', action='store_true', help='Output in json') args = parser.parse_args() @@ -44,8 +45,7 @@ def err(s): err('The kernel does not have iocost enabled') IOC_RUNNING = prog['IOC_RUNNING'].value_() -NR_USAGE_SLOTS = prog['NR_USAGE_SLOTS'].value_() -HWEIGHT_WHOLE = prog['HWEIGHT_WHOLE'].value_() +WEIGHT_ONE = prog['WEIGHT_ONE'].value_() VTIME_PER_SEC = prog['VTIME_PER_SEC'].value_() VTIME_PER_USEC = prog['VTIME_PER_USEC'].value_() AUTOP_SSD_FAST = prog['AUTOP_SSD_FAST'].value_() @@ -72,7 +72,7 @@ def walk(self, blkcg, q_id, parent_path): name = BlkgIterator.blkcg_name(blkcg) path = parent_path + '/' + name if parent_path else name blkg = drgn.Object(prog, 'struct blkcg_gq', - address=radix_tree_lookup(blkcg.blkg_tree, q_id)) + address=radix_tree_lookup(blkcg.blkg_tree.address_of_(), q_id)) if not blkg.address_: return @@ -99,7 +99,7 @@ def __init__(self, ioc): self.period_ms = ioc.period_us.value_() / 1_000 self.period_at = ioc.period_at.value_() / 1_000_000 self.vperiod_at = ioc.period_at_vtime.value_() / VTIME_PER_SEC - self.vrate_pct = ioc.vtime_rate.counter.value_() * 100 / VTIME_PER_USEC + self.vrate_pct = ioc.vtime_base_rate.value_() * 100 / VTIME_PER_USEC self.busy_level = ioc.busy_level.value_() self.autop_idx = ioc.autop_idx.value_() self.user_cost_model = ioc.user_cost_model.value_() @@ -135,7 +135,7 @@ def table_preamble_str(self): def table_header_str(self): return f'{"":25} active {"weight":>9} {"hweight%":>13} {"inflt%":>6} ' \ - f'{"dbt":>3} {"delay":>6} {"usages%"}' + f'{"debt":>7} {"delay":>7} {"usage%"}' class IocgStat: def __init__(self, iocg): @@ -143,11 +143,11 @@ def __init__(self, iocg): blkg = iocg.pd.blkg self.is_active = not list_empty(iocg.active_list.address_of_()) - self.weight = iocg.weight.value_() - self.active = iocg.active.value_() - self.inuse = iocg.inuse.value_() - self.hwa_pct = iocg.hweight_active.value_() * 100 / HWEIGHT_WHOLE - self.hwi_pct = iocg.hweight_inuse.value_() * 100 / HWEIGHT_WHOLE + self.weight = iocg.weight.value_() / WEIGHT_ONE + self.active = iocg.active.value_() / WEIGHT_ONE + self.inuse = iocg.inuse.value_() / WEIGHT_ONE + self.hwa_pct = iocg.hweight_active.value_() * 100 / WEIGHT_ONE + self.hwi_pct = iocg.hweight_inuse.value_() * 100 / WEIGHT_ONE self.address = iocg.value_() vdone = iocg.done_vtime.counter.value_() @@ -159,23 +159,13 @@ def __init__(self, iocg): else: self.inflight_pct = 0 - # vdebt used to be an atomic64_t and is now u64, support both - try: - self.debt_ms = iocg.abs_vdebt.counter.value_() / VTIME_PER_USEC / 1000 - except: - self.debt_ms = iocg.abs_vdebt.value_() / VTIME_PER_USEC / 1000 - - self.use_delay = blkg.use_delay.counter.value_() - self.delay_ms = blkg.delay_nsec.counter.value_() / 1_000_000 - - usage_idx = iocg.usage_idx.value_() - self.usages = [] - self.usage = 0 - for i in range(NR_USAGE_SLOTS): - usage = iocg.usages[(usage_idx + i) % NR_USAGE_SLOTS].value_() - upct = usage * 100 / HWEIGHT_WHOLE - self.usages.append(upct) - self.usage = max(self.usage, upct) + self.usage = (100 * iocg.usage_delta_us.value_() / + ioc.period_us.value_()) if self.active else 0 + self.debt_ms = iocg.abs_vdebt.value_() / VTIME_PER_USEC / 1000 + if blkg.use_delay.counter.value_() != 0: + self.delay_ms = blkg.delay_nsec.counter.value_() / 1_000_000 + else: + self.delay_ms = 0 def dict(self, now, path): out = { 'cgroup' : path, @@ -188,25 +178,20 @@ def dict(self, now, path): 'hweight_inuse_pct' : self.hwi_pct, 'inflight_pct' : self.inflight_pct, 'debt_ms' : self.debt_ms, - 'use_delay' : self.use_delay, 'delay_ms' : self.delay_ms, 'usage_pct' : self.usage, 'address' : self.address } - for i in range(len(self.usages)): - out[f'usage_pct_{i}'] = str(self.usages[i]) return out def table_row_str(self, path): out = f'{path[-28:]:28} ' \ f'{"*" if self.is_active else " "} ' \ - f'{self.inuse:5}/{self.active:5} ' \ + f'{round(self.inuse):5}/{round(self.active):5} ' \ f'{self.hwi_pct:6.2f}/{self.hwa_pct:6.2f} ' \ f'{self.inflight_pct:6.2f} ' \ - f'{min(math.ceil(self.debt_ms), 999):3} ' \ - f'{min(self.use_delay, 99):2}*'\ - f'{min(math.ceil(self.delay_ms), 999):03} ' - for u in self.usages: - out += f'{min(round(u), 999):03d}:' + f'{self.debt_ms:7.2f} ' \ + f'{self.delay_ms:7.2f} '\ + f'{min(self.usage, 999):6.2f}' out = out.rstrip(':') return out @@ -233,7 +218,7 @@ def table_row_str(self, path): root_iocg = None ioc = None -for i, ptr in radix_tree_for_each(blkcg_root.blkg_tree): +for i, ptr in radix_tree_for_each(blkcg_root.blkg_tree.address_of_()): blkg = drgn.Object(prog, 'struct blkcg_gq', address=ptr) try: if devname == blkg.q.kobj.parent.name.string_().decode('utf-8'): @@ -248,6 +233,9 @@ def table_row_str(self, path): if ioc is None: err(f'Could not find ioc for {devname}'); +if interval == 0: + sys.exit(0) + # Keep printing while True: now = time.time() diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index 2551e9b71167b..b8cecb66d28b7 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -422,16 +422,22 @@ struct stat { }) /* startup code */ +/* + * x86-64 System V ABI mandates: + * 1) %rsp must be 16-byte aligned right before the function call. + * 2) The deepest stack frame should be zero (the %rbp). + * + */ asm(".section .text\n" ".global _start\n" "_start:\n" "pop %rdi\n" // argc (first arg, %rdi) "mov %rsp, %rsi\n" // argv[] (second arg, %rsi) "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx) - "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned when - "sub $8, %rsp\n" // entering the callee + "xor %ebp, %ebp\n" // zero the stack frame + "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned before call "call main\n" // main() returns the status code, we'll exit with it. - "movzb %al, %rdi\n" // retrieve exit code from 8 lower bits + "mov %eax, %edi\n" // retrieve exit code (32 bit) "mov $60, %rax\n" // NR_exit == 60 "syscall\n" // really exit "hlt\n" // ensure it does not return @@ -600,20 +606,28 @@ struct sys_stat_struct { }) /* startup code */ +/* + * i386 System V ABI mandates: + * 1) last pushed argument must be 16-byte aligned. + * 2) The deepest stack frame should be set to zero + * + */ asm(".section .text\n" ".global _start\n" "_start:\n" "pop %eax\n" // argc (first arg, %eax) "mov %esp, %ebx\n" // argv[] (second arg, %ebx) "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx) - "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned when + "xor %ebp, %ebp\n" // zero the stack frame + "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned before + "sub $4, %esp\n" // the call instruction (args are aligned) "push %ecx\n" // push all registers on the stack so that we "push %ebx\n" // support both regparm and plain stack modes "push %eax\n" "call main\n" // main() returns the status code in %eax - "movzbl %al, %ebx\n" // retrieve exit code from lower 8 bits - "movl $1, %eax\n" // NR_exit == 1 - "int $0x80\n" // exit now + "mov %eax, %ebx\n" // retrieve exit code (32-bit int) + "movl $1, %eax\n" // NR_exit == 1 + "int $0x80\n" // exit now "hlt\n" // ensure it does not ""); @@ -797,7 +811,6 @@ asm(".section .text\n" "and %r3, %r1, $-8\n" // AAPCS : sp must be 8-byte aligned in the "mov %sp, %r3\n" // callee, an bl doesn't push (lr=pc) "bl main\n" // main() returns the status code, we'll exit with it. - "and %r0, %r0, $0xff\n" // limit exit code to 8 bits "movs r7, $1\n" // NR_exit == 1 "svc $0x00\n" ""); @@ -994,7 +1007,6 @@ asm(".section .text\n" "add x2, x2, x1\n" // + argv "and sp, x1, -16\n" // sp must be 16-byte aligned in the callee "bl main\n" // main() returns the status code, we'll exit with it. - "and x0, x0, 0xff\n" // limit exit code to 8 bits "mov x8, 93\n" // NR_exit == 93 "svc #0\n" ""); @@ -1199,7 +1211,7 @@ asm(".section .text\n" "addiu $sp,$sp,-16\n" // the callee expects to save a0..a3 there! "jal main\n" // main() returns the status code, we'll exit with it. "nop\n" // delayed slot - "and $a0, $v0, 0xff\n" // limit exit code to 8 bits + "move $a0, $v0\n" // retrieve 32-bit exit code from v0 "li $v0, 4001\n" // NR_exit == 4001 "syscall\n" ".end __start\n" @@ -1397,7 +1409,6 @@ asm(".section .text\n" "add a2,a2,a1\n" // + argv "andi sp,a1,-16\n" // sp must be 16-byte aligned "call main\n" // main() returns the status code, we'll exit with it. - "andi a0, a0, 0xff\n" // limit exit code to 8 bits "li a7, 93\n" // NR_exit == 93 "ecall\n" ""); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index bb6a3e932d98c..2c9206c5c2bef 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -10,6 +10,7 @@ #include #include +#include /* Extended instruction set based on top of classic BPF */ @@ -73,7 +74,7 @@ struct bpf_insn { /* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */ struct bpf_lpm_trie_key { __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ - __u8 data[]; /* Arbitrary size */ + __u8 data[0]; /* Arbitrary size */ }; struct bpf_cgroup_storage_key { @@ -1310,8 +1311,8 @@ union bpf_attr { * Return * The return value depends on the result of the test, and can be: * - * * 0, if current task belongs to the cgroup2. - * * 1, if current task does not belong to the cgroup2. + * * 1, if current task belongs to the cgroup2. + * * 0, if current task does not belong to the cgroup2. * * A negative error code, if an error occurred. * * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) @@ -1585,6 +1586,10 @@ union bpf_attr { * Use with ENCAP_L3/L4 flags to further specify the tunnel * type; *len* is the length of the inner MAC header. * + * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**: + * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the + * L2 type as Ethernet. + * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be @@ -2290,7 +2295,7 @@ union bpf_attr { * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) * Description * Select a **SO_REUSEPORT** socket from a - * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. + * **BPF_MAP_TYPE_REUSEPORT_SOCKARRAY** *map*. * It checks the selected socket is matching the incoming * request in the socket buffer. * Return @@ -3737,13 +3742,11 @@ union bpf_attr { FN(bpf_per_cpu_ptr), \ FN(bpf_this_cpu_ptr), \ FN(redirect_peer), \ - FN(unsafe_helper), \ - /* - * unsafe_helper is the helper functions we implement - * ourselves. In order to ensure that the order of the new helper - * functions which backported from community kernel is consistent, the - * unsafe_helper need to be be placed last. - */ + /* */ + +#ifndef INT_MAX +#define INT_MAX 0x7fffffff +#endif /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -3752,6 +3755,10 @@ union bpf_attr { enum bpf_func_id { __BPF_FUNC_MAPPER(__BPF_ENUM_FN) __BPF_FUNC_MAX_ID, + + __BPF_BYTEDANCE_FUNC_MIN_ID = INT_MAX / 2, + __BPF_ENUM_FN(unsafe_helper), + __BPF_BYTEDANCE_FUNC_MAX_ID, }; #undef __BPF_ENUM_FN @@ -3827,6 +3834,8 @@ enum { BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 = (1ULL << 2), BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3), BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), + BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5), + BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6), }; enum { @@ -3975,7 +3984,8 @@ struct bpf_sock { __u32 src_ip4; __u32 src_ip6[4]; __u32 src_port; /* host byte order */ - __u32 dst_port; /* network byte order */ + __be16 dst_port; /* network byte order */ + __u16 :16; /* zero padding */ __u32 dst_ip4; __u32 dst_ip6[4]; __u32 state; @@ -4043,15 +4053,6 @@ struct bpf_sock_tuple { }; }; -struct bpf_unsafe_ctx { - void *ctx; - __u16 mod; - __u16 cmd; - __u16 flags; - __u16 data_len; - char data[]; -}; - struct bpf_xdp_sock { __u32 queue_id; }; @@ -4071,6 +4072,15 @@ enum xdp_action { XDP_REDIRECT, }; +struct bpf_unsafe_ctx { + void *ctx; + __u16 mod; + __u16 cmd; + __u16 flags; + __u16 data_len; + char data[]; +}; + /* user accessible metadata for XDP packet hook * new fields must be added to the end of this structure */ diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index d606a358480da..41daf0fa95b9f 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -100,22 +100,18 @@ static int btf_parse_hdr(struct btf *btf) return -EINVAL; } - if (meta_left < hdr->type_off) { - pr_debug("Invalid BTF type section offset:%u\n", hdr->type_off); + if (meta_left < hdr->str_off + hdr->str_len) { + pr_debug("Invalid BTF total size:%u\n", btf->data_size); return -EINVAL; } - if (meta_left < hdr->str_off) { - pr_debug("Invalid BTF string section offset:%u\n", hdr->str_off); + if (hdr->type_off + hdr->type_len > hdr->str_off) { + pr_debug("Invalid BTF data sections layout: type data at %u + %u, strings data at %u + %u\n", + hdr->type_off, hdr->type_len, hdr->str_off, hdr->str_len); return -EINVAL; } - if (hdr->type_off >= hdr->str_off) { - pr_debug("BTF type section offset >= string section offset. No type?\n"); - return -EINVAL; - } - - if (hdr->type_off & 0x02) { + if (hdr->type_off % 4) { pr_debug("BTF type section is not aligned to 4 bytes\n"); return -EINVAL; } diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index b2fc452504501..a1176a9e8430a 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -1366,6 +1366,11 @@ static const char *btf_dump_resolve_name(struct btf_dump *d, __u32 id, if (s->name_resolved) return *cached_name ? *cached_name : orig_name; + if (btf_is_fwd(t) || (btf_is_enum(t) && btf_vlen(t) == 0)) { + s->name_resolved = 1; + return orig_name; + } + dup_cnt = btf_dump_name_dups(d, name_map, orig_name); if (dup_cnt > 1) { const size_t max_len = 256; diff --git a/tools/lib/subcmd/subcmd-util.h b/tools/lib/subcmd/subcmd-util.h index 794a375dad360..b2aec04fce8f6 100644 --- a/tools/lib/subcmd/subcmd-util.h +++ b/tools/lib/subcmd/subcmd-util.h @@ -50,15 +50,8 @@ static NORETURN inline void die(const char *err, ...) static inline void *xrealloc(void *ptr, size_t size) { void *ret = realloc(ptr, size); - if (!ret && !size) - ret = realloc(ptr, 1); - if (!ret) { - ret = realloc(ptr, size); - if (!ret && !size) - ret = realloc(ptr, 1); - if (!ret) - die("Out of memory, realloc failed"); - } + if (!ret) + die("Out of memory, realloc failed"); return ret; } diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 06aaf04e629c2..bae6b261481db 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -144,6 +144,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, "usercopy_abort", "machine_real_restart", "rewind_stack_do_exit", + "cpu_bringup_and_idle", }; if (!func) diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh index c3ae1e8ae1194..2a1261bfbb625 100755 --- a/tools/objtool/sync-check.sh +++ b/tools/objtool/sync-check.sh @@ -4,6 +4,7 @@ FILES=' arch/x86/include/asm/inat_types.h arch/x86/include/asm/orc_types.h +arch/x86/include/asm/emulate_prefix.h arch/x86/lib/x86-opcode-map.txt arch/x86/tools/gen-insn-attr-x86.awk ' @@ -46,4 +47,4 @@ done check arch/x86/include/asm/inat.h '-I "^#include [\"<]\(asm/\)*inat_types.h[\">]"' check arch/x86/include/asm/insn.h '-I "^#include [\"<]\(asm/\)*inat.h[\">]"' check arch/x86/lib/inat.c '-I "^#include [\"<]\(../include/\)*asm/insn.h[\">]"' -check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]"' +check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]" -I "^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]"' diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 9832affd5d54b..b94d9afad3f79 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -118,10 +118,10 @@ FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS) FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS) FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS) -FEATURE_CHECK_LDFLAGS-libunwind-arm = -lunwind -lunwind-arm -FEATURE_CHECK_LDFLAGS-libunwind-aarch64 = -lunwind -lunwind-aarch64 -FEATURE_CHECK_LDFLAGS-libunwind-x86 = -lunwind -llzma -lunwind-x86 -FEATURE_CHECK_LDFLAGS-libunwind-x86_64 = -lunwind -llzma -lunwind-x86_64 +FEATURE_CHECK_LDFLAGS-libunwind-arm += -lunwind -lunwind-arm +FEATURE_CHECK_LDFLAGS-libunwind-aarch64 += -lunwind -lunwind-aarch64 +FEATURE_CHECK_LDFLAGS-libunwind-x86 += -lunwind -llzma -lunwind-x86 +FEATURE_CHECK_LDFLAGS-libunwind-x86_64 += -lunwind -llzma -lunwind-x86_64 FEATURE_CHECK_LDFLAGS-libcrypto = -lcrypto @@ -247,8 +247,6 @@ endif FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS) FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS) -FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS) -FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS) FEATURE_CHECK_LDFLAGS-libaio = -lrt @@ -708,6 +706,9 @@ else LDFLAGS += $(PERL_EMBED_LDFLAGS) EXTLIBS += $(PERL_EMBED_LIBADD) CFLAGS += -DHAVE_LIBPERL_SUPPORT + ifeq ($(CC_NO_CLANG), 0) + CFLAGS += -Wno-compound-token-split-by-macro + endif $(call detected,CONFIG_LIBPERL) endif endif diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 30d97121dc4fb..c54013806ba4a 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -224,6 +224,7 @@ int bench_futex_lock_pi(int argc, const char **argv) print_summary(); free(worker); + perf_cpu_map__put(cpu); return ret; err: usage_with_options(bench_futex_lock_pi_usage, options); diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index a00a6891447ab..11b7dbd374864 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -215,6 +215,7 @@ int bench_futex_requeue(int argc, const char **argv) print_summary(); free(worker); + perf_cpu_map__put(cpu); return ret; err: usage_with_options(bench_futex_requeue_usage, options); diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index a053cf2b70397..c3033d0905db1 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -319,6 +319,7 @@ int bench_futex_wake_parallel(int argc, const char **argv) print_summary(); free(blocked_worker); + perf_cpu_map__put(cpu); return ret; } #endif /* HAVE_PTHREAD_BARRIER */ diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index 58906e9499bb0..ad44a78392dc4 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -209,5 +209,6 @@ int bench_futex_wake(int argc, const char **argv) print_summary(); free(worker); + perf_cpu_map__put(cpu); return ret; } diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 5797253b97005..69d62e57a0c36 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -1630,7 +1630,7 @@ static int __bench_numa(const char *name) "GB/sec,", "total-speed", "GB/sec total speed"); if (g->p.show_details >= 2) { - char tname[14 + 2 * 10 + 1]; + char tname[14 + 2 * 11 + 1]; struct thread_data *td; for (p = 0; p < g->p.nr_proc; p++) { for (t = 0; t < g->p.nr_threads; t++) { diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index f2e9d2b1b913d..29d460c301767 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -953,8 +953,8 @@ percent_rmt_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused, double per_left; double per_right; - per_left = PERCENT(left, lcl_hitm); - per_right = PERCENT(right, lcl_hitm); + per_left = PERCENT(left, rmt_hitm); + per_right = PERCENT(right, rmt_hitm); return per_left - per_right; } @@ -2733,9 +2733,7 @@ static int perf_c2c__report(int argc, const char **argv) "the input file to process"), OPT_INCR('N', "node-info", &c2c.node_info, "show extra node info in report (repeat for more info)"), -#ifdef HAVE_SLANG_SUPPORT OPT_BOOLEAN(0, "stdio", &c2c.use_stdio, "Use the stdio interface"), -#endif OPT_BOOLEAN(0, "stats", &c2c.stats_only, "Display only statistic tables (implies --stdio)"), OPT_BOOLEAN(0, "full-symbols", &c2c.symbol_full, @@ -2762,6 +2760,10 @@ static int perf_c2c__report(int argc, const char **argv) if (argc) usage_with_options(report_c2c_usage, options); +#ifndef HAVE_SLANG_SUPPORT + c2c.use_stdio = true; +#endif + if (c2c.stats_only) c2c.use_stdio = true; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index d3c0b04e2e22b..dc228bdf2bbc2 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -569,14 +569,17 @@ static int report__browse_hists(struct report *rep) int ret; struct perf_session *session = rep->session; struct evlist *evlist = session->evlist; - const char *help = perf_tip(system_path(TIPDIR)); + char *help = NULL, *path = NULL; - if (help == NULL) { + path = system_path(TIPDIR); + if (perf_tip(&help, path) || help == NULL) { /* fallback for people who don't install perf ;-) */ - help = perf_tip(DOCDIR); - if (help == NULL) - help = "Cannot load tips.txt file, please install perf!"; + free(path); + path = system_path(DOCDIR); + if (perf_tip(&help, path) || help == NULL) + help = strdup("Cannot load tips.txt file, please install perf!"); } + free(path); switch (use_browser) { case 1: @@ -598,7 +601,7 @@ static int report__browse_hists(struct report *rep) ret = perf_evlist__tty_browse_hists(evlist, rep, help); break; } - + free(help); return ret; } diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index f3ff825d9dd33..bbf1f2d3387e3 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2308,7 +2308,7 @@ static int process_switch_event(struct perf_tool *tool, if (perf_event__process_switch(tool, event, sample, machine) < 0) return -1; - if (scripting_ops && scripting_ops->process_switch) + if (scripting_ops && scripting_ops->process_switch && !filter_cpu(sample)) scripting_ops->process_switch(event, sample, machine); if (!script->show_switch_events) @@ -3779,11 +3779,15 @@ int cmd_script(int argc, const char **argv) goto out_delete; uname(&uts); - if (data.is_pipe || /* assume pipe_mode indicates native_arch */ - !strcmp(uts.machine, session->header.env.arch) || - (!strcmp(uts.machine, "x86_64") && - !strcmp(session->header.env.arch, "i386"))) + if (data.is_pipe) { /* Assume pipe_mode indicates native_arch */ native_arch = true; + } else if (session->header.env.arch) { + if (!strcmp(uts.machine, session->header.env.arch)) + native_arch = true; + else if (!strcmp(uts.machine, "x86_64") && + !strcmp(session->header.env.arch, "i386")) + native_arch = true; + } script.session = session; script__setup_sample_type(&script); diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 6a4758de98aff..68781a2d07728 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -29,6 +29,7 @@ arch/x86/include/asm/disabled-features.h arch/x86/include/asm/required-features.h arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/inat_types.h +arch/x86/include/asm/emulate_prefix.h arch/x86/include/uapi/asm/prctl.h arch/x86/lib/x86-opcode-map.txt arch/x86/tools/gen-insn-attr-x86.awk @@ -117,7 +118,7 @@ check lib/ctype.c '-I "^EXPORT_SYMBOL" -I "^#include " -B check arch/x86/include/asm/inat.h '-I "^#include [\"<]\(asm/\)*inat_types.h[\">]"' check arch/x86/include/asm/insn.h '-I "^#include [\"<]\(asm/\)*inat.h[\">]"' check arch/x86/lib/inat.c '-I "^#include [\"<]\(../include/\)*asm/insn.h[\">]"' -check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]"' +check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]" -I "^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]"' # diff non-symmetric files check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 27f94b0bb8747..505e2a2f1872b 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -433,7 +433,7 @@ void pthread__unblock_sigwinch(void) static int libperf_print(enum libperf_print_level level, const char *fmt, va_list ap) { - return eprintf(level, verbose, fmt, ap); + return veprintf(level, verbose, fmt, ap); } int main(int argc, const char **argv) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 47f57f5829d3a..a4244bf242e6a 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -567,7 +567,7 @@ int json_events(const char *fn, } else if (json_streq(map, field, "ExtSel")) { char *code = NULL; addfield(map, &code, "", "", val); - eventcode |= strtoul(code, NULL, 0) << 21; + eventcode |= strtoul(code, NULL, 0) << 8; free(code); } else if (json_streq(map, field, "EventName")) { addfield(map, &name, "", "", val); diff --git a/tools/perf/tests/shell/record+zstd_comp_decomp.sh b/tools/perf/tests/shell/record+zstd_comp_decomp.sh index 045723b3d9928..c62af807198de 100755 --- a/tools/perf/tests/shell/record+zstd_comp_decomp.sh +++ b/tools/perf/tests/shell/record+zstd_comp_decomp.sh @@ -12,7 +12,7 @@ skip_if_no_z_record() { collect_z_record() { echo "Collecting compressed record file:" - [[ "$(uname -m)" != s390x ]] && gflag='-g' + [ "$(uname -m)" != s390x ] && gflag='-g' $perf_tool record -o $trace_file $gflag -z -F 5000 -- \ dd count=500 if=/dev/urandom of=/dev/null } diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index f736755000616..9ae316445f04b 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -472,6 +472,18 @@ struct perf_hpp_list perf_hpp_list = { #undef __HPP_SORT_ACC_FN #undef __HPP_SORT_RAW_FN +static void fmt_free(struct perf_hpp_fmt *fmt) +{ + /* + * At this point fmt should be completely + * unhooked, if not it's a bug. + */ + BUG_ON(!list_empty(&fmt->list)); + BUG_ON(!list_empty(&fmt->sort_list)); + + if (fmt->free) + fmt->free(fmt); +} void perf_hpp__init(void) { @@ -535,9 +547,10 @@ void perf_hpp_list__prepend_sort_field(struct perf_hpp_list *list, list_add(&format->sort_list, &list->sorts); } -void perf_hpp__column_unregister(struct perf_hpp_fmt *format) +static void perf_hpp__column_unregister(struct perf_hpp_fmt *format) { list_del_init(&format->list); + fmt_free(format); } void perf_hpp__cancel_cumulate(void) @@ -609,19 +622,6 @@ void perf_hpp__append_sort_keys(struct perf_hpp_list *list) } -static void fmt_free(struct perf_hpp_fmt *fmt) -{ - /* - * At this point fmt should be completely - * unhooked, if not it's a bug. - */ - BUG_ON(!list_empty(&fmt->list)); - BUG_ON(!list_empty(&fmt->sort_list)); - - if (fmt->free) - fmt->free(fmt); -} - void perf_hpp__reset_output_field(struct perf_hpp_list *list) { struct perf_hpp_fmt *fmt, *tmp; diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index f7ed5d122e229..782c0c8a9a836 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -108,7 +108,11 @@ static int perf_env__fetch_btf(struct perf_env *env, node->data_size = data_size; memcpy(node->data, data, data_size); - perf_env__insert_btf(env, node); + if (!perf_env__insert_btf(env, node)) { + /* Insertion failed because of a duplicate. */ + free(node); + return -1; + } return 0; } @@ -467,7 +471,7 @@ void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, synthesize_bpf_prog_name(name, KSYM_NAME_LEN, info, btf, 0); fprintf(fp, "# bpf_prog_info %u: %s addr 0x%llx size %u\n", info->id, name, prog_addrs[0], prog_lens[0]); - return; + goto out; } fprintf(fp, "# bpf_prog_info %u:\n", info->id); @@ -477,4 +481,6 @@ void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, fprintf(fp, "# \tsub_prog %u: %s addr 0x%llx size %u\n", i, name, prog_addrs[i], prog_lens[i]); } +out: + btf__free(btf); } diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index a3f912615690f..3c874f52f1a25 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -44,10 +44,6 @@ int perf_data__create_dir(struct perf_data *data, int nr) if (!files) return -ENOMEM; - data->dir.version = PERF_DIR_VERSION; - data->dir.files = files; - data->dir.nr = nr; - for (i = 0; i < nr; i++) { struct perf_data_file *file = &files[i]; @@ -62,6 +58,9 @@ int perf_data__create_dir(struct perf_data *data, int nr) file->fd = ret; } + data->dir.version = PERF_DIR_VERSION; + data->dir.files = files; + data->dir.nr = nr; return 0; out_err: diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index 259868a390198..252d990712496 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -3,6 +3,7 @@ #define __PERF_DATA_H #include +#include enum perf_data_mode { PERF_DATA_MODE_WRITE, diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index e55114f0336f0..682146d043793 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -143,7 +143,7 @@ static int trace_event_printer(enum binary_printer_ops op, break; case BINARY_PRINT_CHAR_DATA: printed += color_fprintf(fp, color, "%c", - isprint(ch) ? ch : '.'); + isprint(ch) && isascii(ch) ? ch : '.'); break; case BINARY_PRINT_CHAR_PAD: printed += color_fprintf(fp, color, " "); diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 0fafcf264d235..ef64e197bc8df 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -69,12 +69,13 @@ struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, return node; } -void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) +bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) { struct rb_node *parent = NULL; __u32 btf_id = btf_node->id; struct btf_node *node; struct rb_node **p; + bool ret = true; down_write(&env->bpf_progs.lock); p = &env->bpf_progs.btfs.rb_node; @@ -88,6 +89,7 @@ void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) p = &(*p)->rb_right; } else { pr_debug("duplicated btf %u\n", btf_id); + ret = false; goto out; } } @@ -97,6 +99,7 @@ void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) env->bpf_progs.btfs_cnt++; out: up_write(&env->bpf_progs.lock); + return ret; } struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id) diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index db40906e29373..37028215d4a53 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -117,6 +117,6 @@ void perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node); struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, __u32 prog_id); -void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); +bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id); #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 4792731307947..ecce30f086de7 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -361,7 +361,6 @@ enum { }; void perf_hpp__init(void); -void perf_hpp__column_unregister(struct perf_hpp_fmt *format); void perf_hpp__cancel_cumulate(void); void perf_hpp__setup_output_field(struct perf_hpp_list *list); void perf_hpp__reset_output_field(struct perf_hpp_list *list); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 767fe1bfd922c..8c3addc2e9e1e 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2020,6 +2020,7 @@ static int add_callchain_ip(struct thread *thread, al.filtered = 0; al.sym = NULL; + al.srcline = NULL; if (!cpumode) { thread__find_cpumode_addr_location(thread, ip, &al); } else { diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 6357ac508ad1e..67b7d2af1755d 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2954,6 +2954,9 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, for (j = 0; j < num_matched_functions; j++) { sym = syms[j]; + if (sym->type != STT_FUNC) + continue; + tev = (*tevs) + ret; tp = &tev->point; if (ret == num_matched_functions) { diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 8ff2c98e90322..01e15b445cb58 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1960,6 +1960,7 @@ prefetch_event(char *buf, u64 head, size_t mmap_size, bool needs_swap, union perf_event *error) { union perf_event *event; + u16 event_size; /* * Ensure we have enough space remaining to read @@ -1972,15 +1973,23 @@ prefetch_event(char *buf, u64 head, size_t mmap_size, if (needs_swap) perf_event_header__bswap(&event->header); - if (head + event->header.size <= mmap_size) + event_size = event->header.size; + if (head + event_size <= mmap_size) return event; /* We're not fetching the event so swap back again */ if (needs_swap) perf_event_header__bswap(&event->header); - pr_debug("%s: head=%#" PRIx64 " event->header_size=%#x, mmap_size=%#zx:" - " fuzzed or compressed perf.data?\n",__func__, head, event->header.size, mmap_size); + /* Check if the event fits into the next mmapped buf. */ + if (event_size <= mmap_size - head % page_size) { + /* Remap buf and fetch again. */ + return NULL; + } + + /* Invalid input. Event size should never exceed mmap_size. */ + pr_debug("%s: head=%#" PRIx64 " event->header.size=%#x, mmap_size=%#zx:" + " fuzzed or compressed perf.data?\n", __func__, head, event_size, mmap_size); return error; } diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 2ec0a32da5793..0b185b1090ff3 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -230,6 +230,33 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, return NULL; } +static int elf_read_program_header(Elf *elf, u64 vaddr, GElf_Phdr *phdr) +{ + size_t i, phdrnum; + u64 sz; + + if (elf_getphdrnum(elf, &phdrnum)) + return -1; + + for (i = 0; i < phdrnum; i++) { + if (gelf_getphdr(elf, i, phdr) == NULL) + return -1; + + if (phdr->p_type != PT_LOAD) + continue; + + sz = max(phdr->p_memsz, phdr->p_filesz); + if (!sz) + continue; + + if (vaddr >= phdr->p_vaddr && (vaddr < phdr->p_vaddr + sz)) + return 0; + } + + /* Not found any valid program header */ + return -1; +} + static bool want_demangle(bool is_kernel_sym) { return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle; @@ -1091,6 +1118,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, sym.st_value); used_opd = true; } + /* * When loading symbols in a data mapping, ABS symbols (which * has a value of SHN_ABS in its st_shndx) failed at @@ -1127,11 +1155,20 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, goto out_elf_end; } else if ((used_opd && runtime_ss->adjust_symbols) || (!used_opd && syms_ss->adjust_symbols)) { + GElf_Phdr phdr; + + if (elf_read_program_header(syms_ss->elf, + (u64)sym.st_value, &phdr)) { + pr_warning("%s: failed to find program header for " + "symbol: %s st_value: %#" PRIx64 "\n", + __func__, elf_name, (u64)sym.st_value); + continue; + } pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " " - "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__, - (u64)sym.st_value, (u64)shdr.sh_addr, - (u64)shdr.sh_offset); - sym.st_value -= shdr.sh_addr - shdr.sh_offset; + "p_vaddr: %#" PRIx64 " p_offset: %#" PRIx64 "\n", + __func__, (u64)sym.st_value, (u64)phdr.p_vaddr, + (u64)phdr.p_offset); + sym.st_value -= phdr.p_vaddr - phdr.p_offset; } demangled = demangle_sym(dso, kmodule, elf_name); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 901ad7f6f4dcc..ea2c7beff4868 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -231,7 +231,7 @@ void symbols__fixup_end(struct rb_root_cached *symbols) prev = curr; curr = rb_entry(nd, struct symbol, rb_node); - if (prev->end == prev->start && prev->end != curr->start) + if (prev->end == prev->start || prev->end != curr->start) arch__symbols__fixup_end(prev, curr); } diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index ae56c766eda16..b3c1ae288b478 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -343,32 +343,32 @@ fetch_kernel_version(unsigned int *puint, char *str, return 0; } -const char *perf_tip(const char *dirpath) +int perf_tip(char **strp, const char *dirpath) { struct strlist *tips; struct str_node *node; - char *tip = NULL; struct strlist_config conf = { .dirname = dirpath, .file_only = true, }; + int ret = 0; + *strp = NULL; tips = strlist__new("tips.txt", &conf); if (tips == NULL) - return errno == ENOENT ? NULL : - "Tip: check path of tips.txt or get more memory! ;-p"; + return -errno; if (strlist__nr_entries(tips) == 0) goto out; node = strlist__entry(tips, random() % strlist__nr_entries(tips)); - if (asprintf(&tip, "Tip: %s", node->s) < 0) - tip = (char *)"Tip: get more memory! ;-)"; + if (asprintf(strp, "Tip: %s", node->s) < 0) + ret = -ENOMEM; out: strlist__delete(tips); - return tip; + return ret; } char *perf_exe(char *buf, int len) diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 9969b8b46f7c3..e4a7e1cafc70a 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -37,7 +37,7 @@ int fetch_kernel_version(unsigned int *puint, #define KVER_FMT "%d.%d.%d" #define KVER_PARAM(x) KVER_VERSION(x), KVER_PATCHLEVEL(x), KVER_SUBLEVEL(x) -const char *perf_tip(const char *dirpath); +int perf_tip(char **strp, const char *dirpath); #ifndef HAVE_SCHED_GETCPU_SUPPORT int sched_getcpu(void); diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 988326b67a916..8bf6b01b35608 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3865,6 +3865,7 @@ rapl_dram_energy_units_probe(int model, double rapl_energy_units) case INTEL_FAM6_HASWELL_X: /* HSX */ case INTEL_FAM6_BROADWELL_X: /* BDX */ case INTEL_FAM6_XEON_PHI_KNL: /* KNL */ + case INTEL_FAM6_ICELAKE_X: /* ICX */ return (rapl_dram_energy_units = 15.3 / 1000000); default: return (rapl_energy_units); diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c deleted file mode 100644 index 3a469099f30d8..0000000000000 --- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c +++ /dev/null @@ -1,329 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright (c) 2018 Facebook -// Copyright (c) 2019 Cloudflare -// Copyright (c) 2020 Isovalent, Inc. -/* - * Test that the socket assign program is able to redirect traffic towards a - * socket, regardless of whether the port or address destination of the traffic - * matches the port. - */ - -#define _GNU_SOURCE -#include -#include -#include -#include - -#include "test_progs.h" - -#define BIND_PORT 1234 -#define CONNECT_PORT 4321 -#define TEST_DADDR (0xC0A80203) -#define NS_SELF "/proc/self/ns/net" -#define SERVER_MAP_PATH "/sys/fs/bpf/tc/globals/server_map" - -static const struct timeval timeo_sec = { .tv_sec = 3 }; -static const size_t timeo_optlen = sizeof(timeo_sec); -static int stop, duration; - -static bool -configure_stack(void) -{ - char tc_cmd[BUFSIZ]; - - /* Move to a new networking namespace */ - if (CHECK_FAIL(unshare(CLONE_NEWNET))) - return false; - - /* Configure necessary links, routes */ - if (CHECK_FAIL(system("ip link set dev lo up"))) - return false; - if (CHECK_FAIL(system("ip route add local default dev lo"))) - return false; - if (CHECK_FAIL(system("ip -6 route add local default dev lo"))) - return false; - - /* Load qdisc, BPF program */ - if (CHECK_FAIL(system("tc qdisc add dev lo clsact"))) - return false; - sprintf(tc_cmd, "%s %s %s %s", "tc filter add dev lo ingress bpf", - "direct-action object-file ./test_sk_assign.o", - "section classifier/sk_assign_test", - (env.verbosity < VERBOSE_VERY) ? " 2>/dev/null" : "verbose"); - if (CHECK(system(tc_cmd), "BPF load failed;", - "run with -vv for more info\n")) - return false; - - return true; -} - -static int -start_server(const struct sockaddr *addr, socklen_t len, int type) -{ - int fd; - - fd = socket(addr->sa_family, type, 0); - if (CHECK_FAIL(fd == -1)) - goto out; - if (CHECK_FAIL(setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec, - timeo_optlen))) - goto close_out; - if (CHECK_FAIL(bind(fd, addr, len) == -1)) - goto close_out; - if (type == SOCK_STREAM && CHECK_FAIL(listen(fd, 128) == -1)) - goto close_out; - - goto out; -close_out: - close(fd); - fd = -1; -out: - return fd; -} - -static int -connect_to_server(const struct sockaddr *addr, socklen_t len, int type) -{ - int fd = -1; - - fd = socket(addr->sa_family, type, 0); - if (CHECK_FAIL(fd == -1)) - goto out; - if (CHECK_FAIL(setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo_sec, - timeo_optlen))) - goto close_out; - if (CHECK_FAIL(connect(fd, addr, len))) - goto close_out; - - goto out; -close_out: - close(fd); - fd = -1; -out: - return fd; -} - -static in_port_t -get_port(int fd) -{ - struct sockaddr_storage ss; - socklen_t slen = sizeof(ss); - in_port_t port = 0; - - if (CHECK_FAIL(getsockname(fd, (struct sockaddr *)&ss, &slen))) - return port; - - switch (ss.ss_family) { - case AF_INET: - port = ((struct sockaddr_in *)&ss)->sin_port; - break; - case AF_INET6: - port = ((struct sockaddr_in6 *)&ss)->sin6_port; - break; - default: - CHECK(1, "Invalid address family", "%d\n", ss.ss_family); - } - return port; -} - -static ssize_t -rcv_msg(int srv_client, int type) -{ - struct sockaddr_storage ss; - char buf[BUFSIZ]; - socklen_t slen; - - if (type == SOCK_STREAM) - return read(srv_client, &buf, sizeof(buf)); - else - return recvfrom(srv_client, &buf, sizeof(buf), 0, - (struct sockaddr *)&ss, &slen); -} - -static int -run_test(int server_fd, const struct sockaddr *addr, socklen_t len, int type) -{ - int client = -1, srv_client = -1; - char buf[] = "testing"; - in_port_t port; - int ret = 1; - - client = connect_to_server(addr, len, type); - if (client == -1) { - perror("Cannot connect to server"); - goto out; - } - - if (type == SOCK_STREAM) { - srv_client = accept(server_fd, NULL, NULL); - if (CHECK_FAIL(srv_client == -1)) { - perror("Can't accept connection"); - goto out; - } - } else { - srv_client = server_fd; - } - if (CHECK_FAIL(write(client, buf, sizeof(buf)) != sizeof(buf))) { - perror("Can't write on client"); - goto out; - } - if (CHECK_FAIL(rcv_msg(srv_client, type) != sizeof(buf))) { - perror("Can't read on server"); - goto out; - } - - port = get_port(srv_client); - if (CHECK_FAIL(!port)) - goto out; - /* SOCK_STREAM is connected via accept(), so the server's local address - * will be the CONNECT_PORT rather than the BIND port that corresponds - * to the listen socket. SOCK_DGRAM on the other hand is connectionless - * so we can't really do the same check there; the server doesn't ever - * create a socket with CONNECT_PORT. - */ - if (type == SOCK_STREAM && - CHECK(port != htons(CONNECT_PORT), "Expected", "port %u but got %u", - CONNECT_PORT, ntohs(port))) - goto out; - else if (type == SOCK_DGRAM && - CHECK(port != htons(BIND_PORT), "Expected", - "port %u but got %u", BIND_PORT, ntohs(port))) - goto out; - - ret = 0; -out: - close(client); - if (srv_client != server_fd) - close(srv_client); - if (ret) - WRITE_ONCE(stop, 1); - return ret; -} - -static void -prepare_addr(struct sockaddr *addr, int family, __u16 port, bool rewrite_addr) -{ - struct sockaddr_in *addr4; - struct sockaddr_in6 *addr6; - - switch (family) { - case AF_INET: - addr4 = (struct sockaddr_in *)addr; - memset(addr4, 0, sizeof(*addr4)); - addr4->sin_family = family; - addr4->sin_port = htons(port); - if (rewrite_addr) - addr4->sin_addr.s_addr = htonl(TEST_DADDR); - else - addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); - break; - case AF_INET6: - addr6 = (struct sockaddr_in6 *)addr; - memset(addr6, 0, sizeof(*addr6)); - addr6->sin6_family = family; - addr6->sin6_port = htons(port); - addr6->sin6_addr = in6addr_loopback; - if (rewrite_addr) - addr6->sin6_addr.s6_addr32[3] = htonl(TEST_DADDR); - break; - default: - fprintf(stderr, "Invalid family %d", family); - } -} - -struct test_sk_cfg { - const char *name; - int family; - struct sockaddr *addr; - socklen_t len; - int type; - bool rewrite_addr; -}; - -#define TEST(NAME, FAMILY, TYPE, REWRITE) \ -{ \ - .name = NAME, \ - .family = FAMILY, \ - .addr = (FAMILY == AF_INET) ? (struct sockaddr *)&addr4 \ - : (struct sockaddr *)&addr6, \ - .len = (FAMILY == AF_INET) ? sizeof(addr4) : sizeof(addr6), \ - .type = TYPE, \ - .rewrite_addr = REWRITE, \ -} - -void test_sk_assign(void) -{ - struct sockaddr_in addr4; - struct sockaddr_in6 addr6; - struct test_sk_cfg tests[] = { - TEST("ipv4 tcp port redir", AF_INET, SOCK_STREAM, false), - TEST("ipv4 tcp addr redir", AF_INET, SOCK_STREAM, true), - TEST("ipv6 tcp port redir", AF_INET6, SOCK_STREAM, false), - TEST("ipv6 tcp addr redir", AF_INET6, SOCK_STREAM, true), - TEST("ipv4 udp port redir", AF_INET, SOCK_DGRAM, false), - TEST("ipv4 udp addr redir", AF_INET, SOCK_DGRAM, true), - TEST("ipv6 udp port redir", AF_INET6, SOCK_DGRAM, false), - TEST("ipv6 udp addr redir", AF_INET6, SOCK_DGRAM, true), - }; - __s64 server = -1; - int server_map; - int self_net; - int i; - - self_net = open(NS_SELF, O_RDONLY); - if (CHECK_FAIL(self_net < 0)) { - perror("Unable to open "NS_SELF); - return; - } - - if (!configure_stack()) { - perror("configure_stack"); - goto cleanup; - } - - server_map = bpf_obj_get(SERVER_MAP_PATH); - if (CHECK_FAIL(server_map < 0)) { - perror("Unable to open " SERVER_MAP_PATH); - goto cleanup; - } - - for (i = 0; i < ARRAY_SIZE(tests) && !READ_ONCE(stop); i++) { - struct test_sk_cfg *test = &tests[i]; - const struct sockaddr *addr; - const int zero = 0; - int err; - - if (!test__start_subtest(test->name)) - continue; - prepare_addr(test->addr, test->family, BIND_PORT, false); - addr = (const struct sockaddr *)test->addr; - server = start_server(addr, test->len, test->type); - if (server == -1) - goto close; - - err = bpf_map_update_elem(server_map, &zero, &server, BPF_ANY); - if (CHECK_FAIL(err)) { - perror("Unable to update server_map"); - goto close; - } - - /* connect to unbound ports */ - prepare_addr(test->addr, test->family, CONNECT_PORT, - test->rewrite_addr); - if (run_test(server, addr, test->len, test->type)) - goto close; - - close(server); - server = -1; - } - -close: - close(server); - close(server_map); -cleanup: - if (CHECK_FAIL(unlink(SERVER_MAP_PATH))) - perror("Unable to unlink " SERVER_MAP_PATH); - if (CHECK_FAIL(setns(self_net, CLONE_NEWNET))) - perror("Failed to setns("NS_SELF")"); - close(self_net); -} diff --git a/tools/testing/selftests/bpf/prog_tests/udp_limit.c b/tools/testing/selftests/bpf/prog_tests/udp_limit.c deleted file mode 100644 index 2aba09d4d01bf..0000000000000 --- a/tools/testing/selftests/bpf/prog_tests/udp_limit.c +++ /dev/null @@ -1,75 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include "udp_limit.skel.h" - -#include -#include - -static int duration; - -void test_udp_limit(void) -{ - struct udp_limit *skel; - int fd1 = -1, fd2 = -1; - int cgroup_fd; - - cgroup_fd = test__join_cgroup("/udp_limit"); - if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno)) - return; - - skel = udp_limit__open_and_load(); - if (CHECK(!skel, "skel-load", "errno %d", errno)) - goto close_cgroup_fd; - - skel->links.sock = bpf_program__attach_cgroup(skel->progs.sock, cgroup_fd); - skel->links.sock_release = bpf_program__attach_cgroup(skel->progs.sock_release, cgroup_fd); - if (CHECK(IS_ERR(skel->links.sock) || IS_ERR(skel->links.sock_release), - "cg-attach", "sock %ld sock_release %ld", - PTR_ERR(skel->links.sock), - PTR_ERR(skel->links.sock_release))) - goto close_skeleton; - - /* BPF program enforces a single UDP socket per cgroup, - * verify that. - */ - fd1 = socket(AF_INET, SOCK_DGRAM, 0); - if (CHECK(fd1 < 0, "fd1", "errno %d", errno)) - goto close_skeleton; - - fd2 = socket(AF_INET, SOCK_DGRAM, 0); - if (CHECK(fd2 >= 0, "fd2", "errno %d", errno)) - goto close_skeleton; - - /* We can reopen again after close. */ - close(fd1); - fd1 = -1; - - fd1 = socket(AF_INET, SOCK_DGRAM, 0); - if (CHECK(fd1 < 0, "fd1-again", "errno %d", errno)) - goto close_skeleton; - - /* Make sure the program was invoked the expected - * number of times: - * - open fd1 - BPF_CGROUP_INET_SOCK_CREATE - * - attempt to openfd2 - BPF_CGROUP_INET_SOCK_CREATE - * - close fd1 - BPF_CGROUP_INET_SOCK_RELEASE - * - open fd1 again - BPF_CGROUP_INET_SOCK_CREATE - */ - if (CHECK(skel->bss->invocations != 4, "bss-invocations", - "invocations=%d", skel->bss->invocations)) - goto close_skeleton; - - /* We should still have a single socket in use */ - if (CHECK(skel->bss->in_use != 1, "bss-in_use", - "in_use=%d", skel->bss->in_use)) - goto close_skeleton; - -close_skeleton: - if (fd1 >= 0) - close(fd1); - if (fd2 >= 0) - close(fd2); - udp_limit__destroy(skel); -close_cgroup_fd: - close(cgroup_fd); -} diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c index d4a02fe44a126..0620580a5c16c 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c @@ -94,7 +94,7 @@ typedef void (* (*signal_t)(int, void (*)(int)))(int); typedef char * (*fn_ptr_arr1_t[10])(int **); -typedef char * (* const (* const fn_ptr_arr2_t[5])())(char * (*)(int)); +typedef char * (* (* const fn_ptr_arr2_t[5])())(char * (*)(int)); struct struct_w_typedefs { int_t a; diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h index 067eb625d01c5..5ba8d39c4d544 100644 --- a/tools/testing/selftests/bpf/progs/strobemeta.h +++ b/tools/testing/selftests/bpf/progs/strobemeta.h @@ -349,7 +349,7 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg, void *payload) { void *location; - uint32_t len; + uint64_t len; data->str_lens[idx] = 0; location = calc_location(&cfg->str_locs[idx], tls_base); @@ -381,7 +381,7 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, struct strobe_map_descr* descr = &data->map_descrs[idx]; struct strobe_map_raw map; void *location; - uint32_t len; + uint64_t len; int i; descr->tag_len = 0; /* presume no tag is set */ diff --git a/tools/testing/selftests/bpf/progs/test_sk_assign.c b/tools/testing/selftests/bpf/progs/test_sk_assign.c deleted file mode 100644 index 1ecd987005d2c..0000000000000 --- a/tools/testing/selftests/bpf/progs/test_sk_assign.c +++ /dev/null @@ -1,186 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright (c) 2019 Cloudflare Ltd. -// Copyright (c) 2020 Isovalent, Inc. - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* Pin map under /sys/fs/bpf/tc/globals/ */ -#define PIN_GLOBAL_NS 2 - -/* Must match struct bpf_elf_map layout from iproute2 */ -struct { - __u32 type; - __u32 size_key; - __u32 size_value; - __u32 max_elem; - __u32 flags; - __u32 id; - __u32 pinning; -} server_map SEC("maps") = { - .type = BPF_MAP_TYPE_SOCKMAP, - .size_key = sizeof(int), - .size_value = sizeof(__u64), - .max_elem = 1, - .pinning = PIN_GLOBAL_NS, -}; - -int _version SEC("version") = 1; -char _license[] SEC("license") = "GPL"; - -/* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */ -static inline struct bpf_sock_tuple * -get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp) -{ - void *data_end = (void *)(long)skb->data_end; - void *data = (void *)(long)skb->data; - struct bpf_sock_tuple *result; - struct ethhdr *eth; - __u64 tuple_len; - __u8 proto = 0; - __u64 ihl_len; - - eth = (struct ethhdr *)(data); - if (eth + 1 > data_end) - return NULL; - - if (eth->h_proto == bpf_htons(ETH_P_IP)) { - struct iphdr *iph = (struct iphdr *)(data + sizeof(*eth)); - - if (iph + 1 > data_end) - return NULL; - if (iph->ihl != 5) - /* Options are not supported */ - return NULL; - ihl_len = iph->ihl * 4; - proto = iph->protocol; - *ipv4 = true; - result = (struct bpf_sock_tuple *)&iph->saddr; - } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { - struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + sizeof(*eth)); - - if (ip6h + 1 > data_end) - return NULL; - ihl_len = sizeof(*ip6h); - proto = ip6h->nexthdr; - *ipv4 = false; - result = (struct bpf_sock_tuple *)&ip6h->saddr; - } else { - return (struct bpf_sock_tuple *)data; - } - - if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) - return NULL; - - *tcp = (proto == IPPROTO_TCP); - return result; -} - -static inline int -handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4) -{ - struct bpf_sock_tuple ln = {0}; - struct bpf_sock *sk; - const int zero = 0; - size_t tuple_len; - __be16 dport; - int ret; - - tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6); - if ((void *)tuple + tuple_len > (void *)(long)skb->data_end) - return TC_ACT_SHOT; - - sk = bpf_sk_lookup_udp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0); - if (sk) - goto assign; - - dport = ipv4 ? tuple->ipv4.dport : tuple->ipv6.dport; - if (dport != bpf_htons(4321)) - return TC_ACT_OK; - - sk = bpf_map_lookup_elem(&server_map, &zero); - if (!sk) - return TC_ACT_SHOT; - -assign: - ret = bpf_sk_assign(skb, sk, 0); - bpf_sk_release(sk); - return ret; -} - -static inline int -handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4) -{ - struct bpf_sock_tuple ln = {0}; - struct bpf_sock *sk; - const int zero = 0; - size_t tuple_len; - __be16 dport; - int ret; - - tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6); - if ((void *)tuple + tuple_len > (void *)(long)skb->data_end) - return TC_ACT_SHOT; - - sk = bpf_skc_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0); - if (sk) { - if (sk->state != BPF_TCP_LISTEN) - goto assign; - bpf_sk_release(sk); - } - - dport = ipv4 ? tuple->ipv4.dport : tuple->ipv6.dport; - if (dport != bpf_htons(4321)) - return TC_ACT_OK; - - sk = bpf_map_lookup_elem(&server_map, &zero); - if (!sk) - return TC_ACT_SHOT; - - if (sk->state != BPF_TCP_LISTEN) { - bpf_sk_release(sk); - return TC_ACT_SHOT; - } - -assign: - ret = bpf_sk_assign(skb, sk, 0); - bpf_sk_release(sk); - return ret; -} - -SEC("classifier/sk_assign_test") -int bpf_sk_assign_test(struct __sk_buff *skb) -{ - struct bpf_sock_tuple *tuple, ln = {0}; - bool ipv4 = false; - bool tcp = false; - int tuple_len; - int ret = 0; - - tuple = get_tuple(skb, &ipv4, &tcp); - if (!tuple) - return TC_ACT_SHOT; - - /* Note that the verifier socket return type for bpf_skc_lookup_tcp() - * differs from bpf_sk_lookup_udp(), so even though the C-level type is - * the same here, if we try to share the implementations they will - * fail to verify because we're crossing pointer types. - */ - if (tcp) - ret = handle_tcp(skb, tuple, ipv4); - else - ret = handle_udp(skb, tuple, ipv4); - - return ret == 0 ? TC_ACT_OK : TC_ACT_SHOT; -} diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c deleted file mode 100644 index fe182616b112a..0000000000000 --- a/tools/testing/selftests/bpf/progs/test_tc_neigh.c +++ /dev/null @@ -1,148 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#ifndef ctx_ptr -# define ctx_ptr(field) (void *)(long)(field) -#endif - -#define ip4_src 0xac100164 /* 172.16.1.100 */ -#define ip4_dst 0xac100264 /* 172.16.2.100 */ - -#define ip6_src { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ - 0x00, 0x01, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe } -#define ip6_dst { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ - 0x00, 0x02, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe } - -#ifndef v6_equal -# define v6_equal(a, b) (a.s6_addr32[0] == b.s6_addr32[0] && \ - a.s6_addr32[1] == b.s6_addr32[1] && \ - a.s6_addr32[2] == b.s6_addr32[2] && \ - a.s6_addr32[3] == b.s6_addr32[3]) -#endif - -enum { - dev_src, - dev_dst, -}; - -struct bpf_map_def SEC("maps") ifindex_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(int), - .max_entries = 2, -}; - -static __always_inline bool is_remote_ep_v4(struct __sk_buff *skb, - __be32 addr) -{ - void *data_end = ctx_ptr(skb->data_end); - void *data = ctx_ptr(skb->data); - struct iphdr *ip4h; - - if (data + sizeof(struct ethhdr) > data_end) - return false; - - ip4h = (struct iphdr *)(data + sizeof(struct ethhdr)); - if ((void *)(ip4h + 1) > data_end) - return false; - - return ip4h->daddr == addr; -} - -static __always_inline bool is_remote_ep_v6(struct __sk_buff *skb, - struct in6_addr addr) -{ - void *data_end = ctx_ptr(skb->data_end); - void *data = ctx_ptr(skb->data); - struct ipv6hdr *ip6h; - - if (data + sizeof(struct ethhdr) > data_end) - return false; - - ip6h = (struct ipv6hdr *)(data + sizeof(struct ethhdr)); - if ((void *)(ip6h + 1) > data_end) - return false; - - return v6_equal(ip6h->daddr, addr); -} - -static __always_inline int get_dev_ifindex(int which) -{ - int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which); - - return ifindex ? *ifindex : 0; -} - -SEC("chk_egress") int tc_chk(struct __sk_buff *skb) -{ - void *data_end = ctx_ptr(skb->data_end); - void *data = ctx_ptr(skb->data); - __u32 *raw = data; - - if (data + sizeof(struct ethhdr) > data_end) - return TC_ACT_SHOT; - - return !raw[0] && !raw[1] && !raw[2] ? TC_ACT_SHOT : TC_ACT_OK; -} - -SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) -{ - __u8 zero[ETH_ALEN * 2]; - bool redirect = false; - - switch (skb->protocol) { - case __bpf_constant_htons(ETH_P_IP): - redirect = is_remote_ep_v4(skb, __bpf_constant_htonl(ip4_src)); - break; - case __bpf_constant_htons(ETH_P_IPV6): - redirect = is_remote_ep_v6(skb, (struct in6_addr)ip6_src); - break; - } - - if (!redirect) - return TC_ACT_OK; - - __builtin_memset(&zero, 0, sizeof(zero)); - if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0) - return TC_ACT_SHOT; - - return bpf_redirect_neigh(get_dev_ifindex(dev_src), 0); -} - -SEC("src_ingress") int tc_src(struct __sk_buff *skb) -{ - __u8 zero[ETH_ALEN * 2]; - bool redirect = false; - - switch (skb->protocol) { - case __bpf_constant_htons(ETH_P_IP): - redirect = is_remote_ep_v4(skb, __bpf_constant_htonl(ip4_dst)); - break; - case __bpf_constant_htons(ETH_P_IPV6): - redirect = is_remote_ep_v6(skb, (struct in6_addr)ip6_dst); - break; - } - - if (!redirect) - return TC_ACT_OK; - - __builtin_memset(&zero, 0, sizeof(zero)); - if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0) - return TC_ACT_SHOT; - - return bpf_redirect_neigh(get_dev_ifindex(dev_dst), 0); -} - -char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c deleted file mode 100644 index fc84a7685aa2c..0000000000000 --- a/tools/testing/selftests/bpf/progs/test_tc_peer.c +++ /dev/null @@ -1,45 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include - -#include -#include -#include - -#include - -enum { - dev_src, - dev_dst, -}; - -struct bpf_map_def SEC("maps") ifindex_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(int), - .max_entries = 2, -}; - -static __always_inline int get_dev_ifindex(int which) -{ - int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which); - - return ifindex ? *ifindex : 0; -} - -SEC("chk_egress") int tc_chk(struct __sk_buff *skb) -{ - return TC_ACT_SHOT; -} - -SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) -{ - return bpf_redirect_peer(get_dev_ifindex(dev_src), 0); -} - -SEC("src_ingress") int tc_src(struct __sk_buff *skb) -{ - return bpf_redirect_peer(get_dev_ifindex(dev_dst), 0); -} - -char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/udp_limit.c b/tools/testing/selftests/bpf/progs/udp_limit.c deleted file mode 100644 index 8429b22525a79..0000000000000 --- a/tools/testing/selftests/bpf/progs/udp_limit.c +++ /dev/null @@ -1,42 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only - -#include -#include -#include - -int invocations = 0, in_use = 0; - -SEC("cgroup/sock_create") -int sock(struct bpf_sock *ctx) -{ - __u32 key; - - if (ctx->type != SOCK_DGRAM) - return 1; - - __sync_fetch_and_add(&invocations, 1); - - if (in_use > 0) { - /* BPF_CGROUP_INET_SOCK_RELEASE is _not_ called - * when we return an error from the BPF - * program! - */ - return 0; - } - - __sync_fetch_and_add(&in_use, 1); - return 1; -} - -SEC("cgroup/sock_release") -int sock_release(struct bpf_sock *ctx) -{ - __u32 key; - - if (ctx->type != SOCK_DGRAM) - return 1; - - __sync_fetch_and_add(&invocations, 1); - __sync_fetch_and_add(&in_use, -1); - return 1; -} diff --git a/tools/testing/selftests/bpf/progs/xdp_tx.c b/tools/testing/selftests/bpf/progs/xdp_tx.c index 57912e7c94b0a..9ed477776eca8 100644 --- a/tools/testing/selftests/bpf/progs/xdp_tx.c +++ b/tools/testing/selftests/bpf/progs/xdp_tx.c @@ -3,7 +3,7 @@ #include #include "bpf_helpers.h" -SEC("tx") +SEC("xdp") int xdp_tx(struct xdp_md *xdp) { return XDP_TX; diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c index 0262f7b374f9c..4b9a26caa2c2e 100644 --- a/tools/testing/selftests/bpf/test_align.c +++ b/tools/testing/selftests/bpf/test_align.c @@ -359,15 +359,15 @@ static struct bpf_align_test tests[] = { * is still (4n), fixed offset is not changed. * Also, we create a new reg->id. */ - {29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc))"}, + {29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (18) * which is 20. Then the variable offset is (4n), so * the total offset is 4-byte aligned and meets the * load's requirements. */ - {33, "R4=pkt(id=4,off=22,r=22,umax_value=2040,var_off=(0x0; 0x7fc))"}, - {33, "R5=pkt(id=4,off=18,r=22,umax_value=2040,var_off=(0x0; 0x7fc))"}, + {33, "R4=pkt(id=4,off=22,r=22,umax_value=2040,var_off=(0x0; 0x7fc)"}, + {33, "R5=pkt(id=4,off=18,r=22,umax_value=2040,var_off=(0x0; 0x7fc)"}, }, }, { @@ -410,15 +410,15 @@ static struct bpf_align_test tests[] = { /* Adding 14 makes R6 be (4n+2) */ {9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, /* Packet pointer has (4n+2) offset */ - {11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, - {13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + {11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"}, + {13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so * the total offset is 4-byte aligned and meets the * load's requirements. */ - {15, "R5=pkt(id=1,off=0,r=4,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + {15, "R5=pkt(id=1,off=0,r=4,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"}, /* Newly read value in R6 was shifted left by 2, so has * known alignment of 4. */ @@ -426,15 +426,15 @@ static struct bpf_align_test tests[] = { /* Added (4n) to packet pointer's (4n+2) var_off, giving * another (4n+2). */ - {19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"}, - {21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"}, + {19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"}, + {21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so * the total offset is 4-byte aligned and meets the * load's requirements. */ - {23, "R5=pkt(id=2,off=0,r=4,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"}, + {23, "R5=pkt(id=2,off=0,r=4,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"}, }, }, { @@ -469,16 +469,16 @@ static struct bpf_align_test tests[] = { .matches = { {4, "R5_w=pkt_end(id=0,off=0,imm=0)"}, /* (ptr - ptr) << 2 == unknown, (4n) */ - {6, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"}, + {6, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc)"}, /* (4n) + 14 == (4n+2). We blow our bounds, because * the add could overflow. */ - {7, "R5_w=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"}, + {7, "R5_w=inv(id=0,smin_value=-9223372036854775806,smax_value=9223372036854775806,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"}, /* Checked s>=0 */ - {9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, + {9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, /* packet pointer + nonnegative (4n+2) */ - {11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, - {13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, + {11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, + {13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine. * We checked the bounds, but it might have been able * to overflow if the packet pointer started in the @@ -486,7 +486,7 @@ static struct bpf_align_test tests[] = { * So we did not get a 'range' on R6, and the access * attempt will fail. */ - {15, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, + {15, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, } }, { @@ -528,7 +528,7 @@ static struct bpf_align_test tests[] = { /* New unknown value in R7 is (4n) */ {11, "R7_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Subtracting it from R6 blows our unsigned bounds */ - {12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,var_off=(0x2; 0xfffffffffffffffc))"}, + {12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"}, /* Checked s>= 0 */ {14, "R6=inv(id=0,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"}, /* At the time the word size load is performed from R5, @@ -537,7 +537,8 @@ static struct bpf_align_test tests[] = { * the total offset is 4-byte aligned and meets the * load's requirements. */ - {20, "R5=pkt(id=1,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"}, + {20, "R5=pkt(id=1,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc)"}, + }, }, { @@ -579,18 +580,18 @@ static struct bpf_align_test tests[] = { /* Adding 14 makes R6 be (4n+2) */ {11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"}, /* Subtracting from packet pointer overflows ubounds */ - {13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c))"}, + {13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"}, /* New unknown value in R7 is (4n), >= 76 */ {15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"}, /* Adding it to packet pointer gives nice bounds again */ - {16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"}, + {16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so * the total offset is 4-byte aligned and meets the * load's requirements. */ - {20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"}, + {20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"}, }, }, }; diff --git a/tools/testing/selftests/bpf/test_lirc_mode2.sh b/tools/testing/selftests/bpf/test_lirc_mode2.sh index ec4e15948e406..5252b91f48a18 100755 --- a/tools/testing/selftests/bpf/test_lirc_mode2.sh +++ b/tools/testing/selftests/bpf/test_lirc_mode2.sh @@ -3,6 +3,7 @@ # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 +ret=$ksft_skip msg="skip all tests:" if [ $UID != 0 ]; then @@ -25,7 +26,7 @@ do fi done -if [ -n $LIRCDEV ]; +if [ -n "$LIRCDEV" ]; then TYPE=lirc_mode2 ./test_lirc_mode2_user $LIRCDEV $INPUTDEV @@ -36,3 +37,5 @@ then echo -e ${GREEN}"PASS: $TYPE"${NC} fi fi + +exit $ret diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh index 59ea56945e6cd..6c69c42b1d607 100755 --- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh +++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh @@ -112,6 +112,22 @@ setup() ip netns add "${NS2}" ip netns add "${NS3}" + # rp_filter gets confused by what these tests are doing, so disable it + ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec ${NS1} sysctl -wq net.ipv4.conf.default.rp_filter=0 + ip netns exec ${NS2} sysctl -wq net.ipv4.conf.default.rp_filter=0 + ip netns exec ${NS3} sysctl -wq net.ipv4.conf.default.rp_filter=0 + + # disable IPv6 DAD because it sometimes takes too long and fails tests + ip netns exec ${NS1} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${NS2} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${NS3} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${NS1} sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip netns exec ${NS2} sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip netns exec ${NS3} sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip link add veth1 type veth peer name veth2 ip link add veth3 type veth peer name veth4 ip link add veth5 type veth peer name veth6 @@ -236,11 +252,6 @@ setup() ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6} ${VRF} ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8} ${VRF} - # rp_filter gets confused by what these tests are doing, so disable it - ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0 - TMPFILE=$(mktemp /tmp/test_lwt_ip_encap.XXXXXX) sleep 1 # reduce flakiness @@ -286,7 +297,7 @@ test_ping() ip netns exec ${NS1} ping -c 1 -W 1 -I veth1 ${IPv4_DST} 2>&1 > /dev/null RET=$? elif [ "${PROTO}" == "IPv6" ] ; then - ip netns exec ${NS1} ping6 -c 1 -W 6 -I veth1 ${IPv6_DST} 2>&1 > /dev/null + ip netns exec ${NS1} ping6 -c 1 -W 1 -I veth1 ${IPv6_DST} 2>&1 > /dev/null RET=$? else echo " test_ping: unknown PROTO: ${PROTO}" diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 1c4219ceced2f..45c7a55f0b8b5 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -972,7 +972,7 @@ static void test_sockmap(unsigned int tasks, void *data) FD_ZERO(&w); FD_SET(sfd[3], &w); - to.tv_sec = 1; + to.tv_sec = 30; to.tv_usec = 0; s = select(sfd[3] + 1, &w, NULL, NULL, &to); if (s == -1) { diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 48bbe8e0ce48d..4369bc46bf9c2 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -289,7 +289,7 @@ int extract_build_id(char *build_id, size_t size) if (getline(&line, &len, fp) == -1) goto err; - fclose(fp); + pclose(fp); if (len > size) len = size; @@ -298,7 +298,7 @@ int extract_build_id(char *build_id, size_t size) free(line); return 0; err: - fclose(fp); + pclose(fp); return -1; } diff --git a/tools/testing/selftests/bpf/test_tc_redirect.sh b/tools/testing/selftests/bpf/test_tc_redirect.sh deleted file mode 100755 index 6d74825621401..0000000000000 --- a/tools/testing/selftests/bpf/test_tc_redirect.sh +++ /dev/null @@ -1,204 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link -# between src and dst. The netns fwd has veth links to each src and dst. The -# client is in src and server in dst. The test installs a TC BPF program to each -# host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the -# neigh addr population and redirect or ii) bpf_redirect_peer() for namespace -# switch from ingress side; it also installs a checker prog on the egress side -# to drop unexpected traffic. - -if [[ $EUID -ne 0 ]]; then - echo "This script must be run as root" - echo "FAIL" - exit 1 -fi - -# check that needed tools are present -command -v nc >/dev/null 2>&1 || \ - { echo >&2 "nc is not available"; exit 1; } -command -v dd >/dev/null 2>&1 || \ - { echo >&2 "dd is not available"; exit 1; } -command -v timeout >/dev/null 2>&1 || \ - { echo >&2 "timeout is not available"; exit 1; } -command -v ping >/dev/null 2>&1 || \ - { echo >&2 "ping is not available"; exit 1; } -command -v ping6 >/dev/null 2>&1 || \ - { echo >&2 "ping6 is not available"; exit 1; } -command -v perl >/dev/null 2>&1 || \ - { echo >&2 "perl is not available"; exit 1; } -command -v jq >/dev/null 2>&1 || \ - { echo >&2 "jq is not available"; exit 1; } -command -v bpftool >/dev/null 2>&1 || \ - { echo >&2 "bpftool is not available"; exit 1; } - -readonly GREEN='\033[0;92m' -readonly RED='\033[0;31m' -readonly NC='\033[0m' # No Color - -readonly PING_ARG="-c 3 -w 10 -q" - -readonly TIMEOUT=10 - -readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)" -readonly NS_FWD="ns-fwd-$(mktemp -u XXXXXX)" -readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)" - -readonly IP4_SRC="172.16.1.100" -readonly IP4_DST="172.16.2.100" - -readonly IP6_SRC="::1:dead:beef:cafe" -readonly IP6_DST="::2:dead:beef:cafe" - -readonly IP4_SLL="169.254.0.1" -readonly IP4_DLL="169.254.0.2" -readonly IP4_NET="169.254.0.0" - -netns_cleanup() -{ - ip netns del ${NS_SRC} - ip netns del ${NS_FWD} - ip netns del ${NS_DST} -} - -netns_setup() -{ - ip netns add "${NS_SRC}" - ip netns add "${NS_FWD}" - ip netns add "${NS_DST}" - - ip link add veth_src type veth peer name veth_src_fwd - ip link add veth_dst type veth peer name veth_dst_fwd - - ip link set veth_src netns ${NS_SRC} - ip link set veth_src_fwd netns ${NS_FWD} - - ip link set veth_dst netns ${NS_DST} - ip link set veth_dst_fwd netns ${NS_FWD} - - ip -netns ${NS_SRC} addr add ${IP4_SRC}/32 dev veth_src - ip -netns ${NS_DST} addr add ${IP4_DST}/32 dev veth_dst - - # The fwd netns automatically get a v6 LL address / routes, but also - # needs v4 one in order to start ARP probing. IP4_NET route is added - # to the endpoints so that the ARP processing will reply. - - ip -netns ${NS_FWD} addr add ${IP4_SLL}/32 dev veth_src_fwd - ip -netns ${NS_FWD} addr add ${IP4_DLL}/32 dev veth_dst_fwd - - ip -netns ${NS_SRC} addr add ${IP6_SRC}/128 dev veth_src nodad - ip -netns ${NS_DST} addr add ${IP6_DST}/128 dev veth_dst nodad - - ip -netns ${NS_SRC} link set dev veth_src up - ip -netns ${NS_FWD} link set dev veth_src_fwd up - - ip -netns ${NS_DST} link set dev veth_dst up - ip -netns ${NS_FWD} link set dev veth_dst_fwd up - - ip -netns ${NS_SRC} route add ${IP4_DST}/32 dev veth_src scope global - ip -netns ${NS_SRC} route add ${IP4_NET}/16 dev veth_src scope global - ip -netns ${NS_FWD} route add ${IP4_SRC}/32 dev veth_src_fwd scope global - - ip -netns ${NS_SRC} route add ${IP6_DST}/128 dev veth_src scope global - ip -netns ${NS_FWD} route add ${IP6_SRC}/128 dev veth_src_fwd scope global - - ip -netns ${NS_DST} route add ${IP4_SRC}/32 dev veth_dst scope global - ip -netns ${NS_DST} route add ${IP4_NET}/16 dev veth_dst scope global - ip -netns ${NS_FWD} route add ${IP4_DST}/32 dev veth_dst_fwd scope global - - ip -netns ${NS_DST} route add ${IP6_SRC}/128 dev veth_dst scope global - ip -netns ${NS_FWD} route add ${IP6_DST}/128 dev veth_dst_fwd scope global - - fmac_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/address) - fmac_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/address) - - ip -netns ${NS_SRC} neigh add ${IP4_DST} dev veth_src lladdr $fmac_src - ip -netns ${NS_DST} neigh add ${IP4_SRC} dev veth_dst lladdr $fmac_dst - - ip -netns ${NS_SRC} neigh add ${IP6_DST} dev veth_src lladdr $fmac_src - ip -netns ${NS_DST} neigh add ${IP6_SRC} dev veth_dst lladdr $fmac_dst -} - -netns_test_connectivity() -{ - set +e - - ip netns exec ${NS_DST} bash -c "nc -4 -l -p 9004 &" - ip netns exec ${NS_DST} bash -c "nc -6 -l -p 9006 &" - - TEST="TCPv4 connectivity test" - ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP4_DST}/9004" - if [ $? -ne 0 ]; then - echo -e "${TEST}: ${RED}FAIL${NC}" - exit 1 - fi - echo -e "${TEST}: ${GREEN}PASS${NC}" - - TEST="TCPv6 connectivity test" - ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP6_DST}/9006" - if [ $? -ne 0 ]; then - echo -e "${TEST}: ${RED}FAIL${NC}" - exit 1 - fi - echo -e "${TEST}: ${GREEN}PASS${NC}" - - TEST="ICMPv4 connectivity test" - ip netns exec ${NS_SRC} ping $PING_ARG ${IP4_DST} - if [ $? -ne 0 ]; then - echo -e "${TEST}: ${RED}FAIL${NC}" - exit 1 - fi - echo -e "${TEST}: ${GREEN}PASS${NC}" - - TEST="ICMPv6 connectivity test" - ip netns exec ${NS_SRC} ping6 $PING_ARG ${IP6_DST} - if [ $? -ne 0 ]; then - echo -e "${TEST}: ${RED}FAIL${NC}" - exit 1 - fi - echo -e "${TEST}: ${GREEN}PASS${NC}" - - set -e -} - -hex_mem_str() -{ - perl -e 'print join(" ", unpack("(H2)8", pack("L", @ARGV)))' $1 -} - -netns_setup_bpf() -{ - local obj=$1 - - ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact - ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj $obj sec src_ingress - ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd egress bpf da obj $obj sec chk_egress - - ip netns exec ${NS_FWD} tc qdisc add dev veth_dst_fwd clsact - ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj $obj sec dst_ingress - ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress bpf da obj $obj sec chk_egress - - veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex) - veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex) - - progs=$(ip netns exec ${NS_FWD} bpftool net --json | jq -r '.[] | .tc | map(.id) | .[]') - for prog in $progs; do - map=$(bpftool prog show id $prog --json | jq -r '.map_ids | .? | .[]') - if [ ! -z "$map" ]; then - bpftool map update id $map key hex $(hex_mem_str 0) value hex $(hex_mem_str $veth_src) - bpftool map update id $map key hex $(hex_mem_str 1) value hex $(hex_mem_str $veth_dst) - fi - done -} - -trap netns_cleanup EXIT -set -e - -netns_setup -netns_setup_bpf test_tc_neigh.o -netns_test_connectivity -netns_cleanup -netns_setup -netns_setup_bpf test_tc_peer.o -netns_test_connectivity diff --git a/tools/testing/selftests/bpf/test_xdp_veth.sh b/tools/testing/selftests/bpf/test_xdp_veth.sh index ba8ffcdaac302..995278e684b6e 100755 --- a/tools/testing/selftests/bpf/test_xdp_veth.sh +++ b/tools/testing/selftests/bpf/test_xdp_veth.sh @@ -108,7 +108,7 @@ ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1 ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2 ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp_dummy -ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec tx +ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec xdp ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp_dummy trap cleanup EXIT diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c index 92c02e4a1b626..313b345eddcc3 100644 --- a/tools/testing/selftests/bpf/verifier/bounds.c +++ b/tools/testing/selftests/bpf/verifier/bounds.c @@ -411,16 +411,14 @@ BPF_ALU32_IMM(BPF_RSH, BPF_REG_1, 31), /* r1 = 0xffff'fffe (NOT 0!) */ BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 2), - /* computes OOB pointer */ + /* error on computing OOB pointer */ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - /* OOB access */ - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), /* exit */ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 3 }, - .errstr = "R0 invalid mem access", + .errstr = "math between map_value pointer and 4294967294 is not allowed", .result = REJECT, }, { diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c index 9ed192e14f5fe..b2ce50bb935b8 100644 --- a/tools/testing/selftests/bpf/verifier/sock.c +++ b/tools/testing/selftests/bpf/verifier/sock.c @@ -121,7 +121,25 @@ .result = ACCEPT, }, { - "sk_fullsock(skb->sk): sk->dst_port [narrow load]", + "sk_fullsock(skb->sk): sk->dst_port [word load] (backward compatibility)", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "sk_fullsock(skb->sk): sk->dst_port [half load]", .insns = { BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), @@ -139,7 +157,64 @@ .result = ACCEPT, }, { - "sk_fullsock(skb->sk): sk->dst_port [load 2nd byte]", + "sk_fullsock(skb->sk): sk->dst_port [half load] (invalid)", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "invalid sock access", +}, +{ + "sk_fullsock(skb->sk): sk->dst_port [byte load]", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_0, offsetof(struct bpf_sock, dst_port)), + BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "sk_fullsock(skb->sk): sk->dst_port [byte load] (invalid)", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "invalid sock access", +}, +{ + "sk_fullsock(skb->sk): past sk->dst_port [half load] (invalid)", .insns = { BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), @@ -149,7 +224,7 @@ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_sock, dst_port)), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, diff --git a/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c b/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c index bfb97383e6b5a..b4ec228eb95d0 100644 --- a/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c +++ b/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c @@ -35,7 +35,7 @@ .prog_type = BPF_PROG_TYPE_XDP, }, { - "XDP pkt read, pkt_data' > pkt_end, good access", + "XDP pkt read, pkt_data' > pkt_end, corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, @@ -87,6 +87,41 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_data' > pkt_end, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' > pkt_end, corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_end > pkt_data', good access", .insns = { @@ -106,16 +141,16 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_end > pkt_data', bad access 1", + "XDP pkt read, pkt_end > pkt_data', corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -142,6 +177,42 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_end > pkt_data', corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end > pkt_data', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_data' < pkt_end, good access", .insns = { @@ -161,16 +232,16 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data' < pkt_end, bad access 1", + "XDP pkt read, pkt_data' < pkt_end, corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -198,7 +269,43 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_end < pkt_data', good access", + "XDP pkt read, pkt_data' < pkt_end, corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' < pkt_end, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end < pkt_data', corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, @@ -250,6 +357,41 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_end < pkt_data', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end < pkt_data', corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_data' >= pkt_end, good access", .insns = { @@ -268,15 +410,15 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data' >= pkt_end, bad access 1", + "XDP pkt read, pkt_data' >= pkt_end, corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -304,7 +446,41 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_end >= pkt_data', good access", + "XDP pkt read, pkt_data' >= pkt_end, corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' >= pkt_end, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end >= pkt_data', corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, @@ -359,7 +535,44 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data' <= pkt_end, good access", + "XDP pkt read, pkt_end >= pkt_data', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end >= pkt_data', corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' <= pkt_end, corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, @@ -413,6 +626,43 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_data' <= pkt_end, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' <= pkt_end, corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_end <= pkt_data', good access", .insns = { @@ -431,15 +681,15 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_end <= pkt_data', bad access 1", + "XDP pkt read, pkt_end <= pkt_data', corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -467,7 +717,41 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_meta' > pkt_data, good access", + "XDP pkt read, pkt_end <= pkt_data', corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end <= pkt_data', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' > pkt_data, corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), @@ -519,6 +803,41 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_meta' > pkt_data, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' > pkt_data, corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_data > pkt_meta', good access", .insns = { @@ -538,16 +857,16 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data > pkt_meta', bad access 1", + "XDP pkt read, pkt_data > pkt_meta', corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -574,6 +893,42 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_data > pkt_meta', corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data > pkt_meta', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_meta' < pkt_data, good access", .insns = { @@ -593,16 +948,16 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_meta' < pkt_data, bad access 1", + "XDP pkt read, pkt_meta' < pkt_data, corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -630,7 +985,43 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data < pkt_meta', good access", + "XDP pkt read, pkt_meta' < pkt_data, corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' < pkt_data, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data < pkt_meta', corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), @@ -682,6 +1073,41 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_data < pkt_meta', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data < pkt_meta', corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_meta' >= pkt_data, good access", .insns = { @@ -700,15 +1126,15 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_meta' >= pkt_data, bad access 1", + "XDP pkt read, pkt_meta' >= pkt_data, corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -736,7 +1162,41 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data >= pkt_meta', good access", + "XDP pkt read, pkt_meta' >= pkt_data, corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' >= pkt_data, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data >= pkt_meta', corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), @@ -791,7 +1251,44 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_meta' <= pkt_data, good access", + "XDP pkt read, pkt_data >= pkt_meta', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data >= pkt_meta', corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' <= pkt_data, corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), @@ -845,6 +1342,43 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_meta' <= pkt_data, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' <= pkt_data, corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_data <= pkt_meta', good access", .insns = { @@ -863,15 +1397,15 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data <= pkt_meta', bad access 1", + "XDP pkt read, pkt_data <= pkt_meta', corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -898,3 +1432,37 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_data <= pkt_meta', corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data <= pkt_meta', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c index 5e939ff1e3f95..4c2f2866cf40a 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/cgroup_util.c @@ -16,6 +16,7 @@ #include "cgroup_util.h" +/* Returns read len on success, or -errno on failure. */ static ssize_t read_text(const char *path, char *buf, size_t max_len) { ssize_t len; @@ -23,35 +24,29 @@ static ssize_t read_text(const char *path, char *buf, size_t max_len) fd = open(path, O_RDONLY); if (fd < 0) - return fd; + return -errno; len = read(fd, buf, max_len - 1); - if (len < 0) - goto out; - buf[len] = 0; -out: + if (len >= 0) + buf[len] = 0; + close(fd); - return len; + return len < 0 ? -errno : len; } +/* Returns written len on success, or -errno on failure. */ static ssize_t write_text(const char *path, char *buf, ssize_t len) { int fd; fd = open(path, O_WRONLY | O_APPEND); if (fd < 0) - return fd; + return -errno; len = write(fd, buf, len); - if (len < 0) { - close(fd); - return len; - } - close(fd); - - return len; + return len < 0 ? -errno : len; } char *cg_name(const char *root, const char *name) @@ -84,16 +79,16 @@ char *cg_control(const char *cgroup, const char *control) return ret; } +/* Returns 0 on success, or -errno on failure. */ int cg_read(const char *cgroup, const char *control, char *buf, size_t len) { char path[PATH_MAX]; + ssize_t ret; snprintf(path, sizeof(path), "%s/%s", cgroup, control); - if (read_text(path, buf, len) >= 0) - return 0; - - return -1; + ret = read_text(path, buf, len); + return ret >= 0 ? 0 : ret; } int cg_read_strcmp(const char *cgroup, const char *control, @@ -158,17 +153,15 @@ long cg_read_key_long(const char *cgroup, const char *control, const char *key) return atol(ptr + strlen(key)); } +/* Returns 0 on success, or -errno on failure. */ int cg_write(const char *cgroup, const char *control, char *buf) { char path[PATH_MAX]; - ssize_t len = strlen(buf); + ssize_t len = strlen(buf), ret; snprintf(path, sizeof(path), "%s/%s", cgroup, control); - - if (write_text(path, buf, len) == len) - return 0; - - return -1; + ret = write_text(path, buf, len); + return ret == len ? 0 : ret; } int cg_find_unified_root(char *root, size_t len) @@ -202,7 +195,7 @@ int cg_find_unified_root(char *root, size_t len) int cg_create(const char *cgroup) { - return mkdir(cgroup, 0644); + return mkdir(cgroup, 0755); } int cg_wait_for_proc_count(const char *cgroup, int count) @@ -416,5 +409,6 @@ char proc_read_text(int pid, const char *item, char *buf, size_t size) snprintf(path, sizeof(path), "/proc/%d/%s", pid, item); - return read_text(path, buf, size); + size = read_text(path, buf, size); + return size < 0 ? -1 : size; } diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index 79053a4f47838..599234c5e496c 100644 --- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c @@ -1,8 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#define _GNU_SOURCE #include +#include #include +#include #include +#include +#include #include #include @@ -354,6 +359,166 @@ static int test_cgcore_internal_process_constraint(const char *root) return ret; } +/* + * cgroup migration permission check should be performed based on the + * credentials at the time of open instead of write. + */ +static int test_cgcore_lesser_euid_open(const char *root) +{ + const uid_t test_euid = 65534; /* usually nobody, any !root is fine */ + int ret = KSFT_FAIL; + char *cg_test_a = NULL, *cg_test_b = NULL; + char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL; + int cg_test_b_procs_fd = -1; + uid_t saved_uid; + + cg_test_a = cg_name(root, "cg_test_a"); + cg_test_b = cg_name(root, "cg_test_b"); + + if (!cg_test_a || !cg_test_b) + goto cleanup; + + cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs"); + cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs"); + + if (!cg_test_a_procs || !cg_test_b_procs) + goto cleanup; + + if (cg_create(cg_test_a) || cg_create(cg_test_b)) + goto cleanup; + + if (cg_enter_current(cg_test_a)) + goto cleanup; + + if (chown(cg_test_a_procs, test_euid, -1) || + chown(cg_test_b_procs, test_euid, -1)) + goto cleanup; + + saved_uid = geteuid(); + if (seteuid(test_euid)) + goto cleanup; + + cg_test_b_procs_fd = open(cg_test_b_procs, O_RDWR); + + if (seteuid(saved_uid)) + goto cleanup; + + if (cg_test_b_procs_fd < 0) + goto cleanup; + + if (write(cg_test_b_procs_fd, "0", 1) >= 0 || errno != EACCES) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_enter_current(root); + if (cg_test_b_procs_fd >= 0) + close(cg_test_b_procs_fd); + if (cg_test_b) + cg_destroy(cg_test_b); + if (cg_test_a) + cg_destroy(cg_test_a); + free(cg_test_b_procs); + free(cg_test_a_procs); + free(cg_test_b); + free(cg_test_a); + return ret; +} + +struct lesser_ns_open_thread_arg { + const char *path; + int fd; + int err; +}; + +static int lesser_ns_open_thread_fn(void *arg) +{ + struct lesser_ns_open_thread_arg *targ = arg; + + targ->fd = open(targ->path, O_RDWR); + targ->err = errno; + return 0; +} + +/* + * cgroup migration permission check should be performed based on the cgroup + * namespace at the time of open instead of write. + */ +static int test_cgcore_lesser_ns_open(const char *root) +{ + static char stack[65536]; + const uid_t test_euid = 65534; /* usually nobody, any !root is fine */ + int ret = KSFT_FAIL; + char *cg_test_a = NULL, *cg_test_b = NULL; + char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL; + int cg_test_b_procs_fd = -1; + struct lesser_ns_open_thread_arg targ = { .fd = -1 }; + pid_t pid; + int status; + + cg_test_a = cg_name(root, "cg_test_a"); + cg_test_b = cg_name(root, "cg_test_b"); + + if (!cg_test_a || !cg_test_b) + goto cleanup; + + cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs"); + cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs"); + + if (!cg_test_a_procs || !cg_test_b_procs) + goto cleanup; + + if (cg_create(cg_test_a) || cg_create(cg_test_b)) + goto cleanup; + + if (cg_enter_current(cg_test_b)) + goto cleanup; + + if (chown(cg_test_a_procs, test_euid, -1) || + chown(cg_test_b_procs, test_euid, -1)) + goto cleanup; + + targ.path = cg_test_b_procs; + pid = clone(lesser_ns_open_thread_fn, stack + sizeof(stack), + CLONE_NEWCGROUP | CLONE_FILES | CLONE_VM | SIGCHLD, + &targ); + if (pid < 0) + goto cleanup; + + if (waitpid(pid, &status, 0) < 0) + goto cleanup; + + if (!WIFEXITED(status)) + goto cleanup; + + cg_test_b_procs_fd = targ.fd; + if (cg_test_b_procs_fd < 0) + goto cleanup; + + if (cg_enter_current(cg_test_a)) + goto cleanup; + + if ((status = write(cg_test_b_procs_fd, "0", 1)) >= 0 || errno != ENOENT) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_enter_current(root); + if (cg_test_b_procs_fd >= 0) + close(cg_test_b_procs_fd); + if (cg_test_b) + cg_destroy(cg_test_b); + if (cg_test_a) + cg_destroy(cg_test_a); + free(cg_test_b_procs); + free(cg_test_a_procs); + free(cg_test_b); + free(cg_test_a); + return ret; +} + #define T(x) { x, #x } struct corecg_test { int (*fn)(const char *root); @@ -366,6 +531,8 @@ struct corecg_test { T(test_cgcore_parent_becomes_threaded), T(test_cgcore_invalid_domain), T(test_cgcore_populated), + T(test_cgcore_lesser_euid_open), + T(test_cgcore_lesser_ns_open), }; #undef T diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index c19a97dd02d49..94e16e383bcf8 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -210,13 +210,17 @@ static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg) static int alloc_anon_noexit(const char *cgroup, void *arg) { int ppid = getppid(); + size_t size = (unsigned long)arg; + char *buf, *ptr; - if (alloc_anon(cgroup, arg)) - return -1; + buf = malloc(size); + for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) + *ptr = 0; while (getppid() == ppid) sleep(1); + free(buf); return 0; } @@ -679,6 +683,111 @@ static int test_memcg_max(const char *root) return ret; } +/* + * This test checks that memory.reclaim reclaims the given + * amount of memory (from both anon and file, if possible). + */ +static int test_memcg_reclaim(const char *root) +{ + int ret = KSFT_FAIL, fd, retries; + char *memcg; + long current, expected_usage, to_reclaim; + char buf[64]; + + memcg = cg_name(root, "memcg_test"); + if (!memcg) + goto cleanup; + + if (cg_create(memcg)) + goto cleanup; + + current = cg_read_long(memcg, "memory.current"); + if (current != 0) + goto cleanup; + + fd = get_temp_fd(); + if (fd < 0) + goto cleanup; + + cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd); + + /* + * If swap is enabled, try to reclaim from both anon and file, else try + * to reclaim from file only. + */ + if (is_swap_enabled()) { + cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50)); + expected_usage = MB(100); + } else + expected_usage = MB(50); + + /* + * Wait until current usage reaches the expected usage (or we run out of + * retries). + */ + retries = 5; + while (!values_close(cg_read_long(memcg, "memory.current"), + expected_usage, 10)) { + if (retries--) { + sleep(1); + continue; + } else { + fprintf(stderr, + "failed to allocate %ld for memcg reclaim test\n", + expected_usage); + goto cleanup; + } + } + + /* + * Reclaim until current reaches 30M, this makes sure we hit both anon + * and file if swap is enabled. + */ + retries = 5; + while (true) { + int err; + + current = cg_read_long(memcg, "memory.current"); + to_reclaim = current - MB(30); + + /* + * We only keep looping if we get EAGAIN, which means we could + * not reclaim the full amount. + */ + if (to_reclaim <= 0) + goto cleanup; + + + snprintf(buf, sizeof(buf), "%ld", to_reclaim); + err = cg_write(memcg, "memory.reclaim", buf); + if (!err) { + /* + * If writing succeeds, then the written amount should have been + * fully reclaimed (and maybe more). + */ + current = cg_read_long(memcg, "memory.current"); + if (!values_close(current, MB(30), 3) && current > MB(30)) + goto cleanup; + break; + } + + /* The kernel could not reclaim the full amount, try again. */ + if (err == -EAGAIN && retries--) + continue; + + /* We got an unexpected error or ran out of retries. */ + goto cleanup; + } + + ret = KSFT_PASS; +cleanup: + cg_destroy(memcg); + free(memcg); + close(fd); + + return ret; +} + static int alloc_anon_50M_check_swap(const char *cgroup, void *arg) { long mem_max = (long)arg; @@ -1181,6 +1290,7 @@ struct memcg_test { T(test_memcg_low), T(test_memcg_high), T(test_memcg_max), + T(test_memcg_reclaim), T(test_memcg_oom_events), T(test_memcg_swap_max), T(test_memcg_sock), diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh index fedcb7b35af9f..af5ea50ed5c0e 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh @@ -172,6 +172,17 @@ flooding_filters_add() local lsb local i + # Prevent unwanted packets from entering the bridge and interfering + # with the test. + tc qdisc add dev br0 clsact + tc filter add dev br0 egress protocol all pref 1 handle 1 \ + matchall skip_hw action drop + tc qdisc add dev $h1 clsact + tc filter add dev $h1 egress protocol all pref 1 handle 1 \ + flower skip_hw dst_mac de:ad:be:ef:13:37 action pass + tc filter add dev $h1 egress protocol all pref 2 handle 2 \ + matchall skip_hw action drop + tc qdisc add dev $rp2 clsact for i in $(eval echo {1..$num_remotes}); do @@ -194,6 +205,12 @@ flooding_filters_del() done tc qdisc del dev $rp2 clsact + + tc filter del dev $h1 egress protocol all pref 2 handle 2 matchall + tc filter del dev $h1 egress protocol all pref 1 handle 1 flower + tc qdisc del dev $h1 clsact + tc filter del dev br0 egress protocol all pref 1 handle 1 matchall + tc qdisc del dev br0 clsact } flooding_check_packets() diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile index 12631f0076a10..11e157d7533b8 100644 --- a/tools/testing/selftests/futex/Makefile +++ b/tools/testing/selftests/futex/Makefile @@ -11,7 +11,7 @@ all: @for DIR in $(SUBDIRS); do \ BUILD_TARGET=$(OUTPUT)/$$DIR; \ mkdir $$BUILD_TARGET -p; \ - make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ if [ -e $$DIR/$(TEST_PROGS) ]; then \ rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/; \ fi \ @@ -32,6 +32,6 @@ override define CLEAN @for DIR in $(SUBDIRS); do \ BUILD_TARGET=$(OUTPUT)/$$DIR; \ mkdir $$BUILD_TARGET -p; \ - make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ done endef diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c index 231d79e57774e..cfe75536d8a55 100644 --- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c +++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "test_util.h" @@ -43,10 +44,39 @@ int main(int argc, char *argv[]) { int kvm_max_vcpu_id = kvm_check_cap(KVM_CAP_MAX_VCPU_ID); int kvm_max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); + /* + * Number of file descriptors reqired, KVM_CAP_MAX_VCPUS for vCPU fds + + * an arbitrary number for everything else. + */ + int nr_fds_wanted = kvm_max_vcpus + 100; + struct rlimit rl; printf("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id); printf("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus); + /* + * Check that we're allowed to open nr_fds_wanted file descriptors and + * try raising the limits if needed. + */ + TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!"); + + if (rl.rlim_cur < nr_fds_wanted) { + rl.rlim_cur = nr_fds_wanted; + if (rl.rlim_max < nr_fds_wanted) { + int old_rlim_max = rl.rlim_max; + rl.rlim_max = nr_fds_wanted; + + int r = setrlimit(RLIMIT_NOFILE, &rl); + if (r < 0) { + printf("RLIMIT_NOFILE hard limit is too low (%d, wanted %d)\n", + old_rlim_max, nr_fds_wanted); + exit(KSFT_SKIP); + } + } else { + TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!"); + } + } + /* * Upstream KVM prior to 4.8 does not support KVM_CAP_MAX_VCPU_ID. * Userspace is supposed to use KVM_CAP_MAX_VCPUS as the maximum ID diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c index 6cd91970fbad3..3b2a426070c44 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c +++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c @@ -73,20 +73,19 @@ void ucall_uninit(struct kvm_vm *vm) void ucall(uint64_t cmd, int nargs, ...) { - struct ucall uc = { - .cmd = cmd, - }; + struct ucall uc = {}; va_list va; int i; + WRITE_ONCE(uc.cmd, cmd); nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS; va_start(va, nargs); for (i = 0; i < nargs; ++i) - uc.args[i] = va_arg(va, uint64_t); + WRITE_ONCE(uc.args[i], va_arg(va, uint64_t)); va_end(va); - *ucall_exit_mmio_addr = (vm_vaddr_t)&uc; + WRITE_ONCE(*ucall_exit_mmio_addr, (vm_vaddr_t)&uc); } uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) diff --git a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c index 00bb97d76000f..92419b66b0578 100644 --- a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c +++ b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c @@ -82,8 +82,9 @@ int get_warnings_count(void) FILE *f; f = popen("dmesg | grep \"WARNING:\" | wc -l", "r"); - fscanf(f, "%d", &warnings); - fclose(f); + if (fscanf(f, "%d", &warnings) < 1) + warnings = 0; + pclose(f); return warnings; } diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 67386aa3f31d1..8794ce382bf5a 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -48,6 +48,7 @@ ARCH ?= $(SUBARCH) # When local build is done, headers are installed in the default # INSTALL_HDR_PATH usr/include. .PHONY: khdr +.NOTPARALLEL: khdr: ifndef KSFT_KHDR_INSTALL_DONE ifeq (1,$(DEFAULT_INSTALL_HDR_PATH)) diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c index c67d32eeb668e..290cec2a6a338 100644 --- a/tools/testing/selftests/memfd/memfd_test.c +++ b/tools/testing/selftests/memfd/memfd_test.c @@ -421,6 +421,7 @@ static void mfd_fail_write(int fd) printf("mmap()+mprotect() didn't fail as expected\n"); abort(); } + munmap(p, mfd_def_size); } /* verify PUNCH_HOLE fails */ diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c index b019e0b8221c7..84fda3b490735 100644 --- a/tools/testing/selftests/mqueue/mq_perf_tests.c +++ b/tools/testing/selftests/mqueue/mq_perf_tests.c @@ -180,6 +180,9 @@ void shutdown(int exit_val, char *err_cause, int line_no) if (in_shutdown++) return; + /* Free the cpu_set allocated using CPU_ALLOC in main function */ + CPU_FREE(cpu_set); + for (i = 0; i < num_cpus_to_pin; i++) if (cpu_threads[i]) { pthread_kill(cpu_threads[i], SIGUSR1); @@ -551,6 +554,12 @@ int main(int argc, char *argv[]) perror("sysconf(_SC_NPROCESSORS_ONLN)"); exit(1); } + + if (getuid() != 0) + ksft_exit_skip("Not running as root, but almost all tests " + "require root in order to modify\nsystem settings. " + "Exiting.\n"); + cpus_online = min(MAX_CPUS, sysconf(_SC_NPROCESSORS_ONLN)); cpu_set = CPU_ALLOC(cpus_online); if (cpu_set == NULL) { @@ -589,7 +598,7 @@ int main(int argc, char *argv[]) cpu_set)) { fprintf(stderr, "Any given CPU may " "only be given once.\n"); - exit(1); + goto err_code; } else CPU_SET_S(cpus_to_pin[cpu], cpu_set_size, cpu_set); @@ -607,7 +616,7 @@ int main(int argc, char *argv[]) queue_path = malloc(strlen(option) + 2); if (!queue_path) { perror("malloc()"); - exit(1); + goto err_code; } queue_path[0] = '/'; queue_path[1] = 0; @@ -622,17 +631,12 @@ int main(int argc, char *argv[]) fprintf(stderr, "Must pass at least one CPU to continuous " "mode.\n"); poptPrintUsage(popt_context, stderr, 0); - exit(1); + goto err_code; } else if (!continuous_mode) { num_cpus_to_pin = 1; cpus_to_pin[0] = cpus_online - 1; } - if (getuid() != 0) - ksft_exit_skip("Not running as root, but almost all tests " - "require root in order to modify\nsystem settings. " - "Exiting.\n"); - max_msgs = fopen(MAX_MSGS, "r+"); max_msgsize = fopen(MAX_MSGSIZE, "r+"); if (!max_msgs) @@ -740,4 +744,9 @@ int main(int argc, char *argv[]) sleep(1); } shutdown(0, "", 0); + +err_code: + CPU_FREE(cpu_set); + exit(1); + } diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 38133da2973d4..d2ac09b35dcf5 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -757,10 +757,16 @@ ipv4_ping() setup set_sysctl net.ipv4.raw_l3mdev_accept=1 2>/dev/null ipv4_ping_novrf + setup + set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + ipv4_ping_novrf log_subsection "With VRF" setup "yes" ipv4_ping_vrf + setup "yes" + set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + ipv4_ping_vrf } ################################################################################ @@ -1491,8 +1497,9 @@ ipv4_addr_bind_vrf() for a in ${NSA_IP} ${VRF_IP} do log_start + show_hint "Socket not bound to VRF, but address is in VRF" run_cmd nettest -s -R -P icmp -l ${a} -b - log_test_addr ${a} $? 0 "Raw socket bind to local address" + log_test_addr ${a} $? 1 "Raw socket bind to local address" log_start run_cmd nettest -s -R -P icmp -l ${a} -d ${NSA_DEV} -b @@ -1884,7 +1891,7 @@ ipv6_ping_vrf() log_start show_hint "Fails since VRF device does not support linklocal or multicast" run_cmd ${ping6} -c1 -w1 ${a} - log_test_addr ${a} $? 2 "ping out, VRF bind" + log_test_addr ${a} $? 1 "ping out, VRF bind" done for a in ${NSB_IP6} ${NSB_LO_IP6} ${NSB_LINKIP6}%${NSA_DEV} ${MCAST}%${NSA_DEV} @@ -2004,10 +2011,16 @@ ipv6_ping() log_subsection "No VRF" setup ipv6_ping_novrf + setup + set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + ipv6_ping_novrf log_subsection "With VRF" setup "yes" ipv6_ping_vrf + setup "yes" + set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + ipv6_ping_vrf } ################################################################################ @@ -2890,11 +2903,14 @@ ipv6_addr_bind_novrf() run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to local address after device bind" + # Sadly, the kernel allows binding a socket to a device and then + # binding to an address not on the device. So this test passes + # when it really should not a=${NSA_LO_IP6} log_start - show_hint "Should fail with 'Cannot assign requested address'" - run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b - log_test_addr ${a} $? 1 "TCP socket bind to out of scope local address" + show_hint "Tecnically should fail since address is not on device but kernel allows" + run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b + log_test_addr ${a} $? 0 "TCP socket bind to out of scope local address" } ipv6_addr_bind_vrf() @@ -2935,10 +2951,15 @@ ipv6_addr_bind_vrf() run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to local address with device bind" + # Sadly, the kernel allows binding a socket to a device and then + # binding to an address not on the device. The only restriction + # is that the address is valid in the L3 domain. So this test + # passes when it really should not a=${VRF_IP6} log_start - run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b - log_test_addr ${a} $? 1 "TCP socket bind to VRF address with device bind" + show_hint "Tecnically should fail since address is not on device but kernel allows" + run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b + log_test_addr ${a} $? 0 "TCP socket bind to VRF address with device bind" a=${NSA_LO_IP6} log_start @@ -3450,8 +3471,8 @@ EOF ################################################################################ # main -TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_addr_bind ipv4_runtime ipv4_netfilter" -TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_addr_bind ipv6_runtime ipv6_netfilter" +TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_bind ipv4_runtime ipv4_netfilter" +TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_bind ipv6_runtime ipv6_netfilter" TESTS_OTHER="use_cases" PAUSE_ON_FAIL=no diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 4e19a1c00ddd8..6986086035d6c 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -444,24 +444,63 @@ fib_rp_filter_test() setup set -e + ip netns add ns2 + ip netns set ns2 auto + + ip -netns ns2 link set dev lo up + + $IP link add name veth1 type veth peer name veth2 + $IP link set dev veth2 netns ns2 + $IP address add 192.0.2.1/24 dev veth1 + ip -netns ns2 address add 192.0.2.1/24 dev veth2 + $IP link set dev veth1 up + ip -netns ns2 link set dev veth2 up + $IP link set dev lo address 52:54:00:6a:c7:5e - $IP link set dummy0 address 52:54:00:6a:c7:5e - $IP link add dummy1 type dummy - $IP link set dummy1 address 52:54:00:6a:c7:5e - $IP link set dev dummy1 up + $IP link set dev veth1 address 52:54:00:6a:c7:5e + ip -netns ns2 link set dev lo address 52:54:00:6a:c7:5e + ip -netns ns2 link set dev veth2 address 52:54:00:6a:c7:5e + + # 1. (ns2) redirect lo's egress to veth2's egress + ip netns exec ns2 tc qdisc add dev lo parent root handle 1: fq_codel + ip netns exec ns2 tc filter add dev lo parent 1: protocol arp basic \ + action mirred egress redirect dev veth2 + ip netns exec ns2 tc filter add dev lo parent 1: protocol ip basic \ + action mirred egress redirect dev veth2 + + # 2. (ns1) redirect veth1's ingress to lo's ingress + $NS_EXEC tc qdisc add dev veth1 ingress + $NS_EXEC tc filter add dev veth1 ingress protocol arp basic \ + action mirred ingress redirect dev lo + $NS_EXEC tc filter add dev veth1 ingress protocol ip basic \ + action mirred ingress redirect dev lo + + # 3. (ns1) redirect lo's egress to veth1's egress + $NS_EXEC tc qdisc add dev lo parent root handle 1: fq_codel + $NS_EXEC tc filter add dev lo parent 1: protocol arp basic \ + action mirred egress redirect dev veth1 + $NS_EXEC tc filter add dev lo parent 1: protocol ip basic \ + action mirred egress redirect dev veth1 + + # 4. (ns2) redirect veth2's ingress to lo's ingress + ip netns exec ns2 tc qdisc add dev veth2 ingress + ip netns exec ns2 tc filter add dev veth2 ingress protocol arp basic \ + action mirred ingress redirect dev lo + ip netns exec ns2 tc filter add dev veth2 ingress protocol ip basic \ + action mirred ingress redirect dev lo + $NS_EXEC sysctl -qw net.ipv4.conf.all.rp_filter=1 $NS_EXEC sysctl -qw net.ipv4.conf.all.accept_local=1 $NS_EXEC sysctl -qw net.ipv4.conf.all.route_localnet=1 - - $NS_EXEC tc qd add dev dummy1 parent root handle 1: fq_codel - $NS_EXEC tc filter add dev dummy1 parent 1: protocol arp basic action mirred egress redirect dev lo - $NS_EXEC tc filter add dev dummy1 parent 1: protocol ip basic action mirred egress redirect dev lo + ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=1 + ip netns exec ns2 sysctl -qw net.ipv4.conf.all.accept_local=1 + ip netns exec ns2 sysctl -qw net.ipv4.conf.all.route_localnet=1 set +e - run_cmd "ip netns exec ns1 ping -I dummy1 -w1 -c1 198.51.100.1" + run_cmd "ip netns exec ns2 ping -w1 -c1 192.0.2.1" log_test $? 0 "rp_filter passes local packets" - run_cmd "ip netns exec ns1 ping -I dummy1 -w1 -c1 127.0.0.1" + run_cmd "ip netns exec ns2 ping -w1 -c1 127.0.0.1" log_test $? 0 "rp_filter passes loopback packets" cleanup diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample index e2adb533c8fcb..e71c61ee4cc67 100644 --- a/tools/testing/selftests/net/forwarding/forwarding.config.sample +++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample @@ -13,6 +13,8 @@ NETIFS[p5]=veth4 NETIFS[p6]=veth5 NETIFS[p7]=veth6 NETIFS[p8]=veth7 +NETIFS[p9]=veth8 +NETIFS[p10]=veth9 ############################################################################## # Defines diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 85c587a03c8a5..f190ad58e00d4 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -894,6 +894,7 @@ learning_test() # FDB entry was installed. bridge link set dev $br_port1 flood off + ip link set $host1_if promisc on tc qdisc add dev $host1_if ingress tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \ flower dst_mac $mac action drop @@ -904,7 +905,7 @@ learning_test() tc -j -s filter show dev $host1_if ingress \ | jq -e ".[] | select(.options.handle == 101) \ | select(.options.actions[0].stats.packets == 1)" &> /dev/null - check_fail $? "Packet reached second host when should not" + check_fail $? "Packet reached first host when should not" $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q sleep 1 @@ -943,6 +944,7 @@ learning_test() tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower tc qdisc del dev $host1_if ingress + ip link set $host1_if promisc off bridge link set dev $br_port1 flood on @@ -960,6 +962,7 @@ flood_test_do() # Add an ACL on `host2_if` which will tell us whether the packet # was flooded to it or not. + ip link set $host2_if promisc on tc qdisc add dev $host2_if ingress tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \ flower dst_mac $mac action drop @@ -977,6 +980,7 @@ flood_test_do() tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower tc qdisc del dev $host2_if ingress + ip link set $host2_if promisc off return $err } diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh index a3402cd8d5b68..9ff22f28032dd 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh @@ -61,9 +61,12 @@ setup_prepare() vrf_prepare mirror_gre_topo_create + # Avoid changing br1's PVID while it is operational as a L3 interface. + ip link set dev br1 down ip link set dev $swp3 master br1 bridge vlan add dev br1 vid 555 pvid untagged self + ip link set dev br1 up ip address add dev br1 192.0.2.129/28 ip address add dev br1 2001:db8:2::1/64 diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 3429767cadcdd..88be9083b923b 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -579,7 +579,6 @@ setup_routing() { setup() { [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip - cleanup for arg do eval setup_${arg} || { echo " ${arg} not supported"; return 1; } done @@ -590,7 +589,7 @@ trace() { for arg do [ "${ns_cmd}" = "" ] && ns_cmd="${arg}" && continue - ${ns_cmd} tcpdump -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null & + ${ns_cmd} tcpdump --immediate-mode -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null & tcpdump_pids="${tcpdump_pids} $!" ns_cmd= done @@ -1182,6 +1181,10 @@ run_test() { unset IFS + # Since cleanup() relies on variables modified by this subshell, it + # has to run in this context. + trap cleanup EXIT + if [ "$VERBOSE" = "1" ]; then printf "\n##########################################################################\n\n" fi diff --git a/tools/testing/selftests/net/test_vxlan_under_vrf.sh b/tools/testing/selftests/net/test_vxlan_under_vrf.sh index 09f9ed92cbe4c..a44b9aca74272 100755 --- a/tools/testing/selftests/net/test_vxlan_under_vrf.sh +++ b/tools/testing/selftests/net/test_vxlan_under_vrf.sh @@ -118,11 +118,11 @@ echo "[ OK ]" # Move the underlay to a non-default VRF ip -netns hv-1 link set veth0 vrf vrf-underlay -ip -netns hv-1 link set veth0 down -ip -netns hv-1 link set veth0 up +ip -netns hv-1 link set vxlan0 down +ip -netns hv-1 link set vxlan0 up ip -netns hv-2 link set veth0 vrf vrf-underlay -ip -netns hv-2 link set veth0 down -ip -netns hv-2 link set veth0 up +ip -netns hv-2 link set vxlan0 down +ip -netns hv-2 link set vxlan0 up echo -n "Check VM connectivity through VXLAN (underlay in a VRF) " ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false) diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index c66da6ffd6d8d..7badaf215de28 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -156,13 +156,13 @@ struct testcase testcases_v4[] = { }, { /* send max number of min sized segments */ - .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4, + .tlen = UDP_MAX_SEGMENTS, .gso_len = 1, - .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4, + .r_num_mss = UDP_MAX_SEGMENTS, }, { /* send max number + 1 of min sized segments: fail */ - .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4 + 1, + .tlen = UDP_MAX_SEGMENTS + 1, .gso_len = 1, .tfail = true, }, @@ -259,13 +259,13 @@ struct testcase testcases_v6[] = { }, { /* send max number of min sized segments */ - .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6, + .tlen = UDP_MAX_SEGMENTS, .gso_len = 1, - .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6, + .r_num_mss = UDP_MAX_SEGMENTS, }, { /* send max number + 1 of min sized segments: fail */ - .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6 + 1, + .tlen = UDP_MAX_SEGMENTS + 1, .gso_len = 1, .tfail = true, }, diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh index 80b5d352702e5..dc932fd653634 100755 --- a/tools/testing/selftests/net/udpgso_bench.sh +++ b/tools/testing/selftests/net/udpgso_bench.sh @@ -120,7 +120,7 @@ run_all() { run_udp "${ipv4_args}" echo "ipv6" - run_tcp "${ipv4_args}" + run_tcp "${ipv6_args}" run_udp "${ipv6_args}" } diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c index 76a24052f4b47..6a193425c367f 100644 --- a/tools/testing/selftests/net/udpgso_bench_rx.c +++ b/tools/testing/selftests/net/udpgso_bench_rx.c @@ -293,19 +293,17 @@ static void usage(const char *filepath) static void parse_opts(int argc, char **argv) { + const char *bind_addr = NULL; int c; - /* bind to any by default */ - setup_sockaddr(PF_INET6, "::", &cfg_bind_addr); while ((c = getopt(argc, argv, "4b:C:Gl:n:p:rR:S:tv")) != -1) { switch (c) { case '4': cfg_family = PF_INET; cfg_alen = sizeof(struct sockaddr_in); - setup_sockaddr(PF_INET, "0.0.0.0", &cfg_bind_addr); break; case 'b': - setup_sockaddr(cfg_family, optarg, &cfg_bind_addr); + bind_addr = optarg; break; case 'C': cfg_connect_timeout_ms = strtoul(optarg, NULL, 0); @@ -341,6 +339,11 @@ static void parse_opts(int argc, char **argv) } } + if (!bind_addr) + bind_addr = cfg_family == PF_INET6 ? "::" : "0.0.0.0"; + + setup_sockaddr(cfg_family, bind_addr, &cfg_bind_addr); + if (optind != argc) usage(argv[0]); diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c index 17512a43885e7..f1fdaa2702913 100644 --- a/tools/testing/selftests/net/udpgso_bench_tx.c +++ b/tools/testing/selftests/net/udpgso_bench_tx.c @@ -419,6 +419,7 @@ static void usage(const char *filepath) static void parse_opts(int argc, char **argv) { + const char *bind_addr = NULL; int max_len, hdrlen; int c; @@ -446,7 +447,7 @@ static void parse_opts(int argc, char **argv) cfg_cpu = strtol(optarg, NULL, 0); break; case 'D': - setup_sockaddr(cfg_family, optarg, &cfg_dst_addr); + bind_addr = optarg; break; case 'l': cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000; @@ -492,6 +493,11 @@ static void parse_opts(int argc, char **argv) } } + if (!bind_addr) + bind_addr = cfg_family == PF_INET6 ? "::" : "0.0.0.0"; + + setup_sockaddr(cfg_family, bind_addr, &cfg_dst_addr); + if (optind != argc) usage(argv[0]); diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 4144984ebee56..d0c56d5a5c528 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -2,6 +2,7 @@ # Makefile for netfilter selftests TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ - conntrack_icmp_related.sh nft_flowtable.sh + conntrack_icmp_related.sh nft_flowtable.sh \ + conntrack_vrf.sh include ../lib.mk diff --git a/tools/testing/selftests/netfilter/conntrack_vrf.sh b/tools/testing/selftests/netfilter/conntrack_vrf.sh new file mode 100755 index 0000000000000..8b5ea92345882 --- /dev/null +++ b/tools/testing/selftests/netfilter/conntrack_vrf.sh @@ -0,0 +1,241 @@ +#!/bin/sh + +# This script demonstrates interaction of conntrack and vrf. +# The vrf driver calls the netfilter hooks again, with oif/iif +# pointing at the VRF device. +# +# For ingress, this means first iteration has iifname of lower/real +# device. In this script, thats veth0. +# Second iteration is iifname set to vrf device, tvrf in this script. +# +# For egress, this is reversed: first iteration has the vrf device, +# second iteration is done with the lower/real/veth0 device. +# +# test_ct_zone_in demonstrates unexpected change of nftables +# behavior # caused by commit 09e856d54bda5f28 "vrf: Reset skb conntrack +# connection on VRF rcv" +# +# It was possible to assign conntrack zone to a packet (or mark it for +# `notracking`) in the prerouting chain before conntrack, based on real iif. +# +# After the change, the zone assignment is lost and the zone is assigned based +# on the VRF master interface (in case such a rule exists). +# assignment is lost. Instead, assignment based on the `iif` matching +# Thus it is impossible to distinguish packets based on the original +# interface. +# +# test_masquerade_vrf and test_masquerade_veth0 demonstrate the problem +# that was supposed to be fixed by the commit mentioned above to make sure +# that any fix to test case 1 won't break masquerade again. + +ksft_skip=4 + +IP0=172.30.30.1 +IP1=172.30.30.2 +PFXL=30 +ret=0 + +sfx=$(mktemp -u "XXXXXXXX") +ns0="ns0-$sfx" +ns1="ns1-$sfx" + +cleanup() +{ + ip netns pids $ns0 | xargs kill 2>/dev/null + ip netns pids $ns1 | xargs kill 2>/dev/null + + ip netns del $ns0 $ns1 +} + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ip netns add "$ns0" +if [ $? -ne 0 ];then + echo "SKIP: Could not create net namespace $ns0" + exit $ksft_skip +fi +ip netns add "$ns1" + +trap cleanup EXIT + +ip netns exec $ns0 sysctl -q -w net.ipv4.conf.default.rp_filter=0 +ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0 +ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0 + +ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not add veth device" + exit $ksft_skip +fi + +ip -net $ns0 li add tvrf type vrf table 9876 +if [ $? -ne 0 ];then + echo "SKIP: Could not add vrf device" + exit $ksft_skip +fi + +ip -net $ns0 li set lo up + +ip -net $ns0 li set veth0 master tvrf +ip -net $ns0 li set tvrf up +ip -net $ns0 li set veth0 up +ip -net $ns1 li set veth0 up + +ip -net $ns0 addr add $IP0/$PFXL dev veth0 +ip -net $ns1 addr add $IP1/$PFXL dev veth0 + +ip netns exec $ns1 iperf3 -s > /dev/null 2>&1& +if [ $? -ne 0 ];then + echo "SKIP: Could not start iperf3" + exit $ksft_skip +fi + +# test vrf ingress handling. +# The incoming connection should be placed in conntrack zone 1, +# as decided by the first iteration of the ruleset. +test_ct_zone_in() +{ +ip netns exec $ns0 nft -f - < /dev/null + + # should be in zone 1, not zone 2 + count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 1 2>/dev/null | wc -l) + if [ $count -eq 1 ]; then + echo "PASS: entry found in conntrack zone 1" + else + echo "FAIL: entry not found in conntrack zone 1" + count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 2 2> /dev/null | wc -l) + if [ $count -eq 1 ]; then + echo "FAIL: entry found in zone 2 instead" + else + echo "FAIL: entry not in zone 1 or 2, dumping table" + ip netns exec $ns0 conntrack -L + ip netns exec $ns0 nft list ruleset + fi + fi +} + +# add masq rule that gets evaluated w. outif set to vrf device. +# This tests the first iteration of the packet through conntrack, +# oifname is the vrf device. +test_masquerade_vrf() +{ + local qdisc=$1 + + if [ "$qdisc" != "default" ]; then + tc -net $ns0 qdisc add dev tvrf root $qdisc + fi + + ip netns exec $ns0 conntrack -F 2>/dev/null + +ip netns exec $ns0 nft -f - </dev/null + if [ $? -ne 0 ]; then + echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on vrf device" + ret=1 + return + fi + + # must also check that nat table was evaluated on second (lower device) iteration. + ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' && + ip netns exec $ns0 nft list table ip nat |grep -q 'untracked counter packets [1-9]' + if [ $? -eq 0 ]; then + echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device ($qdisc qdisc)" + else + echo "FAIL: vrf rules have unexpected counter value" + ret=1 + fi + + if [ "$qdisc" != "default" ]; then + tc -net $ns0 qdisc del dev tvrf root + fi +} + +# add masq rule that gets evaluated w. outif set to veth device. +# This tests the 2nd iteration of the packet through conntrack, +# oifname is the lower device (veth0 in this case). +test_masquerade_veth() +{ + ip netns exec $ns0 conntrack -F 2>/dev/null +ip netns exec $ns0 nft -f - < /dev/null + if [ $? -ne 0 ]; then + echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on veth device" + ret=1 + return + fi + + # must also check that nat table was evaluated on second (lower device) iteration. + ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' + if [ $? -eq 0 ]; then + echo "PASS: iperf3 connect with masquerade + sport rewrite on veth device" + else + echo "FAIL: vrf masq rule has unexpected counter value" + ret=1 + fi +} + +test_ct_zone_in +test_masquerade_vrf "default" +test_masquerade_vrf "pfifo" +test_masquerade_veth + +exit $ret diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh index 16571ac1dab40..1ccb12fe2511a 100755 --- a/tools/testing/selftests/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/netfilter/nft_flowtable.sh @@ -174,7 +174,6 @@ fi ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null if [ $? -ne 0 ];then echo "ERROR: ns1 cannot reach ns2" 1>&2 - bash exit 1 fi diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh index d7e07f4c3d7fc..4e15e81673104 100755 --- a/tools/testing/selftests/netfilter/nft_nat.sh +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -374,6 +374,45 @@ EOF return $lret } +test_local_dnat_portonly() +{ + local family=$1 + local daddr=$2 + local lret=0 + local sr_s + local sr_r + +ip netns exec "$ns0" nft -f /dev/stdin < #include +#include "../kselftest.h" #include "rseq.h" -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) - struct percpu_lock_entry { intptr_t v; } __attribute__((aligned(128))); @@ -168,7 +167,7 @@ struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list, for (;;) { struct percpu_list_node *head; intptr_t *targetptr, expectnot, *load; - off_t offset; + long offset; int ret, cpu; cpu = rseq_cpu_start(); diff --git a/tools/testing/selftests/rseq/compiler.h b/tools/testing/selftests/rseq/compiler.h new file mode 100644 index 0000000000000..876eb6a7f75be --- /dev/null +++ b/tools/testing/selftests/rseq/compiler.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */ +/* + * rseq/compiler.h + * + * Work-around asm goto compiler bugs. + * + * (C) Copyright 2021 - Mathieu Desnoyers + */ + +#ifndef RSEQ_COMPILER_H +#define RSEQ_COMPILER_H + +/* + * gcc prior to 4.8.2 miscompiles asm goto. + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 + * + * gcc prior to 8.1.0 miscompiles asm goto at O1. + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103908 + * + * clang prior to version 13.0.1 miscompiles asm goto at O2. + * https://github.com/llvm/llvm-project/issues/52735 + * + * Work around these issues by adding a volatile inline asm with + * memory clobber in the fallthrough after the asm goto and at each + * label target. Emit this for all compilers in case other similar + * issues are found in the future. + */ +#define rseq_after_asm_goto() asm volatile ("" : : : "memory") + +#endif /* RSEQ_COMPILER_H_ */ diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index e8a657a5f48a0..1f399aaf1efe2 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -159,7 +159,7 @@ unsigned int yield_mod_cnt, nr_abort; " cbnz " INJECT_ASM_REG ", 222b\n" \ "333:\n" -#elif __PPC__ +#elif defined(__PPC__) #define RSEQ_INJECT_INPUT \ , [loop_cnt_1]"m"(loop_cnt[1]) \ @@ -366,9 +366,7 @@ void *test_percpu_spinlock_thread(void *arg) abort(); reps = thread_data->reps; for (i = 0; i < reps; i++) { - int cpu = rseq_cpu_start(); - - cpu = rseq_this_cpu_lock(&data->lock); + int cpu = rseq_this_cpu_lock(&data->lock); data->c[cpu].count++; rseq_percpu_unlock(&data->lock, cpu); #ifndef BENCHMARK @@ -549,7 +547,7 @@ struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list, for (;;) { struct percpu_list_node *head; intptr_t *targetptr, expectnot, *load; - off_t offset; + long offset; int ret; cpu = rseq_cpu_start(); diff --git a/tools/testing/selftests/rseq/rseq-abi.h b/tools/testing/selftests/rseq/rseq-abi.h new file mode 100644 index 0000000000000..a8c44d9af71fb --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-abi.h @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +#ifndef _RSEQ_ABI_H +#define _RSEQ_ABI_H + +/* + * rseq-abi.h + * + * Restartable sequences system call API + * + * Copyright (c) 2015-2022 Mathieu Desnoyers + */ + +#include +#include + +enum rseq_abi_cpu_id_state { + RSEQ_ABI_CPU_ID_UNINITIALIZED = -1, + RSEQ_ABI_CPU_ID_REGISTRATION_FAILED = -2, +}; + +enum rseq_abi_flags { + RSEQ_ABI_FLAG_UNREGISTER = (1 << 0), +}; + +enum rseq_abi_cs_flags_bit { + RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT = 0, + RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT = 1, + RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT = 2, +}; + +enum rseq_abi_cs_flags { + RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT = + (1U << RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT), + RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL = + (1U << RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT), + RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE = + (1U << RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT), +}; + +/* + * struct rseq_abi_cs is aligned on 4 * 8 bytes to ensure it is always + * contained within a single cache-line. It is usually declared as + * link-time constant data. + */ +struct rseq_abi_cs { + /* Version of this structure. */ + __u32 version; + /* enum rseq_abi_cs_flags */ + __u32 flags; + __u64 start_ip; + /* Offset from start_ip. */ + __u64 post_commit_offset; + __u64 abort_ip; +} __attribute__((aligned(4 * sizeof(__u64)))); + +/* + * struct rseq_abi is aligned on 4 * 8 bytes to ensure it is always + * contained within a single cache-line. + * + * A single struct rseq_abi per thread is allowed. + */ +struct rseq_abi { + /* + * Restartable sequences cpu_id_start field. Updated by the + * kernel. Read by user-space with single-copy atomicity + * semantics. This field should only be read by the thread which + * registered this data structure. Aligned on 32-bit. Always + * contains a value in the range of possible CPUs, although the + * value may not be the actual current CPU (e.g. if rseq is not + * initialized). This CPU number value should always be compared + * against the value of the cpu_id field before performing a rseq + * commit or returning a value read from a data structure indexed + * using the cpu_id_start value. + */ + __u32 cpu_id_start; + /* + * Restartable sequences cpu_id field. Updated by the kernel. + * Read by user-space with single-copy atomicity semantics. This + * field should only be read by the thread which registered this + * data structure. Aligned on 32-bit. Values + * RSEQ_CPU_ID_UNINITIALIZED and RSEQ_CPU_ID_REGISTRATION_FAILED + * have a special semantic: the former means "rseq uninitialized", + * and latter means "rseq initialization failed". This value is + * meant to be read within rseq critical sections and compared + * with the cpu_id_start value previously read, before performing + * the commit instruction, or read and compared with the + * cpu_id_start value before returning a value loaded from a data + * structure indexed using the cpu_id_start value. + */ + __u32 cpu_id; + /* + * Restartable sequences rseq_cs field. + * + * Contains NULL when no critical section is active for the current + * thread, or holds a pointer to the currently active struct rseq_cs. + * + * Updated by user-space, which sets the address of the currently + * active rseq_cs at the beginning of assembly instruction sequence + * block, and set to NULL by the kernel when it restarts an assembly + * instruction sequence block, as well as when the kernel detects that + * it is preempting or delivering a signal outside of the range + * targeted by the rseq_cs. Also needs to be set to NULL by user-space + * before reclaiming memory that contains the targeted struct rseq_cs. + * + * Read and set by the kernel. Set by user-space with single-copy + * atomicity semantics. This field should only be updated by the + * thread which registered this data structure. Aligned on 64-bit. + */ + union { + __u64 ptr64; + + /* + * The "arch" field provides architecture accessor for + * the ptr field based on architecture pointer size and + * endianness. + */ + struct { +#ifdef __LP64__ + __u64 ptr; +#elif defined(__BYTE_ORDER) ? (__BYTE_ORDER == __BIG_ENDIAN) : defined(__BIG_ENDIAN) + __u32 padding; /* Initialized to zero. */ + __u32 ptr; +#else + __u32 ptr; + __u32 padding; /* Initialized to zero. */ +#endif + } arch; + } rseq_cs; + + /* + * Restartable sequences flags field. + * + * This field should only be updated by the thread which + * registered this data structure. Read by the kernel. + * Mainly used for single-stepping through rseq critical sections + * with debuggers. + * + * - RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT + * Inhibit instruction sequence block restart on preemption + * for this thread. + * - RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL + * Inhibit instruction sequence block restart on signal + * delivery for this thread. + * - RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE + * Inhibit instruction sequence block restart on migration for + * this thread. + */ + __u32 flags; +} __attribute__((aligned(4 * sizeof(__u64)))); + +#endif /* _RSEQ_ABI_H */ diff --git a/tools/testing/selftests/rseq/rseq-arm.h b/tools/testing/selftests/rseq/rseq-arm.h index 5943c816c07ce..893a11eca9d51 100644 --- a/tools/testing/selftests/rseq/rseq-arm.h +++ b/tools/testing/selftests/rseq/rseq-arm.h @@ -147,14 +147,11 @@ do { \ teardown \ "b %l[" __rseq_str(cmpfail_label) "]\n\t" -#define rseq_workaround_gcc_asm_size_guess() __asm__ __volatile__("") - static inline __attribute__((always_inline)) int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -185,8 +182,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -198,30 +195,31 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -255,8 +253,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expectnot] "r" (expectnot), @@ -270,19 +268,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -292,7 +292,6 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ #ifdef RSEQ_COMPARE_TWICE @@ -316,8 +315,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "m" (*v), [count] "Ir" (count) RSEQ_INJECT_INPUT @@ -328,14 +327,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) , error1 #endif ); - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); #endif } @@ -347,7 +347,6 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -381,8 +380,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -398,19 +397,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -422,7 +423,6 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -457,8 +457,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -474,19 +474,21 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -498,7 +500,6 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -537,8 +538,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* cmp2 input */ [v2] "m" (*v2), [expect2] "r" (expect2), @@ -554,21 +555,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, , error1, error2, error3 #endif ); - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("1st expected value comparison failed"); error3: + rseq_after_asm_goto(); rseq_bug("2nd expected value comparison failed"); #endif } @@ -582,7 +586,6 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -657,8 +660,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, "8:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expect] "r" (expect), @@ -678,21 +681,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -706,7 +709,6 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -782,8 +784,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, "8:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expect] "r" (expect), @@ -803,21 +805,21 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } diff --git a/tools/testing/selftests/rseq/rseq-arm64.h b/tools/testing/selftests/rseq/rseq-arm64.h index 200dae9e4208c..cbe190a4d0056 100644 --- a/tools/testing/selftests/rseq/rseq-arm64.h +++ b/tools/testing/selftests/rseq/rseq-arm64.h @@ -230,8 +230,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "Qo" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -242,24 +242,28 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) , error1, error2 #endif ); - + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { RSEQ_INJECT_C(9) @@ -287,8 +291,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "Qo" (*v), [expectnot] "r" (expectnot), [load] "Qo" (*load), @@ -300,16 +304,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -337,8 +346,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "Qo" (*v), [count] "r" (count) RSEQ_INJECT_INPUT @@ -348,12 +357,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) , error1 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); #endif } @@ -388,8 +400,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [expect] "r" (expect), [v] "Qo" (*v), [newv] "r" (newv), @@ -402,17 +414,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -447,8 +463,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [expect] "r" (expect), [v] "Qo" (*v), [newv] "r" (newv), @@ -461,17 +477,21 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -508,8 +528,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "Qo" (*v), [expect] "r" (expect), [v2] "Qo" (*v2), @@ -522,19 +542,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, , error1, error2, error3 #endif ); - + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); error3: + rseq_after_asm_goto(); rseq_bug("2nd expected value comparison failed"); #endif } @@ -569,8 +594,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [expect] "r" (expect), [v] "Qo" (*v), [newv] "r" (newv), @@ -584,17 +609,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -629,8 +658,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [expect] "r" (expect), [v] "Qo" (*v), [newv] "r" (newv), @@ -644,17 +673,21 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } diff --git a/tools/testing/selftests/rseq/rseq-generic-thread-pointer.h b/tools/testing/selftests/rseq/rseq-generic-thread-pointer.h new file mode 100644 index 0000000000000..38c5846615714 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-generic-thread-pointer.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */ +/* + * rseq-generic-thread-pointer.h + * + * (C) Copyright 2021 - Mathieu Desnoyers + */ + +#ifndef _RSEQ_GENERIC_THREAD_POINTER +#define _RSEQ_GENERIC_THREAD_POINTER + +#ifdef __cplusplus +extern "C" { +#endif + +/* Use gcc builtin thread pointer. */ +static inline void *rseq_thread_pointer(void) +{ + return __builtin_thread_pointer(); +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/testing/selftests/rseq/rseq-mips.h b/tools/testing/selftests/rseq/rseq-mips.h index e989e7c14b097..878739fae2fde 100644 --- a/tools/testing/selftests/rseq/rseq-mips.h +++ b/tools/testing/selftests/rseq/rseq-mips.h @@ -154,14 +154,11 @@ do { \ teardown \ "b %l[" __rseq_str(cmpfail_label) "]\n\t" -#define rseq_workaround_gcc_asm_size_guess() __asm__ __volatile__("") - static inline __attribute__((always_inline)) int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -190,8 +187,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -203,14 +200,11 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: @@ -222,11 +216,10 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -258,8 +251,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expectnot] "r" (expectnot), @@ -273,14 +266,11 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: @@ -295,7 +285,6 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ #ifdef RSEQ_COMPARE_TWICE @@ -319,8 +308,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "m" (*v), [count] "Ir" (count) RSEQ_INJECT_INPUT @@ -331,10 +320,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) , error1 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; #ifdef RSEQ_COMPARE_TWICE @@ -350,7 +337,6 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -382,8 +368,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -399,14 +385,11 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: @@ -423,7 +406,6 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -456,8 +438,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -473,14 +455,11 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: @@ -497,7 +476,6 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -532,8 +510,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* cmp2 input */ [v2] "m" (*v2), [expect2] "r" (expect2), @@ -549,14 +527,11 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, , error1, error2, error3 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: @@ -577,7 +552,6 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -649,8 +623,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, "8:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expect] "r" (expect), @@ -670,21 +644,16 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: - rseq_workaround_gcc_asm_size_guess(); rseq_bug("cpu_id comparison failed"); error2: - rseq_workaround_gcc_asm_size_guess(); rseq_bug("expected value comparison failed"); #endif } @@ -698,7 +667,6 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -771,8 +739,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, "8:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expect] "r" (expect), @@ -792,21 +760,16 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: - rseq_workaround_gcc_asm_size_guess(); rseq_bug("cpu_id comparison failed"); error2: - rseq_workaround_gcc_asm_size_guess(); rseq_bug("expected value comparison failed"); #endif } diff --git a/tools/testing/selftests/rseq/rseq-ppc-thread-pointer.h b/tools/testing/selftests/rseq/rseq-ppc-thread-pointer.h new file mode 100644 index 0000000000000..263eee84fb760 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-ppc-thread-pointer.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */ +/* + * rseq-ppc-thread-pointer.h + * + * (C) Copyright 2021 - Mathieu Desnoyers + */ + +#ifndef _RSEQ_PPC_THREAD_POINTER +#define _RSEQ_PPC_THREAD_POINTER + +#ifdef __cplusplus +extern "C" { +#endif + +static inline void *rseq_thread_pointer(void) +{ +#ifdef __powerpc64__ + register void *__result asm ("r13"); +#else + register void *__result asm ("r2"); +#endif + asm ("" : "=r" (__result)); + return __result; +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/testing/selftests/rseq/rseq-ppc.h b/tools/testing/selftests/rseq/rseq-ppc.h index 76be90196fe4f..bab8e0b9fb115 100644 --- a/tools/testing/selftests/rseq/rseq-ppc.h +++ b/tools/testing/selftests/rseq/rseq-ppc.h @@ -47,10 +47,13 @@ do { \ #ifdef __PPC64__ -#define STORE_WORD "std " -#define LOAD_WORD "ld " -#define LOADX_WORD "ldx " -#define CMP_WORD "cmpd " +#define RSEQ_STORE_LONG(arg) "std%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* To memory ("m" constraint) */ +#define RSEQ_STORE_INT(arg) "stw%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* To memory ("m" constraint) */ +#define RSEQ_LOAD_LONG(arg) "ld%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* From memory ("m" constraint) */ +#define RSEQ_LOAD_INT(arg) "lwz%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* From memory ("m" constraint) */ +#define RSEQ_LOADX_LONG "ldx " /* From base register ("b" constraint) */ +#define RSEQ_CMP_LONG "cmpd " +#define RSEQ_CMP_LONG_INT "cmpdi " #define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \ start_ip, post_commit_offset, abort_ip) \ @@ -89,10 +92,13 @@ do { \ #else /* #ifdef __PPC64__ */ -#define STORE_WORD "stw " -#define LOAD_WORD "lwz " -#define LOADX_WORD "lwzx " -#define CMP_WORD "cmpw " +#define RSEQ_STORE_LONG(arg) "stw%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* To memory ("m" constraint) */ +#define RSEQ_STORE_INT(arg) RSEQ_STORE_LONG(arg) /* To memory ("m" constraint) */ +#define RSEQ_LOAD_LONG(arg) "lwz%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* From memory ("m" constraint) */ +#define RSEQ_LOAD_INT(arg) RSEQ_LOAD_LONG(arg) /* From memory ("m" constraint) */ +#define RSEQ_LOADX_LONG "lwzx " /* From base register ("b" constraint) */ +#define RSEQ_CMP_LONG "cmpw " +#define RSEQ_CMP_LONG_INT "cmpwi " #define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \ start_ip, post_commit_offset, abort_ip) \ @@ -125,7 +131,7 @@ do { \ RSEQ_INJECT_ASM(1) \ "lis %%r17, (" __rseq_str(cs_label) ")@ha\n\t" \ "addi %%r17, %%r17, (" __rseq_str(cs_label) ")@l\n\t" \ - "stw %%r17, %[" __rseq_str(rseq_cs) "]\n\t" \ + RSEQ_STORE_INT(rseq_cs) "%%r17, %[" __rseq_str(rseq_cs) "]\n\t" \ __rseq_str(label) ":\n\t" #endif /* #ifdef __PPC64__ */ @@ -136,7 +142,7 @@ do { \ #define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \ RSEQ_INJECT_ASM(2) \ - "lwz %%r17, %[" __rseq_str(current_cpu_id) "]\n\t" \ + RSEQ_LOAD_INT(current_cpu_id) "%%r17, %[" __rseq_str(current_cpu_id) "]\n\t" \ "cmpw cr7, %[" __rseq_str(cpu_id) "], %%r17\n\t" \ "bne- cr7, " __rseq_str(label) "\n\t" @@ -153,25 +159,25 @@ do { \ * RSEQ_ASM_OP_* (else): doesn't have hard-code registers(unless cr7) */ #define RSEQ_ASM_OP_CMPEQ(var, expect, label) \ - LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \ - CMP_WORD "cr7, %%r17, %[" __rseq_str(expect) "]\n\t" \ + RSEQ_LOAD_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" \ + RSEQ_CMP_LONG "cr7, %%r17, %[" __rseq_str(expect) "]\n\t" \ "bne- cr7, " __rseq_str(label) "\n\t" #define RSEQ_ASM_OP_CMPNE(var, expectnot, label) \ - LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \ - CMP_WORD "cr7, %%r17, %[" __rseq_str(expectnot) "]\n\t" \ + RSEQ_LOAD_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" \ + RSEQ_CMP_LONG "cr7, %%r17, %[" __rseq_str(expectnot) "]\n\t" \ "beq- cr7, " __rseq_str(label) "\n\t" #define RSEQ_ASM_OP_STORE(value, var) \ - STORE_WORD "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" + RSEQ_STORE_LONG(var) "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" /* Load @var to r17 */ #define RSEQ_ASM_OP_R_LOAD(var) \ - LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t" + RSEQ_LOAD_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" /* Store r17 to @var */ #define RSEQ_ASM_OP_R_STORE(var) \ - STORE_WORD "%%r17, %[" __rseq_str(var) "]\n\t" + RSEQ_STORE_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" /* Add @count to r17 */ #define RSEQ_ASM_OP_R_ADD(count) \ @@ -179,11 +185,11 @@ do { \ /* Load (r17 + voffp) to r17 */ #define RSEQ_ASM_OP_R_LOADX(voffp) \ - LOADX_WORD "%%r17, %[" __rseq_str(voffp) "], %%r17\n\t" + RSEQ_LOADX_LONG "%%r17, %[" __rseq_str(voffp) "], %%r17\n\t" /* TODO: implement a faster memcpy. */ #define RSEQ_ASM_OP_R_MEMCPY() \ - "cmpdi %%r19, 0\n\t" \ + RSEQ_CMP_LONG_INT "%%r19, 0\n\t" \ "beq 333f\n\t" \ "addi %%r20, %%r20, -1\n\t" \ "addi %%r21, %%r21, -1\n\t" \ @@ -191,16 +197,16 @@ do { \ "lbzu %%r18, 1(%%r20)\n\t" \ "stbu %%r18, 1(%%r21)\n\t" \ "addi %%r19, %%r19, -1\n\t" \ - "cmpdi %%r19, 0\n\t" \ + RSEQ_CMP_LONG_INT "%%r19, 0\n\t" \ "bne 222b\n\t" \ "333:\n\t" \ #define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label) \ - STORE_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \ + RSEQ_STORE_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" \ __rseq_str(post_commit_label) ":\n\t" #define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label) \ - STORE_WORD "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" \ + RSEQ_STORE_LONG(var) "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" \ __rseq_str(post_commit_label) ":\n\t" static inline __attribute__((always_inline)) @@ -235,8 +241,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -248,23 +254,28 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { RSEQ_INJECT_C(9) @@ -301,8 +312,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expectnot] "r" (expectnot), @@ -316,16 +327,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -359,8 +375,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [count] "r" (count) @@ -372,12 +388,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) , error1 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); #endif } @@ -419,8 +438,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -436,16 +455,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -489,8 +513,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -506,16 +530,21 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -560,8 +589,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* cmp2 input */ [v2] "m" (*v2), [expect2] "r" (expect2), @@ -577,18 +606,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, , error1, error2, error3 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("1st expected value comparison failed"); error3: + rseq_after_asm_goto(); rseq_bug("2nd expected value comparison failed"); #endif } @@ -635,8 +670,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expect] "r" (expect), @@ -653,16 +688,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -711,8 +751,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expect] "r" (expect), @@ -729,23 +769,23 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } -#undef STORE_WORD -#undef LOAD_WORD -#undef LOADX_WORD -#undef CMP_WORD - #endif /* !RSEQ_SKIP_FASTPATH */ diff --git a/tools/testing/selftests/rseq/rseq-s390.h b/tools/testing/selftests/rseq/rseq-s390.h index 8ef94ad1cbb45..4e6dc5f0cb429 100644 --- a/tools/testing/selftests/rseq/rseq-s390.h +++ b/tools/testing/selftests/rseq/rseq-s390.h @@ -165,8 +165,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -178,16 +178,21 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -198,7 +203,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) */ static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { RSEQ_INJECT_C(9) @@ -233,8 +238,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expectnot] "r" (expectnot), @@ -248,16 +253,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -288,8 +298,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [count] "r" (count) @@ -301,12 +311,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) , error1 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); #endif } @@ -347,8 +360,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -364,16 +377,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -426,8 +444,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* cmp2 input */ [v2] "m" (*v2), [expect2] "r" (expect2), @@ -443,18 +461,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, , error1, error2, error3 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("1st expected value comparison failed"); error3: + rseq_after_asm_goto(); rseq_bug("2nd expected value comparison failed"); #endif } @@ -534,8 +558,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, #endif : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expect] "r" (expect), @@ -555,16 +579,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } diff --git a/tools/testing/selftests/rseq/rseq-skip.h b/tools/testing/selftests/rseq/rseq-skip.h index 72750b5905a96..7b53dac1fcdd9 100644 --- a/tools/testing/selftests/rseq/rseq-skip.h +++ b/tools/testing/selftests/rseq/rseq-skip.h @@ -13,7 +13,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { return -1; } diff --git a/tools/testing/selftests/rseq/rseq-thread-pointer.h b/tools/testing/selftests/rseq/rseq-thread-pointer.h new file mode 100644 index 0000000000000..977c25d758b2a --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-thread-pointer.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */ +/* + * rseq-thread-pointer.h + * + * (C) Copyright 2021 - Mathieu Desnoyers + */ + +#ifndef _RSEQ_THREAD_POINTER +#define _RSEQ_THREAD_POINTER + +#if defined(__x86_64__) || defined(__i386__) +#include "rseq-x86-thread-pointer.h" +#elif defined(__PPC__) +#include "rseq-ppc-thread-pointer.h" +#else +#include "rseq-generic-thread-pointer.h" +#endif + +#endif diff --git a/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h b/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h new file mode 100644 index 0000000000000..d3133587d9968 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */ +/* + * rseq-x86-thread-pointer.h + * + * (C) Copyright 2021 - Mathieu Desnoyers + */ + +#ifndef _RSEQ_X86_THREAD_POINTER +#define _RSEQ_X86_THREAD_POINTER + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#if __GNUC_PREREQ (11, 1) +static inline void *rseq_thread_pointer(void) +{ + return __builtin_thread_pointer(); +} +#else +static inline void *rseq_thread_pointer(void) +{ + void *__result; + +# ifdef __x86_64__ + __asm__ ("mov %%fs:0, %0" : "=r" (__result)); +# else + __asm__ ("mov %%gs:0, %0" : "=r" (__result)); +# endif + return __result; +} +#endif /* !GCC 11 */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h index b2da6004fe307..bd01dc41ca130 100644 --- a/tools/testing/selftests/rseq/rseq-x86.h +++ b/tools/testing/selftests/rseq/rseq-x86.h @@ -28,6 +28,8 @@ #ifdef __x86_64__ +#define RSEQ_ASM_TP_SEGMENT %%fs + #define rseq_smp_mb() \ __asm__ __volatile__ ("lock; addl $0,-128(%%rsp)" ::: "memory", "cc") #define rseq_smp_rmb() rseq_barrier() @@ -123,14 +125,14 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpq %[v], %[expect]\n\t" "jnz %l[cmpfail]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "cmpq %[v], %[expect]\n\t" "jnz %l[error2]\n\t" #endif @@ -141,7 +143,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -152,16 +154,21 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -172,7 +179,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) */ static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { RSEQ_INJECT_C(9) @@ -184,15 +191,15 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "movq %[v], %%rbx\n\t" "cmpq %%rbx, %[expectnot]\n\t" "je %l[cmpfail]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "movq %[v], %%rbx\n\t" "cmpq %%rbx, %[expectnot]\n\t" "je %l[error2]\n\t" @@ -207,7 +214,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* final store input */ [v] "m" (*v), [expectnot] "r" (expectnot), @@ -220,16 +227,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -245,11 +257,11 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) #endif /* final store */ "addq %[count], %[v]\n\t" @@ -258,7 +270,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* final store input */ [v] "m" (*v), [count] "er" (count) @@ -267,6 +279,66 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) : abort #ifdef RSEQ_COMPARE_TWICE , error1 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +#endif +} + +#define RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV + +/* + * pval = *(ptr+off) + * *pval += inc; + */ +static inline __attribute__((always_inline)) +int rseq_offset_deref_addv(intptr_t *ptr, long off, intptr_t inc, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) +#endif + /* get p+v */ + "movq %[ptr], %%rbx\n\t" + "addq %[off], %%rbx\n\t" + /* get pv */ + "movq (%%rbx), %%rcx\n\t" + /* *pv += inc */ + "addq %[inc], (%%rcx)\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(4) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [rseq_offset] "r" (rseq_offset), + /* final store input */ + [ptr] "m" (*ptr), + [off] "er" (off), + [inc] "er" (inc) + : "memory", "cc", "rax", "rbx", "rcx" + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 #endif ); return 0; @@ -294,14 +366,14 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpq %[v], %[expect]\n\t" "jnz %l[cmpfail]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "cmpq %[v], %[expect]\n\t" "jnz %l[error2]\n\t" #endif @@ -315,7 +387,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -330,16 +402,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -369,8 +446,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpq %[v], %[expect]\n\t" "jnz %l[cmpfail]\n\t" @@ -379,7 +456,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, "jnz %l[cmpfail]\n\t" RSEQ_INJECT_ASM(5) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "cmpq %[v], %[expect]\n\t" "jnz %l[error2]\n\t" "cmpq %[v2], %[expect2]\n\t" @@ -392,7 +469,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* cmp2 input */ [v2] "m" (*v2), [expect2] "r" (expect2), @@ -407,18 +484,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, , error1, error2, error3 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("1st expected value comparison failed"); error3: + rseq_after_asm_goto(); rseq_bug("2nd expected value comparison failed"); #endif } @@ -443,14 +526,14 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, "movq %[dst], %[rseq_scratch1]\n\t" "movq %[len], %[rseq_scratch2]\n\t" /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpq %[v], %[expect]\n\t" "jnz 5f\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 6f) "cmpq %[v], %[expect]\n\t" "jnz 7f\n\t" #endif @@ -498,7 +581,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, #endif : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* final store input */ [v] "m" (*v), [expect] "r" (expect), @@ -517,16 +600,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -543,7 +631,9 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, #endif /* !RSEQ_SKIP_FASTPATH */ -#elif __i386__ +#elif defined(__i386__) + +#define RSEQ_ASM_TP_SEGMENT %%gs #define rseq_smp_mb() \ __asm__ __volatile__ ("lock; addl $0,-128(%%esp)" ::: "memory", "cc") @@ -644,14 +734,14 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpl %[v], %[expect]\n\t" "jnz %l[cmpfail]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "cmpl %[v], %[expect]\n\t" "jnz %l[error2]\n\t" #endif @@ -662,7 +752,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -673,16 +763,21 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -693,7 +788,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) */ static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { RSEQ_INJECT_C(9) @@ -705,15 +800,15 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "movl %[v], %%ebx\n\t" "cmpl %%ebx, %[expectnot]\n\t" "je %l[cmpfail]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "movl %[v], %%ebx\n\t" "cmpl %%ebx, %[expectnot]\n\t" "je %l[error2]\n\t" @@ -728,7 +823,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* final store input */ [v] "m" (*v), [expectnot] "r" (expectnot), @@ -741,16 +836,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -766,11 +866,11 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) #endif /* final store */ "addl %[count], %[v]\n\t" @@ -779,7 +879,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* final store input */ [v] "m" (*v), [count] "ir" (count) @@ -790,12 +890,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) , error1 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); #endif } @@ -815,14 +918,14 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpl %[v], %[expect]\n\t" "jnz %l[cmpfail]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "cmpl %[v], %[expect]\n\t" "jnz %l[error2]\n\t" #endif @@ -837,7 +940,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* try store input */ [v2] "m" (*v2), [newv2] "m" (newv2), @@ -852,16 +955,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -881,15 +989,15 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "movl %[expect], %%eax\n\t" "cmpl %[v], %%eax\n\t" "jnz %l[cmpfail]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "movl %[expect], %%eax\n\t" "cmpl %[v], %%eax\n\t" "jnz %l[error2]\n\t" @@ -905,7 +1013,7 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -920,16 +1028,21 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif @@ -951,8 +1064,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpl %[v], %[expect]\n\t" "jnz %l[cmpfail]\n\t" @@ -961,7 +1074,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, "jnz %l[cmpfail]\n\t" RSEQ_INJECT_ASM(5) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "cmpl %[v], %[expect]\n\t" "jnz %l[error2]\n\t" "cmpl %[expect2], %[v2]\n\t" @@ -975,7 +1088,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* cmp2 input */ [v2] "m" (*v2), [expect2] "r" (expect2), @@ -990,18 +1103,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, , error1, error2, error3 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("1st expected value comparison failed"); error3: + rseq_after_asm_goto(); rseq_bug("2nd expected value comparison failed"); #endif } @@ -1027,15 +1146,15 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, "movl %[dst], %[rseq_scratch1]\n\t" "movl %[len], %[rseq_scratch2]\n\t" /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "movl %[expect], %%eax\n\t" "cmpl %%eax, %[v]\n\t" "jnz 5f\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 6f) "movl %[expect], %%eax\n\t" "cmpl %%eax, %[v]\n\t" "jnz 7f\n\t" @@ -1085,7 +1204,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, #endif : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* final store input */ [v] "m" (*v), [expect] "m" (expect), @@ -1104,16 +1223,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -1139,15 +1263,15 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, "movl %[dst], %[rseq_scratch1]\n\t" "movl %[len], %[rseq_scratch2]\n\t" /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "movl %[expect], %%eax\n\t" "cmpl %%eax, %[v]\n\t" "jnz 5f\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 6f) "movl %[expect], %%eax\n\t" "cmpl %%eax, %[v]\n\t" "jnz 7f\n\t" @@ -1198,7 +1322,7 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, #endif : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* final store input */ [v] "m" (*v), [expect] "m" (expect), @@ -1217,16 +1341,21 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index 7159eb777fd34..986b9458efb26 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -26,131 +26,124 @@ #include #include #include +#include +#include +#include "../kselftest.h" #include "rseq.h" -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) +static const ptrdiff_t *libc_rseq_offset_p; +static const unsigned int *libc_rseq_size_p; +static const unsigned int *libc_rseq_flags_p; -__thread volatile struct rseq __rseq_abi = { - .cpu_id = RSEQ_CPU_ID_UNINITIALIZED, -}; +/* Offset from the thread pointer to the rseq area. */ +ptrdiff_t rseq_offset; -/* - * Shared with other libraries. This library may take rseq ownership if it is - * still 0 when executing the library constructor. Set to 1 by library - * constructor when handling rseq. Set to 0 in destructor if handling rseq. - */ -int __rseq_handled; +/* Size of the registered rseq area. 0 if the registration was + unsuccessful. */ +unsigned int rseq_size = -1U; + +/* Flags used during rseq registration. */ +unsigned int rseq_flags; -/* Whether this library have ownership of rseq registration. */ static int rseq_ownership; -static __thread volatile uint32_t __rseq_refcount; +static +__thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"))) = { + .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED, +}; -static void signal_off_save(sigset_t *oldset) +static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len, + int flags, uint32_t sig) { - sigset_t set; - int ret; - - sigfillset(&set); - ret = pthread_sigmask(SIG_BLOCK, &set, oldset); - if (ret) - abort(); + return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig); } -static void signal_restore(sigset_t oldset) +int rseq_available(void) { - int ret; + int rc; - ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL); - if (ret) + rc = sys_rseq(NULL, 0, 0, 0); + if (rc != -1) abort(); -} - -static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len, - int flags, uint32_t sig) -{ - return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig); + switch (errno) { + case ENOSYS: + return 0; + case EINVAL: + return 1; + default: + abort(); + } } int rseq_register_current_thread(void) { - int rc, ret = 0; - sigset_t oldset; + int rc; - if (!rseq_ownership) + if (!rseq_ownership) { + /* Treat libc's ownership as a successful registration. */ return 0; - signal_off_save(&oldset); - if (__rseq_refcount == UINT_MAX) { - ret = -1; - goto end; - } - if (__rseq_refcount++) - goto end; - rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG); - if (!rc) { - assert(rseq_current_cpu_raw() >= 0); - goto end; } - if (errno != EBUSY) - __rseq_abi.cpu_id = RSEQ_CPU_ID_REGISTRATION_FAILED; - ret = -1; - __rseq_refcount--; -end: - signal_restore(oldset); - return ret; + rc = sys_rseq(&__rseq_abi, sizeof(struct rseq_abi), 0, RSEQ_SIG); + if (rc) + return -1; + assert(rseq_current_cpu_raw() >= 0); + return 0; } int rseq_unregister_current_thread(void) { - int rc, ret = 0; - sigset_t oldset; + int rc; - if (!rseq_ownership) + if (!rseq_ownership) { + /* Treat libc's ownership as a successful unregistration. */ return 0; - signal_off_save(&oldset); - if (!__rseq_refcount) { - ret = -1; - goto end; } - if (--__rseq_refcount) - goto end; - rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), - RSEQ_FLAG_UNREGISTER, RSEQ_SIG); - if (!rc) - goto end; - __rseq_refcount = 1; - ret = -1; -end: - signal_restore(oldset); - return ret; + rc = sys_rseq(&__rseq_abi, sizeof(struct rseq_abi), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); + if (rc) + return -1; + return 0; } -int32_t rseq_fallback_current_cpu(void) +static __attribute__((constructor)) +void rseq_init(void) { - int32_t cpu; - - cpu = sched_getcpu(); - if (cpu < 0) { - perror("sched_getcpu()"); - abort(); + libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset"); + libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size"); + libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags"); + if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p) { + /* rseq registration owned by glibc */ + rseq_offset = *libc_rseq_offset_p; + rseq_size = *libc_rseq_size_p; + rseq_flags = *libc_rseq_flags_p; + return; } - return cpu; -} - -void __attribute__((constructor)) rseq_init(void) -{ - /* Check whether rseq is handled by another library. */ - if (__rseq_handled) + if (!rseq_available()) return; - __rseq_handled = 1; rseq_ownership = 1; + rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer(); + rseq_size = sizeof(struct rseq_abi); + rseq_flags = 0; } -void __attribute__((destructor)) rseq_fini(void) +static __attribute__((destructor)) +void rseq_exit(void) { if (!rseq_ownership) return; - __rseq_handled = 0; + rseq_offset = 0; + rseq_size = -1U; rseq_ownership = 0; } + +int32_t rseq_fallback_current_cpu(void) +{ + int32_t cpu; + + cpu = sched_getcpu(); + if (cpu < 0) { + perror("sched_getcpu()"); + abort(); + } + return cpu; +} diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index d40d60e7499e8..c8f49b018dbb0 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -16,7 +16,9 @@ #include #include #include -#include +#include +#include "rseq-abi.h" +#include "compiler.h" /* * Empty code injection macros, override when testing. @@ -43,8 +45,20 @@ #define RSEQ_INJECT_FAILED #endif -extern __thread volatile struct rseq __rseq_abi; -extern int __rseq_handled; +#include "rseq-thread-pointer.h" + +/* Offset from the thread pointer to the rseq area. */ +extern ptrdiff_t rseq_offset; +/* Size of the registered rseq area. 0 if the registration was + unsuccessful. */ +extern unsigned int rseq_size; +/* Flags used during rseq registration. */ +extern unsigned int rseq_flags; + +static inline struct rseq_abi *rseq_get_abi(void) +{ + return (struct rseq_abi *) ((uintptr_t) rseq_thread_pointer() + rseq_offset); +} #define rseq_likely(x) __builtin_expect(!!(x), 1) #define rseq_unlikely(x) __builtin_expect(!!(x), 0) @@ -108,7 +122,7 @@ int32_t rseq_fallback_current_cpu(void); */ static inline int32_t rseq_current_cpu_raw(void) { - return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id); + return RSEQ_ACCESS_ONCE(rseq_get_abi()->cpu_id); } /* @@ -124,7 +138,7 @@ static inline int32_t rseq_current_cpu_raw(void) */ static inline uint32_t rseq_cpu_start(void) { - return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id_start); + return RSEQ_ACCESS_ONCE(rseq_get_abi()->cpu_id_start); } static inline uint32_t rseq_current_cpu(void) @@ -139,11 +153,7 @@ static inline uint32_t rseq_current_cpu(void) static inline void rseq_clear_rseq_cs(void) { -#ifdef __LP64__ - __rseq_abi.rseq_cs.ptr = 0; -#else - __rseq_abi.rseq_cs.ptr.ptr32 = 0; -#endif + RSEQ_WRITE_ONCE(rseq_get_abi()->rseq_cs.arch.ptr, 0); } /* diff --git a/tools/testing/selftests/rtc/settings b/tools/testing/selftests/rtc/settings index ba4d85f74cd6b..a953c96aa16e1 100644 --- a/tools/testing/selftests/rtc/settings +++ b/tools/testing/selftests/rtc/settings @@ -1 +1 @@ -timeout=90 +timeout=180 diff --git a/tools/testing/selftests/vm/map_fixed_noreplace.c b/tools/testing/selftests/vm/map_fixed_noreplace.c index d91bde5112686..eed44322d1a63 100644 --- a/tools/testing/selftests/vm/map_fixed_noreplace.c +++ b/tools/testing/selftests/vm/map_fixed_noreplace.c @@ -17,9 +17,6 @@ #define MAP_FIXED_NOREPLACE 0x100000 #endif -#define BASE_ADDRESS (256ul * 1024 * 1024) - - static void dump_maps(void) { char cmd[32]; @@ -28,18 +25,46 @@ static void dump_maps(void) system(cmd); } +static unsigned long find_base_addr(unsigned long size) +{ + void *addr; + unsigned long flags; + + flags = MAP_PRIVATE | MAP_ANONYMOUS; + addr = mmap(NULL, size, PROT_NONE, flags, -1, 0); + if (addr == MAP_FAILED) { + printf("Error: couldn't map the space we need for the test\n"); + return 0; + } + + if (munmap(addr, size) != 0) { + printf("Error: couldn't map the space we need for the test\n"); + return 0; + } + return (unsigned long)addr; +} + int main(void) { + unsigned long base_addr; unsigned long flags, addr, size, page_size; char *p; page_size = sysconf(_SC_PAGE_SIZE); + //let's find a base addr that is free before we start the tests + size = 5 * page_size; + base_addr = find_base_addr(size); + if (!base_addr) { + printf("Error: couldn't map the space we need for the test\n"); + return 1; + } + flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE; // Check we can map all the areas we need below errno = 0; - addr = BASE_ADDRESS; + addr = base_addr; size = 5 * page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); @@ -60,7 +85,7 @@ int main(void) printf("unmap() successful\n"); errno = 0; - addr = BASE_ADDRESS + page_size; + addr = base_addr + page_size; size = 3 * page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); @@ -80,7 +105,7 @@ int main(void) * +4 | free | new */ errno = 0; - addr = BASE_ADDRESS; + addr = base_addr; size = 5 * page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); @@ -101,7 +126,7 @@ int main(void) * +4 | free | */ errno = 0; - addr = BASE_ADDRESS + (2 * page_size); + addr = base_addr + (2 * page_size); size = page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); @@ -121,7 +146,7 @@ int main(void) * +4 | free | new */ errno = 0; - addr = BASE_ADDRESS + (3 * page_size); + addr = base_addr + (3 * page_size); size = 2 * page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); @@ -141,7 +166,7 @@ int main(void) * +4 | free | */ errno = 0; - addr = BASE_ADDRESS; + addr = base_addr; size = 2 * page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); @@ -161,7 +186,7 @@ int main(void) * +4 | free | */ errno = 0; - addr = BASE_ADDRESS; + addr = base_addr; size = page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); @@ -181,7 +206,7 @@ int main(void) * +4 | free | new */ errno = 0; - addr = BASE_ADDRESS + (4 * page_size); + addr = base_addr + (4 * page_size); size = page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); @@ -192,7 +217,7 @@ int main(void) return 1; } - addr = BASE_ADDRESS; + addr = base_addr; size = 5 * page_size; if (munmap((void *)addr, size) != 0) { dump_maps(); diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 9ba7feffe344b..9814a0a15ba78 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include diff --git a/tools/testing/selftests/x86/check_cc.sh b/tools/testing/selftests/x86/check_cc.sh index 3e2089c8cf549..8c669c0d662ee 100755 --- a/tools/testing/selftests/x86/check_cc.sh +++ b/tools/testing/selftests/x86/check_cc.sh @@ -7,7 +7,7 @@ CC="$1" TESTPROG="$2" shift 2 -if "$CC" -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then +if [ -n "$CC" ] && $CC -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then echo 1 else echo 0 diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c index a4f4d4cf22c3b..d0752a0a8f362 100644 --- a/tools/testing/selftests/x86/test_vsyscall.c +++ b/tools/testing/selftests/x86/test_vsyscall.c @@ -480,7 +480,7 @@ static int test_process_vm_readv(void) } if (vsyscall_map_r) { - if (!memcmp(buf, (const void *)0xffffffffff600000, 4096)) { + if (!memcmp(buf, remote.iov_base, sizeof(buf))) { printf("[OK]\tIt worked and read correct data\n"); } else { printf("[FAIL]\tIt worked but returned incorrect data\n"); diff --git a/tools/testing/selftests/zram/zram.sh b/tools/testing/selftests/zram/zram.sh index 232e958ec4547..b0b91d9b0dc21 100755 --- a/tools/testing/selftests/zram/zram.sh +++ b/tools/testing/selftests/zram/zram.sh @@ -2,9 +2,6 @@ # SPDX-License-Identifier: GPL-2.0 TCID="zram.sh" -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - . ./zram_lib.sh run_zram () { @@ -18,14 +15,4 @@ echo "" check_prereqs -# check zram module exists -MODULE_PATH=/lib/modules/`uname -r`/kernel/drivers/block/zram/zram.ko -if [ -f $MODULE_PATH ]; then - run_zram -elif [ -b /dev/zram0 ]; then - run_zram -else - echo "$TCID : No zram.ko module or /dev/zram0 device file not found" - echo "$TCID : CONFIG_ZRAM is not set" - exit $ksft_skip -fi +run_zram diff --git a/tools/testing/selftests/zram/zram01.sh b/tools/testing/selftests/zram/zram01.sh index 114863d9fb876..8f4affe34f3e4 100755 --- a/tools/testing/selftests/zram/zram01.sh +++ b/tools/testing/selftests/zram/zram01.sh @@ -33,9 +33,7 @@ zram_algs="lzo" zram_fill_fs() { - local mem_free0=$(free -m | awk 'NR==2 {print $4}') - - for i in $(seq 0 $(($dev_num - 1))); do + for i in $(seq $dev_start $dev_end); do echo "fill zram$i..." local b=0 while [ true ]; do @@ -45,29 +43,17 @@ zram_fill_fs() b=$(($b + 1)) done echo "zram$i can be filled with '$b' KB" - done - local mem_free1=$(free -m | awk 'NR==2 {print $4}') - local used_mem=$(($mem_free0 - $mem_free1)) + local mem_used_total=`awk '{print $3}' "/sys/block/zram$i/mm_stat"` + local v=$((100 * 1024 * $b / $mem_used_total)) + if [ "$v" -lt 100 ]; then + echo "FAIL compression ratio: 0.$v:1" + ERR_CODE=-1 + return + fi - local total_size=0 - for sm in $zram_sizes; do - local s=$(echo $sm | sed 's/M//') - total_size=$(($total_size + $s)) + echo "zram compression ratio: $(echo "scale=2; $v / 100 " | bc):1: OK" done - - echo "zram used ${used_mem}M, zram disk sizes ${total_size}M" - - local v=$((100 * $total_size / $used_mem)) - - if [ "$v" -lt 100 ]; then - echo "FAIL compression ratio: 0.$v:1" - ERR_CODE=-1 - zram_cleanup - return - fi - - echo "zram compression ratio: $(echo "scale=2; $v / 100 " | bc):1: OK" } check_prereqs @@ -81,7 +67,6 @@ zram_mount zram_fill_fs zram_cleanup -zram_unload if [ $ERR_CODE -ne 0 ]; then echo "$TCID : [FAIL]" diff --git a/tools/testing/selftests/zram/zram02.sh b/tools/testing/selftests/zram/zram02.sh index e83b404807c09..2418b0c4ed136 100755 --- a/tools/testing/selftests/zram/zram02.sh +++ b/tools/testing/selftests/zram/zram02.sh @@ -36,7 +36,6 @@ zram_set_memlimit zram_makeswap zram_swapoff zram_cleanup -zram_unload if [ $ERR_CODE -ne 0 ]; then echo "$TCID : [FAIL]" diff --git a/tools/testing/selftests/zram/zram_lib.sh b/tools/testing/selftests/zram/zram_lib.sh index 6f872f266fd11..21ec1966de76c 100755 --- a/tools/testing/selftests/zram/zram_lib.sh +++ b/tools/testing/selftests/zram/zram_lib.sh @@ -5,12 +5,17 @@ # Author: Alexey Kodanev # Modified: Naresh Kamboju -MODULE=0 dev_makeswap=-1 dev_mounted=-1 - +dev_start=0 +dev_end=-1 +module_load=-1 +sys_control=-1 # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 +kernel_version=`uname -r | cut -d'.' -f1,2` +kernel_major=${kernel_version%.*} +kernel_minor=${kernel_version#*.} trap INT @@ -25,68 +30,104 @@ check_prereqs() fi } +kernel_gte() +{ + major=${1%.*} + minor=${1#*.} + + if [ $kernel_major -gt $major ]; then + return 0 + elif [[ $kernel_major -eq $major && $kernel_minor -ge $minor ]]; then + return 0 + fi + + return 1 +} + zram_cleanup() { echo "zram cleanup" local i= - for i in $(seq 0 $dev_makeswap); do + for i in $(seq $dev_start $dev_makeswap); do swapoff /dev/zram$i done - for i in $(seq 0 $dev_mounted); do + for i in $(seq $dev_start $dev_mounted); do umount /dev/zram$i done - for i in $(seq 0 $(($dev_num - 1))); do + for i in $(seq $dev_start $dev_end); do echo 1 > /sys/block/zram${i}/reset rm -rf zram$i done -} + if [ $sys_control -eq 1 ]; then + for i in $(seq $dev_start $dev_end); do + echo $i > /sys/class/zram-control/hot_remove + done + fi -zram_unload() -{ - if [ $MODULE -ne 0 ] ; then - echo "zram rmmod zram" + if [ $module_load -eq 1 ]; then rmmod zram > /dev/null 2>&1 fi } zram_load() { - # check zram module exists - MODULE_PATH=/lib/modules/`uname -r`/kernel/drivers/block/zram/zram.ko - if [ -f $MODULE_PATH ]; then - MODULE=1 - echo "create '$dev_num' zram device(s)" - modprobe zram num_devices=$dev_num - if [ $? -ne 0 ]; then - echo "failed to insert zram module" - exit 1 - fi - - dev_num_created=$(ls /dev/zram* | wc -w) + echo "create '$dev_num' zram device(s)" + + # zram module loaded, new kernel + if [ -d "/sys/class/zram-control" ]; then + echo "zram modules already loaded, kernel supports" \ + "zram-control interface" + dev_start=$(ls /dev/zram* | wc -w) + dev_end=$(($dev_start + $dev_num - 1)) + sys_control=1 + + for i in $(seq $dev_start $dev_end); do + cat /sys/class/zram-control/hot_add > /dev/null + done + + echo "all zram devices (/dev/zram$dev_start~$dev_end" \ + "successfully created" + return 0 + fi - if [ "$dev_num_created" -ne "$dev_num" ]; then - echo "unexpected num of devices: $dev_num_created" - ERR_CODE=-1 + # detect old kernel or built-in + modprobe zram num_devices=$dev_num + if [ ! -d "/sys/class/zram-control" ]; then + if grep -q '^zram' /proc/modules; then + rmmod zram > /dev/null 2>&1 + if [ $? -ne 0 ]; then + echo "zram module is being used on old kernel" \ + "without zram-control interface" + exit $ksft_skip + fi else - echo "zram load module successful" + echo "test needs CONFIG_ZRAM=m on old kernel without" \ + "zram-control interface" + exit $ksft_skip fi - elif [ -b /dev/zram0 ]; then - echo "/dev/zram0 device file found: OK" - else - echo "ERROR: No zram.ko module or no /dev/zram0 device found" - echo "$TCID : CONFIG_ZRAM is not set" - exit 1 + modprobe zram num_devices=$dev_num fi + + module_load=1 + dev_end=$(($dev_num - 1)) + echo "all zram devices (/dev/zram0~$dev_end) successfully created" } zram_max_streams() { echo "set max_comp_streams to zram device(s)" - local i=0 + kernel_gte 4.7 + if [ $? -eq 0 ]; then + echo "The device attribute max_comp_streams was"\ + "deprecated in 4.7" + return 0 + fi + + local i=$dev_start for max_s in $zram_max_streams; do local sys_path="/sys/block/zram${i}/max_comp_streams" echo $max_s > $sys_path || \ @@ -98,7 +139,7 @@ zram_max_streams() echo "FAIL can't set max_streams '$max_s', get $max_stream" i=$(($i + 1)) - echo "$sys_path = '$max_streams' ($i/$dev_num)" + echo "$sys_path = '$max_streams'" done echo "zram max streams: OK" @@ -108,15 +149,16 @@ zram_compress_alg() { echo "test that we can set compression algorithm" - local algs=$(cat /sys/block/zram0/comp_algorithm) + local i=$dev_start + local algs=$(cat /sys/block/zram${i}/comp_algorithm) echo "supported algs: $algs" - local i=0 + for alg in $zram_algs; do local sys_path="/sys/block/zram${i}/comp_algorithm" echo "$alg" > $sys_path || \ echo "FAIL can't set '$alg' to $sys_path" i=$(($i + 1)) - echo "$sys_path = '$alg' ($i/$dev_num)" + echo "$sys_path = '$alg'" done echo "zram set compression algorithm: OK" @@ -125,14 +167,14 @@ zram_compress_alg() zram_set_disksizes() { echo "set disk size to zram device(s)" - local i=0 + local i=$dev_start for ds in $zram_sizes; do local sys_path="/sys/block/zram${i}/disksize" echo "$ds" > $sys_path || \ echo "FAIL can't set '$ds' to $sys_path" i=$(($i + 1)) - echo "$sys_path = '$ds' ($i/$dev_num)" + echo "$sys_path = '$ds'" done echo "zram set disksizes: OK" @@ -142,14 +184,14 @@ zram_set_memlimit() { echo "set memory limit to zram device(s)" - local i=0 + local i=$dev_start for ds in $zram_mem_limits; do local sys_path="/sys/block/zram${i}/mem_limit" echo "$ds" > $sys_path || \ echo "FAIL can't set '$ds' to $sys_path" i=$(($i + 1)) - echo "$sys_path = '$ds' ($i/$dev_num)" + echo "$sys_path = '$ds'" done echo "zram set memory limit: OK" @@ -158,8 +200,8 @@ zram_set_memlimit() zram_makeswap() { echo "make swap with zram device(s)" - local i=0 - for i in $(seq 0 $(($dev_num - 1))); do + local i=$dev_start + for i in $(seq $dev_start $dev_end); do mkswap /dev/zram$i > err.log 2>&1 if [ $? -ne 0 ]; then cat err.log @@ -182,7 +224,7 @@ zram_makeswap() zram_swapoff() { local i= - for i in $(seq 0 $dev_makeswap); do + for i in $(seq $dev_start $dev_end); do swapoff /dev/zram$i > err.log 2>&1 if [ $? -ne 0 ]; then cat err.log @@ -196,7 +238,7 @@ zram_swapoff() zram_makefs() { - local i=0 + local i=$dev_start for fs in $zram_filesystems; do # if requested fs not supported default it to ext2 which mkfs.$fs > /dev/null 2>&1 || fs=ext2 @@ -215,7 +257,7 @@ zram_makefs() zram_mount() { local i=0 - for i in $(seq 0 $(($dev_num - 1))); do + for i in $(seq $dev_start $dev_end); do echo "mount /dev/zram$i" mkdir zram$i mount /dev/zram$i zram$i > /dev/null || \ diff --git a/tools/thermal/tmon/Makefile b/tools/thermal/tmon/Makefile index 59e417ec3e134..25d7f8f37cfd6 100644 --- a/tools/thermal/tmon/Makefile +++ b/tools/thermal/tmon/Makefile @@ -10,7 +10,7 @@ override CFLAGS+= $(call cc-option,-O3,-O1) ${WARNFLAGS} # Add "-fstack-protector" only if toolchain supports it. override CFLAGS+= $(call cc-option,-fstack-protector-strong) CC?= $(CROSS_COMPILE)gcc -PKG_CONFIG?= pkg-config +PKG_CONFIG?= $(CROSS_COMPILE)pkg-config override CFLAGS+=-D VERSION=\"$(VERSION)\" LDFLAGS+= diff --git a/tools/usb/testusb.c b/tools/usb/testusb.c index ee8208b2f9460..69c3ead25313d 100644 --- a/tools/usb/testusb.c +++ b/tools/usb/testusb.c @@ -265,12 +265,6 @@ static int find_testdev(const char *name, const struct stat *sb, int flag) } entry->ifnum = ifnum; - - /* FIXME update USBDEVFS_CONNECTINFO so it tells about high speed etc */ - - fprintf(stderr, "%s speed\t%s\t%u\n", - speed(entry->speed), entry->name, entry->ifnum); - entry->next = testdevs; testdevs = entry; return 0; @@ -299,6 +293,14 @@ static void *handle_testdev (void *arg) return 0; } + status = ioctl(fd, USBDEVFS_GET_SPEED, NULL); + if (status < 0) + fprintf(stderr, "USBDEVFS_GET_SPEED failed %d\n", status); + else + dev->speed = status; + fprintf(stderr, "%s speed\t%s\t%u\n", + speed(dev->speed), dev->name, dev->ifnum); + restart: for (i = 0; i < TEST_CASES; i++) { if (dev->test != -1 && dev->test != i) diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c index 58c0eab71bca3..d2e836b2d16b0 100644 --- a/tools/vm/page-types.c +++ b/tools/vm/page-types.c @@ -1329,7 +1329,7 @@ int main(int argc, char *argv[]) if (opt_list && opt_list_mapcnt) kpagecount_fd = checked_open(PROC_KPAGECOUNT, O_RDONLY); - if (opt_mark_idle && opt_file) + if (opt_mark_idle) page_idle_fd = checked_open(SYS_KERNEL_MM_PAGE_IDLE, O_RDWR); if (opt_list && opt_pid) diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 4af85605730e4..f7150fbeeb55e 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1141,6 +1141,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (copy_from_user(®, argp, sizeof(reg))) break; + /* + * We could owe a reset due to PSCI. Handle the pending reset + * here to ensure userspace register accesses are ordered after + * the reset. + */ + if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu)) + kvm_reset_vcpu(vcpu); + if (ioctl == KVM_SET_ONE_REG) r = kvm_arm_set_reg(vcpu, ®); else diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 4c08fd009768c..d522a1000497c 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -259,6 +259,7 @@ void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) kvm_pmu_release_perf_event(&pmu->pmc[i]); + irq_work_sync(&vcpu->arch.pmu.overflow_work); } u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu) @@ -435,6 +436,22 @@ void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) kvm_pmu_update_state(vcpu); } +/** + * When perf interrupt is an NMI, we cannot safely notify the vcpu corresponding + * to the event. + * This is why we need a callback to do it once outside of the NMI context. + */ +static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work) +{ + struct kvm_vcpu *vcpu; + struct kvm_pmu *pmu; + + pmu = container_of(work, struct kvm_pmu, overflow_work); + vcpu = kvm_pmc_to_vcpu(pmu->pmc); + + kvm_vcpu_kick(vcpu); +} + /** * When the perf event overflows, set the overflow status and inform the vcpu. */ @@ -467,7 +484,11 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, if (kvm_pmu_overflow_status(vcpu)) { kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); - kvm_vcpu_kick(vcpu); + + if (!in_nmi()) + kvm_vcpu_kick(vcpu); + else + irq_work_queue(&vcpu->arch.pmu.overflow_work); } cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD); @@ -760,6 +781,9 @@ static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu) return ret; } + init_irq_work(&vcpu->arch.pmu.overflow_work, + kvm_pmu_perf_overflow_notify_vcpu); + vcpu->arch.pmu.created = true; return 0; } diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c index 48fde38d64c37..2f5dc7fb437bd 100644 --- a/virt/kvm/arm/psci.c +++ b/virt/kvm/arm/psci.c @@ -426,6 +426,18 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) break; } break; + case ARM_SMCCC_ARCH_WORKAROUND_3: + switch (kvm_arm_get_spectre_bhb_state()) { + case SPECTRE_VULNERABLE: + break; + case SPECTRE_MITIGATED: + val = SMCCC_RET_SUCCESS; + break; + case SPECTRE_UNAFFECTED: + val = SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED; + break; + } + break; } break; default: @@ -438,7 +450,7 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu) { - return 3; /* PSCI version and two workaround registers */ + return 4; /* PSCI version and three workaround registers */ } int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) @@ -452,6 +464,9 @@ int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, uindices++)) return -EFAULT; + if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3, uindices++)) + return -EFAULT; + return 0; } @@ -486,9 +501,20 @@ static int get_kernel_wa_level(u64 regid) return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED; case KVM_SSBD_UNKNOWN: default: - return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN; + break; } - } + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN; + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3: + switch (kvm_arm_get_spectre_bhb_state()) { + case SPECTRE_VULNERABLE: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL; + case SPECTRE_MITIGATED: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL; + case SPECTRE_UNAFFECTED: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED; + } + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL; + } return -EINVAL; } @@ -503,6 +529,7 @@ int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) val = kvm_psci_version(vcpu, vcpu->kvm); break; case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3: val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK; break; case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: @@ -555,6 +582,7 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) } case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3: if (val & ~KVM_REG_FEATURE_LEVEL_MASK) return -EINVAL; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 362350609fe76..347015d4dbbb3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -115,6 +115,8 @@ EXPORT_SYMBOL_GPL(kvm_debugfs_dir); static int kvm_debugfs_num_entries; static const struct file_operations *stat_fops_per_vm[]; +static struct file_operations kvm_chardev_ops; + static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, unsigned long arg); #ifdef CONFIG_KVM_COMPAT @@ -766,6 +768,16 @@ static struct kvm *kvm_create_vm(unsigned long type) preempt_notifier_inc(); + /* + * When the fd passed to this ioctl() is opened it pins the module, + * but try_module_get() also prevents getting a reference if the module + * is in MODULE_STATE_GOING (e.g. if someone ran "rmmod --wait"). + */ + if (!try_module_get(kvm_chardev_ops.owner)) { + r = -ENODEV; + goto out_err; + } + return kvm; out_err: @@ -844,6 +856,7 @@ static void kvm_destroy_vm(struct kvm *kvm) preempt_notifier_dec(); hardware_disable_all(); mmdrop(mm); + module_put(kvm_chardev_ops.owner); } void kvm_get_kvm(struct kvm *kvm) @@ -2470,15 +2483,19 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu) { - unsigned int old, val, shrink; + unsigned int old, val, shrink, grow_start; old = val = vcpu->halt_poll_ns; shrink = READ_ONCE(halt_poll_ns_shrink); + grow_start = READ_ONCE(halt_poll_ns_grow_start); if (shrink == 0) val = 0; else val /= shrink; + if (val < grow_start) + val = 0; + vcpu->halt_poll_ns = val; trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old); } @@ -2864,7 +2881,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) goto unlock_vcpu_destroy; } - BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]); + vcpu->vcpu_idx = atomic_read(&kvm->online_vcpus); + BUG_ON(kvm->vcpus[vcpu->vcpu_idx]); /* Now it's all set up, let userspace reach it */ kvm_get_kvm(kvm); @@ -2874,7 +2892,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) goto unlock_vcpu_destroy; } - kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu; + kvm->vcpus[vcpu->vcpu_idx] = vcpu; /* * Pairs with smp_rmb() in kvm_get_vcpu. Write kvm->vcpus @@ -3311,8 +3329,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm, kvm_put_kvm(kvm); mutex_lock(&kvm->lock); list_del(&dev->vm_node); + if (ops->release) + ops->release(dev); mutex_unlock(&kvm->lock); - ops->destroy(dev); + if (ops->destroy) + ops->destroy(dev); return ret; }