263 files changed, 5327 insertions, 3120 deletions
diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy
index bc8e1cbe5e61..070779e8d836 100644
--- a/Documentation/ABI/testing/ima_policy
+++ b/Documentation/ABI/testing/ima_policy
@@ -29,7 +29,7 @@ Description:
 			option:	[[appraise_type=]] [template=] [permit_directio]
 				[appraise_flag=] [keyrings=]
 		  base:
-			func:= [BPRM_CHECK][MMAP_CHECK][CREDS_CHECK][FILE_CHECK]MODULE_CHECK]
+			func:= [BPRM_CHECK][MMAP_CHECK][CREDS_CHECK][FILE_CHECK][MODULE_CHECK]
 			        [FIRMWARE_CHECK]
 				[KEXEC_KERNEL_CHECK] [KEXEC_INITRAMFS_CHECK]
 				[KEXEC_CMDLINE] [KEY_CHECK] [CRITICAL_DATA]
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 9acc57f68f31..64c62b979f2f 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -2098,7 +2098,7 @@ If the program returns 0, the attempt fails with -EPERM, otherwise
 it succeeds.
 
 An example of BPF_CGROUP_DEVICE program may be found in the kernel
-source tree in the tools/testing/selftests/bpf/dev_cgroup.c file.
+source tree in the tools/testing/selftests/bpf/progs/dev_cgroup.c file.
 
 
 RDMA
diff --git a/Documentation/admin-guide/cifs/authors.rst b/Documentation/admin-guide/cifs/authors.rst
index b02d6dd6c070..5c1d2f0fa7d1 100644
--- a/Documentation/admin-guide/cifs/authors.rst
+++ b/Documentation/admin-guide/cifs/authors.rst
@@ -5,10 +5,10 @@ Authors
 Original Author
 ---------------
 
-Steve French (sfrench@samba.org)
+Steve French (smfrench@gmail.com, sfrench@samba.org)
 
 The author wishes to express his appreciation and thanks to:
-Andrew Tridgell (Samba team) for his early suggestions about smb/cifs VFS
+Andrew Tridgell (Samba team) for his early suggestions about SMB/CIFS VFS
 improvements. Thanks to IBM for allowing me time and test resources to pursue
 this project, to Jim McDonough from IBM (and the Samba Team) for his help, to
 the IBM Linux JFS team for explaining many esoteric Linux filesystem features.
@@ -51,7 +51,7 @@ Patch Contributors
 - Ronnie Sahlberg (for SMB3 xattr work, bug fixes, and lots of great work on compounding)
 - Shirish Pargaonkar (for many ACL patches over the years)
 - Sachin Prabhu (many bug fixes, including for reconnect, copy offload and security)
-- Paulo Alcantara
+- Paulo Alcantara (for some excellent work in DFS, and in booting from SMB3)
 - Long Li (some great work on RDMA, SMB Direct)
 
 
diff --git a/Documentation/admin-guide/cifs/changes.rst b/Documentation/admin-guide/cifs/changes.rst
index 71f2ecb62299..3147bbae9c43 100644
--- a/Documentation/admin-guide/cifs/changes.rst
+++ b/Documentation/admin-guide/cifs/changes.rst
@@ -3,6 +3,7 @@ Changes
 =======
 
 See https://wiki.samba.org/index.php/LinuxCIFSKernel for summary
-information (that may be easier to read than parsing the output of
-"git log fs/cifs") about fixes/improvements to CIFS/SMB2/SMB3 support (changes
+information about fixes/improvements to CIFS/SMB2/SMB3 support (changes
 to cifs.ko module) by kernel version (and cifs internal module version).
+This may be easier to read than parsing the output of "git log fs/cifs"
+by release.
diff --git a/Documentation/admin-guide/cifs/introduction.rst b/Documentation/admin-guide/cifs/introduction.rst
index cc2851d93d17..53ea62906aa5 100644
--- a/Documentation/admin-guide/cifs/introduction.rst
+++ b/Documentation/admin-guide/cifs/introduction.rst
@@ -7,19 +7,19 @@ Introduction
   protocol which was the successor to the Server Message Block
   (SMB) protocol, the native file sharing mechanism for most early
   PC operating systems. New and improved versions of CIFS are now
-  called SMB2 and SMB3. Use of SMB3 (and later, including SMB3.1.1)
-  is strongly preferred over using older dialects like CIFS due to
-  security reasons. All modern dialects, including the most recent,
-  SMB3.1.1 are supported by the CIFS VFS module. The SMB3 protocol
-  is implemented and supported by all major file servers
-  such as all modern versions of Windows (including Windows 2016
-  Server), as well as by Samba (which provides excellent
-  CIFS/SMB2/SMB3 server support and tools for Linux and many other
-  operating systems).  Apple systems also support SMB3 well, as
-  do most Network Attached Storage vendors, so this network
-  filesystem client can mount to a wide variety of systems.
-  It also supports mounting to the cloud (for example
-  Microsoft Azure), including the necessary security features.
+  called SMB2 and SMB3. Use of SMB3 (and later, including SMB3.1.1
+  the most current dialect) is strongly preferred over using older
+  dialects like CIFS due to security reasons. All modern dialects,
+  including the most recent, SMB3.1.1, are supported by the CIFS VFS
+  module. The SMB3 protocol is implemented and supported by all major
+  file servers such as Windows (including Windows 2019 Server), as
+  well as by Samba (which provides excellent CIFS/SMB2/SMB3 server
+  support and tools for Linux and many other operating systems).
+  Apple systems also support SMB3 well, as do most Network Attached
+  Storage vendors, so this network filesystem client can mount to a
+  wide variety of systems. It also supports mounting to the cloud
+  (for example Microsoft Azure), including the necessary security
+  features.
 
   The intent of this module is to provide the most advanced network
   file system function for SMB3 compliant servers, including advanced
@@ -27,8 +27,8 @@ Introduction
   POSIX compliance, secure per-user session establishment, encryption,
   high performance safe distributed caching (leases/oplocks), optional packet
   signing, large files, Unicode support and other internationalization
-  improvements. Since both Samba server and this filesystem client support
-  the CIFS Unix extensions (and in the future SMB3 POSIX extensions),
+  improvements. Since both Samba server and this filesystem client support the
+  CIFS Unix extensions, and the Linux client also suppors SMB3 POSIX extensions,
   the combination can provide a reasonable alternative to other network and
   cluster file systems for fileserving in some Linux to Linux environments,
   not just in Linux to Windows (or Linux to Mac) environments.
diff --git a/Documentation/admin-guide/cifs/todo.rst b/Documentation/admin-guide/cifs/todo.rst
index 25f11576e7b9..2646ed2e2d3e 100644
--- a/Documentation/admin-guide/cifs/todo.rst
+++ b/Documentation/admin-guide/cifs/todo.rst
@@ -13,24 +13,26 @@ is a partial list of the known problems and missing features:
 
 a) SMB3 (and SMB3.1.1) missing optional features:
 
-   - multichannel (started), integration with RDMA
-   - directory leases (improved metadata caching), started (root dir only)
+   - multichannel (partially integrated), integration of multichannel with RDMA
+   - directory leases (improved metadata caching). Currently only implemented for root dir
    - T10 copy offload ie "ODX" (copy chunk, and "Duplicate Extents" ioctl
      currently the only two server side copy mechanisms supported)
 
 b) improved sparse file support (fiemap and SEEK_HOLE are implemented
-   but additional features would be supportable by the protocol).
+   but additional features would be supportable by the protocol such
+   as FALLOC_FL_COLLAPSE_RANGE and FALLOC_FL_INSERT_RANGE)
 
 c) Directory entry caching relies on a 1 second timer, rather than
    using Directory Leases, currently only the root file handle is cached longer
+   by leveraging Directory Leases
 
-d) quota support (needs minor kernel change since quota calls
-   to make it to network filesystems or deviceless filesystems)
+d) quota support (needs minor kernel change since quota calls otherwise
+    won't make it to network filesystems or deviceless filesystems).
 
 e) Additional use cases can be optimized to use "compounding" (e.g.
    open/query/close and open/setinfo/close) to reduce the number of
    roundtrips to the server and improve performance. Various cases
-   (stat, statfs, create, unlink, mkdir) already have been improved by
+   (stat, statfs, create, unlink, mkdir, xattrs) already have been improved by
    using compounding but more can be done. In addition we could
    significantly reduce redundant opens by using deferred close (with
    handle caching leases) and better using reference counters on file
@@ -60,7 +62,9 @@ k) Add tools to take advantage of more smb3 specific ioctls and features
    metadata attributes easier from tools (e.g. extending what was done
    in smb-info tool).
 
-l) encrypted file support
+l) encrypted file support (currently the attribute showing the file is
+   encrypted on the server is reported, but changing the attribute is not
+   supported).
 
 m) improved stats gathering tools (perhaps integration with nfsometer?)
    to extend and make easier to use what is currently in /proc/fs/cifs/Stats
@@ -69,14 +73,13 @@ n) Add support for claims based ACLs ("DAC")
 
 o) mount helper GUI (to simplify the various configuration options on mount)
 
-p) Add support for witness protocol (perhaps ioctl to cifs.ko from user space
-   tool listening on witness protocol RPC) to allow for notification of share
-   move, server failover, and server adapter changes.  And also improve other
-   failover scenarios, e.g. when client knows multiple DFS entries point to
-   different servers, and the server we are connected to has gone down.
+p) Expand support for witness protocol to allow for notification of share
+   move, and server network adapter changes. Currently only notifications by
+   the witness protocol for server move is supported by the Linux client.
 
 q) Allow mount.cifs to be more verbose in reporting errors with dialect
-   or unsupported feature errors.
+   or unsupported feature errors. This would now be easier due to the
+   implementation of the new mount API.
 
 r) updating cifs documentation, and user guide.
 
@@ -87,11 +90,10 @@ t) split cifs and smb3 support into separate modules so legacy (and less
    secure) CIFS dialect can be disabled in environments that don't need it
    and simplify the code.
 
-v) POSIX Extensions for SMB3.1.1 (started, create and mkdir support added
-   so far).
+v) Additional testing of POSIX Extensions for SMB3.1.1
 
 w) Add support for additional strong encryption types, and additional spnego
-   authentication mechanisms (see MS-SMB2)
+   authentication mechanisms (see MS-SMB2).  GCM-256 is now partially implemented.
 
 x) Finish support for SMB3.1.1 compression
 
diff --git a/Documentation/admin-guide/cifs/usage.rst b/Documentation/admin-guide/cifs/usage.rst
index b6d9f02bc12b..13783dc68ab7 100644
--- a/Documentation/admin-guide/cifs/usage.rst
+++ b/Documentation/admin-guide/cifs/usage.rst
@@ -83,7 +83,7 @@ and encrypted shares and stronger signing and authentication algorithms.
 There are additional mount options that may be helpful for SMB3 to get
 improved POSIX behavior (NB: can use vers=3.0 to force only SMB3, never 2.1):
 
-     ``mfsymlinks`` and ``cifsacl`` and ``idsfromsid``
+   ``mfsymlinks`` and either ``cifsacl`` or ``modefromsid`` (usually with ``idsfromsid``)
 
 Allowing User Mounts
 ====================
diff --git a/Documentation/admin-guide/xfs.rst b/Documentation/admin-guide/xfs.rst
index 6178153d3320..5422407a96d7 100644
--- a/Documentation/admin-guide/xfs.rst
+++ b/Documentation/admin-guide/xfs.rst
@@ -284,6 +284,9 @@ The following sysctls are available for the XFS filesystem:
 	removes unused preallocation from clean inodes and releases
 	the unused space back to the free pool.
 
+  fs.xfs.speculative_cow_prealloc_lifetime
+	This is an alias for speculative_prealloc_lifetime.
+
   fs.xfs.error_level		(Min: 0  Default: 3  Max: 11)
 	A volume knob for error reporting when internal errors occur.
 	This will generate detailed messages & backtraces for filesystem
@@ -356,12 +359,13 @@ The following sysctls are available for the XFS filesystem:
 Deprecated Sysctls
 ==================
 
-===========================     ================
-  Name				Removal Schedule
-===========================     ================
-fs.xfs.irix_sgid_inherit        September 2025
-fs.xfs.irix_symlink_mode        September 2025
-===========================     ================
+===========================================     ================
+  Name                                          Removal Schedule
+===========================================     ================
+fs.xfs.irix_sgid_inherit                        September 2025
+fs.xfs.irix_symlink_mode                        September 2025
+fs.xfs.speculative_cow_prealloc_lifetime        September 2025
+===========================================     ================
 
 
 Removed Sysctls
diff --git a/Documentation/conf.py b/Documentation/conf.py
index 5bd45d5fb0a0..fd65168c10f8 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -49,8 +49,7 @@ extensions = ['kerneldoc', 'rstFlatTable', 'kernel_include',
 if major >= 3:
     sys.stderr.write('''WARNING: The kernel documentation build process
         support for Sphinx v3.0 and above is brand new. Be prepared for
-        possible issues in the generated output.
-        ''')
+        possible issues in the generated output.\n''')
     if (major > 3) or (minor > 0 or patch >= 2):
         # Sphinx c function parser is more pedantic with regards to type
         # checking. Due to that, having macros at c:function cause problems.
diff --git a/Documentation/devicetree/bindings/leds/leds-lgm.yaml b/Documentation/devicetree/bindings/leds/leds-lgm.yaml
new file mode 100644
index 000000000000..32bbf146c01d
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/leds-lgm.yaml
@@ -0,0 +1,113 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/leds/leds-lgm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Intel Lightning Mountain (LGM) SoC LED Serial Shift Output (SSO) Controller driver
+
+maintainers:
+  - Zhu, Yi Xin <Yixin.zhu@intel.com>
+  - Amireddy Mallikarjuna reddy <mallikarjunax.reddy@intel.com>
+
+properties:
+  compatible:
+    const: intel,lgm-ssoled
+
+  gpio-controller: true
+
+  '#gpio-cells':
+    const: 2
+
+  ngpios:
+    minimum: 0
+    maximum: 32
+    description:
+      Number of GPIOs this controller provides.
+
+  intel,sso-update-rate-hz:
+    description:
+      Blink frequency for SOUTs in Hz.
+
+  led-controller:
+    type: object
+    description:
+      This sub-node must contain a sub-node for each leds.
+
+    additionalProperties: false
+
+    patternProperties:
+      "^led@[0-23]$":
+        type: object
+
+        properties:
+          reg:
+            description: Index of the LED.
+            minimum: 0
+            maximum: 2
+
+          intel,sso-hw-trigger:
+            type: boolean
+            description: This property indicates Hardware driven/control LED.
+
+          intel,sso-hw-blink:
+            type: boolean
+            description: This property indicates Enable LED blink by Hardware.
+
+          intel,sso-blink-rate-hz:
+            description: LED HW blink frequency.
+
+          retain-state-suspended:
+            type: boolean
+            description: The suspend state of LED can be retained.
+
+          retain-state-shutdown:
+            type: boolean
+            description: Retain the state of the LED on shutdown.
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - "#gpio-cells"
+  - gpio-controller
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/intel,lgm-clk.h>
+    #include <dt-bindings/leds/common.h>
+
+    ssogpio: ssogpio@e0d40000 {
+      compatible = "intel,sso-led";
+      reg = <0xE0D40000 0x2E4>;
+      gpio-controller;
+      #gpio-cells = <2>;
+      ngpios = <32>;
+      pinctrl-names = "default";
+      pinctrl-0 = <&pinctrl_ledc>;
+      clocks = <&cgu0 LGM_GCLK_LEDC0>, <&afeclk>;
+      clock-names = "sso", "fpid";
+      intel,sso-update-rate-hz = <250000>;
+
+      led-controller {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        led@0 {
+          reg = <0>;
+          function = "gphy";
+          color = <LED_COLOR_ID_GREEN>;
+          led-gpio = <&ssogpio 0 0>;
+        };
+
+        led@23 {
+          reg = <23>;
+          function = LED_FUNCTION_POWER;
+          color = <LED_COLOR_ID_GREEN>;
+          led-gpio = <&ssogpio 23 0>;
+        };
+      };
+    };
diff --git a/Documentation/features/core/cBPF-JIT/arch-support.txt b/Documentation/features/core/cBPF-JIT/arch-support.txt
index 399935616813..e59b5215402d 100644
--- a/Documentation/features/core/cBPF-JIT/arch-support.txt
+++ b/Documentation/features/core/cBPF-JIT/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: | TODO |
     |       arm64: | TODO |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
diff --git a/Documentation/features/core/eBPF-JIT/arch-support.txt b/Documentation/features/core/eBPF-JIT/arch-support.txt
index 79409bfe0263..dcbd8679f514 100644
--- a/Documentation/features/core/eBPF-JIT/arch-support.txt
+++ b/Documentation/features/core/eBPF-JIT/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
diff --git a/Documentation/features/core/generic-idle-thread/arch-support.txt b/Documentation/features/core/generic-idle-thread/arch-support.txt
index 9ea60e416efd..4efcba7b5239 100644
--- a/Documentation/features/core/generic-idle-thread/arch-support.txt
+++ b/Documentation/features/core/generic-idle-thread/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: |  ok  |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: |  ok  |
diff --git a/Documentation/features/core/jump-labels/arch-support.txt b/Documentation/features/core/jump-labels/arch-support.txt
index 894d9693b380..0c801d1bd2da 100644
--- a/Documentation/features/core/jump-labels/arch-support.txt
+++ b/Documentation/features/core/jump-labels/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: |  ok  |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
diff --git a/Documentation/features/core/tracehook/arch-support.txt b/Documentation/features/core/tracehook/arch-support.txt
index cd3510e2eedb..af34308fce7f 100644
--- a/Documentation/features/core/tracehook/arch-support.txt
+++ b/Documentation/features/core/tracehook/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: |  ok  |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: |  ok  |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: |  ok  |
diff --git a/Documentation/features/debug/KASAN/arch-support.txt b/Documentation/features/debug/KASAN/arch-support.txt
index b2288dc14b72..c244ac7eee26 100644
--- a/Documentation/features/debug/KASAN/arch-support.txt
+++ b/Documentation/features/debug/KASAN/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
diff --git a/Documentation/features/debug/debug-vm-pgtable/arch-support.txt b/Documentation/features/debug/debug-vm-pgtable/arch-support.txt
index 1c49723e7534..7aff505af706 100644
--- a/Documentation/features/debug/debug-vm-pgtable/arch-support.txt
+++ b/Documentation/features/debug/debug-vm-pgtable/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: |  ok  |
     |         arm: | TODO |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
diff --git a/Documentation/features/debug/gcov-profile-all/arch-support.txt b/Documentation/features/debug/gcov-profile-all/arch-support.txt
index 7563a494ddb8..b39c1a5de3f3 100644
--- a/Documentation/features/debug/gcov-profile-all/arch-support.txt
+++ b/Documentation/features/debug/gcov-profile-all/arch-support.txt
@@ -10,14 +10,13 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: | TODO |
     |        ia64: | TODO |
     |        m68k: | TODO |
     |  microblaze: |  ok  |
-    |        mips: | TODO |
+    |        mips: |  ok  |
     |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
diff --git a/Documentation/features/debug/kcov/arch-support.txt b/Documentation/features/debug/kcov/arch-support.txt
index ab0ee1c933c2..7e44013cc320 100644
--- a/Documentation/features/debug/kcov/arch-support.txt
+++ b/Documentation/features/debug/kcov/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
diff --git a/Documentation/features/debug/kgdb/arch-support.txt b/Documentation/features/debug/kgdb/arch-support.txt
index bc45bac20442..2cb0576f9180 100644
--- a/Documentation/features/debug/kgdb/arch-support.txt
+++ b/Documentation/features/debug/kgdb/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: |  ok  |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: |  ok  |
     |     hexagon: |  ok  |
diff --git a/Documentation/features/debug/kmemleak/arch-support.txt b/Documentation/features/debug/kmemleak/arch-support.txt
index 2db76807ec6f..e9ac415f8aec 100644
--- a/Documentation/features/debug/kmemleak/arch-support.txt
+++ b/Documentation/features/debug/kmemleak/arch-support.txt
@@ -10,8 +10,7 @@
     |         arc: |  ok  |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
-    |        csky: | TODO |
+    |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: | TODO |
     |        ia64: | TODO |
diff --git a/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt b/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
index 6225cfe0c5bf..96156e8802a7 100644
--- a/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
+++ b/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: | TODO |
     |       arm64: | TODO |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: | TODO |
@@ -23,7 +22,7 @@
     |    openrisc: | TODO |
     |      parisc: |  ok  |
     |     powerpc: |  ok  |
-    |       riscv: | TODO |
+    |       riscv: |  ok  |
     |        s390: |  ok  |
     |          sh: | TODO |
     |       sparc: | TODO |
diff --git a/Documentation/features/debug/kprobes/arch-support.txt b/Documentation/features/debug/kprobes/arch-support.txt
index 371f0ac488f5..ee95ed61909a 100644
--- a/Documentation/features/debug/kprobes/arch-support.txt
+++ b/Documentation/features/debug/kprobes/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: |  ok  |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: | TODO |
@@ -23,7 +22,7 @@
     |    openrisc: | TODO |
     |      parisc: |  ok  |
     |     powerpc: |  ok  |
-    |       riscv: | TODO |
+    |       riscv: |  ok  |
     |        s390: |  ok  |
     |          sh: |  ok  |
     |       sparc: |  ok  |
diff --git a/Documentation/features/debug/kretprobes/arch-support.txt b/Documentation/features/debug/kretprobes/arch-support.txt
index 38e95251deed..612cb97d47b8 100644
--- a/Documentation/features/debug/kretprobes/arch-support.txt
+++ b/Documentation/features/debug/kretprobes/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: |  ok  |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: | TODO |
@@ -23,7 +22,7 @@
     |    openrisc: | TODO |
     |      parisc: |  ok  |
     |     powerpc: |  ok  |
-    |       riscv: | TODO |
+    |       riscv: |  ok  |
     |        s390: |  ok  |
     |          sh: |  ok  |
     |       sparc: |  ok  |
diff --git a/Documentation/features/debug/optprobes/arch-support.txt b/Documentation/features/debug/optprobes/arch-support.txt
index 7f4a20e6a12b..d6ff141a6122 100644
--- a/Documentation/features/debug/optprobes/arch-support.txt
+++ b/Documentation/features/debug/optprobes/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: | TODO |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
diff --git a/Documentation/features/debug/stackprotector/arch-support.txt b/Documentation/features/debug/stackprotector/arch-support.txt
index 3329559c8207..ad4de22a71ab 100644
--- a/Documentation/features/debug/stackprotector/arch-support.txt
+++ b/Documentation/features/debug/stackprotector/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: | TODO |
diff --git a/Documentation/features/debug/uprobes/arch-support.txt b/Documentation/features/debug/uprobes/arch-support.txt
index 43cac6ee0c68..8bd5548a4485 100644
--- a/Documentation/features/debug/uprobes/arch-support.txt
+++ b/Documentation/features/debug/uprobes/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: | TODO |
@@ -23,7 +22,7 @@
     |    openrisc: | TODO |
     |      parisc: | TODO |
     |     powerpc: |  ok  |
-    |       riscv: | TODO |
+    |       riscv: |  ok  |
     |        s390: |  ok  |
     |          sh: | TODO |
     |       sparc: |  ok  |
diff --git a/Documentation/features/debug/user-ret-profiler/arch-support.txt b/Documentation/features/debug/user-ret-profiler/arch-support.txt
index d636ed0e679f..2a3fe812a5fa 100644
--- a/Documentation/features/debug/user-ret-profiler/arch-support.txt
+++ b/Documentation/features/debug/user-ret-profiler/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: | TODO |
     |       arm64: | TODO |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
diff --git a/Documentation/features/io/dma-contiguous/arch-support.txt b/Documentation/features/io/dma-contiguous/arch-support.txt
index dfc93d074e3d..bece89586efa 100644
--- a/Documentation/features/io/dma-contiguous/arch-support.txt
+++ b/Documentation/features/io/dma-contiguous/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: | TODO |
diff --git a/Documentation/features/locking/cmpxchg-local/arch-support.txt b/Documentation/features/locking/cmpxchg-local/arch-support.txt
index 1815c7fed06d..52bdda004f5c 100644
--- a/Documentation/features/locking/cmpxchg-local/arch-support.txt
+++ b/Documentation/features/locking/cmpxchg-local/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: | TODO |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
diff --git a/Documentation/features/locking/lockdep/arch-support.txt b/Documentation/features/locking/lockdep/arch-support.txt
index 940b0bd02957..a8cd163c8b7e 100644
--- a/Documentation/features/locking/lockdep/arch-support.txt
+++ b/Documentation/features/locking/lockdep/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: |  ok  |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: |  ok  |
diff --git a/Documentation/features/locking/queued-rwlocks/arch-support.txt b/Documentation/features/locking/queued-rwlocks/arch-support.txt
index 4dd5e554873f..8c85949752b3 100644
--- a/Documentation/features/locking/queued-rwlocks/arch-support.txt
+++ b/Documentation/features/locking/queued-rwlocks/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: | TODO |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: | TODO |
diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
index b16d4f71e5ce..5f4e1b3841af 100644
--- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
+++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: | TODO |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
diff --git a/Documentation/features/perf/kprobes-event/arch-support.txt b/Documentation/features/perf/kprobes-event/arch-support.txt
index 04c17c2106a4..78f3fe080f0e 100644
--- a/Documentation/features/perf/kprobes-event/arch-support.txt
+++ b/Documentation/features/perf/kprobes-event/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: |  ok  |
@@ -23,7 +22,7 @@
     |    openrisc: | TODO |
     |      parisc: |  ok  |
     |     powerpc: |  ok  |
-    |       riscv: | TODO |
+    |       riscv: |  ok  |
     |        s390: |  ok  |
     |          sh: |  ok  |
     |       sparc: |  ok  |
diff --git a/Documentation/features/perf/perf-regs/arch-support.txt b/Documentation/features/perf/perf-regs/arch-support.txt
index e7450fbb8253..5bf3b1854a1f 100644
--- a/Documentation/features/perf/perf-regs/arch-support.txt
+++ b/Documentation/features/perf/perf-regs/arch-support.txt
@@ -10,14 +10,13 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: | TODO |
     |        ia64: | TODO |
     |        m68k: | TODO |
     |  microblaze: | TODO |
-    |        mips: | TODO |
+    |        mips: |  ok  |
     |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
diff --git a/Documentation/features/perf/perf-stackdump/arch-support.txt b/Documentation/features/perf/perf-stackdump/arch-support.txt
index 98e79d128d9b..d88659bb4fc1 100644
--- a/Documentation/features/perf/perf-stackdump/arch-support.txt
+++ b/Documentation/features/perf/perf-stackdump/arch-support.txt
@@ -10,14 +10,13 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: | TODO |
     |        ia64: | TODO |
     |        m68k: | TODO |
     |  microblaze: | TODO |
-    |        mips: | TODO |
+    |        mips: |  ok  |
     |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
diff --git a/Documentation/features/sched/membarrier-sync-core/arch-support.txt b/Documentation/features/sched/membarrier-sync-core/arch-support.txt
index 47e6903f47a5..883d33b265d6 100644
--- a/Documentation/features/sched/membarrier-sync-core/arch-support.txt
+++ b/Documentation/features/sched/membarrier-sync-core/arch-support.txt
@@ -33,7 +33,6 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
diff --git a/Documentation/features/sched/numa-balancing/arch-support.txt b/Documentation/features/sched/numa-balancing/arch-support.txt
index 964457ad26c1..9affb7c2c500 100644
--- a/Documentation/features/sched/numa-balancing/arch-support.txt
+++ b/Documentation/features/sched/numa-balancing/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: |  ..  |
     |         arm: |  ..  |
     |       arm64: |  ok  |
-    |         c6x: |  ..  |
     |        csky: |  ..  |
     |       h8300: |  ..  |
     |     hexagon: |  ..  |
@@ -23,7 +22,7 @@
     |    openrisc: |  ..  |
     |      parisc: |  ..  |
     |     powerpc: |  ok  |
-    |       riscv: | TODO |
+    |       riscv: |  ok  |
     |        s390: |  ok  |
     |          sh: |  ..  |
     |       sparc: | TODO |
diff --git a/Documentation/features/seccomp/seccomp-filter/arch-support.txt b/Documentation/features/seccomp/seccomp-filter/arch-support.txt
index eb3d74092c61..26eec58ab819 100644
--- a/Documentation/features/seccomp/seccomp-filter/arch-support.txt
+++ b/Documentation/features/seccomp/seccomp-filter/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: | TODO |
diff --git a/Documentation/features/time/arch-tick-broadcast/arch-support.txt b/Documentation/features/time/arch-tick-broadcast/arch-support.txt
index 4d11cbb3c09b..8639fe8315f5 100644
--- a/Documentation/features/time/arch-tick-broadcast/arch-support.txt
+++ b/Documentation/features/time/arch-tick-broadcast/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
diff --git a/Documentation/features/time/clockevents/arch-support.txt b/Documentation/features/time/clockevents/arch-support.txt
index 6863a3fbddad..9a81cb03b1fd 100644
--- a/Documentation/features/time/clockevents/arch-support.txt
+++ b/Documentation/features/time/clockevents/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: |  ok  |
     |         arm: | TODO |
     |       arm64: |  ok  |
-    |         c6x: |  ok  |
     |        csky: |  ok  |
     |       h8300: |  ok  |
     |     hexagon: |  ok  |
diff --git a/Documentation/features/time/context-tracking/arch-support.txt b/Documentation/features/time/context-tracking/arch-support.txt
index 52aea275aab7..4ed116c2ec39 100644
--- a/Documentation/features/time/context-tracking/arch-support.txt
+++ b/Documentation/features/time/context-tracking/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: | TODO |
diff --git a/Documentation/features/time/irq-time-acct/arch-support.txt b/Documentation/features/time/irq-time-acct/arch-support.txt
index 6fc03deb1c38..bc30c15557c7 100644
--- a/Documentation/features/time/irq-time-acct/arch-support.txt
+++ b/Documentation/features/time/irq-time-acct/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
diff --git a/Documentation/features/time/virt-cpuacct/arch-support.txt b/Documentation/features/time/virt-cpuacct/arch-support.txt
index e51f3af38e31..050de43bbbb9 100644
--- a/Documentation/features/time/virt-cpuacct/arch-support.txt
+++ b/Documentation/features/time/virt-cpuacct/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: | TODO |
diff --git a/Documentation/features/vm/ELF-ASLR/arch-support.txt b/Documentation/features/vm/ELF-ASLR/arch-support.txt
index eccda0732474..99cb6d7f5005 100644
--- a/Documentation/features/vm/ELF-ASLR/arch-support.txt
+++ b/Documentation/features/vm/ELF-ASLR/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
diff --git a/Documentation/features/vm/PG_uncached/arch-support.txt b/Documentation/features/vm/PG_uncached/arch-support.txt
index c74e3f8040e1..6cde38458596 100644
--- a/Documentation/features/vm/PG_uncached/arch-support.txt
+++ b/Documentation/features/vm/PG_uncached/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: | TODO |
     |       arm64: | TODO |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
diff --git a/Documentation/features/vm/THP/arch-support.txt b/Documentation/features/vm/THP/arch-support.txt
index 1c0b95f2b40d..e8238cb2a4da 100644
--- a/Documentation/features/vm/THP/arch-support.txt
+++ b/Documentation/features/vm/THP/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: |  ok  |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: |  ..  |
     |        csky: |  ..  |
     |       h8300: |  ..  |
     |     hexagon: |  ..  |
diff --git a/Documentation/features/vm/TLB/arch-support.txt b/Documentation/features/vm/TLB/arch-support.txt
index 30f75a79ce01..48a5ca548399 100644
--- a/Documentation/features/vm/TLB/arch-support.txt
+++ b/Documentation/features/vm/TLB/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: | TODO |
     |       arm64: | TODO |
-    |         c6x: |  ..  |
     |        csky: | TODO |
     |       h8300: |  ..  |
     |     hexagon: | TODO |
diff --git a/Documentation/features/vm/huge-vmap/arch-support.txt b/Documentation/features/vm/huge-vmap/arch-support.txt
index c5ff3a427722..439fd9069b8b 100644
--- a/Documentation/features/vm/huge-vmap/arch-support.txt
+++ b/Documentation/features/vm/huge-vmap/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: | TODO |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
diff --git a/Documentation/features/vm/ioremap_prot/arch-support.txt b/Documentation/features/vm/ioremap_prot/arch-support.txt
index b5fb37c28cc6..9a0c8783b84d 100644
--- a/Documentation/features/vm/ioremap_prot/arch-support.txt
+++ b/Documentation/features/vm/ioremap_prot/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: |  ok  |
     |         arm: | TODO |
     |       arm64: | TODO |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
diff --git a/Documentation/features/vm/pte_special/arch-support.txt b/Documentation/features/vm/pte_special/arch-support.txt
index 13d0e1e17001..40b969f3a6bb 100644
--- a/Documentation/features/vm/pte_special/arch-support.txt
+++ b/Documentation/features/vm/pte_special/arch-support.txt
@@ -10,7 +10,6 @@
     |         arc: |  ok  |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
index 633610253f45..0302035781be 100644
--- a/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@ -883,3 +883,10 @@ For bvec based itererators bio_iov_iter_get_pages() now doesn't copy bvecs but
 uses the one provided. Anyone issuing kiocb-I/O should ensure that the bvec and
 page references stay until I/O has completed, i.e. until ->ki_complete() has
 been called or returned with non -EIOCBQUEUED code.
+
+---
+
+**mandatory**
+
+mnt_want_write_file() can now only be paired with mnt_drop_write_file(),
+whereas previously it could be paired with mnt_drop_write() as well.
diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index 9abdba17565e..48fbfc336ebf 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -691,6 +691,10 @@ files are there, and which are missing.
                 number of processes currently runnable (running or on ready queue);
                 total number of processes in system;
                 last pid created.
+                All fields are separated by one space except "number of
+                processes currently runnable" and "total number of processes
+                in system", which are separated by a slash ('/'). Example:
+                0.61 0.61 0.55 3/828 22084
  locks        Kernel locks
  meminfo      Memory info
  misc         Miscellaneous
diff --git a/Documentation/powerpc/syscall64-abi.rst b/Documentation/powerpc/syscall64-abi.rst
index cf9b2857c72a..dabee3729e5a 100644
--- a/Documentation/powerpc/syscall64-abi.rst
+++ b/Documentation/powerpc/syscall64-abi.rst
@@ -46,25 +46,38 @@ stack frame LR and CR save fields are not used.
 
 Register preservation rules
 ---------------------------
-Register preservation rules match the ELF ABI calling sequence with the
-following differences:
-
-+------------------------------------------------------------------------+
-|        For the sc instruction, differences with the ELF ABI		 |
-+--------------+--------------+------------------------------------------+
-| r0           | Volatile     | (System call number.)			 |
-| rr3          | Volatile     | (Parameter 1, and return value.)	 |
-| rr4-r8       | Volatile     | (Parameters 2-6.)			 |
-| rcr0         | Volatile     | (cr0.SO is the return error condition.)	 |
-| rcr1, cr5-7  | Nonvolatile  |						 |
-| rlr          | Nonvolatile  |						 |
-+--------------+--------------+------------------------------------------+
-|      For the scv 0 instruction, differences with the ELF ABI		 |
-+--------------+--------------+------------------------------------------+
-| r0           | Volatile     | (System call number.)			 |
-| r3           | Volatile     | (Parameter 1, and return value.)	 |
-| r4-r8        | Volatile     | (Parameters 2-6.)			 |
-+--------------+--------------+------------------------------------------+
+Register preservation rules match the ELF ABI calling sequence with some
+differences.
+
+For the sc instruction, the differences from the ELF ABI are as follows:
+
++--------------+--------------------+-----------------------------------------+
+| Register     | Preservation Rules | Purpose                                 |
++==============+====================+=========================================+
+| r0           | Volatile           | (System call number.)                   |
++--------------+--------------------+-----------------------------------------+
+| r3           | Volatile           | (Parameter 1, and return value.)        |
++--------------+--------------------+-----------------------------------------+
+| r4-r8        | Volatile           | (Parameters 2-6.)                       |
++--------------+--------------------+-----------------------------------------+
+| cr0          | Volatile           | (cr0.SO is the return error condition.) |
++--------------+--------------------+-----------------------------------------+
+| cr1, cr5-7   | Nonvolatile        |                                         |
++--------------+--------------------+-----------------------------------------+
+| lr           | Nonvolatile        |                                         |
++--------------+--------------------+-----------------------------------------+
+
+For the scv 0 instruction, the differences from the ELF ABI are as follows:
+
++--------------+--------------------+-----------------------------------------+
+| Register     | Preservation Rules | Purpose                                 |
++==============+====================+=========================================+
+| r0           | Volatile           | (System call number.)                   |
++--------------+--------------------+-----------------------------------------+
+| r3           | Volatile           | (Parameter 1, and return value.)        |
++--------------+--------------------+-----------------------------------------+
+| r4-r8        | Volatile           | (Parameters 2-6.)                       |
++--------------+--------------------+-----------------------------------------+
 
 All floating point and vector data registers as well as control and status
 registers are nonvolatile.
diff --git a/Documentation/process/4.Coding.rst b/Documentation/process/4.Coding.rst
index 0825dc496f22..1f0d81f44e14 100644
--- a/Documentation/process/4.Coding.rst
+++ b/Documentation/process/4.Coding.rst
@@ -242,7 +242,7 @@ and try to avoid "fixes" which make the warning go away without addressing
 its cause.
 
 Note that not all compiler warnings are enabled by default.  Build the
-kernel with "make EXTRA_CFLAGS=-W" to get the full set.
+kernel with "make KCFLAGS=-W" to get the full set.
 
 The kernel provides several configuration options which turn on debugging
 features; most of these are found in the "kernel hacking" submenu.  Several
diff --git a/Documentation/process/submit-checklist.rst b/Documentation/process/submit-checklist.rst
index f709beaf02c9..b1bc2d37bd0a 100644
--- a/Documentation/process/submit-checklist.rst
+++ b/Documentation/process/submit-checklist.rst
@@ -96,7 +96,7 @@ and elsewhere regarding submitting Linux kernel patches.
     injection might be appropriate.
 
 20) Newly-added code has been compiled with ``gcc -W`` (use
-    ``make EXTRA_CFLAGS=-W``).  This will generate lots of noise, but is good
+    ``make KCFLAGS=-W``).  This will generate lots of noise, but is good
     for finding bugs like "warning: comparison between signed and unsigned".
 
 21) Tested after it has been merged into the -mm patchset to make sure
diff --git a/Documentation/translations/it_IT/process/4.Coding.rst b/Documentation/translations/it_IT/process/4.Coding.rst
index a5e36aa60448..8012fe9497ae 100644
--- a/Documentation/translations/it_IT/process/4.Coding.rst
+++ b/Documentation/translations/it_IT/process/4.Coding.rst
@@ -256,7 +256,7 @@ e cercate di evitare le "riparazioni" che fan sparire l'avvertimento senza
 però averne trovato la causa.
 
 Tenete a mente che non tutti gli avvertimenti sono disabilitati di default.
-Costruite il kernel con "make EXTRA_CFLAGS=-W" per ottenerli tutti.
+Costruite il kernel con "make KCFLAGS=-W" per ottenerli tutti.
 
 Il kernel fornisce differenti opzioni che abilitano funzionalità di debugging;
 molti di queste sono trovano all'interno del sotto menu "kernel hacking".
diff --git a/Documentation/translations/it_IT/process/submit-checklist.rst b/Documentation/translations/it_IT/process/submit-checklist.rst
index 3e575502690f..614fc17d9086 100644
--- a/Documentation/translations/it_IT/process/submit-checklist.rst
+++ b/Documentation/translations/it_IT/process/submit-checklist.rst
@@ -104,7 +104,7 @@ sottomissione delle patch, in particolare
     l'iniezione di fallimenti specifici per il sottosistema.
 
 22) Il nuovo codice è stato compilato con ``gcc -W`` (usate
-    ``make EXTRA_CFLAGS=-W``).  Questo genererà molti avvisi, ma è ottimo
+    ``make KCFLAGS=-W``).  Questo genererà molti avvisi, ma è ottimo
     per scovare bachi come  "warning: comparison between signed and unsigned".
 
 23) La patch è stata verificata dopo essere stata inclusa nella serie di patch
diff --git a/Documentation/translations/zh_CN/process/4.Coding.rst b/Documentation/translations/zh_CN/process/4.Coding.rst
index 959a06ba025c..66cd8ee07606 100644
--- a/Documentation/translations/zh_CN/process/4.Coding.rst
+++ b/Documentation/translations/zh_CN/process/4.Coding.rst
@@ -165,7 +165,7 @@ Linus对这个问题给出了最佳答案:
 通常，这些警告都指向真正的问题。提交以供审阅的代码通常不会产生任何编译器警告。
 在消除警告时，注意了解真正的原因，并尽量避免“修复”，使警告消失而不解决其原因。
 
-请注意，并非所有编译器警告都默认启用。使用“make EXTRA_CFLAGS=-W”构建内核以
+请注意，并非所有编译器警告都默认启用。使用“make KCFLAGS=-W”构建内核以
 获得完整集合。
 
 内核提供了几个配置选项，可以打开调试功能；大多数配置选项位于“kernel hacking”
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 6c71554206cc..5112ab996394 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -249,7 +249,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 	childti->pcb.ksp = (unsigned long) childstack;
 	childti->pcb.flags = 1;	/* set FEN, clear everything else */
 
-	if (unlikely(p->flags & PF_KTHREAD)) {
+	if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
 		/* kernel thread */
 		memset(childstack, 0,
 			sizeof(struct switch_stack) + sizeof(struct pt_regs));
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index 37f724ad5e39..d838d0d57696 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -191,7 +191,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 	childksp[0] = 0;			/* fp */
 	childksp[1] = (unsigned long)ret_from_fork; /* blink */
 
-	if (unlikely(p->flags & PF_KTHREAD)) {
+	if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
 		memset(c_regs, 0, sizeof(struct pt_regs));
 
 		c_callee->r13 = kthread_arg;
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index ee3aee69e444..5199a2bb4111 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -243,7 +243,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	thread->cpu_domain = get_domain();
 #endif
 
-	if (likely(!(p->flags & PF_KTHREAD))) {
+	if (likely(!(p->flags & (PF_KTHREAD | PF_IO_WORKER)))) {
 		*childregs = *current_pt_regs();
 		childregs->ARM_r0 = 0;
 		if (stack_start)
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 4cc1ccc8d6ab..325c83b1a24d 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -398,7 +398,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
 
 	ptrauth_thread_init_kernel(p);
 
-	if (likely(!(p->flags & PF_KTHREAD))) {
+	if (likely(!(p->flags & (PF_KTHREAD | PF_IO_WORKER)))) {
 		*childregs = *current_pt_regs();
 		childregs->regs[0] = 0;
 
diff --git a/arch/csky/kernel/process.c b/arch/csky/kernel/process.c
index 69af6bc87e64..3d0ca22cd0e2 100644
--- a/arch/csky/kernel/process.c
+++ b/arch/csky/kernel/process.c
@@ -49,7 +49,7 @@ int copy_thread(unsigned long clone_flags,
 	/* setup thread.sp for switch_to !!! */
 	p->thread.sp = (unsigned long)childstack;
 
-	if (unlikely(p->flags & PF_KTHREAD)) {
+	if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
 		memset(childregs, 0, sizeof(struct pt_regs));
 		childstack->r15 = (unsigned long) ret_from_kernel_thread;
 		childstack->r10 = kthread_arg;
diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c
index bc1364db58fe..46b1342ce515 100644
--- a/arch/h8300/kernel/process.c
+++ b/arch/h8300/kernel/process.c
@@ -112,7 +112,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 
 	childregs = (struct pt_regs *) (THREAD_SIZE + task_stack_page(p)) - 1;
 
-	if (unlikely(p->flags & PF_KTHREAD)) {
+	if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
 		memset(childregs, 0, sizeof(struct pt_regs));
 		childregs->retpc = (unsigned long) ret_from_kernel_thread;
 		childregs->er4 = topstk; /* arg */
diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c
index 6a980cba7b29..c61165c99ae0 100644
--- a/arch/hexagon/kernel/process.c
+++ b/arch/hexagon/kernel/process.c
@@ -73,7 +73,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
 						    sizeof(*ss));
 	ss->lr = (unsigned long)ret_from_fork;
 	p->thread.switch_sp = ss;
-	if (unlikely(p->flags & PF_KTHREAD)) {
+	if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
 		memset(childregs, 0, sizeof(struct pt_regs));
 		/* r24 <- fn, r25 <- arg */
 		ss->r24 = usp;
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 4ebbfa076a26..7e1a1525e202 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -338,7 +338,7 @@ copy_thread(unsigned long clone_flags, unsigned long user_stack_base,
 
 	ia64_drop_fpu(p);	/* don't pick up stale state from a CPU's fph */
 
-	if (unlikely(p->flags & PF_KTHREAD)) {
+	if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
 		if (unlikely(!user_stack_base)) {
 			/* fork_idle() called us */
 			return 0;
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index 08359a6e058f..da83cc83e791 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -157,7 +157,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
 	 */
 	p->thread.fs = get_fs().seg;
 
-	if (unlikely(p->flags & PF_KTHREAD)) {
+	if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
 		/* kernel thread */
 		memset(frame, 0, sizeof(struct fork_frame));
 		frame->regs.sr = PS_S;
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index 657c2beb665e..62aa237180b6 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -59,7 +59,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
 	struct pt_regs *childregs = task_pt_regs(p);
 	struct thread_info *ti = task_thread_info(p);
 
-	if (unlikely(p->flags & PF_KTHREAD)) {
+	if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
 		/* if we're creating a new kernel thread then just zeroing all
 		 * the registers. That's OK for a brand new thread.*/
 		memset(childregs, 0, sizeof(struct pt_regs));
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index af4c862ec5ff..7efa0d1a4c2b 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -120,7 +120,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 	/*  Put the stack after the struct pt_regs.  */
 	childksp = (unsigned long) childregs;
 	p->thread.cp0_status = (read_c0_status() & ~(ST0_CU2|ST0_CU1)) | ST0_KERNEL_CUMASK;
-	if (unlikely(p->flags & PF_KTHREAD)) {
+	if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
 		/* kernel thread */
 		unsigned long status = p->thread.cp0_status;
 		memset(childregs, 0, sizeof(struct pt_regs));
diff --git a/arch/nds32/kernel/process.c b/arch/nds32/kernel/process.c
index e01ad5d17224..c1327e552ec6 100644
--- a/arch/nds32/kernel/process.c
+++ b/arch/nds32/kernel/process.c
@@ -156,7 +156,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
 
 	memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));
 
-	if (unlikely(p->flags & PF_KTHREAD)) {
+	if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
 		memset(childregs, 0, sizeof(struct pt_regs));
 		/* kernel thread fn */
 		p->thread.cpu_context.r6 = stack_start;
diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c
index 50b4eb19a6cc..c5f916ca6845 100644
--- a/arch/nios2/kernel/process.c
+++ b/arch/nios2/kernel/process.c
@@ -109,7 +109,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
 	struct switch_stack *childstack =
 		((struct switch_stack *)childregs) - 1;
 
-	if (unlikely(p->flags & PF_KTHREAD)) {
+	if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
 		memset(childstack, 0,
 			sizeof(struct switch_stack) + sizeof(struct pt_regs));
 
diff --git a/arch/openrisc/Kbuild b/arch/openrisc/Kbuild
new file mode 100644
index 000000000000..4234b4c03e72
--- /dev/null
+++ b/arch/openrisc/Kbuild
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-y += lib/ kernel/ mm/
+obj-y += boot/dts/
diff --git a/arch/openrisc/Makefile b/arch/openrisc/Makefile
index bf10141c7426..410e7abfac69 100644
--- a/arch/openrisc/Makefile
+++ b/arch/openrisc/Makefile
@@ -24,6 +24,10 @@ LIBGCC 		:= $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
 
 KBUILD_CFLAGS	+= -pipe -ffixed-r10 -D__linux__
 
+all: vmlinux.bin
+
+boot := arch/$(ARCH)/boot
+
 ifeq ($(CONFIG_OPENRISC_HAVE_INST_MUL),y)
 	KBUILD_CFLAGS += $(call cc-option,-mhard-mul)
 else
@@ -38,14 +42,13 @@ endif
 
 head-y 		:= arch/openrisc/kernel/head.o
 
-core-y		+= arch/openrisc/lib/ \
-		   arch/openrisc/kernel/ \
-		   arch/openrisc/mm/
+core-y		+= arch/openrisc/
 libs-y		+= $(LIBGCC)
 
-ifneq '$(CONFIG_OPENRISC_BUILTIN_DTB)' '""'
-BUILTIN_DTB := y
-else
-BUILTIN_DTB := n
-endif
-core-$(BUILTIN_DTB) += arch/openrisc/boot/dts/
+PHONY += vmlinux.bin
+
+vmlinux.bin: vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
+archclean:
+	$(Q)$(MAKE) $(clean)=$(boot)
diff --git a/arch/openrisc/boot/.gitignore b/arch/openrisc/boot/.gitignore
new file mode 100644
index 000000000000..007d6fea3145
--- /dev/null
+++ b/arch/openrisc/boot/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+vmlinux.bin
diff --git a/arch/openrisc/boot/Makefile b/arch/openrisc/boot/Makefile
new file mode 100644
index 000000000000..5b28538f4dd1
--- /dev/null
+++ b/arch/openrisc/boot/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for bootable kernel images
+#
+
+targets += vmlinux.bin
+
+OBJCOPYFLAGS_vmlinux.bin := -O binary
+$(obj)/vmlinux.bin: vmlinux FORCE
+	$(call if_changed,objcopy)
diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c
index 3c98728cce24..eb62429681fc 100644
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@@ -34,6 +34,7 @@
 #include <linux/init_task.h>
 #include <linux/mqueue.h>
 #include <linux/fs.h>
+#include <linux/reboot.h>
 
 #include <linux/uaccess.h>
 #include <asm/io.h>
@@ -49,10 +50,16 @@
  */
 struct thread_info *current_thread_info_set[NR_CPUS] = { &init_thread_info, };
 
-void machine_restart(void)
+void machine_restart(char *cmd)
 {
-	printk(KERN_INFO "*** MACHINE RESTART ***\n");
-	__asm__("l.nop 1");
+	do_kernel_restart(cmd);
+
+	/* Give a grace period for failure to restart of 1s */
+	mdelay(1000);
+
+	/* Whoops - the platform was unable to reboot. Tell the user! */
+	pr_emerg("Reboot failed -- System halted\n");
+	while (1);
 }
 
 /*
@@ -167,7 +174,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
 	sp -= sizeof(struct pt_regs);
 	kregs = (struct pt_regs *)sp;
 
-	if (unlikely(p->flags & PF_KTHREAD)) {
+	if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
 		memset(kregs, 0, sizeof(struct pt_regs));
 		kregs->gpr[20] = usp; /* fn, kernel thread */
 		kregs->gpr[22] = arg;
diff --git a/arch/openrisc/kernel/smp.c b/arch/openrisc/kernel/smp.c
index 29c82ef2e207..48e1092a64de 100644
--- a/arch/openrisc/kernel/smp.c
+++ b/arch/openrisc/kernel/smp.c
@@ -16,6 +16,7 @@
 #include <linux/sched.h>
 #include <linux/sched/mm.h>
 #include <linux/irq.h>
+#include <linux/of.h>
 #include <asm/cpuinfo.h>
 #include <asm/mmu_context.h>
 #include <asm/tlbflush.h>
@@ -60,22 +61,32 @@ void __init smp_prepare_boot_cpu(void)
 
 void __init smp_init_cpus(void)
 {
-	int i;
+	struct device_node *cpu;
+	u32 cpu_id;
 
-	for (i = 0; i < NR_CPUS; i++)
-		set_cpu_possible(i, true);
+	for_each_of_cpu_node(cpu) {
+		if (of_property_read_u32(cpu, "reg", &cpu_id)) {
+			pr_warn("%s missing reg property", cpu->full_name);
+			continue;
+		}
+
+		if (cpu_id < NR_CPUS)
+			set_cpu_possible(cpu_id, true);
+	}
 }
 
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
-	int i;
+	unsigned int cpu;
 
 	/*
 	 * Initialise the present map, which describes the set of CPUs
 	 * actually populated at the present time.
 	 */
-	for (i = 0; i < max_cpus; i++)
-		set_cpu_present(i, true);
+	for_each_possible_cpu(cpu) {
+		if (cpu < max_cpus)
+			set_cpu_present(cpu, true);
+	}
 }
 
 void __init smp_cpus_done(unsigned int max_cpus)
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index fda1c1a6a444..b144fbe29bc1 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -200,7 +200,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
 	extern void * const ret_from_kernel_thread;
 	extern void * const child_return;
 
-	if (unlikely(p->flags & PF_KTHREAD)) {
+	if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
 		/* kernel thread */
 		memset(cregs, 0, sizeof(struct pt_regs));
 		if (!usp) /* idle thread */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 924d023dad0a..3231c2df9e26 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1670,7 +1670,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 	/* Copy registers */
 	sp -= sizeof(struct pt_regs);
 	childregs = (struct pt_regs *) sp;
-	if (unlikely(p->flags & PF_KTHREAD)) {
+	if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
 		/* kernel thread */
 		memset(childregs, 0, sizeof(struct pt_regs));
 		childregs->gpr[1] = sp + sizeof(struct pt_regs);
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index 19f4688f2f36..6f728e731bed 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -124,7 +124,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
 	struct pt_regs *childregs = task_pt_regs(p);
 
 	/* p->thread holds context to be restored by __switch_to() */
-	if (unlikely(p->flags & PF_KTHREAD)) {
+	if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
 		/* Kernel thread */
 		memset(childregs, 0, sizeof(struct pt_regs));
 		childregs->gp = gp_in_global;
diff --git a/arch/s390/include/asm/irq_work.h b/arch/s390/include/asm/irq_work.h
new file mode 100644
index 000000000000..603783766d0a
--- /dev/null
+++ b/arch/s390/include/asm/irq_work.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_IRQ_WORK_H
+#define _ASM_S390_IRQ_WORK_H
+
+static inline bool arch_irq_work_has_interrupt(void)
+{
+	return true;
+}
+
+void arch_irq_work_raise(void);
+
+#endif /* _ASM_S390_IRQ_WORK_H */
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index d1297d6bbdcf..6b187cd72251 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -135,7 +135,7 @@ static inline void pmd_populate(struct mm_struct *mm,
 #define pmd_populate_kernel(mm, pmd, pte) pmd_populate(mm, pmd, pte)
 
 #define pmd_pgtable(pmd) \
-	(pgtable_t)(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE)
+	((pgtable_t)__va(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE))
 
 /*
  * page table entry allocation/free routines.
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 794746a32806..29c7ecd5ad1d 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1219,8 +1219,8 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
 #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
 #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
 
-#define p4d_deref(pud) (p4d_val(pud) & _REGION_ENTRY_ORIGIN)
-#define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN)
+#define p4d_deref(pud) ((unsigned long)__va(p4d_val(pud) & _REGION_ENTRY_ORIGIN))
+#define pgd_deref(pgd) ((unsigned long)__va(pgd_val(pgd) & _REGION_ENTRY_ORIGIN))
 
 static inline unsigned long pmd_deref(pmd_t pmd)
 {
@@ -1229,12 +1229,12 @@ static inline unsigned long pmd_deref(pmd_t pmd)
 	origin_mask = _SEGMENT_ENTRY_ORIGIN;
 	if (pmd_large(pmd))
 		origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE;
-	return pmd_val(pmd) & origin_mask;
+	return (unsigned long)__va(pmd_val(pmd) & origin_mask);
 }
 
 static inline unsigned long pmd_pfn(pmd_t pmd)
 {
-	return pmd_deref(pmd) >> PAGE_SHIFT;
+	return __pa(pmd_deref(pmd)) >> PAGE_SHIFT;
 }
 
 static inline unsigned long pud_deref(pud_t pud)
@@ -1244,12 +1244,12 @@ static inline unsigned long pud_deref(pud_t pud)
 	origin_mask = _REGION_ENTRY_ORIGIN;
 	if (pud_large(pud))
 		origin_mask = _REGION3_ENTRY_ORIGIN_LARGE;
-	return pud_val(pud) & origin_mask;
+	return (unsigned long)__va(pud_val(pud) & origin_mask);
 }
 
 static inline unsigned long pud_pfn(pud_t pud)
 {
-	return pud_deref(pud) >> PAGE_SHIFT;
+	return __pa(pud_deref(pud)) >> PAGE_SHIFT;
 }
 
 /*
@@ -1329,7 +1329,7 @@ static inline bool gup_fast_permitted(unsigned long start, unsigned long end)
 }
 #define gup_fast_permitted gup_fast_permitted
 
-#define pfn_pte(pfn,pgprot) mk_pte_phys(__pa((pfn) << PAGE_SHIFT),(pgprot))
+#define pfn_pte(pfn, pgprot)	mk_pte_phys(((pfn) << PAGE_SHIFT), (pgprot))
 #define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT)
 #define pte_page(x) pfn_to_page(pte_pfn(x))
 
@@ -1636,7 +1636,7 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
 }
 #define pmdp_collapse_flush pmdp_collapse_flush
 
-#define pfn_pmd(pfn, pgprot)	mk_pmd_phys(__pa((pfn) << PAGE_SHIFT), (pgprot))
+#define pfn_pmd(pfn, pgprot)	mk_pmd_phys(((pfn) << PAGE_SHIFT), (pgprot))
 #define mk_pmd(page, pgprot)	pfn_pmd(page_to_pfn(page), (pgprot))
 
 static inline int pmd_trans_huge(pmd_t pmd)
diff --git a/arch/s390/include/uapi/asm/perf_cpum_cf_diag.h b/arch/s390/include/uapi/asm/perf_cpum_cf_diag.h
new file mode 100644
index 000000000000..3d8284b95f87
--- /dev/null
+++ b/arch/s390/include/uapi/asm/perf_cpum_cf_diag.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright IBM Corp. 2021
+ * Interface implementation for communication with the CPU Measurement
+ * counter facility device driver.
+ *
+ * Author(s): Thomas Richter <tmricht@linux.ibm.com>
+ *
+ * Define for ioctl() commands to communicate with the CPU Measurement
+ * counter facility device driver.
+ */
+
+#ifndef _PERF_CPUM_CF_DIAG_H
+#define _PERF_CPUM_CF_DIAG_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+#define S390_HWCTR_DEVICE		"hwctr"
+#define S390_HWCTR_START_VERSION	1
+
+struct s390_ctrset_start {		/* Set CPUs to operate on */
+	__u64 version;			/* Version of interface */
+	__u64 data_bytes;		/* # of bytes required */
+	__u64 cpumask_len;		/* Length of CPU mask in bytes */
+	__u64 *cpumask;			/* Pointer to CPU mask */
+	__u64 counter_sets;		/* Bit mask of counter sets to get */
+};
+
+struct s390_ctrset_setdata {		/* Counter set data */
+	__u32 set;			/* Counter set number */
+	__u32 no_cnts;			/* # of counters stored in cv[] */
+	__u64 cv[0];			/* Counter values (variable length) */
+};
+
+struct s390_ctrset_cpudata {		/* Counter set data per CPU */
+	__u32 cpu_nr;			/* CPU number */
+	__u32 no_sets;			/* # of counters sets in data[] */
+	struct s390_ctrset_setdata data[0];
+};
+
+struct s390_ctrset_read {		/* Structure to get all ctr sets */
+	__u64 no_cpus;			/* Total # of CPUs data taken from */
+	struct s390_ctrset_cpudata data[0];
+};
+
+#define S390_HWCTR_MAGIC	'C'	/* Random magic # for ioctls */
+#define	S390_HWCTR_START	_IOWR(S390_HWCTR_MAGIC, 1, struct s390_ctrset_start)
+#define	S390_HWCTR_STOP		_IO(S390_HWCTR_MAGIC, 2)
+#define	S390_HWCTR_READ		_IOWR(S390_HWCTR_MAGIC, 3, struct s390_ctrset_read)
+#endif
diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c
index b5c86fb70d63..db4877bbb9aa 100644
--- a/arch/s390/kernel/perf_cpum_cf_diag.c
+++ b/arch/s390/kernel/perf_cpum_cf_diag.c
@@ -2,7 +2,7 @@
 /*
  * Performance event support for s390x - CPU-measurement Counter Sets
  *
- *  Copyright IBM Corp. 2019
+ *  Copyright IBM Corp. 2019, 2021
  *  Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
  *	       Thomas Richer <tmricht@linux.ibm.com>
  */
@@ -17,6 +17,8 @@
 #include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/processor.h>
+#include <linux/miscdevice.h>
+#include <linux/mutex.h>
 
 #include <asm/ctl_reg.h>
 #include <asm/irq.h>
@@ -24,15 +26,20 @@
 #include <asm/timex.h>
 #include <asm/debug.h>
 
-#define	CF_DIAG_CTRSET_DEF		0xfeef	/* Counter set header mark */
+#include <asm/perf_cpum_cf_diag.h>
 
+#define	CF_DIAG_CTRSET_DEF		0xfeef	/* Counter set header mark */
+#define CF_DIAG_MIN_INTERVAL		60	/* Minimum counter set read */
+						/* interval in seconds */
+static unsigned long cf_diag_interval = CF_DIAG_MIN_INTERVAL;
 static unsigned int cf_diag_cpu_speed;
 static debug_info_t *cf_diag_dbg;
 
-struct cf_diag_csd {		/* Counter set data per CPU */
+struct cf_diag_csd {			/* Counter set data per CPU */
 	size_t used;			/* Bytes used in data/start */
 	unsigned char start[PAGE_SIZE];	/* Counter set at event start */
 	unsigned char data[PAGE_SIZE];	/* Counter set at event delete */
+	unsigned int sets;		/* # Counter set saved in data */
 };
 static DEFINE_PER_CPU(struct cf_diag_csd, cf_diag_csd);
 
@@ -178,18 +185,35 @@ static void cf_diag_disable(struct pmu *pmu)
 
 /* Number of perf events counting hardware events */
 static atomic_t cf_diag_events = ATOMIC_INIT(0);
+/* Used to avoid races in calling reserve/release_cpumf_hardware */
+static DEFINE_MUTEX(cf_diag_reserve_mutex);
 
 /* Release the PMU if event is the last perf event */
 static void cf_diag_perf_event_destroy(struct perf_event *event)
 {
 	debug_sprintf_event(cf_diag_dbg, 5,
 			    "%s event %p cpu %d cf_diag_events %d\n",
-			    __func__, event, event->cpu,
+			    __func__, event, smp_processor_id(),
 			    atomic_read(&cf_diag_events));
 	if (atomic_dec_return(&cf_diag_events) == 0)
 		__kernel_cpumcf_end();
 }
 
+static int get_authctrsets(void)
+{
+	struct cpu_cf_events *cpuhw;
+	unsigned long auth = 0;
+	enum cpumf_ctr_set i;
+
+	cpuhw = &get_cpu_var(cpu_cf_events);
+	for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
+		if (cpuhw->info.auth_ctl & cpumf_ctr_ctl[i])
+			auth |= cpumf_ctr_ctl[i];
+	}
+	put_cpu_var(cpu_cf_events);
+	return auth;
+}
+
 /* Setup the event. Test for authorized counter sets and only include counter
  * sets which are authorized at the time of the setup. Including unauthorized
  * counter sets result in specification exception (and panic).
@@ -197,15 +221,12 @@ static void cf_diag_perf_event_destroy(struct perf_event *event)
 static int __hw_perf_event_init(struct perf_event *event)
 {
 	struct perf_event_attr *attr = &event->attr;
-	struct cpu_cf_events *cpuhw;
-	enum cpumf_ctr_set i;
 	int err = 0;
 
 	debug_sprintf_event(cf_diag_dbg, 5, "%s event %p cpu %d\n", __func__,
 			    event, event->cpu);
 
 	event->hw.config = attr->config;
-	event->hw.config_base = 0;
 
 	/* Add all authorized counter sets to config_base. The
 	 * the hardware init function is either called per-cpu or just once
@@ -215,11 +236,7 @@ static int __hw_perf_event_init(struct perf_event *event)
 	 * Checking the authorization on any CPU is fine as the hardware
 	 * applies the same authorization settings to all CPUs.
 	 */
-	cpuhw = &get_cpu_var(cpu_cf_events);
-	for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i)
-		if (cpuhw->info.auth_ctl & cpumf_ctr_ctl[i])
-			event->hw.config_base |= cpumf_ctr_ctl[i];
-	put_cpu_var(cpu_cf_events);
+	event->hw.config_base = get_authctrsets();
 
 	/* No authorized counter sets, nothing to count/sample */
 	if (!event->hw.config_base) {
@@ -237,6 +254,25 @@ out:
 	return err;
 }
 
+/* Return 0 if the CPU-measurement counter facility is currently free
+ * and an error otherwise.
+ */
+static int cf_diag_perf_event_inuse(void)
+{
+	int err = 0;
+
+	if (!atomic_inc_not_zero(&cf_diag_events)) {
+		mutex_lock(&cf_diag_reserve_mutex);
+		if (atomic_read(&cf_diag_events) == 0 &&
+		    __kernel_cpumcf_begin())
+			err = -EBUSY;
+		else
+			err = atomic_inc_return(&cf_diag_events);
+		mutex_unlock(&cf_diag_reserve_mutex);
+	}
+	return err;
+}
+
 static int cf_diag_event_init(struct perf_event *event)
 {
 	struct perf_event_attr *attr = &event->attr;
@@ -264,13 +300,9 @@ static int cf_diag_event_init(struct perf_event *event)
 	}
 
 	/* Initialize for using the CPU-measurement counter facility */
-	if (atomic_inc_return(&cf_diag_events) == 1) {
-		if (__kernel_cpumcf_begin()) {
-			atomic_dec(&cf_diag_events);
-			err = -EBUSY;
-			goto out;
-		}
-	}
+	err = cf_diag_perf_event_inuse();
+	if (err < 0)
+		goto out;
 	event->destroy = cf_diag_perf_event_destroy;
 
 	err = __hw_perf_event_init(event);
@@ -599,6 +631,8 @@ static void cf_diag_del(struct perf_event *event, int flags)
 	cpuhw->flags &= ~PMU_F_IN_USE;
 }
 
+/* Default counter set events and format attribute groups */
+
 CPUMF_EVENT_ATTR(CF_DIAG, CF_DIAG, PERF_EVENT_CPUM_CF_DIAG);
 
 static struct attribute *cf_diag_events_attr[] = {
@@ -663,6 +697,452 @@ static void cf_diag_get_cpu_speed(void)
 	}
 }
 
+/* Code to create device and file I/O operations */
+static atomic_t ctrset_opencnt = ATOMIC_INIT(0);	/* Excl. access */
+
+static int cf_diag_open(struct inode *inode, struct file *file)
+{
+	int err = 0;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	if (atomic_xchg(&ctrset_opencnt, 1))
+		return -EBUSY;
+
+	/* Avoid concurrent access with perf_event_open() system call */
+	mutex_lock(&cf_diag_reserve_mutex);
+	if (atomic_read(&cf_diag_events) || __kernel_cpumcf_begin())
+		err = -EBUSY;
+	mutex_unlock(&cf_diag_reserve_mutex);
+	if (err) {
+		atomic_set(&ctrset_opencnt, 0);
+		return err;
+	}
+	file->private_data = NULL;
+	debug_sprintf_event(cf_diag_dbg, 2, "%s\n", __func__);
+	/* nonseekable_open() never fails */
+	return nonseekable_open(inode, file);
+}
+
+/* Variables for ioctl() interface support */
+static DEFINE_MUTEX(cf_diag_ctrset_mutex);
+static struct cf_diag_ctrset {
+	unsigned long ctrset;		/* Bit mask of counter set to read */
+	cpumask_t mask;			/* CPU mask to read from */
+	time64_t lastread;		/* Epoch counter set last read */
+} cf_diag_ctrset;
+
+static void cf_diag_ctrset_clear(void)
+{
+	cpumask_clear(&cf_diag_ctrset.mask);
+	cf_diag_ctrset.ctrset = 0;
+}
+
+static void cf_diag_release_cpu(void *p)
+{
+	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+
+	debug_sprintf_event(cf_diag_dbg, 3, "%s cpu %d\n", __func__,
+			    smp_processor_id());
+	lcctl(0);		/* Reset counter sets */
+	cpuhw->state = 0;	/* Save state in CPU hardware state */
+}
+
+/* Release function is also called when application gets terminated without
+ * doing a proper ioctl(..., S390_HWCTR_STOP, ...) command.
+ * Since only one application is allowed to open the device, simple stop all
+ * CPU counter sets.
+ */
+static int cf_diag_release(struct inode *inode, struct file *file)
+{
+	on_each_cpu(cf_diag_release_cpu, NULL, 1);
+	cf_diag_ctrset_clear();
+	atomic_set(&ctrset_opencnt, 0);
+	__kernel_cpumcf_end();
+	debug_sprintf_event(cf_diag_dbg, 2, "%s\n", __func__);
+	return 0;
+}
+
+struct cf_diag_call_on_cpu_parm {	/* Parm struct for smp_call_on_cpu */
+	unsigned int sets;		/* Counter set bit mask */
+	atomic_t cpus_ack;		/* # CPUs successfully executed func */
+};
+
+static int cf_diag_all_copy(unsigned long arg, cpumask_t *mask)
+{
+	struct s390_ctrset_read __user *ctrset_read;
+	unsigned int cpu, cpus, rc;
+	void __user *uptr;
+
+	ctrset_read = (struct s390_ctrset_read __user *)arg;
+	uptr = ctrset_read->data;
+	for_each_cpu(cpu, mask) {
+		struct cf_diag_csd *csd = per_cpu_ptr(&cf_diag_csd, cpu);
+		struct s390_ctrset_cpudata __user *ctrset_cpudata;
+
+		ctrset_cpudata = uptr;
+		debug_sprintf_event(cf_diag_dbg, 5, "%s cpu %d used %zd\n",
+				    __func__, cpu, csd->used);
+		rc  = put_user(cpu, &ctrset_cpudata->cpu_nr);
+		rc |= put_user(csd->sets, &ctrset_cpudata->no_sets);
+		rc |= copy_to_user(ctrset_cpudata->data, csd->data, csd->used);
+		if (rc)
+			return -EFAULT;
+		uptr += sizeof(struct s390_ctrset_cpudata) + csd->used;
+		cond_resched();
+	}
+	cpus = cpumask_weight(mask);
+	if (put_user(cpus, &ctrset_read->no_cpus))
+		return -EFAULT;
+	debug_sprintf_event(cf_diag_dbg, 5, "%s copied %ld\n",
+			    __func__, uptr - (void __user *)ctrset_read->data);
+	return 0;
+}
+
+static size_t cf_diag_cpuset_read(struct s390_ctrset_setdata *p, int ctrset,
+				  int ctrset_size, size_t room)
+{
+	size_t need = 0;
+	int rc = -1;
+
+	need = sizeof(*p) + sizeof(u64) * ctrset_size;
+	debug_sprintf_event(cf_diag_dbg, 5,
+			    "%s room %zd need %zd set %#x set_size %d\n",
+			    __func__, room, need, ctrset, ctrset_size);
+	if (need <= room) {
+		p->set = cpumf_ctr_ctl[ctrset];
+		p->no_cnts = ctrset_size;
+		rc = ctr_stcctm(ctrset, ctrset_size, (u64 *)p->cv);
+		if (rc == 3)		/* Nothing stored */
+			need = 0;
+	}
+	debug_sprintf_event(cf_diag_dbg, 5, "%s need %zd rc %d\n", __func__,
+			    need, rc);
+	return need;
+}
+
+/* Read all counter sets. Since the perf_event_open() system call with
+ * event cpum_cf_diag/.../ is blocked when this interface is active, reuse
+ * the perf_event_open() data buffer to store the counter sets.
+ */
+static void cf_diag_cpu_read(void *parm)
+{
+	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+	struct cf_diag_csd *csd = this_cpu_ptr(&cf_diag_csd);
+	struct cf_diag_call_on_cpu_parm *p = parm;
+	int set, set_size;
+	size_t space;
+
+	debug_sprintf_event(cf_diag_dbg, 5,
+			    "%s new %#x flags %#x state %#llx\n",
+			    __func__, p->sets, cpuhw->flags,
+			    cpuhw->state);
+	/* No data saved yet */
+	csd->used = 0;
+	csd->sets = 0;
+	memset(csd->data, 0, sizeof(csd->data));
+
+	/* Scan the counter sets */
+	for (set = CPUMF_CTR_SET_BASIC; set < CPUMF_CTR_SET_MAX; ++set) {
+		struct s390_ctrset_setdata *sp = (void *)csd->data + csd->used;
+
+		if (!(p->sets & cpumf_ctr_ctl[set]))
+			continue;	/* Counter set not in list */
+		set_size = cf_diag_ctrset_size(set, &cpuhw->info);
+		space = sizeof(csd->data) - csd->used;
+		space = cf_diag_cpuset_read(sp, set, set_size, space);
+		if (space) {
+			csd->used += space;
+			csd->sets += 1;
+		}
+		debug_sprintf_event(cf_diag_dbg, 5, "%s sp %px space %zd\n",
+				    __func__, sp, space);
+	}
+	debug_sprintf_event(cf_diag_dbg, 5, "%s sets %d used %zd\n", __func__,
+			    csd->sets, csd->used);
+}
+
+static int cf_diag_all_read(unsigned long arg)
+{
+	struct cf_diag_call_on_cpu_parm p;
+	cpumask_var_t mask;
+	time64_t now;
+	int rc = 0;
+
+	debug_sprintf_event(cf_diag_dbg, 5, "%s\n", __func__);
+	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+	now = ktime_get_seconds();
+	if (cf_diag_ctrset.lastread + cf_diag_interval > now) {
+		debug_sprintf_event(cf_diag_dbg, 5, "%s now %lld "
+				    " lastread %lld\n", __func__, now,
+				    cf_diag_ctrset.lastread);
+		rc = -EAGAIN;
+		goto out;
+	} else {
+		cf_diag_ctrset.lastread = now;
+	}
+	p.sets = cf_diag_ctrset.ctrset;
+	cpumask_and(mask, &cf_diag_ctrset.mask, cpu_online_mask);
+	on_each_cpu_mask(mask, cf_diag_cpu_read, &p, 1);
+	rc = cf_diag_all_copy(arg, mask);
+out:
+	free_cpumask_var(mask);
+	debug_sprintf_event(cf_diag_dbg, 5, "%s rc %d\n", __func__, rc);
+	return rc;
+}
+
+/* Stop all counter sets via ioctl interface */
+static void cf_diag_ioctl_off(void *parm)
+{
+	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+	struct cf_diag_call_on_cpu_parm *p = parm;
+	int rc;
+
+	debug_sprintf_event(cf_diag_dbg, 5,
+			    "%s new %#x flags %#x state %#llx\n",
+			    __func__, p->sets, cpuhw->flags,
+			    cpuhw->state);
+
+	ctr_set_multiple_disable(&cpuhw->state, p->sets);
+	ctr_set_multiple_stop(&cpuhw->state, p->sets);
+	rc = lcctl(cpuhw->state);		/* Stop counter sets */
+	if (!cpuhw->state)
+		cpuhw->flags &= ~PMU_F_IN_USE;
+	debug_sprintf_event(cf_diag_dbg, 5,
+			    "%s rc %d flags %#x state %#llx\n", __func__,
+			     rc, cpuhw->flags, cpuhw->state);
+}
+
+/* Start counter sets on particular CPU */
+static void cf_diag_ioctl_on(void *parm)
+{
+	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+	struct cf_diag_call_on_cpu_parm *p = parm;
+	int rc;
+
+	debug_sprintf_event(cf_diag_dbg, 5,
+			    "%s new %#x flags %#x state %#llx\n",
+			    __func__, p->sets, cpuhw->flags,
+			    cpuhw->state);
+
+	if (!(cpuhw->flags & PMU_F_IN_USE))
+		cpuhw->state = 0;
+	cpuhw->flags |= PMU_F_IN_USE;
+	rc = lcctl(cpuhw->state);		/* Reset unused counter sets */
+	ctr_set_multiple_enable(&cpuhw->state, p->sets);
+	ctr_set_multiple_start(&cpuhw->state, p->sets);
+	rc |= lcctl(cpuhw->state);		/* Start counter sets */
+	if (!rc)
+		atomic_inc(&p->cpus_ack);
+	debug_sprintf_event(cf_diag_dbg, 5, "%s rc %d state %#llx\n",
+			    __func__, rc, cpuhw->state);
+}
+
+static int cf_diag_all_stop(void)
+{
+	struct cf_diag_call_on_cpu_parm p = {
+		.sets = cf_diag_ctrset.ctrset,
+	};
+	cpumask_var_t mask;
+
+	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+	cpumask_and(mask, &cf_diag_ctrset.mask, cpu_online_mask);
+	on_each_cpu_mask(mask, cf_diag_ioctl_off, &p, 1);
+	free_cpumask_var(mask);
+	return 0;
+}
+
+static int cf_diag_all_start(void)
+{
+	struct cf_diag_call_on_cpu_parm p = {
+		.sets = cf_diag_ctrset.ctrset,
+		.cpus_ack = ATOMIC_INIT(0),
+	};
+	cpumask_var_t mask;
+	int rc = 0;
+
+	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+	cpumask_and(mask, &cf_diag_ctrset.mask, cpu_online_mask);
+	on_each_cpu_mask(mask, cf_diag_ioctl_on, &p, 1);
+	if (atomic_read(&p.cpus_ack) != cpumask_weight(mask)) {
+		on_each_cpu_mask(mask, cf_diag_ioctl_off, &p, 1);
+		rc = -EIO;
+	}
+	free_cpumask_var(mask);
+	return rc;
+}
+
+/* Return the maximum required space for all possible CPUs in case one
+ * CPU will be onlined during the START, READ, STOP cycles.
+ * To find out the size of the counter sets, any one CPU will do. They
+ * all have the same counter sets.
+ */
+static size_t cf_diag_needspace(unsigned int sets)
+{
+	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+	size_t bytes = 0;
+	int i;
+
+	for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
+		if (!(sets & cpumf_ctr_ctl[i]))
+			continue;
+		bytes += cf_diag_ctrset_size(i, &cpuhw->info) * sizeof(u64) +
+			 sizeof(((struct s390_ctrset_setdata *)0)->set) +
+			 sizeof(((struct s390_ctrset_setdata *)0)->no_cnts);
+	}
+	bytes = sizeof(((struct s390_ctrset_read *)0)->no_cpus) + nr_cpu_ids *
+		(bytes + sizeof(((struct s390_ctrset_cpudata *)0)->cpu_nr) +
+		     sizeof(((struct s390_ctrset_cpudata *)0)->no_sets));
+	debug_sprintf_event(cf_diag_dbg, 5, "%s bytes %ld\n", __func__,
+			    bytes);
+	return bytes;
+}
+
+static long cf_diag_ioctl_read(unsigned long arg)
+{
+	struct s390_ctrset_read read;
+	int ret = 0;
+
+	debug_sprintf_event(cf_diag_dbg, 5, "%s\n", __func__);
+	if (copy_from_user(&read, (char __user *)arg, sizeof(read)))
+		return -EFAULT;
+	ret = cf_diag_all_read(arg);
+	debug_sprintf_event(cf_diag_dbg, 5, "%s ret %d\n", __func__, ret);
+	return ret;
+}
+
+static long cf_diag_ioctl_stop(void)
+{
+	int ret;
+
+	debug_sprintf_event(cf_diag_dbg, 5, "%s\n", __func__);
+	ret = cf_diag_all_stop();
+	cf_diag_ctrset_clear();
+	debug_sprintf_event(cf_diag_dbg, 5, "%s ret %d\n", __func__, ret);
+	return ret;
+}
+
+static long cf_diag_ioctl_start(unsigned long arg)
+{
+	struct s390_ctrset_start __user *ustart;
+	struct s390_ctrset_start start;
+	void __user *umask;
+	unsigned int len;
+	int ret = 0;
+	size_t need;
+
+	if (cf_diag_ctrset.ctrset)
+		return -EBUSY;
+	ustart = (struct s390_ctrset_start __user *)arg;
+	if (copy_from_user(&start, ustart, sizeof(start)))
+		return -EFAULT;
+	if (start.version != S390_HWCTR_START_VERSION)
+		return -EINVAL;
+	if (start.counter_sets & ~(cpumf_ctr_ctl[CPUMF_CTR_SET_BASIC] |
+				   cpumf_ctr_ctl[CPUMF_CTR_SET_USER] |
+				   cpumf_ctr_ctl[CPUMF_CTR_SET_CRYPTO] |
+				   cpumf_ctr_ctl[CPUMF_CTR_SET_EXT] |
+				   cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG]))
+		return -EINVAL;		/* Invalid counter set */
+	if (!start.counter_sets)
+		return -EINVAL;		/* No counter set at all? */
+	cpumask_clear(&cf_diag_ctrset.mask);
+	len = min_t(u64, start.cpumask_len, cpumask_size());
+	umask = (void __user *)start.cpumask;
+	if (copy_from_user(&cf_diag_ctrset.mask, umask, len))
+		return -EFAULT;
+	if (cpumask_empty(&cf_diag_ctrset.mask))
+		return -EINVAL;
+	need = cf_diag_needspace(start.counter_sets);
+	if (put_user(need, &ustart->data_bytes))
+		ret = -EFAULT;
+	if (ret)
+		goto out;
+	cf_diag_ctrset.ctrset = start.counter_sets;
+	ret = cf_diag_all_start();
+out:
+	if (ret)
+		cf_diag_ctrset_clear();
+	debug_sprintf_event(cf_diag_dbg, 2, "%s sets %#lx need %ld ret %d\n",
+			    __func__, cf_diag_ctrset.ctrset, need, ret);
+	return ret;
+}
+
+static long cf_diag_ioctl(struct file *file, unsigned int cmd,
+			  unsigned long arg)
+{
+	int ret;
+
+	debug_sprintf_event(cf_diag_dbg, 2, "%s cmd %#x arg %lx\n", __func__,
+			    cmd, arg);
+	get_online_cpus();
+	mutex_lock(&cf_diag_ctrset_mutex);
+	switch (cmd) {
+	case S390_HWCTR_START:
+		ret = cf_diag_ioctl_start(arg);
+		break;
+	case S390_HWCTR_STOP:
+		ret = cf_diag_ioctl_stop();
+		break;
+	case S390_HWCTR_READ:
+		ret = cf_diag_ioctl_read(arg);
+		break;
+	default:
+		ret = -ENOTTY;
+		break;
+	}
+	mutex_unlock(&cf_diag_ctrset_mutex);
+	put_online_cpus();
+	debug_sprintf_event(cf_diag_dbg, 2, "%s ret %d\n", __func__, ret);
+	return ret;
+}
+
+static const struct file_operations cf_diag_fops = {
+	.owner = THIS_MODULE,
+	.open = cf_diag_open,
+	.release = cf_diag_release,
+	.unlocked_ioctl	= cf_diag_ioctl,
+	.compat_ioctl = cf_diag_ioctl,
+	.llseek = no_llseek
+};
+
+static struct miscdevice cf_diag_dev = {
+	.name	= S390_HWCTR_DEVICE,
+	.minor	= MISC_DYNAMIC_MINOR,
+	.fops	= &cf_diag_fops,
+};
+
+static int cf_diag_online_cpu(unsigned int cpu)
+{
+	struct cf_diag_call_on_cpu_parm p;
+
+	mutex_lock(&cf_diag_ctrset_mutex);
+	if (!cf_diag_ctrset.ctrset)
+		goto out;
+	p.sets = cf_diag_ctrset.ctrset;
+	cf_diag_ioctl_on(&p);
+out:
+	mutex_unlock(&cf_diag_ctrset_mutex);
+	return 0;
+}
+
+static int cf_diag_offline_cpu(unsigned int cpu)
+{
+	struct cf_diag_call_on_cpu_parm p;
+
+	mutex_lock(&cf_diag_ctrset_mutex);
+	if (!cf_diag_ctrset.ctrset)
+		goto out;
+	p.sets = cf_diag_ctrset.ctrset;
+	cf_diag_ioctl_off(&p);
+out:
+	mutex_unlock(&cf_diag_ctrset_mutex);
+	return 0;
+}
+
 /* Initialize the counter set PMU to generate complete counter set data as
  * event raw data. This relies on the CPU Measurement Counter Facility device
  * already being loaded and initialized.
@@ -685,21 +1165,43 @@ static int __init cf_diag_init(void)
 		return -ENOMEM;
 	}
 
+	rc = misc_register(&cf_diag_dev);
+	if (rc) {
+		pr_err("Registration of /dev/" S390_HWCTR_DEVICE
+		       "failed rc=%d\n", rc);
+		goto out;
+	}
+
 	/* Setup s390dbf facility */
 	cf_diag_dbg = debug_register(KMSG_COMPONENT, 2, 1, 128);
 	if (!cf_diag_dbg) {
 		pr_err("Registration of s390dbf(cpum_cf_diag) failed\n");
-		return -ENOMEM;
+		rc = -ENOMEM;
+		goto out_dbf;
 	}
 	debug_register_view(cf_diag_dbg, &debug_sprintf_view);
 
 	rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1);
 	if (rc) {
-		debug_unregister_view(cf_diag_dbg, &debug_sprintf_view);
-		debug_unregister(cf_diag_dbg);
 		pr_err("Registration of PMU(cpum_cf_diag) failed with rc=%i\n",
 		       rc);
+		goto out_perf;
 	}
+	rc = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_S390_CFD_ONLINE,
+				       "perf/s390/cfd:online",
+				       cf_diag_online_cpu, cf_diag_offline_cpu);
+	if (!rc)
+		goto out;
+
+	pr_err("Registration of CPUHP_AP_PERF_S390_CFD_ONLINE failed rc=%i\n",
+	       rc);
+	perf_pmu_unregister(&cf_diag);
+out_perf:
+	debug_unregister_view(cf_diag_dbg, &debug_sprintf_view);
+	debug_unregister(cf_diag_dbg);
+out_dbf:
+	misc_deregister(&cf_diag_dev);
+out:
 	return rc;
 }
-arch_initcall(cf_diag_init);
+device_initcall(cf_diag_init);
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 367bd000f6d1..e20bed1ed34a 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -130,7 +130,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
 	frame->sf.gprs[9] = (unsigned long)frame;
 
 	/* Store access registers to kernel stack of new process. */
-	if (unlikely(p->flags & PF_KTHREAD)) {
+	if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
 		/* kernel thread */
 		memset(&frame->childregs, 0, sizeof(struct pt_regs));
 		frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT |
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index e299892440b6..58c8afa3da65 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -30,6 +30,7 @@
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/irqflags.h>
+#include <linux/irq_work.h>
 #include <linux/cpu.h>
 #include <linux/slab.h>
 #include <linux/sched/hotplug.h>
@@ -62,6 +63,7 @@ enum {
 	ec_call_function_single,
 	ec_stop_cpu,
 	ec_mcck_pending,
+	ec_irq_work,
 };
 
 enum {
@@ -434,10 +436,12 @@ void notrace smp_yield_cpu(int cpu)
  */
 void notrace smp_emergency_stop(void)
 {
-	cpumask_t cpumask;
+	static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
+	static cpumask_t cpumask;
 	u64 end;
 	int cpu;
 
+	arch_spin_lock(&lock);
 	cpumask_copy(&cpumask, cpu_online_mask);
 	cpumask_clear_cpu(smp_processor_id(), &cpumask);
 
@@ -458,6 +462,7 @@ void notrace smp_emergency_stop(void)
 			break;
 		cpu_relax();
 	}
+	arch_spin_unlock(&lock);
 }
 NOKPROBE_SYMBOL(smp_emergency_stop);
 
@@ -505,6 +510,8 @@ static void smp_handle_ext_call(void)
 		generic_smp_call_function_single_interrupt();
 	if (test_bit(ec_mcck_pending, &bits))
 		__s390_handle_mcck();
+	if (test_bit(ec_irq_work, &bits))
+		irq_work_run();
 }
 
 static void do_ext_call_interrupt(struct ext_code ext_code,
@@ -537,6 +544,13 @@ void smp_send_reschedule(int cpu)
 	pcpu_ec_call(pcpu_devices + cpu, ec_schedule);
 }
 
+#ifdef CONFIG_IRQ_WORK
+void arch_irq_work_raise(void)
+{
+	pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_irq_work);
+}
+#endif
+
 /*
  * parameter area for the set/clear control bit callbacks
  */
@@ -775,11 +789,13 @@ static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail,
 static int __smp_rescan_cpus(struct sclp_core_info *info, bool early)
 {
 	struct sclp_core_entry *core;
-	cpumask_t avail;
+	static cpumask_t avail;
 	bool configured;
 	u16 core_id;
 	int nr, i;
 
+	get_online_cpus();
+	mutex_lock(&smp_cpu_state_mutex);
 	nr = 0;
 	cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
 	/*
@@ -800,6 +816,8 @@ static int __smp_rescan_cpus(struct sclp_core_info *info, bool early)
 		configured = i < info->configured;
 		nr += smp_add_core(&info->core[i], &avail, configured, early);
 	}
+	mutex_unlock(&smp_cpu_state_mutex);
+	put_online_cpus();
 	return nr;
 }
 
@@ -847,9 +865,7 @@ void __init smp_detect_cpus(void)
 	pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus);
 
 	/* Add CPUs present at boot */
-	get_online_cpus();
 	__smp_rescan_cpus(info, true);
-	put_online_cpus();
 	memblock_free_early((unsigned long)info, sizeof(*info));
 }
 
@@ -1178,11 +1194,7 @@ int __ref smp_rescan_cpus(void)
 	if (!info)
 		return -ENOMEM;
 	smp_get_core_info(info, 0);
-	get_online_cpus();
-	mutex_lock(&smp_cpu_state_mutex);
 	nr = __smp_rescan_cpus(info, false);
-	mutex_unlock(&smp_cpu_state_mutex);
-	put_online_cpus();
 	kfree(info);
 	if (nr)
 		topology_schedule_update();
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index ca47141a5be9..e7ce447651b9 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -62,16 +62,16 @@ static struct mask_info drawer_info;
 struct cpu_topology_s390 cpu_topology[NR_CPUS];
 EXPORT_SYMBOL_GPL(cpu_topology);
 
-static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
+static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int cpu)
 {
-	cpumask_t mask;
+	static cpumask_t mask;
 
 	cpumask_copy(&mask, cpumask_of(cpu));
 	switch (topology_mode) {
 	case TOPOLOGY_MODE_HW:
 		while (info) {
 			if (cpumask_test_cpu(cpu, &info->mask)) {
-				mask = info->mask;
+				cpumask_copy(&mask, &info->mask);
 				break;
 			}
 			info = info->next;
@@ -89,23 +89,24 @@ static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
 		break;
 	}
 	cpumask_and(&mask, &mask, cpu_online_mask);
-	return mask;
+	cpumask_copy(dst, &mask);
 }
 
-static cpumask_t cpu_thread_map(unsigned int cpu)
+static void cpu_thread_map(cpumask_t *dst, unsigned int cpu)
 {
-	cpumask_t mask;
+	static cpumask_t mask;
 	int i;
 
 	cpumask_copy(&mask, cpumask_of(cpu));
 	if (topology_mode != TOPOLOGY_MODE_HW)
-		return mask;
+		goto out;
 	cpu -= cpu % (smp_cpu_mtid + 1);
 	for (i = 0; i <= smp_cpu_mtid; i++)
 		if (cpu_present(cpu + i))
 			cpumask_set_cpu(cpu + i, &mask);
 	cpumask_and(&mask, &mask, cpu_online_mask);
-	return mask;
+out:
+	cpumask_copy(dst, &mask);
 }
 
 #define TOPOLOGY_CORE_BITS	64
@@ -250,10 +251,10 @@ void update_cpu_masks(void)
 
 	for_each_possible_cpu(cpu) {
 		topo = &cpu_topology[cpu];
-		topo->thread_mask = cpu_thread_map(cpu);
-		topo->core_mask = cpu_group_map(&socket_info, cpu);
-		topo->book_mask = cpu_group_map(&book_info, cpu);
-		topo->drawer_mask = cpu_group_map(&drawer_info, cpu);
+		cpu_thread_map(&topo->thread_mask, cpu);
+		cpu_group_map(&topo->core_mask, &socket_info, cpu);
+		cpu_group_map(&topo->book_mask, &book_info, cpu);
+		cpu_group_map(&topo->drawer_mask, &drawer_info, cpu);
 		topo->booted_cores = 0;
 		if (topology_mode != TOPOLOGY_MODE_HW) {
 			id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu;
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 4e87c819ddea..781965f7210e 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -58,7 +58,7 @@ unsigned long *crst_table_alloc(struct mm_struct *mm)
 	if (!page)
 		return NULL;
 	arch_set_page_dat(page, 2);
-	return (unsigned long *) page_to_phys(page);
+	return (unsigned long *) page_to_virt(page);
 }
 
 void crst_table_free(struct mm_struct *mm, unsigned long *table)
@@ -161,7 +161,7 @@ struct page *page_table_alloc_pgste(struct mm_struct *mm)
 
 	page = alloc_page(GFP_KERNEL);
 	if (page) {
-		table = (u64 *)page_to_phys(page);
+		table = (u64 *)page_to_virt(page);
 		memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
 		memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
 	}
@@ -194,7 +194,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 			mask = atomic_read(&page->_refcount) >> 24;
 			mask = (mask | (mask >> 4)) & 3;
 			if (mask != 3) {
-				table = (unsigned long *) page_to_phys(page);
+				table = (unsigned long *) page_to_virt(page);
 				bit = mask & 1;		/* =1 -> second 2K */
 				if (bit)
 					table += PTRS_PER_PTE;
@@ -217,7 +217,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 	}
 	arch_set_page_dat(page, 0);
 	/* Initialize page table */
-	table = (unsigned long *) page_to_phys(page);
+	table = (unsigned long *) page_to_virt(page);
 	if (mm_alloc_pgste(mm)) {
 		/* Return 4K page table with PGSTEs */
 		atomic_xor_bits(&page->_refcount, 3 << 24);
@@ -239,10 +239,10 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
 	struct page *page;
 	unsigned int bit, mask;
 
-	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+	page = virt_to_page(table);
 	if (!mm_alloc_pgste(mm)) {
 		/* Free 2K page table fragment of a 4K page */
-		bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
+		bit = ((unsigned long) table & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
 		spin_lock_bh(&mm->context.lock);
 		mask = atomic_xor_bits(&page->_refcount, 1U << (bit + 24));
 		mask >>= 24;
@@ -269,14 +269,14 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
 	unsigned int bit, mask;
 
 	mm = tlb->mm;
-	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+	page = virt_to_page(table);
 	if (mm_alloc_pgste(mm)) {
 		gmap_unlink(mm, table, vmaddr);
-		table = (unsigned long *) (__pa(table) | 3);
+		table = (unsigned long *) ((unsigned long)table | 3);
 		tlb_remove_table(tlb, table);
 		return;
 	}
-	bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
+	bit = ((unsigned long) table & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
 	spin_lock_bh(&mm->context.lock);
 	mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24));
 	mask >>= 24;
@@ -285,7 +285,7 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
 	else
 		list_del(&page->lru);
 	spin_unlock_bh(&mm->context.lock);
-	table = (unsigned long *) (__pa(table) | (1U << bit));
+	table = (unsigned long *) ((unsigned long) table | (1U << bit));
 	tlb_remove_table(tlb, table);
 }
 
@@ -293,7 +293,7 @@ void __tlb_remove_table(void *_table)
 {
 	unsigned int mask = (unsigned long) _table & 3;
 	void *table = (void *)((unsigned long) _table ^ mask);
-	struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+	struct page *page = virt_to_page(table);
 
 	switch (mask) {
 	case 0:		/* pmd, pud, or p4d */
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 82dbf9450105..96897fab89dc 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -27,14 +27,14 @@ static void __ref *vmem_alloc_pages(unsigned int order)
 
 	if (slab_is_available())
 		return (void *)__get_free_pages(GFP_KERNEL, order);
-	return (void *) memblock_phys_alloc(size, size);
+	return memblock_alloc(size, size);
 }
 
 static void vmem_free_pages(unsigned long addr, int order)
 {
 	/* We don't expect boot memory to be removed ever. */
 	if (!slab_is_available() ||
-	    WARN_ON_ONCE(PageReserved(phys_to_page(addr))))
+	    WARN_ON_ONCE(PageReserved(virt_to_page(addr))))
 		return;
 	free_pages(addr, order);
 }
@@ -57,7 +57,7 @@ pte_t __ref *vmem_pte_alloc(void)
 	if (slab_is_available())
 		pte = (pte_t *) page_table_alloc(&init_mm);
 	else
-		pte = (pte_t *) memblock_phys_alloc(size, size);
+		pte = (pte_t *) memblock_alloc(size, size);
 	if (!pte)
 		return NULL;
 	memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
@@ -85,7 +85,7 @@ static void vmemmap_flush_unused_sub_pmd(void)
 {
 	if (!unused_sub_pmd_start)
 		return;
-	memset(__va(unused_sub_pmd_start), PAGE_UNUSED,
+	memset((void *)unused_sub_pmd_start, PAGE_UNUSED,
 	       ALIGN(unused_sub_pmd_start, PMD_SIZE) - unused_sub_pmd_start);
 	unused_sub_pmd_start = 0;
 }
@@ -98,7 +98,7 @@ static void vmemmap_mark_sub_pmd_used(unsigned long start, unsigned long end)
 	 * getting removed (just in case the memmap never gets initialized,
 	 * e.g., because the memory block never gets onlined).
 	 */
-	memset(__va(start), 0, sizeof(struct page));
+	memset((void *)start, 0, sizeof(struct page));
 }
 
 static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
@@ -119,7 +119,7 @@ static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
 
 static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
 {
-	void *page = __va(ALIGN_DOWN(start, PMD_SIZE));
+	unsigned long page = ALIGN_DOWN(start, PMD_SIZE);
 
 	vmemmap_flush_unused_sub_pmd();
 
@@ -128,7 +128,7 @@ static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
 
 	/* Mark the unused parts of the new memmap page PAGE_UNUSED. */
 	if (!IS_ALIGNED(start, PMD_SIZE))
-		memset(page, PAGE_UNUSED, start - __pa(page));
+		memset((void *)page, PAGE_UNUSED, start - page);
 	/*
 	 * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of
 	 * consecutive sections. Remember for the last added PMD the last
@@ -141,11 +141,11 @@ static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
 /* Returns true if the PMD is completely unused and can be freed. */
 static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end)
 {
-	void *page = __va(ALIGN_DOWN(start, PMD_SIZE));
+	unsigned long page = ALIGN_DOWN(start, PMD_SIZE);
 
 	vmemmap_flush_unused_sub_pmd();
-	memset(__va(start), PAGE_UNUSED, end - start);
-	return !memchr_inv(page, PAGE_UNUSED, PMD_SIZE);
+	memset((void *)start, PAGE_UNUSED, end - start);
+	return !memchr_inv((void *)page, PAGE_UNUSED, PMD_SIZE);
 }
 
 /* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
@@ -166,7 +166,7 @@ static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
 			if (pte_none(*pte))
 				continue;
 			if (!direct)
-				vmem_free_pages(pfn_to_phys(pte_pfn(*pte)), 0);
+				vmem_free_pages((unsigned long) pfn_to_virt(pte_pfn(*pte)), 0);
 			pte_clear(&init_mm, addr, pte);
 		} else if (pte_none(*pte)) {
 			if (!direct) {
@@ -176,7 +176,7 @@ static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
 					goto out;
 				pte_val(*pte) = __pa(new_page) | prot;
 			} else {
-				pte_val(*pte) = addr | prot;
+				pte_val(*pte) = __pa(addr) | prot;
 			}
 		} else {
 			continue;
@@ -201,7 +201,7 @@ static void try_free_pte_table(pmd_t *pmd, unsigned long start)
 		if (!pte_none(*pte))
 			return;
 	}
-	vmem_pte_free(__va(pmd_deref(*pmd)));
+	vmem_pte_free((unsigned long *) pmd_deref(*pmd));
 	pmd_clear(pmd);
 }
 
@@ -242,7 +242,7 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
 			    IS_ALIGNED(next, PMD_SIZE) &&
 			    MACHINE_HAS_EDAT1 && addr && direct &&
 			    !debug_pagealloc_enabled()) {
-				pmd_val(*pmd) = addr | prot;
+				pmd_val(*pmd) = __pa(addr) | prot;
 				pages++;
 				continue;
 			} else if (!direct && MACHINE_HAS_EDAT1) {
@@ -338,7 +338,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
 			    IS_ALIGNED(next, PUD_SIZE) &&
 			    MACHINE_HAS_EDAT2 && addr && direct &&
 			    !debug_pagealloc_enabled()) {
-				pud_val(*pud) = addr | prot;
+				pud_val(*pud) = __pa(addr) | prot;
 				pages++;
 				continue;
 			}
diff --git a/arch/s390/tools/opcodes.txt b/arch/s390/tools/opcodes.txt
index 46d8ed96cf06..0e207c46e8da 100644
--- a/arch/s390/tools/opcodes.txt
+++ b/arch/s390/tools/opcodes.txt
@@ -597,7 +597,7 @@ b9b3	cu42	RRE_RR
 b9bd	trtre	RRF_U0RR
 b9be	srstu	RRE_RR
 b9bf	trte	RRF_U0RR
-b9c0	selhhhr	RRF_RURR
+b9c0	selfhr	RRF_RURR
 b9c8	ahhhr	RRF_R0RR2
 b9c9	shhhr	RRF_R0RR2
 b9ca	alhhhr	RRF_R0RR2
diff --git a/arch/sh/boards/mach-landisk/gio.c b/arch/sh/boards/mach-landisk/gio.c
index 1c0da99dfc60..ff2200fec29a 100644
--- a/arch/sh/boards/mach-landisk/gio.c
+++ b/arch/sh/boards/mach-landisk/gio.c
@@ -27,11 +27,10 @@ static int openCnt;
 
 static int gio_open(struct inode *inode, struct file *filp)
 {
-	int minor;
+	int minor = iminor(inode);
 	int ret = -ENOENT;
 
 	preempt_disable();
-	minor = MINOR(inode->i_rdev);
 	if (minor < DEVCOUNT) {
 		if (openCnt > 0) {
 			ret = -EALREADY;
@@ -46,9 +45,8 @@ static int gio_open(struct inode *inode, struct file *filp)
 
 static int gio_close(struct inode *inode, struct file *filp)
 {
-	int minor;
+	int minor = iminor(inode);
 
-	minor = MINOR(inode->i_rdev);
 	if (minor < DEVCOUNT) {
 		openCnt--;
 	}
diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index 80a5d1c66a51..1aa508eb0823 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -114,7 +114,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
 
 	childregs = task_pt_regs(p);
 	p->thread.sp = (unsigned long) childregs;
-	if (unlikely(p->flags & PF_KTHREAD)) {
+	if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
 		memset(childregs, 0, sizeof(struct pt_regs));
 		p->thread.pc = (unsigned long) ret_from_kernel_thread;
 		childregs->regs[4] = arg;
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index 7649b14d69f8..b91e88058e0c 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -309,7 +309,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
 	ti->ksp = (unsigned long) new_stack;
 	p->thread.kregs = childregs;
 
-	if (unlikely(p->flags & PF_KTHREAD)) {
+	if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
 		extern int nwindows;
 		unsigned long psr;
 		memset(new_stack, 0, STACKFRAME_SZ + TRACEREG_SZ);
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 6f8c7822fc06..7afd0a859a78 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -597,7 +597,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
 				       sizeof(struct sparc_stackf));
 	t->fpsaved[0] = 0;
 
-	if (unlikely(p->flags & PF_KTHREAD)) {
+	if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
 		memset(child_trap_frame, 0, child_stack_sz);
 		__thread_flag_byte_ptr(t)[TI_FLAG_BYTE_CWP] = 
 			(current_pt_regs()->tstate + 1) & TSTATE_CWP;
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 81d508daf67c..c5011064b5dd 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -157,7 +157,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 		unsigned long arg, struct task_struct * p, unsigned long tls)
 {
 	void (*handler)(void);
-	int kthread = current->flags & PF_KTHREAD;
+	int kthread = current->flags & (PF_KTHREAD | PF_IO_WORKER);
 	int ret = 0;
 
 	p->thread = (struct thread_struct) INIT_THREAD;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 145a7ac0c19a..9c214d7085a4 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -161,7 +161,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
 #endif
 
 	/* Kernel thread ? */
-	if (unlikely(p->flags & PF_KTHREAD)) {
+	if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
 		memset(childregs, 0, sizeof(struct pt_regs));
 		kthread_frame_init(frame, sp, arg);
 		return 0;
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index 397a7de56377..9534ef515d74 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -217,7 +217,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp_thread_fn,
 
 	p->thread.sp = (unsigned long)childregs;
 
-	if (!(p->flags & PF_KTHREAD)) {
+	if (!(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
 		struct pt_regs *regs = current_pt_regs();
 		unsigned long usp = usp_thread_fn ?
 			usp_thread_fn : regs->areg[1];
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index b398dde53af9..ec482e6641ff 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -125,6 +125,8 @@
 #include <linux/delay.h>
 #include <linux/backing-dev.h>
 
+#include <trace/events/block.h>
+
 #include "blk.h"
 #include "blk-mq.h"
 #include "blk-mq-tag.h"
@@ -5621,7 +5623,7 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 
 	spin_unlock_irq(&bfqd->lock);
 
-	blk_mq_sched_request_inserted(rq);
+	trace_block_rq_insert(rq);
 
 	spin_lock_irq(&bfqd->lock);
 	bfqq = bfq_init_rq(rq);
diff --git a/block/blk-core.c b/block/blk-core.c
index 5e752840b41a..fc60ff208497 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -59,6 +59,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_split);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug);
+EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_insert);
 
 DEFINE_IDA(blk_queue_ida);
 
diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c
index e8327c50d7c9..c176b7af56a7 100644
--- a/block/blk-crypto-fallback.c
+++ b/block/blk-crypto-fallback.c
@@ -80,6 +80,7 @@ static struct blk_crypto_keyslot {
 static struct blk_keyslot_manager blk_crypto_ksm;
 static struct workqueue_struct *blk_crypto_wq;
 static mempool_t *blk_crypto_bounce_page_pool;
+static struct bio_set crypto_bio_split;
 
 /*
  * This is the key we set when evicting a keyslot. This *should* be the all 0's
@@ -224,7 +225,8 @@ static bool blk_crypto_split_bio_if_needed(struct bio **bio_ptr)
 	if (num_sectors < bio_sectors(bio)) {
 		struct bio *split_bio;
 
-		split_bio = bio_split(bio, num_sectors, GFP_NOIO, NULL);
+		split_bio = bio_split(bio, num_sectors, GFP_NOIO,
+				      &crypto_bio_split);
 		if (!split_bio) {
 			bio->bi_status = BLK_STS_RESOURCE;
 			return false;
@@ -538,9 +540,13 @@ static int blk_crypto_fallback_init(void)
 
 	prandom_bytes(blank_key, BLK_CRYPTO_MAX_KEY_SIZE);
 
-	err = blk_ksm_init(&blk_crypto_ksm, blk_crypto_num_keyslots);
+	err = bioset_init(&crypto_bio_split, 64, 0, 0);
 	if (err)
 		goto out;
+
+	err = blk_ksm_init(&blk_crypto_ksm, blk_crypto_num_keyslots);
+	if (err)
+		goto fail_free_bioset;
 	err = -ENOMEM;
 
 	blk_crypto_ksm.ksm_ll_ops = blk_crypto_ksm_ll_ops;
@@ -591,6 +597,8 @@ fail_free_wq:
 	destroy_workqueue(blk_crypto_wq);
 fail_free_ksm:
 	blk_ksm_destroy(&blk_crypto_ksm);
+fail_free_bioset:
+	bioset_exit(&crypto_bio_split);
 out:
 	return err;
 }
diff --git a/block/blk-map.c b/block/blk-map.c
index 21630dccac62..369e204d14d0 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -150,9 +150,7 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
 	bmd->is_our_pages = !map_data;
 	bmd->is_null_mapped = (map_data && map_data->null_mapped);
 
-	nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
-	if (nr_pages > BIO_MAX_PAGES)
-		nr_pages = BIO_MAX_PAGES;
+	nr_pages = bio_max_segs(DIV_ROUND_UP(offset + len, PAGE_SIZE));
 
 	ret = -ENOMEM;
 	bio = bio_kmalloc(gfp_mask, nr_pages);
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index deff4e826e23..ddb65e9e6fd9 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -384,12 +384,6 @@ bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
 }
 EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);
 
-void blk_mq_sched_request_inserted(struct request *rq)
-{
-	trace_block_rq_insert(rq);
-}
-EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted);
-
 static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
 				       bool has_sched,
 				       struct request *rq)
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index 0476360f05f1..5b18ab915c65 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -7,7 +7,6 @@
 
 void blk_mq_sched_assign_ioc(struct request *rq);
 
-void blk_mq_sched_request_inserted(struct request *rq);
 bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
 		unsigned int nr_segs, struct request **merged_request);
 bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
diff --git a/block/blk-pm.h b/block/blk-pm.h
index a2283cc9f716..8a5a0d4b357f 100644
--- a/block/blk-pm.h
+++ b/block/blk-pm.h
@@ -21,31 +21,6 @@ static inline void blk_pm_mark_last_busy(struct request *rq)
 	if (rq->q->dev && !(rq->rq_flags & RQF_PM))
 		pm_runtime_mark_last_busy(rq->q->dev);
 }
-
-static inline void blk_pm_requeue_request(struct request *rq)
-{
-	lockdep_assert_held(&rq->q->queue_lock);
-
-	if (rq->q->dev && !(rq->rq_flags & RQF_PM))
-		rq->q->nr_pending--;
-}
-
-static inline void blk_pm_add_request(struct request_queue *q,
-				      struct request *rq)
-{
-	lockdep_assert_held(&q->queue_lock);
-
-	if (q->dev && !(rq->rq_flags & RQF_PM))
-		q->nr_pending++;
-}
-
-static inline void blk_pm_put_request(struct request *rq)
-{
-	lockdep_assert_held(&rq->q->queue_lock);
-
-	if (rq->q->dev && !(rq->rq_flags & RQF_PM))
-		--rq->q->nr_pending;
-}
 #else
 static inline int blk_pm_resume_queue(const bool pm, struct request_queue *q)
 {
@@ -55,19 +30,6 @@ static inline int blk_pm_resume_queue(const bool pm, struct request_queue *q)
 static inline void blk_pm_mark_last_busy(struct request *rq)
 {
 }
-
-static inline void blk_pm_requeue_request(struct request *rq)
-{
-}
-
-static inline void blk_pm_add_request(struct request_queue *q,
-				      struct request *rq)
-{
-}
-
-static inline void blk_pm_put_request(struct request *rq)
-{
-}
 #endif
 
 #endif /* _BLOCK_BLK_PM_H_ */
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 7dd8be314ac6..b4aa2f37fab6 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -504,6 +504,14 @@ void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
 }
 EXPORT_SYMBOL(blk_queue_io_opt);
 
+static unsigned int blk_round_down_sectors(unsigned int sectors, unsigned int lbs)
+{
+	sectors = round_down(sectors, lbs >> SECTOR_SHIFT);
+	if (sectors < PAGE_SIZE >> SECTOR_SHIFT)
+		sectors = PAGE_SIZE >> SECTOR_SHIFT;
+	return sectors;
+}
+
 /**
  * blk_stack_limits - adjust queue_limits for stacked devices
  * @t:	the stacking driver limits (top device)
@@ -630,6 +638,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 		ret = -1;
 	}
 
+	t->max_sectors = blk_round_down_sectors(t->max_sectors, t->logical_block_size);
+	t->max_hw_sectors = blk_round_down_sectors(t->max_hw_sectors, t->logical_block_size);
+	t->max_dev_sectors = blk_round_down_sectors(t->max_dev_sectors, t->logical_block_size);
+
 	/* Discard alignment and granularity */
 	if (b->discard_granularity) {
 		alignment = queue_limit_discard_alignment(b, start);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index ae39c7f3d83d..0f4f0c8a7825 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -434,10 +434,13 @@ static ssize_t queue_poll_store(struct request_queue *q, const char *page,
 	if (ret < 0)
 		return ret;
 
-	if (poll_on)
+	if (poll_on) {
 		blk_queue_flag_set(QUEUE_FLAG_POLL, q);
-	else
+	} else {
+		blk_mq_freeze_queue(q);
 		blk_queue_flag_clear(QUEUE_FLAG_POLL, q);
+		blk_mq_unfreeze_queue(q);
+	}
 
 	return ret;
 }
diff --git a/block/bounce.c b/block/bounce.c
index fc55314aa426..87983a35079c 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -214,8 +214,7 @@ static void bounce_end_io_read_isa(struct bio *bio)
 	__bounce_end_io_read(bio, &isa_page_pool);
 }
 
-static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
-		struct bio_set *bs)
+static struct bio *bounce_clone_bio(struct bio *bio_src)
 {
 	struct bvec_iter iter;
 	struct bio_vec bv;
@@ -242,10 +241,12 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
 	 *    asking for trouble and would force extra work on
 	 *    __bio_clone_fast() anyways.
 	 */
-
-	bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs);
-	if (!bio)
-		return NULL;
+	if (bio_is_passthrough(bio_src))
+		bio = bio_kmalloc(GFP_NOIO | __GFP_NOFAIL,
+				  bio_segments(bio_src));
+	else
+		bio = bio_alloc_bioset(GFP_NOIO, bio_segments(bio_src),
+				       &bounce_bio_set);
 	bio->bi_bdev		= bio_src->bi_bdev;
 	if (bio_flagged(bio_src, BIO_REMAPPED))
 		bio_set_flag(bio, BIO_REMAPPED);
@@ -269,11 +270,11 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
 		break;
 	}
 
-	if (bio_crypt_clone(bio, bio_src, gfp_mask) < 0)
+	if (bio_crypt_clone(bio, bio_src, GFP_NOIO) < 0)
 		goto err_put;
 
 	if (bio_integrity(bio_src) &&
-	    bio_integrity_clone(bio, bio_src, gfp_mask) < 0)
+	    bio_integrity_clone(bio, bio_src, GFP_NOIO) < 0)
 		goto err_put;
 
 	bio_clone_blkg_association(bio, bio_src);
@@ -296,7 +297,6 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
 	unsigned i = 0;
 	bool bounce = false;
 	int sectors = 0;
-	bool passthrough = bio_is_passthrough(*bio_orig);
 
 	bio_for_each_segment(from, *bio_orig, iter) {
 		if (i++ < BIO_MAX_PAGES)
@@ -307,14 +307,14 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
 	if (!bounce)
 		return;
 
-	if (!passthrough && sectors < bio_sectors(*bio_orig)) {
+	if (!bio_is_passthrough(*bio_orig) &&
+	    sectors < bio_sectors(*bio_orig)) {
 		bio = bio_split(*bio_orig, sectors, GFP_NOIO, &bounce_bio_split);
 		bio_chain(bio, *bio_orig);
 		submit_bio_noacct(*bio_orig);
 		*bio_orig = bio;
 	}
-	bio = bounce_clone_bio(*bio_orig, GFP_NOIO, passthrough ? NULL :
-			&bounce_bio_set);
+	bio = bounce_clone_bio(*bio_orig);
 
 	/*
 	 * Bvec table can't be updated by bio_for_each_segment_all(),
diff --git a/block/genhd.c b/block/genhd.c
index 36ff45bbaaaf..fcc530164b5a 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -74,7 +74,7 @@ bool set_capacity_and_notify(struct gendisk *disk, sector_t size)
 		return false;
 
 	pr_info("%s: detected capacity change from %lld to %lld\n",
-		disk->disk_name, size, capacity);
+		disk->disk_name, capacity, size);
 
 	/*
 	 * Historically we did not send a uevent for changes to/from an empty
@@ -476,7 +476,7 @@ void disk_uevent(struct gendisk *disk, enum kobject_action action)
 	struct disk_part_iter piter;
 	struct block_device *part;
 
-	disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
+	disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY_PART0);
 	while ((part = disk_part_iter_next(&piter)))
 		kobject_uevent(bdev_kobj(part), action);
 	disk_part_iter_exit(&piter);
diff --git a/block/ioctl.c b/block/ioctl.c
index d61d652078f4..ff241e663c01 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -81,20 +81,27 @@ static int compat_blkpg_ioctl(struct block_device *bdev,
 }
 #endif
 
-static int blkdev_reread_part(struct block_device *bdev)
+static int blkdev_reread_part(struct block_device *bdev, fmode_t mode)
 {
-	int ret;
+	struct block_device *tmp;
 
 	if (!disk_part_scan_enabled(bdev->bd_disk) || bdev_is_partition(bdev))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
 
-	mutex_lock(&bdev->bd_mutex);
-	ret = bdev_disk_changed(bdev, false);
-	mutex_unlock(&bdev->bd_mutex);
+	/*
+	 * Reopen the device to revalidate the driver state and force a
+	 * partition rescan.
+	 */
+	mode &= ~FMODE_EXCL;
+	set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
 
-	return ret;
+	tmp = blkdev_get_by_dev(bdev->bd_dev, mode, NULL);
+	if (IS_ERR(tmp))
+		return PTR_ERR(tmp);
+	blkdev_put(tmp, mode);
+	return 0;
 }
 
 static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
@@ -498,7 +505,7 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
 		bdev->bd_bdi->ra_pages = (arg * 512) / PAGE_SIZE;
 		return 0;
 	case BLKRRPART:
-		return blkdev_reread_part(bdev);
+		return blkdev_reread_part(bdev, mode);
 	case BLKTRACESTART:
 	case BLKTRACESTOP:
 	case BLKTRACETEARDOWN:
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index c25c41d0d061..33d34d69cade 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -13,6 +13,8 @@
 #include <linux/module.h>
 #include <linux/sbitmap.h>
 
+#include <trace/events/block.h>
+
 #include "blk.h"
 #include "blk-mq.h"
 #include "blk-mq-debugfs.h"
@@ -353,19 +355,9 @@ static void kyber_timer_fn(struct timer_list *t)
 	}
 }
 
-static unsigned int kyber_sched_tags_shift(struct request_queue *q)
-{
-	/*
-	 * All of the hardware queues have the same depth, so we can just grab
-	 * the shift of the first one.
-	 */
-	return q->queue_hw_ctx[0]->sched_tags->bitmap_tags->sb.shift;
-}
-
 static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
 {
 	struct kyber_queue_data *kqd;
-	unsigned int shift;
 	int ret = -ENOMEM;
 	int i;
 
@@ -400,9 +392,6 @@ static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
 		kqd->latency_targets[i] = kyber_latency_targets[i];
 	}
 
-	shift = kyber_sched_tags_shift(q);
-	kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;
-
 	return kqd;
 
 err_buckets:
@@ -458,9 +447,19 @@ static void kyber_ctx_queue_init(struct kyber_ctx_queue *kcq)
 		INIT_LIST_HEAD(&kcq->rq_list[i]);
 }
 
-static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
+static void kyber_depth_updated(struct blk_mq_hw_ctx *hctx)
 {
 	struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
+	struct blk_mq_tags *tags = hctx->sched_tags;
+	unsigned int shift = tags->bitmap_tags->sb.shift;
+
+	kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;
+
+	sbitmap_queue_min_shallow_depth(tags->bitmap_tags, kqd->async_depth);
+}
+
+static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
+{
 	struct kyber_hctx_data *khd;
 	int i;
 
@@ -502,8 +501,7 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 	khd->batching = 0;
 
 	hctx->sched_data = khd;
-	sbitmap_queue_min_shallow_depth(hctx->sched_tags->bitmap_tags,
-					kqd->async_depth);
+	kyber_depth_updated(hctx);
 
 	return 0;
 
@@ -602,7 +600,7 @@ static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx,
 			list_move_tail(&rq->queuelist, head);
 		sbitmap_set_bit(&khd->kcq_map[sched_domain],
 				rq->mq_ctx->index_hw[hctx->type]);
-		blk_mq_sched_request_inserted(rq);
+		trace_block_rq_insert(rq);
 		spin_unlock(&kcq->lock);
 	}
 }
@@ -1022,6 +1020,7 @@ static struct elevator_type kyber_sched = {
 		.completed_request = kyber_completed_request,
 		.dispatch_request = kyber_dispatch_request,
 		.has_work = kyber_has_work,
+		.depth_updated = kyber_depth_updated,
 	},
 #ifdef CONFIG_BLK_DEBUG_FS
 	.queue_debugfs_attrs = kyber_queue_debugfs_attrs,
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index b57470e154c8..f3631a287466 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -18,6 +18,8 @@
 #include <linux/rbtree.h>
 #include <linux/sbitmap.h>
 
+#include <trace/events/block.h>
+
 #include "blk.h"
 #include "blk-mq.h"
 #include "blk-mq-debugfs.h"
@@ -496,7 +498,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	if (blk_mq_sched_try_insert_merge(q, rq))
 		return;
 
-	blk_mq_sched_request_inserted(rq);
+	trace_block_rq_insert(rq);
 
 	if (at_head || blk_rq_is_passthrough(rq)) {
 		if (at_head)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 578fc034db3f..a370cde3ddd4 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -663,7 +663,7 @@ static inline int is_loop_device(struct file *file)
 {
 	struct inode *i = file->f_mapping->host;
 
-	return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR;
+	return i && S_ISBLK(i->i_mode) && imajor(i) == LOOP_MAJOR;
 }
 
 static int loop_validate_file(struct file *file, struct block_device *bdev)
@@ -1212,6 +1212,9 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
 		goto out_unlock;
 	}
 
+	if (test_bit(QUEUE_FLAG_WC, &lo->lo_queue->queue_flags))
+		blk_queue_write_cache(lo->lo_queue, false, false);
+
 	/* freeze request queue during the transition */
 	blk_mq_freeze_queue(lo->lo_queue);
 
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 8b9622eb0a21..4ff71b579cfc 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -78,8 +78,7 @@ struct link_dead_args {
 #define NBD_RT_HAS_PID_FILE		3
 #define NBD_RT_HAS_CONFIG_REF		4
 #define NBD_RT_BOUND			5
-#define NBD_RT_DESTROY_ON_DISCONNECT	6
-#define NBD_RT_DISCONNECT_ON_CLOSE	7
+#define NBD_RT_DISCONNECT_ON_CLOSE	6
 
 #define NBD_DESTROY_ON_DISCONNECT	0
 #define NBD_DISCONNECT_REQUESTED	1
@@ -1904,12 +1903,21 @@ again:
 	if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
 		u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
 		if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
-			set_bit(NBD_RT_DESTROY_ON_DISCONNECT,
-				&config->runtime_flags);
-			set_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
-			put_dev = true;
+			/*
+			 * We have 1 ref to keep the device around, and then 1
+			 * ref for our current operation here, which will be
+			 * inherited by the config.  If we already have
+			 * DESTROY_ON_DISCONNECT set then we know we don't have
+			 * that extra ref already held so we don't need the
+			 * put_dev.
+			 */
+			if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT,
+					      &nbd->flags))
+				put_dev = true;
 		} else {
-			clear_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
+			if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT,
+					       &nbd->flags))
+				refcount_inc(&nbd->refs);
 		}
 		if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
 			set_bit(NBD_RT_DISCONNECT_ON_CLOSE,
@@ -2080,15 +2088,13 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
 	if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
 		u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
 		if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
-			if (!test_and_set_bit(NBD_RT_DESTROY_ON_DISCONNECT,
-					      &config->runtime_flags))
+			if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT,
+					      &nbd->flags))
 				put_dev = true;
-			set_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
 		} else {
-			if (test_and_clear_bit(NBD_RT_DESTROY_ON_DISCONNECT,
-					       &config->runtime_flags))
+			if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT,
+					       &nbd->flags))
 				refcount_inc(&nbd->refs);
-			clear_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
 		}
 
 		if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index da16121140ca..1cdf09ff67b6 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -1326,9 +1326,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
 				     pages[i]->page,
 				     seg[i].nsec << 9,
 				     seg[i].offset) == 0)) {
-
-			int nr_iovecs = min_t(int, (nseg-i), BIO_MAX_PAGES);
-			bio = bio_alloc(GFP_KERNEL, nr_iovecs);
+			bio = bio_alloc(GFP_KERNEL, bio_max_segs(nseg - i));
 			if (unlikely(bio == NULL))
 				goto fail_put_bio;
 
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index cadbd0a1a1ef..5fa6ae9dbc8b 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -480,7 +480,7 @@ static void dax_free_inode(struct inode *inode)
 	kfree(dax_dev->host);
 	dax_dev->host = NULL;
 	if (inode->i_rdev)
-		ida_simple_remove(&dax_minor_ida, MINOR(inode->i_rdev));
+		ida_simple_remove(&dax_minor_ida, iminor(inode));
 	kmem_cache_free(dax_cache, dax_dev);
 }
 
diff --git a/drivers/gpu/drm/tilcdc/Makefile b/drivers/gpu/drm/tilcdc/Makefile
index 662bf3a348c9..f5190477de72 100644
--- a/drivers/gpu/drm/tilcdc/Makefile
+++ b/drivers/gpu/drm/tilcdc/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-ifeq (, $(findstring -W,$(EXTRA_CFLAGS)))
+ifeq (, $(findstring -W,$(KCFLAGS)))
 	ccflags-y += -Werror
 endif
 
diff --git a/drivers/i2c/busses/i2c-brcmstb.c b/drivers/i2c/busses/i2c-brcmstb.c
index d4e0a0f6732a..ba766d24219e 100644
--- a/drivers/i2c/busses/i2c-brcmstb.c
+++ b/drivers/i2c/busses/i2c-brcmstb.c
@@ -316,7 +316,7 @@ static int brcmstb_send_i2c_cmd(struct brcmstb_i2c_dev *dev,
 		goto cmd_out;
 	}
 
-	if ((CMD_RD || CMD_WR) &&
+	if ((cmd == CMD_RD || cmd == CMD_WR) &&
 	    bsc_readl(dev, iic_enable) & BSC_IIC_EN_NOACK_MASK) {
 		rc = -EREMOTEIO;
 		dev_dbg(dev->device, "controller received NOACK intr for %s\n",
diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h
index 85307cfa7109..5392b82f68a4 100644
--- a/drivers/i2c/busses/i2c-designware-core.h
+++ b/drivers/i2c/busses/i2c-designware-core.h
@@ -38,6 +38,8 @@
 #define DW_IC_CON_TX_EMPTY_CTRL			BIT(8)
 #define DW_IC_CON_RX_FIFO_FULL_HLD_CTRL		BIT(9)
 
+#define DW_IC_DATA_CMD_DAT			GENMASK(7, 0)
+
 /*
  * Registers offset
  */
diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c
index d6425ad6e6a3..dd27b9dbe931 100644
--- a/drivers/i2c/busses/i2c-designware-master.c
+++ b/drivers/i2c/busses/i2c-designware-master.c
@@ -432,7 +432,7 @@ i2c_dw_read(struct dw_i2c_dev *dev)
 			regmap_read(dev->map, DW_IC_DATA_CMD, &tmp);
 			/* Ensure length byte is a valid value */
 			if (flags & I2C_M_RECV_LEN &&
-			    tmp <= I2C_SMBUS_BLOCK_MAX && tmp > 0) {
+			    (tmp & DW_IC_DATA_CMD_DAT) <= I2C_SMBUS_BLOCK_MAX && tmp > 0) {
 				len = i2c_dw_recv_len(dev, tmp);
 			}
 			*buf++ = tmp;
diff --git a/drivers/i2c/busses/i2c-exynos5.c b/drivers/i2c/busses/i2c-exynos5.c
index 20a9881a0d6c..5ac30d95650c 100644
--- a/drivers/i2c/busses/i2c-exynos5.c
+++ b/drivers/i2c/busses/i2c-exynos5.c
@@ -606,6 +606,7 @@ static void exynos5_i2c_message_start(struct exynos5_i2c *i2c, int stop)
 	u32 i2c_ctl;
 	u32 int_en = 0;
 	u32 i2c_auto_conf = 0;
+	u32 i2c_addr = 0;
 	u32 fifo_ctl;
 	unsigned long flags;
 	unsigned short trig_lvl;
@@ -640,7 +641,12 @@ static void exynos5_i2c_message_start(struct exynos5_i2c *i2c, int stop)
 		int_en |= HSI2C_INT_TX_ALMOSTEMPTY_EN;
 	}
 
-	writel(HSI2C_SLV_ADDR_MAS(i2c->msg->addr), i2c->regs + HSI2C_ADDR);
+	i2c_addr = HSI2C_SLV_ADDR_MAS(i2c->msg->addr);
+
+	if (i2c->op_clock >= I2C_MAX_FAST_MODE_PLUS_FREQ)
+		i2c_addr |= HSI2C_MASTER_ID(MASTER_ID(i2c->adap.nr));
+
+	writel(i2c_addr, i2c->regs + HSI2C_ADDR);
 
 	writel(fifo_ctl, i2c->regs + HSI2C_FIFO_CTL);
 	writel(i2c_ctl, i2c->regs + HSI2C_CTL);
diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c
index c3f584795911..214b4c913a13 100644
--- a/drivers/i2c/busses/i2c-qcom-geni.c
+++ b/drivers/i2c/busses/i2c-qcom-geni.c
@@ -375,32 +375,6 @@ static void geni_i2c_tx_msg_cleanup(struct geni_i2c_dev *gi2c,
 	}
 }
 
-static void geni_i2c_stop_xfer(struct geni_i2c_dev *gi2c)
-{
-	int ret;
-	u32 geni_status;
-	struct i2c_msg *cur;
-
-	/* Resume device, as runtime suspend can happen anytime during transfer */
-	ret = pm_runtime_get_sync(gi2c->se.dev);
-	if (ret < 0) {
-		dev_err(gi2c->se.dev, "Failed to resume device: %d\n", ret);
-		return;
-	}
-
-	geni_status = readl_relaxed(gi2c->se.base + SE_GENI_STATUS);
-	if (geni_status & M_GENI_CMD_ACTIVE) {
-		cur = gi2c->cur;
-		geni_i2c_abort_xfer(gi2c);
-		if (cur->flags & I2C_M_RD)
-			geni_i2c_rx_msg_cleanup(gi2c, cur);
-		else
-			geni_i2c_tx_msg_cleanup(gi2c, cur);
-	}
-
-	pm_runtime_put_sync_suspend(gi2c->se.dev);
-}
-
 static int geni_i2c_rx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg,
 				u32 m_param)
 {
@@ -676,13 +650,6 @@ static int geni_i2c_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static void  geni_i2c_shutdown(struct platform_device *pdev)
-{
-	struct geni_i2c_dev *gi2c = platform_get_drvdata(pdev);
-
-	geni_i2c_stop_xfer(gi2c);
-}
-
 static int __maybe_unused geni_i2c_runtime_suspend(struct device *dev)
 {
 	int ret;
@@ -747,7 +714,6 @@ MODULE_DEVICE_TABLE(of, geni_i2c_dt_match);
 static struct platform_driver geni_i2c_driver = {
 	.probe  = geni_i2c_probe,
 	.remove = geni_i2c_remove,
-	.shutdown = geni_i2c_shutdown,
 	.driver = {
 		.name = "geni_i2c",
 		.pm = &geni_i2c_pm_ops,
diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index 6c1d8b69a465..b6742b4231bf 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -934,4 +934,7 @@ source "drivers/leds/flash/Kconfig"
 comment "LED Triggers"
 source "drivers/leds/trigger/Kconfig"
 
+comment "LED Blink"
+source "drivers/leds/blink/Kconfig"
+
 endif # NEW_LEDS
diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
index 156c0b4e60d9..2a698df9da57 100644
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile
@@ -108,3 +108,6 @@ obj-$(CONFIG_LEDS_CLASS_FLASH)		+= flash/
 
 # LED Triggers
 obj-$(CONFIG_LEDS_TRIGGERS)		+= trigger/
+
+# LED Blink
+obj-$(CONFIG_LEDS_BLINK)                += blink/
diff --git a/drivers/leds/blink/Kconfig b/drivers/leds/blink/Kconfig
new file mode 100644
index 000000000000..265b53476a80
--- /dev/null
+++ b/drivers/leds/blink/Kconfig
@@ -0,0 +1,20 @@
+menuconfig LEDS_BLINK
+	bool "LED Blink support"
+	depends on LEDS_CLASS
+	help
+	  This option enables blink support for the leds class.
+	  If unsure, say Y.
+
+if LEDS_BLINK
+
+config LEDS_BLINK_LGM
+	tristate "LED support for Intel LGM SoC series"
+	depends on LEDS_CLASS
+	depends on MFD_SYSCON
+	depends on OF
+	help
+	  Parallel to serial conversion, which is also called SSO controller,
+	  can drive external shift register for LED outputs.
+	  This enables LED support for Serial Shift Output controller(SSO).
+
+endif # LEDS_BLINK
diff --git a/drivers/leds/blink/Makefile b/drivers/leds/blink/Makefile
new file mode 100644
index 000000000000..2fa6c7b7b67e
--- /dev/null
+++ b/drivers/leds/blink/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_LEDS_BLINK_LGM)	+= leds-lgm-sso.o
diff --git a/drivers/leds/blink/leds-lgm-sso.c b/drivers/leds/blink/leds-lgm-sso.c
new file mode 100644
index 000000000000..7d5c9ca007d6
--- /dev/null
+++ b/drivers/leds/blink/leds-lgm-sso.c
@@ -0,0 +1,888 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel Lightning Mountain SoC LED Serial Shift Output Controller driver
+ *
+ * Copyright (c) 2020 Intel Corporation.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/gpio.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/leds.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
+#include <linux/regmap.h>
+#include <linux/sizes.h>
+#include <linux/uaccess.h>
+
+#define SSO_DEV_NAME			"lgm-sso"
+
+#define LED_BLINK_H8_0			0x0
+#define LED_BLINK_H8_1			0x4
+#define GET_FREQ_OFFSET(pin, src)	(((pin) * 6) + ((src) * 2))
+#define GET_SRC_OFFSET(pinc)		(((pin) * 6) + 4)
+
+#define DUTY_CYCLE(x)			(0x8 + ((x) * 4))
+#define SSO_CON0			0x2B0
+#define SSO_CON0_RZFL			BIT(26)
+#define SSO_CON0_BLINK_R		BIT(30)
+#define SSO_CON0_SWU			BIT(31)
+
+#define SSO_CON1			0x2B4
+#define SSO_CON1_FCDSC			GENMASK(21, 20) /* Fixed Divider Shift Clock */
+#define SSO_CON1_FPID			GENMASK(24, 23)
+#define SSO_CON1_GPTD			GENMASK(26, 25)
+#define SSO_CON1_US			GENMASK(31, 30)
+
+#define SSO_CPU				0x2B8
+#define SSO_CON2			0x2C4
+#define SSO_CON3			0x2C8
+
+/* Driver MACRO */
+#define MAX_PIN_NUM_PER_BANK		SZ_32
+#define MAX_GROUP_NUM			SZ_4
+#define PINS_PER_GROUP			SZ_8
+#define FPID_FREQ_RANK_MAX		SZ_4
+#define SSO_LED_MAX_NUM			SZ_32
+#define MAX_FREQ_RANK			10
+#define DEF_GPTC_CLK_RATE		200000000
+#define SSO_DEF_BRIGHTNESS		LED_HALF
+#define DATA_CLK_EDGE			0 /* 0-rising, 1-falling */
+
+static const u32 freq_div_tbl[] = {4000, 2000, 1000, 800};
+static const int freq_tbl[] = {2, 4, 8, 10, 50000, 100000, 200000, 250000};
+static const int shift_clk_freq_tbl[] = {25000000, 12500000, 6250000, 3125000};
+
+/*
+ * Update Source to update the SOUTs
+ * SW - Software has to update the SWU bit
+ * GPTC - General Purpose timer is used as clock source
+ * FPID - Divided FSC clock (FPID) is used as clock source
+ */
+enum {
+	US_SW = 0,
+	US_GPTC = 1,
+	US_FPID = 2
+};
+
+enum {
+	MAX_FPID_FREQ_RANK = 5, /* 1 to 4 */
+	MAX_GPTC_FREQ_RANK = 9, /* 5 to 8 */
+	MAX_GPTC_HS_FREQ_RANK = 10, /* 9 to 10 */
+};
+
+enum {
+	LED_GRP0_PIN_MAX = 24,
+	LED_GRP1_PIN_MAX = 29,
+	LED_GRP2_PIN_MAX = 32,
+};
+
+enum {
+	LED_GRP0_0_23,
+	LED_GRP1_24_28,
+	LED_GRP2_29_31,
+	LED_GROUP_MAX,
+};
+
+enum {
+	CLK_SRC_FPID = 0,
+	CLK_SRC_GPTC = 1,
+	CLK_SRC_GPTC_HS = 2,
+};
+
+struct sso_led_priv;
+
+struct sso_led_desc {
+	const char *name;
+	const char *default_trigger;
+	unsigned int brightness;
+	unsigned int blink_rate;
+	unsigned int retain_state_suspended:1;
+	unsigned int retain_state_shutdown:1;
+	unsigned int panic_indicator:1;
+	unsigned int hw_blink:1;
+	unsigned int hw_trig:1;
+	unsigned int blinking:1;
+	int freq_idx;
+	u32 pin;
+};
+
+struct sso_led {
+	struct list_head list;
+	struct led_classdev cdev;
+	struct gpio_desc *gpiod;
+	struct sso_led_desc desc;
+	struct sso_led_priv *priv;
+};
+
+struct sso_gpio {
+	struct gpio_chip chip;
+	int shift_clk_freq;
+	int edge;
+	int freq;
+	u32 pins;
+	u32 alloc_bitmap;
+};
+
+struct sso_led_priv {
+	struct regmap *mmap;
+	struct device *dev;
+	struct platform_device *pdev;
+	struct clk *gclk;
+	struct clk *fpid_clk;
+	u32 fpid_clkrate;
+	u32 gptc_clkrate;
+	u32 freq[MAX_FREQ_RANK];
+	struct list_head led_list;
+	struct sso_gpio gpio;
+};
+
+static int sso_get_blink_rate_idx(struct sso_led_priv *priv, u32 rate)
+{
+	int i;
+
+	for (i = 0; i < MAX_FREQ_RANK; i++) {
+		if (rate <= priv->freq[i])
+			return i;
+	}
+
+	return -1;
+}
+
+static unsigned int sso_led_pin_to_group(u32 pin)
+{
+	if (pin < LED_GRP0_PIN_MAX)
+		return LED_GRP0_0_23;
+	else if (pin < LED_GRP1_PIN_MAX)
+		return LED_GRP1_24_28;
+	else
+		return LED_GRP2_29_31;
+}
+
+static u32 sso_led_get_freq_src(int freq_idx)
+{
+	if (freq_idx < MAX_FPID_FREQ_RANK)
+		return CLK_SRC_FPID;
+	else if (freq_idx < MAX_GPTC_FREQ_RANK)
+		return CLK_SRC_GPTC;
+	else
+		return CLK_SRC_GPTC_HS;
+}
+
+static u32 sso_led_pin_blink_off(u32 pin, unsigned int group)
+{
+	if (group == LED_GRP2_29_31)
+		return pin - LED_GRP1_PIN_MAX;
+	else if (group == LED_GRP1_24_28)
+		return pin - LED_GRP0_PIN_MAX;
+	else	/* led 0 - 23 in led 32 location */
+		return SSO_LED_MAX_NUM - LED_GRP1_PIN_MAX;
+}
+
+static struct sso_led
+*cdev_to_sso_led_data(struct led_classdev *led_cdev)
+{
+	return container_of(led_cdev, struct sso_led, cdev);
+}
+
+static void sso_led_freq_set(struct sso_led_priv *priv, u32 pin, int freq_idx)
+{
+	u32 reg, off, freq_src, val_freq;
+	u32 low, high, val;
+	unsigned int group;
+
+	if (!freq_idx)
+		return;
+
+	group = sso_led_pin_to_group(pin);
+	freq_src = sso_led_get_freq_src(freq_idx);
+	off = sso_led_pin_blink_off(pin, group);
+
+	if (group == LED_GRP0_0_23)
+		return;
+	else if (group == LED_GRP1_24_28)
+		reg = LED_BLINK_H8_0;
+	else
+		reg = LED_BLINK_H8_1;
+
+	if (freq_src == CLK_SRC_FPID)
+		val_freq = freq_idx - 1;
+	else if (freq_src == CLK_SRC_GPTC)
+		val_freq = freq_idx - MAX_FPID_FREQ_RANK;
+
+	/* set blink rate idx */
+	if (freq_src != CLK_SRC_GPTC_HS) {
+		low = GET_FREQ_OFFSET(off, freq_src);
+		high = low + 2;
+		val = val_freq << high;
+		regmap_update_bits(priv->mmap, reg, GENMASK(high, low), val);
+	}
+
+	/* select clock source */
+	low = GET_SRC_OFFSET(off);
+	high = low + 2;
+	val = freq_src << high;
+	regmap_update_bits(priv->mmap, reg, GENMASK(high, low), val);
+}
+
+static void sso_led_brightness_set(struct led_classdev *led_cdev,
+				   enum led_brightness brightness)
+{
+	struct sso_led_priv *priv;
+	struct sso_led_desc *desc;
+	struct sso_led *led;
+	int val;
+
+	led = cdev_to_sso_led_data(led_cdev);
+	priv = led->priv;
+	desc = &led->desc;
+
+	desc->brightness = brightness;
+	regmap_write(priv->mmap, DUTY_CYCLE(desc->pin), brightness);
+
+	if (brightness == LED_OFF)
+		val = 0;
+	else
+		val = 1;
+
+	/* HW blink off */
+	if (desc->hw_blink && !val && desc->blinking) {
+		desc->blinking = 0;
+		regmap_update_bits(priv->mmap, SSO_CON2, BIT(desc->pin), 0);
+	} else if (desc->hw_blink && val && !desc->blinking) {
+		desc->blinking = 1;
+		regmap_update_bits(priv->mmap, SSO_CON2, BIT(desc->pin),
+				   1 << desc->pin);
+	}
+
+	if (!desc->hw_trig && led->gpiod)
+		gpiod_set_value(led->gpiod, val);
+}
+
+static enum led_brightness sso_led_brightness_get(struct led_classdev *led_cdev)
+{
+	struct sso_led *led = cdev_to_sso_led_data(led_cdev);
+
+	return (enum led_brightness)led->desc.brightness;
+}
+
+static int
+delay_to_freq_idx(struct sso_led *led, unsigned long *delay_on,
+		  unsigned long *delay_off)
+{
+	struct sso_led_priv *priv = led->priv;
+	unsigned long delay;
+	int freq_idx;
+	u32 freq;
+
+	if (!*delay_on && !*delay_off) {
+		*delay_on = *delay_off = (1000 / priv->freq[0]) / 2;
+		return 0;
+	}
+
+	delay = *delay_on + *delay_off;
+	freq = 1000 / delay;
+
+	freq_idx = sso_get_blink_rate_idx(priv, freq);
+	if (freq_idx == -1)
+		freq_idx = MAX_FREQ_RANK - 1;
+
+	delay = 1000 / priv->freq[freq_idx];
+	*delay_on = *delay_off = delay / 2;
+
+	if (!*delay_on)
+		*delay_on = *delay_off = 1;
+
+	return freq_idx;
+}
+
+static int
+sso_led_blink_set(struct led_classdev *led_cdev, unsigned long *delay_on,
+		  unsigned long *delay_off)
+{
+	struct sso_led_priv *priv;
+	struct sso_led *led;
+	int freq_idx;
+
+	led = cdev_to_sso_led_data(led_cdev);
+	priv = led->priv;
+	freq_idx = delay_to_freq_idx(led, delay_on, delay_off);
+
+	sso_led_freq_set(priv, led->desc.pin, freq_idx);
+	regmap_update_bits(priv->mmap, SSO_CON2, BIT(led->desc.pin),
+			   1 << led->desc.pin);
+	led->desc.freq_idx = freq_idx;
+	led->desc.blink_rate = priv->freq[freq_idx];
+	led->desc.blinking = 1;
+
+	return 1;
+}
+
+static void sso_led_hw_cfg(struct sso_led_priv *priv, struct sso_led *led)
+{
+	struct sso_led_desc *desc = &led->desc;
+
+	/* set freq */
+	if (desc->hw_blink) {
+		sso_led_freq_set(priv, desc->pin, desc->freq_idx);
+		regmap_update_bits(priv->mmap, SSO_CON2, BIT(desc->pin),
+				   1 << desc->pin);
+	}
+
+	if (desc->hw_trig)
+		regmap_update_bits(priv->mmap, SSO_CON3, BIT(desc->pin),
+				   1 << desc->pin);
+
+	/* set brightness */
+	regmap_write(priv->mmap, DUTY_CYCLE(desc->pin), desc->brightness);
+
+	/* enable output */
+	if (!desc->hw_trig && desc->brightness)
+		gpiod_set_value(led->gpiod, 1);
+}
+
+static int sso_create_led(struct sso_led_priv *priv, struct sso_led *led,
+			  struct fwnode_handle *child)
+{
+	struct sso_led_desc *desc = &led->desc;
+	struct led_init_data init_data;
+	int err;
+
+	init_data.fwnode = child;
+	init_data.devicename = SSO_DEV_NAME;
+	init_data.default_label = ":";
+
+	led->cdev.default_trigger = desc->default_trigger;
+	led->cdev.brightness_set = sso_led_brightness_set;
+	led->cdev.brightness_get = sso_led_brightness_get;
+	led->cdev.brightness = desc->brightness;
+	led->cdev.max_brightness = LED_FULL;
+
+	if (desc->retain_state_shutdown)
+		led->cdev.flags |= LED_RETAIN_AT_SHUTDOWN;
+	if (desc->retain_state_suspended)
+		led->cdev.flags |= LED_CORE_SUSPENDRESUME;
+	if (desc->panic_indicator)
+		led->cdev.flags |= LED_PANIC_INDICATOR;
+
+	if (desc->hw_blink)
+		led->cdev.blink_set = sso_led_blink_set;
+
+	sso_led_hw_cfg(priv, led);
+
+	err = devm_led_classdev_register_ext(priv->dev, &led->cdev, &init_data);
+	if (err)
+		return err;
+
+	list_add(&led->list, &priv->led_list);
+
+	return 0;
+}
+
+static void sso_init_freq(struct sso_led_priv *priv)
+{
+	int i;
+
+	priv->freq[0] = 0;
+	for (i = 1; i < MAX_FREQ_RANK; i++) {
+		if (i < MAX_FPID_FREQ_RANK) {
+			priv->freq[i] = priv->fpid_clkrate / freq_div_tbl[i - 1];
+		} else if (i < MAX_GPTC_FREQ_RANK) {
+			priv->freq[i] = priv->gptc_clkrate /
+				freq_div_tbl[i - MAX_FPID_FREQ_RANK];
+		} else if (i < MAX_GPTC_HS_FREQ_RANK) {
+			priv->freq[i] = priv->gptc_clkrate;
+		}
+	}
+}
+
+static int sso_gpio_request(struct gpio_chip *chip, unsigned int offset)
+{
+	struct sso_led_priv *priv = gpiochip_get_data(chip);
+
+	if (priv->gpio.alloc_bitmap & BIT(offset))
+		return -EINVAL;
+
+	priv->gpio.alloc_bitmap |= BIT(offset);
+	regmap_write(priv->mmap, DUTY_CYCLE(offset), 0xFF);
+
+	return 0;
+}
+
+static void sso_gpio_free(struct gpio_chip *chip, unsigned int offset)
+{
+	struct sso_led_priv *priv = gpiochip_get_data(chip);
+
+	priv->gpio.alloc_bitmap &= ~BIT(offset);
+	regmap_write(priv->mmap, DUTY_CYCLE(offset), 0x0);
+}
+
+static int sso_gpio_get_dir(struct gpio_chip *chip, unsigned int offset)
+{
+	return GPIOF_DIR_OUT;
+}
+
+static int
+sso_gpio_dir_out(struct gpio_chip *chip, unsigned int offset, int value)
+{
+	struct sso_led_priv *priv = gpiochip_get_data(chip);
+	bool bit = !!value;
+
+	regmap_update_bits(priv->mmap, SSO_CPU, BIT(offset), bit << offset);
+	if (!priv->gpio.freq)
+		regmap_update_bits(priv->mmap, SSO_CON0, SSO_CON0_SWU,
+				   SSO_CON0_SWU);
+
+	return 0;
+}
+
+static int sso_gpio_get(struct gpio_chip *chip, unsigned int offset)
+{
+	struct sso_led_priv *priv = gpiochip_get_data(chip);
+	u32 reg_val;
+
+	regmap_read(priv->mmap, SSO_CPU, &reg_val);
+
+	return !!(reg_val & BIT(offset));
+}
+
+static void sso_gpio_set(struct gpio_chip *chip, unsigned int offset, int value)
+{
+	struct sso_led_priv *priv = gpiochip_get_data(chip);
+
+	regmap_update_bits(priv->mmap, SSO_CPU, BIT(offset), value << offset);
+	if (!priv->gpio.freq)
+		regmap_update_bits(priv->mmap, SSO_CON0, SSO_CON0_SWU,
+				   SSO_CON0_SWU);
+}
+
+static int sso_gpio_gc_init(struct device *dev, struct sso_led_priv *priv)
+{
+	struct gpio_chip *gc = &priv->gpio.chip;
+
+	gc->request             = sso_gpio_request;
+	gc->free                = sso_gpio_free;
+	gc->get_direction       = sso_gpio_get_dir;
+	gc->direction_output    = sso_gpio_dir_out;
+	gc->get                 = sso_gpio_get;
+	gc->set                 = sso_gpio_set;
+
+	gc->label               = "lgm-sso";
+	gc->base                = -1;
+	/* To exclude pins from control, use "gpio-reserved-ranges" */
+	gc->ngpio               = priv->gpio.pins;
+	gc->parent              = dev;
+	gc->owner               = THIS_MODULE;
+	gc->of_node             = dev->of_node;
+
+	return devm_gpiochip_add_data(dev, gc, priv);
+}
+
+static int sso_gpio_get_freq_idx(int freq)
+{
+	int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(freq_tbl); idx++) {
+		if (freq <= freq_tbl[idx])
+			return idx;
+	}
+
+	return -1;
+}
+
+static void sso_register_shift_clk(struct sso_led_priv *priv)
+{
+	int idx, size = ARRAY_SIZE(shift_clk_freq_tbl);
+	u32 val = 0;
+
+	for (idx = 0; idx < size; idx++) {
+		if (shift_clk_freq_tbl[idx] <= priv->gpio.shift_clk_freq) {
+			val = idx;
+			break;
+		}
+	}
+
+	if (idx == size)
+		dev_warn(priv->dev, "%s: Invalid freq %d\n",
+			 __func__, priv->gpio.shift_clk_freq);
+
+	regmap_update_bits(priv->mmap, SSO_CON1, SSO_CON1_FCDSC,
+			   FIELD_PREP(SSO_CON1_FCDSC, val));
+}
+
+static int sso_gpio_freq_set(struct sso_led_priv *priv)
+{
+	int freq_idx;
+	u32 val;
+
+	freq_idx = sso_gpio_get_freq_idx(priv->gpio.freq);
+	if (freq_idx == -1)
+		freq_idx = ARRAY_SIZE(freq_tbl) - 1;
+
+	val = freq_idx % FPID_FREQ_RANK_MAX;
+
+	if (!priv->gpio.freq) {
+		regmap_update_bits(priv->mmap, SSO_CON0, SSO_CON0_BLINK_R, 0);
+		regmap_update_bits(priv->mmap, SSO_CON1, SSO_CON1_US,
+				   FIELD_PREP(SSO_CON1_US, US_SW));
+	} else if (freq_idx < FPID_FREQ_RANK_MAX) {
+		regmap_update_bits(priv->mmap, SSO_CON0, SSO_CON0_BLINK_R,
+				   SSO_CON0_BLINK_R);
+		regmap_update_bits(priv->mmap, SSO_CON1, SSO_CON1_US,
+				   FIELD_PREP(SSO_CON1_US, US_FPID));
+		regmap_update_bits(priv->mmap, SSO_CON1, SSO_CON1_FPID,
+				   FIELD_PREP(SSO_CON1_FPID, val));
+	} else {
+		regmap_update_bits(priv->mmap, SSO_CON0, SSO_CON0_BLINK_R,
+				   SSO_CON0_BLINK_R);
+		regmap_update_bits(priv->mmap, SSO_CON1, SSO_CON1_US,
+				   FIELD_PREP(SSO_CON1_US, US_GPTC));
+		regmap_update_bits(priv->mmap, SSO_CON1, SSO_CON1_GPTD,
+				   FIELD_PREP(SSO_CON1_GPTD, val));
+	}
+
+	return 0;
+}
+
+static int sso_gpio_hw_init(struct sso_led_priv *priv)
+{
+	u32 activate;
+	int i, err;
+
+	/* Clear all duty cycles */
+	for (i = 0; i < priv->gpio.pins; i++) {
+		err = regmap_write(priv->mmap, DUTY_CYCLE(i), 0);
+		if (err)
+			return err;
+	}
+
+	/* 4 groups for total 32 pins */
+	for (i = 1; i <= MAX_GROUP_NUM; i++) {
+		activate = !!(i * PINS_PER_GROUP <= priv->gpio.pins ||
+			      priv->gpio.pins > (i - 1) * PINS_PER_GROUP);
+		err = regmap_update_bits(priv->mmap, SSO_CON1, BIT(i - 1),
+					 activate << (i - 1));
+		if (err)
+			return err;
+	}
+
+	/* NO HW directly controlled pin by default */
+	err = regmap_write(priv->mmap, SSO_CON3, 0);
+	if (err)
+		return err;
+
+	/* NO BLINK for all pins */
+	err = regmap_write(priv->mmap, SSO_CON2, 0);
+	if (err)
+		return err;
+
+	/* OUTPUT 0 by default */
+	err = regmap_write(priv->mmap, SSO_CPU, 0);
+	if (err)
+		return err;
+
+	/* update edge */
+	err = regmap_update_bits(priv->mmap, SSO_CON0, SSO_CON0_RZFL,
+				 FIELD_PREP(SSO_CON0_RZFL, priv->gpio.edge));
+	if (err)
+		return err;
+
+	/* Set GPIO update rate */
+	sso_gpio_freq_set(priv);
+
+	/* Register shift clock */
+	sso_register_shift_clk(priv);
+
+	return 0;
+}
+
+static void sso_led_shutdown(struct sso_led *led)
+{
+	struct sso_led_priv *priv = led->priv;
+
+	/* unregister led */
+	devm_led_classdev_unregister(priv->dev, &led->cdev);
+
+	/* clear HW control bit */
+	if (led->desc.hw_trig)
+		regmap_update_bits(priv->mmap, SSO_CON3, BIT(led->desc.pin), 0);
+
+	if (led->gpiod)
+		devm_gpiod_put(priv->dev, led->gpiod);
+
+	led->priv = NULL;
+}
+
+static int
+__sso_led_dt_parse(struct sso_led_priv *priv, struct fwnode_handle *fw_ssoled)
+{
+	struct fwnode_handle *fwnode_child;
+	struct device *dev = priv->dev;
+	struct sso_led_desc *desc;
+	struct sso_led *led;
+	struct list_head *p;
+	const char *tmp;
+	u32 prop;
+	int ret;
+
+	fwnode_for_each_child_node(fw_ssoled, fwnode_child) {
+		led = devm_kzalloc(dev, sizeof(*led), GFP_KERNEL);
+		if (!led)
+			return -ENOMEM;
+
+		INIT_LIST_HEAD(&led->list);
+		led->priv = priv;
+		desc = &led->desc;
+
+		led->gpiod = devm_fwnode_get_gpiod_from_child(dev, NULL,
+							      fwnode_child,
+							      GPIOD_ASIS, NULL);
+		if (IS_ERR(led->gpiod)) {
+			dev_err(dev, "led: get gpio fail!\n");
+			goto __dt_err;
+		}
+
+		fwnode_property_read_string(fwnode_child,
+					    "linux,default-trigger",
+					    &desc->default_trigger);
+
+		if (fwnode_property_present(fwnode_child,
+					    "retain-state-suspended"))
+			desc->retain_state_suspended = 1;
+
+		if (fwnode_property_present(fwnode_child,
+					    "retain-state-shutdown"))
+			desc->retain_state_shutdown = 1;
+
+		if (fwnode_property_present(fwnode_child, "panic-indicator"))
+			desc->panic_indicator = 1;
+
+		ret = fwnode_property_read_u32(fwnode_child, "reg", &prop);
+		if (ret != 0 || prop >= SSO_LED_MAX_NUM) {
+			dev_err(dev, "invalid LED pin:%u\n", prop);
+			goto __dt_err;
+		}
+		desc->pin = prop;
+
+		if (fwnode_property_present(fwnode_child, "intel,sso-hw-blink"))
+			desc->hw_blink = 1;
+
+		desc->hw_trig = fwnode_property_read_bool(fwnode_child,
+							  "intel,sso-hw-trigger");
+		if (desc->hw_trig) {
+			desc->default_trigger = NULL;
+			desc->retain_state_shutdown = 0;
+			desc->retain_state_suspended = 0;
+			desc->panic_indicator = 0;
+			desc->hw_blink = 0;
+		}
+
+		if (fwnode_property_read_u32(fwnode_child,
+					     "intel,sso-blink-rate-hz", &prop)) {
+			/* default first freq rate */
+			desc->freq_idx = 0;
+			desc->blink_rate = priv->freq[desc->freq_idx];
+		} else {
+			desc->freq_idx = sso_get_blink_rate_idx(priv, prop);
+			if (desc->freq_idx == -1)
+				desc->freq_idx = MAX_FREQ_RANK - 1;
+
+			desc->blink_rate = priv->freq[desc->freq_idx];
+		}
+
+		if (!fwnode_property_read_string(fwnode_child, "default-state", &tmp)) {
+			if (!strcmp(tmp, "on"))
+				desc->brightness = LED_FULL;
+		}
+
+		if (sso_create_led(priv, led, fwnode_child))
+			goto __dt_err;
+	}
+	fwnode_handle_put(fw_ssoled);
+
+	return 0;
+__dt_err:
+	fwnode_handle_put(fw_ssoled);
+	/* unregister leds */
+	list_for_each(p, &priv->led_list) {
+		led = list_entry(p, struct sso_led, list);
+		sso_led_shutdown(led);
+	}
+
+	return -EINVAL;
+}
+
+static int sso_led_dt_parse(struct sso_led_priv *priv)
+{
+	struct fwnode_handle *fwnode = dev_fwnode(priv->dev);
+	struct fwnode_handle *fw_ssoled;
+	struct device *dev = priv->dev;
+	int count;
+	int ret;
+
+	count = device_get_child_node_count(dev);
+	if (!count)
+		return 0;
+
+	fw_ssoled = fwnode_get_named_child_node(fwnode, "ssoled");
+	if (fw_ssoled) {
+		ret = __sso_led_dt_parse(priv, fw_ssoled);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int sso_probe_gpios(struct sso_led_priv *priv)
+{
+	struct device *dev = priv->dev;
+	int ret;
+
+	if (device_property_read_u32(dev, "ngpios", &priv->gpio.pins))
+		priv->gpio.pins = MAX_PIN_NUM_PER_BANK;
+
+	if (priv->gpio.pins > MAX_PIN_NUM_PER_BANK)
+		return -EINVAL;
+
+	if (device_property_read_u32(dev, "intel,sso-update-rate-hz",
+				     &priv->gpio.freq))
+		priv->gpio.freq = 0;
+
+	priv->gpio.edge = DATA_CLK_EDGE;
+	priv->gpio.shift_clk_freq = -1;
+
+	ret = sso_gpio_hw_init(priv);
+	if (ret)
+		return ret;
+
+	return sso_gpio_gc_init(dev, priv);
+}
+
+static void sso_clk_disable(void *data)
+{
+	struct sso_led_priv *priv = data;
+
+	clk_disable_unprepare(priv->fpid_clk);
+	clk_disable_unprepare(priv->gclk);
+}
+
+static int intel_sso_led_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct sso_led_priv *priv;
+	int ret;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->pdev = pdev;
+	priv->dev = dev;
+
+	/* gate clock */
+	priv->gclk = devm_clk_get(dev, "sso");
+	if (IS_ERR(priv->gclk)) {
+		dev_err(dev, "get sso gate clock failed!\n");
+		return PTR_ERR(priv->gclk);
+	}
+
+	ret = clk_prepare_enable(priv->gclk);
+	if (ret) {
+		dev_err(dev, "Failed to prepate/enable sso gate clock!\n");
+		return ret;
+	}
+
+	priv->fpid_clk = devm_clk_get(dev, "fpid");
+	if (IS_ERR(priv->fpid_clk)) {
+		dev_err(dev, "Failed to get fpid clock!\n");
+		return PTR_ERR(priv->fpid_clk);
+	}
+
+	ret = clk_prepare_enable(priv->fpid_clk);
+	if (ret) {
+		dev_err(dev, "Failed to prepare/enable fpid clock!\n");
+		return ret;
+	}
+	priv->fpid_clkrate = clk_get_rate(priv->fpid_clk);
+
+	ret = devm_add_action_or_reset(dev, sso_clk_disable, priv);
+	if (ret) {
+		dev_err(dev, "Failed to devm_add_action_or_reset, %d\n", ret);
+		return ret;
+	}
+
+	priv->mmap = syscon_node_to_regmap(dev->of_node);
+	if (IS_ERR(priv->mmap)) {
+		dev_err(dev, "Failed to map iomem!\n");
+		return PTR_ERR(priv->mmap);
+	}
+
+	ret = sso_probe_gpios(priv);
+	if (ret) {
+		regmap_exit(priv->mmap);
+		return ret;
+	}
+
+	INIT_LIST_HEAD(&priv->led_list);
+
+	platform_set_drvdata(pdev, priv);
+	sso_init_freq(priv);
+
+	priv->gptc_clkrate = DEF_GPTC_CLK_RATE;
+
+	ret = sso_led_dt_parse(priv);
+	if (ret) {
+		regmap_exit(priv->mmap);
+		return ret;
+	}
+	dev_info(priv->dev, "sso LED init success!\n");
+
+	return 0;
+}
+
+static int intel_sso_led_remove(struct platform_device *pdev)
+{
+	struct sso_led_priv *priv;
+	struct list_head *pos, *n;
+	struct sso_led *led;
+
+	priv = platform_get_drvdata(pdev);
+
+	list_for_each_safe(pos, n, &priv->led_list) {
+		list_del(pos);
+		led = list_entry(pos, struct sso_led, list);
+		sso_led_shutdown(led);
+	}
+
+	clk_disable_unprepare(priv->fpid_clk);
+	clk_disable_unprepare(priv->gclk);
+	regmap_exit(priv->mmap);
+
+	return 0;
+}
+
+static const struct of_device_id of_sso_led_match[] = {
+	{ .compatible = "intel,lgm-ssoled" },
+	{}
+};
+
+MODULE_DEVICE_TABLE(of, of_sso_led_match);
+
+static struct platform_driver intel_sso_led_driver = {
+	.probe		= intel_sso_led_probe,
+	.remove		= intel_sso_led_remove,
+	.driver		= {
+			.name = "lgm-ssoled",
+			.of_match_table = of_match_ptr(of_sso_led_match),
+	},
+};
+
+module_platform_driver(intel_sso_led_driver);
+
+MODULE_DESCRIPTION("Intel SSO LED/GPIO driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c
index 131ca83f5fb3..2e495ff67856 100644
--- a/drivers/leds/led-class.c
+++ b/drivers/leds/led-class.c
@@ -145,8 +145,7 @@ static void led_remove_brightness_hw_changed(struct led_classdev *led_cdev)
 	device_remove_file(led_cdev->dev, &dev_attr_brightness_hw_changed);
 }
 
-void led_classdev_notify_brightness_hw_changed(struct led_classdev *led_cdev,
-					       enum led_brightness brightness)
+void led_classdev_notify_brightness_hw_changed(struct led_classdev *led_cdev, unsigned int brightness)
 {
 	if (WARN_ON(!led_cdev->brightness_hw_changed_kn))
 		return;
diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c
index c4e780bdb385..8eb8054ef9c6 100644
--- a/drivers/leds/led-core.c
+++ b/drivers/leds/led-core.c
@@ -39,8 +39,7 @@ const char * const led_colors[LED_COLOR_ID_MAX] = {
 };
 EXPORT_SYMBOL_GPL(led_colors);
 
-static int __led_set_brightness(struct led_classdev *led_cdev,
-				enum led_brightness value)
+static int __led_set_brightness(struct led_classdev *led_cdev, unsigned int value)
 {
 	if (!led_cdev->brightness_set)
 		return -ENOTSUPP;
@@ -50,8 +49,7 @@ static int __led_set_brightness(struct led_classdev *led_cdev,
 	return 0;
 }
 
-static int __led_set_brightness_blocking(struct led_classdev *led_cdev,
-					 enum led_brightness value)
+static int __led_set_brightness_blocking(struct led_classdev *led_cdev, unsigned int value)
 {
 	if (!led_cdev->brightness_set_blocking)
 		return -ENOTSUPP;
@@ -240,8 +238,7 @@ void led_stop_software_blink(struct led_classdev *led_cdev)
 }
 EXPORT_SYMBOL_GPL(led_stop_software_blink);
 
-void led_set_brightness(struct led_classdev *led_cdev,
-			enum led_brightness brightness)
+void led_set_brightness(struct led_classdev *led_cdev, unsigned int brightness)
 {
 	/*
 	 * If software blink is active, delay brightness setting
@@ -253,7 +250,7 @@ void led_set_brightness(struct led_classdev *led_cdev,
 		 * work queue task to avoid problems in case we are called
 		 * from hard irq context.
 		 */
-		if (brightness == LED_OFF) {
+		if (!brightness) {
 			set_bit(LED_BLINK_DISABLE, &led_cdev->work_flags);
 			schedule_work(&led_cdev->set_brightness_work);
 		} else {
@@ -268,8 +265,7 @@ void led_set_brightness(struct led_classdev *led_cdev,
 }
 EXPORT_SYMBOL_GPL(led_set_brightness);
 
-void led_set_brightness_nopm(struct led_classdev *led_cdev,
-			      enum led_brightness value)
+void led_set_brightness_nopm(struct led_classdev *led_cdev, unsigned int value)
 {
 	/* Use brightness_set op if available, it is guaranteed not to sleep */
 	if (!__led_set_brightness(led_cdev, value))
@@ -281,8 +277,7 @@ void led_set_brightness_nopm(struct led_classdev *led_cdev,
 }
 EXPORT_SYMBOL_GPL(led_set_brightness_nopm);
 
-void led_set_brightness_nosleep(struct led_classdev *led_cdev,
-				enum led_brightness value)
+void led_set_brightness_nosleep(struct led_classdev *led_cdev, unsigned int value)
 {
 	led_cdev->brightness = min(value, led_cdev->max_brightness);
 
@@ -293,8 +288,7 @@ void led_set_brightness_nosleep(struct led_classdev *led_cdev,
 }
 EXPORT_SYMBOL_GPL(led_set_brightness_nosleep);
 
-int led_set_brightness_sync(struct led_classdev *led_cdev,
-			    enum led_brightness value)
+int led_set_brightness_sync(struct led_classdev *led_cdev, unsigned int value)
 {
 	if (led_cdev->blink_delay_on || led_cdev->blink_delay_off)
 		return -EBUSY;
diff --git a/drivers/leds/leds-apu.c b/drivers/leds/leds-apu.c
index 7fd557aceff6..c409b80c236d 100644
--- a/drivers/leds/leds-apu.c
+++ b/drivers/leds/leds-apu.c
@@ -83,6 +83,7 @@ static const struct apu_led_profile apu1_led_profile[] = {
 };
 
 static const struct dmi_system_id apu_led_dmi_table[] __initconst = {
+	/* PC Engines APU with factory bios "SageBios_PCEngines_APU-45" */
 	{
 		.ident = "apu",
 		.matches = {
@@ -90,6 +91,14 @@ static const struct dmi_system_id apu_led_dmi_table[] __initconst = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "APU")
 		}
 	},
+	/* PC Engines APU with "Mainline" bios >= 4.6.8 */
+	{
+		.ident = "apu",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "PC Engines"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "apu1")
+		}
+	},
 	{}
 };
 MODULE_DEVICE_TABLE(dmi, apu_led_dmi_table);
@@ -173,7 +182,7 @@ static int __init apu_led_init(void)
 	int err;
 
 	if (!(dmi_match(DMI_SYS_VENDOR, "PC Engines") &&
-	      dmi_match(DMI_PRODUCT_NAME, "APU"))) {
+	      (dmi_match(DMI_PRODUCT_NAME, "APU") || dmi_match(DMI_PRODUCT_NAME, "apu1")))) {
 		pr_err("No PC Engines APUv1 board detected. For APUv2,3 support, enable CONFIG_PCENGINES_APU2\n");
 		return -ENODEV;
 	}
diff --git a/drivers/leds/leds-blinkm.c b/drivers/leds/leds-blinkm.c
index e11fe1788242..b4e1fdff4186 100644
--- a/drivers/leds/leds-blinkm.c
+++ b/drivers/leds/leds-blinkm.c
@@ -192,13 +192,13 @@ static int store_color_common(struct device *dev, const char *buf, int color)
 	return 0;
 }
 
-static ssize_t show_red(struct device *dev, struct device_attribute *attr,
+static ssize_t red_show(struct device *dev, struct device_attribute *attr,
 			char *buf)
 {
 	return show_color_common(dev, buf, RED);
 }
 
-static ssize_t store_red(struct device *dev, struct device_attribute *attr,
+static ssize_t red_store(struct device *dev, struct device_attribute *attr,
 			 const char *buf, size_t count)
 {
 	int ret;
@@ -209,15 +209,15 @@ static ssize_t store_red(struct device *dev, struct device_attribute *attr,
 	return count;
 }
 
-static DEVICE_ATTR(red, S_IRUGO | S_IWUSR, show_red, store_red);
+static DEVICE_ATTR_RW(red);
 
-static ssize_t show_green(struct device *dev, struct device_attribute *attr,
+static ssize_t green_show(struct device *dev, struct device_attribute *attr,
 			  char *buf)
 {
 	return show_color_common(dev, buf, GREEN);
 }
 
-static ssize_t store_green(struct device *dev, struct device_attribute *attr,
+static ssize_t green_store(struct device *dev, struct device_attribute *attr,
 			   const char *buf, size_t count)
 {
 
@@ -229,15 +229,15 @@ static ssize_t store_green(struct device *dev, struct device_attribute *attr,
 	return count;
 }
 
-static DEVICE_ATTR(green, S_IRUGO | S_IWUSR, show_green, store_green);
+static DEVICE_ATTR_RW(green);
 
-static ssize_t show_blue(struct device *dev, struct device_attribute *attr,
+static ssize_t blue_show(struct device *dev, struct device_attribute *attr,
 			 char *buf)
 {
 	return show_color_common(dev, buf, BLUE);
 }
 
-static ssize_t store_blue(struct device *dev, struct device_attribute *attr,
+static ssize_t blue_store(struct device *dev, struct device_attribute *attr,
 			  const char *buf, size_t count)
 {
 	int ret;
@@ -248,16 +248,16 @@ static ssize_t store_blue(struct device *dev, struct device_attribute *attr,
 	return count;
 }
 
-static DEVICE_ATTR(blue, S_IRUGO | S_IWUSR, show_blue, store_blue);
+static DEVICE_ATTR_RW(blue);
 
-static ssize_t show_test(struct device *dev, struct device_attribute *attr,
+static ssize_t test_show(struct device *dev, struct device_attribute *attr,
 			 char *buf)
 {
 	return scnprintf(buf, PAGE_SIZE,
 			 "#Write into test to start test sequence!#\n");
 }
 
-static ssize_t store_test(struct device *dev, struct device_attribute *attr,
+static ssize_t test_store(struct device *dev, struct device_attribute *attr,
 			  const char *buf, size_t count)
 {
 
@@ -273,7 +273,7 @@ static ssize_t store_test(struct device *dev, struct device_attribute *attr,
 	return count;
 }
 
-static DEVICE_ATTR(test, S_IRUGO | S_IWUSR, show_test, store_test);
+static DEVICE_ATTR_RW(test);
 
 /* TODO: HSB, fade, timeadj, script ... */
 
diff --git a/drivers/leds/leds-gpio.c b/drivers/leds/leds-gpio.c
index 93f5b1b60fde..b5d5e22d2d1e 100644
--- a/drivers/leds/leds-gpio.c
+++ b/drivers/leds/leds-gpio.c
@@ -96,7 +96,8 @@ static int create_gpio_led(const struct gpio_led *template,
 	} else {
 		state = (template->default_state == LEDS_GPIO_DEFSTATE_ON);
 	}
-	led_dat->cdev.brightness = state ? LED_FULL : LED_OFF;
+	led_dat->cdev.brightness = state;
+	led_dat->cdev.max_brightness = 1;
 	if (!template->retain_state_suspended)
 		led_dat->cdev.flags |= LED_CORE_SUSPENDRESUME;
 	if (template->panic_indicator)
diff --git a/drivers/leds/leds-lm3530.c b/drivers/leds/leds-lm3530.c
index 2f8362f6bf75..2db455efd4b1 100644
--- a/drivers/leds/leds-lm3530.c
+++ b/drivers/leds/leds-lm3530.c
@@ -346,8 +346,8 @@ static void lm3530_brightness_set(struct led_classdev *led_cdev,
 	}
 }
 
-static ssize_t lm3530_mode_get(struct device *dev,
-		struct device_attribute *attr, char *buf)
+static ssize_t mode_show(struct device *dev,
+			 struct device_attribute *attr, char *buf)
 {
 	struct led_classdev *led_cdev = dev_get_drvdata(dev);
 	struct lm3530_data *drvdata;
@@ -365,8 +365,8 @@ static ssize_t lm3530_mode_get(struct device *dev,
 	return len;
 }
 
-static ssize_t lm3530_mode_set(struct device *dev, struct device_attribute
-				   *attr, const char *buf, size_t size)
+static ssize_t mode_store(struct device *dev, struct device_attribute
+			  *attr, const char *buf, size_t size)
 {
 	struct led_classdev *led_cdev = dev_get_drvdata(dev);
 	struct lm3530_data *drvdata;
@@ -397,7 +397,7 @@ static ssize_t lm3530_mode_set(struct device *dev, struct device_attribute
 
 	return sizeof(drvdata->mode);
 }
-static DEVICE_ATTR(mode, 0644, lm3530_mode_get, lm3530_mode_set);
+static DEVICE_ATTR_RW(mode);
 
 static struct attribute *lm3530_attrs[] = {
 	&dev_attr_mode.attr,
diff --git a/drivers/leds/leds-lm3533.c b/drivers/leds/leds-lm3533.c
index 9dd205870525..43d5970d96aa 100644
--- a/drivers/leds/leds-lm3533.c
+++ b/drivers/leds/leds-lm3533.c
@@ -608,7 +608,7 @@ static struct attribute *lm3533_led_attributes[] = {
 static umode_t lm3533_led_attr_is_visible(struct kobject *kobj,
 					     struct attribute *attr, int n)
 {
-	struct device *dev = container_of(kobj, struct device, kobj);
+	struct device *dev = kobj_to_dev(kobj);
 	struct led_classdev *led_cdev = dev_get_drvdata(dev);
 	struct lm3533_led *led = to_lm3533_led(led_cdev);
 	umode_t mode = attr->mode;
diff --git a/drivers/leds/leds-lm355x.c b/drivers/leds/leds-lm355x.c
index 1505521249b5..2d3e11845ba5 100644
--- a/drivers/leds/leds-lm355x.c
+++ b/drivers/leds/leds-lm355x.c
@@ -349,9 +349,9 @@ static int lm355x_indicator_brightness_set(struct led_classdev *cdev,
 }
 
 /* indicator pattern only for lm3556*/
-static ssize_t lm3556_indicator_pattern_store(struct device *dev,
-					      struct device_attribute *attr,
-					      const char *buf, size_t size)
+static ssize_t pattern_store(struct device *dev,
+			     struct device_attribute *attr,
+			     const char *buf, size_t size)
 {
 	ssize_t ret;
 	struct led_classdev *led_cdev = dev_get_drvdata(dev);
@@ -381,7 +381,7 @@ out:
 	return ret;
 }
 
-static DEVICE_ATTR(pattern, S_IWUSR, NULL, lm3556_indicator_pattern_store);
+static DEVICE_ATTR_WO(pattern);
 
 static struct attribute *lm355x_indicator_attrs[] = {
 	&dev_attr_pattern.attr,
diff --git a/drivers/leds/leds-lm3642.c b/drivers/leds/leds-lm3642.c
index 62c14872caf7..8007b82985a8 100644
--- a/drivers/leds/leds-lm3642.c
+++ b/drivers/leds/leds-lm3642.c
@@ -165,9 +165,9 @@ static int lm3642_control(struct lm3642_chip_data *chip,
 /* torch */
 
 /* torch pin config for lm3642 */
-static ssize_t lm3642_torch_pin_store(struct device *dev,
-				      struct device_attribute *attr,
-				      const char *buf, size_t size)
+static ssize_t torch_pin_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t size)
 {
 	ssize_t ret;
 	struct led_classdev *led_cdev = dev_get_drvdata(dev);
@@ -193,7 +193,7 @@ static ssize_t lm3642_torch_pin_store(struct device *dev,
 	return size;
 }
 
-static DEVICE_ATTR(torch_pin, S_IWUSR, NULL, lm3642_torch_pin_store);
+static DEVICE_ATTR_WO(torch_pin);
 
 static int lm3642_torch_brightness_set(struct led_classdev *cdev,
 					enum led_brightness brightness)
@@ -212,9 +212,9 @@ static int lm3642_torch_brightness_set(struct led_classdev *cdev,
 /* flash */
 
 /* strobe pin config for lm3642*/
-static ssize_t lm3642_strobe_pin_store(struct device *dev,
-				       struct device_attribute *attr,
-				       const char *buf, size_t size)
+static ssize_t strobe_pin_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t size)
 {
 	ssize_t ret;
 	struct led_classdev *led_cdev = dev_get_drvdata(dev);
@@ -240,7 +240,7 @@ static ssize_t lm3642_strobe_pin_store(struct device *dev,
 	return size;
 }
 
-static DEVICE_ATTR(strobe_pin, S_IWUSR, NULL, lm3642_strobe_pin_store);
+static DEVICE_ATTR_WO(strobe_pin);
 
 static int lm3642_strobe_brightness_set(struct led_classdev *cdev,
 					 enum led_brightness brightness)
diff --git a/drivers/leds/leds-lp50xx.c b/drivers/leds/leds-lp50xx.c
index f13117eed976..06230614fdc5 100644
--- a/drivers/leds/leds-lp50xx.c
+++ b/drivers/leds/leds-lp50xx.c
@@ -6,10 +6,9 @@
 #include <linux/i2c.h>
 #include <linux/init.h>
 #include <linux/leds.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/of.h>
-#include <linux/of_gpio.h>
 #include <linux/regmap.h>
 #include <linux/regulator/consumer.h>
 #include <linux/slab.h>
@@ -322,7 +321,7 @@ static int lp50xx_brightness_set(struct led_classdev *cdev,
 
 	ret = regmap_write(led->priv->regmap, reg_val, brightness);
 	if (ret) {
-		dev_err(&led->priv->client->dev,
+		dev_err(led->priv->dev,
 			"Cannot write brightness value %d\n", ret);
 		goto out;
 	}
@@ -338,7 +337,7 @@ static int lp50xx_brightness_set(struct led_classdev *cdev,
 		ret = regmap_write(led->priv->regmap, reg_val,
 				   mc_dev->subled_info[i].intensity);
 		if (ret) {
-			dev_err(&led->priv->client->dev,
+			dev_err(led->priv->dev,
 				"Cannot write intensity value %d\n", ret);
 			goto out;
 		}
@@ -360,8 +359,8 @@ static int lp50xx_set_banks(struct lp50xx *priv, u32 led_banks[])
 			bank_enable_mask |= (1 << led_banks[i]);
 	}
 
-	led_config_lo = (u8)(bank_enable_mask & 0xff);
-	led_config_hi = (u8)(bank_enable_mask >> 8) & 0xff;
+	led_config_lo = bank_enable_mask;
+	led_config_hi = bank_enable_mask >> 8;
 
 	ret = regmap_write(priv->regmap, LP50XX_LED_CFG0, led_config_lo);
 	if (ret)
@@ -382,11 +381,9 @@ static int lp50xx_enable_disable(struct lp50xx *priv, int enable_disable)
 {
 	int ret;
 
-	if (priv->enable_gpio) {
-		ret = gpiod_direction_output(priv->enable_gpio, enable_disable);
-		if (ret)
-			return ret;
-	}
+	ret = gpiod_direction_output(priv->enable_gpio, enable_disable);
+	if (ret)
+		return ret;
 
 	if (enable_disable)
 		return regmap_write(priv->regmap, LP50XX_DEV_CFG0, LP50XX_CHIP_EN);
@@ -404,7 +401,7 @@ static int lp50xx_probe_leds(struct fwnode_handle *child, struct lp50xx *priv,
 
 	if (num_leds > 1) {
 		if (num_leds > priv->chip_info->max_modules) {
-			dev_err(&priv->client->dev, "reg property is invalid\n");
+			dev_err(priv->dev, "reg property is invalid\n");
 			return -EINVAL;
 		}
 
@@ -412,13 +409,13 @@ static int lp50xx_probe_leds(struct fwnode_handle *child, struct lp50xx *priv,
 
 		ret = fwnode_property_read_u32_array(child, "reg", led_banks, num_leds);
 		if (ret) {
-			dev_err(&priv->client->dev, "reg property is missing\n");
+			dev_err(priv->dev, "reg property is missing\n");
 			return ret;
 		}
 
 		ret = lp50xx_set_banks(priv, led_banks);
 		if (ret) {
-			dev_err(&priv->client->dev, "Cannot setup banked LEDs\n");
+			dev_err(priv->dev, "Cannot setup banked LEDs\n");
 			return ret;
 		}
 
@@ -426,12 +423,12 @@ static int lp50xx_probe_leds(struct fwnode_handle *child, struct lp50xx *priv,
 	} else {
 		ret = fwnode_property_read_u32(child, "reg", &led_number);
 		if (ret) {
-			dev_err(&priv->client->dev, "led reg property missing\n");
+			dev_err(priv->dev, "led reg property missing\n");
 			return ret;
 		}
 
 		if (led_number > priv->chip_info->num_leds) {
-			dev_err(&priv->client->dev, "led-sources property is invalid\n");
+			dev_err(priv->dev, "led-sources property is invalid\n");
 			return -EINVAL;
 		}
 
@@ -455,12 +452,9 @@ static int lp50xx_probe_dt(struct lp50xx *priv)
 	int i = 0;
 
 	priv->enable_gpio = devm_gpiod_get_optional(priv->dev, "enable", GPIOD_OUT_LOW);
-	if (IS_ERR(priv->enable_gpio)) {
-		ret = PTR_ERR(priv->enable_gpio);
-		dev_err(&priv->client->dev, "Failed to get enable gpio: %d\n",
-			ret);
-		return ret;
-	}
+	if (IS_ERR(priv->enable_gpio))
+		return dev_err_probe(priv->dev, PTR_ERR(priv->enable_gpio),
+				     "Failed to get enable GPIO\n");
 
 	priv->regulator = devm_regulator_get(priv->dev, "vled");
 	if (IS_ERR(priv->regulator))
@@ -470,7 +464,7 @@ static int lp50xx_probe_dt(struct lp50xx *priv)
 		led = &priv->leds[i];
 		ret = fwnode_property_count_u32(child, "reg");
 		if (ret < 0) {
-			dev_err(&priv->client->dev, "reg property is invalid\n");
+			dev_err(priv->dev, "reg property is invalid\n");
 			goto child_out;
 		}
 
@@ -510,12 +504,11 @@ static int lp50xx_probe_dt(struct lp50xx *priv)
 		led_cdev = &led->mc_cdev.led_cdev;
 		led_cdev->brightness_set_blocking = lp50xx_brightness_set;
 
-		ret = devm_led_classdev_multicolor_register_ext(&priv->client->dev,
+		ret = devm_led_classdev_multicolor_register_ext(priv->dev,
 						       &led->mc_cdev,
 						       &init_data);
 		if (ret) {
-			dev_err(&priv->client->dev, "led register err: %d\n",
-				ret);
+			dev_err(priv->dev, "led register err: %d\n", ret);
 			goto child_out;
 		}
 		i++;
@@ -529,8 +522,7 @@ child_out:
 	return ret;
 }
 
-static int lp50xx_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int lp50xx_probe(struct i2c_client *client)
 {
 	struct lp50xx *led;
 	int count;
@@ -550,7 +542,7 @@ static int lp50xx_probe(struct i2c_client *client,
 	mutex_init(&led->lock);
 	led->client = client;
 	led->dev = &client->dev;
-	led->chip_info = &lp50xx_chip_info_tbl[id->driver_data];
+	led->chip_info = device_get_match_data(&client->dev);
 	i2c_set_clientdata(client, led);
 	led->regmap = devm_regmap_init_i2c(client,
 					led->chip_info->lp50xx_regmap_config);
@@ -579,15 +571,14 @@ static int lp50xx_remove(struct i2c_client *client)
 
 	ret = lp50xx_enable_disable(led, 0);
 	if (ret) {
-		dev_err(&led->client->dev, "Failed to disable chip\n");
+		dev_err(led->dev, "Failed to disable chip\n");
 		return ret;
 	}
 
 	if (led->regulator) {
 		ret = regulator_disable(led->regulator);
 		if (ret)
-			dev_err(&led->client->dev,
-				"Failed to disable regulator\n");
+			dev_err(led->dev, "Failed to disable regulator\n");
 	}
 
 	mutex_destroy(&led->lock);
@@ -596,24 +587,24 @@ static int lp50xx_remove(struct i2c_client *client)
 }
 
 static const struct i2c_device_id lp50xx_id[] = {
-	{ "lp5009", LP5009 },
-	{ "lp5012", LP5012 },
-	{ "lp5018", LP5018 },
-	{ "lp5024", LP5024 },
-	{ "lp5030", LP5030 },
-	{ "lp5036", LP5036 },
+	{ "lp5009", (kernel_ulong_t)&lp50xx_chip_info_tbl[LP5009] },
+	{ "lp5012", (kernel_ulong_t)&lp50xx_chip_info_tbl[LP5012] },
+	{ "lp5018", (kernel_ulong_t)&lp50xx_chip_info_tbl[LP5018] },
+	{ "lp5024", (kernel_ulong_t)&lp50xx_chip_info_tbl[LP5024] },
+	{ "lp5030", (kernel_ulong_t)&lp50xx_chip_info_tbl[LP5030] },
+	{ "lp5036", (kernel_ulong_t)&lp50xx_chip_info_tbl[LP5036] },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, lp50xx_id);
 
 static const struct of_device_id of_lp50xx_leds_match[] = {
-	{ .compatible = "ti,lp5009", .data = (void *)LP5009 },
-	{ .compatible = "ti,lp5012", .data = (void *)LP5012 },
-	{ .compatible = "ti,lp5018", .data = (void *)LP5018 },
-	{ .compatible = "ti,lp5024", .data = (void *)LP5024 },
-	{ .compatible = "ti,lp5030", .data = (void *)LP5030 },
-	{ .compatible = "ti,lp5036", .data = (void *)LP5036 },
-	{},
+	{ .compatible = "ti,lp5009", .data = &lp50xx_chip_info_tbl[LP5009] },
+	{ .compatible = "ti,lp5012", .data = &lp50xx_chip_info_tbl[LP5012] },
+	{ .compatible = "ti,lp5018", .data = &lp50xx_chip_info_tbl[LP5018] },
+	{ .compatible = "ti,lp5024", .data = &lp50xx_chip_info_tbl[LP5024] },
+	{ .compatible = "ti,lp5030", .data = &lp50xx_chip_info_tbl[LP5030] },
+	{ .compatible = "ti,lp5036", .data = &lp50xx_chip_info_tbl[LP5036] },
+	{}
 };
 MODULE_DEVICE_TABLE(of, of_lp50xx_leds_match);
 
@@ -622,7 +613,7 @@ static struct i2c_driver lp50xx_driver = {
 		.name	= "lp50xx",
 		.of_match_table = of_lp50xx_leds_match,
 	},
-	.probe		= lp50xx_probe,
+	.probe_new	= lp50xx_probe,
 	.remove		= lp50xx_remove,
 	.id_table	= lp50xx_id,
 };
diff --git a/drivers/leds/leds-max8997.c b/drivers/leds/leds-max8997.c
index 512a11d142d0..c0bddb33888d 100644
--- a/drivers/leds/leds-max8997.c
+++ b/drivers/leds/leds-max8997.c
@@ -160,8 +160,8 @@ static void max8997_led_brightness_set(struct led_classdev *led_cdev,
 	}
 }
 
-static ssize_t max8997_led_show_mode(struct device *dev,
-				struct device_attribute *attr, char *buf)
+static ssize_t mode_show(struct device *dev,
+			 struct device_attribute *attr, char *buf)
 {
 	struct led_classdev *led_cdev = dev_get_drvdata(dev);
 	struct max8997_led *led =
@@ -193,9 +193,9 @@ static ssize_t max8997_led_show_mode(struct device *dev,
 	return ret;
 }
 
-static ssize_t max8997_led_store_mode(struct device *dev,
-				struct device_attribute *attr,
-				const char *buf, size_t size)
+static ssize_t mode_store(struct device *dev,
+			  struct device_attribute *attr,
+			  const char *buf, size_t size)
 {
 	struct led_classdev *led_cdev = dev_get_drvdata(dev);
 	struct max8997_led *led =
@@ -222,7 +222,7 @@ static ssize_t max8997_led_store_mode(struct device *dev,
 	return size;
 }
 
-static DEVICE_ATTR(mode, 0644, max8997_led_show_mode, max8997_led_store_mode);
+static DEVICE_ATTR_RW(mode);
 
 static struct attribute *max8997_attrs[] = {
 	&dev_attr_mode.attr,
diff --git a/drivers/leds/leds-netxbig.c b/drivers/leds/leds-netxbig.c
index 68fbf0b66fad..77213b79f84d 100644
--- a/drivers/leds/leds-netxbig.c
+++ b/drivers/leds/leds-netxbig.c
@@ -204,9 +204,9 @@ static void netxbig_led_set(struct led_classdev *led_cdev,
 	spin_unlock_irqrestore(&led_dat->lock, flags);
 }
 
-static ssize_t netxbig_led_sata_store(struct device *dev,
-				      struct device_attribute *attr,
-				      const char *buff, size_t count)
+static ssize_t sata_store(struct device *dev,
+			  struct device_attribute *attr,
+			  const char *buff, size_t count)
 {
 	struct led_classdev *led_cdev = dev_get_drvdata(dev);
 	struct netxbig_led_data *led_dat =
@@ -255,8 +255,8 @@ exit_unlock:
 	return ret;
 }
 
-static ssize_t netxbig_led_sata_show(struct device *dev,
-				     struct device_attribute *attr, char *buf)
+static ssize_t sata_show(struct device *dev,
+			 struct device_attribute *attr, char *buf)
 {
 	struct led_classdev *led_cdev = dev_get_drvdata(dev);
 	struct netxbig_led_data *led_dat =
@@ -265,7 +265,7 @@ static ssize_t netxbig_led_sata_show(struct device *dev,
 	return sprintf(buf, "%d\n", led_dat->sata);
 }
 
-static DEVICE_ATTR(sata, 0644, netxbig_led_sata_show, netxbig_led_sata_store);
+static DEVICE_ATTR_RW(sata);
 
 static struct attribute *netxbig_led_attrs[] = {
 	&dev_attr_sata.attr,
diff --git a/drivers/leds/leds-ss4200.c b/drivers/leds/leds-ss4200.c
index 245de443fe9c..fcaa34706b6c 100644
--- a/drivers/leds/leds-ss4200.c
+++ b/drivers/leds/leds-ss4200.c
@@ -441,8 +441,8 @@ static void set_power_light_amber_noblink(void)
 	nasgpio_led_set_brightness(&amber->led_cdev, LED_FULL);
 }
 
-static ssize_t nas_led_blink_show(struct device *dev,
-				  struct device_attribute *attr, char *buf)
+static ssize_t blink_show(struct device *dev,
+			  struct device_attribute *attr, char *buf)
 {
 	struct led_classdev *led = dev_get_drvdata(dev);
 	int blinking = 0;
@@ -451,9 +451,9 @@ static ssize_t nas_led_blink_show(struct device *dev,
 	return sprintf(buf, "%u\n", blinking);
 }
 
-static ssize_t nas_led_blink_store(struct device *dev,
-				   struct device_attribute *attr,
-				   const char *buf, size_t size)
+static ssize_t blink_store(struct device *dev,
+			   struct device_attribute *attr,
+			   const char *buf, size_t size)
 {
 	int ret;
 	struct led_classdev *led = dev_get_drvdata(dev);
@@ -468,7 +468,7 @@ static ssize_t nas_led_blink_store(struct device *dev,
 	return size;
 }
 
-static DEVICE_ATTR(blink, 0644, nas_led_blink_show, nas_led_blink_store);
+static DEVICE_ATTR_RW(blink);
 
 static struct attribute *nasgpio_led_attrs[] = {
 	&dev_attr_blink.attr,
@@ -478,7 +478,6 @@ ATTRIBUTE_GROUPS(nasgpio_led);
 
 static int register_nasgpio_led(int led_nr)
 {
-	int ret;
 	struct nasgpio_led *nas_led = &nasgpio_leds[led_nr];
 	struct led_classdev *led = get_classdev_for_led_nr(led_nr);
 
@@ -489,11 +488,8 @@ static int register_nasgpio_led(int led_nr)
 	led->brightness_set = nasgpio_led_set_brightness;
 	led->blink_set = nasgpio_led_set_blink;
 	led->groups = nasgpio_led_groups;
-	ret = led_classdev_register(&nas_gpio_pci_dev->dev, led);
-	if (ret)
-		return ret;
 
-	return 0;
+	return led_classdev_register(&nas_gpio_pci_dev->dev, led);
 }
 
 static void unregister_nasgpio_led(int led_nr)
diff --git a/drivers/leds/leds-wm831x-status.c b/drivers/leds/leds-wm831x-status.c
index 67f4235cb28a..c48b80574f02 100644
--- a/drivers/leds/leds-wm831x-status.c
+++ b/drivers/leds/leds-wm831x-status.c
@@ -155,8 +155,8 @@ static const char * const led_src_texts[] = {
 	"soft",
 };
 
-static ssize_t wm831x_status_src_show(struct device *dev,
-				      struct device_attribute *attr, char *buf)
+static ssize_t src_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
 {
 	struct led_classdev *led_cdev = dev_get_drvdata(dev);
 	struct wm831x_status *led = to_wm831x_status(led_cdev);
@@ -178,9 +178,9 @@ static ssize_t wm831x_status_src_show(struct device *dev,
 	return ret;
 }
 
-static ssize_t wm831x_status_src_store(struct device *dev,
-				       struct device_attribute *attr,
-				       const char *buf, size_t size)
+static ssize_t src_store(struct device *dev,
+			 struct device_attribute *attr,
+			 const char *buf, size_t size)
 {
 	struct led_classdev *led_cdev = dev_get_drvdata(dev);
 	struct wm831x_status *led = to_wm831x_status(led_cdev);
@@ -197,7 +197,7 @@ static ssize_t wm831x_status_src_store(struct device *dev,
 	return size;
 }
 
-static DEVICE_ATTR(src, 0644, wm831x_status_src_show, wm831x_status_src_store);
+static DEVICE_ATTR_RW(src);
 
 static struct attribute *wm831x_status_attrs[] = {
 	&dev_attr_src.attr,
diff --git a/drivers/leds/leds.h b/drivers/leds/leds.h
index 2d9eb48bbed9..345062ccabda 100644
--- a/drivers/leds/leds.h
+++ b/drivers/leds/leds.h
@@ -19,10 +19,8 @@ static inline int led_get_brightness(struct led_classdev *led_cdev)
 
 void led_init_core(struct led_classdev *led_cdev);
 void led_stop_software_blink(struct led_classdev *led_cdev);
-void led_set_brightness_nopm(struct led_classdev *led_cdev,
-				enum led_brightness value);
-void led_set_brightness_nosleep(struct led_classdev *led_cdev,
-				enum led_brightness value);
+void led_set_brightness_nopm(struct led_classdev *led_cdev, unsigned int value);
+void led_set_brightness_nosleep(struct led_classdev *led_cdev, unsigned int value);
 ssize_t led_trigger_read(struct file *filp, struct kobject *kobj,
 			struct bin_attribute *attr, char *buf,
 			loff_t pos, size_t count);
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 4312007d2d34..2d3cda0acacb 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -341,8 +341,8 @@ static void do_region(int op, int op_flags, unsigned region,
 			num_bvecs = 1;
 			break;
 		default:
-			num_bvecs = min_t(int, BIO_MAX_PAGES,
-					  dm_sector_div_up(remaining, (PAGE_SIZE >> SECTOR_SHIFT)));
+			num_bvecs = bio_max_segs(dm_sector_div_up(remaining,
+						(PAGE_SIZE >> SECTOR_SHIFT)));
 		}
 
 		bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, &io->client->bios);
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index e3d35c6c9f71..57882654ffee 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -264,15 +264,14 @@ static int write_inline_data(struct log_writes_c *lc, void *entry,
 			     size_t entrylen, void *data, size_t datalen,
 			     sector_t sector)
 {
-	int num_pages, bio_pages, pg_datalen, pg_sectorlen, i;
+	int bio_pages, pg_datalen, pg_sectorlen, i;
 	struct page *page;
 	struct bio *bio;
 	size_t ret;
 	void *ptr;
 
 	while (datalen) {
-		num_pages = ALIGN(datalen, PAGE_SIZE) >> PAGE_SHIFT;
-		bio_pages = min(num_pages, BIO_MAX_PAGES);
+		bio_pages = bio_max_segs(DIV_ROUND_UP(datalen, PAGE_SIZE));
 
 		atomic_inc(&lc->io_blocks);
 
@@ -364,7 +363,7 @@ static int log_one_block(struct log_writes_c *lc,
 		goto out;
 
 	atomic_inc(&lc->io_blocks);
-	bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt, BIO_MAX_PAGES));
+	bio = bio_alloc(GFP_KERNEL, bio_max_segs(block->vec_cnt));
 	if (!bio) {
 		DMERR("Couldn't alloc log bio");
 		goto error;
@@ -386,7 +385,8 @@ static int log_one_block(struct log_writes_c *lc,
 		if (ret != block->vecs[i].bv_len) {
 			atomic_inc(&lc->io_blocks);
 			submit_bio(bio);
-			bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt - i, BIO_MAX_PAGES));
+			bio = bio_alloc(GFP_KERNEL,
+					bio_max_segs(block->vec_cnt - i));
 			if (!bio) {
 				DMERR("Couldn't alloc log bio");
 				goto error;
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 646823ddd317..2d73407ee52e 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -20,7 +20,6 @@
 #include <linux/slab.h>
 #include <linux/scatterlist.h>
 #include <linux/sizes.h>
-#include <linux/swiotlb.h>
 #include <linux/regulator/consumer.h>
 #include <linux/pm_runtime.h>
 #include <linux/of.h>
@@ -4582,12 +4581,8 @@ int sdhci_setup_host(struct sdhci_host *host)
 		mmc->max_segs = SDHCI_MAX_SEGS;
 	} else if (host->flags & SDHCI_USE_SDMA) {
 		mmc->max_segs = 1;
-		if (swiotlb_max_segment()) {
-			unsigned int max_req_size = (1 << IO_TLB_SHIFT) *
-						IO_TLB_SEGSIZE;
-			mmc->max_req_size = min(mmc->max_req_size,
-						max_req_size);
-		}
+		mmc->max_req_size = min_t(size_t, mmc->max_req_size,
+					  dma_max_mapping_size(mmc_dev(mmc)));
 	} else { /* PIO */
 		mmc->max_segs = SDHCI_MAX_SEGS;
 	}
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 7b6632c00ffd..38b0d694dfc9 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2632,6 +2632,7 @@ static void nvme_reset_work(struct work_struct *work)
 	 * Don't limit the IOMMU merged segment size.
 	 */
 	dma_set_max_seg_size(dev->dev, 0xffffffff);
+	dma_set_min_align_mask(dev->dev, NVME_CTRL_PAGE_SIZE - 1);
 
 	mutex_unlock(&dev->shutdown_lock);
 
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index 3d9a5d3ed9cd..9a8b3726a37c 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -185,7 +185,7 @@ static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
 	}
 
 	bip = bio_integrity_alloc(bio, GFP_NOIO,
-		min_t(unsigned int, req->metadata_sg_cnt, BIO_MAX_PAGES));
+					bio_max_segs(req->metadata_sg_cnt));
 	if (IS_ERR(bip)) {
 		pr_err("Unable to allocate bio_integrity_payload\n");
 		return PTR_ERR(bip);
@@ -225,7 +225,7 @@ static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
 
 static void nvmet_bdev_execute_rw(struct nvmet_req *req)
 {
-	int sg_cnt = req->sg_cnt;
+	unsigned int sg_cnt = req->sg_cnt;
 	struct bio *bio;
 	struct scatterlist *sg;
 	struct blk_plug plug;
@@ -262,7 +262,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
 		bio = &req->b.inline_bio;
 		bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
 	} else {
-		bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
+		bio = bio_alloc(GFP_KERNEL, bio_max_segs(sg_cnt));
 	}
 	bio_set_dev(bio, req->ns->bdev);
 	bio->bi_iter.bi_sector = sector;
@@ -289,7 +289,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
 				}
 			}
 
-			bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
+			bio = bio_alloc(GFP_KERNEL, bio_max_segs(sg_cnt));
 			bio_set_dev(bio, req->ns->bdev);
 			bio->bi_iter.bi_sector = sector;
 			bio->bi_opf = op;
diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c
index f50c7b2bf21c..26c587ccd152 100644
--- a/drivers/nvme/target/passthru.c
+++ b/drivers/nvme/target/passthru.c
@@ -26,7 +26,7 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
 	struct nvme_ctrl *pctrl = ctrl->subsys->passthru_ctrl;
 	u16 status = NVME_SC_SUCCESS;
 	struct nvme_id_ctrl *id;
-	int max_hw_sectors;
+	unsigned int max_hw_sectors;
 	int page_shift;
 
 	id = kzalloc(sizeof(*id), GFP_KERNEL);
@@ -198,7 +198,7 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
 		bio = &req->p.inline_bio;
 		bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
 	} else {
-		bio = bio_alloc(GFP_KERNEL, min(req->sg_cnt, BIO_MAX_PAGES));
+		bio = bio_alloc(GFP_KERNEL, bio_max_segs(req->sg_cnt));
 		bio->bi_end_io = bio_put;
 	}
 	bio->bi_opf = req_op(rq);
diff --git a/drivers/pcmcia/cistpl.c b/drivers/pcmcia/cistpl.c
index cf109d9a1112..e6939103991b 100644
--- a/drivers/pcmcia/cistpl.c
+++ b/drivers/pcmcia/cistpl.c
@@ -1554,7 +1554,7 @@ static ssize_t pccard_show_cis(struct file *filp, struct kobject *kobj,
 		if (off + count > size)
 			count = size - off;
 
-		s = to_socket(container_of(kobj, struct device, kobj));
+		s = to_socket(kobj_to_dev(kobj));
 
 		if (!(s->state & SOCKET_PRESENT))
 			return -ENODEV;
@@ -1581,7 +1581,7 @@ static ssize_t pccard_store_cis(struct file *filp, struct kobject *kobj,
 	if (error)
 		return error;
 
-	s = to_socket(container_of(kobj, struct device, kobj));
+	s = to_socket(kobj_to_dev(kobj));
 
 	if (off)
 		return -EINVAL;
diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c
index cce5b5284658..89128fc29ccc 100644
--- a/drivers/rtc/rtc-m41t80.c
+++ b/drivers/rtc/rtc-m41t80.c
@@ -785,7 +785,7 @@ static long wdt_unlocked_ioctl(struct file *file, unsigned int cmd,
  */
 static int wdt_open(struct inode *inode, struct file *file)
 {
-	if (MINOR(inode->i_rdev) == WATCHDOG_MINOR) {
+	if (iminor(inode) == WATCHDOG_MINOR) {
 		mutex_lock(&m41t80_rtc_mutex);
 		if (test_and_set_bit(0, &wdt_is_open)) {
 			mutex_unlock(&m41t80_rtc_mutex);
@@ -809,7 +809,7 @@ static int wdt_open(struct inode *inode, struct file *file)
  */
 static int wdt_release(struct inode *inode, struct file *file)
 {
-	if (MINOR(inode->i_rdev) == WATCHDOG_MINOR)
+	if (iminor(inode) == WATCHDOG_MINOR)
 		clear_bit(0, &wdt_is_open);
 	return 0;
 }
diff --git a/drivers/s390/char/vmur.c b/drivers/s390/char/vmur.c
index 1bbf27b98cf6..68f49e2e964c 100644
--- a/drivers/s390/char/vmur.c
+++ b/drivers/s390/char/vmur.c
@@ -681,7 +681,7 @@ static int ur_open(struct inode *inode, struct file *file)
 	 * We treat the minor number as the devno of the ur device
 	 * to find in the driver tree.
 	 */
-	devno = MINOR(file_inode(file)->i_rdev);
+	devno = iminor(file_inode(file));
 
 	urd = urdev_get_from_devno(devno);
 	if (!urd) {
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index 5730572b52cd..54e686dca6de 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -117,7 +117,7 @@ struct virtio_rev_info {
 };
 
 /* the highest virtio-ccw revision we support */
-#define VIRTIO_CCW_REV_MAX 1
+#define VIRTIO_CCW_REV_MAX 2
 
 struct virtio_ccw_vq_info {
 	struct virtqueue *vq;
@@ -952,7 +952,7 @@ static u8 virtio_ccw_get_status(struct virtio_device *vdev)
 	u8 old_status = vcdev->dma_area->status;
 	struct ccw1 *ccw;
 
-	if (vcdev->revision < 1)
+	if (vcdev->revision < 2)
 		return vcdev->dma_area->status;
 
 	ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
diff --git a/drivers/scsi/aic7xxx/aic79xx.h b/drivers/scsi/aic7xxx/aic79xx.h
index 77e1d6bb59a3..59e9321815c8 100644
--- a/drivers/scsi/aic7xxx/aic79xx.h
+++ b/drivers/scsi/aic7xxx/aic79xx.h
@@ -1175,7 +1175,7 @@ struct ahd_softc {
 	uint8_t			  tqinfifonext;
 
 	/*
-	 * Cached verson of the hs_mailbox so we can avoid
+	 * Cached version of the hs_mailbox so we can avoid
 	 * pausing the sequencer during mailbox updates.
 	 */
 	uint8_t			  hs_mailbox;
diff --git a/drivers/scsi/aic7xxx/aic7xxx.h b/drivers/scsi/aic7xxx/aic7xxx.h
index 11a09798e6b5..9bc755a0a2d3 100644
--- a/drivers/scsi/aic7xxx/aic7xxx.h
+++ b/drivers/scsi/aic7xxx/aic7xxx.h
@@ -896,8 +896,6 @@ union ahc_bus_softc {
 
 typedef void (*ahc_bus_intr_t)(struct ahc_softc *);
 typedef int (*ahc_bus_chip_init_t)(struct ahc_softc *);
-typedef int (*ahc_bus_suspend_t)(struct ahc_softc *);
-typedef int (*ahc_bus_resume_t)(struct ahc_softc *);
 typedef void ahc_callback_t (void *);
 
 struct ahc_softc {
diff --git a/drivers/scsi/bnx2fc/Kconfig b/drivers/scsi/bnx2fc/Kconfig
index 3cf7e08df809..ecdc0f0f4f4e 100644
--- a/drivers/scsi/bnx2fc/Kconfig
+++ b/drivers/scsi/bnx2fc/Kconfig
@@ -5,6 +5,7 @@ config SCSI_BNX2X_FCOE
 	depends on (IPV6 || IPV6=n)
 	depends on LIBFC
 	depends on LIBFCOE
+	depends on MMU
 	select NETDEVICES
 	select ETHERNET
 	select NET_VENDOR_BROADCOM
diff --git a/drivers/scsi/bnx2i/bnx2i_iscsi.c b/drivers/scsi/bnx2i/bnx2i_iscsi.c
index fdd446765311..1e6d8f62ea3c 100644
--- a/drivers/scsi/bnx2i/bnx2i_iscsi.c
+++ b/drivers/scsi/bnx2i/bnx2i_iscsi.c
@@ -1171,10 +1171,8 @@ static void bnx2i_cleanup_task(struct iscsi_task *task)
 		bnx2i_send_cmd_cleanup_req(hba, task->dd_data);
 
 		spin_unlock_bh(&conn->session->back_lock);
-		spin_unlock_bh(&conn->session->frwd_lock);
 		wait_for_completion_timeout(&bnx2i_conn->cmd_cleanup_cmpl,
 				msecs_to_jiffies(ISCSI_CMD_CLEANUP_TIMEOUT));
-		spin_lock_bh(&conn->session->frwd_lock);
 		spin_lock_bh(&conn->session->back_lock);
 	}
 	bnx2i_iscsi_unmap_sg_list(task->dd_data);
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 337d3aa91945..38369766511c 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -1151,7 +1151,10 @@ static void __enqueue_cmd_and_start_io(struct ctlr_info *h,
 {
 	dial_down_lockup_detection_during_fw_flash(h, c);
 	atomic_inc(&h->commands_outstanding);
-	if (c->device)
+	/*
+	 * Check to see if the command is being retried.
+	 */
+	if (c->device && !c->retry_pending)
 		atomic_inc(&c->device->commands_outstanding);
 
 	reply_queue = h->reply_map[raw_smp_processor_id()];
@@ -5567,7 +5570,8 @@ static inline void hpsa_cmd_partial_init(struct ctlr_info *h, int index,
 }
 
 static int hpsa_ioaccel_submit(struct ctlr_info *h,
-		struct CommandList *c, struct scsi_cmnd *cmd)
+		struct CommandList *c, struct scsi_cmnd *cmd,
+		bool retry)
 {
 	struct hpsa_scsi_dev_t *dev = cmd->device->hostdata;
 	int rc = IO_ACCEL_INELIGIBLE;
@@ -5584,18 +5588,22 @@ static int hpsa_ioaccel_submit(struct ctlr_info *h,
 	cmd->host_scribble = (unsigned char *) c;
 
 	if (dev->offload_enabled) {
-		hpsa_cmd_init(h, c->cmdindex, c);
+		hpsa_cmd_init(h, c->cmdindex, c); /* Zeroes out all fields */
 		c->cmd_type = CMD_SCSI;
 		c->scsi_cmd = cmd;
 		c->device = dev;
+		if (retry) /* Resubmit but do not increment device->commands_outstanding. */
+			c->retry_pending = true;
 		rc = hpsa_scsi_ioaccel_raid_map(h, c);
 		if (rc < 0)     /* scsi_dma_map failed. */
 			rc = SCSI_MLQUEUE_HOST_BUSY;
 	} else if (dev->hba_ioaccel_enabled) {
-		hpsa_cmd_init(h, c->cmdindex, c);
+		hpsa_cmd_init(h, c->cmdindex, c); /* Zeroes out all fields */
 		c->cmd_type = CMD_SCSI;
 		c->scsi_cmd = cmd;
 		c->device = dev;
+		if (retry) /* Resubmit but do not increment device->commands_outstanding. */
+			c->retry_pending = true;
 		rc = hpsa_scsi_ioaccel_direct_map(h, c);
 		if (rc < 0)     /* scsi_dma_map failed. */
 			rc = SCSI_MLQUEUE_HOST_BUSY;
@@ -5628,7 +5636,8 @@ static void hpsa_command_resubmit_worker(struct work_struct *work)
 
 		if (c2->error_data.serv_response ==
 				IOACCEL2_STATUS_SR_TASK_COMP_SET_FULL) {
-			rc = hpsa_ioaccel_submit(h, c, cmd);
+			/* Resubmit with the retry_pending flag set. */
+			rc = hpsa_ioaccel_submit(h, c, cmd, true);
 			if (rc == 0)
 				return;
 			if (rc == SCSI_MLQUEUE_HOST_BUSY) {
@@ -5644,6 +5653,15 @@ static void hpsa_command_resubmit_worker(struct work_struct *work)
 		}
 	}
 	hpsa_cmd_partial_init(c->h, c->cmdindex, c);
+	/*
+	 * Here we have not come in though queue_command, so we
+	 * can set the retry_pending flag to true for a driver initiated
+	 * retry attempt (I.E. not a SML retry).
+	 * I.E. We are submitting a driver initiated retry.
+	 * Note: hpsa_ciss_submit does not zero out the command fields like
+	 *       ioaccel submit does.
+	 */
+	c->retry_pending = true;
 	if (hpsa_ciss_submit(c->h, c, cmd, dev)) {
 		/*
 		 * If we get here, it means dma mapping failed. Try
@@ -5706,11 +5724,16 @@ static int hpsa_scsi_queue_command(struct Scsi_Host *sh, struct scsi_cmnd *cmd)
 	/*
 	 * Call alternate submit routine for I/O accelerated commands.
 	 * Retries always go down the normal I/O path.
+	 * Note: If cmd->retries is non-zero, then this is a SML
+	 *       initiated retry and not a driver initiated retry.
+	 *       This command has been obtained from cmd_tagged_alloc
+	 *       and is therefore a brand-new command.
 	 */
 	if (likely(cmd->retries == 0 &&
 			!blk_rq_is_passthrough(cmd->request) &&
 			h->acciopath_status)) {
-		rc = hpsa_ioaccel_submit(h, c, cmd);
+		/* Submit with the retry_pending flag unset. */
+		rc = hpsa_ioaccel_submit(h, c, cmd, false);
 		if (rc == 0)
 			return 0;
 		if (rc == SCSI_MLQUEUE_HOST_BUSY) {
@@ -6105,6 +6128,7 @@ return_reset_status:
  * at init, and managed by cmd_tagged_alloc() and cmd_tagged_free() using the
  * block request tag as an index into a table of entries.  cmd_tagged_free() is
  * the complement, although cmd_free() may be called instead.
+ * This function is only called for new requests from queue_command.
  */
 static struct CommandList *cmd_tagged_alloc(struct ctlr_info *h,
 					    struct scsi_cmnd *scmd)
@@ -6139,8 +6163,14 @@ static struct CommandList *cmd_tagged_alloc(struct ctlr_info *h,
 	}
 
 	atomic_inc(&c->refcount);
-
 	hpsa_cmd_partial_init(h, idx, c);
+
+	/*
+	 * This is a new command obtained from queue_command so
+	 * there have not been any driver initiated retry attempts.
+	 */
+	c->retry_pending = false;
+
 	return c;
 }
 
@@ -6208,6 +6238,13 @@ static struct CommandList *cmd_alloc(struct ctlr_info *h)
 	}
 	hpsa_cmd_partial_init(h, i, c);
 	c->device = NULL;
+
+	/*
+	 * cmd_alloc is for "internal" commands and they are never
+	 * retried.
+	 */
+	c->retry_pending = false;
+
 	return c;
 }
 
diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h
index 46df2e3ff89b..d126bb877250 100644
--- a/drivers/scsi/hpsa_cmd.h
+++ b/drivers/scsi/hpsa_cmd.h
@@ -448,7 +448,7 @@ struct CommandList {
 	 */
 	struct hpsa_scsi_dev_t *phys_disk;
 
-	int abort_pending;
+	bool retry_pending;
 	struct hpsa_scsi_dev_t *device;
 	atomic_t refcount; /* Must be last to avoid memset in hpsa_cmd_init() */
 } __aligned(COMMANDLIST_ALIGNMENT);
diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c
index bee16850b236..58e62162882f 100644
--- a/drivers/scsi/isci/request.c
+++ b/drivers/scsi/isci/request.c
@@ -1480,8 +1480,6 @@ static enum sci_status
 stp_request_pio_await_h2d_completion_tc_event(struct isci_request *ireq,
 					      u32 completion_code)
 {
-	enum sci_status status = SCI_SUCCESS;
-
 	switch (SCU_GET_COMPLETION_TL_STATUS(completion_code)) {
 	case SCU_MAKE_COMPLETION_STATUS(SCU_TASK_DONE_GOOD):
 		ireq->scu_status = SCU_TASK_DONE_GOOD;
@@ -1500,7 +1498,7 @@ stp_request_pio_await_h2d_completion_tc_event(struct isci_request *ireq,
 		break;
 	}
 
-	return status;
+	return SCI_SUCCESS;
 }
 
 static enum sci_status
@@ -2152,8 +2150,6 @@ static enum sci_status stp_request_udma_await_tc_event(struct isci_request *ireq
 static enum sci_status atapi_raw_completion(struct isci_request *ireq, u32 completion_code,
 						  enum sci_base_request_states next)
 {
-	enum sci_status status = SCI_SUCCESS;
-
 	switch (SCU_GET_COMPLETION_TL_STATUS(completion_code)) {
 	case SCU_MAKE_COMPLETION_STATUS(SCU_TASK_DONE_GOOD):
 		ireq->scu_status = SCU_TASK_DONE_GOOD;
@@ -2172,7 +2168,7 @@ static enum sci_status atapi_raw_completion(struct isci_request *ireq, u32 compl
 		break;
 	}
 
-	return status;
+	return SCI_SUCCESS;
 }
 
 static enum sci_status atapi_data_tc_completion_handler(struct isci_request *ireq,
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index a9ce6298b935..dd33ce0e3737 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -847,6 +847,7 @@ iscsi_sw_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max,
 	struct iscsi_session *session;
 	struct iscsi_sw_tcp_host *tcp_sw_host;
 	struct Scsi_Host *shost;
+	int rc;
 
 	if (ep) {
 		printk(KERN_ERR "iscsi_tcp: invalid ep %p.\n", ep);
@@ -864,6 +865,11 @@ iscsi_sw_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max,
 	shost->max_channel = 0;
 	shost->max_cmd_len = SCSI_MAX_VARLEN_CDB_SIZE;
 
+	rc = iscsi_host_get_max_scsi_cmds(shost, cmds_max);
+	if (rc < 0)
+		goto free_host;
+	shost->can_queue = rc;
+
 	if (iscsi_host_add(shost, NULL))
 		goto free_host;
 
@@ -878,7 +884,6 @@ iscsi_sw_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max,
 	tcp_sw_host = iscsi_host_priv(shost);
 	tcp_sw_host->session = session;
 
-	shost->can_queue = session->scsi_cmds_max;
 	if (iscsi_tcp_r2tpool_alloc(session))
 		goto remove_session;
 	return cls_session;
@@ -981,7 +986,7 @@ static struct scsi_host_template iscsi_sw_tcp_sht = {
 	.name			= "iSCSI Initiator over TCP/IP",
 	.queuecommand           = iscsi_queuecommand,
 	.change_queue_depth	= scsi_change_queue_depth,
-	.can_queue		= ISCSI_DEF_XMIT_CMDS_MAX - 1,
+	.can_queue		= ISCSI_TOTAL_CMDS_MAX,
 	.sg_tablesize		= 4096,
 	.max_sectors		= 0xFFFF,
 	.cmd_per_lun		= ISCSI_DEF_CMD_PER_LUN,
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 4e668aafbcca..7ad11e42306d 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -523,16 +523,6 @@ static void iscsi_complete_task(struct iscsi_task *task, int state)
 	WARN_ON_ONCE(task->state == ISCSI_TASK_FREE);
 	task->state = state;
 
-	spin_lock_bh(&conn->taskqueuelock);
-	if (!list_empty(&task->running)) {
-		pr_debug_once("%s while task on list", __func__);
-		list_del_init(&task->running);
-	}
-	spin_unlock_bh(&conn->taskqueuelock);
-
-	if (conn->task == task)
-		conn->task = NULL;
-
 	if (READ_ONCE(conn->ping_task) == task)
 		WRITE_ONCE(conn->ping_task, NULL);
 
@@ -564,11 +554,41 @@ void iscsi_complete_scsi_task(struct iscsi_task *task,
 }
 EXPORT_SYMBOL_GPL(iscsi_complete_scsi_task);
 
+/*
+ * Must be called with back and frwd lock
+ */
+static bool cleanup_queued_task(struct iscsi_task *task)
+{
+	struct iscsi_conn *conn = task->conn;
+	bool early_complete = false;
+
+	/* Bad target might have completed task while it was still running */
+	if (task->state == ISCSI_TASK_COMPLETED)
+		early_complete = true;
+
+	if (!list_empty(&task->running)) {
+		list_del_init(&task->running);
+		/*
+		 * If it's on a list but still running, this could be from
+		 * a bad target sending a rsp early, cleanup from a TMF, or
+		 * session recovery.
+		 */
+		if (task->state == ISCSI_TASK_RUNNING ||
+		    task->state == ISCSI_TASK_COMPLETED)
+			__iscsi_put_task(task);
+	}
+
+	if (conn->task == task) {
+		conn->task = NULL;
+		__iscsi_put_task(task);
+	}
+
+	return early_complete;
+}
 
 /*
- * session back_lock must be held and if not called for a task that is
- * still pending or from the xmit thread, then xmit thread must
- * be suspended.
+ * session frwd lock must be held and if not called for a task that is still
+ * pending or from the xmit thread, then xmit thread must be suspended
  */
 static void fail_scsi_task(struct iscsi_task *task, int err)
 {
@@ -576,14 +596,11 @@ static void fail_scsi_task(struct iscsi_task *task, int err)
 	struct scsi_cmnd *sc;
 	int state;
 
-	/*
-	 * if a command completes and we get a successful tmf response
-	 * we will hit this because the scsi eh abort code does not take
-	 * a ref to the task.
-	 */
-	sc = task->sc;
-	if (!sc)
+	spin_lock_bh(&conn->session->back_lock);
+	if (cleanup_queued_task(task)) {
+		spin_unlock_bh(&conn->session->back_lock);
 		return;
+	}
 
 	if (task->state == ISCSI_TASK_PENDING) {
 		/*
@@ -598,11 +615,9 @@ static void fail_scsi_task(struct iscsi_task *task, int err)
 	else
 		state = ISCSI_TASK_ABRT_TMF;
 
+	sc = task->sc;
 	sc->result = err << 16;
 	scsi_set_resid(sc, scsi_bufflen(sc));
-
-	/* regular RX path uses back_lock */
-	spin_lock_bh(&conn->session->back_lock);
 	iscsi_complete_task(task, state);
 	spin_unlock_bh(&conn->session->back_lock);
 }
@@ -748,9 +763,7 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 		if (session->tt->xmit_task(task))
 			goto free_task;
 	} else {
-		spin_lock_bh(&conn->taskqueuelock);
 		list_add_tail(&task->running, &conn->mgmtqueue);
-		spin_unlock_bh(&conn->taskqueuelock);
 		iscsi_conn_queue_work(conn);
 	}
 
@@ -1411,31 +1424,61 @@ static int iscsi_check_cmdsn_window_closed(struct iscsi_conn *conn)
 	return 0;
 }
 
-static int iscsi_xmit_task(struct iscsi_conn *conn)
+static int iscsi_xmit_task(struct iscsi_conn *conn, struct iscsi_task *task,
+			   bool was_requeue)
 {
-	struct iscsi_task *task = conn->task;
 	int rc;
 
-	if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx))
-		return -ENODATA;
-
 	spin_lock_bh(&conn->session->back_lock);
-	if (conn->task == NULL) {
+
+	if (!conn->task) {
+		/* Take a ref so we can access it after xmit_task() */
+		__iscsi_get_task(task);
+	} else {
+		/* Already have a ref from when we failed to send it last call */
+		conn->task = NULL;
+	}
+
+	/*
+	 * If this was a requeue for a R2T we have an extra ref on the task in
+	 * case a bad target sends a cmd rsp before we have handled the task.
+	 */
+	if (was_requeue)
+		__iscsi_put_task(task);
+
+	/*
+	 * Do this after dropping the extra ref because if this was a requeue
+	 * it's removed from that list and cleanup_queued_task would miss it.
+	 */
+	if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) {
+		/*
+		 * Save the task and ref in case we weren't cleaning up this
+		 * task and get woken up again.
+		 */
+		conn->task = task;
 		spin_unlock_bh(&conn->session->back_lock);
 		return -ENODATA;
 	}
-	__iscsi_get_task(task);
 	spin_unlock_bh(&conn->session->back_lock);
+
 	spin_unlock_bh(&conn->session->frwd_lock);
 	rc = conn->session->tt->xmit_task(task);
 	spin_lock_bh(&conn->session->frwd_lock);
 	if (!rc) {
 		/* done with this task */
 		task->last_xfer = jiffies;
-		conn->task = NULL;
 	}
 	/* regular RX path uses back_lock */
 	spin_lock(&conn->session->back_lock);
+	if (rc && task->state == ISCSI_TASK_RUNNING) {
+		/*
+		 * get an extra ref that is released next time we access it
+		 * as conn->task above.
+		 */
+		__iscsi_get_task(task);
+		conn->task = task;
+	}
+
 	__iscsi_put_task(task);
 	spin_unlock(&conn->session->back_lock);
 	return rc;
@@ -1445,9 +1488,7 @@ static int iscsi_xmit_task(struct iscsi_conn *conn)
  * iscsi_requeue_task - requeue task to run from session workqueue
  * @task: task to requeue
  *
- * LLDs that need to run a task from the session workqueue should call
- * this. The session frwd_lock must be held. This should only be called
- * by software drivers.
+ * Callers must have taken a ref to the task that is going to be requeued.
  */
 void iscsi_requeue_task(struct iscsi_task *task)
 {
@@ -1457,11 +1498,18 @@ void iscsi_requeue_task(struct iscsi_task *task)
 	 * this may be on the requeue list already if the xmit_task callout
 	 * is handling the r2ts while we are adding new ones
 	 */
-	spin_lock_bh(&conn->taskqueuelock);
-	if (list_empty(&task->running))
+	spin_lock_bh(&conn->session->frwd_lock);
+	if (list_empty(&task->running)) {
 		list_add_tail(&task->running, &conn->requeue);
-	spin_unlock_bh(&conn->taskqueuelock);
+	} else {
+		/*
+		 * Don't need the extra ref since it's already requeued and
+		 * has a ref.
+		 */
+		iscsi_put_task(task);
+	}
 	iscsi_conn_queue_work(conn);
+	spin_unlock_bh(&conn->session->frwd_lock);
 }
 EXPORT_SYMBOL_GPL(iscsi_requeue_task);
 
@@ -1487,7 +1535,7 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
 	}
 
 	if (conn->task) {
-		rc = iscsi_xmit_task(conn);
+		rc = iscsi_xmit_task(conn, conn->task, false);
 	        if (rc)
 		        goto done;
 	}
@@ -1497,54 +1545,41 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
 	 * only have one nop-out as a ping from us and targets should not
 	 * overflow us with nop-ins
 	 */
-	spin_lock_bh(&conn->taskqueuelock);
 check_mgmt:
 	while (!list_empty(&conn->mgmtqueue)) {
-		conn->task = list_entry(conn->mgmtqueue.next,
-					 struct iscsi_task, running);
-		list_del_init(&conn->task->running);
-		spin_unlock_bh(&conn->taskqueuelock);
-		if (iscsi_prep_mgmt_task(conn, conn->task)) {
+		task = list_entry(conn->mgmtqueue.next, struct iscsi_task,
+				  running);
+		list_del_init(&task->running);
+		if (iscsi_prep_mgmt_task(conn, task)) {
 			/* regular RX path uses back_lock */
 			spin_lock_bh(&conn->session->back_lock);
-			__iscsi_put_task(conn->task);
+			__iscsi_put_task(task);
 			spin_unlock_bh(&conn->session->back_lock);
-			conn->task = NULL;
-			spin_lock_bh(&conn->taskqueuelock);
 			continue;
 		}
-		rc = iscsi_xmit_task(conn);
+		rc = iscsi_xmit_task(conn, task, false);
 		if (rc)
 			goto done;
-		spin_lock_bh(&conn->taskqueuelock);
 	}
 
 	/* process pending command queue */
 	while (!list_empty(&conn->cmdqueue)) {
-		conn->task = list_entry(conn->cmdqueue.next, struct iscsi_task,
-					running);
-		list_del_init(&conn->task->running);
-		spin_unlock_bh(&conn->taskqueuelock);
+		task = list_entry(conn->cmdqueue.next, struct iscsi_task,
+				  running);
+		list_del_init(&task->running);
 		if (conn->session->state == ISCSI_STATE_LOGGING_OUT) {
-			fail_scsi_task(conn->task, DID_IMM_RETRY);
-			spin_lock_bh(&conn->taskqueuelock);
+			fail_scsi_task(task, DID_IMM_RETRY);
 			continue;
 		}
-		rc = iscsi_prep_scsi_cmd_pdu(conn->task);
+		rc = iscsi_prep_scsi_cmd_pdu(task);
 		if (rc) {
-			if (rc == -ENOMEM || rc == -EACCES) {
-				spin_lock_bh(&conn->taskqueuelock);
-				list_add_tail(&conn->task->running,
-					      &conn->cmdqueue);
-				conn->task = NULL;
-				spin_unlock_bh(&conn->taskqueuelock);
-				goto done;
-			} else
-				fail_scsi_task(conn->task, DID_ABORT);
-			spin_lock_bh(&conn->taskqueuelock);
+			if (rc == -ENOMEM || rc == -EACCES)
+				fail_scsi_task(task, DID_IMM_RETRY);
+			else
+				fail_scsi_task(task, DID_ABORT);
 			continue;
 		}
-		rc = iscsi_xmit_task(conn);
+		rc = iscsi_xmit_task(conn, task, false);
 		if (rc)
 			goto done;
 		/*
@@ -1552,7 +1587,6 @@ check_mgmt:
 		 * we need to check the mgmt queue for nops that need to
 		 * be sent to aviod starvation
 		 */
-		spin_lock_bh(&conn->taskqueuelock);
 		if (!list_empty(&conn->mgmtqueue))
 			goto check_mgmt;
 	}
@@ -1566,21 +1600,17 @@ check_mgmt:
 
 		task = list_entry(conn->requeue.next, struct iscsi_task,
 				  running);
+
 		if (iscsi_check_tmf_restrictions(task, ISCSI_OP_SCSI_DATA_OUT))
 			break;
 
-		conn->task = task;
-		list_del_init(&conn->task->running);
-		conn->task->state = ISCSI_TASK_RUNNING;
-		spin_unlock_bh(&conn->taskqueuelock);
-		rc = iscsi_xmit_task(conn);
+		list_del_init(&task->running);
+		rc = iscsi_xmit_task(conn, task, true);
 		if (rc)
 			goto done;
-		spin_lock_bh(&conn->taskqueuelock);
 		if (!list_empty(&conn->mgmtqueue))
 			goto check_mgmt;
 	}
-	spin_unlock_bh(&conn->taskqueuelock);
 	spin_unlock_bh(&conn->session->frwd_lock);
 	return -ENODATA;
 
@@ -1746,9 +1776,7 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc)
 			goto prepd_reject;
 		}
 	} else {
-		spin_lock_bh(&conn->taskqueuelock);
 		list_add_tail(&task->running, &conn->cmdqueue);
-		spin_unlock_bh(&conn->taskqueuelock);
 		iscsi_conn_queue_work(conn);
 	}
 
@@ -1855,27 +1883,39 @@ static int iscsi_exec_task_mgmt_fn(struct iscsi_conn *conn,
 }
 
 /*
- * Fail commands. session lock held and recv side suspended and xmit
- * thread flushed
+ * Fail commands. session frwd lock held and xmit thread flushed.
  */
 static void fail_scsi_tasks(struct iscsi_conn *conn, u64 lun, int error)
 {
+	struct iscsi_session *session = conn->session;
 	struct iscsi_task *task;
 	int i;
 
-	for (i = 0; i < conn->session->cmds_max; i++) {
-		task = conn->session->cmds[i];
+	spin_lock_bh(&session->back_lock);
+	for (i = 0; i < session->cmds_max; i++) {
+		task = session->cmds[i];
 		if (!task->sc || task->state == ISCSI_TASK_FREE)
 			continue;
 
 		if (lun != -1 && lun != task->sc->device->lun)
 			continue;
 
-		ISCSI_DBG_SESSION(conn->session,
+		__iscsi_get_task(task);
+		spin_unlock_bh(&session->back_lock);
+
+		ISCSI_DBG_SESSION(session,
 				  "failing sc %p itt 0x%x state %d\n",
 				  task->sc, task->itt, task->state);
 		fail_scsi_task(task, error);
+
+		spin_unlock_bh(&session->frwd_lock);
+		iscsi_put_task(task);
+		spin_lock_bh(&session->frwd_lock);
+
+		spin_lock_bh(&session->back_lock);
 	}
+
+	spin_unlock_bh(&session->back_lock);
 }
 
 /**
@@ -1953,6 +1993,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
 	ISCSI_DBG_EH(session, "scsi cmd %p timedout\n", sc);
 
 	spin_lock_bh(&session->frwd_lock);
+	spin_lock(&session->back_lock);
 	task = (struct iscsi_task *)sc->SCp.ptr;
 	if (!task) {
 		/*
@@ -1960,8 +2001,11 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
 		 * so let timeout code complete it now.
 		 */
 		rc = BLK_EH_DONE;
+		spin_unlock(&session->back_lock);
 		goto done;
 	}
+	__iscsi_get_task(task);
+	spin_unlock(&session->back_lock);
 
 	if (session->state != ISCSI_STATE_LOGGED_IN) {
 		/*
@@ -2020,6 +2064,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
 		goto done;
 	}
 
+	spin_lock(&session->back_lock);
 	for (i = 0; i < conn->session->cmds_max; i++) {
 		running_task = conn->session->cmds[i];
 		if (!running_task->sc || running_task == task ||
@@ -2052,10 +2097,12 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
 				     "last xfer %lu/%lu. Last check %lu.\n",
 				     task->last_xfer, running_task->last_xfer,
 				     task->last_timeout);
+			spin_unlock(&session->back_lock);
 			rc = BLK_EH_RESET_TIMER;
 			goto done;
 		}
 	}
+	spin_unlock(&session->back_lock);
 
 	/* Assumes nop timeout is shorter than scsi cmd timeout */
 	if (task->have_checked_conn)
@@ -2077,9 +2124,12 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
 	rc = BLK_EH_RESET_TIMER;
 
 done:
-	if (task)
-		task->last_timeout = jiffies;
 	spin_unlock_bh(&session->frwd_lock);
+
+	if (task) {
+		task->last_timeout = jiffies;
+		iscsi_put_task(task);
+	}
 	ISCSI_DBG_EH(session, "return %s\n", rc == BLK_EH_RESET_TIMER ?
 		     "timer reset" : "shutdown or nh");
 	return rc;
@@ -2187,15 +2237,20 @@ int iscsi_eh_abort(struct scsi_cmnd *sc)
 	conn->eh_abort_cnt++;
 	age = session->age;
 
+	spin_lock(&session->back_lock);
 	task = (struct iscsi_task *)sc->SCp.ptr;
-	ISCSI_DBG_EH(session, "aborting [sc %p itt 0x%x]\n",
-		     sc, task->itt);
-
-	/* task completed before time out */
-	if (!task->sc) {
+	if (!task || !task->sc) {
+		/* task completed before time out */
 		ISCSI_DBG_EH(session, "sc completed while abort in progress\n");
-		goto success;
+
+		spin_unlock(&session->back_lock);
+		spin_unlock_bh(&session->frwd_lock);
+		mutex_unlock(&session->eh_mutex);
+		return SUCCESS;
 	}
+	ISCSI_DBG_EH(session, "aborting [sc %p itt 0x%x]\n", sc, task->itt);
+	__iscsi_get_task(task);
+	spin_unlock(&session->back_lock);
 
 	if (task->state == ISCSI_TASK_PENDING) {
 		fail_scsi_task(task, DID_ABORT);
@@ -2257,6 +2312,7 @@ success:
 success_unlocked:
 	ISCSI_DBG_EH(session, "abort success [sc %p itt 0x%x]\n",
 		     sc, task->itt);
+	iscsi_put_task(task);
 	mutex_unlock(&session->eh_mutex);
 	return SUCCESS;
 
@@ -2265,6 +2321,7 @@ failed:
 failed_unlocked:
 	ISCSI_DBG_EH(session, "abort failed [sc %p itt 0x%x]\n", sc,
 		     task ? task->itt : 0);
+	iscsi_put_task(task);
 	mutex_unlock(&session->eh_mutex);
 	return FAILED;
 }
@@ -2591,6 +2648,56 @@ void iscsi_pool_free(struct iscsi_pool *q)
 }
 EXPORT_SYMBOL_GPL(iscsi_pool_free);
 
+int iscsi_host_get_max_scsi_cmds(struct Scsi_Host *shost,
+				 uint16_t requested_cmds_max)
+{
+	int scsi_cmds, total_cmds = requested_cmds_max;
+
+check:
+	if (!total_cmds)
+		total_cmds = ISCSI_DEF_XMIT_CMDS_MAX;
+	/*
+	 * The iscsi layer needs some tasks for nop handling and tmfs,
+	 * so the cmds_max must at least be greater than ISCSI_MGMT_CMDS_MAX
+	 * + 1 command for scsi IO.
+	 */
+	if (total_cmds < ISCSI_TOTAL_CMDS_MIN) {
+		printk(KERN_ERR "iscsi: invalid max cmds of %d. Must be a power of two that is at least %d.\n",
+		       total_cmds, ISCSI_TOTAL_CMDS_MIN);
+		return -EINVAL;
+	}
+
+	if (total_cmds > ISCSI_TOTAL_CMDS_MAX) {
+		printk(KERN_INFO "iscsi: invalid max cmds of %d. Must be a power of 2 less than or equal to %d. Using %d.\n",
+		       requested_cmds_max, ISCSI_TOTAL_CMDS_MAX,
+		       ISCSI_TOTAL_CMDS_MAX);
+		total_cmds = ISCSI_TOTAL_CMDS_MAX;
+	}
+
+	if (!is_power_of_2(total_cmds)) {
+		total_cmds = rounddown_pow_of_two(total_cmds);
+		if (total_cmds < ISCSI_TOTAL_CMDS_MIN) {
+			printk(KERN_ERR "iscsi: invalid max cmds of %d. Must be a power of 2 greater than %d.\n", requested_cmds_max, ISCSI_TOTAL_CMDS_MIN);
+			return -EINVAL;
+		}
+
+		printk(KERN_INFO "iscsi: invalid max cmds %d. Must be a power of 2. Rounding max cmds down to %d.\n",
+		       requested_cmds_max, total_cmds);
+	}
+
+	scsi_cmds = total_cmds - ISCSI_MGMT_CMDS_MAX;
+	if (shost->can_queue && scsi_cmds > shost->can_queue) {
+		total_cmds = shost->can_queue;
+
+		printk(KERN_INFO "iscsi: requested max cmds %u is higher than driver limit. Using driver limit %u\n",
+		       requested_cmds_max, shost->can_queue);
+		goto check;
+	}
+
+	return scsi_cmds;
+}
+EXPORT_SYMBOL_GPL(iscsi_host_get_max_scsi_cmds);
+
 /**
  * iscsi_host_add - add host to system
  * @shost: scsi host
@@ -2681,8 +2788,6 @@ void iscsi_host_remove(struct Scsi_Host *shost)
 		flush_signals(current);
 
 	scsi_remove_host(shost);
-	if (ihost->workq)
-		destroy_workqueue(ihost->workq);
 }
 EXPORT_SYMBOL_GPL(iscsi_host_remove);
 
@@ -2690,6 +2795,9 @@ void iscsi_host_free(struct Scsi_Host *shost)
 {
 	struct iscsi_host *ihost = shost_priv(shost);
 
+	if (ihost->workq)
+		destroy_workqueue(ihost->workq);
+
 	kfree(ihost->netdev);
 	kfree(ihost->hwaddress);
 	kfree(ihost->initiatorname);
@@ -2743,7 +2851,7 @@ iscsi_session_setup(struct iscsi_transport *iscsit, struct Scsi_Host *shost,
 	struct iscsi_host *ihost = shost_priv(shost);
 	struct iscsi_session *session;
 	struct iscsi_cls_session *cls_session;
-	int cmd_i, scsi_cmds, total_cmds = cmds_max;
+	int cmd_i, scsi_cmds;
 	unsigned long flags;
 
 	spin_lock_irqsave(&ihost->lock, flags);
@@ -2754,37 +2862,9 @@ iscsi_session_setup(struct iscsi_transport *iscsit, struct Scsi_Host *shost,
 	ihost->num_sessions++;
 	spin_unlock_irqrestore(&ihost->lock, flags);
 
-	if (!total_cmds)
-		total_cmds = ISCSI_DEF_XMIT_CMDS_MAX;
-	/*
-	 * The iscsi layer needs some tasks for nop handling and tmfs,
-	 * so the cmds_max must at least be greater than ISCSI_MGMT_CMDS_MAX
-	 * + 1 command for scsi IO.
-	 */
-	if (total_cmds < ISCSI_TOTAL_CMDS_MIN) {
-		printk(KERN_ERR "iscsi: invalid can_queue of %d. can_queue "
-		       "must be a power of two that is at least %d.\n",
-		       total_cmds, ISCSI_TOTAL_CMDS_MIN);
+	scsi_cmds = iscsi_host_get_max_scsi_cmds(shost, cmds_max);
+	if (scsi_cmds < 0)
 		goto dec_session_count;
-	}
-
-	if (total_cmds > ISCSI_TOTAL_CMDS_MAX) {
-		printk(KERN_ERR "iscsi: invalid can_queue of %d. can_queue "
-		       "must be a power of 2 less than or equal to %d.\n",
-		       cmds_max, ISCSI_TOTAL_CMDS_MAX);
-		total_cmds = ISCSI_TOTAL_CMDS_MAX;
-	}
-
-	if (!is_power_of_2(total_cmds)) {
-		printk(KERN_ERR "iscsi: invalid can_queue of %d. can_queue "
-		       "must be a power of 2.\n", total_cmds);
-		total_cmds = rounddown_pow_of_two(total_cmds);
-		if (total_cmds < ISCSI_TOTAL_CMDS_MIN)
-			goto dec_session_count;
-		printk(KERN_INFO "iscsi: Rounding can_queue to %d.\n",
-		       total_cmds);
-	}
-	scsi_cmds = total_cmds - ISCSI_MGMT_CMDS_MAX;
 
 	cls_session = iscsi_alloc_session(shost, iscsit,
 					  sizeof(struct iscsi_session) +
@@ -2800,7 +2880,7 @@ iscsi_session_setup(struct iscsi_transport *iscsit, struct Scsi_Host *shost,
 	session->lu_reset_timeout = 15;
 	session->abort_timeout = 10;
 	session->scsi_cmds_max = scsi_cmds;
-	session->cmds_max = total_cmds;
+	session->cmds_max = scsi_cmds + ISCSI_MGMT_CMDS_MAX;
 	session->queued_cmdsn = session->cmdsn = initial_cmdsn;
 	session->exp_cmdsn = initial_cmdsn + 1;
 	session->max_cmdsn = initial_cmdsn + 1;
@@ -2919,7 +2999,6 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size,
 	INIT_LIST_HEAD(&conn->mgmtqueue);
 	INIT_LIST_HEAD(&conn->cmdqueue);
 	INIT_LIST_HEAD(&conn->requeue);
-	spin_lock_init(&conn->taskqueuelock);
 	INIT_WORK(&conn->xmitwork, iscsi_xmitworker);
 
 	/* allocate login_task used for the login/text sequences */
@@ -3085,10 +3164,16 @@ fail_mgmt_tasks(struct iscsi_session *session, struct iscsi_conn *conn)
 		ISCSI_DBG_SESSION(conn->session,
 				  "failing mgmt itt 0x%x state %d\n",
 				  task->itt, task->state);
+
+		spin_lock_bh(&session->back_lock);
+		if (cleanup_queued_task(task)) {
+			spin_unlock_bh(&session->back_lock);
+			continue;
+		}
+
 		state = ISCSI_TASK_ABRT_SESS_RECOV;
 		if (task->state == ISCSI_TASK_PENDING)
 			state = ISCSI_TASK_COMPLETED;
-		spin_lock_bh(&session->back_lock);
 		iscsi_complete_task(task, state);
 		spin_unlock_bh(&session->back_lock);
 	}
@@ -3189,6 +3274,13 @@ int iscsi_conn_bind(struct iscsi_cls_session *cls_session,
 	spin_unlock_bh(&session->frwd_lock);
 
 	/*
+	 * The target could have reduced it's window size between logins, so
+	 * we have to reset max/exp cmdsn so we can see the new values.
+	 */
+	spin_lock_bh(&session->back_lock);
+	session->max_cmdsn = session->exp_cmdsn = session->cmdsn + 1;
+	spin_unlock_bh(&session->back_lock);
+	/*
 	 * Unblock xmitworker(), Login Phase will pass through.
 	 */
 	clear_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
index 83f14b2c8804..2e9ffe3d1a55 100644
--- a/drivers/scsi/libiscsi_tcp.c
+++ b/drivers/scsi/libiscsi_tcp.c
@@ -524,48 +524,79 @@ static int iscsi_tcp_data_in(struct iscsi_conn *conn, struct iscsi_task *task)
 /**
  * iscsi_tcp_r2t_rsp - iSCSI R2T Response processing
  * @conn: iscsi connection
- * @task: scsi command task
+ * @hdr: PDU header
  */
-static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
+static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
 {
 	struct iscsi_session *session = conn->session;
-	struct iscsi_tcp_task *tcp_task = task->dd_data;
-	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
-	struct iscsi_r2t_rsp *rhdr = (struct iscsi_r2t_rsp *)tcp_conn->in.hdr;
+	struct iscsi_tcp_task *tcp_task;
+	struct iscsi_tcp_conn *tcp_conn;
+	struct iscsi_r2t_rsp *rhdr;
 	struct iscsi_r2t_info *r2t;
-	int r2tsn = be32_to_cpu(rhdr->r2tsn);
+	struct iscsi_task *task;
 	u32 data_length;
 	u32 data_offset;
+	int r2tsn;
 	int rc;
 
+	spin_lock(&session->back_lock);
+	task = iscsi_itt_to_ctask(conn, hdr->itt);
+	if (!task) {
+		spin_unlock(&session->back_lock);
+		return ISCSI_ERR_BAD_ITT;
+	} else if (task->sc->sc_data_direction != DMA_TO_DEVICE) {
+		spin_unlock(&session->back_lock);
+		return ISCSI_ERR_PROTO;
+	}
+	/*
+	 * A bad target might complete the cmd before we have handled R2Ts
+	 * so get a ref to the task that will be dropped in the xmit path.
+	 */
+	if (task->state != ISCSI_TASK_RUNNING) {
+		spin_unlock(&session->back_lock);
+		/* Let the path that got the early rsp complete it */
+		return 0;
+	}
+	task->last_xfer = jiffies;
+	__iscsi_get_task(task);
+
+	tcp_conn = conn->dd_data;
+	rhdr = (struct iscsi_r2t_rsp *)tcp_conn->in.hdr;
+	/* fill-in new R2T associated with the task */
+	iscsi_update_cmdsn(session, (struct iscsi_nopin *)rhdr);
+	spin_unlock(&session->back_lock);
+
 	if (tcp_conn->in.datalen) {
 		iscsi_conn_printk(KERN_ERR, conn,
 				  "invalid R2t with datalen %d\n",
 				  tcp_conn->in.datalen);
-		return ISCSI_ERR_DATALEN;
+		rc = ISCSI_ERR_DATALEN;
+		goto put_task;
 	}
 
+	tcp_task = task->dd_data;
+	r2tsn = be32_to_cpu(rhdr->r2tsn);
 	if (tcp_task->exp_datasn != r2tsn){
 		ISCSI_DBG_TCP(conn, "task->exp_datasn(%d) != rhdr->r2tsn(%d)\n",
 			      tcp_task->exp_datasn, r2tsn);
-		return ISCSI_ERR_R2TSN;
+		rc = ISCSI_ERR_R2TSN;
+		goto put_task;
 	}
 
-	/* fill-in new R2T associated with the task */
-	iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr);
-
-	if (!task->sc || session->state != ISCSI_STATE_LOGGED_IN) {
+	if (session->state != ISCSI_STATE_LOGGED_IN) {
 		iscsi_conn_printk(KERN_INFO, conn,
 				  "dropping R2T itt %d in recovery.\n",
 				  task->itt);
-		return 0;
+		rc = 0;
+		goto put_task;
 	}
 
 	data_length = be32_to_cpu(rhdr->data_length);
 	if (data_length == 0) {
 		iscsi_conn_printk(KERN_ERR, conn,
 				  "invalid R2T with zero data len\n");
-		return ISCSI_ERR_DATALEN;
+		rc = ISCSI_ERR_DATALEN;
+		goto put_task;
 	}
 
 	if (data_length > session->max_burst)
@@ -579,7 +610,8 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 				  "invalid R2T with data len %u at offset %u "
 				  "and total length %d\n", data_length,
 				  data_offset, task->sc->sdb.length);
-		return ISCSI_ERR_DATALEN;
+		rc = ISCSI_ERR_DATALEN;
+		goto put_task;
 	}
 
 	spin_lock(&tcp_task->pool2queue);
@@ -589,7 +621,8 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 				  "Target has sent more R2Ts than it "
 				  "negotiated for or driver has leaked.\n");
 		spin_unlock(&tcp_task->pool2queue);
-		return ISCSI_ERR_PROTO;
+		rc = ISCSI_ERR_PROTO;
+		goto put_task;
 	}
 
 	r2t->exp_statsn = rhdr->statsn;
@@ -607,6 +640,10 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 
 	iscsi_requeue_task(task);
 	return 0;
+
+put_task:
+	iscsi_put_task(task);
+	return rc;
 }
 
 /*
@@ -730,20 +767,11 @@ iscsi_tcp_hdr_dissect(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
 		rc = iscsi_complete_pdu(conn, hdr, NULL, 0);
 		break;
 	case ISCSI_OP_R2T:
-		spin_lock(&conn->session->back_lock);
-		task = iscsi_itt_to_ctask(conn, hdr->itt);
-		spin_unlock(&conn->session->back_lock);
-		if (!task)
-			rc = ISCSI_ERR_BAD_ITT;
-		else if (ahslen)
+		if (ahslen) {
 			rc = ISCSI_ERR_AHSLEN;
-		else if (task->sc->sc_data_direction == DMA_TO_DEVICE) {
-			task->last_xfer = jiffies;
-			spin_lock(&conn->session->frwd_lock);
-			rc = iscsi_tcp_r2t_rsp(conn, task);
-			spin_unlock(&conn->session->frwd_lock);
-		} else
-			rc = ISCSI_ERR_PROTO;
+			break;
+		}
+		rc = iscsi_tcp_r2t_rsp(conn, hdr);
 		break;
 	case ISCSI_OP_LOGIN_RSP:
 	case ISCSI_OP_TEXT_RSP:
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index f5582c8e77c9..ac066f86bb14 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -3648,25 +3648,16 @@ _base_get_msix_index(struct MPT3SAS_ADAPTER *ioc,
 		    base_mod64(atomic64_add_return(1,
 		    &ioc->total_io_cnt), ioc->reply_queue_count) : 0;
 
-	return ioc->cpu_msix_table[raw_smp_processor_id()];
-}
+	if (scmd && ioc->shost->nr_hw_queues > 1) {
+		u32 tag = blk_mq_unique_tag(scmd->request);
 
-/**
- * _base_sdev_nr_inflight_request -get number of inflight requests
- *				   of a request queue.
- * @q: request_queue object
- *
- * returns number of inflight request of a request queue.
- */
-inline unsigned long
-_base_sdev_nr_inflight_request(struct request_queue *q)
-{
-	struct blk_mq_hw_ctx *hctx = q->queue_hw_ctx[0];
+		return blk_mq_unique_tag_to_hwq(tag) +
+			ioc->high_iops_queues;
+	}
 
-	return atomic_read(&hctx->nr_active);
+	return ioc->cpu_msix_table[raw_smp_processor_id()];
 }
 
-
 /**
  * _base_get_high_iops_msix_index - get the msix index of
  *				high iops queues
@@ -3686,7 +3677,8 @@ _base_get_high_iops_msix_index(struct MPT3SAS_ADAPTER *ioc,
 	 * reply queues in terms of batch count 16 when outstanding
 	 * IOs on the target device is >=8.
 	 */
-	if (_base_sdev_nr_inflight_request(scmd->device->request_queue) >
+
+	if (atomic_read(&scmd->device->device_busy) >
 	    MPT3SAS_DEVICE_HIGH_IOPS_DEPTH)
 		return base_mod64((
 		    atomic64_add_return(1, &ioc->high_iops_outstanding) /
@@ -3739,8 +3731,23 @@ mpt3sas_base_get_smid_scsiio(struct MPT3SAS_ADAPTER *ioc, u8 cb_idx,
 	struct scsi_cmnd *scmd)
 {
 	struct scsiio_tracker *request = scsi_cmd_priv(scmd);
-	unsigned int tag = scmd->request->tag;
 	u16 smid;
+	u32 tag, unique_tag;
+
+	unique_tag = blk_mq_unique_tag(scmd->request);
+	tag = blk_mq_unique_tag_to_tag(unique_tag);
+
+	/*
+	 * Store hw queue number corresponding to the tag.
+	 * This hw queue number is used later to determine
+	 * the unique_tag using the logic below. This unique_tag
+	 * is used to retrieve the scmd pointer corresponding
+	 * to tag using scsi_host_find_tag() API.
+	 *
+	 * tag = smid - 1;
+	 * unique_tag = ioc->io_queue_num[tag] << BLK_MQ_UNIQUE_TAG_BITS | tag;
+	 */
+	ioc->io_queue_num[tag] = blk_mq_unique_tag_to_hwq(unique_tag);
 
 	smid = tag + 1;
 	request->cb_idx = cb_idx;
@@ -3831,6 +3838,7 @@ mpt3sas_base_free_smid(struct MPT3SAS_ADAPTER *ioc, u16 smid)
 
 		mpt3sas_base_clear_st(ioc, st);
 		_base_recovery_check(ioc);
+		ioc->io_queue_num[smid - 1] = 0;
 		return;
 	}
 
@@ -5362,6 +5370,9 @@ _base_release_memory_pools(struct MPT3SAS_ADAPTER *ioc)
 		kfree(ioc->chain_lookup);
 		ioc->chain_lookup = NULL;
 	}
+
+	kfree(ioc->io_queue_num);
+	ioc->io_queue_num = NULL;
 }
 
 /**
@@ -5641,7 +5652,8 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc)
 	reply_post_free_sz = ioc->reply_post_queue_depth *
 	    sizeof(Mpi2DefaultReplyDescriptor_t);
 	rdpq_sz = reply_post_free_sz * RDPQ_MAX_INDEX_IN_ONE_CHUNK;
-	if (_base_is_controller_msix_enabled(ioc) && !ioc->rdpq_array_enable)
+	if ((_base_is_controller_msix_enabled(ioc) && !ioc->rdpq_array_enable)
+	    || (ioc->reply_queue_count < RDPQ_MAX_INDEX_IN_ONE_CHUNK))
 		rdpq_sz = reply_post_free_sz * ioc->reply_queue_count;
 	ret = base_alloc_rdpq_dma_pool(ioc, rdpq_sz);
 	if (ret == -EAGAIN) {
@@ -5772,6 +5784,11 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc)
 		    ioc_info(ioc, "internal(0x%p): depth(%d), start smid(%d)\n",
 			     ioc->internal,
 			     ioc->internal_depth, ioc->internal_smid));
+
+	ioc->io_queue_num = kcalloc(ioc->scsiio_depth,
+	    sizeof(u16), GFP_KERNEL);
+	if (!ioc->io_queue_num)
+		goto out;
 	/*
 	 * The number of NVMe page sized blocks needed is:
 	 *     (((sg_tablesize * 8) - 1) / (page_size - 8)) + 1
@@ -8174,8 +8191,11 @@ mpt3sas_base_hard_reset_handler(struct MPT3SAS_ADAPTER *ioc,
 		ioc_state = mpt3sas_base_get_iocstate(ioc, 0);
 		if ((ioc_state & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT ||
 		    (ioc_state & MPI2_IOC_STATE_MASK) ==
-		    MPI2_IOC_STATE_COREDUMP)
+		    MPI2_IOC_STATE_COREDUMP) {
 			is_fault = 1;
+			ioc->htb_rel.trigger_info_dwords[1] =
+			    (ioc_state & MPI2_DOORBELL_DATA_MASK);
+		}
 	}
 	_base_pre_reset_handler(ioc);
 	mpt3sas_wait_for_commands_to_complete(ioc);
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h
index 2def7a340616..315aee6ef86f 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.h
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.h
@@ -77,8 +77,8 @@
 #define MPT3SAS_DRIVER_NAME		"mpt3sas"
 #define MPT3SAS_AUTHOR "Avago Technologies <MPT-FusionLinux.pdl@avagotech.com>"
 #define MPT3SAS_DESCRIPTION	"LSI MPT Fusion SAS 3.0 Device Driver"
-#define MPT3SAS_DRIVER_VERSION		"36.100.00.00"
-#define MPT3SAS_MAJOR_VERSION		36
+#define MPT3SAS_DRIVER_VERSION		"37.100.00.00"
+#define MPT3SAS_MAJOR_VERSION		37
 #define MPT3SAS_MINOR_VERSION		100
 #define MPT3SAS_BUILD_VERSION		0
 #define MPT3SAS_RELEASE_VERSION	00
@@ -1073,6 +1073,50 @@ struct hba_port {
 
 #define MULTIPATH_DISABLED_PORT_ID     0xFF
 
+/**
+ * struct htb_rel_query - diagnostic buffer release reason
+ * @unique_id - unique id associated with this buffer.
+ * @buffer_rel_condition - Release condition ioctl/sysfs/reset
+ * @reserved
+ * @trigger_type - Master/Event/scsi/MPI
+ * @trigger_info_dwords - Data Correspondig to trigger type
+ */
+struct htb_rel_query {
+	u16	buffer_rel_condition;
+	u16	reserved;
+	u32	trigger_type;
+	u32	trigger_info_dwords[2];
+};
+
+/* Buffer_rel_condition bit fields */
+
+/* Bit 0 - Diag Buffer not Released */
+#define MPT3_DIAG_BUFFER_NOT_RELEASED	(0x00)
+/* Bit 0 - Diag Buffer Released */
+#define MPT3_DIAG_BUFFER_RELEASED	(0x01)
+
+/*
+ * Bit 1 - Diag Buffer Released by IOCTL,
+ * This bit is valid only if Bit 0 is one
+ */
+#define MPT3_DIAG_BUFFER_REL_IOCTL	(0x02 | MPT3_DIAG_BUFFER_RELEASED)
+
+/*
+ * Bit 2 - Diag Buffer Released by Trigger,
+ * This bit is valid only if Bit 0 is one
+ */
+#define MPT3_DIAG_BUFFER_REL_TRIGGER	(0x04 | MPT3_DIAG_BUFFER_RELEASED)
+
+/*
+ * Bit 3 - Diag Buffer Released by SysFs,
+ * This bit is valid only if Bit 0 is one
+ */
+#define MPT3_DIAG_BUFFER_REL_SYSFS	(0x08 | MPT3_DIAG_BUFFER_RELEASED)
+
+/* DIAG RESET Master trigger flags */
+#define MPT_DIAG_RESET_ISSUED_BY_DRIVER 0x00000000
+#define MPT_DIAG_RESET_ISSUED_BY_USER	0x00000001
+
 typedef void (*MPT3SAS_FLUSH_RUNNING_CMDS)(struct MPT3SAS_ADAPTER *ioc);
 /**
  * struct MPT3SAS_ADAPTER - per adapter struct
@@ -1439,6 +1483,7 @@ struct MPT3SAS_ADAPTER {
 	spinlock_t	scsi_lookup_lock;
 	int		pending_io_count;
 	wait_queue_head_t reset_wq;
+	u16		*io_queue_num;
 
 	/* PCIe SGL */
 	struct dma_pool *pcie_sgl_dma_pool;
@@ -1529,6 +1574,8 @@ struct MPT3SAS_ADAPTER {
 	u32		diagnostic_flags[MPI2_DIAG_BUF_TYPE_COUNT];
 	u32		ring_buffer_offset;
 	u32		ring_buffer_sz;
+	struct htb_rel_query htb_rel;
+	u8 reset_from_user;
 	u8		is_warpdrive;
 	u8		is_mcpu_endpoint;
 	u8		hide_ir_msg;
@@ -1565,6 +1612,7 @@ struct mpt3sas_debugfs_buffer {
 };
 
 #define MPT_DRV_SUPPORT_BITMAP_MEMMOVE 0x00000001
+#define MPT_DRV_SUPPORT_BITMAP_ADDNLQUERY	0x00000002
 
 typedef u8 (*MPT_CALLBACK)(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index,
 	u32 reply);
diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
index c8a0ce18f2c5..44f9a05db94e 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
@@ -479,6 +479,8 @@ void mpt3sas_ctl_pre_reset_handler(struct MPT3SAS_ADAPTER *ioc)
 		ioc_info(ioc,
 		    "%s: Releasing the trace buffer due to adapter reset.",
 		    __func__);
+		ioc->htb_rel.buffer_rel_condition =
+		    MPT3_DIAG_BUFFER_REL_TRIGGER;
 		mpt3sas_send_diag_release(ioc, i, &issue_reset);
 	}
 }
@@ -1334,6 +1336,7 @@ _ctl_do_reset(struct MPT3SAS_ADAPTER *ioc, void __user *arg)
 	dctlprintk(ioc, ioc_info(ioc, "%s: enter\n",
 				 __func__));
 
+	ioc->reset_from_user = 1;
 	retval = mpt3sas_base_hard_reset_handler(ioc, FORCE_BIG_HAMMER);
 	ioc_info(ioc,
 	    "Ioctl: host reset: %s\n", ((!retval) ? "SUCCESS" : "FAILED"));
@@ -1687,6 +1690,9 @@ _ctl_diag_register_2(struct MPT3SAS_ADAPTER *ioc,
 	request_data = ioc->diag_buffer[buffer_type];
 	request_data_sz = diag_register->requested_buffer_size;
 	ioc->unique_id[buffer_type] = diag_register->unique_id;
+	/* Reset ioc variables used for additional query commands */
+	ioc->reset_from_user = 0;
+	memset(&ioc->htb_rel, 0, sizeof(struct htb_rel_query));
 	ioc->diag_buffer_status[buffer_type] &=
 	    MPT3_DIAG_BUFFER_IS_DRIVER_ALLOCATED;
 	memcpy(ioc->product_specific[buffer_type],
@@ -2469,7 +2475,61 @@ _ctl_diag_read_buffer(struct MPT3SAS_ADAPTER *ioc, void __user *arg)
 	return rc;
 }
 
+/**
+ * _ctl_addnl_diag_query - query relevant info associated with diag buffers
+ * @ioc: per adapter object
+ * @arg: user space buffer containing ioctl content
+ *
+ * The application will send only unique_id.  Driver will
+ * inspect unique_id first, if valid, fill the details related to cause
+ * for diag buffer release.
+ */
+static long
+_ctl_addnl_diag_query(struct MPT3SAS_ADAPTER *ioc, void __user *arg)
+{
+	struct mpt3_addnl_diag_query karg;
+	u32 buffer_type = 0;
 
+	if (copy_from_user(&karg, arg, sizeof(karg))) {
+		pr_err("%s: failure at %s:%d/%s()!\n",
+		    ioc->name, __FILE__, __LINE__, __func__);
+		return -EFAULT;
+	}
+	dctlprintk(ioc, ioc_info(ioc, "%s\n",  __func__));
+	if (karg.unique_id == 0) {
+		ioc_err(ioc, "%s: unique_id is(0x%08x)\n",
+		    __func__, karg.unique_id);
+		return -EPERM;
+	}
+	buffer_type = _ctl_diag_get_bufftype(ioc, karg.unique_id);
+	if (buffer_type == MPT3_DIAG_UID_NOT_FOUND) {
+		ioc_err(ioc, "%s: buffer with unique_id(0x%08x) not found\n",
+		    __func__, karg.unique_id);
+		return -EPERM;
+	}
+	memset(&karg.buffer_rel_condition, 0, sizeof(struct htb_rel_query));
+	if ((ioc->diag_buffer_status[buffer_type] &
+	    MPT3_DIAG_BUFFER_IS_REGISTERED) == 0) {
+		ioc_info(ioc, "%s: buffer_type(0x%02x) is not registered\n",
+		    __func__, buffer_type);
+		goto out;
+	}
+	if ((ioc->diag_buffer_status[buffer_type] &
+	    MPT3_DIAG_BUFFER_IS_RELEASED) == 0) {
+		ioc_err(ioc, "%s: buffer_type(0x%02x) is not released\n",
+		    __func__, buffer_type);
+		return -EPERM;
+	}
+	memcpy(&karg.buffer_rel_condition, &ioc->htb_rel,
+	    sizeof(struct  htb_rel_query));
+out:
+	if (copy_to_user(arg, &karg, sizeof(struct mpt3_addnl_diag_query))) {
+		ioc_err(ioc, "%s: unable to write mpt3_addnl_diag_query data @ %p\n",
+		    __func__, arg);
+		return -EFAULT;
+	}
+	return 0;
+}
 
 #ifdef CONFIG_COMPAT
 /**
@@ -2533,7 +2593,7 @@ _ctl_ioctl_main(struct file *file, unsigned int cmd, void __user *arg,
 	struct MPT3SAS_ADAPTER *ioc;
 	struct mpt3_ioctl_header ioctl_header;
 	enum block_state state;
-	long ret = -EINVAL;
+	long ret = -ENOIOCTLCMD;
 
 	/* get IOCTL header */
 	if (copy_from_user(&ioctl_header, (char __user *)arg,
@@ -2643,6 +2703,10 @@ _ctl_ioctl_main(struct file *file, unsigned int cmd, void __user *arg,
 		if (_IOC_SIZE(cmd) == sizeof(struct mpt3_diag_read_buffer))
 			ret = _ctl_diag_read_buffer(ioc, arg);
 		break;
+	case MPT3ADDNLDIAGQUERY:
+		if (_IOC_SIZE(cmd) == sizeof(struct mpt3_addnl_diag_query))
+			ret = _ctl_addnl_diag_query(ioc, arg);
+		break;
 	default:
 		dctlprintk(ioc,
 			   ioc_info(ioc, "unsupported ioctl opcode(0x%08x)\n",
@@ -3425,6 +3489,7 @@ host_trace_buffer_enable_store(struct device *cdev,
 		    MPT3_DIAG_BUFFER_IS_RELEASED))
 			goto out;
 		ioc_info(ioc, "releasing host trace buffer\n");
+		ioc->htb_rel.buffer_rel_condition = MPT3_DIAG_BUFFER_REL_SYSFS;
 		mpt3sas_send_diag_release(ioc, MPI2_DIAG_BUF_TYPE_TRACE,
 		    &issue_reset);
 	}
diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.h b/drivers/scsi/mpt3sas/mpt3sas_ctl.h
index 0f7aa4ddade0..d2ccdafb8df2 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_ctl.h
+++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.h
@@ -94,6 +94,8 @@
 	struct mpt3_diag_query)
 #define MPT3DIAGREADBUFFER _IOWR(MPT3_MAGIC_NUMBER, 30, \
 	struct mpt3_diag_read_buffer)
+#define MPT3ADDNLDIAGQUERY _IOWR(MPT3_MAGIC_NUMBER, 32, \
+	struct mpt3_addnl_diag_query)
 
 /* Trace Buffer default UniqueId */
 #define MPT2DIAGBUFFUNIQUEID (0x07075900)
@@ -430,4 +432,24 @@ struct mpt3_diag_read_buffer {
 	uint32_t diagnostic_data[1];
 };
 
+/**
+ * struct mpt3_addnl_diag_query - diagnostic buffer release reason
+ * @hdr - generic header
+ * @unique_id - unique id associated with this buffer.
+ * @buffer_rel_condition - Release condition ioctl/sysfs/reset
+ * @reserved1
+ * @trigger_type - Master/Event/scsi/MPI
+ * @trigger_info_dwords - Data Correspondig to trigger type
+ * @reserved2
+ */
+struct mpt3_addnl_diag_query {
+	struct mpt3_ioctl_header hdr;
+	uint32_t unique_id;
+	uint16_t buffer_rel_condition;
+	uint16_t reserved1;
+	uint32_t trigger_type;
+	uint32_t trigger_info_dwords[2];
+	uint32_t reserved2[2];
+};
+
 #endif /* MPT3SAS_CTL_H_INCLUDED */
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index c8b09a81834d..ffca03064797 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -54,6 +54,7 @@
 #include <linux/interrupt.h>
 #include <linux/aer.h>
 #include <linux/raid_class.h>
+#include <linux/blk-mq-pci.h>
 #include <asm/unaligned.h>
 
 #include "mpt3sas_base.h"
@@ -168,6 +169,11 @@ MODULE_PARM_DESC(multipath_on_hba,
 	"\t SAS 2.0 & SAS 3.0 HBA - This will be disabled,\n\t\t"
 	"\t SAS 3.5 HBA - This will be enabled)");
 
+static int host_tagset_enable = 1;
+module_param(host_tagset_enable, int, 0444);
+MODULE_PARM_DESC(host_tagset_enable,
+	"Shared host tagset enable/disable Default: enable(1)");
+
 /* raid transport support */
 static struct raid_template *mpt3sas_raid_template;
 static struct raid_template *mpt2sas_raid_template;
@@ -1743,10 +1749,12 @@ mpt3sas_scsih_scsi_lookup_get(struct MPT3SAS_ADAPTER *ioc, u16 smid)
 	struct scsi_cmnd *scmd = NULL;
 	struct scsiio_tracker *st;
 	Mpi25SCSIIORequest_t *mpi_request;
+	u16 tag = smid - 1;
 
 	if (smid > 0  &&
 	    smid <= ioc->scsiio_depth - INTERNAL_SCSIIO_CMDS_COUNT) {
-		u32 unique_tag = smid - 1;
+		u32 unique_tag =
+		    ioc->io_queue_num[tag] << BLK_MQ_UNIQUE_TAG_BITS | tag;
 
 		mpi_request = mpt3sas_base_get_msg_frame(ioc, smid);
 
@@ -11599,6 +11607,22 @@ scsih_scan_finished(struct Scsi_Host *shost, unsigned long time)
 	return 1;
 }
 
+/**
+ * scsih_map_queues - map reply queues with request queues
+ * @shost: SCSI host pointer
+ */
+static int scsih_map_queues(struct Scsi_Host *shost)
+{
+	struct MPT3SAS_ADAPTER *ioc =
+	    (struct MPT3SAS_ADAPTER *)shost->hostdata;
+
+	if (ioc->shost->nr_hw_queues == 1)
+		return 0;
+
+	return blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT],
+	    ioc->pdev, ioc->high_iops_queues);
+}
+
 /* shost template for SAS 2.0 HBA devices */
 static struct scsi_host_template mpt2sas_driver_template = {
 	.module				= THIS_MODULE,
@@ -11666,6 +11690,7 @@ static struct scsi_host_template mpt3sas_driver_template = {
 	.sdev_attrs			= mpt3sas_dev_attrs,
 	.track_queue_depth		= 1,
 	.cmd_size			= sizeof(struct scsiio_tracker),
+	.map_queues			= scsih_map_queues,
 };
 
 /* raid transport support for SAS 3.0 HBA devices */
@@ -11922,6 +11947,8 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	 * Enable MEMORY MOVE support flag.
 	 */
 	ioc->drv_support_bitmap |= MPT_DRV_SUPPORT_BITMAP_MEMMOVE;
+	/* Enable ADDITIONAL QUERY support flag. */
+	ioc->drv_support_bitmap |= MPT_DRV_SUPPORT_BITMAP_ADDNLQUERY;
 
 	ioc->enable_sdev_max_qd = enable_sdev_max_qd;
 
@@ -12028,6 +12055,21 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	} else
 		ioc->hide_drives = 0;
 
+	shost->host_tagset = 0;
+	shost->nr_hw_queues = 1;
+
+	if (ioc->is_gen35_ioc && ioc->reply_queue_count > 1 &&
+	    host_tagset_enable && ioc->smp_affinity_enable) {
+
+		shost->host_tagset = 1;
+		shost->nr_hw_queues =
+		    ioc->reply_queue_count - ioc->high_iops_queues;
+
+		dev_info(&ioc->pdev->dev,
+		    "Max SCSIIO MPT commands: %d shared with nr_hw_queues = %d\n",
+		    shost->can_queue, shost->nr_hw_queues);
+	}
+
 	rv = scsi_add_host(shost, &pdev->dev);
 	if (rv) {
 		ioc_err(ioc, "failure at %s:%d/%s()!\n",
diff --git a/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c b/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c
index 8ec9bab20ec4..d9b7d0ee25b0 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c
@@ -132,6 +132,35 @@ mpt3sas_process_trigger_data(struct MPT3SAS_ADAPTER *ioc,
 		    &issue_reset);
 	}
 
+	ioc->htb_rel.buffer_rel_condition = MPT3_DIAG_BUFFER_REL_TRIGGER;
+	if (event_data) {
+		ioc->htb_rel.trigger_type = event_data->trigger_type;
+		switch (event_data->trigger_type) {
+		case MPT3SAS_TRIGGER_SCSI:
+			memcpy(&ioc->htb_rel.trigger_info_dwords,
+			    &event_data->u.scsi,
+			    sizeof(struct SL_WH_SCSI_TRIGGER_T));
+			break;
+		case MPT3SAS_TRIGGER_MPI:
+			memcpy(&ioc->htb_rel.trigger_info_dwords,
+			    &event_data->u.mpi,
+			    sizeof(struct SL_WH_MPI_TRIGGER_T));
+			break;
+		case MPT3SAS_TRIGGER_MASTER:
+			ioc->htb_rel.trigger_info_dwords[0] =
+			    event_data->u.master.MasterData;
+			break;
+		case MPT3SAS_TRIGGER_EVENT:
+			memcpy(&ioc->htb_rel.trigger_info_dwords,
+			    &event_data->u.event,
+			    sizeof(struct SL_WH_EVENT_TRIGGER_T));
+			break;
+		default:
+			ioc_err(ioc, "%d - Is not a valid Trigger type\n",
+			    event_data->trigger_type);
+			break;
+		}
+	}
 	_mpt3sas_raise_sigio(ioc, event_data);
 
 	dTriggerDiagPrintk(ioc, ioc_info(ioc, "%s: exit\n",
@@ -201,9 +230,14 @@ mpt3sas_trigger_master(struct MPT3SAS_ADAPTER *ioc, u32 trigger_bitmask)
 	event_data.u.master.MasterData = trigger_bitmask;
 
 	if (trigger_bitmask & MASTER_TRIGGER_FW_FAULT ||
-	    trigger_bitmask & MASTER_TRIGGER_ADAPTER_RESET)
+	    trigger_bitmask & MASTER_TRIGGER_ADAPTER_RESET) {
+		ioc->htb_rel.trigger_type = MPT3SAS_TRIGGER_MASTER;
+		ioc->htb_rel.trigger_info_dwords[0] = trigger_bitmask;
+		if (ioc->reset_from_user)
+			ioc->htb_rel.trigger_info_dwords[1] =
+			    MPT_DIAG_RESET_ISSUED_BY_USER;
 		_mpt3sas_raise_sigio(ioc, &event_data);
-	else
+	} else
 		mpt3sas_send_trigger_data_event(ioc, &event_data);
 
  out:
diff --git a/drivers/scsi/pmcraid.h b/drivers/scsi/pmcraid.h
index 15c962108075..6d36debde18e 100644
--- a/drivers/scsi/pmcraid.h
+++ b/drivers/scsi/pmcraid.h
@@ -244,7 +244,7 @@ struct pmcraid_ioarcb {
 	__u8  hrrq_id;
 	__u8  cdb[PMCRAID_MAX_CDB_LEN];
 	struct pmcraid_ioarcb_add_data add_data;
-} __attribute__((packed, aligned(PMCRAID_IOARCB_ALIGNMENT)));
+};
 
 /* well known resource handle values */
 #define PMCRAID_IOA_RES_HANDLE        0xffffffff
@@ -1040,8 +1040,8 @@ struct pmcraid_passthrough_ioctl_buffer {
 	struct pmcraid_ioctl_header ioctl_header;
 	struct pmcraid_ioarcb ioarcb;
 	struct pmcraid_ioasa  ioasa;
-	u8  request_buffer[1];
-} __attribute__ ((packed));
+	u8  request_buffer[];
+} __attribute__ ((packed, aligned(PMCRAID_IOARCB_ALIGNMENT)));
 
 /*
  * keys to differentiate between driver handled IOCTLs and passthrough
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 0d09480b66cd..c48daf52725d 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -981,8 +981,7 @@ void qlt_free_session_done(struct work_struct *work)
 			int rc;
 
 			if (!own ||
-			    (own &&
-			     (own->iocb.u.isp24.status_subcode == ELS_PLOGI))) {
+			     (own->iocb.u.isp24.status_subcode == ELS_PLOGI)) {
 				rc = qla2x00_post_async_logout_work(vha, sess,
 				    NULL);
 				if (rc != QLA_SUCCESS)
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index a4b014e1cd8c..7bd9a4a04ad5 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c
@@ -841,7 +841,7 @@ static int __qla4xxx_is_chap_active(struct device *dev, void *data)
 	sess = cls_session->dd_data;
 	ddb_entry = sess->dd_data;
 
-	if (iscsi_session_chkready(cls_session))
+	if (iscsi_is_session_online(cls_session))
 		goto exit_is_chap_active;
 
 	if (ddb_entry->chap_tbl_idx == *chap_tbl_idx)
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 2e68c0a87698..969d24d580e2 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -1701,10 +1701,8 @@ static const char *iscsi_session_state_name(int state)
 
 int iscsi_session_chkready(struct iscsi_cls_session *session)
 {
-	unsigned long flags;
 	int err;
 
-	spin_lock_irqsave(&session->lock, flags);
 	switch (session->state) {
 	case ISCSI_SESSION_LOGGED_IN:
 		err = 0;
@@ -1719,7 +1717,6 @@ int iscsi_session_chkready(struct iscsi_cls_session *session)
 		err = DID_NO_CONNECT << 16;
 		break;
 	}
-	spin_unlock_irqrestore(&session->lock, flags);
 	return err;
 }
 EXPORT_SYMBOL_GPL(iscsi_session_chkready);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index a3d2d4bc4a3d..ed0b1bb99f08 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -707,9 +707,9 @@ static int sd_sec_submit(void *data, u16 spsp, u8 secp, void *buffer,
 	put_unaligned_be16(spsp, &cdb[2]);
 	put_unaligned_be32(len, &cdb[6]);
 
-	ret = scsi_execute_req(sdev, cdb,
-			send ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
-			buffer, len, NULL, SD_TIMEOUT, sdkp->max_retries, NULL);
+	ret = scsi_execute(sdev, cdb, send ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
+		buffer, len, NULL, NULL, SD_TIMEOUT, sdkp->max_retries, 0,
+		RQF_PM, NULL);
 	return ret <= 0 ? ret : -EIO;
 }
 #endif /* CONFIG_BLK_SED_OPAL */
@@ -3379,10 +3379,12 @@ static int sd_probe(struct device *dev)
 	    sdp->type != TYPE_RBC)
 		goto out;
 
-#ifndef CONFIG_BLK_DEV_ZONED
-	if (sdp->type == TYPE_ZBC)
+	if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) && sdp->type == TYPE_ZBC) {
+		sdev_printk(KERN_WARNING, sdp,
+			    "Unsupported ZBC host-managed device.\n");
 		goto out;
-#endif
+	}
+
 	SCSI_LOG_HLQUEUE(3, sdev_printk(KERN_INFO, sdp,
 					"sd_probe\n"));
 
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index 03adb39293c2..ee558675eab4 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -704,6 +704,7 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
 	unsigned int nr_zones = sdkp->rev_nr_zones;
 	u32 max_append;
 	int ret = 0;
+	unsigned int flags;
 
 	/*
 	 * For all zoned disks, initialize zone append emulation data if not
@@ -736,16 +737,19 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
 	    disk->queue->nr_zones == nr_zones)
 		goto unlock;
 
+	flags = memalloc_noio_save();
 	sdkp->zone_blocks = zone_blocks;
 	sdkp->nr_zones = nr_zones;
-	sdkp->rev_wp_offset = kvcalloc(nr_zones, sizeof(u32), GFP_NOIO);
+	sdkp->rev_wp_offset = kvcalloc(nr_zones, sizeof(u32), GFP_KERNEL);
 	if (!sdkp->rev_wp_offset) {
 		ret = -ENOMEM;
+		memalloc_noio_restore(flags);
 		goto unlock;
 	}
 
 	ret = blk_revalidate_disk_zones(disk, sd_zbc_revalidate_zones_cb);
 
+	memalloc_noio_restore(flags);
 	kvfree(sdkp->rev_wp_offset);
 	sdkp->rev_wp_offset = NULL;
 
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 721f55db181f..77161750c9fb 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -451,6 +451,8 @@ static void ufshcd_print_evt(struct ufs_hba *hba, u32 id,
 
 	if (!found)
 		dev_err(hba->dev, "No record of %s\n", err_name);
+	else
+		dev_err(hba->dev, "%s: total cnt=%llu\n", err_name, e->cnt);
 }
 
 static void ufshcd_print_evt_hist(struct ufs_hba *hba)
@@ -1866,7 +1868,7 @@ static ssize_t ufshcd_clkgate_delay_show(struct device *dev,
 {
 	struct ufs_hba *hba = dev_get_drvdata(dev);
 
-	return snprintf(buf, PAGE_SIZE, "%lu\n", hba->clk_gating.delay_ms);
+	return sysfs_emit(buf, "%lu\n", hba->clk_gating.delay_ms);
 }
 
 static ssize_t ufshcd_clkgate_delay_store(struct device *dev,
@@ -1889,7 +1891,7 @@ static ssize_t ufshcd_clkgate_enable_show(struct device *dev,
 {
 	struct ufs_hba *hba = dev_get_drvdata(dev);
 
-	return snprintf(buf, PAGE_SIZE, "%d\n", hba->clk_gating.is_enabled);
+	return sysfs_emit(buf, "%d\n", hba->clk_gating.is_enabled);
 }
 
 static ssize_t ufshcd_clkgate_enable_store(struct device *dev,
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index ee61f821f75d..18e56c1c1b30 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -570,7 +570,7 @@ enum ufshcd_quirks {
 	/*
 	 * This quirk allows only sg entries aligned with page size.
 	 */
-	UFSHCD_QUIRK_ALIGN_SG_WITH_PAGE_SIZE		= 1 << 13,
+	UFSHCD_QUIRK_ALIGN_SG_WITH_PAGE_SIZE		= 1 << 14,
 };
 
 enum ufshcd_caps {
diff --git a/drivers/soc/litex/Kconfig b/drivers/soc/litex/Kconfig
index 7a7c38282e11..e7011d665b15 100644
--- a/drivers/soc/litex/Kconfig
+++ b/drivers/soc/litex/Kconfig
@@ -12,9 +12,21 @@ config LITEX_SOC_CONTROLLER
 	select LITEX
 	help
 	  This option enables the SoC Controller Driver which verifies
-	  LiteX CSR access and provides common litex_get_reg/litex_set_reg
+	  LiteX CSR access and provides common litex_[read|write]*
 	  accessors.
 	  All drivers that use functions from litex.h must depend on
 	  LITEX.
 
+config LITEX_SUBREG_SIZE
+	int "Size of a LiteX CSR subregister, in bytes"
+	depends on LITEX
+	range 1 4
+	default 4
+	help
+	LiteX MMIO registers (referred to as Configuration and Status
+	registers, or CSRs) are spread across adjacent 8- or 32-bit
+	subregisters, located at 32-bit aligned MMIO addresses. Use
+	this to select the appropriate size (1 or 4 bytes) matching
+	your particular LiteX build.
+
 endmenu
diff --git a/drivers/soc/litex/litex_soc_ctrl.c b/drivers/soc/litex/litex_soc_ctrl.c
index 9b0766384570..6268bfa7f0d6 100644
--- a/drivers/soc/litex/litex_soc_ctrl.c
+++ b/drivers/soc/litex/litex_soc_ctrl.c
@@ -15,79 +15,11 @@
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/io.h>
+#include <linux/reboot.h>
 
-/*
- * LiteX SoC Generator, depending on the configuration, can split a single
- * logical CSR (Control&Status Register) into a series of consecutive physical
- * registers.
- *
- * For example, in the configuration with 8-bit CSR Bus, 32-bit aligned (the
- * default one for 32-bit CPUs) a 32-bit logical CSR will be generated as four
- * 32-bit physical registers, each one containing one byte of meaningful data.
- *
- * For details see: https://github.com/enjoy-digital/litex/wiki/CSR-Bus
- *
- * The purpose of `litex_set_reg`/`litex_get_reg` is to implement the logic
- * of writing to/reading from the LiteX CSR in a single place that can be
- * then reused by all LiteX drivers.
- */
-
-/**
- * litex_set_reg() - Writes the value to the LiteX CSR (Control&Status Register)
- * @reg: Address of the CSR
- * @reg_size: The width of the CSR expressed in the number of bytes
- * @val: Value to be written to the CSR
- *
- * In the currently supported LiteX configuration (8-bit CSR Bus, 32-bit aligned),
- * a 32-bit LiteX CSR is generated as 4 consecutive 32-bit physical registers,
- * each one containing one byte of meaningful data.
- *
- * This function splits a single possibly multi-byte write into a series of
- * single-byte writes with a proper offset.
- */
-void litex_set_reg(void __iomem *reg, unsigned long reg_size,
-		    unsigned long val)
-{
-	unsigned long shifted_data, shift, i;
-
-	for (i = 0; i < reg_size; ++i) {
-		shift = ((reg_size - i - 1) * LITEX_SUBREG_SIZE_BIT);
-		shifted_data = val >> shift;
-
-		WRITE_LITEX_SUBREGISTER(shifted_data, reg, i);
-	}
-}
-EXPORT_SYMBOL_GPL(litex_set_reg);
-
-/**
- * litex_get_reg() - Reads the value of the LiteX CSR (Control&Status Register)
- * @reg: Address of the CSR
- * @reg_size: The width of the CSR expressed in the number of bytes
- *
- * Return: Value read from the CSR
- *
- * In the currently supported LiteX configuration (8-bit CSR Bus, 32-bit aligned),
- * a 32-bit LiteX CSR is generated as 4 consecutive 32-bit physical registers,
- * each one containing one byte of meaningful data.
- *
- * This function generates a series of single-byte reads with a proper offset
- * and joins their results into a single multi-byte value.
- */
-unsigned long litex_get_reg(void __iomem *reg, unsigned long reg_size)
-{
-	unsigned long shifted_data, shift, i;
-	unsigned long result = 0;
-
-	for (i = 0; i < reg_size; ++i) {
-		shifted_data = READ_LITEX_SUBREGISTER(reg, i);
-
-		shift = ((reg_size - i - 1) * LITEX_SUBREG_SIZE_BIT);
-		result |= (shifted_data << shift);
-	}
-
-	return result;
-}
-EXPORT_SYMBOL_GPL(litex_get_reg);
+/* reset register located at the base address */
+#define RESET_REG_OFF           0x00
+#define RESET_REG_VALUE         0x00000001
 
 #define SCRATCH_REG_OFF         0x04
 #define SCRATCH_REG_VALUE       0x12345678
@@ -131,15 +63,27 @@ static int litex_check_csr_access(void __iomem *reg_addr)
 	/* restore original value of the SCRATCH register */
 	litex_write32(reg_addr + SCRATCH_REG_OFF, SCRATCH_REG_VALUE);
 
-	pr_info("LiteX SoC Controller driver initialized");
+	pr_info("LiteX SoC Controller driver initialized: subreg:%d, align:%d",
+		LITEX_SUBREG_SIZE, LITEX_SUBREG_ALIGN);
 
 	return 0;
 }
 
 struct litex_soc_ctrl_device {
 	void __iomem *base;
+	struct notifier_block reset_nb;
 };
 
+static int litex_reset_handler(struct notifier_block *this, unsigned long mode,
+			       void *cmd)
+{
+	struct litex_soc_ctrl_device *soc_ctrl_dev =
+		container_of(this, struct litex_soc_ctrl_device, reset_nb);
+
+	litex_write32(soc_ctrl_dev->base + RESET_REG_OFF, RESET_REG_VALUE);
+	return NOTIFY_DONE;
+}
+
 #ifdef CONFIG_OF
 static const struct of_device_id litex_soc_ctrl_of_match[] = {
 	{.compatible = "litex,soc-controller"},
@@ -151,6 +95,7 @@ MODULE_DEVICE_TABLE(of, litex_soc_ctrl_of_match);
 static int litex_soc_ctrl_probe(struct platform_device *pdev)
 {
 	struct litex_soc_ctrl_device *soc_ctrl_dev;
+	int error;
 
 	soc_ctrl_dev = devm_kzalloc(&pdev->dev, sizeof(*soc_ctrl_dev), GFP_KERNEL);
 	if (!soc_ctrl_dev)
@@ -160,7 +105,29 @@ static int litex_soc_ctrl_probe(struct platform_device *pdev)
 	if (IS_ERR(soc_ctrl_dev->base))
 		return PTR_ERR(soc_ctrl_dev->base);
 
-	return litex_check_csr_access(soc_ctrl_dev->base);
+	error = litex_check_csr_access(soc_ctrl_dev->base);
+	if (error)
+		return error;
+
+	platform_set_drvdata(pdev, soc_ctrl_dev);
+
+	soc_ctrl_dev->reset_nb.notifier_call = litex_reset_handler;
+	soc_ctrl_dev->reset_nb.priority = 128;
+	error = register_restart_handler(&soc_ctrl_dev->reset_nb);
+	if (error) {
+		dev_warn(&pdev->dev, "cannot register restart handler: %d\n",
+			 error);
+	}
+
+	return 0;
+}
+
+static int litex_soc_ctrl_remove(struct platform_device *pdev)
+{
+	struct litex_soc_ctrl_device *soc_ctrl_dev = platform_get_drvdata(pdev);
+
+	unregister_restart_handler(&soc_ctrl_dev->reset_nb);
+	return 0;
 }
 
 static struct platform_driver litex_soc_ctrl_driver = {
@@ -169,6 +136,7 @@ static struct platform_driver litex_soc_ctrl_driver = {
 		.of_match_table = of_match_ptr(litex_soc_ctrl_of_match)
 	},
 	.probe = litex_soc_ctrl_probe,
+	.remove = litex_soc_ctrl_remove,
 };
 
 module_platform_driver(litex_soc_ctrl_driver);
diff --git a/drivers/staging/vme/devices/vme_user.c b/drivers/staging/vme/devices/vme_user.c
index 35d7260e2271..e3fa38bd7f12 100644
--- a/drivers/staging/vme/devices/vme_user.c
+++ b/drivers/staging/vme/devices/vme_user.c
@@ -175,7 +175,7 @@ static ssize_t buffer_from_user(unsigned int minor, const char __user *buf,
 static ssize_t vme_user_read(struct file *file, char __user *buf, size_t count,
 			     loff_t *ppos)
 {
-	unsigned int minor = MINOR(file_inode(file)->i_rdev);
+	unsigned int minor = iminor(file_inode(file));
 	ssize_t retval;
 	size_t image_size;
 
@@ -218,7 +218,7 @@ static ssize_t vme_user_read(struct file *file, char __user *buf, size_t count,
 static ssize_t vme_user_write(struct file *file, const char __user *buf,
 			      size_t count, loff_t *ppos)
 {
-	unsigned int minor = MINOR(file_inode(file)->i_rdev);
+	unsigned int minor = iminor(file_inode(file));
 	ssize_t retval;
 	size_t image_size;
 
@@ -260,7 +260,7 @@ static ssize_t vme_user_write(struct file *file, const char __user *buf,
 
 static loff_t vme_user_llseek(struct file *file, loff_t off, int whence)
 {
-	unsigned int minor = MINOR(file_inode(file)->i_rdev);
+	unsigned int minor = iminor(file_inode(file));
 	size_t image_size;
 	loff_t res;
 
@@ -294,7 +294,7 @@ static int vme_user_ioctl(struct inode *inode, struct file *file,
 	struct vme_slave slave;
 	struct vme_irq_id irq_req;
 	unsigned long copied;
-	unsigned int minor = MINOR(inode->i_rdev);
+	unsigned int minor = iminor(inode);
 	int retval;
 	dma_addr_t pci_addr;
 	void __user *argp = (void __user *)arg;
@@ -412,7 +412,7 @@ vme_user_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	int ret;
 	struct inode *inode = file_inode(file);
-	unsigned int minor = MINOR(inode->i_rdev);
+	unsigned int minor = iminor(inode);
 
 	mutex_lock(&image[minor].mutex);
 	ret = vme_user_ioctl(inode, file, cmd, arg);
@@ -481,7 +481,7 @@ static int vme_user_master_mmap(unsigned int minor, struct vm_area_struct *vma)
 
 static int vme_user_mmap(struct file *file, struct vm_area_struct *vma)
 {
-	unsigned int minor = MINOR(file_inode(file)->i_rdev);
+	unsigned int minor = iminor(file_inode(file));
 
 	if (type[minor] == MASTER_MINOR)
 		return vme_user_master_mmap(minor, vma);
diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c
index e4a9b9fe3dfb..2a6165febd3b 100644
--- a/drivers/target/sbp/sbp_target.c
+++ b/drivers/target/sbp/sbp_target.c
@@ -1006,7 +1006,7 @@ static void tgt_agent_fetch_work(struct work_struct *work)
 			agent->state = AGENT_STATE_SUSPENDED;
 
 		spin_unlock_bh(&agent->lock);
-	};
+	}
 }
 
 static struct sbp_target_agent *sbp_target_agent_register(
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index 8ed93fd205c7..ee3d52061281 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -315,10 +315,8 @@ iblock_get_bio(struct se_cmd *cmd, sector_t lba, u32 sg_num, int op,
 	 * Only allocate as many vector entries as the bio code allows us to,
 	 * we'll loop later on until we have handled the whole request.
 	 */
-	if (sg_num > BIO_MAX_PAGES)
-		sg_num = BIO_MAX_PAGES;
-
-	bio = bio_alloc_bioset(GFP_NOIO, sg_num, &ib_dev->ibd_bio_set);
+	bio = bio_alloc_bioset(GFP_NOIO, bio_max_segs(sg_num),
+				&ib_dev->ibd_bio_set);
 	if (!bio) {
 		pr_err("Unable to allocate memory for bio\n");
 		return NULL;
@@ -638,8 +636,7 @@ iblock_alloc_bip(struct se_cmd *cmd, struct bio *bio,
 		return -ENODEV;
 	}
 
-	bip = bio_integrity_alloc(bio, GFP_NOIO,
-			min_t(unsigned int, cmd->t_prot_nents, BIO_MAX_PAGES));
+	bip = bio_integrity_alloc(bio, GFP_NOIO, bio_max_segs(cmd->t_prot_nents));
 	if (IS_ERR(bip)) {
 		pr_err("Unable to allocate bio_integrity_payload\n");
 		return PTR_ERR(bip);
diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c
index 14db5e568f22..d4cc43afe05b 100644
--- a/drivers/target/target_core_pr.c
+++ b/drivers/target/target_core_pr.c
@@ -3739,6 +3739,7 @@ core_scsi3_pri_read_keys(struct se_cmd *cmd)
 	spin_unlock(&dev->t10_pr.registration_lock);
 
 	put_unaligned_be32(add_len, &buf[4]);
+	target_set_cmd_data_length(cmd, 8 + add_len);
 
 	transport_kunmap_data_sg(cmd);
 
@@ -3757,7 +3758,7 @@ core_scsi3_pri_read_reservation(struct se_cmd *cmd)
 	struct t10_pr_registration *pr_reg;
 	unsigned char *buf;
 	u64 pr_res_key;
-	u32 add_len = 16; /* Hardcoded to 16 when a reservation is held. */
+	u32 add_len = 0;
 
 	if (cmd->data_length < 8) {
 		pr_err("PRIN SA READ_RESERVATIONS SCSI Data Length: %u"
@@ -3775,8 +3776,9 @@ core_scsi3_pri_read_reservation(struct se_cmd *cmd)
 	pr_reg = dev->dev_pr_res_holder;
 	if (pr_reg) {
 		/*
-		 * Set the hardcoded Additional Length
+		 * Set the Additional Length to 16 when a reservation is held
 		 */
+		add_len = 16;
 		put_unaligned_be32(add_len, &buf[4]);
 
 		if (cmd->data_length < 22)
@@ -3812,6 +3814,8 @@ core_scsi3_pri_read_reservation(struct se_cmd *cmd)
 			  (pr_reg->pr_res_type & 0x0f);
 	}
 
+	target_set_cmd_data_length(cmd, 8 + add_len);
+
 err:
 	spin_unlock(&dev->dev_reservation_lock);
 	transport_kunmap_data_sg(cmd);
@@ -3830,7 +3834,7 @@ core_scsi3_pri_report_capabilities(struct se_cmd *cmd)
 	struct se_device *dev = cmd->se_dev;
 	struct t10_reservation *pr_tmpl = &dev->t10_pr;
 	unsigned char *buf;
-	u16 add_len = 8; /* Hardcoded to 8. */
+	u16 len = 8; /* Hardcoded to 8. */
 
 	if (cmd->data_length < 6) {
 		pr_err("PRIN SA REPORT_CAPABILITIES SCSI Data Length:"
@@ -3842,7 +3846,7 @@ core_scsi3_pri_report_capabilities(struct se_cmd *cmd)
 	if (!buf)
 		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 
-	put_unaligned_be16(add_len, &buf[0]);
+	put_unaligned_be16(len, &buf[0]);
 	buf[2] |= 0x10; /* CRH: Compatible Reservation Hanlding bit. */
 	buf[2] |= 0x08; /* SIP_C: Specify Initiator Ports Capable bit */
 	buf[2] |= 0x04; /* ATP_C: All Target Ports Capable bit */
@@ -3871,6 +3875,8 @@ core_scsi3_pri_report_capabilities(struct se_cmd *cmd)
 	buf[4] |= 0x02; /* PR_TYPE_WRITE_EXCLUSIVE */
 	buf[5] |= 0x01; /* PR_TYPE_EXCLUSIVE_ACCESS_ALLREG */
 
+	target_set_cmd_data_length(cmd, len);
+
 	transport_kunmap_data_sg(cmd);
 
 	return 0;
@@ -4031,6 +4037,7 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd)
 	 * Set ADDITIONAL_LENGTH
 	 */
 	put_unaligned_be32(add_len, &buf[4]);
+	target_set_cmd_data_length(cmd, 8 + add_len);
 
 	transport_kunmap_data_sg(cmd);
 
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 33770e5808ce..3cbc074992bc 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -881,7 +881,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 
 			if (!bio) {
 new_bio:
-				nr_vecs = min_t(int, BIO_MAX_PAGES, nr_pages);
+				nr_vecs = bio_max_segs(nr_pages);
 				nr_pages -= nr_vecs;
 				/*
 				 * Calls bio_kmalloc() and sets bio->bi_end_io()
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 93ea17cbad79..5ecb9f18a53d 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -879,11 +879,9 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status)
 }
 EXPORT_SYMBOL(target_complete_cmd);
 
-void target_complete_cmd_with_length(struct se_cmd *cmd, u8 scsi_status, int length)
+void target_set_cmd_data_length(struct se_cmd *cmd, int length)
 {
-	if ((scsi_status == SAM_STAT_GOOD ||
-	     cmd->se_cmd_flags & SCF_TREAT_READ_AS_NORMAL) &&
-	    length < cmd->data_length) {
+	if (length < cmd->data_length) {
 		if (cmd->se_cmd_flags & SCF_UNDERFLOW_BIT) {
 			cmd->residual_count += cmd->data_length - length;
 		} else {
@@ -893,6 +891,15 @@ void target_complete_cmd_with_length(struct se_cmd *cmd, u8 scsi_status, int len
 
 		cmd->data_length = length;
 	}
+}
+EXPORT_SYMBOL(target_set_cmd_data_length);
+
+void target_complete_cmd_with_length(struct se_cmd *cmd, u8 scsi_status, int length)
+{
+	if (scsi_status == SAM_STAT_GOOD ||
+	    cmd->se_cmd_flags & SCF_TREAT_READ_AS_NORMAL) {
+		target_set_cmd_data_length(cmd, length);
+	}
 
 	target_complete_cmd(cmd, scsi_status);
 }
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index a5991df23581..bf73cd5f4b04 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -1566,6 +1566,88 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name)
 	return &udev->se_dev;
 }
 
+static void tcmu_dev_call_rcu(struct rcu_head *p)
+{
+	struct se_device *dev = container_of(p, struct se_device, rcu_head);
+	struct tcmu_dev *udev = TCMU_DEV(dev);
+
+	kfree(udev->uio_info.name);
+	kfree(udev->name);
+	kfree(udev);
+}
+
+static int tcmu_check_and_free_pending_cmd(struct tcmu_cmd *cmd)
+{
+	if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) {
+		kmem_cache_free(tcmu_cmd_cache, cmd);
+		return 0;
+	}
+	return -EINVAL;
+}
+
+static void tcmu_blocks_release(struct radix_tree_root *blocks,
+				int start, int end)
+{
+	int i;
+	struct page *page;
+
+	for (i = start; i < end; i++) {
+		page = radix_tree_delete(blocks, i);
+		if (page) {
+			__free_page(page);
+			atomic_dec(&global_db_count);
+		}
+	}
+}
+
+static void tcmu_remove_all_queued_tmr(struct tcmu_dev *udev)
+{
+	struct tcmu_tmr *tmr, *tmp;
+
+	list_for_each_entry_safe(tmr, tmp, &udev->tmr_queue, queue_entry) {
+		list_del_init(&tmr->queue_entry);
+		kfree(tmr);
+	}
+}
+
+static void tcmu_dev_kref_release(struct kref *kref)
+{
+	struct tcmu_dev *udev = container_of(kref, struct tcmu_dev, kref);
+	struct se_device *dev = &udev->se_dev;
+	struct tcmu_cmd *cmd;
+	bool all_expired = true;
+	int i;
+
+	vfree(udev->mb_addr);
+	udev->mb_addr = NULL;
+
+	spin_lock_bh(&timed_out_udevs_lock);
+	if (!list_empty(&udev->timedout_entry))
+		list_del(&udev->timedout_entry);
+	spin_unlock_bh(&timed_out_udevs_lock);
+
+	/* Upper layer should drain all requests before calling this */
+	mutex_lock(&udev->cmdr_lock);
+	idr_for_each_entry(&udev->commands, cmd, i) {
+		if (tcmu_check_and_free_pending_cmd(cmd) != 0)
+			all_expired = false;
+	}
+	/* There can be left over TMR cmds. Remove them. */
+	tcmu_remove_all_queued_tmr(udev);
+	if (!list_empty(&udev->qfull_queue))
+		all_expired = false;
+	idr_destroy(&udev->commands);
+	WARN_ON(!all_expired);
+
+	tcmu_blocks_release(&udev->data_blocks, 0, udev->dbi_max + 1);
+	bitmap_free(udev->data_bitmap);
+	mutex_unlock(&udev->cmdr_lock);
+
+	pr_debug("dev_kref_release\n");
+
+	call_rcu(&dev->rcu_head, tcmu_dev_call_rcu);
+}
+
 static void run_qfull_queue(struct tcmu_dev *udev, bool fail)
 {
 	struct tcmu_cmd *tcmu_cmd, *tmp_cmd;
@@ -1678,6 +1760,25 @@ static struct page *tcmu_try_get_block_page(struct tcmu_dev *udev, uint32_t dbi)
 	return page;
 }
 
+static void tcmu_vma_open(struct vm_area_struct *vma)
+{
+	struct tcmu_dev *udev = vma->vm_private_data;
+
+	pr_debug("vma_open\n");
+
+	kref_get(&udev->kref);
+}
+
+static void tcmu_vma_close(struct vm_area_struct *vma)
+{
+	struct tcmu_dev *udev = vma->vm_private_data;
+
+	pr_debug("vma_close\n");
+
+	/* release ref from tcmu_vma_open */
+	kref_put(&udev->kref, tcmu_dev_kref_release);
+}
+
 static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf)
 {
 	struct tcmu_dev *udev = vmf->vma->vm_private_data;
@@ -1716,6 +1817,8 @@ static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf)
 }
 
 static const struct vm_operations_struct tcmu_vm_ops = {
+	.open = tcmu_vma_open,
+	.close = tcmu_vma_close,
 	.fault = tcmu_vma_fault,
 };
 
@@ -1732,6 +1835,8 @@ static int tcmu_mmap(struct uio_info *info, struct vm_area_struct *vma)
 	if (vma_pages(vma) != (udev->ring_size >> PAGE_SHIFT))
 		return -EINVAL;
 
+	tcmu_vma_open(vma);
+
 	return 0;
 }
 
@@ -1744,93 +1849,12 @@ static int tcmu_open(struct uio_info *info, struct inode *inode)
 		return -EBUSY;
 
 	udev->inode = inode;
-	kref_get(&udev->kref);
 
 	pr_debug("open\n");
 
 	return 0;
 }
 
-static void tcmu_dev_call_rcu(struct rcu_head *p)
-{
-	struct se_device *dev = container_of(p, struct se_device, rcu_head);
-	struct tcmu_dev *udev = TCMU_DEV(dev);
-
-	kfree(udev->uio_info.name);
-	kfree(udev->name);
-	kfree(udev);
-}
-
-static int tcmu_check_and_free_pending_cmd(struct tcmu_cmd *cmd)
-{
-	if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) {
-		kmem_cache_free(tcmu_cmd_cache, cmd);
-		return 0;
-	}
-	return -EINVAL;
-}
-
-static void tcmu_blocks_release(struct radix_tree_root *blocks,
-				int start, int end)
-{
-	int i;
-	struct page *page;
-
-	for (i = start; i < end; i++) {
-		page = radix_tree_delete(blocks, i);
-		if (page) {
-			__free_page(page);
-			atomic_dec(&global_db_count);
-		}
-	}
-}
-
-static void tcmu_remove_all_queued_tmr(struct tcmu_dev *udev)
-{
-	struct tcmu_tmr *tmr, *tmp;
-
-	list_for_each_entry_safe(tmr, tmp, &udev->tmr_queue, queue_entry) {
-		list_del_init(&tmr->queue_entry);
-		kfree(tmr);
-	}
-}
-
-static void tcmu_dev_kref_release(struct kref *kref)
-{
-	struct tcmu_dev *udev = container_of(kref, struct tcmu_dev, kref);
-	struct se_device *dev = &udev->se_dev;
-	struct tcmu_cmd *cmd;
-	bool all_expired = true;
-	int i;
-
-	vfree(udev->mb_addr);
-	udev->mb_addr = NULL;
-
-	spin_lock_bh(&timed_out_udevs_lock);
-	if (!list_empty(&udev->timedout_entry))
-		list_del(&udev->timedout_entry);
-	spin_unlock_bh(&timed_out_udevs_lock);
-
-	/* Upper layer should drain all requests before calling this */
-	mutex_lock(&udev->cmdr_lock);
-	idr_for_each_entry(&udev->commands, cmd, i) {
-		if (tcmu_check_and_free_pending_cmd(cmd) != 0)
-			all_expired = false;
-	}
-	/* There can be left over TMR cmds. Remove them. */
-	tcmu_remove_all_queued_tmr(udev);
-	if (!list_empty(&udev->qfull_queue))
-		all_expired = false;
-	idr_destroy(&udev->commands);
-	WARN_ON(!all_expired);
-
-	tcmu_blocks_release(&udev->data_blocks, 0, udev->dbi_max + 1);
-	bitmap_free(udev->data_bitmap);
-	mutex_unlock(&udev->cmdr_lock);
-
-	call_rcu(&dev->rcu_head, tcmu_dev_call_rcu);
-}
-
 static int tcmu_release(struct uio_info *info, struct inode *inode)
 {
 	struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);
@@ -1838,8 +1862,7 @@ static int tcmu_release(struct uio_info *info, struct inode *inode)
 	clear_bit(TCMU_DEV_BIT_OPEN, &udev->flags);
 
 	pr_debug("close\n");
-	/* release ref from open */
-	kref_put(&udev->kref, tcmu_dev_kref_release);
+
 	return 0;
 }
 
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 648eb4c4cf7f..8d97f0b45e9c 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1139,9 +1139,6 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
 		 struct super_block *sb, unsigned int flags)
 {
 	umode_t mode;
-	char ext[32];
-	char tag_name[14];
-	unsigned int i_nlink;
 	struct v9fs_session_info *v9ses = sb->s_fs_info;
 	struct v9fs_inode *v9inode = V9FS_I(inode);
 
@@ -1159,18 +1156,18 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
 		inode->i_gid = stat->n_gid;
 	}
 	if ((S_ISREG(inode->i_mode)) || (S_ISDIR(inode->i_mode))) {
-		if (v9fs_proto_dotu(v9ses) && (stat->extension[0] != '\0')) {
+		if (v9fs_proto_dotu(v9ses)) {
+			unsigned int i_nlink;
 			/*
-			 * Hadlink support got added later to
-			 * to the .u extension. So there can be
-			 * server out there that doesn't support
-			 * this even with .u extension. So check
-			 * for non NULL stat->extension
+			 * Hadlink support got added later to the .u extension.
+			 * So there can be a server out there that doesn't
+			 * support this even with .u extension. That would
+			 * just leave us with stat->extension being an empty
+			 * string, though.
 			 */
-			strlcpy(ext, stat->extension, sizeof(ext));
 			/* HARDLINKCOUNT %u */
-			sscanf(ext, "%13s %u", tag_name, &i_nlink);
-			if (!strncmp(tag_name, "HARDLINKCOUNT", 13))
+			if (sscanf(stat->extension,
+				   " HARDLINKCOUNT %u", &i_nlink) == 1)
 				set_nlink(inode, i_nlink);
 		}
 	}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 91ff9bfd7c1a..4aa1f88d5bf8 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -221,7 +221,7 @@ static void blkdev_bio_end_io_simple(struct bio *bio)
 
 static ssize_t
 __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
-		int nr_pages)
+		unsigned int nr_pages)
 {
 	struct file *file = iocb->ki_filp;
 	struct block_device *bdev = I_BDEV(bdev_file_inode(file));
@@ -355,8 +355,8 @@ static void blkdev_bio_end_io(struct bio *bio)
 	}
 }
 
-static ssize_t
-__blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
+static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
+		unsigned int nr_pages)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = bdev_file_inode(file);
@@ -486,7 +486,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 static ssize_t
 blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 {
-	int nr_pages;
+	unsigned int nr_pages;
 
 	if (!iov_iter_count(iter))
 		return 0;
@@ -495,7 +495,7 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 	if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_PAGES)
 		return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
 
-	return __blkdev_direct_IO(iocb, iter, min(nr_pages, BIO_MAX_PAGES));
+	return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages));
 }
 
 static __init int blkdev_init(void)
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index aa697ccfa9dc..3aedc484e440 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -133,11 +133,12 @@ cifs_dump_channel(struct seq_file *m, int i, struct cifs_chan *chan)
 {
 	struct TCP_Server_Info *server = chan->server;
 
-	seq_printf(m, "\t\tChannel %d Number of credits: %d Dialect 0x%x "
-		   "TCP status: %d Instance: %d Local Users To Server: %d "
-		   "SecMode: 0x%x Req On Wire: %d In Send: %d "
-		   "In MaxReq Wait: %d\n",
-		   i+1,
+	seq_printf(m, "\n\n\t\tChannel: %d ConnectionId: 0x%llx"
+		   "\n\t\tNumber of credits: %d Dialect 0x%x"
+		   "\n\t\tTCP status: %d Instance: %d"
+		   "\n\t\tLocal Users To Server: %d SecMode: 0x%x Req On Wire: %d"
+		   "\n\t\tIn Send: %d In MaxReq Wait: %d",
+		   i+1, server->conn_id,
 		   server->credits,
 		   server->dialect,
 		   server->tcpStatus,
@@ -227,7 +228,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
 	struct TCP_Server_Info *server;
 	struct cifs_ses *ses;
 	struct cifs_tcon *tcon;
-	int i, j;
+	int c, i, j;
 
 	seq_puts(m,
 		    "Display Internal CIFS Data Structures for Debugging\n"
@@ -275,14 +276,25 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
 	seq_putc(m, '\n');
 	seq_printf(m, "CIFSMaxBufSize: %d\n", CIFSMaxBufSize);
 	seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid);
-	seq_printf(m, "Servers:");
 
-	i = 0;
+	seq_printf(m, "\nServers: ");
+
+	c = 0;
 	spin_lock(&cifs_tcp_ses_lock);
 	list_for_each(tmp1, &cifs_tcp_ses_list) {
 		server = list_entry(tmp1, struct TCP_Server_Info,
 				    tcp_ses_list);
 
+		/* channel info will be printed as a part of sessions below */
+		if (server->is_channel)
+			continue;
+
+		c++;
+		seq_printf(m, "\n%d) ConnectionId: 0x%llx ",
+			c, server->conn_id);
+
+		if (server->hostname)
+			seq_printf(m, "Hostname: %s ", server->hostname);
 #ifdef CONFIG_CIFS_SMB_DIRECT
 		if (!server->rdma)
 			goto skip_rdma;
@@ -362,46 +374,48 @@ skip_rdma:
 		if (server->posix_ext_supported)
 			seq_printf(m, " posix");
 
-		i++;
+		if (server->rdma)
+			seq_printf(m, "\nRDMA ");
+		seq_printf(m, "\nTCP status: %d Instance: %d"
+				"\nLocal Users To Server: %d SecMode: 0x%x Req On Wire: %d",
+				server->tcpStatus,
+				server->reconnect_instance,
+				server->srv_count,
+				server->sec_mode, in_flight(server));
+
+		seq_printf(m, "\nIn Send: %d In MaxReq Wait: %d",
+				atomic_read(&server->in_send),
+				atomic_read(&server->num_waiters));
+
+		seq_printf(m, "\n\n\tSessions: ");
+		i = 0;
 		list_for_each(tmp2, &server->smb_ses_list) {
 			ses = list_entry(tmp2, struct cifs_ses,
 					 smb_ses_list);
+			i++;
 			if ((ses->serverDomain == NULL) ||
 				(ses->serverOS == NULL) ||
 				(ses->serverNOS == NULL)) {
-				seq_printf(m, "\n%d) Name: %s Uses: %d Capability: 0x%x\tSession Status: %d ",
-					i, ses->serverName, ses->ses_count,
+				seq_printf(m, "\n\t%d) Address: %s Uses: %d Capability: 0x%x\tSession Status: %d ",
+					i, ses->ip_addr, ses->ses_count,
 					ses->capabilities, ses->status);
 				if (ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST)
-					seq_printf(m, "Guest\t");
+					seq_printf(m, "Guest ");
 				else if (ses->session_flags & SMB2_SESSION_FLAG_IS_NULL)
-					seq_printf(m, "Anonymous\t");
+					seq_printf(m, "Anonymous ");
 			} else {
 				seq_printf(m,
-				    "\n%d) Name: %s  Domain: %s Uses: %d OS:"
-				    " %s\n\tNOS: %s\tCapability: 0x%x\n\tSMB"
-				    " session status: %d ",
-				i, ses->serverName, ses->serverDomain,
+				    "\n\t%d) Name: %s  Domain: %s Uses: %d OS: %s "
+				    "\n\tNOS: %s\tCapability: 0x%x"
+					"\n\tSMB session status: %d ",
+				i, ses->ip_addr, ses->serverDomain,
 				ses->ses_count, ses->serverOS, ses->serverNOS,
 				ses->capabilities, ses->status);
 			}
 
-			seq_printf(m,"Security type: %s\n",
+			seq_printf(m, "\n\tSecurity type: %s ",
 				get_security_type_str(server->ops->select_sectype(server, ses->sectype)));
 
-			if (server->rdma)
-				seq_printf(m, "RDMA\n\t");
-			seq_printf(m, "TCP status: %d Instance: %d\n\tLocal Users To "
-				   "Server: %d SecMode: 0x%x Req On Wire: %d",
-				   server->tcpStatus,
-				   server->reconnect_instance,
-				   server->srv_count,
-				   server->sec_mode, in_flight(server));
-
-			seq_printf(m, " In Send: %d In MaxReq Wait: %d",
-				atomic_read(&server->in_send),
-				atomic_read(&server->num_waiters));
-
 			/* dump session id helpful for use with network trace */
 			seq_printf(m, " SessionId: 0x%llx", ses->Suid);
 			if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA)
@@ -414,13 +428,13 @@ skip_rdma:
 				   from_kuid(&init_user_ns, ses->cred_uid));
 
 			if (ses->chan_count > 1) {
-				seq_printf(m, "\n\n\tExtra Channels: %zu\n",
+				seq_printf(m, "\n\n\tExtra Channels: %zu ",
 					   ses->chan_count-1);
 				for (j = 1; j < ses->chan_count; j++)
 					cifs_dump_channel(m, j, &ses->chans[j]);
 			}
 
-			seq_puts(m, "\n\n\tShares:");
+			seq_puts(m, "\n\n\tShares: ");
 			j = 0;
 
 			seq_printf(m, "\n\t%d) IPC: ", j);
@@ -437,38 +451,43 @@ skip_rdma:
 				cifs_debug_tcon(m, tcon);
 			}
 
-			seq_puts(m, "\n\tMIDs:\n");
-
-			spin_lock(&GlobalMid_Lock);
-			list_for_each(tmp3, &server->pending_mid_q) {
-				mid_entry = list_entry(tmp3, struct mid_q_entry,
-					qhead);
-				seq_printf(m, "\tState: %d com: %d pid:"
-					      " %d cbdata: %p mid %llu\n",
-					      mid_entry->mid_state,
-					      le16_to_cpu(mid_entry->command),
-					      mid_entry->pid,
-					      mid_entry->callback_data,
-					      mid_entry->mid);
-			}
-			spin_unlock(&GlobalMid_Lock);
-
 			spin_lock(&ses->iface_lock);
 			if (ses->iface_count)
-				seq_printf(m, "\n\tServer interfaces: %zu\n",
+				seq_printf(m, "\n\n\tServer interfaces: %zu",
 					   ses->iface_count);
 			for (j = 0; j < ses->iface_count; j++) {
 				struct cifs_server_iface *iface;
 
 				iface = &ses->iface_list[j];
-				seq_printf(m, "\t%d)", j);
+				seq_printf(m, "\n\t%d)", j+1);
 				cifs_dump_iface(m, iface);
 				if (is_ses_using_iface(ses, iface))
 					seq_puts(m, "\t\t[CONNECTED]\n");
 			}
 			spin_unlock(&ses->iface_lock);
 		}
+		if (i == 0)
+			seq_printf(m, "\n\t\t[NONE]");
+
+		seq_puts(m, "\n\n\tMIDs: ");
+		spin_lock(&GlobalMid_Lock);
+		list_for_each(tmp3, &server->pending_mid_q) {
+			mid_entry = list_entry(tmp3, struct mid_q_entry,
+					qhead);
+			seq_printf(m, "\n\tState: %d com: %d pid:"
+					" %d cbdata: %p mid %llu\n",
+					mid_entry->mid_state,
+					le16_to_cpu(mid_entry->command),
+					mid_entry->pid,
+					mid_entry->callback_data,
+					mid_entry->mid);
+		}
+		spin_unlock(&GlobalMid_Lock);
+		seq_printf(m, "\n--\n");
 	}
+	if (c == 0)
+		seq_printf(m, "\n\t[NONE]");
+
 	spin_unlock(&cifs_tcp_ses_lock);
 	seq_putc(m, '\n');
 
diff --git a/fs/cifs/cifs_swn.c b/fs/cifs/cifs_swn.c
index d35f599aa00e..f2d730fffccb 100644
--- a/fs/cifs/cifs_swn.c
+++ b/fs/cifs/cifs_swn.c
@@ -272,7 +272,7 @@ static struct cifs_swn_reg *cifs_find_swn_reg(struct cifs_tcon *tcon)
 	if (IS_ERR(share_name)) {
 		int ret;
 
-		ret = PTR_ERR(net_name);
+		ret = PTR_ERR(share_name);
 		cifs_dbg(VFS, "%s: failed to extract share name from target '%s': %d\n",
 				__func__, tcon->treeName, ret);
 		kfree(net_name);
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 562913e2b3f2..9d29eb9660c2 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -267,10 +267,11 @@ is_well_known_sid(const struct cifs_sid *psid, uint32_t *puid, bool is_group)
 	return true; /* well known sid found, uid returned */
 }
 
-static void
+static __u16
 cifs_copy_sid(struct cifs_sid *dst, const struct cifs_sid *src)
 {
 	int i;
+	__u16 size = 1 + 1 + 6;
 
 	dst->revision = src->revision;
 	dst->num_subauth = min_t(u8, src->num_subauth, SID_MAX_SUB_AUTHORITIES);
@@ -278,6 +279,9 @@ cifs_copy_sid(struct cifs_sid *dst, const struct cifs_sid *src)
 		dst->authority[i] = src->authority[i];
 	for (i = 0; i < dst->num_subauth; ++i)
 		dst->sub_auth[i] = src->sub_auth[i];
+	size += (dst->num_subauth * 4);
+
+	return size;
 }
 
 static int
@@ -521,8 +525,11 @@ exit_cifs_idmap(void)
 }
 
 /* copy ntsd, owner sid, and group sid from a security descriptor to another */
-static void copy_sec_desc(const struct cifs_ntsd *pntsd,
-				struct cifs_ntsd *pnntsd, __u32 sidsoffset)
+static __u32 copy_sec_desc(const struct cifs_ntsd *pntsd,
+				struct cifs_ntsd *pnntsd,
+				__u32 sidsoffset,
+				struct cifs_sid *pownersid,
+				struct cifs_sid *pgrpsid)
 {
 	struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
 	struct cifs_sid *nowner_sid_ptr, *ngroup_sid_ptr;
@@ -536,19 +543,25 @@ static void copy_sec_desc(const struct cifs_ntsd *pntsd,
 	pnntsd->gsidoffset = cpu_to_le32(sidsoffset + sizeof(struct cifs_sid));
 
 	/* copy owner sid */
-	owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
+	if (pownersid)
+		owner_sid_ptr = pownersid;
+	else
+		owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
 				le32_to_cpu(pntsd->osidoffset));
 	nowner_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset);
 	cifs_copy_sid(nowner_sid_ptr, owner_sid_ptr);
 
 	/* copy group sid */
-	group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
+	if (pgrpsid)
+		group_sid_ptr = pgrpsid;
+	else
+		group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
 				le32_to_cpu(pntsd->gsidoffset));
 	ngroup_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset +
 					sizeof(struct cifs_sid));
 	cifs_copy_sid(ngroup_sid_ptr, group_sid_ptr);
 
-	return;
+	return sidsoffset + (2 * sizeof(struct cifs_sid));
 }
 
 
@@ -663,6 +676,25 @@ static void mode_to_access_flags(umode_t mode, umode_t bits_to_use,
 	return;
 }
 
+static __u16 cifs_copy_ace(struct cifs_ace *dst, struct cifs_ace *src, struct cifs_sid *psid)
+{
+	__u16 size = 1 + 1 + 2 + 4;
+
+	dst->type = src->type;
+	dst->flags = src->flags;
+	dst->access_req = src->access_req;
+
+	/* Check if there's a replacement sid specified */
+	if (psid)
+		size += cifs_copy_sid(&dst->sid, psid);
+	else
+		size += cifs_copy_sid(&dst->sid, &src->sid);
+
+	dst->size = cpu_to_le16(size);
+
+	return size;
+}
+
 static __u16 fill_ace_for_sid(struct cifs_ace *pntace,
 			const struct cifs_sid *psid, __u64 nmode,
 			umode_t bits, __u8 access_type,
@@ -907,29 +939,30 @@ unsigned int setup_special_user_owner_ACE(struct cifs_ace *pntace)
 	return ace_size;
 }
 
-static int set_chmod_dacl(struct cifs_acl *pndacl, struct cifs_sid *pownersid,
-			struct cifs_sid *pgrpsid, __u64 *pnmode, bool modefromsid)
+static void populate_new_aces(char *nacl_base,
+		struct cifs_sid *pownersid,
+		struct cifs_sid *pgrpsid,
+		__u64 *pnmode, u32 *pnum_aces, u16 *pnsize,
+		bool modefromsid)
 {
-	u16 size = 0;
-	u32 num_aces = 0;
-	struct cifs_acl *pnndacl;
 	__u64 nmode;
+	u32 num_aces = 0;
+	u16 nsize = 0;
 	__u64 user_mode;
 	__u64 group_mode;
 	__u64 other_mode;
 	__u64 deny_user_mode = 0;
 	__u64 deny_group_mode = 0;
 	bool sticky_set = false;
-
-	pnndacl = (struct cifs_acl *)((char *)pndacl + sizeof(struct cifs_acl));
+	struct cifs_ace *pnntace = NULL;
 
 	nmode = *pnmode;
+	num_aces = *pnum_aces;
+	nsize = *pnsize;
 
 	if (modefromsid) {
-		struct cifs_ace *pntace =
-			(struct cifs_ace *)((char *)pnndacl + size);
-
-		size += setup_special_mode_ACE(pntace, nmode);
+		pnntace = (struct cifs_ace *) (nacl_base + nsize);
+		nsize += setup_special_mode_ACE(pnntace, nmode);
 		num_aces++;
 		goto set_size;
 	}
@@ -966,40 +999,170 @@ static int set_chmod_dacl(struct cifs_acl *pndacl, struct cifs_sid *pownersid,
 		sticky_set = true;
 
 	if (deny_user_mode) {
-		size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size),
-				pownersid, deny_user_mode, 0700, ACCESS_DENIED, false);
+		pnntace = (struct cifs_ace *) (nacl_base + nsize);
+		nsize += fill_ace_for_sid(pnntace, pownersid, deny_user_mode,
+				0700, ACCESS_DENIED, false);
 		num_aces++;
 	}
+
 	/* Group DENY ACE does not conflict with owner ALLOW ACE. Keep in preferred order*/
 	if (deny_group_mode && !(deny_group_mode & (user_mode >> 3))) {
-		size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size),
-				pgrpsid, deny_group_mode, 0070, ACCESS_DENIED, false);
+		pnntace = (struct cifs_ace *) (nacl_base + nsize);
+		nsize += fill_ace_for_sid(pnntace, pgrpsid, deny_group_mode,
+				0070, ACCESS_DENIED, false);
 		num_aces++;
 	}
-	size += fill_ace_for_sid((struct cifs_ace *) ((char *)pnndacl + size),
-			pownersid, user_mode, 0700, ACCESS_ALLOWED, true);
+
+	pnntace = (struct cifs_ace *) (nacl_base + nsize);
+	nsize += fill_ace_for_sid(pnntace, pownersid, user_mode,
+			0700, ACCESS_ALLOWED, true);
 	num_aces++;
+
 	/* Group DENY ACE conflicts with owner ALLOW ACE. So keep it after. */
 	if (deny_group_mode && (deny_group_mode & (user_mode >> 3))) {
-		size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size),
-				pgrpsid, deny_group_mode, 0070, ACCESS_DENIED, false);
+		pnntace = (struct cifs_ace *) (nacl_base + nsize);
+		nsize += fill_ace_for_sid(pnntace, pgrpsid, deny_group_mode,
+				0070, ACCESS_DENIED, false);
 		num_aces++;
 	}
-	size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size),
-			pgrpsid, group_mode, 0070, ACCESS_ALLOWED, !sticky_set);
+
+	pnntace = (struct cifs_ace *) (nacl_base + nsize);
+	nsize += fill_ace_for_sid(pnntace, pgrpsid, group_mode,
+			0070, ACCESS_ALLOWED, !sticky_set);
 	num_aces++;
-	size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size),
-			&sid_everyone, other_mode, 0007, ACCESS_ALLOWED, !sticky_set);
+
+	pnntace = (struct cifs_ace *) (nacl_base + nsize);
+	nsize += fill_ace_for_sid(pnntace, &sid_everyone, other_mode,
+			0007, ACCESS_ALLOWED, !sticky_set);
 	num_aces++;
 
 set_size:
+	*pnum_aces = num_aces;
+	*pnsize = nsize;
+}
+
+static __u16 replace_sids_and_copy_aces(struct cifs_acl *pdacl, struct cifs_acl *pndacl,
+		struct cifs_sid *pownersid, struct cifs_sid *pgrpsid,
+		struct cifs_sid *pnownersid, struct cifs_sid *pngrpsid)
+{
+	int i;
+	u16 size = 0;
+	struct cifs_ace *pntace = NULL;
+	char *acl_base = NULL;
+	u32 src_num_aces = 0;
+	u16 nsize = 0;
+	struct cifs_ace *pnntace = NULL;
+	char *nacl_base = NULL;
+	u16 ace_size = 0;
+
+	acl_base = (char *)pdacl;
+	size = sizeof(struct cifs_acl);
+	src_num_aces = le32_to_cpu(pdacl->num_aces);
+
+	nacl_base = (char *)pndacl;
+	nsize = sizeof(struct cifs_acl);
+
+	/* Go through all the ACEs */
+	for (i = 0; i < src_num_aces; ++i) {
+		pntace = (struct cifs_ace *) (acl_base + size);
+		pnntace = (struct cifs_ace *) (nacl_base + nsize);
+
+		if (pnownersid && compare_sids(&pntace->sid, pownersid) == 0)
+			ace_size = cifs_copy_ace(pnntace, pntace, pnownersid);
+		else if (pngrpsid && compare_sids(&pntace->sid, pgrpsid) == 0)
+			ace_size = cifs_copy_ace(pnntace, pntace, pngrpsid);
+		else
+			ace_size = cifs_copy_ace(pnntace, pntace, NULL);
+
+		size += le16_to_cpu(pntace->size);
+		nsize += ace_size;
+	}
+
+	return nsize;
+}
+
+static int set_chmod_dacl(struct cifs_acl *pdacl, struct cifs_acl *pndacl,
+		struct cifs_sid *pownersid,	struct cifs_sid *pgrpsid,
+		__u64 *pnmode, bool mode_from_sid)
+{
+	int i;
+	u16 size = 0;
+	struct cifs_ace *pntace = NULL;
+	char *acl_base = NULL;
+	u32 src_num_aces = 0;
+	u16 nsize = 0;
+	struct cifs_ace *pnntace = NULL;
+	char *nacl_base = NULL;
+	u32 num_aces = 0;
+	__u64 nmode;
+	bool new_aces_set = false;
+
+	/* Assuming that pndacl and pnmode are never NULL */
+	nmode = *pnmode;
+	nacl_base = (char *)pndacl;
+	nsize = sizeof(struct cifs_acl);
+
+	/* If pdacl is NULL, we don't have a src. Simply populate new ACL. */
+	if (!pdacl) {
+		populate_new_aces(nacl_base,
+				pownersid, pgrpsid,
+				pnmode, &num_aces, &nsize,
+				mode_from_sid);
+		goto finalize_dacl;
+	}
+
+	acl_base = (char *)pdacl;
+	size = sizeof(struct cifs_acl);
+	src_num_aces = le32_to_cpu(pdacl->num_aces);
+
+	/* Retain old ACEs which we can retain */
+	for (i = 0; i < src_num_aces; ++i) {
+		pntace = (struct cifs_ace *) (acl_base + size);
+		pnntace = (struct cifs_ace *) (nacl_base + nsize);
+
+		if (!new_aces_set && (pntace->flags & INHERITED_ACE)) {
+			/* Place the new ACEs in between existing explicit and inherited */
+			populate_new_aces(nacl_base,
+					pownersid, pgrpsid,
+					pnmode, &num_aces, &nsize,
+					mode_from_sid);
+
+			new_aces_set = true;
+		}
+
+		/* If it's any one of the ACE we're replacing, skip! */
+		if ((compare_sids(&pntace->sid, &sid_unix_NFS_mode) == 0) ||
+				(compare_sids(&pntace->sid, pownersid) == 0) ||
+				(compare_sids(&pntace->sid, pgrpsid) == 0) ||
+				(compare_sids(&pntace->sid, &sid_everyone) == 0) ||
+				(compare_sids(&pntace->sid, &sid_authusers) == 0)) {
+			goto next_ace;
+		}
+
+		nsize += cifs_copy_ace(pnntace, pntace, NULL);
+		num_aces++;
+
+next_ace:
+		size += le16_to_cpu(pntace->size);
+	}
+
+	/* If inherited ACEs are not present, place the new ones at the tail */
+	if (!new_aces_set) {
+		populate_new_aces(nacl_base,
+				pownersid, pgrpsid,
+				pnmode, &num_aces, &nsize,
+				mode_from_sid);
+
+		new_aces_set = true;
+	}
+
+finalize_dacl:
 	pndacl->num_aces = cpu_to_le32(num_aces);
-	pndacl->size = cpu_to_le16(size + sizeof(struct cifs_acl));
+	pndacl->size = cpu_to_le16(nsize);
 
 	return 0;
 }
 
-
 static int parse_sid(struct cifs_sid *psid, char *end_of_acl)
 {
 	/* BB need to add parm so we can store the SID BB */
@@ -1094,7 +1257,7 @@ static int parse_sec_desc(struct cifs_sb_info *cifs_sb,
 
 /* Convert permission bits from mode to equivalent CIFS ACL */
 static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
-	__u32 secdesclen, __u64 *pnmode, kuid_t uid, kgid_t gid,
+	__u32 secdesclen, __u32 *pnsecdesclen, __u64 *pnmode, kuid_t uid, kgid_t gid,
 	bool mode_from_sid, bool id_from_sid, int *aclflag)
 {
 	int rc = 0;
@@ -1102,39 +1265,59 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
 	__u32 ndacloffset;
 	__u32 sidsoffset;
 	struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
-	struct cifs_sid *nowner_sid_ptr, *ngroup_sid_ptr;
+	struct cifs_sid *nowner_sid_ptr = NULL, *ngroup_sid_ptr = NULL;
 	struct cifs_acl *dacl_ptr = NULL;  /* no need for SACL ptr */
 	struct cifs_acl *ndacl_ptr = NULL; /* no need for SACL ptr */
+	char *end_of_acl = ((char *)pntsd) + secdesclen;
+	u16 size = 0;
 
-	if (pnmode && *pnmode != NO_CHANGE_64) { /* chmod */
-		owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
-				le32_to_cpu(pntsd->osidoffset));
-		group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
-				le32_to_cpu(pntsd->gsidoffset));
-		dacloffset = le32_to_cpu(pntsd->dacloffset);
+	dacloffset = le32_to_cpu(pntsd->dacloffset);
+	if (dacloffset) {
 		dacl_ptr = (struct cifs_acl *)((char *)pntsd + dacloffset);
+		if (end_of_acl < (char *)dacl_ptr + le16_to_cpu(dacl_ptr->size)) {
+			cifs_dbg(VFS, "Server returned illegal ACL size\n");
+			return -EINVAL;
+		}
+	}
+
+	owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
+			le32_to_cpu(pntsd->osidoffset));
+	group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
+			le32_to_cpu(pntsd->gsidoffset));
+
+	if (pnmode && *pnmode != NO_CHANGE_64) { /* chmod */
 		ndacloffset = sizeof(struct cifs_ntsd);
 		ndacl_ptr = (struct cifs_acl *)((char *)pnntsd + ndacloffset);
-		ndacl_ptr->revision = dacl_ptr->revision;
-		ndacl_ptr->size = 0;
-		ndacl_ptr->num_aces = 0;
+		ndacl_ptr->revision =
+			dacloffset ? dacl_ptr->revision : cpu_to_le16(ACL_REVISION);
 
-		rc = set_chmod_dacl(ndacl_ptr, owner_sid_ptr, group_sid_ptr,
+		ndacl_ptr->size = cpu_to_le16(0);
+		ndacl_ptr->num_aces = cpu_to_le32(0);
+
+		rc = set_chmod_dacl(dacl_ptr, ndacl_ptr, owner_sid_ptr, group_sid_ptr,
 				    pnmode, mode_from_sid);
+
 		sidsoffset = ndacloffset + le16_to_cpu(ndacl_ptr->size);
-		/* copy sec desc control portion & owner and group sids */
-		copy_sec_desc(pntsd, pnntsd, sidsoffset);
-		*aclflag = CIFS_ACL_DACL;
+		/* copy the non-dacl portion of secdesc */
+		*pnsecdesclen = copy_sec_desc(pntsd, pnntsd, sidsoffset,
+				NULL, NULL);
+
+		*aclflag |= CIFS_ACL_DACL;
 	} else {
-		memcpy(pnntsd, pntsd, secdesclen);
+		ndacloffset = sizeof(struct cifs_ntsd);
+		ndacl_ptr = (struct cifs_acl *)((char *)pnntsd + ndacloffset);
+		ndacl_ptr->revision =
+			dacloffset ? dacl_ptr->revision : cpu_to_le16(ACL_REVISION);
+		ndacl_ptr->num_aces = dacl_ptr->num_aces;
+
 		if (uid_valid(uid)) { /* chown */
 			uid_t id;
-			owner_sid_ptr = (struct cifs_sid *)((char *)pnntsd +
-					le32_to_cpu(pnntsd->osidoffset));
 			nowner_sid_ptr = kmalloc(sizeof(struct cifs_sid),
 								GFP_KERNEL);
-			if (!nowner_sid_ptr)
-				return -ENOMEM;
+			if (!nowner_sid_ptr) {
+				rc = -ENOMEM;
+				goto chown_chgrp_exit;
+			}
 			id = from_kuid(&init_user_ns, uid);
 			if (id_from_sid) {
 				struct owner_sid *osid = (struct owner_sid *)nowner_sid_ptr;
@@ -1145,27 +1328,25 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
 				osid->SubAuthorities[0] = cpu_to_le32(88);
 				osid->SubAuthorities[1] = cpu_to_le32(1);
 				osid->SubAuthorities[2] = cpu_to_le32(id);
+
 			} else { /* lookup sid with upcall */
 				rc = id_to_sid(id, SIDOWNER, nowner_sid_ptr);
 				if (rc) {
 					cifs_dbg(FYI, "%s: Mapping error %d for owner id %d\n",
 						 __func__, rc, id);
-					kfree(nowner_sid_ptr);
-					return rc;
+					goto chown_chgrp_exit;
 				}
 			}
-			cifs_copy_sid(owner_sid_ptr, nowner_sid_ptr);
-			kfree(nowner_sid_ptr);
-			*aclflag = CIFS_ACL_OWNER;
+			*aclflag |= CIFS_ACL_OWNER;
 		}
 		if (gid_valid(gid)) { /* chgrp */
 			gid_t id;
-			group_sid_ptr = (struct cifs_sid *)((char *)pnntsd +
-					le32_to_cpu(pnntsd->gsidoffset));
 			ngroup_sid_ptr = kmalloc(sizeof(struct cifs_sid),
 								GFP_KERNEL);
-			if (!ngroup_sid_ptr)
-				return -ENOMEM;
+			if (!ngroup_sid_ptr) {
+				rc = -ENOMEM;
+				goto chown_chgrp_exit;
+			}
 			id = from_kgid(&init_user_ns, gid);
 			if (id_from_sid) {
 				struct owner_sid *gsid = (struct owner_sid *)ngroup_sid_ptr;
@@ -1176,19 +1357,35 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
 				gsid->SubAuthorities[0] = cpu_to_le32(88);
 				gsid->SubAuthorities[1] = cpu_to_le32(2);
 				gsid->SubAuthorities[2] = cpu_to_le32(id);
+
 			} else { /* lookup sid with upcall */
 				rc = id_to_sid(id, SIDGROUP, ngroup_sid_ptr);
 				if (rc) {
 					cifs_dbg(FYI, "%s: Mapping error %d for group id %d\n",
 						 __func__, rc, id);
-					kfree(ngroup_sid_ptr);
-					return rc;
+					goto chown_chgrp_exit;
 				}
 			}
-			cifs_copy_sid(group_sid_ptr, ngroup_sid_ptr);
-			kfree(ngroup_sid_ptr);
-			*aclflag = CIFS_ACL_GROUP;
+			*aclflag |= CIFS_ACL_GROUP;
+		}
+
+		if (dacloffset) {
+			/* Replace ACEs for old owner with new one */
+			size = replace_sids_and_copy_aces(dacl_ptr, ndacl_ptr,
+					owner_sid_ptr, group_sid_ptr,
+					nowner_sid_ptr, ngroup_sid_ptr);
+			ndacl_ptr->size = cpu_to_le16(size);
 		}
+
+		sidsoffset = ndacloffset + le16_to_cpu(ndacl_ptr->size);
+		/* copy the non-dacl portion of secdesc */
+		*pnsecdesclen = copy_sec_desc(pntsd, pnntsd, sidsoffset,
+				nowner_sid_ptr, ngroup_sid_ptr);
+
+chown_chgrp_exit:
+		/* errors could jump here. So make sure we return soon after this */
+		kfree(nowner_sid_ptr);
+		kfree(ngroup_sid_ptr);
 	}
 
 	return rc;
@@ -1384,6 +1581,9 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode,
 	int rc = 0;
 	int aclflag = CIFS_ACL_DACL; /* default flag to set */
 	__u32 secdesclen = 0;
+	__u32 nsecdesclen = 0;
+	__u32 dacloffset = 0;
+	struct cifs_acl *dacl_ptr = NULL;
 	struct cifs_ntsd *pntsd = NULL; /* acl obtained from server */
 	struct cifs_ntsd *pnntsd = NULL; /* modified acl to be sent to server */
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
@@ -1414,31 +1614,52 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode,
 		return rc;
 	}
 
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID)
+		mode_from_sid = true;
+	else
+		mode_from_sid = false;
+
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL)
+		id_from_sid = true;
+	else
+		id_from_sid = false;
+
+	/* Potentially, five new ACEs can be added to the ACL for U,G,O mapping */
+	nsecdesclen = secdesclen;
+	if (pnmode && *pnmode != NO_CHANGE_64) { /* chmod */
+		if (mode_from_sid)
+			nsecdesclen += sizeof(struct cifs_ace);
+		else /* cifsacl */
+			nsecdesclen += 5 * sizeof(struct cifs_ace);
+	} else { /* chown */
+		/* When ownership changes, changes new owner sid length could be different */
+		nsecdesclen = sizeof(struct cifs_ntsd) + (sizeof(struct cifs_sid) * 2);
+		dacloffset = le32_to_cpu(pntsd->dacloffset);
+		if (dacloffset) {
+			dacl_ptr = (struct cifs_acl *)((char *)pntsd + dacloffset);
+			if (mode_from_sid)
+				nsecdesclen +=
+					le32_to_cpu(dacl_ptr->num_aces) * sizeof(struct cifs_ace);
+			else /* cifsacl */
+				nsecdesclen += le16_to_cpu(dacl_ptr->size);
+		}
+	}
+
 	/*
 	 * Add three ACEs for owner, group, everyone getting rid of other ACEs
 	 * as chmod disables ACEs and set the security descriptor. Allocate
 	 * memory for the smb header, set security descriptor request security
 	 * descriptor parameters, and secuirty descriptor itself
 	 */
-	secdesclen = max_t(u32, secdesclen, DEFAULT_SEC_DESC_LEN);
-	pnntsd = kmalloc(secdesclen, GFP_KERNEL);
+	nsecdesclen = max_t(u32, nsecdesclen, DEFAULT_SEC_DESC_LEN);
+	pnntsd = kmalloc(nsecdesclen, GFP_KERNEL);
 	if (!pnntsd) {
 		kfree(pntsd);
 		cifs_put_tlink(tlink);
 		return -ENOMEM;
 	}
 
-	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID)
-		mode_from_sid = true;
-	else
-		mode_from_sid = false;
-
-	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL)
-		id_from_sid = true;
-	else
-		id_from_sid = false;
-
-	rc = build_sec_desc(pntsd, pnntsd, secdesclen, pnmode, uid, gid,
+	rc = build_sec_desc(pntsd, pnntsd, secdesclen, &nsecdesclen, pnmode, uid, gid,
 			    mode_from_sid, id_from_sid, &aclflag);
 
 	cifs_dbg(NOISY, "build_sec_desc rc: %d\n", rc);
@@ -1448,7 +1669,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode,
 
 	if (!rc) {
 		/* Set the security descriptor */
-		rc = ops->set_acl(pnntsd, secdesclen, inode, path, aclflag);
+		rc = ops->set_acl(pnntsd, nsecdesclen, inode, path, aclflag);
 		cifs_dbg(NOISY, "set_cifs_acl rc: %d\n", rc);
 	}
 	cifs_put_tlink(tlink);
diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h
index ff7fd0862e28..d9e704979d99 100644
--- a/fs/cifs/cifsacl.h
+++ b/fs/cifs/cifsacl.h
@@ -31,8 +31,8 @@
 #define EXEC_BIT        0x1
 
 #define ACL_OWNER_MASK 0700
-#define ACL_GROUP_MASK 0770
-#define ACL_EVERYONE_MASK 0777
+#define ACL_GROUP_MASK 0070
+#define ACL_EVERYONE_MASK 0007
 
 #define UBITSHIFT	6
 #define GBITSHIFT	3
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 51d53e4bdf6b..b8f1ff9a83f3 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -568,15 +568,15 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
 			return rc;
 		}
 	} else {
-		/* We use ses->serverName if no domain name available */
-		len = strlen(ses->serverName);
+		/* We use ses->ip_addr if no domain name available */
+		len = strlen(ses->ip_addr);
 
 		server = kmalloc(2 + (len * 2), GFP_KERNEL);
 		if (server == NULL) {
 			rc = -ENOMEM;
 			return rc;
 		}
-		len = cifs_strtoUTF16((__le16 *)server, ses->serverName, len,
+		len = cifs_strtoUTF16((__le16 *)server, ses->ip_addr, len,
 					nls_cp);
 		rc =
 		crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 38534e066cc7..d43e935d2df4 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -638,8 +638,18 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
 		seq_printf(s, ",snapshot=%llu", tcon->snapshot_time);
 	if (tcon->handle_timeout)
 		seq_printf(s, ",handletimeout=%u", tcon->handle_timeout);
-	/* convert actimeo and display it in seconds */
-	seq_printf(s, ",actimeo=%lu", cifs_sb->ctx->actimeo / HZ);
+
+	/*
+	 * Display file and directory attribute timeout in seconds.
+	 * If file and directory attribute timeout the same then actimeo
+	 * was likely specified on mount
+	 */
+	if (cifs_sb->ctx->acdirmax == cifs_sb->ctx->acregmax)
+		seq_printf(s, ",actimeo=%lu", cifs_sb->ctx->acregmax / HZ);
+	else {
+		seq_printf(s, ",acdirmax=%lu", cifs_sb->ctx->acdirmax / HZ);
+		seq_printf(s, ",acregmax=%lu", cifs_sb->ctx->acregmax / HZ);
+	}
 
 	if (tcon->ses->chan_max > 1)
 		seq_printf(s, ",multichannel,max_channels=%zu",
@@ -1526,6 +1536,7 @@ init_cifs(void)
  */
 	atomic_set(&sesInfoAllocCount, 0);
 	atomic_set(&tconInfoAllocCount, 0);
+	atomic_set(&tcpSesNextId, 0);
 	atomic_set(&tcpSesAllocCount, 0);
 	atomic_set(&tcpSesReconnectCount, 0);
 	atomic_set(&tconInfoReconnectCount, 0);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 71e9c6abb2a6..0d7ef150dbb2 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -165,5 +165,5 @@ extern struct dentry *cifs_smb3_do_mount(struct file_system_type *fs_type,
 extern const struct export_operations cifs_export_ops;
 #endif /* CONFIG_CIFS_NFSD_EXPORT */
 
-#define CIFS_VERSION   "2.30"
+#define CIFS_VERSION   "2.31"
 #endif				/* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 50fcb65920e8..3de3c5908a72 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -21,6 +21,7 @@
 
 #include <linux/in.h>
 #include <linux/in6.h>
+#include <linux/inet.h>
 #include <linux/slab.h>
 #include <linux/mempool.h>
 #include <linux/workqueue.h>
@@ -504,6 +505,8 @@ struct smb_version_operations {
 	loff_t (*llseek)(struct file *, struct cifs_tcon *, loff_t, int);
 	/* Check for STATUS_IO_TIMEOUT */
 	bool (*is_status_io_timeout)(char *buf);
+	/* Check for STATUS_NETWORK_NAME_DELETED */
+	void (*is_network_name_deleted)(char *buf, struct TCP_Server_Info *srv);
 };
 
 struct smb_version_values {
@@ -577,6 +580,7 @@ inc_rfc1001_len(void *buf, int count)
 struct TCP_Server_Info {
 	struct list_head tcp_ses_list;
 	struct list_head smb_ses_list;
+	__u64 conn_id; /* connection identifier (useful for debugging) */
 	int srv_count; /* reference counter */
 	/* 15 character server name + 0x20 16th byte indicating type = srv */
 	char server_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
@@ -901,7 +905,7 @@ struct cifs_ses {
 	kuid_t linux_uid;	/* overriding owner of files on the mount */
 	kuid_t cred_uid;	/* owner of credentials */
 	unsigned int capabilities;
-	char serverName[SERVER_NAME_LEN_WITH_NULL];
+	char ip_addr[INET6_ADDRSTRLEN + 1]; /* Max ipv6 (or v4) addr string len */
 	char *user_name;	/* must not be null except during init of sess
 				   and after mount option parsing we fill it */
 	char *domainName;
@@ -1704,7 +1708,9 @@ static inline bool is_retryable_error(int error)
 #define   CIFS_ECHO_OP      0x080    /* echo request */
 #define   CIFS_OBREAK_OP   0x0100    /* oplock break request */
 #define   CIFS_NEG_OP      0x0200    /* negotiate request */
-#define   CIFS_OP_MASK     0x0380    /* mask request type */
+/* Lower bitmask values are reserved by others below. */
+#define   CIFS_SESS_OP     0x2000    /* session setup request */
+#define   CIFS_OP_MASK     0x2380    /* mask request type */
 
 #define   CIFS_HAS_CREDITS 0x0400    /* already has credits */
 #define   CIFS_TRANSFORM_REQ 0x0800    /* transform request before sending */
@@ -1844,6 +1850,7 @@ GLOBAL_EXTERN spinlock_t GlobalMid_Lock;  /* protects above & list operations */
  */
 GLOBAL_EXTERN atomic_t sesInfoAllocCount;
 GLOBAL_EXTERN atomic_t tconInfoAllocCount;
+GLOBAL_EXTERN atomic_t tcpSesNextId;
 GLOBAL_EXTERN atomic_t tcpSesAllocCount;
 GLOBAL_EXTERN atomic_t tcpSesReconnectCount;
 GLOBAL_EXTERN atomic_t tconInfoReconnectCount;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 32f7a013402e..75ce6f742b8d 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -232,6 +232,8 @@ extern unsigned int setup_special_user_owner_ACE(struct cifs_ace *pace);
 extern void dequeue_mid(struct mid_q_entry *mid, bool malformed);
 extern int cifs_read_from_socket(struct TCP_Server_Info *server, char *buf,
 			         unsigned int to_read);
+extern ssize_t cifs_discard_from_socket(struct TCP_Server_Info *server,
+					size_t to_read);
 extern int cifs_read_page_from_socket(struct TCP_Server_Info *server,
 					struct page *page,
 					unsigned int page_offset,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 0496934feecb..c279527aae92 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1451,9 +1451,9 @@ cifs_discard_remaining_data(struct TCP_Server_Info *server)
 	while (remaining > 0) {
 		int length;
 
-		length = cifs_read_from_socket(server, server->bigbuf,
-				min_t(unsigned int, remaining,
-				    CIFSMaxBufSize + MAX_HEADER_SIZE(server)));
+		length = cifs_discard_from_socket(server,
+				min_t(size_t, remaining,
+				      CIFSMaxBufSize + MAX_HEADER_SIZE(server)));
 		if (length < 0)
 			return length;
 		server->total_read += length;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 4bb9decbbf27..112692300fb6 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -242,7 +242,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
 	server->max_read = 0;
 
 	cifs_dbg(FYI, "Mark tcp session as need reconnect\n");
-	trace_smb3_reconnect(server->CurrentMid, server->hostname);
+	trace_smb3_reconnect(server->CurrentMid, server->conn_id, server->hostname);
 
 	/* before reconnecting the tcp session, mark the smb session (uid)
 		and the tid bad so they are not used until reconnected */
@@ -564,6 +564,23 @@ cifs_read_from_socket(struct TCP_Server_Info *server, char *buf,
 	return cifs_readv_from_socket(server, &smb_msg);
 }
 
+ssize_t
+cifs_discard_from_socket(struct TCP_Server_Info *server, size_t to_read)
+{
+	struct msghdr smb_msg;
+
+	/*
+	 *  iov_iter_discard already sets smb_msg.type and count and iov_offset
+	 *  and cifs_readv_from_socket sets msg_control and msg_controllen
+	 *  so little to initialize in struct msghdr
+	 */
+	smb_msg.msg_name = NULL;
+	smb_msg.msg_namelen = 0;
+	iov_iter_discard(&smb_msg.msg_iter, READ, to_read);
+
+	return cifs_readv_from_socket(server, &smb_msg);
+}
+
 int
 cifs_read_page_from_socket(struct TCP_Server_Info *server, struct page *page,
 	unsigned int page_offset, unsigned int to_read)
@@ -846,7 +863,7 @@ static void
 smb2_add_credits_from_hdr(char *buffer, struct TCP_Server_Info *server)
 {
 	struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buffer;
-	int scredits = server->credits;
+	int scredits, in_flight;
 
 	/*
 	 * SMB1 does not use credits.
@@ -857,12 +874,14 @@ smb2_add_credits_from_hdr(char *buffer, struct TCP_Server_Info *server)
 	if (shdr->CreditRequest) {
 		spin_lock(&server->req_lock);
 		server->credits += le16_to_cpu(shdr->CreditRequest);
+		scredits = server->credits;
+		in_flight = server->in_flight;
 		spin_unlock(&server->req_lock);
 		wake_up(&server->request_q);
 
 		trace_smb3_add_credits(server->CurrentMid,
-				server->hostname, scredits,
-				le16_to_cpu(shdr->CreditRequest));
+				server->conn_id, server->hostname, scredits,
+				le16_to_cpu(shdr->CreditRequest), in_flight);
 		cifs_server_dbg(FYI, "%s: added %u credits total=%d\n",
 				__func__, le16_to_cpu(shdr->CreditRequest),
 				scredits);
@@ -993,6 +1012,10 @@ next_pdu:
 			if (mids[i] != NULL) {
 				mids[i]->resp_buf_size = server->pdu_size;
 
+				if (bufs[i] && server->ops->is_network_name_deleted)
+					server->ops->is_network_name_deleted(bufs[i],
+									server);
+
 				if (!mids[i]->multiRsp || mids[i]->multiEnd)
 					mids[i]->callback(mids[i]);
 
@@ -1317,6 +1340,7 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx)
 		goto out_err_crypto_release;
 	}
 
+	tcp_ses->conn_id = atomic_inc_return(&tcpSesNextId);
 	tcp_ses->noblockcnt = ctx->rootfs;
 	tcp_ses->noblocksnd = ctx->noblocksnd || ctx->rootfs;
 	tcp_ses->noautotune = ctx->noautotune;
@@ -1838,9 +1862,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
 	/* new SMB session uses our server ref */
 	ses->server = server;
 	if (server->dstaddr.ss_family == AF_INET6)
-		sprintf(ses->serverName, "%pI6", &addr6->sin6_addr);
+		sprintf(ses->ip_addr, "%pI6", &addr6->sin6_addr);
 	else
-		sprintf(ses->serverName, "%pI4", &addr->sin_addr);
+		sprintf(ses->ip_addr, "%pI4", &addr->sin_addr);
 
 	if (ctx->username) {
 		ses->user_name = kstrdup(ctx->username, GFP_KERNEL);
@@ -2269,7 +2293,9 @@ compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data)
 	if (strcmp(old->local_nls->charset, new->local_nls->charset))
 		return 0;
 
-	if (old->ctx->actimeo != new->ctx->actimeo)
+	if (old->ctx->acregmax != new->ctx->acregmax)
+		return 0;
+	if (old->ctx->acdirmax != new->ctx->acdirmax)
 		return 0;
 
 	return 1;
@@ -2911,7 +2937,7 @@ static int mount_setup_tlink(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
 #ifdef CONFIG_CIFS_DFS_UPCALL
 /*
  * cifs_build_path_to_root returns full path to root when we do not have an
- * exiting connection (tcon)
+ * existing connection (tcon)
  */
 static char *
 build_unc_path_to_root(const struct smb3_fs_context *ctx,
@@ -3038,96 +3064,91 @@ static int update_vol_info(const struct dfs_cache_tgt_iterator *tgt_it,
 	return 0;
 }
 
-static int setup_dfs_tgt_conn(const char *path, const char *full_path,
-			      const struct dfs_cache_tgt_iterator *tgt_it,
-			      struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx,
-			      unsigned int *xid, struct TCP_Server_Info **server,
-			      struct cifs_ses **ses, struct cifs_tcon **tcon)
-{
-	int rc;
-	struct dfs_info3_param ref = {0};
-	char *mdata = NULL;
-	struct smb3_fs_context fake_ctx = {NULL};
-	char *fake_devname = NULL;
-
-	cifs_dbg(FYI, "%s: dfs path: %s\n", __func__, path);
-
-	rc = dfs_cache_get_tgt_referral(path, tgt_it, &ref);
-	if (rc)
-		return rc;
-
-	mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options,
-					   full_path + 1, &ref,
-					   &fake_devname);
-	free_dfs_info_param(&ref);
-
-	if (IS_ERR(mdata)) {
-		rc = PTR_ERR(mdata);
-		mdata = NULL;
-	} else
-		rc = cifs_setup_volume_info(&fake_ctx, mdata, fake_devname);
-
-	kfree(mdata);
-	kfree(fake_devname);
-
-	if (!rc) {
-		/*
-		 * We use a 'fake_ctx' here because we need pass it down to the
-		 * mount_{get,put} functions to test connection against new DFS
-		 * targets.
-		 */
-		mount_put_conns(cifs_sb, *xid, *server, *ses, *tcon);
-		rc = mount_get_conns(&fake_ctx, cifs_sb, xid, server, ses,
-				     tcon);
-		if (!rc || (*server && *ses)) {
-			/*
-			 * We were able to connect to new target server.
-			 * Update current context with new target server.
-			 */
-			rc = update_vol_info(tgt_it, &fake_ctx, ctx);
-		}
-	}
-	smb3_cleanup_fs_context_contents(&fake_ctx);
-	return rc;
-}
-
 static int do_dfs_failover(const char *path, const char *full_path, struct cifs_sb_info *cifs_sb,
 			   struct smb3_fs_context *ctx, struct cifs_ses *root_ses,
 			   unsigned int *xid, struct TCP_Server_Info **server,
 			   struct cifs_ses **ses, struct cifs_tcon **tcon)
 {
 	int rc;
-	struct dfs_cache_tgt_list tgt_list;
+	struct dfs_cache_tgt_list tgt_list = {0};
 	struct dfs_cache_tgt_iterator *tgt_it = NULL;
+	struct smb3_fs_context tmp_ctx = {NULL};
 
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS)
 		return -EOPNOTSUPP;
 
+	cifs_dbg(FYI, "%s: path=%s full_path=%s\n", __func__, path, full_path);
+
 	rc = dfs_cache_noreq_find(path, NULL, &tgt_list);
 	if (rc)
 		return rc;
+	/*
+	 * We use a 'tmp_ctx' here because we need pass it down to the mount_{get,put} functions to
+	 * test connection against new DFS targets.
+	 */
+	rc = smb3_fs_context_dup(&tmp_ctx, ctx);
+	if (rc)
+		goto out;
 
 	for (;;) {
+		struct dfs_info3_param ref = {0};
+		char *fake_devname = NULL, *mdata = NULL;
+
 		/* Get next DFS target server - if any */
 		rc = get_next_dfs_tgt(path, &tgt_list, &tgt_it);
 		if (rc)
 			break;
-		/* Connect to next DFS target */
-		rc = setup_dfs_tgt_conn(path, full_path, tgt_it, cifs_sb, ctx, xid, server, ses,
-					tcon);
-		if (!rc || (*server && *ses))
+
+		rc = dfs_cache_get_tgt_referral(path, tgt_it, &ref);
+		if (rc)
+			break;
+
+		cifs_dbg(FYI, "%s: old ctx: UNC=%s prepath=%s\n", __func__, tmp_ctx.UNC,
+			 tmp_ctx.prepath);
+
+		mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options, full_path + 1, &ref,
+						   &fake_devname);
+		free_dfs_info_param(&ref);
+
+		if (IS_ERR(mdata)) {
+			rc = PTR_ERR(mdata);
+			mdata = NULL;
+		} else
+			rc = cifs_setup_volume_info(&tmp_ctx, mdata, fake_devname);
+
+		kfree(mdata);
+		kfree(fake_devname);
+
+		if (rc)
 			break;
+
+		cifs_dbg(FYI, "%s: new ctx: UNC=%s prepath=%s\n", __func__, tmp_ctx.UNC,
+			 tmp_ctx.prepath);
+
+		mount_put_conns(cifs_sb, *xid, *server, *ses, *tcon);
+		rc = mount_get_conns(&tmp_ctx, cifs_sb, xid, server, ses, tcon);
+		if (!rc || (*server && *ses)) {
+			/*
+			 * We were able to connect to new target server. Update current context with
+			 * new target server.
+			 */
+			rc = update_vol_info(tgt_it, &tmp_ctx, ctx);
+			break;
+		}
 	}
 	if (!rc) {
+		cifs_dbg(FYI, "%s: final ctx: UNC=%s prepath=%s\n", __func__, tmp_ctx.UNC,
+			 tmp_ctx.prepath);
 		/*
-		 * Update DFS target hint in DFS referral cache with the target
-		 * server we successfully reconnected to.
+		 * Update DFS target hint in DFS referral cache with the target server we
+		 * successfully reconnected to.
 		 */
-		rc = dfs_cache_update_tgthint(*xid, root_ses ? root_ses : *ses,
-					      cifs_sb->local_nls,
-					      cifs_remap(cifs_sb), path,
-					      tgt_it);
+		rc = dfs_cache_update_tgthint(*xid, root_ses ? root_ses : *ses, cifs_sb->local_nls,
+					      cifs_remap(cifs_sb), path, tgt_it);
 	}
+
+out:
+	smb3_cleanup_fs_context_contents(&tmp_ctx);
 	dfs_cache_free_tgts(&tgt_list);
 	return rc;
 }
@@ -3285,77 +3306,77 @@ static void put_root_ses(struct cifs_ses *ses)
 		cifs_put_smb_ses(ses);
 }
 
-/* Check if a path component is remote and then update @dfs_path accordingly */
-static int check_dfs_prepath(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx,
-			     const unsigned int xid, struct TCP_Server_Info *server,
-			     struct cifs_tcon *tcon, char **dfs_path)
+/* Set up next dfs prefix path in @dfs_path */
+static int next_dfs_prepath(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx,
+			    const unsigned int xid, struct TCP_Server_Info *server,
+			    struct cifs_tcon *tcon, char **dfs_path)
 {
-	char *path, *s;
-	char sep = CIFS_DIR_SEP(cifs_sb), tmp;
-	char *npath;
-	int rc = 0;
-	int added_treename = tcon->Flags & SMB_SHARE_IS_IN_DFS;
-	int skip = added_treename;
+	char *path, *npath;
+	int added_treename = is_tcon_dfs(tcon);
+	int rc;
 
 	path = cifs_build_path_to_root(ctx, cifs_sb, tcon, added_treename);
 	if (!path)
 		return -ENOMEM;
 
-	/*
-	 * Walk through the path components in @path and check if they're accessible. In case any of
-	 * the components is -EREMOTE, then update @dfs_path with the next DFS referral request path
-	 * (NOT including the remaining components).
-	 */
-	s = path;
-	do {
-		/* skip separators */
-		while (*s && *s == sep)
-			s++;
-		if (!*s)
-			break;
-		/* next separator */
-		while (*s && *s != sep)
-			s++;
-		/*
-		 * if the treename is added, we then have to skip the first
-		 * part within the separators
-		 */
-		if (skip) {
-			skip = 0;
-			continue;
+	rc = is_path_remote(cifs_sb, ctx, xid, server, tcon);
+	if (rc == -EREMOTE) {
+		struct smb3_fs_context v = {NULL};
+		/* if @path contains a tree name, skip it in the prefix path */
+		if (added_treename) {
+			rc = smb3_parse_devname(path, &v);
+			if (rc)
+				goto out;
+			npath = build_unc_path_to_root(&v, cifs_sb, true);
+			smb3_cleanup_fs_context_contents(&v);
+		} else {
+			v.UNC = ctx->UNC;
+			v.prepath = path + 1;
+			npath = build_unc_path_to_root(&v, cifs_sb, true);
 		}
-		tmp = *s;
-		*s = 0;
-		rc = server->ops->is_path_accessible(xid, tcon, cifs_sb, path);
-		if (rc && rc == -EREMOTE) {
-			struct smb3_fs_context v = {NULL};
-			/* if @path contains a tree name, skip it in the prefix path */
-			if (added_treename) {
-				rc = smb3_parse_devname(path, &v);
-				if (rc)
-					break;
-				rc = -EREMOTE;
-				npath = build_unc_path_to_root(&v, cifs_sb, true);
-				smb3_cleanup_fs_context_contents(&v);
-			} else {
-				v.UNC = ctx->UNC;
-				v.prepath = path + 1;
-				npath = build_unc_path_to_root(&v, cifs_sb, true);
-			}
-			if (IS_ERR(npath)) {
-				rc = PTR_ERR(npath);
-				break;
-			}
-			kfree(*dfs_path);
-			*dfs_path = npath;
+
+		if (IS_ERR(npath)) {
+			rc = PTR_ERR(npath);
+			goto out;
 		}
-		*s = tmp;
-	} while (rc == 0);
 
+		kfree(*dfs_path);
+		*dfs_path = npath;
+		rc = -EREMOTE;
+	}
+
+out:
 	kfree(path);
 	return rc;
 }
 
+/* Check if resolved targets can handle any DFS referrals */
+static int is_referral_server(const char *ref_path, struct cifs_tcon *tcon, bool *ref_server)
+{
+	int rc;
+	struct dfs_info3_param ref = {0};
+
+	if (is_tcon_dfs(tcon)) {
+		*ref_server = true;
+	} else {
+		cifs_dbg(FYI, "%s: ref_path=%s\n", __func__, ref_path);
+
+		rc = dfs_cache_noreq_find(ref_path, &ref, NULL);
+		if (rc) {
+			cifs_dbg(VFS, "%s: dfs_cache_noreq_find: failed (rc=%d)\n", __func__, rc);
+			return rc;
+		}
+		cifs_dbg(FYI, "%s: ref.flags=0x%x\n", __func__, ref.flags);
+		/*
+		 * Check if all targets are capable of handling DFS referrals as per
+		 * MS-DFSC 2.2.4 RESP_GET_DFS_REFERRAL.
+		 */
+		*ref_server = !!(ref.flags & DFSREF_REFERRAL_SERVER);
+		free_dfs_info_param(&ref);
+	}
+	return 0;
+}
+
 int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
 {
 	int rc = 0;
@@ -3367,18 +3388,19 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
 	char *ref_path = NULL, *full_path = NULL;
 	char *oldmnt = NULL;
 	char *mntdata = NULL;
+	bool ref_server = false;
 
 	rc = mount_get_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon);
 	/*
-	 * Unconditionally try to get an DFS referral (even cached) to determine whether it is an
-	 * DFS mount.
+	 * If called with 'nodfs' mount option, then skip DFS resolving.  Otherwise unconditionally
+	 * try to get an DFS referral (even cached) to determine whether it is an DFS mount.
 	 *
 	 * Skip prefix path to provide support for DFS referrals from w2k8 servers which don't seem
 	 * to respond with PATH_NOT_COVERED to requests that include the prefix.
 	 */
-	if (dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb), ctx->UNC + 1, NULL,
+	if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) ||
+	    dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb), ctx->UNC + 1, NULL,
 			   NULL)) {
-		/* No DFS referral was returned.  Looks like a regular share. */
 		if (rc)
 			goto error;
 		/* Check if it is fully accessible and then mount it */
@@ -3432,13 +3454,18 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
 			break;
 		if (!tcon)
 			continue;
+
 		/* Make sure that requests go through new root servers */
-		if (is_tcon_dfs(tcon)) {
+		rc = is_referral_server(ref_path + 1, tcon, &ref_server);
+		if (rc)
+			break;
+		if (ref_server) {
 			put_root_ses(root_ses);
 			set_root_ses(cifs_sb, ses, &root_ses);
 		}
-		/* Check for remaining path components and then continue chasing them (-EREMOTE) */
-		rc = check_dfs_prepath(cifs_sb, ctx, xid, server, tcon, &ref_path);
+
+		/* Get next dfs path and then continue chasing them if -EREMOTE */
+		rc = next_dfs_prepath(cifs_sb, ctx, xid, server, tcon, &ref_path);
 		/* Prevent recursion on broken link referrals */
 		if (rc == -EREMOTE && ++count > MAX_NESTED_LINKS)
 			rc = -ELOOP;
diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index 4950ab0486ae..098b4bc8da59 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -37,11 +37,12 @@ struct cache_dfs_tgt {
 struct cache_entry {
 	struct hlist_node hlist;
 	const char *path;
-	int ttl;
-	int srvtype;
-	int flags;
+	int hdr_flags; /* RESP_GET_DFS_REFERRAL.ReferralHeaderFlags */
+	int ttl; /* DFS_REREFERRAL_V3.TimeToLive */
+	int srvtype; /* DFS_REREFERRAL_V3.ServerType */
+	int ref_flags; /* DFS_REREFERRAL_V3.ReferralEntryFlags */
 	struct timespec64 etime;
-	int path_consumed;
+	int path_consumed; /* RESP_GET_DFS_REFERRAL.PathConsumed */
 	int numtgts;
 	struct list_head tlist;
 	struct cache_dfs_tgt *tgthint;
@@ -166,14 +167,11 @@ static int dfscache_proc_show(struct seq_file *m, void *v)
 				continue;
 
 			seq_printf(m,
-				   "cache entry: path=%s,type=%s,ttl=%d,etime=%ld,"
-				   "interlink=%s,path_consumed=%d,expired=%s\n",
-				   ce->path,
-				   ce->srvtype == DFS_TYPE_ROOT ? "root" : "link",
-				   ce->ttl, ce->etime.tv_nsec,
-				   IS_INTERLINK_SET(ce->flags) ? "yes" : "no",
-				   ce->path_consumed,
-				   cache_entry_expired(ce) ? "yes" : "no");
+				   "cache entry: path=%s,type=%s,ttl=%d,etime=%ld,hdr_flags=0x%x,ref_flags=0x%x,interlink=%s,path_consumed=%d,expired=%s\n",
+				   ce->path, ce->srvtype == DFS_TYPE_ROOT ? "root" : "link",
+				   ce->ttl, ce->etime.tv_nsec, ce->ref_flags, ce->hdr_flags,
+				   IS_INTERLINK_SET(ce->hdr_flags) ? "yes" : "no",
+				   ce->path_consumed, cache_entry_expired(ce) ? "yes" : "no");
 
 			list_for_each_entry(t, &ce->tlist, list) {
 				seq_printf(m, "  %s%s\n",
@@ -236,11 +234,12 @@ static inline void dump_tgts(const struct cache_entry *ce)
 
 static inline void dump_ce(const struct cache_entry *ce)
 {
-	cifs_dbg(FYI, "cache entry: path=%s,type=%s,ttl=%d,etime=%ld,interlink=%s,path_consumed=%d,expired=%s\n",
+	cifs_dbg(FYI, "cache entry: path=%s,type=%s,ttl=%d,etime=%ld,hdr_flags=0x%x,ref_flags=0x%x,interlink=%s,path_consumed=%d,expired=%s\n",
 		 ce->path,
 		 ce->srvtype == DFS_TYPE_ROOT ? "root" : "link", ce->ttl,
 		 ce->etime.tv_nsec,
-		 IS_INTERLINK_SET(ce->flags) ? "yes" : "no",
+		 ce->hdr_flags, ce->ref_flags,
+		 IS_INTERLINK_SET(ce->hdr_flags) ? "yes" : "no",
 		 ce->path_consumed,
 		 cache_entry_expired(ce) ? "yes" : "no");
 	dump_tgts(ce);
@@ -381,7 +380,8 @@ static int copy_ref_data(const struct dfs_info3_param *refs, int numrefs,
 	ce->ttl = refs[0].ttl;
 	ce->etime = get_expire_time(ce->ttl);
 	ce->srvtype = refs[0].server_type;
-	ce->flags = refs[0].ref_flag;
+	ce->hdr_flags = refs[0].flags;
+	ce->ref_flags = refs[0].ref_flag;
 	ce->path_consumed = refs[0].path_consumed;
 
 	for (i = 0; i < numrefs; i++) {
@@ -799,7 +799,8 @@ static int setup_referral(const char *path, struct cache_entry *ce,
 	ref->path_consumed = ce->path_consumed;
 	ref->ttl = ce->ttl;
 	ref->server_type = ce->srvtype;
-	ref->ref_flag = ce->flags;
+	ref->ref_flag = ce->ref_flags;
+	ref->flags = ce->hdr_flags;
 
 	return 0;
 
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 6d001905c8e5..26de4329d161 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -580,7 +580,7 @@ int cifs_open(struct inode *inode, struct file *file)
 		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
 			if (tcon->ses->serverNOS)
 				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
-					 tcon->ses->serverName,
+					 tcon->ses->ip_addr,
 					 tcon->ses->serverNOS);
 			tcon->broken_posix_open = true;
 		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c
index 12a5da0230b5..892f51a21278 100644
--- a/fs/cifs/fs_context.c
+++ b/fs/cifs/fs_context.c
@@ -140,6 +140,8 @@ const struct fs_parameter_spec smb3_fs_parameters[] = {
 	fsparam_u32("rsize", Opt_rsize),
 	fsparam_u32("wsize", Opt_wsize),
 	fsparam_u32("actimeo", Opt_actimeo),
+	fsparam_u32("acdirmax", Opt_acdirmax),
+	fsparam_u32("acregmax", Opt_acregmax),
 	fsparam_u32("echo_interval", Opt_echo_interval),
 	fsparam_u32("max_credits", Opt_max_credits),
 	fsparam_u32("handletimeout", Opt_handletimeout),
@@ -397,7 +399,7 @@ cifs_parse_smb_version(char *value, struct smb3_fs_context *ctx, bool is_smb3)
 		ctx->vals = &smb3any_values;
 		break;
 	case Smb_default:
-		ctx->ops = &smb30_operations; /* currently identical with 3.0 */
+		ctx->ops = &smb30_operations;
 		ctx->vals = &smbdefault_values;
 		break;
 	default:
@@ -542,20 +544,37 @@ static int smb3_fs_context_parse_monolithic(struct fs_context *fc,
 
 	/* BB Need to add support for sep= here TBD */
 	while ((key = strsep(&options, ",")) != NULL) {
-		if (*key) {
-			size_t v_len = 0;
-			char *value = strchr(key, '=');
-
-			if (value) {
-				if (value == key)
-					continue;
-				*value++ = 0;
-				v_len = strlen(value);
-			}
-			ret = vfs_parse_fs_string(fc, key, value, v_len);
-			if (ret < 0)
-				break;
+		size_t len;
+		char *value;
+
+		if (*key == 0)
+			break;
+
+		/* Check if following character is the deliminator If yes,
+		 * we have encountered a double deliminator reset the NULL
+		 * character to the deliminator
+		 */
+		while (options && options[0] == ',') {
+			len = strlen(key);
+			strcpy(key + len, options);
+			options = strchr(options, ',');
+			if (options)
+				*options++ = 0;
+		}
+
+
+		len = 0;
+		value = strchr(key, '=');
+		if (value) {
+			if (value == key)
+				continue;
+			*value++ = 0;
+			len = strlen(value);
 		}
+
+		ret = vfs_parse_fs_string(fc, key, value, len);
+		if (ret < 0)
+			break;
 	}
 
 	return ret;
@@ -929,12 +948,31 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
 		ctx->wsize = result.uint_32;
 		ctx->got_wsize = true;
 		break;
+	case Opt_acregmax:
+		ctx->acregmax = HZ * result.uint_32;
+		if (ctx->acregmax > CIFS_MAX_ACTIMEO) {
+			cifs_dbg(VFS, "acregmax too large\n");
+			goto cifs_parse_mount_err;
+		}
+		break;
+	case Opt_acdirmax:
+		ctx->acdirmax = HZ * result.uint_32;
+		if (ctx->acdirmax > CIFS_MAX_ACTIMEO) {
+			cifs_dbg(VFS, "acdirmax too large\n");
+			goto cifs_parse_mount_err;
+		}
+		break;
 	case Opt_actimeo:
-		ctx->actimeo = HZ * result.uint_32;
-		if (ctx->actimeo > CIFS_MAX_ACTIMEO) {
-			cifs_dbg(VFS, "attribute cache timeout too large\n");
+		if (HZ * result.uint_32 > CIFS_MAX_ACTIMEO) {
+			cifs_dbg(VFS, "timeout too large\n");
 			goto cifs_parse_mount_err;
 		}
+		if ((ctx->acdirmax != CIFS_DEF_ACTIMEO) ||
+		    (ctx->acregmax != CIFS_DEF_ACTIMEO)) {
+			cifs_dbg(VFS, "actimeo ignored since acregmax or acdirmax specified\n");
+			break;
+		}
+		ctx->acdirmax = ctx->acregmax = HZ * result.uint_32;
 		break;
 	case Opt_echo_interval:
 		ctx->echo_interval = result.uint_32;
@@ -1361,7 +1399,8 @@ int smb3_init_fs_context(struct fs_context *fc)
 	/* default is to use strict cifs caching semantics */
 	ctx->strict_io = true;
 
-	ctx->actimeo = CIFS_DEF_ACTIMEO;
+	ctx->acregmax = CIFS_DEF_ACTIMEO;
+	ctx->acdirmax = CIFS_DEF_ACTIMEO;
 
 	/* Most clients set timeout to 0, allows server to use its default */
 	ctx->handle_timeout = 0; /* See MS-SMB2 spec section 2.2.14.2.12 */
diff --git a/fs/cifs/fs_context.h b/fs/cifs/fs_context.h
index 1c44a460e2c0..87dd1f7168f2 100644
--- a/fs/cifs/fs_context.h
+++ b/fs/cifs/fs_context.h
@@ -118,6 +118,8 @@ enum cifs_param {
 	Opt_rsize,
 	Opt_wsize,
 	Opt_actimeo,
+	Opt_acdirmax,
+	Opt_acregmax,
 	Opt_echo_interval,
 	Opt_max_credits,
 	Opt_snapshot,
@@ -232,7 +234,9 @@ struct smb3_fs_context {
 	unsigned int wsize;
 	unsigned int min_offload;
 	bool sockopt_tcp_nodelay:1;
-	unsigned long actimeo; /* attribute cache timeout (jiffies) */
+	/* attribute cache timemout for files and directories in jiffies */
+	unsigned long acregmax;
+	unsigned long acdirmax;
 	struct smb_version_operations *ops;
 	struct smb_version_values *vals;
 	char *prepath;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 3e9c7bb23f26..7c61bc9573c0 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -2199,12 +2199,23 @@ cifs_inode_needs_reval(struct inode *inode)
 	if (!lookupCacheEnabled)
 		return true;
 
-	if (!cifs_sb->ctx->actimeo)
-		return true;
-
-	if (!time_in_range(jiffies, cifs_i->time,
-				cifs_i->time + cifs_sb->ctx->actimeo))
-		return true;
+	/*
+	 * depending on inode type, check if attribute caching disabled for
+	 * files or directories
+	 */
+	if (S_ISDIR(inode->i_mode)) {
+		if (!cifs_sb->ctx->acdirmax)
+			return true;
+		if (!time_in_range(jiffies, cifs_i->time,
+				   cifs_i->time + cifs_sb->ctx->acdirmax))
+			return true;
+	} else { /* file */
+		if (!cifs_sb->ctx->acregmax)
+			return true;
+		if (!time_in_range(jiffies, cifs_i->time,
+				   cifs_i->time + cifs_sb->ctx->acregmax))
+			return true;
+	}
 
 	/* hardlinked files w/ noserverino get "special" treatment */
 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) &&
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 213465718fa8..183a3a868d7b 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -218,7 +218,7 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
 
 	/* UNC and paths */
 	/* XXX: Use ses->server->hostname? */
-	sprintf(unc, unc_fmt, ses->serverName);
+	sprintf(unc, unc_fmt, ses->ip_addr);
 	ctx.UNC = unc;
 	ctx.prepath = "";
 
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index f19274857292..f5087295424c 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -63,17 +63,19 @@ smb2_add_credits(struct TCP_Server_Info *server,
 		 const struct cifs_credits *credits, const int optype)
 {
 	int *val, rc = -1;
+	int scredits, in_flight;
 	unsigned int add = credits->value;
 	unsigned int instance = credits->instance;
 	bool reconnect_detected = false;
+	bool reconnect_with_invalid_credits = false;
 
 	spin_lock(&server->req_lock);
 	val = server->ops->get_credits_field(server, optype);
 
 	/* eg found case where write overlapping reconnect messed up credits */
 	if (((optype & CIFS_OP_MASK) == CIFS_NEG_OP) && (*val != 0))
-		trace_smb3_reconnect_with_invalid_credits(server->CurrentMid,
-			server->hostname, *val, add);
+		reconnect_with_invalid_credits = true;
+
 	if ((instance == 0) || (instance == server->reconnect_instance))
 		*val += add;
 	else
@@ -84,7 +86,9 @@ smb2_add_credits(struct TCP_Server_Info *server,
 		pr_warn_once("server overflowed SMB3 credits\n");
 	}
 	server->in_flight--;
-	if (server->in_flight == 0 && (optype & CIFS_OP_MASK) != CIFS_NEG_OP)
+	if (server->in_flight == 0 &&
+	   ((optype & CIFS_OP_MASK) != CIFS_NEG_OP) &&
+	   ((optype & CIFS_OP_MASK) != CIFS_SESS_OP))
 		rc = change_conf(server);
 	/*
 	 * Sometimes server returns 0 credits on oplock break ack - we need to
@@ -97,14 +101,26 @@ smb2_add_credits(struct TCP_Server_Info *server,
 			server->oplock_credits++;
 		}
 	}
+	scredits = *val;
+	in_flight = server->in_flight;
 	spin_unlock(&server->req_lock);
 	wake_up(&server->request_q);
 
 	if (reconnect_detected) {
+		trace_smb3_reconnect_detected(server->CurrentMid,
+			server->conn_id, server->hostname, scredits, add, in_flight);
+
 		cifs_dbg(FYI, "trying to put %d credits from the old server instance %d\n",
 			 add, instance);
 	}
 
+	if (reconnect_with_invalid_credits) {
+		trace_smb3_reconnect_with_invalid_credits(server->CurrentMid,
+			server->conn_id, server->hostname, scredits, add, in_flight);
+		cifs_dbg(FYI, "Negotiate operation when server credits is non-zero. Optype: %d, server credits: %d, credits added: %d\n",
+			 optype, scredits, add);
+	}
+
 	if (server->tcpStatus == CifsNeedReconnect
 	    || server->tcpStatus == CifsExiting)
 		return;
@@ -123,23 +139,30 @@ smb2_add_credits(struct TCP_Server_Info *server,
 		cifs_dbg(FYI, "disabling oplocks\n");
 		break;
 	default:
-		trace_smb3_add_credits(server->CurrentMid,
-			server->hostname, rc, add);
-		cifs_dbg(FYI, "%s: added %u credits total=%d\n", __func__, add, rc);
+		/* change_conf rebalanced credits for different types */
+		break;
 	}
+
+	trace_smb3_add_credits(server->CurrentMid,
+			server->conn_id, server->hostname, scredits, add, in_flight);
+	cifs_dbg(FYI, "%s: added %u credits total=%d\n", __func__, add, scredits);
 }
 
 static void
 smb2_set_credits(struct TCP_Server_Info *server, const int val)
 {
+	int scredits, in_flight;
+
 	spin_lock(&server->req_lock);
 	server->credits = val;
 	if (val == 1)
 		server->reconnect_instance++;
+	scredits = server->credits;
+	in_flight = server->in_flight;
 	spin_unlock(&server->req_lock);
 
 	trace_smb3_set_credits(server->CurrentMid,
-			server->hostname, val, val);
+			server->conn_id, server->hostname, scredits, val, in_flight);
 	cifs_dbg(FYI, "%s: set %u credits\n", __func__, val);
 
 	/* don't log while holding the lock */
@@ -171,7 +194,7 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
 		      unsigned int *num, struct cifs_credits *credits)
 {
 	int rc = 0;
-	unsigned int scredits;
+	unsigned int scredits, in_flight;
 
 	spin_lock(&server->req_lock);
 	while (1) {
@@ -208,17 +231,18 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
 				DIV_ROUND_UP(*num, SMB2_MAX_BUFFER_SIZE);
 			credits->instance = server->reconnect_instance;
 			server->credits -= credits->value;
-			scredits = server->credits;
 			server->in_flight++;
 			if (server->in_flight > server->max_in_flight)
 				server->max_in_flight = server->in_flight;
 			break;
 		}
 	}
+	scredits = server->credits;
+	in_flight = server->in_flight;
 	spin_unlock(&server->req_lock);
 
 	trace_smb3_add_credits(server->CurrentMid,
-			server->hostname, scredits, -(credits->value));
+			server->conn_id, server->hostname, scredits, -(credits->value), in_flight);
 	cifs_dbg(FYI, "%s: removed %u credits total=%d\n",
 			__func__, credits->value, scredits);
 
@@ -231,14 +255,14 @@ smb2_adjust_credits(struct TCP_Server_Info *server,
 		    const unsigned int payload_size)
 {
 	int new_val = DIV_ROUND_UP(payload_size, SMB2_MAX_BUFFER_SIZE);
-	int scredits;
+	int scredits, in_flight;
 
 	if (!credits->value || credits->value == new_val)
 		return 0;
 
 	if (credits->value < new_val) {
 		trace_smb3_too_many_credits(server->CurrentMid,
-				server->hostname, 0, credits->value - new_val);
+				server->conn_id, server->hostname, 0, credits->value - new_val, 0);
 		cifs_server_dbg(VFS, "request has less credits (%d) than required (%d)",
 				credits->value, new_val);
 
@@ -248,9 +272,13 @@ smb2_adjust_credits(struct TCP_Server_Info *server,
 	spin_lock(&server->req_lock);
 
 	if (server->reconnect_instance != credits->instance) {
+		scredits = server->credits;
+		in_flight = server->in_flight;
 		spin_unlock(&server->req_lock);
+
 		trace_smb3_reconnect_detected(server->CurrentMid,
-			server->hostname, 0, 0);
+			server->conn_id, server->hostname, scredits,
+			credits->value - new_val, in_flight);
 		cifs_server_dbg(VFS, "trying to return %d credits to old session\n",
 			 credits->value - new_val);
 		return -EAGAIN;
@@ -258,15 +286,18 @@ smb2_adjust_credits(struct TCP_Server_Info *server,
 
 	server->credits += credits->value - new_val;
 	scredits = server->credits;
+	in_flight = server->in_flight;
 	spin_unlock(&server->req_lock);
 	wake_up(&server->request_q);
-	credits->value = new_val;
 
 	trace_smb3_add_credits(server->CurrentMid,
-			server->hostname, scredits, credits->value - new_val);
+			server->conn_id, server->hostname, scredits,
+			credits->value - new_val, in_flight);
 	cifs_dbg(FYI, "%s: adjust added %u credits total=%d\n",
 			__func__, credits->value - new_val, scredits);
 
+	credits->value = new_val;
+
 	return 0;
 }
 
@@ -2369,7 +2400,7 @@ static bool
 smb2_is_status_pending(char *buf, struct TCP_Server_Info *server)
 {
 	struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
-	int scredits;
+	int scredits, in_flight;
 
 	if (shdr->Status != STATUS_PENDING)
 		return false;
@@ -2378,11 +2409,13 @@ smb2_is_status_pending(char *buf, struct TCP_Server_Info *server)
 		spin_lock(&server->req_lock);
 		server->credits += le16_to_cpu(shdr->CreditRequest);
 		scredits = server->credits;
+		in_flight = server->in_flight;
 		spin_unlock(&server->req_lock);
 		wake_up(&server->request_q);
 
 		trace_smb3_add_credits(server->CurrentMid,
-				server->hostname, scredits, le16_to_cpu(shdr->CreditRequest));
+				server->conn_id, server->hostname, scredits,
+				le16_to_cpu(shdr->CreditRequest), in_flight);
 		cifs_dbg(FYI, "%s: status pending add %u credits total=%d\n",
 				__func__, le16_to_cpu(shdr->CreditRequest), scredits);
 	}
@@ -2418,6 +2451,34 @@ smb2_is_status_io_timeout(char *buf)
 		return false;
 }
 
+static void
+smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server)
+{
+	struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
+	struct list_head *tmp, *tmp1;
+	struct cifs_ses *ses;
+	struct cifs_tcon *tcon;
+
+	if (shdr->Status != STATUS_NETWORK_NAME_DELETED)
+		return;
+
+	spin_lock(&cifs_tcp_ses_lock);
+	list_for_each(tmp, &server->smb_ses_list) {
+		ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
+		list_for_each(tmp1, &ses->tcon_list) {
+			tcon = list_entry(tmp1, struct cifs_tcon, tcon_list);
+			if (tcon->tid == shdr->TreeId) {
+				tcon->need_reconnect = true;
+				spin_unlock(&cifs_tcp_ses_lock);
+				pr_warn_once("Server share %s deleted.\n",
+					     tcon->treeName);
+				return;
+			}
+		}
+	}
+	spin_unlock(&cifs_tcp_ses_lock);
+}
+
 static int
 smb2_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid,
 		     struct cifsInodeInfo *cinode)
@@ -4605,6 +4666,10 @@ static void smb2_decrypt_offload(struct work_struct *work)
 #ifdef CONFIG_CIFS_STATS2
 			mid->when_received = jiffies;
 #endif
+			if (dw->server->ops->is_network_name_deleted)
+				dw->server->ops->is_network_name_deleted(dw->buf,
+									 dw->server);
+
 			mid->callback(mid);
 		} else {
 			spin_lock(&GlobalMid_Lock);
@@ -4723,6 +4788,12 @@ non_offloaded_decrypt:
 		rc = handle_read_data(server, *mid, buf,
 				      server->vals->read_rsp_size,
 				      pages, npages, len, false);
+		if (rc >= 0) {
+			if (server->ops->is_network_name_deleted) {
+				server->ops->is_network_name_deleted(buf,
+								server);
+			}
+		}
 	}
 
 free_pages:
@@ -5072,6 +5143,7 @@ struct smb_version_operations smb20_operations = {
 	.fiemap = smb3_fiemap,
 	.llseek = smb3_llseek,
 	.is_status_io_timeout = smb2_is_status_io_timeout,
+	.is_network_name_deleted = smb2_is_network_name_deleted,
 };
 
 struct smb_version_operations smb21_operations = {
@@ -5173,6 +5245,7 @@ struct smb_version_operations smb21_operations = {
 	.fiemap = smb3_fiemap,
 	.llseek = smb3_llseek,
 	.is_status_io_timeout = smb2_is_status_io_timeout,
+	.is_network_name_deleted = smb2_is_network_name_deleted,
 };
 
 struct smb_version_operations smb30_operations = {
@@ -5286,6 +5359,7 @@ struct smb_version_operations smb30_operations = {
 	.fiemap = smb3_fiemap,
 	.llseek = smb3_llseek,
 	.is_status_io_timeout = smb2_is_status_io_timeout,
+	.is_network_name_deleted = smb2_is_network_name_deleted,
 };
 
 struct smb_version_operations smb311_operations = {
@@ -5399,6 +5473,7 @@ struct smb_version_operations smb311_operations = {
 	.fiemap = smb3_fiemap,
 	.llseek = smb3_llseek,
 	.is_status_io_timeout = smb2_is_status_io_timeout,
+	.is_network_name_deleted = smb2_is_network_name_deleted,
 };
 
 struct smb_version_values smb20_values = {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 794fc3b68b4f..4bbb6126b14d 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -814,8 +814,9 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 		   SMB3ANY_VERSION_STRING) == 0) {
 		req->Dialects[0] = cpu_to_le16(SMB30_PROT_ID);
 		req->Dialects[1] = cpu_to_le16(SMB302_PROT_ID);
-		req->DialectCount = cpu_to_le16(2);
-		total_len += 4;
+		req->Dialects[2] = cpu_to_le16(SMB311_PROT_ID);
+		req->DialectCount = cpu_to_le16(3);
+		total_len += 6;
 	} else if (strcmp(server->vals->version_string,
 		   SMBDEFAULT_VERSION_STRING) == 0) {
 		req->Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
@@ -849,6 +850,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 			SMB2_CLIENT_GUID_SIZE);
 		if ((server->vals->protocol_id == SMB311_PROT_ID) ||
 		    (strcmp(server->vals->version_string,
+		     SMB3ANY_VERSION_STRING) == 0) ||
+		    (strcmp(server->vals->version_string,
 		     SMBDEFAULT_VERSION_STRING) == 0))
 			assemble_neg_contexts(req, server, &total_len);
 	}
@@ -883,6 +886,10 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 			cifs_server_dbg(VFS,
 				"SMB2.1 dialect returned but not requested\n");
 			return -EIO;
+		} else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) {
+			/* ops set to 3.0 by default for default so update */
+			server->ops = &smb311_operations;
+			server->vals = &smb311_values;
 		}
 	} else if (strcmp(server->vals->version_string,
 		   SMBDEFAULT_VERSION_STRING) == 0) {
@@ -1042,10 +1049,11 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
 		SMB3ANY_VERSION_STRING) == 0) {
 		pneg_inbuf->Dialects[0] = cpu_to_le16(SMB30_PROT_ID);
 		pneg_inbuf->Dialects[1] = cpu_to_le16(SMB302_PROT_ID);
-		pneg_inbuf->DialectCount = cpu_to_le16(2);
-		/* structure is big enough for 3 dialects, sending only 2 */
+		pneg_inbuf->Dialects[2] = cpu_to_le16(SMB311_PROT_ID);
+		pneg_inbuf->DialectCount = cpu_to_le16(3);
+		/* SMB 2.1 not included so subtract one dialect from len */
 		inbuflen = sizeof(*pneg_inbuf) -
-				(2 * sizeof(pneg_inbuf->Dialects[0]));
+				(sizeof(pneg_inbuf->Dialects[0]));
 	} else if (strcmp(server->vals->version_string,
 		SMBDEFAULT_VERSION_STRING) == 0) {
 		pneg_inbuf->Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
@@ -1053,7 +1061,7 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
 		pneg_inbuf->Dialects[2] = cpu_to_le16(SMB302_PROT_ID);
 		pneg_inbuf->Dialects[3] = cpu_to_le16(SMB311_PROT_ID);
 		pneg_inbuf->DialectCount = cpu_to_le16(4);
-		/* structure is big enough for 3 dialects */
+		/* structure is big enough for 4 dialects */
 		inbuflen = sizeof(*pneg_inbuf);
 	} else {
 		/* otherwise specific dialect was requested */
@@ -1253,7 +1261,7 @@ SMB2_sess_sendreceive(struct SMB2_sess_data *sess_data)
 			    cifs_ses_server(sess_data->ses),
 			    &rqst,
 			    &sess_data->buf0_type,
-			    CIFS_LOG_ERROR | CIFS_NEG_OP, &rsp_iov);
+			    CIFS_LOG_ERROR | CIFS_SESS_OP, &rsp_iov);
 	cifs_small_buf_release(sess_data->iov[0].iov_base);
 	memcpy(&sess_data->iov[0], &rsp_iov, sizeof(struct kvec));
 
diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h
index c3d1a584f251..d6df908dccad 100644
--- a/fs/cifs/trace.h
+++ b/fs/cifs/trace.h
@@ -851,17 +851,21 @@ DEFINE_SMB3_LEASE_ERR_EVENT(lease_err);
 
 DECLARE_EVENT_CLASS(smb3_reconnect_class,
 	TP_PROTO(__u64	currmid,
+		__u64 conn_id,
 		char *hostname),
-	TP_ARGS(currmid, hostname),
+	TP_ARGS(currmid, conn_id, hostname),
 	TP_STRUCT__entry(
 		__field(__u64, currmid)
+		__field(__u64, conn_id)
 		__field(char *, hostname)
 	),
 	TP_fast_assign(
 		__entry->currmid = currmid;
+		__entry->conn_id = conn_id;
 		__entry->hostname = hostname;
 	),
-	TP_printk("server=%s current_mid=0x%llx",
+	TP_printk("conn_id=0x%llx server=%s current_mid=%llu",
+		__entry->conn_id,
 		__entry->hostname,
 		__entry->currmid)
 )
@@ -869,44 +873,56 @@ DECLARE_EVENT_CLASS(smb3_reconnect_class,
 #define DEFINE_SMB3_RECONNECT_EVENT(name)        \
 DEFINE_EVENT(smb3_reconnect_class, smb3_##name,  \
 	TP_PROTO(__u64	currmid,		\
-		char *hostname),		\
-	TP_ARGS(currmid, hostname))
+		__u64 conn_id,			\
+		char *hostname),				\
+	TP_ARGS(currmid, conn_id, hostname))
 
 DEFINE_SMB3_RECONNECT_EVENT(reconnect);
 DEFINE_SMB3_RECONNECT_EVENT(partial_send_reconnect);
 
 DECLARE_EVENT_CLASS(smb3_credit_class,
 	TP_PROTO(__u64	currmid,
+		__u64 conn_id,
 		char *hostname,
 		int credits,
-		int credits_to_add),
-	TP_ARGS(currmid, hostname, credits, credits_to_add),
+		int credits_to_add,
+		int in_flight),
+	TP_ARGS(currmid, conn_id, hostname, credits, credits_to_add, in_flight),
 	TP_STRUCT__entry(
 		__field(__u64, currmid)
+		__field(__u64, conn_id)
 		__field(char *, hostname)
 		__field(int, credits)
 		__field(int, credits_to_add)
+		__field(int, in_flight)
 	),
 	TP_fast_assign(
 		__entry->currmid = currmid;
+		__entry->conn_id = conn_id;
 		__entry->hostname = hostname;
 		__entry->credits = credits;
 		__entry->credits_to_add = credits_to_add;
+		__entry->in_flight = in_flight;
 	),
-	TP_printk("server=%s current_mid=0x%llx credits=%d credits_to_add=%d",
+	TP_printk("conn_id=0x%llx server=%s current_mid=%llu "
+			"credits=%d credit_change=%d in_flight=%d",
+		__entry->conn_id,
 		__entry->hostname,
 		__entry->currmid,
 		__entry->credits,
-		__entry->credits_to_add)
+		__entry->credits_to_add,
+		__entry->in_flight)
 )
 
 #define DEFINE_SMB3_CREDIT_EVENT(name)        \
 DEFINE_EVENT(smb3_credit_class, smb3_##name,  \
 	TP_PROTO(__u64	currmid,		\
+		__u64 conn_id,			\
 		char *hostname,			\
 		int  credits,			\
-		int  credits_to_add),		\
-	TP_ARGS(currmid, hostname, credits, credits_to_add))
+		int  credits_to_add,	\
+		int in_flight),			\
+	TP_ARGS(currmid, conn_id, hostname, credits, credits_to_add, in_flight))
 
 DEFINE_SMB3_CREDIT_EVENT(reconnect_with_invalid_credits);
 DEFINE_SMB3_CREDIT_EVENT(reconnect_detected);
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 4a2b836eb017..e90a1d1380b0 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -445,7 +445,7 @@ unmask:
 		 */
 		server->tcpStatus = CifsNeedReconnect;
 		trace_smb3_partial_send_reconnect(server->CurrentMid,
-						  server->hostname);
+						  server->conn_id, server->hostname);
 	}
 smbd_done:
 	if (rc < 0 && rc != -EINTR)
@@ -527,7 +527,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
 	int *credits;
 	int optype;
 	long int t;
-	int scredits = server->credits;
+	int scredits, in_flight;
 
 	if (timeout < 0)
 		t = MAX_JIFFY_OFFSET;
@@ -551,23 +551,39 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
 			server->max_in_flight = server->in_flight;
 		*credits -= 1;
 		*instance = server->reconnect_instance;
+		scredits = *credits;
+		in_flight = server->in_flight;
 		spin_unlock(&server->req_lock);
+
+		trace_smb3_add_credits(server->CurrentMid,
+				server->conn_id, server->hostname, scredits, -1, in_flight);
+		cifs_dbg(FYI, "%s: remove %u credits total=%d\n",
+				__func__, 1, scredits);
+
 		return 0;
 	}
 
 	while (1) {
 		if (*credits < num_credits) {
+			scredits = *credits;
 			spin_unlock(&server->req_lock);
+
 			cifs_num_waiters_inc(server);
 			rc = wait_event_killable_timeout(server->request_q,
 				has_credits(server, credits, num_credits), t);
 			cifs_num_waiters_dec(server);
 			if (!rc) {
+				spin_lock(&server->req_lock);
+				scredits = *credits;
+				in_flight = server->in_flight;
+				spin_unlock(&server->req_lock);
+
 				trace_smb3_credit_timeout(server->CurrentMid,
-					server->hostname, num_credits, 0);
+						server->conn_id, server->hostname, scredits,
+						num_credits, in_flight);
 				cifs_server_dbg(VFS, "wait timed out after %d ms\n",
-					 timeout);
-				return -ENOTSUPP;
+						timeout);
+				return -EBUSY;
 			}
 			if (rc == -ERESTARTSYS)
 				return -ERESTARTSYS;
@@ -595,6 +611,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
 			    server->in_flight > 2 * MAX_COMPOUND &&
 			    *credits <= MAX_COMPOUND) {
 				spin_unlock(&server->req_lock);
+
 				cifs_num_waiters_inc(server);
 				rc = wait_event_killable_timeout(
 					server->request_q,
@@ -603,13 +620,18 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
 					t);
 				cifs_num_waiters_dec(server);
 				if (!rc) {
+					spin_lock(&server->req_lock);
+					scredits = *credits;
+					in_flight = server->in_flight;
+					spin_unlock(&server->req_lock);
+
 					trace_smb3_credit_timeout(
-						server->CurrentMid,
-						server->hostname, num_credits,
-						0);
+							server->CurrentMid,
+							server->conn_id, server->hostname,
+							scredits, num_credits, in_flight);
 					cifs_server_dbg(VFS, "wait timed out after %d ms\n",
-						 timeout);
-					return -ENOTSUPP;
+							timeout);
+					return -EBUSY;
 				}
 				if (rc == -ERESTARTSYS)
 					return -ERESTARTSYS;
@@ -625,16 +647,18 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
 			/* update # of requests on the wire to server */
 			if ((flags & CIFS_TIMEOUT_MASK) != CIFS_BLOCKING_OP) {
 				*credits -= num_credits;
-				scredits = *credits;
 				server->in_flight += num_credits;
 				if (server->in_flight > server->max_in_flight)
 					server->max_in_flight = server->in_flight;
 				*instance = server->reconnect_instance;
 			}
+			scredits = *credits;
+			in_flight = server->in_flight;
 			spin_unlock(&server->req_lock);
 
 			trace_smb3_add_credits(server->CurrentMid,
-					server->hostname, scredits, -(num_credits));
+					server->conn_id, server->hostname, scredits,
+					-(num_credits), in_flight);
 			cifs_dbg(FYI, "%s: remove %u credits total=%d\n",
 					__func__, num_credits, scredits);
 			break;
@@ -656,13 +680,13 @@ wait_for_compound_request(struct TCP_Server_Info *server, int num,
 			  const int flags, unsigned int *instance)
 {
 	int *credits;
-	int scredits, sin_flight;
+	int scredits, in_flight;
 
 	credits = server->ops->get_credits_field(server, flags & CIFS_OP_MASK);
 
 	spin_lock(&server->req_lock);
 	scredits = *credits;
-	sin_flight = server->in_flight;
+	in_flight = server->in_flight;
 
 	if (*credits < num) {
 		/*
@@ -684,10 +708,11 @@ wait_for_compound_request(struct TCP_Server_Info *server, int num,
 		if (server->in_flight == 0) {
 			spin_unlock(&server->req_lock);
 			trace_smb3_insufficient_credits(server->CurrentMid,
-					server->hostname, scredits, sin_flight);
+					server->conn_id, server->hostname, scredits,
+					num, in_flight);
 			cifs_dbg(FYI, "%s: %d requests in flight, needed %d total=%d\n",
-					__func__, sin_flight, num, scredits);
-			return -ENOTSUPP;
+					__func__, in_flight, num, scredits);
+			return -EDEADLK;
 		}
 	}
 	spin_unlock(&server->req_lock);
@@ -1171,7 +1196,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
 	/*
 	 * Compounding is never used during session establish.
 	 */
-	if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP))
+	if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP))
 		smb311_update_preauth_hash(ses, rqst[0].rq_iov,
 					   rqst[0].rq_nvec);
 
@@ -1236,7 +1261,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
 	/*
 	 * Compounding is never used during session establish.
 	 */
-	if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP)) {
+	if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) {
 		struct kvec iov = {
 			.iov_base = resp_iov[0].iov_base,
 			.iov_len = resp_iov[0].iov_len
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 0957e1bb8eb2..b61491bf3166 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -695,7 +695,7 @@ static inline int dio_new_bio(struct dio *dio, struct dio_submit *sdio,
 	if (ret)
 		goto out;
 	sector = start_sector << (sdio->blkbits - 9);
-	nr_pages = min(sdio->pages_in_io, BIO_MAX_PAGES);
+	nr_pages = bio_max_segs(sdio->pages_in_io);
 	BUG_ON(nr_pages <= 0);
 	dio_bio_alloc(dio, sdio, map_bh->b_bdev, sector, nr_pages);
 	sdio->boundary = 0;
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index ea4f693bee22..f88851c5c250 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -215,10 +215,8 @@ submit_bio_retry:
 		/* max # of continuous pages */
 		if (nblocks > DIV_ROUND_UP(map.m_plen, PAGE_SIZE))
 			nblocks = DIV_ROUND_UP(map.m_plen, PAGE_SIZE);
-		if (nblocks > BIO_MAX_PAGES)
-			nblocks = BIO_MAX_PAGES;
 
-		bio = bio_alloc(GFP_NOIO, nblocks);
+		bio = bio_alloc(GFP_NOIO, bio_max_segs(nblocks));
 
 		bio->bi_end_io = erofs_readendio;
 		bio_set_dev(bio, sb->s_bdev);
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index f014c5e473a9..3db923403505 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -371,8 +371,7 @@ int ext4_mpage_readpages(struct inode *inode,
 			 * bio_alloc will _always_ be able to allocate a bio if
 			 * __GFP_DIRECT_RECLAIM is set, see bio_alloc_bioset().
 			 */
-			bio = bio_alloc(GFP_KERNEL,
-				min_t(int, nr_pages, BIO_MAX_PAGES));
+			bio = bio_alloc(GFP_KERNEL, bio_max_segs(nr_pages));
 			fscrypt_set_bio_crypt_ctx(bio, inode, next_block,
 						  GFP_KERNEL);
 			ext4_set_bio_post_read_ctx(bio, inode, page->index);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index b9721c8f116c..7c95818639a6 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -969,8 +969,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
 	unsigned int post_read_steps = 0;
 
 	bio = bio_alloc_bioset(for_write ? GFP_NOIO : GFP_KERNEL,
-			       min_t(int, nr_pages, BIO_MAX_PAGES),
-			       &f2fs_bioset);
+			       bio_max_segs(nr_pages), &f2fs_bioset);
 	if (!bio)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index a8a0fb890e8d..4b0e2e3c2c88 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -2747,7 +2747,7 @@ int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
 	sum_entry = &sum->entries[0];
 
 	for (i = 0; i < last_offset; i += nrpages, addr += nrpages) {
-		nrpages = min(last_offset - i, BIO_MAX_PAGES);
+		nrpages = bio_max_segs(last_offset - i);
 
 		/* readahead node pages */
 		f2fs_ra_meta_pages(sbi, addr, nrpages, META_POR, true);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 68376fa03880..c9775d5c6594 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -491,8 +491,8 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip,
 	di = (struct gfs2_dinode *)dibh->b_data;
 	gfs2_dinode_out(ip, di);
 
-	di->di_major = cpu_to_be32(MAJOR(ip->i_inode.i_rdev));
-	di->di_minor = cpu_to_be32(MINOR(ip->i_inode.i_rdev));
+	di->di_major = cpu_to_be32(imajor(&ip->i_inode));
+	di->di_minor = cpu_to_be32(iminor(&ip->i_inode));
 	di->__pad1 = 0;
 	di->__pad2 = 0;
 	di->__pad3 = 0;
diff --git a/fs/inode.c b/fs/inode.c
index 6dba963d3f6d..a047ab306f9a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -142,6 +142,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
 	atomic_set(&inode->i_count, 1);
 	inode->i_op = &empty_iops;
 	inode->i_fop = &no_open_fops;
+	inode->i_ino = 0;
 	inode->__i_nlink = 1;
 	inode->i_opflags = 0;
 	if (sb->s_xattr)
diff --git a/fs/io-wq.c b/fs/io-wq.c
index c36bbcd823ce..44e20248805a 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -13,13 +13,9 @@
 #include <linux/sched/mm.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
-#include <linux/kthread.h>
 #include <linux/rculist_nulls.h>
-#include <linux/fs_struct.h>
-#include <linux/task_work.h>
-#include <linux/blk-cgroup.h>
-#include <linux/audit.h>
 #include <linux/cpu.h>
+#include <linux/tracehook.h>
 
 #include "../kernel/sched/sched.h"
 #include "io-wq.h"
@@ -36,7 +32,6 @@ enum {
 
 enum {
 	IO_WQ_BIT_EXIT		= 0,	/* wq exiting */
-	IO_WQ_BIT_ERROR		= 1,	/* error on setup */
 };
 
 enum {
@@ -57,14 +52,12 @@ struct io_worker {
 	struct io_wq_work *cur_work;
 	spinlock_t lock;
 
-	struct rcu_head rcu;
-	struct mm_struct *mm;
-#ifdef CONFIG_BLK_CGROUP
-	struct cgroup_subsys_state *blkcg_css;
-#endif
 	const struct cred *cur_creds;
 	const struct cred *saved_creds;
-	struct nsproxy *restore_nsproxy;
+
+	struct completion ref_done;
+
+	struct rcu_head rcu;
 };
 
 #if BITS_PER_LONG == 64
@@ -93,7 +86,6 @@ struct io_wqe {
 	struct {
 		raw_spinlock_t lock;
 		struct io_wq_work_list work_list;
-		unsigned long hash_map;
 		unsigned flags;
 	} ____cacheline_aligned_in_smp;
 
@@ -103,6 +95,8 @@ struct io_wqe {
 	struct hlist_nulls_head free_list;
 	struct list_head all_list;
 
+	struct wait_queue_entry wait;
+
 	struct io_wq *wq;
 	struct io_wq_work *hash_tail[IO_WQ_NR_HASH_BUCKETS];
 };
@@ -119,12 +113,15 @@ struct io_wq {
 
 	struct task_struct *manager;
 	struct user_struct *user;
+
+	struct io_wq_hash *hash;
+
 	refcount_t refs;
 	struct completion done;
 
 	struct hlist_node cpuhp_node;
 
-	refcount_t use_refs;
+	pid_t task_pid;
 };
 
 static enum cpuhp_state io_wq_online;
@@ -137,62 +134,7 @@ static bool io_worker_get(struct io_worker *worker)
 static void io_worker_release(struct io_worker *worker)
 {
 	if (refcount_dec_and_test(&worker->ref))
-		wake_up_process(worker->task);
-}
-
-/*
- * Note: drops the wqe->lock if returning true! The caller must re-acquire
- * the lock in that case. Some callers need to restart handling if this
- * happens, so we can't just re-acquire the lock on behalf of the caller.
- */
-static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker)
-{
-	bool dropped_lock = false;
-
-	if (worker->saved_creds) {
-		revert_creds(worker->saved_creds);
-		worker->cur_creds = worker->saved_creds = NULL;
-	}
-
-	if (current->files) {
-		__acquire(&wqe->lock);
-		raw_spin_unlock_irq(&wqe->lock);
-		dropped_lock = true;
-
-		task_lock(current);
-		current->files = NULL;
-		current->nsproxy = worker->restore_nsproxy;
-		task_unlock(current);
-	}
-
-	if (current->fs)
-		current->fs = NULL;
-
-	/*
-	 * If we have an active mm, we need to drop the wq lock before unusing
-	 * it. If we do, return true and let the caller retry the idle loop.
-	 */
-	if (worker->mm) {
-		if (!dropped_lock) {
-			__acquire(&wqe->lock);
-			raw_spin_unlock_irq(&wqe->lock);
-			dropped_lock = true;
-		}
-		__set_current_state(TASK_RUNNING);
-		kthread_unuse_mm(worker->mm);
-		mmput(worker->mm);
-		worker->mm = NULL;
-	}
-
-#ifdef CONFIG_BLK_CGROUP
-	if (worker->blkcg_css) {
-		kthread_associate_blkcg(NULL);
-		worker->blkcg_css = NULL;
-	}
-#endif
-	if (current->signal->rlim[RLIMIT_FSIZE].rlim_cur != RLIM_INFINITY)
-		current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
-	return dropped_lock;
+		complete(&worker->ref_done);
 }
 
 static inline struct io_wqe_acct *io_work_get_acct(struct io_wqe *wqe,
@@ -204,9 +146,10 @@ static inline struct io_wqe_acct *io_work_get_acct(struct io_wqe *wqe,
 	return &wqe->acct[IO_WQ_ACCT_BOUND];
 }
 
-static inline struct io_wqe_acct *io_wqe_get_acct(struct io_wqe *wqe,
-						  struct io_worker *worker)
+static inline struct io_wqe_acct *io_wqe_get_acct(struct io_worker *worker)
 {
+	struct io_wqe *wqe = worker->wqe;
+
 	if (worker->flags & IO_WORKER_F_BOUND)
 		return &wqe->acct[IO_WQ_ACCT_BOUND];
 
@@ -216,39 +159,36 @@ static inline struct io_wqe_acct *io_wqe_get_acct(struct io_wqe *wqe,
 static void io_worker_exit(struct io_worker *worker)
 {
 	struct io_wqe *wqe = worker->wqe;
-	struct io_wqe_acct *acct = io_wqe_get_acct(wqe, worker);
+	struct io_wqe_acct *acct = io_wqe_get_acct(worker);
+	unsigned flags;
 
-	/*
-	 * If we're not at zero, someone else is holding a brief reference
-	 * to the worker. Wait for that to go away.
-	 */
-	set_current_state(TASK_INTERRUPTIBLE);
-	if (!refcount_dec_and_test(&worker->ref))
-		schedule();
-	__set_current_state(TASK_RUNNING);
+	if (refcount_dec_and_test(&worker->ref))
+		complete(&worker->ref_done);
+	wait_for_completion(&worker->ref_done);
 
 	preempt_disable();
 	current->flags &= ~PF_IO_WORKER;
-	if (worker->flags & IO_WORKER_F_RUNNING)
+	flags = worker->flags;
+	worker->flags = 0;
+	if (flags & IO_WORKER_F_RUNNING)
 		atomic_dec(&acct->nr_running);
-	if (!(worker->flags & IO_WORKER_F_BOUND))
-		atomic_dec(&wqe->wq->user->processes);
 	worker->flags = 0;
 	preempt_enable();
 
+	if (worker->saved_creds) {
+		revert_creds(worker->saved_creds);
+		worker->cur_creds = worker->saved_creds = NULL;
+	}
+
 	raw_spin_lock_irq(&wqe->lock);
-	hlist_nulls_del_rcu(&worker->nulls_node);
+	if (flags & IO_WORKER_F_FREE)
+		hlist_nulls_del_rcu(&worker->nulls_node);
 	list_del_rcu(&worker->all_list);
-	if (__io_worker_unuse(wqe, worker)) {
-		__release(&wqe->lock);
-		raw_spin_lock_irq(&wqe->lock);
-	}
 	acct->nr_workers--;
 	raw_spin_unlock_irq(&wqe->lock);
 
 	kfree_rcu(worker, rcu);
-	if (refcount_dec_and_test(&wqe->wq->refs))
-		complete(&wqe->wq->done);
+	io_wq_put(wqe->wq);
 }
 
 static inline bool io_wqe_run_queue(struct io_wqe *wqe)
@@ -306,33 +246,27 @@ static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
 		wake_up_process(wqe->wq->manager);
 }
 
-static void io_wqe_inc_running(struct io_wqe *wqe, struct io_worker *worker)
+static void io_wqe_inc_running(struct io_worker *worker)
 {
-	struct io_wqe_acct *acct = io_wqe_get_acct(wqe, worker);
+	struct io_wqe_acct *acct = io_wqe_get_acct(worker);
 
 	atomic_inc(&acct->nr_running);
 }
 
-static void io_wqe_dec_running(struct io_wqe *wqe, struct io_worker *worker)
+static void io_wqe_dec_running(struct io_worker *worker)
 	__must_hold(wqe->lock)
 {
-	struct io_wqe_acct *acct = io_wqe_get_acct(wqe, worker);
+	struct io_wqe_acct *acct = io_wqe_get_acct(worker);
+	struct io_wqe *wqe = worker->wqe;
 
 	if (atomic_dec_and_test(&acct->nr_running) && io_wqe_run_queue(wqe))
 		io_wqe_wake_worker(wqe, acct);
 }
 
-static void io_worker_start(struct io_wqe *wqe, struct io_worker *worker)
+static void io_worker_start(struct io_worker *worker)
 {
-	allow_kernel_signal(SIGINT);
-
-	current->flags |= PF_IO_WORKER;
-	current->fs = NULL;
-	current->files = NULL;
-
 	worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
-	worker->restore_nsproxy = current->nsproxy;
-	io_wqe_inc_running(wqe, worker);
+	io_wqe_inc_running(worker);
 }
 
 /*
@@ -357,19 +291,17 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
 	worker_bound = (worker->flags & IO_WORKER_F_BOUND) != 0;
 	work_bound = (work->flags & IO_WQ_WORK_UNBOUND) == 0;
 	if (worker_bound != work_bound) {
-		io_wqe_dec_running(wqe, worker);
+		io_wqe_dec_running(worker);
 		if (work_bound) {
 			worker->flags |= IO_WORKER_F_BOUND;
 			wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers--;
 			wqe->acct[IO_WQ_ACCT_BOUND].nr_workers++;
-			atomic_dec(&wqe->wq->user->processes);
 		} else {
 			worker->flags &= ~IO_WORKER_F_BOUND;
 			wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers++;
 			wqe->acct[IO_WQ_ACCT_BOUND].nr_workers--;
-			atomic_inc(&wqe->wq->user->processes);
 		}
-		io_wqe_inc_running(wqe, worker);
+		io_wqe_inc_running(worker);
 	 }
 }
 
@@ -380,15 +312,17 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
  * retry the loop in that case (we changed task state), we don't regrab
  * the lock if we return success.
  */
-static bool __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker)
+static void __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker)
 	__must_hold(wqe->lock)
 {
 	if (!(worker->flags & IO_WORKER_F_FREE)) {
 		worker->flags |= IO_WORKER_F_FREE;
 		hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
 	}
-
-	return __io_worker_unuse(wqe, worker);
+	if (worker->saved_creds) {
+		revert_creds(worker->saved_creds);
+		worker->cur_creds = worker->saved_creds = NULL;
+	}
 }
 
 static inline unsigned int io_get_work_hash(struct io_wq_work *work)
@@ -396,14 +330,31 @@ static inline unsigned int io_get_work_hash(struct io_wq_work *work)
 	return work->flags >> IO_WQ_HASH_SHIFT;
 }
 
+static void io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
+{
+	struct io_wq *wq = wqe->wq;
+
+	spin_lock(&wq->hash->wait.lock);
+	if (list_empty(&wqe->wait.entry)) {
+		__add_wait_queue(&wq->hash->wait, &wqe->wait);
+		if (!test_bit(hash, &wq->hash->map)) {
+			__set_current_state(TASK_RUNNING);
+			list_del_init(&wqe->wait.entry);
+		}
+	}
+	spin_unlock(&wq->hash->wait.lock);
+}
+
 static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
 	__must_hold(wqe->lock)
 {
 	struct io_wq_work_node *node, *prev;
 	struct io_wq_work *work, *tail;
-	unsigned int hash;
+	unsigned int stall_hash = -1U;
 
 	wq_list_for_each(node, prev, &wqe->work_list) {
+		unsigned int hash;
+
 		work = container_of(node, struct io_wq_work, list);
 
 		/* not hashed, can run anytime */
@@ -412,111 +363,60 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
 			return work;
 		}
 
-		/* hashed, can run if not already running */
 		hash = io_get_work_hash(work);
-		if (!(wqe->hash_map & BIT(hash))) {
-			wqe->hash_map |= BIT(hash);
-			/* all items with this hash lie in [work, tail] */
-			tail = wqe->hash_tail[hash];
+		/* all items with this hash lie in [work, tail] */
+		tail = wqe->hash_tail[hash];
+
+		/* hashed, can run if not already running */
+		if (!test_and_set_bit(hash, &wqe->wq->hash->map)) {
 			wqe->hash_tail[hash] = NULL;
 			wq_list_cut(&wqe->work_list, &tail->list, prev);
 			return work;
 		}
+		if (stall_hash == -1U)
+			stall_hash = hash;
+		/* fast forward to a next hash, for-each will fix up @prev */
+		node = &tail->list;
 	}
 
-	return NULL;
-}
-
-static void io_wq_switch_mm(struct io_worker *worker, struct io_wq_work *work)
-{
-	if (worker->mm) {
-		kthread_unuse_mm(worker->mm);
-		mmput(worker->mm);
-		worker->mm = NULL;
+	if (stall_hash != -1U) {
+		raw_spin_unlock(&wqe->lock);
+		io_wait_on_hash(wqe, stall_hash);
+		raw_spin_lock(&wqe->lock);
 	}
 
-	if (mmget_not_zero(work->identity->mm)) {
-		kthread_use_mm(work->identity->mm);
-		worker->mm = work->identity->mm;
-		return;
-	}
-
-	/* failed grabbing mm, ensure work gets cancelled */
-	work->flags |= IO_WQ_WORK_CANCEL;
+	return NULL;
 }
 
-static inline void io_wq_switch_blkcg(struct io_worker *worker,
-				      struct io_wq_work *work)
+static void io_flush_signals(void)
 {
-#ifdef CONFIG_BLK_CGROUP
-	if (!(work->flags & IO_WQ_WORK_BLKCG))
-		return;
-	if (work->identity->blkcg_css != worker->blkcg_css) {
-		kthread_associate_blkcg(work->identity->blkcg_css);
-		worker->blkcg_css = work->identity->blkcg_css;
+	if (unlikely(test_tsk_thread_flag(current, TIF_NOTIFY_SIGNAL))) {
+		if (current->task_works)
+			task_work_run();
+		clear_tsk_thread_flag(current, TIF_NOTIFY_SIGNAL);
 	}
-#endif
 }
 
 static void io_wq_switch_creds(struct io_worker *worker,
 			       struct io_wq_work *work)
 {
-	const struct cred *old_creds = override_creds(work->identity->creds);
+	const struct cred *old_creds = override_creds(work->creds);
 
-	worker->cur_creds = work->identity->creds;
+	worker->cur_creds = work->creds;
 	if (worker->saved_creds)
 		put_cred(old_creds); /* creds set by previous switch */
 	else
 		worker->saved_creds = old_creds;
 }
 
-static void io_impersonate_work(struct io_worker *worker,
-				struct io_wq_work *work)
-{
-	if ((work->flags & IO_WQ_WORK_FILES) &&
-	    current->files != work->identity->files) {
-		task_lock(current);
-		current->files = work->identity->files;
-		current->nsproxy = work->identity->nsproxy;
-		task_unlock(current);
-		if (!work->identity->files) {
-			/* failed grabbing files, ensure work gets cancelled */
-			work->flags |= IO_WQ_WORK_CANCEL;
-		}
-	}
-	if ((work->flags & IO_WQ_WORK_FS) && current->fs != work->identity->fs)
-		current->fs = work->identity->fs;
-	if ((work->flags & IO_WQ_WORK_MM) && work->identity->mm != worker->mm)
-		io_wq_switch_mm(worker, work);
-	if ((work->flags & IO_WQ_WORK_CREDS) &&
-	    worker->cur_creds != work->identity->creds)
-		io_wq_switch_creds(worker, work);
-	if (work->flags & IO_WQ_WORK_FSIZE)
-		current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->identity->fsize;
-	else if (current->signal->rlim[RLIMIT_FSIZE].rlim_cur != RLIM_INFINITY)
-		current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
-	io_wq_switch_blkcg(worker, work);
-#ifdef CONFIG_AUDIT
-	current->loginuid = work->identity->loginuid;
-	current->sessionid = work->identity->sessionid;
-#endif
-}
-
 static void io_assign_current_work(struct io_worker *worker,
 				   struct io_wq_work *work)
 {
 	if (work) {
-		/* flush pending signals before assigning new work */
-		if (signal_pending(current))
-			flush_signals(current);
+		io_flush_signals();
 		cond_resched();
 	}
 
-#ifdef CONFIG_AUDIT
-	current->loginuid = KUIDT_INIT(AUDIT_UID_UNSET);
-	current->sessionid = AUDIT_SID_UNSET;
-#endif
-
 	spin_lock_irq(&worker->lock);
 	worker->cur_work = work;
 	spin_unlock_irq(&worker->lock);
@@ -550,6 +450,7 @@ get_next:
 		if (!work)
 			break;
 		io_assign_current_work(worker, work);
+		__set_current_state(TASK_RUNNING);
 
 		/* handle a whole dependent link */
 		do {
@@ -557,7 +458,8 @@ get_next:
 			unsigned int hash = io_get_work_hash(work);
 
 			next_hashed = wq_next_work(work);
-			io_impersonate_work(worker, work);
+			if (work->creds && worker->cur_creds != work->creds)
+				io_wq_switch_creds(worker, work);
 			wq->do_work(work);
 			io_assign_current_work(worker, NULL);
 
@@ -572,8 +474,10 @@ get_next:
 				io_wqe_enqueue(wqe, linked);
 
 			if (hash != -1U && !next_hashed) {
+				clear_bit(hash, &wq->hash->map);
+				if (wq_has_sleeper(&wq->hash->wait))
+					wake_up(&wq->hash->wait);
 				raw_spin_lock_irq(&wqe->lock);
-				wqe->hash_map &= ~BIT_ULL(hash);
 				wqe->flags &= ~IO_WQE_FLAG_STALLED;
 				/* skip unnecessary unlock-lock wqe->lock */
 				if (!work)
@@ -592,27 +496,23 @@ static int io_wqe_worker(void *data)
 	struct io_wqe *wqe = worker->wqe;
 	struct io_wq *wq = wqe->wq;
 
-	io_worker_start(wqe, worker);
+	io_worker_start(worker);
 
 	while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
 		set_current_state(TASK_INTERRUPTIBLE);
 loop:
 		raw_spin_lock_irq(&wqe->lock);
 		if (io_wqe_run_queue(wqe)) {
-			__set_current_state(TASK_RUNNING);
 			io_worker_handle_work(worker);
 			goto loop;
 		}
-		/* drops the lock on success, retry */
-		if (__io_worker_idle(wqe, worker)) {
-			__release(&wqe->lock);
-			goto loop;
-		}
+		__io_worker_idle(wqe, worker);
 		raw_spin_unlock_irq(&wqe->lock);
-		if (signal_pending(current))
-			flush_signals(current);
+		io_flush_signals();
 		if (schedule_timeout(WORKER_IDLE_TIMEOUT))
 			continue;
+		if (fatal_signal_pending(current))
+			break;
 		/* timed out, exit unless we're the fixed worker */
 		if (test_bit(IO_WQ_BIT_EXIT, &wq->state) ||
 		    !(worker->flags & IO_WORKER_F_FIXED))
@@ -636,15 +536,16 @@ loop:
  */
 void io_wq_worker_running(struct task_struct *tsk)
 {
-	struct io_worker *worker = kthread_data(tsk);
-	struct io_wqe *wqe = worker->wqe;
+	struct io_worker *worker = tsk->pf_io_worker;
 
+	if (!worker)
+		return;
 	if (!(worker->flags & IO_WORKER_F_UP))
 		return;
 	if (worker->flags & IO_WORKER_F_RUNNING)
 		return;
 	worker->flags |= IO_WORKER_F_RUNNING;
-	io_wqe_inc_running(wqe, worker);
+	io_wqe_inc_running(worker);
 }
 
 /*
@@ -654,9 +555,10 @@ void io_wq_worker_running(struct task_struct *tsk)
  */
 void io_wq_worker_sleeping(struct task_struct *tsk)
 {
-	struct io_worker *worker = kthread_data(tsk);
-	struct io_wqe *wqe = worker->wqe;
+	struct io_worker *worker = tsk->pf_io_worker;
 
+	if (!worker)
+		return;
 	if (!(worker->flags & IO_WORKER_F_UP))
 		return;
 	if (!(worker->flags & IO_WORKER_F_RUNNING))
@@ -664,32 +566,27 @@ void io_wq_worker_sleeping(struct task_struct *tsk)
 
 	worker->flags &= ~IO_WORKER_F_RUNNING;
 
-	raw_spin_lock_irq(&wqe->lock);
-	io_wqe_dec_running(wqe, worker);
-	raw_spin_unlock_irq(&wqe->lock);
+	raw_spin_lock_irq(&worker->wqe->lock);
+	io_wqe_dec_running(worker);
+	raw_spin_unlock_irq(&worker->wqe->lock);
 }
 
-static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
+static int task_thread(void *data, int index)
 {
+	struct io_worker *worker = data;
+	struct io_wqe *wqe = worker->wqe;
 	struct io_wqe_acct *acct = &wqe->acct[index];
-	struct io_worker *worker;
+	struct io_wq *wq = wqe->wq;
+	char buf[TASK_COMM_LEN];
 
-	worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, wqe->node);
-	if (!worker)
-		return false;
+	sprintf(buf, "iou-wrk-%d", wq->task_pid);
+	set_task_comm(current, buf);
 
-	refcount_set(&worker->ref, 1);
-	worker->nulls_node.pprev = NULL;
-	worker->wqe = wqe;
-	spin_lock_init(&worker->lock);
+	current->pf_io_worker = worker;
+	worker->task = current;
 
-	worker->task = kthread_create_on_node(io_wqe_worker, worker, wqe->node,
-				"io_wqe_worker-%d/%d", index, wqe->node);
-	if (IS_ERR(worker->task)) {
-		kfree(worker);
-		return false;
-	}
-	kthread_bind_mask(worker->task, cpumask_of_node(wqe->node));
+	set_cpus_allowed_ptr(current, cpumask_of_node(wqe->node));
+	current->flags |= PF_NO_SETAFFINITY;
 
 	raw_spin_lock_irq(&wqe->lock);
 	hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
@@ -702,11 +599,63 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
 	acct->nr_workers++;
 	raw_spin_unlock_irq(&wqe->lock);
 
-	if (index == IO_WQ_ACCT_UNBOUND)
-		atomic_inc(&wq->user->processes);
+	io_wqe_worker(data);
+	do_exit(0);
+}
+
+static int task_thread_bound(void *data)
+{
+	return task_thread(data, IO_WQ_ACCT_BOUND);
+}
+
+static int task_thread_unbound(void *data)
+{
+	return task_thread(data, IO_WQ_ACCT_UNBOUND);
+}
+
+pid_t io_wq_fork_thread(int (*fn)(void *), void *arg)
+{
+	unsigned long flags = CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|
+				CLONE_IO|SIGCHLD;
+	struct kernel_clone_args args = {
+		.flags		= ((lower_32_bits(flags) | CLONE_VM |
+				    CLONE_UNTRACED) & ~CSIGNAL),
+		.exit_signal	= (lower_32_bits(flags) & CSIGNAL),
+		.stack		= (unsigned long)fn,
+		.stack_size	= (unsigned long)arg,
+	};
+
+	return kernel_clone(&args);
+}
+
+static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
+{
+	struct io_worker *worker;
+	pid_t pid;
+
+	__set_current_state(TASK_RUNNING);
+
+	worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, wqe->node);
+	if (!worker)
+		return false;
+
+	refcount_set(&worker->ref, 1);
+	worker->nulls_node.pprev = NULL;
+	worker->wqe = wqe;
+	spin_lock_init(&worker->lock);
+	init_completion(&worker->ref_done);
 
 	refcount_inc(&wq->refs);
-	wake_up_process(worker->task);
+
+	if (index == IO_WQ_ACCT_BOUND)
+		pid = io_wq_fork_thread(task_thread_bound, worker);
+	else
+		pid = io_wq_fork_thread(task_thread_unbound, worker);
+	if (pid < 0) {
+		io_wq_put(wq);
+		kfree(worker);
+		return false;
+	}
 	return true;
 }
 
@@ -752,93 +701,57 @@ static bool io_wq_worker_wake(struct io_worker *worker, void *data)
 	return false;
 }
 
-/*
- * Manager thread. Tasked with creating new workers, if we need them.
- */
-static int io_wq_manager(void *data)
+static void io_wq_check_workers(struct io_wq *wq)
 {
-	struct io_wq *wq = data;
 	int node;
 
-	/* create fixed workers */
-	refcount_set(&wq->refs, 1);
 	for_each_node(node) {
+		struct io_wqe *wqe = wq->wqes[node];
+		bool fork_worker[2] = { false, false };
+
 		if (!node_online(node))
 			continue;
-		if (create_io_worker(wq, wq->wqes[node], IO_WQ_ACCT_BOUND))
-			continue;
-		set_bit(IO_WQ_BIT_ERROR, &wq->state);
-		set_bit(IO_WQ_BIT_EXIT, &wq->state);
-		goto out;
-	}
-
-	complete(&wq->done);
-
-	while (!kthread_should_stop()) {
-		if (current->task_works)
-			task_work_run();
-
-		for_each_node(node) {
-			struct io_wqe *wqe = wq->wqes[node];
-			bool fork_worker[2] = { false, false };
-
-			if (!node_online(node))
-				continue;
-
-			raw_spin_lock_irq(&wqe->lock);
-			if (io_wqe_need_worker(wqe, IO_WQ_ACCT_BOUND))
-				fork_worker[IO_WQ_ACCT_BOUND] = true;
-			if (io_wqe_need_worker(wqe, IO_WQ_ACCT_UNBOUND))
-				fork_worker[IO_WQ_ACCT_UNBOUND] = true;
-			raw_spin_unlock_irq(&wqe->lock);
-			if (fork_worker[IO_WQ_ACCT_BOUND])
-				create_io_worker(wq, wqe, IO_WQ_ACCT_BOUND);
-			if (fork_worker[IO_WQ_ACCT_UNBOUND])
-				create_io_worker(wq, wqe, IO_WQ_ACCT_UNBOUND);
-		}
-		set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(HZ);
-	}
-
-	if (current->task_works)
-		task_work_run();
 
-out:
-	if (refcount_dec_and_test(&wq->refs)) {
-		complete(&wq->done);
-		return 0;
-	}
-	/* if ERROR is set and we get here, we have workers to wake */
-	if (test_bit(IO_WQ_BIT_ERROR, &wq->state)) {
-		rcu_read_lock();
-		for_each_node(node)
-			io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL);
-		rcu_read_unlock();
+		raw_spin_lock_irq(&wqe->lock);
+		if (io_wqe_need_worker(wqe, IO_WQ_ACCT_BOUND))
+			fork_worker[IO_WQ_ACCT_BOUND] = true;
+		if (io_wqe_need_worker(wqe, IO_WQ_ACCT_UNBOUND))
+			fork_worker[IO_WQ_ACCT_UNBOUND] = true;
+		raw_spin_unlock_irq(&wqe->lock);
+		if (fork_worker[IO_WQ_ACCT_BOUND])
+			create_io_worker(wq, wqe, IO_WQ_ACCT_BOUND);
+		if (fork_worker[IO_WQ_ACCT_UNBOUND])
+			create_io_worker(wq, wqe, IO_WQ_ACCT_UNBOUND);
 	}
-	return 0;
 }
 
-static bool io_wq_can_queue(struct io_wqe *wqe, struct io_wqe_acct *acct,
-			    struct io_wq_work *work)
+/*
+ * Manager thread. Tasked with creating new workers, if we need them.
+ */
+static int io_wq_manager(void *data)
 {
-	bool free_worker;
-
-	if (!(work->flags & IO_WQ_WORK_UNBOUND))
-		return true;
-	if (atomic_read(&acct->nr_running))
-		return true;
+	struct io_wq *wq = data;
+	char buf[TASK_COMM_LEN];
 
-	rcu_read_lock();
-	free_worker = !hlist_nulls_empty(&wqe->free_list);
-	rcu_read_unlock();
-	if (free_worker)
-		return true;
+	sprintf(buf, "iou-mgr-%d", wq->task_pid);
+	set_task_comm(current, buf);
+	current->flags |= PF_IO_WORKER;
+	wq->manager = current;
 
-	if (atomic_read(&wqe->wq->user->processes) >= acct->max_workers &&
-	    !(capable(CAP_SYS_RESOURCE) || capable(CAP_SYS_ADMIN)))
-		return false;
+	complete(&wq->done);
 
-	return true;
+	do {
+		set_current_state(TASK_INTERRUPTIBLE);
+		io_wq_check_workers(wq);
+		schedule_timeout(HZ);
+		if (fatal_signal_pending(current))
+			set_bit(IO_WQ_BIT_EXIT, &wq->state);
+	} while (!test_bit(IO_WQ_BIT_EXIT, &wq->state));
+
+	io_wq_check_workers(wq);
+	wq->manager = NULL;
+	io_wq_put(wq);
+	do_exit(0);
 }
 
 static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
@@ -872,20 +785,37 @@ append:
 	wq_list_add_after(&work->list, &tail->list, &wqe->work_list);
 }
 
+static int io_wq_fork_manager(struct io_wq *wq)
+{
+	int ret;
+
+	if (wq->manager)
+		return 0;
+
+	clear_bit(IO_WQ_BIT_EXIT, &wq->state);
+	refcount_inc(&wq->refs);
+	current->flags |= PF_IO_WORKER;
+	ret = io_wq_fork_thread(io_wq_manager, wq);
+	current->flags &= ~PF_IO_WORKER;
+	if (ret >= 0) {
+		wait_for_completion(&wq->done);
+		return 0;
+	}
+
+	io_wq_put(wq);
+	return ret;
+}
+
 static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
 {
 	struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
 	int work_flags;
 	unsigned long flags;
 
-	/*
-	 * Do early check to see if we need a new unbound worker, and if we do,
-	 * if we're allowed to do so. This isn't 100% accurate as there's a
-	 * gap between this check and incrementing the value, but that's OK.
-	 * It's close enough to not be an issue, fork() has the same delay.
-	 */
-	if (unlikely(!io_wq_can_queue(wqe, acct, work))) {
-		io_run_cancel(work, wqe);
+	/* Can only happen if manager creation fails after exec */
+	if (unlikely(io_wq_fork_manager(wqe->wq))) {
+		work->flags |= IO_WQ_WORK_CANCEL;
+		wqe->wq->do_work(work);
 		return;
 	}
 
@@ -939,7 +869,7 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
 	spin_lock_irqsave(&worker->lock, flags);
 	if (worker->cur_work &&
 	    match->fn(worker->cur_work, match->data)) {
-		send_sig(SIGINT, worker->task, 1);
+		set_notify_signal(worker->task);
 		match->nr_running++;
 	}
 	spin_unlock_irqrestore(&worker->lock, flags);
@@ -1043,6 +973,24 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
 	return IO_WQ_CANCEL_NOTFOUND;
 }
 
+static int io_wqe_hash_wake(struct wait_queue_entry *wait, unsigned mode,
+			    int sync, void *key)
+{
+	struct io_wqe *wqe = container_of(wait, struct io_wqe, wait);
+	int ret;
+
+	list_del_init(&wait->entry);
+
+	rcu_read_lock();
+	ret = io_wqe_activate_free_worker(wqe);
+	rcu_read_unlock();
+
+	if (!ret)
+		wake_up_process(wqe->wq->manager);
+
+	return 1;
+}
+
 struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 {
 	int ret = -ENOMEM, node;
@@ -1063,12 +1011,11 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 	if (ret)
 		goto err_wqes;
 
+	refcount_inc(&data->hash->refs);
+	wq->hash = data->hash;
 	wq->free_work = data->free_work;
 	wq->do_work = data->do_work;
 
-	/* caller must already hold a reference to this */
-	wq->user = data->user;
-
 	ret = -ENOMEM;
 	for_each_node(node) {
 		struct io_wqe *wqe;
@@ -1083,11 +1030,11 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 		wqe->node = alloc_node;
 		wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded;
 		atomic_set(&wqe->acct[IO_WQ_ACCT_BOUND].nr_running, 0);
-		if (wq->user) {
-			wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers =
+		wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers =
 					task_rlimit(current, RLIMIT_NPROC);
-		}
 		atomic_set(&wqe->acct[IO_WQ_ACCT_UNBOUND].nr_running, 0);
+		wqe->wait.func = io_wqe_hash_wake;
+		INIT_LIST_HEAD(&wqe->wait.entry);
 		wqe->wq = wq;
 		raw_spin_lock_init(&wqe->lock);
 		INIT_WQ_LIST(&wqe->work_list);
@@ -1095,23 +1042,16 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 		INIT_LIST_HEAD(&wqe->all_list);
 	}
 
+	wq->task_pid = current->pid;
 	init_completion(&wq->done);
+	refcount_set(&wq->refs, 1);
 
-	wq->manager = kthread_create(io_wq_manager, wq, "io_wq_manager");
-	if (!IS_ERR(wq->manager)) {
-		wake_up_process(wq->manager);
-		wait_for_completion(&wq->done);
-		if (test_bit(IO_WQ_BIT_ERROR, &wq->state)) {
-			ret = -ENOMEM;
-			goto err;
-		}
-		refcount_set(&wq->use_refs, 1);
-		reinit_completion(&wq->done);
+	ret = io_wq_fork_manager(wq);
+	if (!ret)
 		return wq;
-	}
 
-	ret = PTR_ERR(wq->manager);
-	complete(&wq->done);
+	io_wq_put(wq);
+	io_wq_put_hash(data->hash);
 err:
 	cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
 	for_each_node(node)
@@ -1123,15 +1063,7 @@ err_wq:
 	return ERR_PTR(ret);
 }
 
-bool io_wq_get(struct io_wq *wq, struct io_wq_data *data)
-{
-	if (data->free_work != wq->free_work || data->do_work != wq->do_work)
-		return false;
-
-	return refcount_inc_not_zero(&wq->use_refs);
-}
-
-static void __io_wq_destroy(struct io_wq *wq)
+static void io_wq_destroy(struct io_wq *wq)
 {
 	int node;
 
@@ -1139,30 +1071,31 @@ static void __io_wq_destroy(struct io_wq *wq)
 
 	set_bit(IO_WQ_BIT_EXIT, &wq->state);
 	if (wq->manager)
-		kthread_stop(wq->manager);
+		wake_up_process(wq->manager);
 
 	rcu_read_lock();
 	for_each_node(node)
 		io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL);
 	rcu_read_unlock();
 
-	wait_for_completion(&wq->done);
+	spin_lock_irq(&wq->hash->wait.lock);
+	for_each_node(node) {
+		struct io_wqe *wqe = wq->wqes[node];
 
-	for_each_node(node)
-		kfree(wq->wqes[node]);
+		list_del_init(&wqe->wait.entry);
+		kfree(wqe);
+	}
+	spin_unlock_irq(&wq->hash->wait.lock);
+	io_wq_put_hash(wq->hash);
 	kfree(wq->wqes);
 	kfree(wq);
-}
 
-void io_wq_destroy(struct io_wq *wq)
-{
-	if (refcount_dec_and_test(&wq->use_refs))
-		__io_wq_destroy(wq);
 }
 
-struct task_struct *io_wq_get_task(struct io_wq *wq)
+void io_wq_put(struct io_wq *wq)
 {
-	return wq->manager;
+	if (refcount_dec_and_test(&wq->refs))
+		io_wq_destroy(wq);
 }
 
 static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
diff --git a/fs/io-wq.h b/fs/io-wq.h
index 096f1021018e..b6ca12b60c35 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -1,6 +1,7 @@
 #ifndef INTERNAL_IO_WQ_H
 #define INTERNAL_IO_WQ_H
 
+#include <linux/refcount.h>
 #include <linux/io_uring.h>
 
 struct io_wq;
@@ -11,13 +12,6 @@ enum {
 	IO_WQ_WORK_UNBOUND	= 4,
 	IO_WQ_WORK_CONCURRENT	= 16,
 
-	IO_WQ_WORK_FILES	= 32,
-	IO_WQ_WORK_FS		= 64,
-	IO_WQ_WORK_MM		= 128,
-	IO_WQ_WORK_CREDS	= 256,
-	IO_WQ_WORK_BLKCG	= 512,
-	IO_WQ_WORK_FSIZE	= 1024,
-
 	IO_WQ_HASH_SHIFT	= 24,	/* upper 8 bits are used for hash key */
 };
 
@@ -85,7 +79,7 @@ static inline void wq_list_del(struct io_wq_work_list *list,
 
 struct io_wq_work {
 	struct io_wq_work_node list;
-	struct io_identity *identity;
+	const struct cred *creds;
 	unsigned flags;
 };
 
@@ -100,20 +94,32 @@ static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
 typedef struct io_wq_work *(free_work_fn)(struct io_wq_work *);
 typedef void (io_wq_work_fn)(struct io_wq_work *);
 
-struct io_wq_data {
-	struct user_struct *user;
+struct io_wq_hash {
+	refcount_t refs;
+	unsigned long map;
+	struct wait_queue_head wait;
+};
 
+static inline void io_wq_put_hash(struct io_wq_hash *hash)
+{
+	if (refcount_dec_and_test(&hash->refs))
+		kfree(hash);
+}
+
+struct io_wq_data {
+	struct io_wq_hash *hash;
 	io_wq_work_fn *do_work;
 	free_work_fn *free_work;
 };
 
 struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
-bool io_wq_get(struct io_wq *wq, struct io_wq_data *data);
-void io_wq_destroy(struct io_wq *wq);
+void io_wq_put(struct io_wq *wq);
 
 void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
 void io_wq_hash_work(struct io_wq_work *work, void *val);
 
+pid_t io_wq_fork_thread(int (*fn)(void *), void *arg);
+
 static inline bool io_wq_is_hashed(struct io_wq_work *work)
 {
 	return work->flags & IO_WQ_WORK_HASHED;
@@ -124,8 +130,6 @@ typedef bool (work_cancel_fn)(struct io_wq_work *, void *);
 enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
 					void *data, bool cancel_all);
 
-struct task_struct *io_wq_get_task(struct io_wq *wq);
-
 #if defined(CONFIG_IO_WQ)
 extern void io_wq_worker_sleeping(struct task_struct *);
 extern void io_wq_worker_running(struct task_struct *);
@@ -140,6 +144,7 @@ static inline void io_wq_worker_running(struct task_struct *tsk)
 
 static inline bool io_wq_current_is_worker(void)
 {
-	return in_task() && (current->flags & PF_IO_WORKER);
+	return in_task() && (current->flags & PF_IO_WORKER) &&
+		current->pf_io_worker;
 }
 #endif
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 14ce789927e4..4a088581b0f2 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -57,7 +57,6 @@
 #include <linux/mman.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
-#include <linux/kthread.h>
 #include <linux/blkdev.h>
 #include <linux/bvec.h>
 #include <linux/net.h>
@@ -104,6 +103,10 @@
 #define IORING_MAX_RESTRICTIONS	(IORING_RESTRICTION_LAST + \
 				 IORING_REGISTER_LAST + IORING_OP_LAST)
 
+#define SQE_VALID_FLAGS	(IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK|	\
+				IOSQE_IO_HARDLINK | IOSQE_ASYNC | \
+				IOSQE_BUFFER_SELECT)
+
 struct io_uring {
 	u32 head ____cacheline_aligned_in_smp;
 	u32 tail ____cacheline_aligned_in_smp;
@@ -232,6 +235,7 @@ struct fixed_rsrc_data {
 	struct fixed_rsrc_ref_node	*node;
 	struct percpu_ref		refs;
 	struct completion		done;
+	bool				quiesce;
 };
 
 struct io_buffer {
@@ -249,6 +253,11 @@ struct io_restriction {
 	bool registered;
 };
 
+enum {
+	IO_SQ_THREAD_SHOULD_STOP = 0,
+	IO_SQ_THREAD_SHOULD_PARK,
+};
+
 struct io_sq_data {
 	refcount_t		refs;
 	struct mutex		lock;
@@ -262,6 +271,13 @@ struct io_sq_data {
 	struct wait_queue_head	wait;
 
 	unsigned		sq_thread_idle;
+	int			sq_cpu;
+	pid_t			task_pid;
+
+	unsigned long		state;
+	struct completion	startup;
+	struct completion	completion;
+	struct completion	exited;
 };
 
 #define IO_IOPOLL_BATCH			8
@@ -279,8 +295,14 @@ struct io_comp_state {
 	struct list_head	locked_free_list;
 };
 
+struct io_submit_link {
+	struct io_kiocb		*head;
+	struct io_kiocb		*last;
+};
+
 struct io_submit_state {
 	struct blk_plug		plug;
+	struct io_submit_link	link;
 
 	/*
 	 * io_kiocb alloc cache
@@ -312,12 +334,12 @@ struct io_ring_ctx {
 	struct {
 		unsigned int		flags;
 		unsigned int		compat: 1;
-		unsigned int		limit_mem: 1;
 		unsigned int		cq_overflow_flushed: 1;
 		unsigned int		drain_next: 1;
 		unsigned int		eventfd_async: 1;
 		unsigned int		restricted: 1;
 		unsigned int		sqo_dead: 1;
+		unsigned int		sqo_exec: 1;
 
 		/*
 		 * Ring buffer of indices into array of io_uring_sqe, which is
@@ -339,6 +361,9 @@ struct io_ring_ctx {
 		unsigned		cached_cq_overflow;
 		unsigned long		sq_check_overflow;
 
+		/* hashed buffered write serialization */
+		struct io_wq_hash	*hash_map;
+
 		struct list_head	defer_list;
 		struct list_head	timeout_list;
 		struct list_head	cq_overflow_list;
@@ -355,22 +380,14 @@ struct io_ring_ctx {
 
 	struct io_rings	*rings;
 
-	/* IO offload */
-	struct io_wq		*io_wq;
-
 	/*
-	 * For SQPOLL usage - we hold a reference to the parent task, so we
-	 * have access to the ->files
+	 * For SQPOLL usage
 	 */
 	struct task_struct	*sqo_task;
 
 	/* Only used for accounting purposes */
 	struct mm_struct	*mm_account;
 
-#ifdef CONFIG_BLK_CGROUP
-	struct cgroup_subsys_state	*sqo_blkcg_css;
-#endif
-
 	struct io_sq_data	*sq_data;	/* if using sq thread polling */
 
 	struct wait_queue_head	sqo_sq_wait;
@@ -390,13 +407,6 @@ struct io_ring_ctx {
 
 	struct user_struct	*user;
 
-	const struct cred	*creds;
-
-#ifdef CONFIG_AUDIT
-	kuid_t			loginuid;
-	unsigned int		sessionid;
-#endif
-
 	struct completion	ref_comp;
 	struct completion	sq_thread_comp;
 
@@ -445,6 +455,11 @@ struct io_ring_ctx {
 
 	struct io_restriction		restrictions;
 
+	/* exit task_work */
+	struct callback_head		*exit_task_work;
+
+	struct wait_queue_head		hash_wait;
+
 	/* Keep this last, we don't need it for the fast path */
 	struct work_struct		exit_work;
 };
@@ -827,7 +842,6 @@ struct io_op_def {
 	unsigned		plug : 1;
 	/* size of async data needed, if any */
 	unsigned short		async_size;
-	unsigned		work_flags;
 };
 
 static const struct io_op_def io_op_defs[] = {
@@ -840,7 +854,6 @@ static const struct io_op_def io_op_defs[] = {
 		.needs_async_data	= 1,
 		.plug			= 1,
 		.async_size		= sizeof(struct io_async_rw),
-		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
 	},
 	[IORING_OP_WRITEV] = {
 		.needs_file		= 1,
@@ -850,12 +863,9 @@ static const struct io_op_def io_op_defs[] = {
 		.needs_async_data	= 1,
 		.plug			= 1,
 		.async_size		= sizeof(struct io_async_rw),
-		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
-						IO_WQ_WORK_FSIZE,
 	},
 	[IORING_OP_FSYNC] = {
 		.needs_file		= 1,
-		.work_flags		= IO_WQ_WORK_BLKCG,
 	},
 	[IORING_OP_READ_FIXED] = {
 		.needs_file		= 1,
@@ -863,7 +873,6 @@ static const struct io_op_def io_op_defs[] = {
 		.pollin			= 1,
 		.plug			= 1,
 		.async_size		= sizeof(struct io_async_rw),
-		.work_flags		= IO_WQ_WORK_BLKCG | IO_WQ_WORK_MM,
 	},
 	[IORING_OP_WRITE_FIXED] = {
 		.needs_file		= 1,
@@ -872,8 +881,6 @@ static const struct io_op_def io_op_defs[] = {
 		.pollout		= 1,
 		.plug			= 1,
 		.async_size		= sizeof(struct io_async_rw),
-		.work_flags		= IO_WQ_WORK_BLKCG | IO_WQ_WORK_FSIZE |
-						IO_WQ_WORK_MM,
 	},
 	[IORING_OP_POLL_ADD] = {
 		.needs_file		= 1,
@@ -882,7 +889,6 @@ static const struct io_op_def io_op_defs[] = {
 	[IORING_OP_POLL_REMOVE] = {},
 	[IORING_OP_SYNC_FILE_RANGE] = {
 		.needs_file		= 1,
-		.work_flags		= IO_WQ_WORK_BLKCG,
 	},
 	[IORING_OP_SENDMSG] = {
 		.needs_file		= 1,
@@ -890,8 +896,6 @@ static const struct io_op_def io_op_defs[] = {
 		.pollout		= 1,
 		.needs_async_data	= 1,
 		.async_size		= sizeof(struct io_async_msghdr),
-		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
-						IO_WQ_WORK_FS,
 	},
 	[IORING_OP_RECVMSG] = {
 		.needs_file		= 1,
@@ -900,29 +904,23 @@ static const struct io_op_def io_op_defs[] = {
 		.buffer_select		= 1,
 		.needs_async_data	= 1,
 		.async_size		= sizeof(struct io_async_msghdr),
-		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
-						IO_WQ_WORK_FS,
 	},
 	[IORING_OP_TIMEOUT] = {
 		.needs_async_data	= 1,
 		.async_size		= sizeof(struct io_timeout_data),
-		.work_flags		= IO_WQ_WORK_MM,
 	},
 	[IORING_OP_TIMEOUT_REMOVE] = {
 		/* used by timeout updates' prep() */
-		.work_flags		= IO_WQ_WORK_MM,
 	},
 	[IORING_OP_ACCEPT] = {
 		.needs_file		= 1,
 		.unbound_nonreg_file	= 1,
 		.pollin			= 1,
-		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_FILES,
 	},
 	[IORING_OP_ASYNC_CANCEL] = {},
 	[IORING_OP_LINK_TIMEOUT] = {
 		.needs_async_data	= 1,
 		.async_size		= sizeof(struct io_timeout_data),
-		.work_flags		= IO_WQ_WORK_MM,
 	},
 	[IORING_OP_CONNECT] = {
 		.needs_file		= 1,
@@ -930,26 +928,14 @@ static const struct io_op_def io_op_defs[] = {
 		.pollout		= 1,
 		.needs_async_data	= 1,
 		.async_size		= sizeof(struct io_async_connect),
-		.work_flags		= IO_WQ_WORK_MM,
 	},
 	[IORING_OP_FALLOCATE] = {
 		.needs_file		= 1,
-		.work_flags		= IO_WQ_WORK_BLKCG | IO_WQ_WORK_FSIZE,
-	},
-	[IORING_OP_OPENAT] = {
-		.work_flags		= IO_WQ_WORK_FILES | IO_WQ_WORK_BLKCG |
-						IO_WQ_WORK_FS | IO_WQ_WORK_MM,
-	},
-	[IORING_OP_CLOSE] = {
-		.work_flags		= IO_WQ_WORK_FILES | IO_WQ_WORK_BLKCG,
-	},
-	[IORING_OP_FILES_UPDATE] = {
-		.work_flags		= IO_WQ_WORK_FILES | IO_WQ_WORK_MM,
-	},
-	[IORING_OP_STATX] = {
-		.work_flags		= IO_WQ_WORK_FILES | IO_WQ_WORK_MM |
-						IO_WQ_WORK_FS | IO_WQ_WORK_BLKCG,
 	},
+	[IORING_OP_OPENAT] = {},
+	[IORING_OP_CLOSE] = {},
+	[IORING_OP_FILES_UPDATE] = {},
+	[IORING_OP_STATX] = {},
 	[IORING_OP_READ] = {
 		.needs_file		= 1,
 		.unbound_nonreg_file	= 1,
@@ -957,7 +943,6 @@ static const struct io_op_def io_op_defs[] = {
 		.buffer_select		= 1,
 		.plug			= 1,
 		.async_size		= sizeof(struct io_async_rw),
-		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
 	},
 	[IORING_OP_WRITE] = {
 		.needs_file		= 1,
@@ -965,42 +950,31 @@ static const struct io_op_def io_op_defs[] = {
 		.pollout		= 1,
 		.plug			= 1,
 		.async_size		= sizeof(struct io_async_rw),
-		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
-						IO_WQ_WORK_FSIZE,
 	},
 	[IORING_OP_FADVISE] = {
 		.needs_file		= 1,
-		.work_flags		= IO_WQ_WORK_BLKCG,
-	},
-	[IORING_OP_MADVISE] = {
-		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
 	},
+	[IORING_OP_MADVISE] = {},
 	[IORING_OP_SEND] = {
 		.needs_file		= 1,
 		.unbound_nonreg_file	= 1,
 		.pollout		= 1,
-		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
 	},
 	[IORING_OP_RECV] = {
 		.needs_file		= 1,
 		.unbound_nonreg_file	= 1,
 		.pollin			= 1,
 		.buffer_select		= 1,
-		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
 	},
 	[IORING_OP_OPENAT2] = {
-		.work_flags		= IO_WQ_WORK_FILES | IO_WQ_WORK_FS |
-						IO_WQ_WORK_BLKCG | IO_WQ_WORK_MM,
 	},
 	[IORING_OP_EPOLL_CTL] = {
 		.unbound_nonreg_file	= 1,
-		.work_flags		= IO_WQ_WORK_FILES,
 	},
 	[IORING_OP_SPLICE] = {
 		.needs_file		= 1,
 		.hash_reg_file		= 1,
 		.unbound_nonreg_file	= 1,
-		.work_flags		= IO_WQ_WORK_BLKCG,
 	},
 	[IORING_OP_PROVIDE_BUFFERS] = {},
 	[IORING_OP_REMOVE_BUFFERS] = {},
@@ -1012,24 +986,18 @@ static const struct io_op_def io_op_defs[] = {
 	[IORING_OP_SHUTDOWN] = {
 		.needs_file		= 1,
 	},
-	[IORING_OP_RENAMEAT] = {
-		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_FILES |
-						IO_WQ_WORK_FS | IO_WQ_WORK_BLKCG,
-	},
-	[IORING_OP_UNLINKAT] = {
-		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_FILES |
-						IO_WQ_WORK_FS | IO_WQ_WORK_BLKCG,
-	},
+	[IORING_OP_RENAMEAT] = {},
+	[IORING_OP_UNLINKAT] = {},
 };
 
 static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 					 struct task_struct *task,
 					 struct files_struct *files);
+static void io_uring_cancel_sqpoll(struct io_ring_ctx *ctx);
 static void destroy_fixed_rsrc_ref_node(struct fixed_rsrc_ref_node *ref_node);
 static struct fixed_rsrc_ref_node *alloc_fixed_rsrc_ref_node(
 			struct io_ring_ctx *ctx);
-static void init_fixed_file_ref_node(struct io_ring_ctx *ctx,
-				     struct fixed_rsrc_ref_node *ref_node);
+static void io_ring_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc);
 
 static bool io_rw_reissue(struct io_kiocb *req);
 static void io_cqring_fill_event(struct io_kiocb *req, long res);
@@ -1116,161 +1084,18 @@ static bool io_match_task(struct io_kiocb *head,
 			continue;
 		if (req->file && req->file->f_op == &io_uring_fops)
 			return true;
-		if ((req->work.flags & IO_WQ_WORK_FILES) &&
-		    req->work.identity->files == files)
+		if (req->task->files == files)
 			return true;
 	}
 	return false;
 }
 
-static void io_sq_thread_drop_mm_files(void)
-{
-	struct files_struct *files = current->files;
-	struct mm_struct *mm = current->mm;
-
-	if (mm) {
-		kthread_unuse_mm(mm);
-		mmput(mm);
-		current->mm = NULL;
-	}
-	if (files) {
-		struct nsproxy *nsproxy = current->nsproxy;
-
-		task_lock(current);
-		current->files = NULL;
-		current->nsproxy = NULL;
-		task_unlock(current);
-		put_files_struct(files);
-		put_nsproxy(nsproxy);
-	}
-}
-
-static int __io_sq_thread_acquire_files(struct io_ring_ctx *ctx)
-{
-	if (!current->files) {
-		struct files_struct *files;
-		struct nsproxy *nsproxy;
-
-		task_lock(ctx->sqo_task);
-		files = ctx->sqo_task->files;
-		if (!files) {
-			task_unlock(ctx->sqo_task);
-			return -EOWNERDEAD;
-		}
-		atomic_inc(&files->count);
-		get_nsproxy(ctx->sqo_task->nsproxy);
-		nsproxy = ctx->sqo_task->nsproxy;
-		task_unlock(ctx->sqo_task);
-
-		task_lock(current);
-		current->files = files;
-		current->nsproxy = nsproxy;
-		task_unlock(current);
-	}
-	return 0;
-}
-
-static int __io_sq_thread_acquire_mm(struct io_ring_ctx *ctx)
-{
-	struct mm_struct *mm;
-
-	if (current->mm)
-		return 0;
-
-	task_lock(ctx->sqo_task);
-	mm = ctx->sqo_task->mm;
-	if (unlikely(!mm || !mmget_not_zero(mm)))
-		mm = NULL;
-	task_unlock(ctx->sqo_task);
-
-	if (mm) {
-		kthread_use_mm(mm);
-		return 0;
-	}
-
-	return -EFAULT;
-}
-
-static int __io_sq_thread_acquire_mm_files(struct io_ring_ctx *ctx,
-					   struct io_kiocb *req)
-{
-	const struct io_op_def *def = &io_op_defs[req->opcode];
-	int ret;
-
-	if (def->work_flags & IO_WQ_WORK_MM) {
-		ret = __io_sq_thread_acquire_mm(ctx);
-		if (unlikely(ret))
-			return ret;
-	}
-
-	if (def->needs_file || (def->work_flags & IO_WQ_WORK_FILES)) {
-		ret = __io_sq_thread_acquire_files(ctx);
-		if (unlikely(ret))
-			return ret;
-	}
-
-	return 0;
-}
-
-static inline int io_sq_thread_acquire_mm_files(struct io_ring_ctx *ctx,
-						struct io_kiocb *req)
-{
-	if (!(ctx->flags & IORING_SETUP_SQPOLL))
-		return 0;
-	return __io_sq_thread_acquire_mm_files(ctx, req);
-}
-
-static void io_sq_thread_associate_blkcg(struct io_ring_ctx *ctx,
-					 struct cgroup_subsys_state **cur_css)
-
-{
-#ifdef CONFIG_BLK_CGROUP
-	/* puts the old one when swapping */
-	if (*cur_css != ctx->sqo_blkcg_css) {
-		kthread_associate_blkcg(ctx->sqo_blkcg_css);
-		*cur_css = ctx->sqo_blkcg_css;
-	}
-#endif
-}
-
-static void io_sq_thread_unassociate_blkcg(void)
-{
-#ifdef CONFIG_BLK_CGROUP
-	kthread_associate_blkcg(NULL);
-#endif
-}
-
 static inline void req_set_fail_links(struct io_kiocb *req)
 {
 	if ((req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) == REQ_F_LINK)
 		req->flags |= REQ_F_FAIL_LINK;
 }
 
-/*
- * None of these are dereferenced, they are simply used to check if any of
- * them have changed. If we're under current and check they are still the
- * same, we're fine to grab references to them for actual out-of-line use.
- */
-static void io_init_identity(struct io_identity *id)
-{
-	id->files = current->files;
-	id->mm = current->mm;
-#ifdef CONFIG_BLK_CGROUP
-	rcu_read_lock();
-	id->blkcg_css = blkcg_css();
-	rcu_read_unlock();
-#endif
-	id->creds = current_cred();
-	id->nsproxy = current->nsproxy;
-	id->fs = current->fs;
-	id->fsize = rlimit(RLIMIT_FSIZE);
-#ifdef CONFIG_AUDIT
-	id->loginuid = current->loginuid;
-	id->sessionid = current->sessionid;
-#endif
-	refcount_set(&id->count, 1);
-}
-
 static inline void __io_req_init_async(struct io_kiocb *req)
 {
 	memset(&req->work, 0, sizeof(req->work));
@@ -1283,17 +1108,10 @@ static inline void __io_req_init_async(struct io_kiocb *req)
  */
 static inline void io_req_init_async(struct io_kiocb *req)
 {
-	struct io_uring_task *tctx = current->io_uring;
-
 	if (req->flags & REQ_F_WORK_INITIALIZED)
 		return;
 
 	__io_req_init_async(req);
-
-	/* Grab a ref if this isn't our static identity */
-	req->work.identity = tctx->identity;
-	if (tctx->identity != &tctx->__identity)
-		refcount_inc(&req->work.identity->count);
 }
 
 static void io_ring_ctx_ref_free(struct percpu_ref *ref)
@@ -1378,40 +1196,14 @@ static bool req_need_defer(struct io_kiocb *req, u32 seq)
 	return false;
 }
 
-static void io_put_identity(struct io_uring_task *tctx, struct io_kiocb *req)
-{
-	if (req->work.identity == &tctx->__identity)
-		return;
-	if (refcount_dec_and_test(&req->work.identity->count))
-		kfree(req->work.identity);
-}
-
 static void io_req_clean_work(struct io_kiocb *req)
 {
 	if (!(req->flags & REQ_F_WORK_INITIALIZED))
 		return;
 
-	if (req->work.flags & IO_WQ_WORK_MM)
-		mmdrop(req->work.identity->mm);
-#ifdef CONFIG_BLK_CGROUP
-	if (req->work.flags & IO_WQ_WORK_BLKCG)
-		css_put(req->work.identity->blkcg_css);
-#endif
-	if (req->work.flags & IO_WQ_WORK_CREDS)
-		put_cred(req->work.identity->creds);
-	if (req->work.flags & IO_WQ_WORK_FS) {
-		struct fs_struct *fs = req->work.identity->fs;
-
-		spin_lock(&req->work.identity->fs->lock);
-		if (--fs->users)
-			fs = NULL;
-		spin_unlock(&req->work.identity->fs->lock);
-		if (fs)
-			free_fs_struct(fs);
-	}
-	if (req->work.flags & IO_WQ_WORK_FILES) {
-		put_files_struct(req->work.identity->files);
-		put_nsproxy(req->work.identity->nsproxy);
+	if (req->work.creds) {
+		put_cred(req->work.creds);
+		req->work.creds = NULL;
 	}
 	if (req->flags & REQ_F_INFLIGHT) {
 		struct io_ring_ctx *ctx = req->ctx;
@@ -1427,54 +1219,6 @@ static void io_req_clean_work(struct io_kiocb *req)
 	}
 
 	req->flags &= ~REQ_F_WORK_INITIALIZED;
-	req->work.flags &= ~(IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG | IO_WQ_WORK_FS |
-			     IO_WQ_WORK_CREDS | IO_WQ_WORK_FILES);
-	io_put_identity(req->task->io_uring, req);
-}
-
-/*
- * Create a private copy of io_identity, since some fields don't match
- * the current context.
- */
-static bool io_identity_cow(struct io_kiocb *req)
-{
-	struct io_uring_task *tctx = current->io_uring;
-	const struct cred *creds = NULL;
-	struct io_identity *id;
-
-	if (req->work.flags & IO_WQ_WORK_CREDS)
-		creds = req->work.identity->creds;
-
-	id = kmemdup(req->work.identity, sizeof(*id), GFP_KERNEL);
-	if (unlikely(!id)) {
-		req->work.flags |= IO_WQ_WORK_CANCEL;
-		return false;
-	}
-
-	/*
-	 * We can safely just re-init the creds we copied  Either the field
-	 * matches the current one, or we haven't grabbed it yet. The only
-	 * exception is ->creds, through registered personalities, so handle
-	 * that one separately.
-	 */
-	io_init_identity(id);
-	if (creds)
-		id->creds = creds;
-
-	/* add one for this request */
-	refcount_inc(&id->count);
-
-	/* drop tctx and req identity references, if needed */
-	if (tctx->identity != &tctx->__identity &&
-	    refcount_dec_and_test(&tctx->identity->count))
-		kfree(tctx->identity);
-	if (req->work.identity != &tctx->__identity &&
-	    refcount_dec_and_test(&req->work.identity->count))
-		kfree(req->work.identity);
-
-	req->work.identity = id;
-	tctx->identity = id;
-	return true;
 }
 
 static void io_req_track_inflight(struct io_kiocb *req)
@@ -1491,79 +1235,6 @@ static void io_req_track_inflight(struct io_kiocb *req)
 	}
 }
 
-static bool io_grab_identity(struct io_kiocb *req)
-{
-	const struct io_op_def *def = &io_op_defs[req->opcode];
-	struct io_identity *id = req->work.identity;
-
-	if (def->work_flags & IO_WQ_WORK_FSIZE) {
-		if (id->fsize != rlimit(RLIMIT_FSIZE))
-			return false;
-		req->work.flags |= IO_WQ_WORK_FSIZE;
-	}
-#ifdef CONFIG_BLK_CGROUP
-	if (!(req->work.flags & IO_WQ_WORK_BLKCG) &&
-	    (def->work_flags & IO_WQ_WORK_BLKCG)) {
-		rcu_read_lock();
-		if (id->blkcg_css != blkcg_css()) {
-			rcu_read_unlock();
-			return false;
-		}
-		/*
-		 * This should be rare, either the cgroup is dying or the task
-		 * is moving cgroups. Just punt to root for the handful of ios.
-		 */
-		if (css_tryget_online(id->blkcg_css))
-			req->work.flags |= IO_WQ_WORK_BLKCG;
-		rcu_read_unlock();
-	}
-#endif
-	if (!(req->work.flags & IO_WQ_WORK_CREDS)) {
-		if (id->creds != current_cred())
-			return false;
-		get_cred(id->creds);
-		req->work.flags |= IO_WQ_WORK_CREDS;
-	}
-#ifdef CONFIG_AUDIT
-	if (!uid_eq(current->loginuid, id->loginuid) ||
-	    current->sessionid != id->sessionid)
-		return false;
-#endif
-	if (!(req->work.flags & IO_WQ_WORK_FS) &&
-	    (def->work_flags & IO_WQ_WORK_FS)) {
-		if (current->fs != id->fs)
-			return false;
-		spin_lock(&id->fs->lock);
-		if (!id->fs->in_exec) {
-			id->fs->users++;
-			req->work.flags |= IO_WQ_WORK_FS;
-		} else {
-			req->work.flags |= IO_WQ_WORK_CANCEL;
-		}
-		spin_unlock(&current->fs->lock);
-	}
-	if (!(req->work.flags & IO_WQ_WORK_FILES) &&
-	    (def->work_flags & IO_WQ_WORK_FILES) &&
-	    !(req->flags & REQ_F_NO_FILE_TABLE)) {
-		if (id->files != current->files ||
-		    id->nsproxy != current->nsproxy)
-			return false;
-		atomic_inc(&id->files->count);
-		get_nsproxy(id->nsproxy);
-		req->work.flags |= IO_WQ_WORK_FILES;
-		io_req_track_inflight(req);
-	}
-	if (!(req->work.flags & IO_WQ_WORK_MM) &&
-	    (def->work_flags & IO_WQ_WORK_MM)) {
-		if (id->mm != current->mm)
-			return false;
-		mmgrab(id->mm);
-		req->work.flags |= IO_WQ_WORK_MM;
-	}
-
-	return true;
-}
-
 static void io_prep_async_work(struct io_kiocb *req)
 {
 	const struct io_op_def *def = &io_op_defs[req->opcode];
@@ -1581,17 +1252,8 @@ static void io_prep_async_work(struct io_kiocb *req)
 		if (def->unbound_nonreg_file)
 			req->work.flags |= IO_WQ_WORK_UNBOUND;
 	}
-
-	/* if we fail grabbing identity, we must COW, regrab, and retry */
-	if (io_grab_identity(req))
-		return;
-
-	if (!io_identity_cow(req))
-		return;
-
-	/* can't fail at this point */
-	if (!io_grab_identity(req))
-		WARN_ON(1);
+	if (!req->work.creds)
+		req->work.creds = get_current_cred();
 }
 
 static void io_prep_async_link(struct io_kiocb *req)
@@ -1606,10 +1268,14 @@ static struct io_kiocb *__io_queue_async_work(struct io_kiocb *req)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 	struct io_kiocb *link = io_prep_linked_timeout(req);
+	struct io_uring_task *tctx = req->task->io_uring;
+
+	BUG_ON(!tctx);
+	BUG_ON(!tctx->io_wq);
 
 	trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
 					&req->work, req->flags);
-	io_wq_enqueue(ctx->io_wq, &req->work);
+	io_wq_enqueue(tctx->io_wq, &req->work);
 	return link;
 }
 
@@ -2303,11 +1969,14 @@ static int io_req_task_work_add(struct io_kiocb *req)
 static void io_req_task_work_add_fallback(struct io_kiocb *req,
 					  task_work_func_t cb)
 {
-	struct task_struct *tsk = io_wq_get_task(req->ctx->io_wq);
+	struct io_ring_ctx *ctx = req->ctx;
+	struct callback_head *head;
 
 	init_task_work(&req->task_work, cb);
-	task_work_add(tsk, &req->task_work, TWA_NONE);
-	wake_up_process(tsk);
+	do {
+		head = READ_ONCE(ctx->exit_task_work);
+		req->task_work.next = head;
+	} while (cmpxchg(&ctx->exit_task_work, head, &req->task_work) != head);
 }
 
 static void __io_req_task_cancel(struct io_kiocb *req, int error)
@@ -2329,7 +1998,9 @@ static void io_req_task_cancel(struct callback_head *cb)
 	struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
 	struct io_ring_ctx *ctx = req->ctx;
 
-	__io_req_task_cancel(req, -ECANCELED);
+	mutex_lock(&ctx->uring_lock);
+	__io_req_task_cancel(req, req->result);
+	mutex_unlock(&ctx->uring_lock);
 	percpu_ref_put(&ctx->refs);
 }
 
@@ -2339,15 +2010,11 @@ static void __io_req_task_submit(struct io_kiocb *req)
 
 	/* ctx stays valid until unlock, even if we drop all ours ctx->refs */
 	mutex_lock(&ctx->uring_lock);
-	if (!ctx->sqo_dead && !(current->flags & PF_EXITING) &&
-	    !io_sq_thread_acquire_mm_files(ctx, req))
+	if (!ctx->sqo_dead && !(current->flags & PF_EXITING) && !current->in_execve)
 		__io_queue_sqe(req);
 	else
 		__io_req_task_cancel(req, -EFAULT);
 	mutex_unlock(&ctx->uring_lock);
-
-	if (ctx->flags & IORING_SETUP_SQPOLL)
-		io_sq_thread_drop_mm_files();
 }
 
 static void io_req_task_submit(struct callback_head *cb)
@@ -2364,11 +2031,22 @@ static void io_req_task_queue(struct io_kiocb *req)
 	req->task_work.func = io_req_task_submit;
 	ret = io_req_task_work_add(req);
 	if (unlikely(ret)) {
+		req->result = -ECANCELED;
 		percpu_ref_get(&req->ctx->refs);
 		io_req_task_work_add_fallback(req, io_req_task_cancel);
 	}
 }
 
+static void io_req_task_queue_fail(struct io_kiocb *req, int ret)
+{
+	percpu_ref_get(&req->ctx->refs);
+	req->result = ret;
+	req->task_work.func = io_req_task_cancel;
+
+	if (unlikely(io_req_task_work_add(req)))
+		io_req_task_work_add_fallback(req, io_req_task_cancel);
+}
+
 static inline void io_queue_next(struct io_kiocb *req)
 {
 	struct io_kiocb *nxt = io_req_find_next(req);
@@ -2800,18 +2478,22 @@ static bool io_rw_reissue(struct io_kiocb *req)
 {
 #ifdef CONFIG_BLOCK
 	umode_t mode = file_inode(req->file)->i_mode;
-	int ret;
 
 	if (!S_ISBLK(mode) && !S_ISREG(mode))
 		return false;
 	if ((req->flags & REQ_F_NOWAIT) || io_wq_current_is_worker())
 		return false;
+	/*
+	 * If ref is dying, we might be running poll reap from the exit work.
+	 * Don't attempt to reissue from that path, just let it fail with
+	 * -EAGAIN.
+	 */
+	if (percpu_ref_is_dying(&req->ctx->refs))
+		return false;
 
 	lockdep_assert_held(&req->ctx->uring_lock);
 
-	ret = io_sq_thread_acquire_mm_files(req->ctx, req);
-
-	if (!ret && io_resubmit_prep(req)) {
+	if (io_resubmit_prep(req)) {
 		refcount_inc(&req->refs);
 		io_queue_async_work(req);
 		return true;
@@ -3467,19 +3149,9 @@ static inline int io_rw_prep_async(struct io_kiocb *req, int rw)
 
 static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
-	ssize_t ret;
-
-	ret = io_prep_rw(req, sqe);
-	if (ret)
-		return ret;
-
 	if (unlikely(!(req->file->f_mode & FMODE_READ)))
 		return -EBADF;
-
-	/* either don't need iovec imported or already have it */
-	if (!req->async_data)
-		return 0;
-	return io_rw_prep_async(req, READ);
+	return io_prep_rw(req, sqe);
 }
 
 /*
@@ -3607,10 +3279,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 	ret = io_iter_do_read(req, iter);
 
 	if (ret == -EIOCBQUEUED) {
-		/* it's faster to check here then delegate to kfree */
-		if (iovec)
-			kfree(iovec);
-		return 0;
+		goto out_free;
 	} else if (ret == -EAGAIN) {
 		/* IOPOLL retry should happen for io-wq threads */
 		if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL))
@@ -3631,6 +3300,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 	if (ret2)
 		return ret2;
 
+	iovec = NULL;
 	rw = req->async_data;
 	/* now use our persistent iterator, if we aren't already */
 	iter = &rw->iter;
@@ -3657,24 +3327,18 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 	} while (ret > 0 && ret < io_size);
 done:
 	kiocb_done(kiocb, ret, issue_flags);
+out_free:
+	/* it's faster to check here then delegate to kfree */
+	if (iovec)
+		kfree(iovec);
 	return 0;
 }
 
 static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
-	ssize_t ret;
-
-	ret = io_prep_rw(req, sqe);
-	if (ret)
-		return ret;
-
 	if (unlikely(!(req->file->f_mode & FMODE_WRITE)))
 		return -EBADF;
-
-	/* either don't need iovec imported or already have it */
-	if (!req->async_data)
-		return 0;
-	return io_rw_prep_async(req, WRITE);
+	return io_prep_rw(req, sqe);
 }
 
 static int io_write(struct io_kiocb *req, unsigned int issue_flags)
@@ -4011,7 +3675,7 @@ static int io_nop(struct io_kiocb *req, unsigned int issue_flags)
 	return 0;
 }
 
-static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+static int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 
@@ -4598,13 +4262,10 @@ err:
 	return 0;
 }
 
-static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+static int io_sfr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 
-	if (!req->file)
-		return -EBADF;
-
 	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 	if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
@@ -4664,11 +4325,21 @@ static int io_sendmsg_copy_hdr(struct io_kiocb *req,
 				   req->sr_msg.msg_flags, &iomsg->free_iov);
 }
 
+static int io_sendmsg_prep_async(struct io_kiocb *req)
+{
+	int ret;
+
+	if (!io_op_defs[req->opcode].needs_async_data)
+		return 0;
+	ret = io_sendmsg_copy_hdr(req, req->async_data);
+	if (!ret)
+		req->flags |= REQ_F_NEED_CLEANUP;
+	return ret;
+}
+
 static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
-	struct io_async_msghdr *async_msg = req->async_data;
 	struct io_sr_msg *sr = &req->sr_msg;
-	int ret;
 
 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
@@ -4681,13 +4352,7 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	if (req->ctx->compat)
 		sr->msg_flags |= MSG_CMSG_COMPAT;
 #endif
-
-	if (!async_msg || !io_op_defs[req->opcode].needs_async_data)
-		return 0;
-	ret = io_sendmsg_copy_hdr(req, async_msg);
-	if (!ret)
-		req->flags |= REQ_F_NEED_CLEANUP;
-	return ret;
+	return 0;
 }
 
 static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
@@ -4881,13 +4546,22 @@ static inline unsigned int io_put_recv_kbuf(struct io_kiocb *req)
 	return io_put_kbuf(req, req->sr_msg.kbuf);
 }
 
-static int io_recvmsg_prep(struct io_kiocb *req,
-			   const struct io_uring_sqe *sqe)
+static int io_recvmsg_prep_async(struct io_kiocb *req)
 {
-	struct io_async_msghdr *async_msg = req->async_data;
-	struct io_sr_msg *sr = &req->sr_msg;
 	int ret;
 
+	if (!io_op_defs[req->opcode].needs_async_data)
+		return 0;
+	ret = io_recvmsg_copy_hdr(req, req->async_data);
+	if (!ret)
+		req->flags |= REQ_F_NEED_CLEANUP;
+	return ret;
+}
+
+static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	struct io_sr_msg *sr = &req->sr_msg;
+
 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 
@@ -4900,13 +4574,7 @@ static int io_recvmsg_prep(struct io_kiocb *req,
 	if (req->ctx->compat)
 		sr->msg_flags |= MSG_CMSG_COMPAT;
 #endif
-
-	if (!async_msg || !io_op_defs[req->opcode].needs_async_data)
-		return 0;
-	ret = io_recvmsg_copy_hdr(req, async_msg);
-	if (!ret)
-		req->flags |= REQ_F_NEED_CLEANUP;
-	return ret;
+	return 0;
 }
 
 static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
@@ -5059,10 +4727,17 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
 	return 0;
 }
 
+static int io_connect_prep_async(struct io_kiocb *req)
+{
+	struct io_async_connect *io = req->async_data;
+	struct io_connect *conn = &req->connect;
+
+	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address);
+}
+
 static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_connect *conn = &req->connect;
-	struct io_async_connect *io = req->async_data;
 
 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
@@ -5071,12 +4746,7 @@ static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
 	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
 	conn->addr_len =  READ_ONCE(sqe->addr2);
-
-	if (!io)
-		return 0;
-
-	return move_addr_to_kernel(conn->addr, conn->addr_len,
-					&io->address);
+	return 0;
 }
 
 static int io_connect(struct io_kiocb *req, unsigned int issue_flags)
@@ -5121,56 +4791,32 @@ out:
 	return 0;
 }
 #else /* !CONFIG_NET */
-static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
-	return -EOPNOTSUPP;
-}
-
-static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
-{
-	return -EOPNOTSUPP;
-}
-
-static int io_send(struct io_kiocb *req, unsigned int issue_flags)
-{
-	return -EOPNOTSUPP;
-}
-
-static int io_recvmsg_prep(struct io_kiocb *req,
-			   const struct io_uring_sqe *sqe)
-{
-	return -EOPNOTSUPP;
-}
-
-static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
-{
-	return -EOPNOTSUPP;
-}
-
-static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
-{
-	return -EOPNOTSUPP;
-}
-
-static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
-	return -EOPNOTSUPP;
-}
-
-static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
-{
-	return -EOPNOTSUPP;
-}
-
-static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
-	return -EOPNOTSUPP;
-}
-
-static int io_connect(struct io_kiocb *req, unsigned int issue_flags)
-{
-	return -EOPNOTSUPP;
-}
+#define IO_NETOP_FN(op)							\
+static int io_##op(struct io_kiocb *req, unsigned int issue_flags)	\
+{									\
+	return -EOPNOTSUPP;						\
+}
+
+#define IO_NETOP_PREP(op)						\
+IO_NETOP_FN(op)								\
+static int io_##op##_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) \
+{									\
+	return -EOPNOTSUPP;						\
+}									\
+
+#define IO_NETOP_PREP_ASYNC(op)						\
+IO_NETOP_PREP(op)							\
+static int io_##op##_prep_async(struct io_kiocb *req)			\
+{									\
+	return -EOPNOTSUPP;						\
+}
+
+IO_NETOP_PREP_ASYNC(sendmsg);
+IO_NETOP_PREP_ASYNC(recvmsg);
+IO_NETOP_PREP_ASYNC(connect);
+IO_NETOP_PREP(accept);
+IO_NETOP_FN(send);
+IO_NETOP_FN(recv);
 #endif /* CONFIG_NET */
 
 struct io_poll_table {
@@ -5952,12 +5598,15 @@ static bool io_cancel_cb(struct io_wq_work *work, void *data)
 	return req->user_data == (unsigned long) data;
 }
 
-static int io_async_cancel_one(struct io_ring_ctx *ctx, void *sqe_addr)
+static int io_async_cancel_one(struct io_uring_task *tctx, void *sqe_addr)
 {
 	enum io_wq_cancel cancel_ret;
 	int ret = 0;
 
-	cancel_ret = io_wq_cancel_cb(ctx->io_wq, io_cancel_cb, sqe_addr, false);
+	if (!tctx->io_wq)
+		return -ENOENT;
+
+	cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, sqe_addr, false);
 	switch (cancel_ret) {
 	case IO_WQ_CANCEL_OK:
 		ret = 0;
@@ -5980,7 +5629,8 @@ static void io_async_find_and_cancel(struct io_ring_ctx *ctx,
 	unsigned long flags;
 	int ret;
 
-	ret = io_async_cancel_one(ctx, (void *) (unsigned long) sqe_addr);
+	ret = io_async_cancel_one(req->task->io_uring,
+					(void *) (unsigned long) sqe_addr);
 	if (ret != -ENOENT) {
 		spin_lock_irqsave(&ctx->completion_lock, flags);
 		goto done;
@@ -6084,9 +5734,9 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	case IORING_OP_POLL_REMOVE:
 		return io_poll_remove_prep(req, sqe);
 	case IORING_OP_FSYNC:
-		return io_prep_fsync(req, sqe);
+		return io_fsync_prep(req, sqe);
 	case IORING_OP_SYNC_FILE_RANGE:
-		return io_prep_sfr(req, sqe);
+		return io_sfr_prep(req, sqe);
 	case IORING_OP_SENDMSG:
 	case IORING_OP_SEND:
 		return io_sendmsg_prep(req, sqe);
@@ -6144,14 +5794,39 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	return-EINVAL;
 }
 
-static int io_req_defer_prep(struct io_kiocb *req,
-			     const struct io_uring_sqe *sqe)
+static int io_req_prep_async(struct io_kiocb *req)
+{
+	switch (req->opcode) {
+	case IORING_OP_READV:
+	case IORING_OP_READ_FIXED:
+	case IORING_OP_READ:
+		return io_rw_prep_async(req, READ);
+	case IORING_OP_WRITEV:
+	case IORING_OP_WRITE_FIXED:
+	case IORING_OP_WRITE:
+		return io_rw_prep_async(req, WRITE);
+	case IORING_OP_SENDMSG:
+	case IORING_OP_SEND:
+		return io_sendmsg_prep_async(req);
+	case IORING_OP_RECVMSG:
+	case IORING_OP_RECV:
+		return io_recvmsg_prep_async(req);
+	case IORING_OP_CONNECT:
+		return io_connect_prep_async(req);
+	}
+	return 0;
+}
+
+static int io_req_defer_prep(struct io_kiocb *req)
 {
-	if (!sqe)
+	if (!io_op_defs[req->opcode].needs_async_data)
+		return 0;
+	/* some opcodes init it during the inital prep */
+	if (req->async_data)
 		return 0;
-	if (io_alloc_async_data(req))
+	if (__io_alloc_async_data(req))
 		return -EAGAIN;
-	return io_req_prep(req, sqe);
+	return io_req_prep_async(req);
 }
 
 static u32 io_get_sequence(struct io_kiocb *req)
@@ -6167,7 +5842,7 @@ static u32 io_get_sequence(struct io_kiocb *req)
 	return total_submitted - nr_reqs;
 }
 
-static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+static int io_req_defer(struct io_kiocb *req)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 	struct io_defer_entry *de;
@@ -6184,11 +5859,9 @@ static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	if (!req_need_defer(req, seq) && list_empty_careful(&ctx->defer_list))
 		return 0;
 
-	if (!req->async_data) {
-		ret = io_req_defer_prep(req, sqe);
-		if (ret)
-			return ret;
-	}
+	ret = io_req_defer_prep(req);
+	if (ret)
+		return ret;
 	io_prep_async_link(req);
 	de = kmalloc(sizeof(*de), GFP_KERNEL);
 	if (!de)
@@ -6427,29 +6100,11 @@ static void io_wq_submit_work(struct io_wq_work *work)
 		} while (1);
 	}
 
+	/* avoid locking problems by failing it from a clean context */
 	if (ret) {
-		struct io_ring_ctx *lock_ctx = NULL;
-
-		if (req->ctx->flags & IORING_SETUP_IOPOLL)
-			lock_ctx = req->ctx;
-
-		/*
-		 * io_iopoll_complete() does not hold completion_lock to
-		 * complete polled io, so here for polled io, we can not call
-		 * io_req_complete() directly, otherwise there maybe concurrent
-		 * access to cqring, defer_list, etc, which is not safe. Given
-		 * that io_iopoll_complete() is always called under uring_lock,
-		 * so here for polled io, we also get uring_lock to complete
-		 * it.
-		 */
-		if (lock_ctx)
-			mutex_lock(&lock_ctx->uring_lock);
-
-		req_set_fail_links(req);
-		io_req_complete(req, ret);
-
-		if (lock_ctx)
-			mutex_unlock(&lock_ctx->uring_lock);
+		/* io-wq is going to take one down */
+		refcount_inc(&req->refs);
+		io_req_task_queue_fail(req, ret);
 	}
 }
 
@@ -6564,10 +6219,9 @@ static void __io_queue_sqe(struct io_kiocb *req)
 	const struct cred *old_creds = NULL;
 	int ret;
 
-	if ((req->flags & REQ_F_WORK_INITIALIZED) &&
-	    (req->work.flags & IO_WQ_WORK_CREDS) &&
-	    req->work.identity->creds != current_cred())
-		old_creds = override_creds(req->work.identity->creds);
+	if ((req->flags & REQ_F_WORK_INITIALIZED) && req->work.creds &&
+	    req->work.creds != current_cred())
+		old_creds = override_creds(req->work.creds);
 
 	ret = io_issue_sqe(req, IO_URING_F_NONBLOCK|IO_URING_F_COMPLETE_DEFER);
 
@@ -6607,11 +6261,11 @@ static void __io_queue_sqe(struct io_kiocb *req)
 		io_queue_linked_timeout(linked_timeout);
 }
 
-static void io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+static void io_queue_sqe(struct io_kiocb *req)
 {
 	int ret;
 
-	ret = io_req_defer(req, sqe);
+	ret = io_req_defer(req);
 	if (ret) {
 		if (ret != -EIOCBQUEUED) {
 fail_req:
@@ -6620,42 +6274,139 @@ fail_req:
 			io_req_complete(req, ret);
 		}
 	} else if (req->flags & REQ_F_FORCE_ASYNC) {
-		if (!req->async_data) {
-			ret = io_req_defer_prep(req, sqe);
-			if (unlikely(ret))
-				goto fail_req;
-		}
+		ret = io_req_defer_prep(req);
+		if (unlikely(ret))
+			goto fail_req;
 		io_queue_async_work(req);
 	} else {
-		if (sqe) {
-			ret = io_req_prep(req, sqe);
-			if (unlikely(ret))
-				goto fail_req;
-		}
 		__io_queue_sqe(req);
 	}
 }
 
-static inline void io_queue_link_head(struct io_kiocb *req)
+/*
+ * Check SQE restrictions (opcode and flags).
+ *
+ * Returns 'true' if SQE is allowed, 'false' otherwise.
+ */
+static inline bool io_check_restriction(struct io_ring_ctx *ctx,
+					struct io_kiocb *req,
+					unsigned int sqe_flags)
 {
-	if (unlikely(req->flags & REQ_F_FAIL_LINK)) {
-		io_put_req(req);
-		io_req_complete(req, -ECANCELED);
-	} else
-		io_queue_sqe(req, NULL);
+	if (!ctx->restricted)
+		return true;
+
+	if (!test_bit(req->opcode, ctx->restrictions.sqe_op))
+		return false;
+
+	if ((sqe_flags & ctx->restrictions.sqe_flags_required) !=
+	    ctx->restrictions.sqe_flags_required)
+		return false;
+
+	if (sqe_flags & ~(ctx->restrictions.sqe_flags_allowed |
+			  ctx->restrictions.sqe_flags_required))
+		return false;
+
+	return true;
 }
 
-struct io_submit_link {
-	struct io_kiocb *head;
-	struct io_kiocb *last;
-};
+static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
+		       const struct io_uring_sqe *sqe)
+{
+	struct io_submit_state *state;
+	unsigned int sqe_flags;
+	int id, ret = 0;
+
+	req->opcode = READ_ONCE(sqe->opcode);
+	/* same numerical values with corresponding REQ_F_*, safe to copy */
+	req->flags = sqe_flags = READ_ONCE(sqe->flags);
+	req->user_data = READ_ONCE(sqe->user_data);
+	req->async_data = NULL;
+	req->file = NULL;
+	req->ctx = ctx;
+	req->link = NULL;
+	req->fixed_rsrc_refs = NULL;
+	/* one is dropped after submission, the other at completion */
+	refcount_set(&req->refs, 2);
+	req->task = current;
+	req->result = 0;
+
+	/* enforce forwards compatibility on users */
+	if (unlikely(sqe_flags & ~SQE_VALID_FLAGS)) {
+		req->flags = 0;
+		return -EINVAL;
+	}
+
+	if (unlikely(req->opcode >= IORING_OP_LAST))
+		return -EINVAL;
+
+	if (unlikely(!io_check_restriction(ctx, req, sqe_flags)))
+		return -EACCES;
+
+	if ((sqe_flags & IOSQE_BUFFER_SELECT) &&
+	    !io_op_defs[req->opcode].buffer_select)
+		return -EOPNOTSUPP;
+
+	id = READ_ONCE(sqe->personality);
+	if (id) {
+		__io_req_init_async(req);
+		req->work.creds = idr_find(&ctx->personality_idr, id);
+		if (unlikely(!req->work.creds))
+			return -EINVAL;
+		get_cred(req->work.creds);
+	}
+
+	state = &ctx->submit_state;
+
+	/*
+	 * Plug now if we have more than 1 IO left after this, and the target
+	 * is potentially a read/write to block based storage.
+	 */
+	if (!state->plug_started && state->ios_left > 1 &&
+	    io_op_defs[req->opcode].plug) {
+		blk_start_plug(&state->plug);
+		state->plug_started = true;
+	}
+
+	if (io_op_defs[req->opcode].needs_file) {
+		bool fixed = req->flags & REQ_F_FIXED_FILE;
+
+		req->file = io_file_get(state, req, READ_ONCE(sqe->fd), fixed);
+		if (unlikely(!req->file))
+			ret = -EBADF;
+	}
+
+	state->ios_left--;
+	return ret;
+}
 
-static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
-			 struct io_submit_link *link)
+static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
+			 const struct io_uring_sqe *sqe)
 {
-	struct io_ring_ctx *ctx = req->ctx;
+	struct io_submit_link *link = &ctx->submit_state.link;
 	int ret;
 
+	ret = io_init_req(ctx, req, sqe);
+	if (unlikely(ret)) {
+fail_req:
+		io_put_req(req);
+		io_req_complete(req, ret);
+		if (link->head) {
+			/* fail even hard links since we don't submit */
+			link->head->flags |= REQ_F_FAIL_LINK;
+			io_put_req(link->head);
+			io_req_complete(link->head, -ECANCELED);
+			link->head = NULL;
+		}
+		return ret;
+	}
+	ret = io_req_prep(req, sqe);
+	if (unlikely(ret))
+		goto fail_req;
+
+	/* don't need @sqe from now on */
+	trace_io_uring_submit_sqe(ctx, req->opcode, req->user_data,
+				true, ctx->flags & IORING_SETUP_SQPOLL);
+
 	/*
 	 * If we already have a head request, queue this one for async
 	 * submittal once the head completes. If we don't have a head but
@@ -6677,19 +6428,16 @@ static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 			head->flags |= REQ_F_IO_DRAIN;
 			ctx->drain_next = 1;
 		}
-		ret = io_req_defer_prep(req, sqe);
-		if (unlikely(ret)) {
-			/* fail even hard links since we don't submit */
-			head->flags |= REQ_F_FAIL_LINK;
-			return ret;
-		}
+		ret = io_req_defer_prep(req);
+		if (unlikely(ret))
+			goto fail_req;
 		trace_io_uring_link(ctx, req, head);
 		link->last->link = req;
 		link->last = req;
 
 		/* last request of a link, enqueue the link */
 		if (!(req->flags & (REQ_F_LINK | REQ_F_HARDLINK))) {
-			io_queue_link_head(head);
+			io_queue_sqe(head);
 			link->head = NULL;
 		}
 	} else {
@@ -6698,13 +6446,10 @@ static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 			ctx->drain_next = 0;
 		}
 		if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) {
-			ret = io_req_defer_prep(req, sqe);
-			if (unlikely(ret))
-				req->flags |= REQ_F_FAIL_LINK;
 			link->head = req;
 			link->last = req;
 		} else {
-			io_queue_sqe(req, sqe);
+			io_queue_sqe(req);
 		}
 	}
 
@@ -6717,6 +6462,8 @@ static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 static void io_submit_state_end(struct io_submit_state *state,
 				struct io_ring_ctx *ctx)
 {
+	if (state->link.head)
+		io_queue_sqe(state->link.head);
 	if (state->comp.nr)
 		io_submit_flush_completions(&state->comp, ctx);
 	if (state->plug_started)
@@ -6732,6 +6479,8 @@ static void io_submit_state_start(struct io_submit_state *state,
 {
 	state->plug_started = false;
 	state->ios_left = max_ios;
+	/* set only head, no need to init link_last in advance */
+	state->link.head = NULL;
 }
 
 static void io_commit_sqring(struct io_ring_ctx *ctx)
@@ -6777,117 +6526,9 @@ static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx)
 	return NULL;
 }
 
-/*
- * Check SQE restrictions (opcode and flags).
- *
- * Returns 'true' if SQE is allowed, 'false' otherwise.
- */
-static inline bool io_check_restriction(struct io_ring_ctx *ctx,
-					struct io_kiocb *req,
-					unsigned int sqe_flags)
-{
-	if (!ctx->restricted)
-		return true;
-
-	if (!test_bit(req->opcode, ctx->restrictions.sqe_op))
-		return false;
-
-	if ((sqe_flags & ctx->restrictions.sqe_flags_required) !=
-	    ctx->restrictions.sqe_flags_required)
-		return false;
-
-	if (sqe_flags & ~(ctx->restrictions.sqe_flags_allowed |
-			  ctx->restrictions.sqe_flags_required))
-		return false;
-
-	return true;
-}
-
-#define SQE_VALID_FLAGS	(IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK|	\
-				IOSQE_IO_HARDLINK | IOSQE_ASYNC | \
-				IOSQE_BUFFER_SELECT)
-
-static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
-		       const struct io_uring_sqe *sqe)
-{
-	struct io_submit_state *state;
-	unsigned int sqe_flags;
-	int id, ret = 0;
-
-	req->opcode = READ_ONCE(sqe->opcode);
-	/* same numerical values with corresponding REQ_F_*, safe to copy */
-	req->flags = sqe_flags = READ_ONCE(sqe->flags);
-	req->user_data = READ_ONCE(sqe->user_data);
-	req->async_data = NULL;
-	req->file = NULL;
-	req->ctx = ctx;
-	req->link = NULL;
-	req->fixed_rsrc_refs = NULL;
-	/* one is dropped after submission, the other at completion */
-	refcount_set(&req->refs, 2);
-	req->task = current;
-	req->result = 0;
-
-	/* enforce forwards compatibility on users */
-	if (unlikely(sqe_flags & ~SQE_VALID_FLAGS))
-		return -EINVAL;
-
-	if (unlikely(req->opcode >= IORING_OP_LAST))
-		return -EINVAL;
-
-	if (unlikely(io_sq_thread_acquire_mm_files(ctx, req)))
-		return -EFAULT;
-
-	if (unlikely(!io_check_restriction(ctx, req, sqe_flags)))
-		return -EACCES;
-
-	if ((sqe_flags & IOSQE_BUFFER_SELECT) &&
-	    !io_op_defs[req->opcode].buffer_select)
-		return -EOPNOTSUPP;
-
-	id = READ_ONCE(sqe->personality);
-	if (id) {
-		struct io_identity *iod;
-
-		iod = idr_find(&ctx->personality_idr, id);
-		if (unlikely(!iod))
-			return -EINVAL;
-		refcount_inc(&iod->count);
-
-		__io_req_init_async(req);
-		get_cred(iod->creds);
-		req->work.identity = iod;
-		req->work.flags |= IO_WQ_WORK_CREDS;
-	}
-
-	state = &ctx->submit_state;
-
-	/*
-	 * Plug now if we have more than 1 IO left after this, and the target
-	 * is potentially a read/write to block based storage.
-	 */
-	if (!state->plug_started && state->ios_left > 1 &&
-	    io_op_defs[req->opcode].plug) {
-		blk_start_plug(&state->plug);
-		state->plug_started = true;
-	}
-
-	if (io_op_defs[req->opcode].needs_file) {
-		bool fixed = req->flags & REQ_F_FIXED_FILE;
-
-		req->file = io_file_get(state, req, READ_ONCE(sqe->fd), fixed);
-		if (unlikely(!req->file))
-			ret = -EBADF;
-	}
-
-	state->ios_left--;
-	return ret;
-}
-
 static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
 {
-	struct io_submit_link link;
-	int i, submitted = 0;
+	int submitted = 0;
 
 	/* if we have a backlog and couldn't flush it all, return BUSY */
 	if (test_bit(0, &ctx->sq_check_overflow)) {
@@ -6903,14 +6544,11 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
 
 	percpu_counter_add(&current->io_uring->inflight, nr);
 	refcount_add(nr, &current->usage);
-
 	io_submit_state_start(&ctx->submit_state, nr);
-	link.head = NULL;
 
-	for (i = 0; i < nr; i++) {
+	while (submitted < nr) {
 		const struct io_uring_sqe *sqe;
 		struct io_kiocb *req;
-		int err;
 
 		req = io_alloc_req(ctx);
 		if (unlikely(!req)) {
@@ -6925,20 +6563,8 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
 		}
 		/* will complete beyond this point, count as submitted */
 		submitted++;
-
-		err = io_init_req(ctx, req, sqe);
-		if (unlikely(err)) {
-fail_req:
-			io_put_req(req);
-			io_req_complete(req, err);
+		if (io_submit_sqe(ctx, req, sqe))
 			break;
-		}
-
-		trace_io_uring_submit_sqe(ctx, req->opcode, req->user_data,
-					true, ctx->flags & IORING_SETUP_SQPOLL);
-		err = io_submit_sqe(req, sqe, &link);
-		if (err)
-			goto fail_req;
 	}
 
 	if (unlikely(submitted != nr)) {
@@ -6950,10 +6576,8 @@ fail_req:
 		percpu_counter_sub(&tctx->inflight, unused);
 		put_task_struct_many(current, unused);
 	}
-	if (link.head)
-		io_queue_link_head(link.head);
-	io_submit_state_end(&ctx->submit_state, ctx);
 
+	io_submit_state_end(&ctx->submit_state, ctx);
 	 /* Commit SQ ring head once we've consumed and submitted all SQEs */
 	io_commit_sqring(ctx);
 
@@ -7030,71 +6654,97 @@ static void io_sqd_init_new(struct io_sq_data *sqd)
 	io_sqd_update_thread_idle(sqd);
 }
 
+static bool io_sq_thread_should_stop(struct io_sq_data *sqd)
+{
+	return test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
+}
+
+static bool io_sq_thread_should_park(struct io_sq_data *sqd)
+{
+	return test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
+}
+
+static void io_sq_thread_parkme(struct io_sq_data *sqd)
+{
+	for (;;) {
+		/*
+		 * TASK_PARKED is a special state; we must serialize against
+		 * possible pending wakeups to avoid store-store collisions on
+		 * task->state.
+		 *
+		 * Such a collision might possibly result in the task state
+		 * changin from TASK_PARKED and us failing the
+		 * wait_task_inactive() in kthread_park().
+		 */
+		set_special_state(TASK_PARKED);
+		if (!test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state))
+			break;
+
+		/*
+		 * Thread is going to call schedule(), do not preempt it,
+		 * or the caller of kthread_park() may spend more time in
+		 * wait_task_inactive().
+		 */
+		preempt_disable();
+		complete(&sqd->completion);
+		schedule_preempt_disabled();
+		preempt_enable();
+	}
+	__set_current_state(TASK_RUNNING);
+}
+
 static int io_sq_thread(void *data)
 {
-	struct cgroup_subsys_state *cur_css = NULL;
-	struct files_struct *old_files = current->files;
-	struct nsproxy *old_nsproxy = current->nsproxy;
-	const struct cred *old_cred = NULL;
 	struct io_sq_data *sqd = data;
 	struct io_ring_ctx *ctx;
 	unsigned long timeout = 0;
+	char buf[TASK_COMM_LEN];
 	DEFINE_WAIT(wait);
 
-	task_lock(current);
-	current->files = NULL;
-	current->nsproxy = NULL;
-	task_unlock(current);
+	sprintf(buf, "iou-sqp-%d", sqd->task_pid);
+	set_task_comm(current, buf);
+	sqd->thread = current;
+	current->pf_io_worker = NULL;
+
+	if (sqd->sq_cpu != -1)
+		set_cpus_allowed_ptr(current, cpumask_of(sqd->sq_cpu));
+	else
+		set_cpus_allowed_ptr(current, cpu_online_mask);
+	current->flags |= PF_NO_SETAFFINITY;
+
+	complete(&sqd->completion);
+
+	wait_for_completion(&sqd->startup);
 
-	while (!kthread_should_stop()) {
+	while (!io_sq_thread_should_stop(sqd)) {
 		int ret;
 		bool cap_entries, sqt_spin, needs_sched;
 
 		/*
 		 * Any changes to the sqd lists are synchronized through the
-		 * kthread parking. This synchronizes the thread vs users,
+		 * thread parking. This synchronizes the thread vs users,
 		 * the users are synchronized on the sqd->ctx_lock.
 		 */
-		if (kthread_should_park()) {
-			kthread_parkme();
-			/*
-			 * When sq thread is unparked, in case the previous park operation
-			 * comes from io_put_sq_data(), which means that sq thread is going
-			 * to be stopped, so here needs to have a check.
-			 */
-			if (kthread_should_stop())
-				break;
+		if (io_sq_thread_should_park(sqd)) {
+			io_sq_thread_parkme(sqd);
+			continue;
 		}
-
 		if (unlikely(!list_empty(&sqd->ctx_new_list))) {
 			io_sqd_init_new(sqd);
 			timeout = jiffies + sqd->sq_thread_idle;
 		}
-
+		if (fatal_signal_pending(current))
+			break;
 		sqt_spin = false;
 		cap_entries = !list_is_singular(&sqd->ctx_list);
 		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
-			if (current->cred != ctx->creds) {
-				if (old_cred)
-					revert_creds(old_cred);
-				old_cred = override_creds(ctx->creds);
-			}
-			io_sq_thread_associate_blkcg(ctx, &cur_css);
-#ifdef CONFIG_AUDIT
-			current->loginuid = ctx->loginuid;
-			current->sessionid = ctx->sessionid;
-#endif
-
 			ret = __io_sq_thread(ctx, cap_entries);
 			if (!sqt_spin && (ret > 0 || !list_empty(&ctx->iopoll_list)))
 				sqt_spin = true;
-
-			io_sq_thread_drop_mm_files();
 		}
 
 		if (sqt_spin || !time_after(jiffies, timeout)) {
 			io_run_task_work();
-			io_sq_thread_drop_mm_files();
 			cond_resched();
 			if (sqt_spin)
 				timeout = jiffies + sqd->sq_thread_idle;
@@ -7115,7 +6765,7 @@ static int io_sq_thread(void *data)
 			}
 		}
 
-		if (needs_sched && !kthread_should_park()) {
+		if (needs_sched && !io_sq_thread_should_park(sqd)) {
 			list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
 				io_ring_set_wakeup_flag(ctx);
 
@@ -7128,22 +6778,25 @@ static int io_sq_thread(void *data)
 		timeout = jiffies + sqd->sq_thread_idle;
 	}
 
-	io_run_task_work();
-	io_sq_thread_drop_mm_files();
-
-	if (cur_css)
-		io_sq_thread_unassociate_blkcg();
-	if (old_cred)
-		revert_creds(old_cred);
+	list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
+		io_uring_cancel_sqpoll(ctx);
 
-	task_lock(current);
-	current->files = old_files;
-	current->nsproxy = old_nsproxy;
-	task_unlock(current);
+	io_run_task_work();
 
-	kthread_parkme();
+	/*
+	 * Clear thread under lock so that concurrent parks work correctly
+	 */
+	complete_all(&sqd->completion);
+	mutex_lock(&sqd->lock);
+	sqd->thread = NULL;
+	list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
+		ctx->sqo_exec = 1;
+		io_ring_set_wakeup_flag(ctx);
+	}
+	mutex_unlock(&sqd->lock);
 
-	return 0;
+	complete(&sqd->exited);
+	do_exit(0);
 }
 
 struct io_wait_queue {
@@ -7328,38 +6981,59 @@ static void io_sqe_rsrc_set_node(struct io_ring_ctx *ctx,
 	percpu_ref_get(&rsrc_data->refs);
 }
 
-static int io_rsrc_ref_quiesce(struct fixed_rsrc_data *data,
-			       struct io_ring_ctx *ctx,
-			       struct fixed_rsrc_ref_node *backup_node)
+static void io_sqe_rsrc_kill_node(struct io_ring_ctx *ctx, struct fixed_rsrc_data *data)
 {
-	struct fixed_rsrc_ref_node *ref_node;
-	int ret;
+	struct fixed_rsrc_ref_node *ref_node = NULL;
 
 	io_rsrc_ref_lock(ctx);
 	ref_node = data->node;
+	data->node = NULL;
 	io_rsrc_ref_unlock(ctx);
 	if (ref_node)
 		percpu_ref_kill(&ref_node->refs);
+}
+
+static int io_rsrc_ref_quiesce(struct fixed_rsrc_data *data,
+			       struct io_ring_ctx *ctx,
+			       void (*rsrc_put)(struct io_ring_ctx *ctx,
+			                        struct io_rsrc_put *prsrc))
+{
+	struct fixed_rsrc_ref_node *backup_node;
+	int ret;
 
-	percpu_ref_kill(&data->refs);
+	if (data->quiesce)
+		return -ENXIO;
 
-	/* wait for all refs nodes to complete */
-	flush_delayed_work(&ctx->rsrc_put_work);
+	data->quiesce = true;
 	do {
+		ret = -ENOMEM;
+		backup_node = alloc_fixed_rsrc_ref_node(ctx);
+		if (!backup_node)
+			break;
+		backup_node->rsrc_data = data;
+		backup_node->rsrc_put = rsrc_put;
+
+		io_sqe_rsrc_kill_node(ctx, data);
+		percpu_ref_kill(&data->refs);
+		flush_delayed_work(&ctx->rsrc_put_work);
+
 		ret = wait_for_completion_interruptible(&data->done);
 		if (!ret)
 			break;
+
+		percpu_ref_resurrect(&data->refs);
+		io_sqe_rsrc_set_node(ctx, data, backup_node);
+		backup_node = NULL;
+		reinit_completion(&data->done);
+		mutex_unlock(&ctx->uring_lock);
 		ret = io_run_task_work_sig();
-		if (ret < 0) {
-			percpu_ref_resurrect(&data->refs);
-			reinit_completion(&data->done);
-			io_sqe_rsrc_set_node(ctx, data, backup_node);
-			return ret;
-		}
-	} while (1);
+		mutex_lock(&ctx->uring_lock);
+	} while (ret >= 0);
+	data->quiesce = false;
 
-	destroy_fixed_rsrc_ref_node(backup_node);
-	return 0;
+	if (backup_node)
+		destroy_fixed_rsrc_ref_node(backup_node);
+	return ret;
 }
 
 static struct fixed_rsrc_data *alloc_fixed_rsrc_data(struct io_ring_ctx *ctx)
@@ -7390,18 +7064,17 @@ static void free_fixed_rsrc_data(struct fixed_rsrc_data *data)
 static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 {
 	struct fixed_rsrc_data *data = ctx->file_data;
-	struct fixed_rsrc_ref_node *backup_node;
 	unsigned nr_tables, i;
 	int ret;
 
-	if (!data)
+	/*
+	 * percpu_ref_is_dying() is to stop parallel files unregister
+	 * Since we possibly drop uring lock later in this function to
+	 * run task work.
+	 */
+	if (!data || percpu_ref_is_dying(&data->refs))
 		return -ENXIO;
-	backup_node = alloc_fixed_rsrc_ref_node(ctx);
-	if (!backup_node)
-		return -ENOMEM;
-	init_fixed_file_ref_node(ctx, backup_node);
-
-	ret = io_rsrc_ref_quiesce(data, ctx, backup_node);
+	ret = io_rsrc_ref_quiesce(data, ctx, io_ring_file_put);
 	if (ret)
 		return ret;
 
@@ -7415,20 +7088,74 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 	return 0;
 }
 
+static void io_sq_thread_unpark(struct io_sq_data *sqd)
+	__releases(&sqd->lock)
+{
+	if (!sqd->thread)
+		return;
+	if (sqd->thread == current)
+		return;
+	clear_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
+	wake_up_state(sqd->thread, TASK_PARKED);
+	mutex_unlock(&sqd->lock);
+}
+
+static bool io_sq_thread_park(struct io_sq_data *sqd)
+	__acquires(&sqd->lock)
+{
+	if (sqd->thread == current)
+		return true;
+	mutex_lock(&sqd->lock);
+	if (!sqd->thread) {
+		mutex_unlock(&sqd->lock);
+		return false;
+	}
+	set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
+	wake_up_process(sqd->thread);
+	wait_for_completion(&sqd->completion);
+	return true;
+}
+
+static void io_sq_thread_stop(struct io_sq_data *sqd)
+{
+	if (!sqd->thread)
+		return;
+
+	set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
+	WARN_ON_ONCE(test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state));
+	wake_up_process(sqd->thread);
+	wait_for_completion(&sqd->exited);
+}
+
 static void io_put_sq_data(struct io_sq_data *sqd)
 {
 	if (refcount_dec_and_test(&sqd->refs)) {
-		/*
-		 * The park is a bit of a work-around, without it we get
-		 * warning spews on shutdown with SQPOLL set and affinity
-		 * set to a single CPU.
-		 */
+		io_sq_thread_stop(sqd);
+		kfree(sqd);
+	}
+}
+
+static void io_sq_thread_finish(struct io_ring_ctx *ctx)
+{
+	struct io_sq_data *sqd = ctx->sq_data;
+
+	if (sqd) {
+		complete(&sqd->startup);
 		if (sqd->thread) {
-			kthread_park(sqd->thread);
-			kthread_stop(sqd->thread);
+			wait_for_completion(&ctx->sq_thread_comp);
+			io_sq_thread_park(sqd);
 		}
 
-		kfree(sqd);
+		mutex_lock(&sqd->ctx_lock);
+		list_del(&ctx->sqd_list);
+		io_sqd_update_thread_idle(sqd);
+		mutex_unlock(&sqd->ctx_lock);
+
+		if (sqd->thread)
+			io_sq_thread_unpark(sqd);
+
+		io_put_sq_data(sqd);
+		ctx->sq_data = NULL;
 	}
 }
 
@@ -7475,68 +7202,12 @@ static struct io_sq_data *io_get_sq_data(struct io_uring_params *p)
 	mutex_init(&sqd->ctx_lock);
 	mutex_init(&sqd->lock);
 	init_waitqueue_head(&sqd->wait);
+	init_completion(&sqd->startup);
+	init_completion(&sqd->completion);
+	init_completion(&sqd->exited);
 	return sqd;
 }
 
-static void io_sq_thread_unpark(struct io_sq_data *sqd)
-	__releases(&sqd->lock)
-{
-	if (!sqd->thread)
-		return;
-	kthread_unpark(sqd->thread);
-	mutex_unlock(&sqd->lock);
-}
-
-static void io_sq_thread_park(struct io_sq_data *sqd)
-	__acquires(&sqd->lock)
-{
-	if (!sqd->thread)
-		return;
-	mutex_lock(&sqd->lock);
-	kthread_park(sqd->thread);
-}
-
-static void io_sq_thread_stop(struct io_ring_ctx *ctx)
-{
-	struct io_sq_data *sqd = ctx->sq_data;
-
-	if (sqd) {
-		if (sqd->thread) {
-			/*
-			 * We may arrive here from the error branch in
-			 * io_sq_offload_create() where the kthread is created
-			 * without being waked up, thus wake it up now to make
-			 * sure the wait will complete.
-			 */
-			wake_up_process(sqd->thread);
-			wait_for_completion(&ctx->sq_thread_comp);
-
-			io_sq_thread_park(sqd);
-		}
-
-		mutex_lock(&sqd->ctx_lock);
-		list_del(&ctx->sqd_list);
-		io_sqd_update_thread_idle(sqd);
-		mutex_unlock(&sqd->ctx_lock);
-
-		if (sqd->thread)
-			io_sq_thread_unpark(sqd);
-
-		io_put_sq_data(sqd);
-		ctx->sq_data = NULL;
-	}
-}
-
-static void io_finish_async(struct io_ring_ctx *ctx)
-{
-	io_sq_thread_stop(ctx);
-
-	if (ctx->io_wq) {
-		io_wq_destroy(ctx->io_wq);
-		ctx->io_wq = NULL;
-	}
-}
-
 #if defined(CONFIG_UNIX)
 /*
  * Ensure the UNIX gc is aware of our file set, so we are certain that
@@ -7563,7 +7234,7 @@ static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset)
 	skb->sk = sk;
 
 	nr_files = 0;
-	fpl->user = get_uid(ctx->user);
+	fpl->user = get_uid(current_user());
 	for (i = 0; i < nr; i++) {
 		struct file *file = io_file_from_index(ctx, i + offset);
 
@@ -8095,54 +7766,34 @@ static struct io_wq_work *io_free_work(struct io_wq_work *work)
 	return req ? &req->work : NULL;
 }
 
-static int io_init_wq_offload(struct io_ring_ctx *ctx,
-			      struct io_uring_params *p)
+static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx)
 {
+	struct io_wq_hash *hash;
 	struct io_wq_data data;
-	struct fd f;
-	struct io_ring_ctx *ctx_attach;
 	unsigned int concurrency;
-	int ret = 0;
-
-	data.user = ctx->user;
-	data.free_work = io_free_work;
-	data.do_work = io_wq_submit_work;
-
-	if (!(p->flags & IORING_SETUP_ATTACH_WQ)) {
-		/* Do QD, or 4 * CPUS, whatever is smallest */
-		concurrency = min(ctx->sq_entries, 4 * num_online_cpus());
 
-		ctx->io_wq = io_wq_create(concurrency, &data);
-		if (IS_ERR(ctx->io_wq)) {
-			ret = PTR_ERR(ctx->io_wq);
-			ctx->io_wq = NULL;
-		}
-		return ret;
+	hash = ctx->hash_map;
+	if (!hash) {
+		hash = kzalloc(sizeof(*hash), GFP_KERNEL);
+		if (!hash)
+			return ERR_PTR(-ENOMEM);
+		refcount_set(&hash->refs, 1);
+		init_waitqueue_head(&hash->wait);
+		ctx->hash_map = hash;
 	}
 
-	f = fdget(p->wq_fd);
-	if (!f.file)
-		return -EBADF;
-
-	if (f.file->f_op != &io_uring_fops) {
-		ret = -EINVAL;
-		goto out_fput;
-	}
+	data.hash = hash;
+	data.free_work = io_free_work;
+	data.do_work = io_wq_submit_work;
 
-	ctx_attach = f.file->private_data;
-	/* @io_wq is protected by holding the fd */
-	if (!io_wq_get(ctx_attach->io_wq, &data)) {
-		ret = -EINVAL;
-		goto out_fput;
-	}
+	/* Do QD, or 4 * CPUS, whatever is smallest */
+	concurrency = min(ctx->sq_entries, 4 * num_online_cpus());
 
-	ctx->io_wq = ctx_attach->io_wq;
-out_fput:
-	fdput(f);
-	return ret;
+	return io_wq_create(concurrency, &data);
 }
 
-static int io_uring_alloc_task_context(struct task_struct *task)
+static int io_uring_alloc_task_context(struct task_struct *task,
+				       struct io_ring_ctx *ctx)
 {
 	struct io_uring_task *tctx;
 	int ret;
@@ -8157,13 +7808,19 @@ static int io_uring_alloc_task_context(struct task_struct *task)
 		return ret;
 	}
 
+	tctx->io_wq = io_init_wq_offload(ctx);
+	if (IS_ERR(tctx->io_wq)) {
+		ret = PTR_ERR(tctx->io_wq);
+		percpu_counter_destroy(&tctx->inflight);
+		kfree(tctx);
+		return ret;
+	}
+
 	xa_init(&tctx->xa);
 	init_waitqueue_head(&tctx->wait);
 	tctx->last = NULL;
 	atomic_set(&tctx->in_idle, 0);
 	tctx->sqpoll = false;
-	io_init_identity(&tctx->__identity);
-	tctx->identity = &tctx->__identity;
 	task->io_uring = tctx;
 	spin_lock_init(&tctx->task_lock);
 	INIT_WQ_LIST(&tctx->task_list);
@@ -8177,19 +7834,49 @@ void __io_uring_free(struct task_struct *tsk)
 	struct io_uring_task *tctx = tsk->io_uring;
 
 	WARN_ON_ONCE(!xa_empty(&tctx->xa));
-	WARN_ON_ONCE(refcount_read(&tctx->identity->count) != 1);
-	if (tctx->identity != &tctx->__identity)
-		kfree(tctx->identity);
 	percpu_counter_destroy(&tctx->inflight);
 	kfree(tctx);
 	tsk->io_uring = NULL;
 }
 
+static int io_sq_thread_fork(struct io_sq_data *sqd, struct io_ring_ctx *ctx)
+{
+	int ret;
+
+	clear_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
+	reinit_completion(&sqd->completion);
+	ctx->sqo_dead = ctx->sqo_exec = 0;
+	sqd->task_pid = current->pid;
+	current->flags |= PF_IO_WORKER;
+	ret = io_wq_fork_thread(io_sq_thread, sqd);
+	current->flags &= ~PF_IO_WORKER;
+	if (ret < 0) {
+		sqd->thread = NULL;
+		return ret;
+	}
+	wait_for_completion(&sqd->completion);
+	return io_uring_alloc_task_context(sqd->thread, ctx);
+}
+
 static int io_sq_offload_create(struct io_ring_ctx *ctx,
 				struct io_uring_params *p)
 {
 	int ret;
 
+	/* Retain compatibility with failing for an invalid attach attempt */
+	if ((ctx->flags & (IORING_SETUP_ATTACH_WQ | IORING_SETUP_SQPOLL)) ==
+				IORING_SETUP_ATTACH_WQ) {
+		struct fd f;
+
+		f = fdget(p->wq_fd);
+		if (!f.file)
+			return -ENXIO;
+		if (f.file->f_op != &io_uring_fops) {
+			fdput(f);
+			return -EINVAL;
+		}
+		fdput(f);
+	}
 	if (ctx->flags & IORING_SETUP_SQPOLL) {
 		struct io_sq_data *sqd;
 
@@ -8215,7 +7902,7 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
 			ctx->sq_thread_idle = HZ;
 
 		if (sqd->thread)
-			goto done;
+			return 0;
 
 		if (p->flags & IORING_SETUP_SQ_AFF) {
 			int cpu = p->sq_thread_cpu;
@@ -8226,18 +7913,21 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
 			if (!cpu_online(cpu))
 				goto err;
 
-			sqd->thread = kthread_create_on_cpu(io_sq_thread, sqd,
-							cpu, "io_uring-sq");
+			sqd->sq_cpu = cpu;
 		} else {
-			sqd->thread = kthread_create(io_sq_thread, sqd,
-							"io_uring-sq");
+			sqd->sq_cpu = -1;
 		}
-		if (IS_ERR(sqd->thread)) {
-			ret = PTR_ERR(sqd->thread);
+
+		sqd->task_pid = current->pid;
+		current->flags |= PF_IO_WORKER;
+		ret = io_wq_fork_thread(io_sq_thread, sqd);
+		current->flags &= ~PF_IO_WORKER;
+		if (ret < 0) {
 			sqd->thread = NULL;
 			goto err;
 		}
-		ret = io_uring_alloc_task_context(sqd->thread);
+		wait_for_completion(&sqd->completion);
+		ret = io_uring_alloc_task_context(sqd->thread, ctx);
 		if (ret)
 			goto err;
 	} else if (p->flags & IORING_SETUP_SQ_AFF) {
@@ -8246,14 +7936,9 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
 		goto err;
 	}
 
-done:
-	ret = io_init_wq_offload(ctx, p);
-	if (ret)
-		goto err;
-
 	return 0;
 err:
-	io_finish_async(ctx);
+	io_sq_thread_finish(ctx);
 	return ret;
 }
 
@@ -8261,8 +7946,8 @@ static void io_sq_offload_start(struct io_ring_ctx *ctx)
 {
 	struct io_sq_data *sqd = ctx->sq_data;
 
-	if ((ctx->flags & IORING_SETUP_SQPOLL) && sqd->thread)
-		wake_up_process(sqd->thread);
+	if (ctx->flags & IORING_SETUP_SQPOLL)
+		complete(&sqd->startup);
 }
 
 static inline void __io_unaccount_mem(struct user_struct *user,
@@ -8292,7 +7977,7 @@ static inline int __io_account_mem(struct user_struct *user,
 
 static void io_unaccount_mem(struct io_ring_ctx *ctx, unsigned long nr_pages)
 {
-	if (ctx->limit_mem)
+	if (ctx->user)
 		__io_unaccount_mem(ctx->user, nr_pages);
 
 	if (ctx->mm_account)
@@ -8303,7 +7988,7 @@ static int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages)
 {
 	int ret;
 
-	if (ctx->limit_mem) {
+	if (ctx->user) {
 		ret = __io_account_mem(ctx->user, nr_pages);
 		if (ret)
 			return ret;
@@ -8702,19 +8387,23 @@ static void io_req_cache_free(struct list_head *list, struct task_struct *tsk)
 static void io_req_caches_free(struct io_ring_ctx *ctx, struct task_struct *tsk)
 {
 	struct io_submit_state *submit_state = &ctx->submit_state;
+	struct io_comp_state *cs = &ctx->submit_state.comp;
 
 	mutex_lock(&ctx->uring_lock);
 
-	if (submit_state->free_reqs)
+	if (submit_state->free_reqs) {
 		kmem_cache_free_bulk(req_cachep, submit_state->free_reqs,
 				     submit_state->reqs);
-
-	io_req_cache_free(&submit_state->comp.free_list, NULL);
+		submit_state->free_reqs = 0;
+	}
 
 	spin_lock_irq(&ctx->completion_lock);
-	io_req_cache_free(&submit_state->comp.locked_free_list, NULL);
+	list_splice_init(&cs->locked_free_list, &cs->free_list);
+	cs->locked_free_nr = 0;
 	spin_unlock_irq(&ctx->completion_lock);
 
+	io_req_cache_free(&cs->free_list, NULL);
+
 	mutex_unlock(&ctx->uring_lock);
 }
 
@@ -8728,22 +8417,17 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	mutex_lock(&ctx->uring_lock);
 	mutex_unlock(&ctx->uring_lock);
 
-	io_finish_async(ctx);
+	io_sq_thread_finish(ctx);
 	io_sqe_buffers_unregister(ctx);
 
-	if (ctx->sqo_task) {
-		put_task_struct(ctx->sqo_task);
-		ctx->sqo_task = NULL;
+	if (ctx->mm_account) {
 		mmdrop(ctx->mm_account);
 		ctx->mm_account = NULL;
 	}
 
-#ifdef CONFIG_BLK_CGROUP
-	if (ctx->sqo_blkcg_css)
-		css_put(ctx->sqo_blkcg_css);
-#endif
-
+	mutex_lock(&ctx->uring_lock);
 	io_sqe_files_unregister(ctx);
+	mutex_unlock(&ctx->uring_lock);
 	io_eventfd_unregister(ctx);
 	io_destroy_buffers(ctx);
 	idr_destroy(&ctx->personality_idr);
@@ -8760,8 +8444,9 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 
 	percpu_ref_exit(&ctx->refs);
 	free_uid(ctx->user);
-	put_cred(ctx->creds);
 	io_req_caches_free(ctx, NULL);
+	if (ctx->hash_map)
+		io_wq_put_hash(ctx->hash_map);
 	kfree(ctx->cancel_hash);
 	kfree(ctx);
 }
@@ -8808,13 +8493,11 @@ static int io_uring_fasync(int fd, struct file *file, int on)
 
 static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
 {
-	struct io_identity *iod;
+	const struct cred *creds;
 
-	iod = idr_remove(&ctx->personality_idr, id);
-	if (iod) {
-		put_cred(iod->creds);
-		if (refcount_dec_and_test(&iod->count))
-			kfree(iod);
+	creds = idr_remove(&ctx->personality_idr, id);
+	if (creds) {
+		put_cred(creds);
 		return 0;
 	}
 
@@ -8829,6 +8512,28 @@ static int io_remove_personalities(int id, void *p, void *data)
 	return 0;
 }
 
+static void io_run_ctx_fallback(struct io_ring_ctx *ctx)
+{
+	struct callback_head *work, *head, *next;
+
+	do {
+		do {
+			head = NULL;
+			work = READ_ONCE(ctx->exit_task_work);
+		} while (cmpxchg(&ctx->exit_task_work, work, head) != work);
+
+		if (!work)
+			break;
+
+		do {
+			next = work->next;
+			work->func(work);
+			work = next;
+			cond_resched();
+		} while (work);
+	} while (1);
+}
+
 static void io_ring_exit_work(struct work_struct *work)
 {
 	struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx,
@@ -8842,17 +8547,11 @@ static void io_ring_exit_work(struct work_struct *work)
 	 */
 	do {
 		io_uring_try_cancel_requests(ctx, NULL, NULL);
+		io_run_ctx_fallback(ctx);
 	} while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20));
 	io_ring_ctx_free(ctx);
 }
 
-static bool io_cancel_ctx_cb(struct io_wq_work *work, void *data)
-{
-	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
-
-	return req->ctx == data;
-}
-
 static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
 {
 	mutex_lock(&ctx->uring_lock);
@@ -8871,9 +8570,6 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
 	io_kill_timeouts(ctx, NULL, NULL);
 	io_poll_remove_all(ctx, NULL, NULL);
 
-	if (ctx->io_wq)
-		io_wq_cancel_cb(ctx->io_wq, io_cancel_ctx_cb, ctx, true);
-
 	/* if we failed setting up the ctx, we might not have any rings */
 	io_iopoll_try_reap_events(ctx);
 
@@ -8952,13 +8648,14 @@ static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 					 struct files_struct *files)
 {
 	struct io_task_cancel cancel = { .task = task, .files = files, };
+	struct io_uring_task *tctx = current->io_uring;
 
 	while (1) {
 		enum io_wq_cancel cret;
 		bool ret = false;
 
-		if (ctx->io_wq) {
-			cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb,
+		if (tctx && tctx->io_wq) {
+			cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_task_cb,
 					       &cancel, true);
 			ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
 		}
@@ -9041,12 +8738,15 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
 					  struct files_struct *files)
 {
 	struct task_struct *task = current;
+	bool did_park = false;
 
 	if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
 		io_disable_sqo_submit(ctx);
-		task = ctx->sq_data->thread;
-		atomic_inc(&task->io_uring->in_idle);
-		io_sq_thread_park(ctx->sq_data);
+		did_park = io_sq_thread_park(ctx->sq_data);
+		if (did_park) {
+			task = ctx->sq_data->thread;
+			atomic_inc(&task->io_uring->in_idle);
+		}
 	}
 
 	io_cancel_defer_files(ctx, task, files);
@@ -9055,7 +8755,7 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
 	if (!files)
 		io_uring_try_cancel_requests(ctx, task, NULL);
 
-	if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
+	if (did_park) {
 		atomic_dec(&task->io_uring->in_idle);
 		io_sq_thread_unpark(ctx->sq_data);
 	}
@@ -9070,7 +8770,7 @@ static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file)
 	int ret;
 
 	if (unlikely(!tctx)) {
-		ret = io_uring_alloc_task_context(current);
+		ret = io_uring_alloc_task_context(current, ctx);
 		if (unlikely(ret))
 			return ret;
 		tctx = current->io_uring;
@@ -9140,8 +8840,13 @@ void __io_uring_files_cancel(struct files_struct *files)
 		io_uring_cancel_task_requests(file->private_data, files);
 	atomic_dec(&tctx->in_idle);
 
-	if (files)
+	if (files) {
 		io_uring_remove_task_files(tctx);
+		if (tctx->io_wq) {
+			io_wq_put(tctx->io_wq);
+			tctx->io_wq = NULL;
+		}
+	}
 }
 
 static s64 tctx_inflight(struct io_uring_task *tctx)
@@ -9151,14 +8856,17 @@ static s64 tctx_inflight(struct io_uring_task *tctx)
 
 static void io_uring_cancel_sqpoll(struct io_ring_ctx *ctx)
 {
+	struct io_sq_data *sqd = ctx->sq_data;
 	struct io_uring_task *tctx;
 	s64 inflight;
 	DEFINE_WAIT(wait);
 
-	if (!ctx->sq_data)
+	if (!sqd)
 		return;
-	tctx = ctx->sq_data->thread->io_uring;
 	io_disable_sqo_submit(ctx);
+	if (!io_sq_thread_park(sqd))
+		return;
+	tctx = ctx->sq_data->thread->io_uring;
 
 	atomic_inc(&tctx->in_idle);
 	do {
@@ -9179,6 +8887,7 @@ static void io_uring_cancel_sqpoll(struct io_ring_ctx *ctx)
 		finish_wait(&tctx->wait, &wait);
 	} while (1);
 	atomic_dec(&tctx->in_idle);
+	io_sq_thread_unpark(sqd);
 }
 
 /*
@@ -9232,11 +8941,17 @@ static int io_uring_flush(struct file *file, void *data)
 	struct io_uring_task *tctx = current->io_uring;
 	struct io_ring_ctx *ctx = file->private_data;
 
+	/* Ignore helper thread files exit */
+	if (current->flags & PF_IO_WORKER)
+		return 0;
+
 	if (fatal_signal_pending(current) || (current->flags & PF_EXITING)) {
 		io_uring_cancel_task_requests(ctx, NULL);
 		io_req_caches_free(ctx, current);
 	}
 
+	io_run_ctx_fallback(ctx);
+
 	if (!tctx)
 		return 0;
 
@@ -9435,6 +9150,12 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 	if (ctx->flags & IORING_SETUP_SQPOLL) {
 		io_cqring_overflow_flush(ctx, false, NULL, NULL);
 
+		if (unlikely(ctx->sqo_exec)) {
+			ret = io_sq_thread_fork(ctx->sq_data, ctx);
+			if (ret)
+				goto out;
+			ctx->sqo_exec = 0;
+		}
 		ret = -EOWNERDEAD;
 		if (unlikely(ctx->sqo_dead))
 			goto out;
@@ -9491,8 +9212,7 @@ out_fput:
 #ifdef CONFIG_PROC_FS
 static int io_uring_show_cred(int id, void *p, void *data)
 {
-	struct io_identity *iod = p;
-	const struct cred *cred = iod->creds;
+	const struct cred *cred = p;
 	struct seq_file *m = data;
 	struct user_namespace *uns = seq_user_ns(m);
 	struct group_info *gi;
@@ -9537,8 +9257,11 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
 	 */
 	has_lock = mutex_trylock(&ctx->uring_lock);
 
-	if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL))
+	if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) {
 		sq = ctx->sq_data;
+		if (!sq->thread)
+			sq = NULL;
+	}
 
 	seq_printf(m, "SqThread:\t%d\n", sq ? task_pid_nr(sq->thread) : -1);
 	seq_printf(m, "SqThreadCpu:\t%d\n", sq ? task_cpu(sq->thread) : -1);
@@ -9698,7 +9421,6 @@ static struct file *io_uring_get_file(struct io_ring_ctx *ctx)
 static int io_uring_create(unsigned entries, struct io_uring_params *p,
 			   struct io_uring_params __user *params)
 {
-	struct user_struct *user = NULL;
 	struct io_ring_ctx *ctx;
 	struct file *file;
 	int ret;
@@ -9740,22 +9462,13 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
 		p->cq_entries = 2 * p->sq_entries;
 	}
 
-	user = get_uid(current_user());
-
 	ctx = io_ring_ctx_alloc(p);
-	if (!ctx) {
-		free_uid(user);
+	if (!ctx)
 		return -ENOMEM;
-	}
 	ctx->compat = in_compat_syscall();
-	ctx->limit_mem = !capable(CAP_IPC_LOCK);
-	ctx->user = user;
-	ctx->creds = get_current_cred();
-#ifdef CONFIG_AUDIT
-	ctx->loginuid = current->loginuid;
-	ctx->sessionid = current->sessionid;
-#endif
-	ctx->sqo_task = get_task_struct(current);
+	if (!capable(CAP_IPC_LOCK))
+		ctx->user = get_uid(current_user());
+	ctx->sqo_task = current;
 
 	/*
 	 * This is just grabbed for accounting purposes. When a process exits,
@@ -9766,24 +9479,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
 	mmgrab(current->mm);
 	ctx->mm_account = current->mm;
 
-#ifdef CONFIG_BLK_CGROUP
-	/*
-	 * The sq thread will belong to the original cgroup it was inited in.
-	 * If the cgroup goes offline (e.g. disabling the io controller), then
-	 * issued bios will be associated with the closest cgroup later in the
-	 * block layer.
-	 */
-	rcu_read_lock();
-	ctx->sqo_blkcg_css = blkcg_css();
-	ret = css_tryget_online(ctx->sqo_blkcg_css);
-	rcu_read_unlock();
-	if (!ret) {
-		/* don't init against a dying cgroup, have the user try again */
-		ctx->sqo_blkcg_css = NULL;
-		ret = -ENODEV;
-		goto err;
-	}
-#endif
 	ret = io_allocate_scq_urings(ctx, p);
 	if (ret)
 		goto err;
@@ -9817,7 +9512,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
 			IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
 			IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL |
 			IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED |
-			IORING_FEAT_EXT_ARG;
+			IORING_FEAT_EXT_ARG | IORING_FEAT_NATIVE_WORKERS;
 
 	if (copy_to_user(params, p, sizeof(*p))) {
 		ret = -EFAULT;
@@ -9923,21 +9618,15 @@ out:
 
 static int io_register_personality(struct io_ring_ctx *ctx)
 {
-	struct io_identity *id;
+	const struct cred *creds;
 	int ret;
 
-	id = kmalloc(sizeof(*id), GFP_KERNEL);
-	if (unlikely(!id))
-		return -ENOMEM;
-
-	io_init_identity(id);
-	id->creds = get_current_cred();
+	creds = get_current_cred();
 
-	ret = idr_alloc_cyclic(&ctx->personality_idr, id, 1, USHRT_MAX, GFP_KERNEL);
-	if (ret < 0) {
-		put_cred(id->creds);
-		kfree(id);
-	}
+	ret = idr_alloc_cyclic(&ctx->personality_idr, (void *) creds, 1,
+				USHRT_MAX, GFP_KERNEL);
+	if (ret < 0)
+		put_cred(creds);
 	return ret;
 }
 
@@ -10196,6 +9885,8 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
 
 	ctx = f.file->private_data;
 
+	io_run_task_work();
+
 	mutex_lock(&ctx->uring_lock);
 	ret = __io_uring_register(ctx, opcode, arg, nr_args);
 	mutex_unlock(&ctx->uring_lock);
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 16a1e82e3aeb..7ffcd7ef33d4 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -278,14 +278,14 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 	if (!is_contig || bio_full(ctx->bio, plen)) {
 		gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
 		gfp_t orig_gfp = gfp;
-		int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;
+		unsigned int nr_vecs = DIV_ROUND_UP(length, PAGE_SIZE);
 
 		if (ctx->bio)
 			submit_bio(ctx->bio);
 
 		if (ctx->rac) /* same as readahead_gfp_mask */
 			gfp |= __GFP_NORETRY | __GFP_NOWARN;
-		ctx->bio = bio_alloc(gfp, min(BIO_MAX_PAGES, nr_vecs));
+		ctx->bio = bio_alloc(gfp, bio_max_segs(nr_vecs));
 		/*
 		 * If the bio_alloc fails, try it again for a single page to
 		 * avoid having to deal with partial page reads.  This emulates
@@ -1459,13 +1459,6 @@ iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data)
 		goto redirty;
 
 	/*
-	 * Given that we do not allow direct reclaim to call us, we should
-	 * never be called in a recursive filesystem reclaim context.
-	 */
-	if (WARN_ON_ONCE(current->flags & PF_MEMALLOC_NOFS))
-		goto redirty;
-
-	/*
 	 * Is this page beyond the end of the file?
 	 *
 	 * The page index is less than the end_index, adjust the end_offset
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index b2dc4d1f9dcc..1f0ffabbde56 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -551,7 +551,6 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
 		ret = -ENOMEM;
 		goto out_unload;
 	}
-	inode->i_ino = 0;
 	inode->i_size = i_size_read(sb->s_bdev->bd_inode);
 	inode->i_mapping->a_ops = &jfs_metapage_aops;
 	inode_fake_hash(inode);
diff --git a/fs/mpage.c b/fs/mpage.c
index 830e6cc2a9e7..961234d68779 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -304,9 +304,7 @@ alloc_new:
 				goto out;
 		}
 		args->bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
-					min_t(int, args->nr_pages,
-					      BIO_MAX_PAGES),
-					gfp);
+					bio_max_segs(args->nr_pages), gfp);
 		if (args->bio == NULL)
 			goto confused;
 	}
diff --git a/fs/namespace.c b/fs/namespace.c
index 062fe984a697..56bb5a5fdc0d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -381,50 +381,36 @@ int mnt_want_write(struct vfsmount *m)
 EXPORT_SYMBOL_GPL(mnt_want_write);
 
 /**
- * mnt_clone_write - get write access to a mount
- * @mnt: the mount on which to take a write
- *
- * This is effectively like mnt_want_write, except
- * it must only be used to take an extra write reference
- * on a mountpoint that we already know has a write reference
- * on it. This allows some optimisation.
- *
- * After finished, mnt_drop_write must be called as usual to
- * drop the reference.
- */
-int mnt_clone_write(struct vfsmount *mnt)
-{
-	/* superblock may be r/o */
-	if (__mnt_is_readonly(mnt))
-		return -EROFS;
-	preempt_disable();
-	mnt_inc_writers(real_mount(mnt));
-	preempt_enable();
-	return 0;
-}
-EXPORT_SYMBOL_GPL(mnt_clone_write);
-
-/**
  * __mnt_want_write_file - get write access to a file's mount
  * @file: the file who's mount on which to take a write
  *
- * This is like __mnt_want_write, but it takes a file and can
- * do some optimisations if the file is open for write already
+ * This is like __mnt_want_write, but if the file is already open for writing it
+ * skips incrementing mnt_writers (since the open file already has a reference)
+ * and instead only does the check for emergency r/o remounts.  This must be
+ * paired with __mnt_drop_write_file.
  */
 int __mnt_want_write_file(struct file *file)
 {
-	if (!(file->f_mode & FMODE_WRITER))
-		return __mnt_want_write(file->f_path.mnt);
-	else
-		return mnt_clone_write(file->f_path.mnt);
+	if (file->f_mode & FMODE_WRITER) {
+		/*
+		 * Superblock may have become readonly while there are still
+		 * writable fd's, e.g. due to a fs error with errors=remount-ro
+		 */
+		if (__mnt_is_readonly(file->f_path.mnt))
+			return -EROFS;
+		return 0;
+	}
+	return __mnt_want_write(file->f_path.mnt);
 }
 
 /**
  * mnt_want_write_file - get write access to a file's mount
  * @file: the file who's mount on which to take a write
  *
- * This is like mnt_want_write, but it takes a file and can
- * do some optimisations if the file is open for write already
+ * This is like mnt_want_write, but if the file is already open for writing it
+ * skips incrementing mnt_writers (since the open file already has a reference)
+ * and instead only does the freeze protection and the check for emergency r/o
+ * remounts.  This must be paired with mnt_drop_write_file.
  */
 int mnt_want_write_file(struct file *file)
 {
@@ -470,7 +456,8 @@ EXPORT_SYMBOL_GPL(mnt_drop_write);
 
 void __mnt_drop_write_file(struct file *file)
 {
-	__mnt_drop_write(file->f_path.mnt);
+	if (!(file->f_mode & FMODE_WRITER))
+		__mnt_drop_write(file->f_path.mnt);
 }
 
 void mnt_drop_write_file(struct file *file)
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 1a96ce28efb0..fe860c538747 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -115,13 +115,13 @@ bl_submit_bio(struct bio *bio)
 	return NULL;
 }
 
-static struct bio *
-bl_alloc_init_bio(int npg, struct block_device *bdev, sector_t disk_sector,
+static struct bio *bl_alloc_init_bio(unsigned int npg,
+		struct block_device *bdev, sector_t disk_sector,
 		bio_end_io_t end_io, struct parallel_io *par)
 {
 	struct bio *bio;
 
-	npg = min(npg, BIO_MAX_PAGES);
+	npg = bio_max_segs(npg);
 	bio = bio_alloc(GFP_NOIO, npg);
 	if (bio) {
 		bio->bi_iter.bi_sector = disk_sector;
diff --git a/fs/proc/self.c b/fs/proc/self.c
index a4012154e109..72cd69bcaf4a 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -16,13 +16,6 @@ static const char *proc_self_get_link(struct dentry *dentry,
 	pid_t tgid = task_tgid_nr_ns(current, ns);
 	char *name;
 
-	/*
-	 * Not currently supported. Once we can inherit all of struct pid,
-	 * we can allow this.
-	 */
-	if (current->flags & PF_IO_WORKER)
-		return ERR_PTR(-EOPNOTSUPP);
-
 	if (!tgid)
 		return ERR_PTR(-ENOENT);
 	/* max length of unsigned int in decimal + NULL term */
diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c
index d56681d86d28..a553273fbd41 100644
--- a/fs/proc/thread_self.c
+++ b/fs/proc/thread_self.c
@@ -17,13 +17,6 @@ static const char *proc_thread_self_get_link(struct dentry *dentry,
 	pid_t pid = task_pid_nr_ns(current, ns);
 	char *name;
 
-	/*
-	 * Not currently supported. Once we can inherit all of struct pid,
-	 * we can allow this.
-	 */
-	if (current->flags & PF_IO_WORKER)
-		return ERR_PTR(-EOPNOTSUPP);
-
 	if (!pid)
 		return ERR_PTR(-ENOENT);
 	name = kmalloc(10 + 6 + 10 + 1, dentry ? GFP_KERNEL : GFP_ATOMIC);
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index b56ff451adce..5b6fcb9b44e2 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -2805,7 +2805,7 @@ xfs_btree_split_worker(
 	struct xfs_btree_split_args	*args = container_of(work,
 						struct xfs_btree_split_args, work);
 	unsigned long		pflags;
-	unsigned long		new_pflags = PF_MEMALLOC_NOFS;
+	unsigned long		new_pflags = 0;
 
 	/*
 	 * we are in a transaction context here, but may also be doing work
@@ -2817,12 +2817,20 @@ xfs_btree_split_worker(
 		new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
 
 	current_set_flags_nested(&pflags, new_pflags);
+	xfs_trans_set_context(args->cur->bc_tp);
 
 	args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
 					 args->key, args->curp, args->stat);
-	complete(args->done);
 
+	xfs_trans_clear_context(args->cur->bc_tp);
 	current_restore_flags_nested(&pflags, new_pflags);
+
+	/*
+	 * Do not access args after complete() has run here. We don't own args
+	 * and the owner may run and free args before we return here.
+	 */
+	complete(args->done);
+
 }
 
 /*
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 4304c6416fbb..b4186d666157 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -62,7 +62,7 @@ xfs_setfilesize_trans_alloc(
 	 * We hand off the transaction to the completion thread now, so
 	 * clear the flag here.
 	 */
-	current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
+	xfs_trans_clear_context(tp);
 	return 0;
 }
 
@@ -125,7 +125,7 @@ xfs_setfilesize_ioend(
 	 * thus we need to mark ourselves as being in a transaction manually.
 	 * Similarly for freeze protection.
 	 */
-	current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
+	xfs_trans_set_context(tp);
 	__sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);
 
 	/* we abort the update if there was an IO error */
@@ -568,6 +568,12 @@ xfs_vm_writepage(
 {
 	struct xfs_writepage_ctx wpc = { };
 
+	if (WARN_ON_ONCE(current->journal_info)) {
+		redirty_page_for_writepage(wbc, page);
+		unlock_page(page);
+		return 0;
+	}
+
 	return iomap_writepage(page, wbc, &wpc.ctx, &xfs_writeback_ops);
 }
 
@@ -578,6 +584,13 @@ xfs_vm_writepages(
 {
 	struct xfs_writepage_ctx wpc = { };
 
+	/*
+	 * Writing back data in a transaction context can result in recursive
+	 * transactions. This is bad, so issue a warning and get out of here.
+	 */
+	if (WARN_ON_ONCE(current->journal_info))
+		return 0;
+
 	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
 	return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops);
 }
diff --git a/fs/xfs/xfs_bio_io.c b/fs/xfs/xfs_bio_io.c
index e2148f2d5d6b..17f36db2f792 100644
--- a/fs/xfs/xfs_bio_io.c
+++ b/fs/xfs/xfs_bio_io.c
@@ -6,7 +6,7 @@
 
 static inline unsigned int bio_max_vecs(unsigned int count)
 {
-	return min_t(unsigned, howmany(count, PAGE_SIZE), BIO_MAX_PAGES);
+	return bio_max_segs(howmany(count, PAGE_SIZE));
 }
 
 int
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index f6e5235df7c9..37a1d12762d8 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1480,7 +1480,7 @@ xfs_buf_ioapply_map(
 	int		op)
 {
 	int		page_index;
-	int		total_nr_pages = bp->b_page_count;
+	unsigned int	total_nr_pages = bp->b_page_count;
 	int		nr_pages;
 	struct bio	*bio;
 	sector_t	sector =  bp->b_maps[map].bm_bn;
@@ -1505,7 +1505,7 @@ xfs_buf_ioapply_map(
 
 next_chunk:
 	atomic_inc(&bp->b_io_remaining);
-	nr_pages = min(total_nr_pages, BIO_MAX_PAGES);
+	nr_pages = bio_max_segs(total_nr_pages);
 
 	bio = bio_alloc(GFP_NOIO, nr_pages);
 	bio_set_dev(bio, bp->b_target->bt_bdev);
diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c
index 3991e59cfd18..ef17c1f6db32 100644
--- a/fs/xfs/xfs_extent_busy.c
+++ b/fs/xfs/xfs_extent_busy.c
@@ -344,7 +344,6 @@ xfs_extent_busy_trim(
 	ASSERT(*len > 0);
 
 	spin_lock(&args->pag->pagb_lock);
-restart:
 	fbno = *bno;
 	flen = *len;
 	rbp = args->pag->pagb_tree.rb_node;
@@ -363,19 +362,6 @@ restart:
 			continue;
 		}
 
-		/*
-		 * If this is a metadata allocation, try to reuse the busy
-		 * extent instead of trimming the allocation.
-		 */
-		if (!(args->datatype & XFS_ALLOC_USERDATA) &&
-		    !(busyp->flags & XFS_EXTENT_BUSY_DISCARDED)) {
-			if (!xfs_extent_busy_update_extent(args->mp, args->pag,
-							  busyp, fbno, flen,
-							  false))
-				goto restart;
-			continue;
-		}
-
 		if (bbno <= fbno) {
 			/* start overlap */
 
diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c
index 145e06c47744..546a6cd96729 100644
--- a/fs/xfs/xfs_sysctl.c
+++ b/fs/xfs/xfs_sysctl.c
@@ -51,7 +51,7 @@ xfs_panic_mask_proc_handler(
 #endif /* CONFIG_PROC_FS */
 
 STATIC int
-xfs_deprecate_irix_sgid_inherit_proc_handler(
+xfs_deprecated_dointvec_minmax(
 	struct ctl_table	*ctl,
 	int			write,
 	void			*buffer,
@@ -59,24 +59,8 @@ xfs_deprecate_irix_sgid_inherit_proc_handler(
 	loff_t			*ppos)
 {
 	if (write) {
-		printk_once(KERN_WARNING
-				"XFS: " "%s sysctl option is deprecated.\n",
-				ctl->procname);
-	}
-	return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
-}
-
-STATIC int
-xfs_deprecate_irix_symlink_mode_proc_handler(
-	struct ctl_table	*ctl,
-	int			write,
-	void			*buffer,
-	size_t			*lenp,
-	loff_t			*ppos)
-{
-	if (write) {
-		printk_once(KERN_WARNING
-				"XFS: " "%s sysctl option is deprecated.\n",
+		printk_ratelimited(KERN_WARNING
+				"XFS: %s sysctl option is deprecated.\n",
 				ctl->procname);
 	}
 	return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
@@ -88,7 +72,7 @@ static struct ctl_table xfs_table[] = {
 		.data		= &xfs_params.sgid_inherit.val,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= xfs_deprecate_irix_sgid_inherit_proc_handler,
+		.proc_handler	= xfs_deprecated_dointvec_minmax,
 		.extra1		= &xfs_params.sgid_inherit.min,
 		.extra2		= &xfs_params.sgid_inherit.max
 	},
@@ -97,7 +81,7 @@ static struct ctl_table xfs_table[] = {
 		.data		= &xfs_params.symlink_mode.val,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= xfs_deprecate_irix_symlink_mode_proc_handler,
+		.proc_handler	= xfs_deprecated_dointvec_minmax,
 		.extra1		= &xfs_params.symlink_mode.min,
 		.extra2		= &xfs_params.symlink_mode.max
 	},
@@ -201,6 +185,15 @@ static struct ctl_table xfs_table[] = {
 		.extra1		= &xfs_params.blockgc_timer.min,
 		.extra2		= &xfs_params.blockgc_timer.max,
 	},
+	{
+		.procname	= "speculative_cow_prealloc_lifetime",
+		.data		= &xfs_params.blockgc_timer.val,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= xfs_deprecated_dointvec_minmax,
+		.extra1		= &xfs_params.blockgc_timer.min,
+		.extra2		= &xfs_params.blockgc_timer.max,
+	},
 	/* please keep this the last entry */
 #ifdef CONFIG_PROC_FS
 	{
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 44f72c09c203..b22a09e9daee 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -72,6 +72,7 @@ xfs_trans_free(
 	xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);
 
 	trace_xfs_trans_free(tp, _RET_IP_);
+	xfs_trans_clear_context(tp);
 	if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT))
 		sb_end_intwrite(tp->t_mountp->m_super);
 	xfs_trans_free_dqinfo(tp);
@@ -123,7 +124,8 @@ xfs_trans_dup(
 
 	ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used;
 	tp->t_rtx_res = tp->t_rtx_res_used;
-	ntp->t_pflags = tp->t_pflags;
+
+	xfs_trans_switch_context(tp, ntp);
 
 	/* move deferred ops over to the new tp */
 	xfs_defer_move(ntp, tp);
@@ -157,9 +159,6 @@ xfs_trans_reserve(
 	int			error = 0;
 	bool			rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
 
-	/* Mark this thread as being in a transaction */
-	current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
-
 	/*
 	 * Attempt to reserve the needed disk blocks by decrementing
 	 * the number needed from the number available.  This will
@@ -167,10 +166,8 @@ xfs_trans_reserve(
 	 */
 	if (blocks > 0) {
 		error = xfs_mod_fdblocks(mp, -((int64_t)blocks), rsvd);
-		if (error != 0) {
-			current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
+		if (error != 0)
 			return -ENOSPC;
-		}
 		tp->t_blk_res += blocks;
 	}
 
@@ -244,9 +241,6 @@ undo_blocks:
 		xfs_mod_fdblocks(mp, (int64_t)blocks, rsvd);
 		tp->t_blk_res = 0;
 	}
-
-	current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
-
 	return error;
 }
 
@@ -260,6 +254,7 @@ xfs_trans_alloc(
 	struct xfs_trans	**tpp)
 {
 	struct xfs_trans	*tp;
+	bool			want_retry = true;
 	int			error;
 
 	/*
@@ -267,9 +262,11 @@ xfs_trans_alloc(
 	 * GFP_NOFS allocation context so that we avoid lockdep false positives
 	 * by doing GFP_KERNEL allocations inside sb_start_intwrite().
 	 */
+retry:
 	tp = kmem_cache_zalloc(xfs_trans_zone, GFP_KERNEL | __GFP_NOFAIL);
 	if (!(flags & XFS_TRANS_NO_WRITECOUNT))
 		sb_start_intwrite(mp->m_super);
+	xfs_trans_set_context(tp);
 
 	/*
 	 * Zero-reservation ("empty") transactions can't modify anything, so
@@ -289,7 +286,9 @@ xfs_trans_alloc(
 	tp->t_firstblock = NULLFSBLOCK;
 
 	error = xfs_trans_reserve(tp, resp, blocks, rtextents);
-	if (error == -ENOSPC) {
+	if (error == -ENOSPC && want_retry) {
+		xfs_trans_cancel(tp);
+
 		/*
 		 * We weren't able to reserve enough space for the transaction.
 		 * Flush the other speculative space allocations to free space.
@@ -297,8 +296,11 @@ xfs_trans_alloc(
 		 * other locks.
 		 */
 		error = xfs_blockgc_free_space(mp, NULL);
-		if (!error)
-			error = xfs_trans_reserve(tp, resp, blocks, rtextents);
+		if (error)
+			return error;
+
+		want_retry = false;
+		goto retry;
 	}
 	if (error) {
 		xfs_trans_cancel(tp);
@@ -893,7 +895,6 @@ __xfs_trans_commit(
 
 	xfs_log_commit_cil(mp, tp, &commit_lsn, regrant);
 
-	current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
 	xfs_trans_free(tp);
 
 	/*
@@ -925,7 +926,6 @@ out_unreserve:
 			xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket);
 		tp->t_ticket = NULL;
 	}
-	current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
 	xfs_trans_free_items(tp, !!error);
 	xfs_trans_free(tp);
 
@@ -985,9 +985,6 @@ xfs_trans_cancel(
 		tp->t_ticket = NULL;
 	}
 
-	/* mark this thread as no longer being in a transaction */
-	current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
-
 	xfs_trans_free_items(tp, dirty);
 	xfs_trans_free(tp);
 }
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 8b03fbfe9a1b..9dd745cf77c9 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -281,4 +281,34 @@ int xfs_trans_alloc_ichange(struct xfs_inode *ip, struct xfs_dquot *udqp,
 		struct xfs_dquot *gdqp, struct xfs_dquot *pdqp, bool force,
 		struct xfs_trans **tpp);
 
+static inline void
+xfs_trans_set_context(
+	struct xfs_trans	*tp)
+{
+	ASSERT(current->journal_info == NULL);
+	tp->t_pflags = memalloc_nofs_save();
+	current->journal_info = tp;
+}
+
+static inline void
+xfs_trans_clear_context(
+	struct xfs_trans	*tp)
+{
+	if (current->journal_info == tp) {
+		memalloc_nofs_restore(tp->t_pflags);
+		current->journal_info = NULL;
+	}
+}
+
+static inline void
+xfs_trans_switch_context(
+	struct xfs_trans	*old_tp,
+	struct xfs_trans	*new_tp)
+{
+	ASSERT(current->journal_info == old_tp);
+	new_tp->t_pflags = old_tp->t_pflags;
+	old_tp->t_pflags = 0;
+	current->journal_info = new_tp;
+}
+
 #endif	/* __XFS_TRANS_H__ */
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 5b468f2242ff..983ed2fe7c85 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -20,7 +20,12 @@
 #define BIO_BUG_ON
 #endif
 
-#define BIO_MAX_PAGES		256
+#define BIO_MAX_PAGES		256U
+
+static inline unsigned int bio_max_segs(unsigned int nr_segs)
+{
+	return min(nr_segs, BIO_MAX_PAGES);
+}
 
 #define bio_prio(bio)			(bio)->bi_ioprio
 #define bio_set_prio(bio, prio)		((bio)->bi_ioprio = prio)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 69035e9f632b..c032cfe133c7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -462,7 +462,6 @@ struct request_queue {
 #ifdef CONFIG_PM
 	struct device		*dev;
 	enum rpm_status		rpm_status;
-	unsigned int		nr_pending;
 #endif
 
 	/*
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 05556573b896..a083e15df608 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -23,8 +23,6 @@ struct blk_trace {
 	u32 pid;
 	u32 dev;
 	struct dentry *dir;
-	struct dentry *dropped_file;
-	struct dentry *msg_file;
 	struct list_head running_list;
 	atomic_t dropped;
 };
@@ -119,7 +117,7 @@ struct compat_blk_user_trace_setup {
 
 #endif
 
-extern void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes);
+void blk_fill_rwbs(char *rwbs, unsigned int op);
 
 static inline sector_t blk_rq_trace_sector(struct request *rq)
 {
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index ee09a39627d6..9129ba231423 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -168,6 +168,7 @@ enum cpuhp_state {
 	CPUHP_AP_PERF_X86_CQM_ONLINE,
 	CPUHP_AP_PERF_X86_CSTATE_ONLINE,
 	CPUHP_AP_PERF_S390_CF_ONLINE,
+	CPUHP_AP_PERF_S390_CFD_ONLINE,
 	CPUHP_AP_PERF_S390_SF_ONLINE,
 	CPUHP_AP_PERF_ARM_CCI_ONLINE,
 	CPUHP_AP_PERF_ARM_CCN_ONLINE,
diff --git a/include/linux/device.h b/include/linux/device.h
index 7619a84f8ce4..ba660731bd25 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -291,6 +291,7 @@ struct device_dma_parameters {
 	 * sg limitations.
 	 */
 	unsigned int max_segment_size;
+	unsigned int min_align_mask;
 	unsigned long segment_boundary_mask;
 };
 
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index fbfa3f5abd94..2a984cb4d1e0 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -509,6 +509,22 @@ static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask)
 	return -EIO;
 }
 
+static inline unsigned int dma_get_min_align_mask(struct device *dev)
+{
+	if (dev->dma_parms)
+		return dev->dma_parms->min_align_mask;
+	return 0;
+}
+
+static inline int dma_set_min_align_mask(struct device *dev,
+		unsigned int min_align_mask)
+{
+	if (WARN_ON_ONCE(!dev->dma_parms))
+		return -EIO;
+	dev->dma_parms->min_align_mask = min_align_mask;
+	return 0;
+}
+
 static inline int dma_get_cache_alignment(void)
 {
 #ifdef ARCH_DMA_MINALIGN
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index 2eb6d19de336..51ede771cd99 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -5,23 +5,6 @@
 #include <linux/sched.h>
 #include <linux/xarray.h>
 
-struct io_identity {
-	struct files_struct		*files;
-	struct mm_struct		*mm;
-#ifdef CONFIG_BLK_CGROUP
-	struct cgroup_subsys_state	*blkcg_css;
-#endif
-	const struct cred		*creds;
-	struct nsproxy			*nsproxy;
-	struct fs_struct		*fs;
-	unsigned long			fsize;
-#ifdef CONFIG_AUDIT
-	kuid_t				loginuid;
-	unsigned int			sessionid;
-#endif
-	refcount_t			count;
-};
-
 struct io_wq_work_node {
 	struct io_wq_work_node *next;
 };
@@ -36,9 +19,8 @@ struct io_uring_task {
 	struct xarray		xa;
 	struct wait_queue_head	wait;
 	struct file		*last;
+	void			*io_wq;
 	struct percpu_counter	inflight;
-	struct io_identity	__identity;
-	struct io_identity	*identity;
 	atomic_t		in_idle;
 	bool			sqpoll;
 
@@ -61,7 +43,7 @@ static inline void io_uring_task_cancel(void)
 }
 static inline void io_uring_files_cancel(struct files_struct *files)
 {
-	if (current->io_uring && !xa_empty(&current->io_uring->xa))
+	if (current->io_uring)
 		__io_uring_files_cancel(files);
 }
 static inline void io_uring_free(struct task_struct *tsk)
diff --git a/include/linux/led-class-flash.h b/include/linux/led-class-flash.h
index 21a3358a1731..612b4cab3819 100644
--- a/include/linux/led-class-flash.h
+++ b/include/linux/led-class-flash.h
@@ -85,6 +85,7 @@ static inline struct led_classdev_flash *lcdev_to_flcdev(
 	return container_of(lcdev, struct led_classdev_flash, led_cdev);
 }
 
+#if IS_ENABLED(CONFIG_LEDS_CLASS_FLASH)
 /**
  * led_classdev_flash_register_ext - register a new object of LED class with
  *				     init data and with support for flash LEDs
@@ -98,12 +99,6 @@ int led_classdev_flash_register_ext(struct device *parent,
 				    struct led_classdev_flash *fled_cdev,
 				    struct led_init_data *init_data);
 
-static inline int led_classdev_flash_register(struct device *parent,
-					   struct led_classdev_flash *fled_cdev)
-{
-	return led_classdev_flash_register_ext(parent, fled_cdev, NULL);
-}
-
 /**
  * led_classdev_flash_unregister - unregisters an object of led_classdev class
  *				   with support for flash LEDs
@@ -118,15 +113,44 @@ int devm_led_classdev_flash_register_ext(struct device *parent,
 				     struct led_init_data *init_data);
 
 
+void devm_led_classdev_flash_unregister(struct device *parent,
+					struct led_classdev_flash *fled_cdev);
+
+#else
+
+static inline int led_classdev_flash_register_ext(struct device *parent,
+				    struct led_classdev_flash *fled_cdev,
+				    struct led_init_data *init_data)
+{
+	return 0;
+}
+
+static inline void led_classdev_flash_unregister(struct led_classdev_flash *fled_cdev) {};
+static inline int devm_led_classdev_flash_register_ext(struct device *parent,
+				     struct led_classdev_flash *fled_cdev,
+				     struct led_init_data *init_data)
+{
+	return 0;
+}
+
+static inline void devm_led_classdev_flash_unregister(struct device *parent,
+					struct led_classdev_flash *fled_cdev)
+{};
+
+#endif  /* IS_ENABLED(CONFIG_LEDS_CLASS_FLASH) */
+
+static inline int led_classdev_flash_register(struct device *parent,
+					   struct led_classdev_flash *fled_cdev)
+{
+	return led_classdev_flash_register_ext(parent, fled_cdev, NULL);
+}
+
 static inline int devm_led_classdev_flash_register(struct device *parent,
 				     struct led_classdev_flash *fled_cdev)
 {
 	return devm_led_classdev_flash_register_ext(parent, fled_cdev, NULL);
 }
 
-void devm_led_classdev_flash_unregister(struct device *parent,
-					struct led_classdev_flash *fled_cdev);
-
 /**
  * led_set_flash_strobe - setup flash strobe
  * @fled_cdev: the flash LED to set strobe on
diff --git a/include/linux/led-class-multicolor.h b/include/linux/led-class-multicolor.h
index 5116f9a866cc..210d57bcd767 100644
--- a/include/linux/led-class-multicolor.h
+++ b/include/linux/led-class-multicolor.h
@@ -44,12 +44,6 @@ int led_classdev_multicolor_register_ext(struct device *parent,
 					    struct led_classdev_mc *mcled_cdev,
 					    struct led_init_data *init_data);
 
-static inline int led_classdev_multicolor_register(struct device *parent,
-					    struct led_classdev_mc *mcled_cdev)
-{
-	return led_classdev_multicolor_register_ext(parent, mcled_cdev, NULL);
-}
-
 /**
  * led_classdev_multicolor_unregister - unregisters an object of led_classdev
  *					class with support for multicolor LEDs
@@ -68,13 +62,6 @@ int devm_led_classdev_multicolor_register_ext(struct device *parent,
 					  struct led_classdev_mc *mcled_cdev,
 					  struct led_init_data *init_data);
 
-static inline int devm_led_classdev_multicolor_register(struct device *parent,
-				     struct led_classdev_mc *mcled_cdev)
-{
-	return devm_led_classdev_multicolor_register_ext(parent, mcled_cdev,
-							 NULL);
-}
-
 void devm_led_classdev_multicolor_unregister(struct device *parent,
 					    struct led_classdev_mc *mcled_cdev);
 #else
@@ -83,27 +70,33 @@ static inline int led_classdev_multicolor_register_ext(struct device *parent,
 					    struct led_classdev_mc *mcled_cdev,
 					    struct led_init_data *init_data)
 {
-	return -EINVAL;
-}
-
-static inline int led_classdev_multicolor_register(struct device *parent,
-					    struct led_classdev_mc *mcled_cdev)
-{
-	return led_classdev_multicolor_register_ext(parent, mcled_cdev, NULL);
+	return 0;
 }
 
 static inline void led_classdev_multicolor_unregister(struct led_classdev_mc *mcled_cdev) {};
 static inline int led_mc_calc_color_components(struct led_classdev_mc *mcled_cdev,
 					       enum led_brightness brightness)
 {
-	return -EINVAL;
+	return 0;
 }
 
 static inline int devm_led_classdev_multicolor_register_ext(struct device *parent,
 					  struct led_classdev_mc *mcled_cdev,
 					  struct led_init_data *init_data)
 {
-	return -EINVAL;
+	return 0;
+}
+
+static inline void devm_led_classdev_multicolor_unregister(struct device *parent,
+					    struct led_classdev_mc *mcled_cdev)
+{};
+
+#endif  /* IS_ENABLED(CONFIG_LEDS_CLASS_MULTICOLOR) */
+
+static inline int led_classdev_multicolor_register(struct device *parent,
+					    struct led_classdev_mc *mcled_cdev)
+{
+	return led_classdev_multicolor_register_ext(parent, mcled_cdev, NULL);
 }
 
 static inline int devm_led_classdev_multicolor_register(struct device *parent,
@@ -113,9 +106,4 @@ static inline int devm_led_classdev_multicolor_register(struct device *parent,
 							 NULL);
 }
 
-static inline void devm_led_classdev_multicolor_unregister(struct device *parent,
-					    struct led_classdev_mc *mcled_cdev)
-{};
-
-#endif  /* IS_ENABLED(CONFIG_LEDS_CLASS_MULTICOLOR) */
 #endif	/* _LINUX_MULTICOLOR_LEDS_H_INCLUDED */
diff --git a/include/linux/leds.h b/include/linux/leds.h
index 6a8d6409c993..329fd914cf24 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -63,8 +63,8 @@ struct led_hw_trigger_type {
 
 struct led_classdev {
 	const char		*name;
-	enum led_brightness	 brightness;
-	enum led_brightness	 max_brightness;
+	unsigned int brightness;
+	unsigned int max_brightness;
 	int			 flags;
 
 	/* Lower 16 bits reflect status */
@@ -253,8 +253,7 @@ void led_blink_set_oneshot(struct led_classdev *led_cdev,
  * software blink timer that implements blinking when the
  * hardware doesn't. This function is guaranteed not to sleep.
  */
-void led_set_brightness(struct led_classdev *led_cdev,
-			enum led_brightness brightness);
+void led_set_brightness(struct led_classdev *led_cdev, unsigned int brightness);
 
 /**
  * led_set_brightness_sync - set LED brightness synchronously
@@ -267,8 +266,7 @@ void led_set_brightness(struct led_classdev *led_cdev,
  *
  * Returns: 0 on success or negative error value on failure
  */
-int led_set_brightness_sync(struct led_classdev *led_cdev,
-			    enum led_brightness value);
+int led_set_brightness_sync(struct led_classdev *led_cdev, unsigned int value);
 
 /**
  * led_update_brightness - update LED brightness
@@ -565,7 +563,7 @@ static inline void ledtrig_cpu(enum cpu_led_event evt)
 
 #ifdef CONFIG_LEDS_BRIGHTNESS_HW_CHANGED
 void led_classdev_notify_brightness_hw_changed(
-	struct led_classdev *led_cdev, enum led_brightness brightness);
+	struct led_classdev *led_cdev, unsigned int brightness);
 #else
 static inline void led_classdev_notify_brightness_hw_changed(
 	struct led_classdev *led_cdev, enum led_brightness brightness) { }
diff --git a/include/linux/litex.h b/include/linux/litex.h
index 40f5be503593..5ea9ccf5cce4 100644
--- a/include/linux/litex.h
+++ b/include/linux/litex.h
@@ -3,9 +3,6 @@
  * Common LiteX header providing
  * helper functions for accessing CSRs.
  *
- * Implementation of the functions is provided by
- * the LiteX SoC Controller driver.
- *
  * Copyright (C) 2019-2020 Antmicro <www.antmicro.com>
  */
 
@@ -13,90 +10,147 @@
 #define _LINUX_LITEX_H
 
 #include <linux/io.h>
-#include <linux/types.h>
-#include <linux/compiler_types.h>
+
+/* LiteX SoCs support 8- or 32-bit CSR Bus data width (i.e., subreg. size) */
+#if defined(CONFIG_LITEX_SUBREG_SIZE) && \
+	(CONFIG_LITEX_SUBREG_SIZE == 1 || CONFIG_LITEX_SUBREG_SIZE == 4)
+#define LITEX_SUBREG_SIZE      CONFIG_LITEX_SUBREG_SIZE
+#else
+#error LiteX subregister size (LITEX_SUBREG_SIZE) must be 4 or 1!
+#endif
+#define LITEX_SUBREG_SIZE_BIT	 (LITEX_SUBREG_SIZE * 8)
+
+/* LiteX subregisters of any width are always aligned on a 4-byte boundary */
+#define LITEX_SUBREG_ALIGN	  0x4
+
+static inline void _write_litex_subregister(u32 val, void __iomem *addr)
+{
+	writel((u32 __force)cpu_to_le32(val), addr);
+}
+
+static inline u32 _read_litex_subregister(void __iomem *addr)
+{
+	return le32_to_cpu((__le32 __force)readl(addr));
+}
 
 /*
- * The parameters below are true for LiteX SoCs configured for 8-bit CSR Bus,
- * 32-bit aligned.
+ * LiteX SoC Generator, depending on the configuration, can split a single
+ * logical CSR (Control&Status Register) into a series of consecutive physical
+ * registers.
+ *
+ * For example, in the configuration with 8-bit CSR Bus, a 32-bit aligned,
+ * 32-bit wide logical CSR will be laid out as four 32-bit physical
+ * subregisters, each one containing one byte of meaningful data.
  *
- * Supporting other configurations will require extending the logic in this
- * header and in the LiteX SoC controller driver.
+ * For details see: https://github.com/enjoy-digital/litex/wiki/CSR-Bus
  */
-#define LITEX_REG_SIZE	  0x4
-#define LITEX_SUBREG_SIZE	0x1
-#define LITEX_SUBREG_SIZE_BIT	 (LITEX_SUBREG_SIZE * 8)
 
-#define WRITE_LITEX_SUBREGISTER(val, base_offset, subreg_id) \
-	writel((u32 __force)cpu_to_le32(val), base_offset + (LITEX_REG_SIZE * subreg_id))
+/* number of LiteX subregisters needed to store a register of given reg_size */
+#define _litex_num_subregs(reg_size) \
+	(((reg_size) - 1) / LITEX_SUBREG_SIZE + 1)
 
-#define READ_LITEX_SUBREGISTER(base_offset, subreg_id) \
-	le32_to_cpu((__le32 __force)readl(base_offset + (LITEX_REG_SIZE * subreg_id)))
+/*
+ * since the number of 4-byte aligned subregisters required to store a single
+ * LiteX CSR (MMIO) register varies with LITEX_SUBREG_SIZE, the offset of the
+ * next adjacent LiteX CSR register w.r.t. the offset of the current one also
+ * depends on how many subregisters the latter is spread across
+ */
+#define _next_reg_off(off, size) \
+	((off) + _litex_num_subregs(size) * LITEX_SUBREG_ALIGN)
 
-void litex_set_reg(void __iomem *reg, unsigned long reg_sz, unsigned long val);
+/*
+ * The purpose of `_litex_[set|get]_reg()` is to implement the logic of
+ * writing to/reading from the LiteX CSR in a single place that can be then
+ * reused by all LiteX drivers via the `litex_[write|read][8|16|32|64]()`
+ * accessors for the appropriate data width.
+ * NOTE: direct use of `_litex_[set|get]_reg()` by LiteX drivers is strongly
+ * discouraged, as they perform no error checking on the requested data width!
+ */
 
-unsigned long litex_get_reg(void __iomem *reg, unsigned long reg_sz);
+/**
+ * _litex_set_reg() - Writes a value to the LiteX CSR (Control&Status Register)
+ * @reg: Address of the CSR
+ * @reg_size: The width of the CSR expressed in the number of bytes
+ * @val: Value to be written to the CSR
+ *
+ * This function splits a single (possibly multi-byte) LiteX CSR write into
+ * a series of subregister writes with a proper offset.
+ * NOTE: caller is responsible for ensuring (0 < reg_size <= sizeof(u64)).
+ */
+static inline void _litex_set_reg(void __iomem *reg, size_t reg_size, u64 val)
+{
+	u8 shift = _litex_num_subregs(reg_size) * LITEX_SUBREG_SIZE_BIT;
+
+	while (shift > 0) {
+		shift -= LITEX_SUBREG_SIZE_BIT;
+		_write_litex_subregister(val >> shift, reg);
+		reg += LITEX_SUBREG_ALIGN;
+	}
+}
+
+/**
+ * _litex_get_reg() - Reads a value of the LiteX CSR (Control&Status Register)
+ * @reg: Address of the CSR
+ * @reg_size: The width of the CSR expressed in the number of bytes
+ *
+ * Return: Value read from the CSR
+ *
+ * This function generates a series of subregister reads with a proper offset
+ * and joins their results into a single (possibly multi-byte) LiteX CSR value.
+ * NOTE: caller is responsible for ensuring (0 < reg_size <= sizeof(u64)).
+ */
+static inline u64 _litex_get_reg(void __iomem *reg, size_t reg_size)
+{
+	u64 r;
+	u8 i;
+
+	r = _read_litex_subregister(reg);
+	for (i = 1; i < _litex_num_subregs(reg_size); i++) {
+		r <<= LITEX_SUBREG_SIZE_BIT;
+		reg += LITEX_SUBREG_ALIGN;
+		r |= _read_litex_subregister(reg);
+	}
+	return r;
+}
 
 static inline void litex_write8(void __iomem *reg, u8 val)
 {
-	WRITE_LITEX_SUBREGISTER(val, reg, 0);
+	_litex_set_reg(reg, sizeof(u8), val);
 }
 
 static inline void litex_write16(void __iomem *reg, u16 val)
 {
-	WRITE_LITEX_SUBREGISTER(val >> 8, reg, 0);
-	WRITE_LITEX_SUBREGISTER(val, reg, 1);
+	_litex_set_reg(reg, sizeof(u16), val);
 }
 
 static inline void litex_write32(void __iomem *reg, u32 val)
 {
-	WRITE_LITEX_SUBREGISTER(val >> 24, reg, 0);
-	WRITE_LITEX_SUBREGISTER(val >> 16, reg, 1);
-	WRITE_LITEX_SUBREGISTER(val >> 8, reg, 2);
-	WRITE_LITEX_SUBREGISTER(val, reg, 3);
+	_litex_set_reg(reg, sizeof(u32), val);
 }
 
 static inline void litex_write64(void __iomem *reg, u64 val)
 {
-	WRITE_LITEX_SUBREGISTER(val >> 56, reg, 0);
-	WRITE_LITEX_SUBREGISTER(val >> 48, reg, 1);
-	WRITE_LITEX_SUBREGISTER(val >> 40, reg, 2);
-	WRITE_LITEX_SUBREGISTER(val >> 32, reg, 3);
-	WRITE_LITEX_SUBREGISTER(val >> 24, reg, 4);
-	WRITE_LITEX_SUBREGISTER(val >> 16, reg, 5);
-	WRITE_LITEX_SUBREGISTER(val >> 8, reg, 6);
-	WRITE_LITEX_SUBREGISTER(val, reg, 7);
+	_litex_set_reg(reg, sizeof(u64), val);
 }
 
 static inline u8 litex_read8(void __iomem *reg)
 {
-	return READ_LITEX_SUBREGISTER(reg, 0);
+	return _litex_get_reg(reg, sizeof(u8));
 }
 
 static inline u16 litex_read16(void __iomem *reg)
 {
-	return (READ_LITEX_SUBREGISTER(reg, 0) << 8)
-		| (READ_LITEX_SUBREGISTER(reg, 1));
+	return _litex_get_reg(reg, sizeof(u16));
 }
 
 static inline u32 litex_read32(void __iomem *reg)
 {
-	return (READ_LITEX_SUBREGISTER(reg, 0) << 24)
-		| (READ_LITEX_SUBREGISTER(reg, 1) << 16)
-		| (READ_LITEX_SUBREGISTER(reg, 2) << 8)
-		| (READ_LITEX_SUBREGISTER(reg, 3));
+	return _litex_get_reg(reg, sizeof(u32));
 }
 
 static inline u64 litex_read64(void __iomem *reg)
 {
-	return ((u64)READ_LITEX_SUBREGISTER(reg, 0) << 56)
-		| ((u64)READ_LITEX_SUBREGISTER(reg, 1) << 48)
-		| ((u64)READ_LITEX_SUBREGISTER(reg, 2) << 40)
-		| ((u64)READ_LITEX_SUBREGISTER(reg, 3) << 32)
-		| ((u64)READ_LITEX_SUBREGISTER(reg, 4) << 24)
-		| ((u64)READ_LITEX_SUBREGISTER(reg, 5) << 16)
-		| ((u64)READ_LITEX_SUBREGISTER(reg, 6) << 8)
-		| ((u64)READ_LITEX_SUBREGISTER(reg, 7));
+	return _litex_get_reg(reg, sizeof(u64));
 }
 
 #endif /* _LINUX_LITEX_H */
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 161f4419db6c..5d92a7e1a742 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -86,7 +86,6 @@ struct path;
 
 extern int mnt_want_write(struct vfsmount *mnt);
 extern int mnt_want_write_file(struct file *file);
-extern int mnt_clone_write(struct vfsmount *mnt);
 extern void mnt_drop_write(struct vfsmount *mnt);
 extern void mnt_drop_write_file(struct file *file);
 extern void mntput(struct vfsmount *mnt);
diff --git a/include/linux/net.h b/include/linux/net.h
index 9e2324efc26a..ba736b457a06 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -42,8 +42,6 @@ struct net;
 #define SOCK_PASSCRED		3
 #define SOCK_PASSSEC		4
 
-#define PROTO_CMSG_DATA_ONLY	0x0001
-
 #ifndef ARCH_HAS_SOCKET_TYPES
 /**
  * enum sock_type - Socket types
@@ -138,7 +136,6 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
 
 struct proto_ops {
 	int		family;
-	unsigned int	flags;
 	struct module	*owner;
 	int		(*release)   (struct socket *sock);
 	int		(*bind)	     (struct socket *sock,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 26f499810dfa..ef00bb22164c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -895,6 +895,9 @@ struct task_struct {
 	/* CLONE_CHILD_CLEARTID: */
 	int __user			*clear_child_tid;
 
+	/* PF_IO_WORKER */
+	void				*pf_io_worker;
+
 	u64				utime;
 	u64				stime;
 #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index d9c9fc9ca5d2..5857a937c637 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -29,6 +29,7 @@ enum swiotlb_force {
  * controllable.
  */
 #define IO_TLB_SHIFT 11
+#define IO_TLB_SIZE (1 << IO_TLB_SHIFT)
 
 /* default to 64MB */
 #define IO_TLB_DEFAULT_SIZE (64UL<<20)
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index b3bbd10eb3f0..02f966e9358f 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -187,7 +187,7 @@ struct iscsi_conn {
 	struct iscsi_task	*task;		/* xmit task in progress */
 
 	/* xmit */
-	spinlock_t		taskqueuelock;  /* protects the next three lists */
+	/* items must be added/deleted under frwd lock */
 	struct list_head	mgmtqueue;	/* mgmt (control) xmit queue */
 	struct list_head	cmdqueue;	/* data-path cmd queue */
 	struct list_head	requeue;	/* tasks needing another run */
@@ -332,7 +332,7 @@ struct iscsi_session {
 						 * cmdsn, queued_cmdsn     *
 						 * session resources:      *
 						 * - cmdpool kfifo_out ,   *
-						 * - mgmtpool,		   */
+						 * - mgmtpool, queues	   */
 	spinlock_t		back_lock;	/* protects cmdsn_exp      *
 						 * cmdsn_max,              *
 						 * cmdpool kfifo_in        */
@@ -395,6 +395,8 @@ extern struct Scsi_Host *iscsi_host_alloc(struct scsi_host_template *sht,
 extern void iscsi_host_remove(struct Scsi_Host *shost);
 extern void iscsi_host_free(struct Scsi_Host *shost);
 extern int iscsi_target_alloc(struct scsi_target *starget);
+extern int iscsi_host_get_max_scsi_cmds(struct Scsi_Host *shost,
+					uint16_t requested_cmds_max);
 
 /*
  * session management
diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h
index 6336780d83a7..ce2fba49c95d 100644
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h
@@ -72,6 +72,7 @@ int	transport_backend_register(const struct target_backend_ops *);
 void	target_backend_unregister(const struct target_backend_ops *);
 
 void	target_complete_cmd(struct se_cmd *, u8);
+void	target_set_cmd_data_length(struct se_cmd *, int);
 void	target_complete_cmd_with_length(struct se_cmd *, u8, int);
 
 void	transport_copy_sense_to_cmd(struct se_cmd *, unsigned char *);
diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h
index e41c611d6d3b..899fdacf57b9 100644
--- a/include/trace/events/bcache.h
+++ b/include/trace/events/bcache.h
@@ -28,7 +28,7 @@ DECLARE_EVENT_CLASS(bcache_request,
 		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->orig_sector	= bio->bi_iter.bi_sector - 16;
 		__entry->nr_sector	= bio->bi_iter.bi_size >> 9;
-		blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
 	),
 
 	TP_printk("%d,%d %s %llu + %u (from %d,%d @ %llu)",
@@ -102,7 +102,7 @@ DECLARE_EVENT_CLASS(bcache_bio,
 		__entry->dev		= bio_dev(bio);
 		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->nr_sector	= bio->bi_iter.bi_size >> 9;
-		blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
 	),
 
 	TP_printk("%d,%d  %s %llu + %u",
@@ -137,7 +137,7 @@ TRACE_EVENT(bcache_read,
 		__entry->dev		= bio_dev(bio);
 		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->nr_sector	= bio->bi_iter.bi_size >> 9;
-		blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
 		__entry->cache_hit = hit;
 		__entry->bypass = bypass;
 	),
@@ -168,7 +168,7 @@ TRACE_EVENT(bcache_write,
 		__entry->inode		= inode;
 		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->nr_sector	= bio->bi_iter.bi_size >> 9;
-		blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
 		__entry->writeback = writeback;
 		__entry->bypass = bypass;
 	),
@@ -238,7 +238,7 @@ TRACE_EVENT(bcache_journal_write,
 		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->nr_sector	= bio->bi_iter.bi_size >> 9;
 		__entry->nr_keys	= keys;
-		blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
 	),
 
 	TP_printk("%d,%d  %s %llu + %u keys %u",
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 0d782663a005..cc5ab96a7471 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -89,7 +89,7 @@ TRACE_EVENT(block_rq_requeue,
 		__entry->sector    = blk_rq_trace_sector(rq);
 		__entry->nr_sector = blk_rq_trace_nr_sectors(rq);
 
-		blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
+		blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
 		__get_str(cmd)[0] = '\0';
 	),
 
@@ -133,7 +133,7 @@ TRACE_EVENT(block_rq_complete,
 		__entry->nr_sector = nr_bytes >> 9;
 		__entry->error     = error;
 
-		blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, nr_bytes);
+		blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
 		__get_str(cmd)[0] = '\0';
 	),
 
@@ -166,7 +166,7 @@ DECLARE_EVENT_CLASS(block_rq,
 		__entry->nr_sector = blk_rq_trace_nr_sectors(rq);
 		__entry->bytes     = blk_rq_bytes(rq);
 
-		blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
+		blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
 		__get_str(cmd)[0] = '\0';
 		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
 	),
@@ -196,7 +196,7 @@ DEFINE_EVENT(block_rq, block_rq_insert,
 
 /**
  * block_rq_issue - issue pending block IO request operation to device driver
- * @rq: block IO operation operation request
+ * @rq: block IO operation request
  *
  * Called when block operation request @rq from queue @q is sent to a
  * device driver for processing.
@@ -210,7 +210,7 @@ DEFINE_EVENT(block_rq, block_rq_issue,
 
 /**
  * block_rq_merge - merge request with another one in the elevator
- * @rq: block IO operation operation request
+ * @rq: block IO operation request
  *
  * Called when block operation request @rq from queue @q is merged to another
  * request queued in the elevator.
@@ -249,7 +249,7 @@ TRACE_EVENT(block_bio_complete,
 		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->nr_sector	= bio_sectors(bio);
 		__entry->error		= blk_status_to_errno(bio->bi_status);
-		blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
 	),
 
 	TP_printk("%d,%d %s %llu + %u [%d]",
@@ -276,7 +276,7 @@ DECLARE_EVENT_CLASS(block_bio,
 		__entry->dev		= bio_dev(bio);
 		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->nr_sector	= bio_sectors(bio);
-		blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
 		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
 	),
 
@@ -433,7 +433,7 @@ TRACE_EVENT(block_split,
 		__entry->dev		= bio_dev(bio);
 		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->new_sector	= new_sector;
-		blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
 		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
 	),
 
@@ -474,7 +474,7 @@ TRACE_EVENT(block_bio_remap,
 		__entry->nr_sector	= bio_sectors(bio);
 		__entry->old_dev	= dev;
 		__entry->old_sector	= from;
-		blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
 	),
 
 	TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu",
@@ -518,7 +518,7 @@ TRACE_EVENT(block_rq_remap,
 		__entry->old_dev	= dev;
 		__entry->old_sector	= from;
 		__entry->nr_bios	= blk_rq_count_bios(rq);
-		blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
+		blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
 	),
 
 	TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu %u",
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index ac4e1738a9af..2514eb6b1cf2 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -262,6 +262,7 @@ struct io_uring_params {
 #define IORING_FEAT_POLL_32BITS 	(1U << 6)
 #define IORING_FEAT_SQPOLL_NONFIXED	(1U << 7)
 #define IORING_FEAT_EXT_ARG		(1U << 8)
+#define IORING_FEAT_NATIVE_WORKERS	(1U << 9)
 
 /*
  * io_uring_register(2) opcodes and arguments
diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
index 5b3f01da172b..60739d5e3373 100644
--- a/kernel/audit_fsnotify.c
+++ b/kernel/audit_fsnotify.c
@@ -84,7 +84,7 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pa
 
 	dentry = kern_path_locked(pathname, &path);
 	if (IS_ERR(dentry))
-		return (void *)dentry; /* returning an error */
+		return ERR_CAST(dentry); /* returning an error */
 	inode = path.dentry->d_inode;
 	inode_unlock(inode);
 
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 7c42df6e6100..c10e855a03bc 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -50,9 +50,6 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/swiotlb.h>
 
-#define OFFSET(val,align) ((unsigned long)	\
-	                   ( (val) & ( (align) - 1)))
-
 #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
 
 /*
@@ -103,6 +100,11 @@ static unsigned int max_segment;
 static phys_addr_t *io_tlb_orig_addr;
 
 /*
+ * The mapped buffer's size should be validated during a sync operation.
+ */
+static size_t *io_tlb_orig_size;
+
+/*
  * Protect the above data structures in the map and unmap calls
  */
 static DEFINE_SPINLOCK(io_tlb_lock);
@@ -171,7 +173,7 @@ void __init swiotlb_adjust_size(unsigned long new_size)
 	 * adjust/expand SWIOTLB size for their use.
 	 */
 	if (!io_tlb_nslabs) {
-		size = ALIGN(new_size, 1 << IO_TLB_SHIFT);
+		size = ALIGN(new_size, IO_TLB_SIZE);
 		io_tlb_nslabs = size >> IO_TLB_SHIFT;
 		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
 
@@ -192,6 +194,16 @@ void swiotlb_print_info(void)
 	       bytes >> 20);
 }
 
+static inline unsigned long io_tlb_offset(unsigned long val)
+{
+	return val & (IO_TLB_SEGSIZE - 1);
+}
+
+static inline unsigned long nr_slots(u64 val)
+{
+	return DIV_ROUND_UP(val, IO_TLB_SIZE);
+}
+
 /*
  * Early SWIOTLB allocation may be too early to allow an architecture to
  * perform the desired operations.  This function allows the architecture to
@@ -240,9 +252,16 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
 		panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
 		      __func__, alloc_size, PAGE_SIZE);
 
+	alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(size_t));
+	io_tlb_orig_size = memblock_alloc(alloc_size, PAGE_SIZE);
+	if (!io_tlb_orig_size)
+		panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
+		      __func__, alloc_size, PAGE_SIZE);
+
 	for (i = 0; i < io_tlb_nslabs; i++) {
-		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+		io_tlb_list[i] = IO_TLB_SEGSIZE - io_tlb_offset(i);
 		io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+		io_tlb_orig_size[i] = 0;
 	}
 	io_tlb_index = 0;
 	no_iotlb_memory = false;
@@ -363,7 +382,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 	 * between io_tlb_start and io_tlb_end.
 	 */
 	io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
-	                              get_order(io_tlb_nslabs * sizeof(int)));
+				      get_order(io_tlb_nslabs * sizeof(int)));
 	if (!io_tlb_list)
 		goto cleanup3;
 
@@ -374,9 +393,18 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 	if (!io_tlb_orig_addr)
 		goto cleanup4;
 
+	io_tlb_orig_size = (size_t *)
+		__get_free_pages(GFP_KERNEL,
+				 get_order(io_tlb_nslabs *
+					   sizeof(size_t)));
+	if (!io_tlb_orig_size)
+		goto cleanup5;
+
+
 	for (i = 0; i < io_tlb_nslabs; i++) {
-		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+		io_tlb_list[i] = IO_TLB_SEGSIZE - io_tlb_offset(i);
 		io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+		io_tlb_orig_size[i] = 0;
 	}
 	io_tlb_index = 0;
 	no_iotlb_memory = false;
@@ -389,6 +417,10 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 
 	return 0;
 
+cleanup5:
+	free_pages((unsigned long)io_tlb_orig_addr, get_order(io_tlb_nslabs *
+							      sizeof(phys_addr_t)));
+
 cleanup4:
 	free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
 	                                                 sizeof(int)));
@@ -404,6 +436,8 @@ void __init swiotlb_exit(void)
 		return;
 
 	if (late_alloc) {
+		free_pages((unsigned long)io_tlb_orig_size,
+			   get_order(io_tlb_nslabs * sizeof(size_t)));
 		free_pages((unsigned long)io_tlb_orig_addr,
 			   get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
 		free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
@@ -413,6 +447,8 @@ void __init swiotlb_exit(void)
 	} else {
 		memblock_free_late(__pa(io_tlb_orig_addr),
 				   PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
+		memblock_free_late(__pa(io_tlb_orig_size),
+				   PAGE_ALIGN(io_tlb_nslabs * sizeof(size_t)));
 		memblock_free_late(__pa(io_tlb_list),
 				   PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
 		memblock_free_late(io_tlb_start,
@@ -461,79 +497,71 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
 	}
 }
 
-phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
-		size_t mapping_size, size_t alloc_size,
-		enum dma_data_direction dir, unsigned long attrs)
-{
-	dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, io_tlb_start);
-	unsigned long flags;
-	phys_addr_t tlb_addr;
-	unsigned int nslots, stride, index, wrap;
-	int i;
-	unsigned long mask;
-	unsigned long offset_slots;
-	unsigned long max_slots;
-	unsigned long tmp_io_tlb_used;
-
-	if (no_iotlb_memory)
-		panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
-
-	if (mem_encrypt_active())
-		pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
+#define slot_addr(start, idx)	((start) + ((idx) << IO_TLB_SHIFT))
 
-	if (mapping_size > alloc_size) {
-		dev_warn_once(hwdev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
-			      mapping_size, alloc_size);
-		return (phys_addr_t)DMA_MAPPING_ERROR;
-	}
+/*
+ * Return the offset into a iotlb slot required to keep the device happy.
+ */
+static unsigned int swiotlb_align_offset(struct device *dev, u64 addr)
+{
+	return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1);
+}
 
-	mask = dma_get_seg_boundary(hwdev);
+/*
+ * Carefully handle integer overflow which can occur when boundary_mask == ~0UL.
+ */
+static inline unsigned long get_max_slots(unsigned long boundary_mask)
+{
+	if (boundary_mask == ~0UL)
+		return 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
+	return nr_slots(boundary_mask + 1);
+}
 
-	tbl_dma_addr &= mask;
+static unsigned int wrap_index(unsigned int index)
+{
+	if (index >= io_tlb_nslabs)
+		return 0;
+	return index;
+}
 
-	offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+/*
+ * Find a suitable number of IO TLB entries size that will fit this request and
+ * allocate a buffer from that IO TLB pool.
+ */
+static int find_slots(struct device *dev, phys_addr_t orig_addr,
+		size_t alloc_size)
+{
+	unsigned long boundary_mask = dma_get_seg_boundary(dev);
+	dma_addr_t tbl_dma_addr =
+		phys_to_dma_unencrypted(dev, io_tlb_start) & boundary_mask;
+	unsigned long max_slots = get_max_slots(boundary_mask);
+	unsigned int iotlb_align_mask =
+		dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1);
+	unsigned int nslots = nr_slots(alloc_size), stride;
+	unsigned int index, wrap, count = 0, i;
+	unsigned long flags;
 
-	/*
-	 * Carefully handle integer overflow which can occur when mask == ~0UL.
-	 */
-	max_slots = mask + 1
-		    ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT
-		    : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
+	BUG_ON(!nslots);
 
 	/*
-	 * For mappings greater than or equal to a page, we limit the stride
-	 * (and hence alignment) to a page size.
+	 * For mappings with an alignment requirement don't bother looping to
+	 * unaligned slots once we found an aligned one.  For allocations of
+	 * PAGE_SIZE or larger only look for page aligned allocations.
 	 */
-	nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+	stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1;
 	if (alloc_size >= PAGE_SIZE)
-		stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
-	else
-		stride = 1;
-
-	BUG_ON(!nslots);
+		stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT));
 
-	/*
-	 * Find suitable number of IO TLB entries size that will fit this
-	 * request and allocate a buffer from that IO TLB pool.
-	 */
 	spin_lock_irqsave(&io_tlb_lock, flags);
-
 	if (unlikely(nslots > io_tlb_nslabs - io_tlb_used))
 		goto not_found;
 
-	index = ALIGN(io_tlb_index, stride);
-	if (index >= io_tlb_nslabs)
-		index = 0;
-	wrap = index;
-
+	index = wrap = wrap_index(ALIGN(io_tlb_index, stride));
 	do {
-		while (iommu_is_span_boundary(index, nslots, offset_slots,
-					      max_slots)) {
-			index += stride;
-			if (index >= io_tlb_nslabs)
-				index = 0;
-			if (index == wrap)
-				goto not_found;
+		if ((slot_addr(tbl_dma_addr, index) & iotlb_align_mask) !=
+		    (orig_addr & iotlb_align_mask)) {
+			index = wrap_index(index + 1);
+			continue;
 		}
 
 		/*
@@ -541,55 +569,96 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
 		 * contiguous buffers, we allocate the buffers from that slot
 		 * and mark the entries as '0' indicating unavailable.
 		 */
-		if (io_tlb_list[index] >= nslots) {
-			int count = 0;
-
-			for (i = index; i < (int) (index + nslots); i++)
-				io_tlb_list[i] = 0;
-			for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
-				io_tlb_list[i] = ++count;
-			tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);
-
-			/*
-			 * Update the indices to avoid searching in the next
-			 * round.
-			 */
-			io_tlb_index = ((index + nslots) < io_tlb_nslabs
-					? (index + nslots) : 0);
-
-			goto found;
+		if (!iommu_is_span_boundary(index, nslots,
+					    nr_slots(tbl_dma_addr),
+					    max_slots)) {
+			if (io_tlb_list[index] >= nslots)
+				goto found;
 		}
-		index += stride;
-		if (index >= io_tlb_nslabs)
-			index = 0;
+		index = wrap_index(index + stride);
 	} while (index != wrap);
 
 not_found:
-	tmp_io_tlb_used = io_tlb_used;
-
 	spin_unlock_irqrestore(&io_tlb_lock, flags);
-	if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
-		dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
-			 alloc_size, io_tlb_nslabs, tmp_io_tlb_used);
-	return (phys_addr_t)DMA_MAPPING_ERROR;
+	return -1;
+
 found:
+	for (i = index; i < index + nslots; i++)
+		io_tlb_list[i] = 0;
+	for (i = index - 1;
+	     io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
+	     io_tlb_list[i]; i--)
+		io_tlb_list[i] = ++count;
+
+	/*
+	 * Update the indices to avoid searching in the next round.
+	 */
+	if (index + nslots < io_tlb_nslabs)
+		io_tlb_index = index + nslots;
+	else
+		io_tlb_index = 0;
 	io_tlb_used += nslots;
+
 	spin_unlock_irqrestore(&io_tlb_lock, flags);
+	return index;
+}
+
+phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
+		size_t mapping_size, size_t alloc_size,
+		enum dma_data_direction dir, unsigned long attrs)
+{
+	unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+	unsigned int index, i;
+	phys_addr_t tlb_addr;
+
+	if (no_iotlb_memory)
+		panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
+
+	if (mem_encrypt_active())
+		pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
+
+	if (mapping_size > alloc_size) {
+		dev_warn_once(dev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
+			      mapping_size, alloc_size);
+		return (phys_addr_t)DMA_MAPPING_ERROR;
+	}
+
+	index = find_slots(dev, orig_addr, alloc_size + offset);
+	if (index == -1) {
+		if (!(attrs & DMA_ATTR_NO_WARN))
+			dev_warn_ratelimited(dev,
+	"swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
+				 alloc_size, io_tlb_nslabs, io_tlb_used);
+		return (phys_addr_t)DMA_MAPPING_ERROR;
+	}
 
 	/*
 	 * Save away the mapping from the original address to the DMA address.
 	 * This is needed when we sync the memory.  Then we sync the buffer if
 	 * needed.
 	 */
-	for (i = 0; i < nslots; i++)
-		io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
+	for (i = 0; i < nr_slots(alloc_size + offset); i++) {
+		io_tlb_orig_addr[index + i] = slot_addr(orig_addr, i);
+		io_tlb_orig_size[index+i] = alloc_size - (i << IO_TLB_SHIFT);
+	}
+	tlb_addr = slot_addr(io_tlb_start, index) + offset;
 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
 	    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
 		swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
-
 	return tlb_addr;
 }
 
+static void validate_sync_size_and_truncate(struct device *hwdev, size_t orig_size, size_t *size)
+{
+	if (*size > orig_size) {
+		/* Warn and truncate mapping_size */
+		dev_WARN_ONCE(hwdev, 1,
+			"Attempt for buffer overflow. Original size: %zu. Mapping size: %zu.\n",
+			orig_size, *size);
+		*size = orig_size;
+	}
+}
+
 /*
  * tlb_addr is the physical address of the bounce buffer to unmap.
  */
@@ -598,10 +667,13 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
 			      enum dma_data_direction dir, unsigned long attrs)
 {
 	unsigned long flags;
-	int i, count, nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-	int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
+	unsigned int offset = swiotlb_align_offset(hwdev, tlb_addr);
+	int i, count, nslots = nr_slots(alloc_size + offset);
+	int index = (tlb_addr - offset - io_tlb_start) >> IO_TLB_SHIFT;
 	phys_addr_t orig_addr = io_tlb_orig_addr[index];
 
+	validate_sync_size_and_truncate(hwdev, io_tlb_orig_size[index], &mapping_size);
+
 	/*
 	 * First, sync the memory before unmapping the entry
 	 */
@@ -617,26 +689,30 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
 	 * with slots below and above the pool being returned.
 	 */
 	spin_lock_irqsave(&io_tlb_lock, flags);
-	{
-		count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
-			 io_tlb_list[index + nslots] : 0);
-		/*
-		 * Step 1: return the slots to the free list, merging the
-		 * slots with superceeding slots
-		 */
-		for (i = index + nslots - 1; i >= index; i--) {
-			io_tlb_list[i] = ++count;
-			io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
-		}
-		/*
-		 * Step 2: merge the returned slots with the preceding slots,
-		 * if available (non zero)
-		 */
-		for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
-			io_tlb_list[i] = ++count;
+	if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE))
+		count = io_tlb_list[index + nslots];
+	else
+		count = 0;
 
-		io_tlb_used -= nslots;
+	/*
+	 * Step 1: return the slots to the free list, merging the slots with
+	 * superceeding slots
+	 */
+	for (i = index + nslots - 1; i >= index; i--) {
+		io_tlb_list[i] = ++count;
+		io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+		io_tlb_orig_size[i] = 0;
 	}
+
+	/*
+	 * Step 2: merge the returned slots with the preceding slots, if
+	 * available (non zero)
+	 */
+	for (i = index - 1;
+	     io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && io_tlb_list[i];
+	     i--)
+		io_tlb_list[i] = ++count;
+	io_tlb_used -= nslots;
 	spin_unlock_irqrestore(&io_tlb_lock, flags);
 }
 
@@ -645,11 +721,13 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
 			     enum dma_sync_target target)
 {
 	int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
+	size_t orig_size = io_tlb_orig_size[index];
 	phys_addr_t orig_addr = io_tlb_orig_addr[index];
 
 	if (orig_addr == INVALID_PHYS_ADDR)
 		return;
-	orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1);
+
+	validate_sync_size_and_truncate(hwdev, orig_size, &size);
 
 	switch (target) {
 	case SYNC_FOR_CPU:
@@ -707,7 +785,7 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
 
 size_t swiotlb_max_mapping_size(struct device *dev)
 {
-	return ((size_t)1 << IO_TLB_SHIFT) * IO_TLB_SEGSIZE;
+	return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE;
 }
 
 bool is_swiotlb_active(void)
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 61db50f7ca86..821cf1723814 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -375,7 +375,7 @@ static int ptrace_attach(struct task_struct *task, long request,
 	audit_ptrace(task);
 
 	retval = -EPERM;
-	if (unlikely(task->flags & PF_KTHREAD))
+	if (unlikely(task->flags & (PF_KTHREAD | PF_IO_WORKER)))
 		goto out;
 	if (same_thread_group(task, current))
 		goto out;
diff --git a/kernel/signal.c b/kernel/signal.c
index 5ad8566534e7..ba4d1ef39a9e 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -91,7 +91,7 @@ static bool sig_task_ignored(struct task_struct *t, int sig, bool force)
 		return true;
 
 	/* Only allow kernel generated signals to this kthread */
-	if (unlikely((t->flags & PF_KTHREAD) &&
+	if (unlikely((t->flags & (PF_KTHREAD | PF_IO_WORKER)) &&
 		     (handler == SIG_KTHREAD_KERNEL) && !force))
 		return true;
 
@@ -1096,7 +1096,7 @@ static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struc
 	/*
 	 * Skip useless siginfo allocation for SIGKILL and kernel threads.
 	 */
-	if ((sig == SIGKILL) || (t->flags & PF_KTHREAD))
+	if ((sig == SIGKILL) || (t->flags & (PF_KTHREAD | PF_IO_WORKER)))
 		goto out_set;
 
 	/*
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index c286c13bd31a..c221e4c3f625 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -312,8 +312,6 @@ record_it:
 
 static void blk_trace_free(struct blk_trace *bt)
 {
-	debugfs_remove(bt->msg_file);
-	debugfs_remove(bt->dropped_file);
 	relay_close(bt->rchan);
 	debugfs_remove(bt->dir);
 	free_percpu(bt->sequence);
@@ -545,10 +543,8 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 	INIT_LIST_HEAD(&bt->running_list);
 
 	ret = -EIO;
-	bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt,
-					       &blk_dropped_fops);
-
-	bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops);
+	debugfs_create_file("dropped", 0444, dir, bt, &blk_dropped_fops);
+	debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops);
 
 	bt->rchan = relay_open("trace", dir, buts->buf_size,
 				buts->buf_nr, &blk_relay_callbacks, bt);
@@ -1868,7 +1864,17 @@ void blk_trace_remove_sysfs(struct device *dev)
 
 #ifdef CONFIG_EVENT_TRACING
 
-void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes)
+/**
+ * blk_fill_rwbs - Fill the buffer rwbs by mapping op to character string.
+ * @rwbs:	buffer to be filled
+ * @op:		REQ_OP_XXX for the tracepoint
+ *
+ * Description:
+ *     Maps the REQ_OP_XXX to character and fills the buffer provided by the
+ *     caller with resulting string.
+ *
+ **/
+void blk_fill_rwbs(char *rwbs, unsigned int op)
 {
 	int i = 0;
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index a02ce89b56b5..1355e6c0d567 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1021,7 +1021,6 @@ static int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned lon
 
 const struct proto_ops inet_stream_ops = {
 	.family		   = PF_INET,
-	.flags		   = PROTO_CMSG_DATA_ONLY,
 	.owner		   = THIS_MODULE,
 	.release	   = inet_release,
 	.bind		   = inet_bind,
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 1fb75f01756c..802f5111805a 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -665,7 +665,6 @@ int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 
 const struct proto_ops inet6_stream_ops = {
 	.family		   = PF_INET6,
-	.flags		   = PROTO_CMSG_DATA_ONLY,
 	.owner		   = THIS_MODULE,
 	.release	   = inet6_release,
 	.bind		   = inet6_bind,
diff --git a/net/socket.c b/net/socket.c
index 23c7842389de..84a8049c2b09 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2413,10 +2413,6 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
 long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
 			unsigned int flags)
 {
-	/* disallow ancillary data requests from this path */
-	if (msg->msg_control || msg->msg_controllen)
-		return -EINVAL;
-
 	return ____sys_sendmsg(sock, msg, flags, NULL, 0);
 }
 
@@ -2625,12 +2621,6 @@ long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
 			struct user_msghdr __user *umsg,
 			struct sockaddr __user *uaddr, unsigned int flags)
 {
-	if (msg->msg_control || msg->msg_controllen) {
-		/* disallow ancillary data reqs unless cmsg is plain data */
-		if (!(sock->ops->flags & PROTO_CMSG_DATA_ONLY))
-			return -EINVAL;
-	}
-
 	return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0);
 }
 
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index e046e16e4411..8b5bc7bf4bb8 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -1553,7 +1553,7 @@ sub create_parameterlist($$$$) {
 	} elsif ($arg =~ m/\(.+\)\s*\(/) {
 	    # pointer-to-function
 	    $arg =~ tr/#/,/;
-	    $arg =~ m/[^\(]+\(\*?\s*([\w\.]*)\s*\)/;
+	    $arg =~ m/[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)/;
 	    $param = $1;
 	    $type = $arg;
 	    $type =~ s/([^\(]+\(\*?)\s*$param/$1/;