Merge remote-tracking branch 'net-next/master' into mac80211-next

Merge net-next so that we get the changes from net, which would
otherwise conflict with the NLA_POLICY_NESTED/_ARRAY changes.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Johannes Berg 2019-02-01 11:05:27 +01:00
commit 752cfee90d
1123 changed files with 44218 additions and 27535 deletions


@ -72,6 +72,10 @@ ForEachMacros:
- 'apei_estatus_for_each_section'
- 'ata_for_each_dev'
- 'ata_for_each_link'
- '__ata_qc_for_each'
- 'ata_qc_for_each'
- 'ata_qc_for_each_raw'
- 'ata_qc_for_each_with_internal'
- 'ax25_for_each'
- 'ax25_uid_for_each'
- 'bio_for_each_integrity_vec'
@ -85,6 +89,7 @@ ForEachMacros:
- 'blk_queue_for_each_rl'
- 'bond_for_each_slave'
- 'bond_for_each_slave_rcu'
- 'bpf_for_each_spilled_reg'
- 'btree_for_each_safe128'
- 'btree_for_each_safe32'
- 'btree_for_each_safe64'
@ -103,6 +108,8 @@ ForEachMacros:
- 'drm_atomic_crtc_for_each_plane'
- 'drm_atomic_crtc_state_for_each_plane'
- 'drm_atomic_crtc_state_for_each_plane_state'
- 'drm_atomic_for_each_plane_damage'
- 'drm_connector_for_each_possible_encoder'
- 'drm_for_each_connector_iter'
- 'drm_for_each_crtc'
- 'drm_for_each_encoder'
@ -121,11 +128,21 @@ ForEachMacros:
- 'for_each_bio'
- 'for_each_board_func_rsrc'
- 'for_each_bvec'
- 'for_each_card_components'
- 'for_each_card_links'
- 'for_each_card_links_safe'
- 'for_each_card_prelinks'
- 'for_each_card_rtds'
- 'for_each_card_rtds_safe'
- 'for_each_cgroup_storage_type'
- 'for_each_child_of_node'
- 'for_each_clear_bit'
- 'for_each_clear_bit_from'
- 'for_each_cmsghdr'
- 'for_each_compatible_node'
- 'for_each_component_dais'
- 'for_each_component_dais_safe'
- 'for_each_comp_order'
- 'for_each_console'
- 'for_each_cpu'
- 'for_each_cpu_and'
@ -133,6 +150,10 @@ ForEachMacros:
- 'for_each_cpu_wrap'
- 'for_each_dev_addr'
- 'for_each_dma_cap_mask'
- 'for_each_dpcm_be'
- 'for_each_dpcm_be_rollback'
- 'for_each_dpcm_be_safe'
- 'for_each_dpcm_fe'
- 'for_each_drhd_unit'
- 'for_each_dss_dev'
- 'for_each_efi_memory_desc'
@ -149,6 +170,7 @@ ForEachMacros:
- 'for_each_iommu'
- 'for_each_ip_tunnel_rcu'
- 'for_each_irq_nr'
- 'for_each_link_codecs'
- 'for_each_lru'
- 'for_each_matching_node'
- 'for_each_matching_node_and_match'
@ -160,6 +182,7 @@ ForEachMacros:
- 'for_each_mem_range_rev'
- 'for_each_migratetype_order'
- 'for_each_msi_entry'
- 'for_each_msi_entry_safe'
- 'for_each_net'
- 'for_each_netdev'
- 'for_each_netdev_continue'
@ -183,12 +206,14 @@ ForEachMacros:
- 'for_each_node_with_property'
- 'for_each_of_allnodes'
- 'for_each_of_allnodes_from'
- 'for_each_of_cpu_node'
- 'for_each_of_pci_range'
- 'for_each_old_connector_in_state'
- 'for_each_old_crtc_in_state'
- 'for_each_oldnew_connector_in_state'
- 'for_each_oldnew_crtc_in_state'
- 'for_each_oldnew_plane_in_state'
- 'for_each_oldnew_plane_in_state_reverse'
- 'for_each_oldnew_private_obj_in_state'
- 'for_each_old_plane_in_state'
- 'for_each_old_private_obj_in_state'
@ -206,14 +231,17 @@ ForEachMacros:
- 'for_each_process'
- 'for_each_process_thread'
- 'for_each_property_of_node'
- 'for_each_registered_fb'
- 'for_each_reserved_mem_region'
- 'for_each_resv_unavail_range'
- 'for_each_rtd_codec_dai'
- 'for_each_rtd_codec_dai_rollback'
- 'for_each_rtdcom'
- 'for_each_rtdcom_safe'
- 'for_each_set_bit'
- 'for_each_set_bit_from'
- 'for_each_sg'
- 'for_each_sg_page'
- 'for_each_sibling_event'
- '__for_each_thread'
- 'for_each_thread'
- 'for_each_zone'
@ -251,6 +279,8 @@ ForEachMacros:
- 'hlist_nulls_for_each_entry_from'
- 'hlist_nulls_for_each_entry_rcu'
- 'hlist_nulls_for_each_entry_safe'
- 'i3c_bus_for_each_i2cdev'
- 'i3c_bus_for_each_i3cdev'
- 'ide_host_for_each_port'
- 'ide_port_for_each_dev'
- 'ide_port_for_each_present_dev'
@ -267,11 +297,14 @@ ForEachMacros:
- 'kvm_for_each_memslot'
- 'kvm_for_each_vcpu'
- 'list_for_each'
- 'list_for_each_codec'
- 'list_for_each_codec_safe'
- 'list_for_each_entry'
- 'list_for_each_entry_continue'
- 'list_for_each_entry_continue_rcu'
- 'list_for_each_entry_continue_reverse'
- 'list_for_each_entry_from'
- 'list_for_each_entry_from_rcu'
- 'list_for_each_entry_from_reverse'
- 'list_for_each_entry_lockless'
- 'list_for_each_entry_rcu'
@ -291,6 +324,7 @@ ForEachMacros:
- 'media_device_for_each_intf'
- 'media_device_for_each_link'
- 'media_device_for_each_pad'
- 'nanddev_io_for_each_page'
- 'netdev_for_each_lower_dev'
- 'netdev_for_each_lower_private'
- 'netdev_for_each_lower_private_rcu'
@ -357,12 +391,14 @@ ForEachMacros:
- 'sk_nulls_for_each'
- 'sk_nulls_for_each_from'
- 'sk_nulls_for_each_rcu'
- 'snd_array_for_each'
- 'snd_pcm_group_for_each_entry'
- 'snd_soc_dapm_widget_for_each_path'
- 'snd_soc_dapm_widget_for_each_path_safe'
- 'snd_soc_dapm_widget_for_each_sink_path'
- 'snd_soc_dapm_widget_for_each_source_path'
- 'tb_property_for_each'
- 'tcf_exts_for_each_action'
- 'udp_portaddr_for_each_entry'
- 'udp_portaddr_for_each_entry_rcu'
- 'usb_hub_for_each_child'
@ -371,6 +407,11 @@ ForEachMacros:
- 'v4l2_m2m_for_each_dst_buf_safe'
- 'v4l2_m2m_for_each_src_buf'
- 'v4l2_m2m_for_each_src_buf_safe'
- 'virtio_device_for_each_vq'
- 'xa_for_each'
- 'xas_for_each'
- 'xas_for_each_conflict'
- 'xas_for_each_marked'
- 'zorro_for_each_dev'
#IncludeBlocks: Preserve # Unknown to clang-format-5.0

Documentation/bpf/btf.rst (new file, 870 lines)

@ -0,0 +1,870 @@
=====================
BPF Type Format (BTF)
=====================
1. Introduction
***************
BTF (BPF Type Format) is the metadata format which
encodes the debug info related to BPF program/map.
The name BTF was used initially to describe
data types. The BTF was later extended to include
function info for defined subroutines, and line info
for source/line information.
The debug info is used for map pretty printing, function
signatures, etc. The function signature enables better
bpf program/function kernel symbol names.
The line info helps generate
source-annotated translated byte code, jited code
and verifier logs.
The BTF specification contains two parts,
* BTF kernel API
* BTF ELF file format
The kernel API is the contract between
user space and kernel. The kernel verifies
the BTF info before using it.
The ELF file format is a user space contract
between ELF file and libbpf loader.
The type and string sections are part of the
BTF kernel API, describing the debug info
(mostly types related) referenced by the bpf program.
These two sections are discussed in
detail in :ref:`BTF_Type_String`.
.. _BTF_Type_String:
2. BTF Type and String Encoding
*******************************
The file ``include/uapi/linux/btf.h`` provides a high-level
definition of how types/strings are encoded.
The beginning of the data blob must be::
struct btf_header {
__u16 magic;
__u8 version;
__u8 flags;
__u32 hdr_len;
/* All offsets are in bytes relative to the end of this header */
__u32 type_off; /* offset of type section */
__u32 type_len; /* length of type section */
__u32 str_off; /* offset of string section */
__u32 str_len; /* length of string section */
};
The magic is ``0xeB9F``, which has a different encoding on big- and little-
endian systems, and can be used to test whether BTF is generated for a
big- or little-endian target.
The btf_header is designed to be extensible with hdr_len equal to
``sizeof(struct btf_header)`` when the data blob is generated.
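As a rough illustration of the points above, a userspace consumer might
sanity-check the header along these lines (a minimal sketch only;
``check_btf_header()`` is a hypothetical helper, not a kernel or libbpf API)::

    #include <stdio.h>
    #include <stddef.h>
    #include <linux/btf.h>          /* struct btf_header, BTF_MAGIC */

    static int check_btf_header(const void *data, size_t size)
    {
        const struct btf_header *hdr = data;

        if (size < sizeof(*hdr))
            return -1;
        /* Reading 0x9FeB instead of 0xeB9F means the blob was generated
         * for the opposite endianness. */
        if (hdr->magic != BTF_MAGIC) {
            fprintf(stderr, "bad BTF magic 0x%x\n", hdr->magic);
            return -1;
        }
        /* hdr_len may be larger than sizeof(*hdr) if the blob comes from
         * a newer generator; unknown trailing header fields are ignored. */
        if (hdr->hdr_len < sizeof(*hdr) || hdr->hdr_len > size)
            return -1;
        /* Section offsets are relative to the end of the header. */
        if (hdr->hdr_len + hdr->type_off + hdr->type_len > size ||
            hdr->hdr_len + hdr->str_off + hdr->str_len > size)
            return -1;
        return 0;
    }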
2.1 String Encoding
===================
The first string in the string section must be a null string.
The rest of the string table is a concatenation of other null-terminated
strings.
2.2 Type Encoding
=================
The type id ``0`` is reserved for ``void`` type.
The type section is parsed sequentially and the type id is assigned to
each recognized type starting from id ``1``.
Currently, the following types are supported::
#define BTF_KIND_INT 1 /* Integer */
#define BTF_KIND_PTR 2 /* Pointer */
#define BTF_KIND_ARRAY 3 /* Array */
#define BTF_KIND_STRUCT 4 /* Struct */
#define BTF_KIND_UNION 5 /* Union */
#define BTF_KIND_ENUM 6 /* Enumeration */
#define BTF_KIND_FWD 7 /* Forward */
#define BTF_KIND_TYPEDEF 8 /* Typedef */
#define BTF_KIND_VOLATILE 9 /* Volatile */
#define BTF_KIND_CONST 10 /* Const */
#define BTF_KIND_RESTRICT 11 /* Restrict */
#define BTF_KIND_FUNC 12 /* Function */
#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
Note that the type section encodes debug info, not just pure types.
``BTF_KIND_FUNC`` is not a type, and it represents a defined subprogram.
Each type contains the following common data::
struct btf_type {
__u32 name_off;
/* "info" bits arrangement
* bits 0-15: vlen (e.g. # of struct's members)
* bits 16-23: unused
* bits 24-27: kind (e.g. int, ptr, array...etc)
* bits 28-30: unused
* bit 31: kind_flag, currently used by
* struct, union and fwd
*/
__u32 info;
/* "size" is used by INT, ENUM, STRUCT and UNION.
* "size" tells the size of the type it is describing.
*
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
* FUNC and FUNC_PROTO.
* "type" is a type_id referring to another type.
*/
union {
__u32 size;
__u32 type;
};
};
For certain kinds, the common data are followed by kind-specific data.
The ``name_off`` in ``struct btf_type`` specifies the offset in the string table.
The following sections detail the encoding of each kind.
2.2.1 BTF_KIND_INT
~~~~~~~~~~~~~~~~~~
``struct btf_type`` encoding requirement:
* ``name_off``: any valid offset
* ``info.kind_flag``: 0
* ``info.kind``: BTF_KIND_INT
* ``info.vlen``: 0
* ``size``: the size of the int type in bytes.
``btf_type`` is followed by a ``u32`` with the following bit arrangement::
#define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24)
#define BTF_INT_OFFSET(VAL) (((VAL & 0x00ff0000)) >> 16)
#define BTF_INT_BITS(VAL) ((VAL) & 0x000000ff)
The ``BTF_INT_ENCODING`` has the following attributes::
#define BTF_INT_SIGNED (1 << 0)
#define BTF_INT_CHAR (1 << 1)
#define BTF_INT_BOOL (1 << 2)
The ``BTF_INT_ENCODING()`` provides extra information: signedness,
char, or bool, for the int type. The char and bool encodings
are mostly useful for pretty printing. At most one encoding can
be specified for the int type.
The ``BTF_INT_BITS()`` specifies the number of actual bits held by
this int type. For example, a 4-bit bitfield has
``BTF_INT_BITS()`` equal to 4. The ``btf_type.size * 8``
must be equal to or greater than ``BTF_INT_BITS()`` for the type.
The maximum value of ``BTF_INT_BITS()`` is 128.
The ``BTF_INT_OFFSET()`` specifies the starting bit offset to
calculate values for this int. For example, a bitfield struct
member has
* btf member bit offset 100 from the start of the structure,
* btf member pointing to an int type,
* the int type has ``BTF_INT_OFFSET() = 2`` and ``BTF_INT_BITS() = 4``
Then in the struct memory layout, this member will occupy
``4`` bits starting from bit ``100 + 2 = 102``.
Alternatively, the bitfield struct member can be the following to
access the same bits as the above:
* btf member bit offset 102,
* btf member pointing to an int type,
* the int type has ``BTF_INT_OFFSET() = 0`` and ``BTF_INT_BITS() = 4``
The original intention of ``BTF_INT_OFFSET()`` is to provide
flexibility of bitfield encoding.
Currently, both llvm and pahole generate ``BTF_INT_OFFSET() = 0``
for all int types.
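Putting the macros above together, a reader of the type section could decode
the extra ``u32`` that follows a ``BTF_KIND_INT`` entry roughly as below (a
sketch only; the string table lookup for ``name_off`` is omitted)::

    #include <stdio.h>
    #include <linux/btf.h>          /* struct btf_type, BTF_INT_* macros */

    static void print_int_type(const struct btf_type *t)
    {
        /* The kind-specific u32 sits directly after the common btf_type. */
        __u32 v = *(const __u32 *)(t + 1);
        __u32 enc = BTF_INT_ENCODING(v);

        printf("int: %u bits at bit offset %u%s%s%s\n",
               BTF_INT_BITS(v), BTF_INT_OFFSET(v),
               (enc & BTF_INT_SIGNED) ? ", signed" : "",
               (enc & BTF_INT_CHAR) ? ", char" : "",
               (enc & BTF_INT_BOOL) ? ", bool" : "");
    }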
2.2.2 BTF_KIND_PTR
~~~~~~~~~~~~~~~~~~
``struct btf_type`` encoding requirement:
* ``name_off``: 0
* ``info.kind_flag``: 0
* ``info.kind``: BTF_KIND_PTR
* ``info.vlen``: 0
* ``type``: the pointee type of the pointer
No additional type data follow ``btf_type``.
2.2.3 BTF_KIND_ARRAY
~~~~~~~~~~~~~~~~~~~~
``struct btf_type`` encoding requirement:
* ``name_off``: 0
* ``info.kind_flag``: 0
* ``info.kind``: BTF_KIND_ARRAY
* ``info.vlen``: 0
* ``size/type``: 0, not used
``btf_type`` is followed by one ``struct btf_array``::
struct btf_array {
__u32 type;
__u32 index_type;
__u32 nelems;
};
The ``struct btf_array`` encoding:
* ``type``: the element type
* ``index_type``: the index type
* ``nelems``: the number of elements for this array (``0`` is also allowed).
The ``index_type`` can be any regular int type
(u8, u16, u32, u64, unsigned __int128).
The original design of including ``index_type`` follows DWARF,
which has an ``index_type`` for its array type.
Currently in BTF, beyond type verification, the ``index_type`` is not used.
The ``struct btf_array`` allows chaining through element type to represent
multidimensional arrays. For example, for ``int a[5][6]``, the following
type information illustrates the chaining:
* [1]: int
* [2]: array, ``btf_array.type = [1]``, ``btf_array.nelems = 6``
* [3]: array, ``btf_array.type = [2]``, ``btf_array.nelems = 5``
Currently, both pahole and llvm collapse a multidimensional array
into a one-dimensional array, e.g., for ``a[5][6]``, ``btf_array.nelems``
is equal to ``30``. This is because the original use case is map pretty
printing, where the whole array is dumped out, so a one-dimensional array
is enough. As more BTF usage is explored, pahole and llvm can be
changed to generate a proper chained representation for
multidimensional arrays.
2.2.4 BTF_KIND_STRUCT
~~~~~~~~~~~~~~~~~~~~~
2.2.5 BTF_KIND_UNION
~~~~~~~~~~~~~~~~~~~~
``struct btf_type`` encoding requirement:
* ``name_off``: 0 or offset to a valid C identifier
* ``info.kind_flag``: 0 or 1
* ``info.kind``: BTF_KIND_STRUCT or BTF_KIND_UNION
* ``info.vlen``: the number of struct/union members
* ``size``: the size of the struct/union in bytes
``btf_type`` is followed by ``info.vlen`` number of ``struct btf_member``.::
struct btf_member {
__u32 name_off;
__u32 type;
__u32 offset;
};
``struct btf_member`` encoding:
* ``name_off``: offset to a valid C identifier
* ``type``: the member type
* ``offset``: <see below>
If the type info ``kind_flag`` is not set, the offset contains
only the bit offset of the member. Note that the base type of the
bitfield can only be int or enum type. If the bitfield size
is 32, the base type can be either int or enum type.
If the bitfield size is not 32, the base type must be int,
and int type ``BTF_INT_BITS()`` encodes the bitfield size.
If the ``kind_flag`` is set, the ``btf_member.offset``
contains both member bitfield size and bit offset. The
bitfield size and bit offset are calculated as below.::
#define BTF_MEMBER_BITFIELD_SIZE(val) ((val) >> 24)
#define BTF_MEMBER_BIT_OFFSET(val) ((val) & 0xffffff)
In this case, if the base type is an int type, it must
be a regular int type:
* ``BTF_INT_OFFSET()`` must be 0.
* ``BTF_INT_BITS()`` must be equal to ``{1,2,4,8,16} * 8``.
The following kernel patch introduced ``kind_flag`` and
explained why both modes exist:
https://github.com/torvalds/linux/commit/9d5f9f701b1891466fb3dbb1806ad97716f95cc3#diff-fa650a64fdd3968396883d2fe8215ff3
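As a sketch of how a consumer might handle the two modes described above,
using the ``BTF_INFO_KFLAG()`` and ``BTF_MEMBER_*()`` helpers from
``include/uapi/linux/btf.h`` (``member_bits()`` itself is hypothetical)::

    #include <linux/btf.h>

    static void member_bits(const struct btf_type *struct_type,
                            const struct btf_member *m,
                            __u32 *bit_offset, __u32 *bitfield_size)
    {
        if (BTF_INFO_KFLAG(struct_type->info)) {
            /* kind_flag set: offset packs both values. */
            *bit_offset = BTF_MEMBER_BIT_OFFSET(m->offset);
            *bitfield_size = BTF_MEMBER_BITFIELD_SIZE(m->offset);
        } else {
            /* kind_flag clear: offset is the plain bit offset; a bitfield
             * size, if any, comes from BTF_INT_BITS() of the member's int
             * type instead. */
            *bit_offset = m->offset;
            *bitfield_size = 0;
        }
    }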
2.2.6 BTF_KIND_ENUM
~~~~~~~~~~~~~~~~~~~
``struct btf_type`` encoding requirement:
* ``name_off``: 0 or offset to a valid C identifier
* ``info.kind_flag``: 0
* ``info.kind``: BTF_KIND_ENUM
* ``info.vlen``: number of enum values
* ``size``: 4
``btf_type`` is followed by ``info.vlen`` number of ``struct btf_enum``.::
struct btf_enum {
__u32 name_off;
__s32 val;
};
The ``btf_enum`` encoding:
* ``name_off``: offset to a valid C identifier
* ``val``: any value
2.2.7 BTF_KIND_FWD
~~~~~~~~~~~~~~~~~~
``struct btf_type`` encoding requirement:
* ``name_off``: offset to a valid C identifier
* ``info.kind_flag``: 0 for struct, 1 for union
* ``info.kind``: BTF_KIND_FWD
* ``info.vlen``: 0
* ``type``: 0
No additional type data follow ``btf_type``.
2.2.8 BTF_KIND_TYPEDEF
~~~~~~~~~~~~~~~~~~~~~~
``struct btf_type`` encoding requirement:
* ``name_off``: offset to a valid C identifier
* ``info.kind_flag``: 0
* ``info.kind``: BTF_KIND_TYPEDEF
* ``info.vlen``: 0
* ``type``: the type which can be referred by name at ``name_off``
No additional type data follow ``btf_type``.
2.2.9 BTF_KIND_VOLATILE
~~~~~~~~~~~~~~~~~~~~~~~
``struct btf_type`` encoding requirement:
* ``name_off``: 0
* ``info.kind_flag``: 0
* ``info.kind``: BTF_KIND_VOLATILE
* ``info.vlen``: 0
* ``type``: the type with ``volatile`` qualifier
No additional type data follow ``btf_type``.
2.2.10 BTF_KIND_CONST
~~~~~~~~~~~~~~~~~~~~~
``struct btf_type`` encoding requirement:
* ``name_off``: 0
* ``info.kind_flag``: 0
* ``info.kind``: BTF_KIND_CONST
* ``info.vlen``: 0
* ``type``: the type with ``const`` qualifier
No additional type data follow ``btf_type``.
2.2.11 BTF_KIND_RESTRICT
~~~~~~~~~~~~~~~~~~~~~~~~
``struct btf_type`` encoding requirement:
* ``name_off``: 0
* ``info.kind_flag``: 0
* ``info.kind``: BTF_KIND_RESTRICT
* ``info.vlen``: 0
* ``type``: the type with ``restrict`` qualifier
No additional type data follow ``btf_type``.
2.2.12 BTF_KIND_FUNC
~~~~~~~~~~~~~~~~~~~~
``struct btf_type`` encoding requirement:
* ``name_off``: offset to a valid C identifier
* ``info.kind_flag``: 0
* ``info.kind``: BTF_KIND_FUNC
* ``info.vlen``: 0
* ``type``: a BTF_KIND_FUNC_PROTO type
No additional type data follow ``btf_type``.
A BTF_KIND_FUNC defines, not a type, but a subprogram (function) whose
signature is defined by ``type``. The subprogram is thus an instance of
that type. The BTF_KIND_FUNC may in turn be referenced by a func_info in
the :ref:`BTF_Ext_Section` (ELF) or in the arguments to
:ref:`BPF_Prog_Load` (ABI).
2.2.13 BTF_KIND_FUNC_PROTO
~~~~~~~~~~~~~~~~~~~~~~~~~~
``struct btf_type`` encoding requirement:
* ``name_off``: 0
* ``info.kind_flag``: 0
* ``info.kind``: BTF_KIND_FUNC_PROTO
* ``info.vlen``: # of parameters
* ``type``: the return type
``btf_type`` is followed by ``info.vlen`` number of ``struct btf_param``.::
struct btf_param {
__u32 name_off;
__u32 type;
};
If a BTF_KIND_FUNC_PROTO type is referred to by a BTF_KIND_FUNC type,
then ``btf_param.name_off`` must point to a valid C identifier
except for the possible last argument representing the variable
argument. The ``btf_param.type`` refers to the parameter type.
If the function has variable arguments, the last parameter
is encoded with ``name_off = 0`` and ``type = 0``.
3. BTF Kernel API
*****************
The following bpf syscall commands involve BTF:
* BPF_BTF_LOAD: load a blob of BTF data into kernel
* BPF_MAP_CREATE: map creation with btf key and value type info.
* BPF_PROG_LOAD: prog load with btf function and line info.
* BPF_BTF_GET_FD_BY_ID: get a btf fd
* BPF_OBJ_GET_INFO_BY_FD: btf, func_info, line_info
and other btf related info are returned.
The workflow typically looks like:
::
Application:
BPF_BTF_LOAD
|
v
BPF_MAP_CREATE and BPF_PROG_LOAD
|
V
......
Introspection tool:
......
BPF_{PROG,MAP}_GET_NEXT_ID (get prog/map id's)
|
V
BPF_{PROG,MAP}_GET_FD_BY_ID (get a prog/map fd)
|
V
BPF_OBJ_GET_INFO_BY_FD (get bpf_prog_info/bpf_map_info with btf_id)
| |
V |
BPF_BTF_GET_FD_BY_ID (get btf_fd) |
| |
V |
BPF_OBJ_GET_INFO_BY_FD (get btf) |
| |
V V
pretty print types, dump func signatures and line info, etc.
3.1 BPF_BTF_LOAD
================
Load a blob of BTF data into the kernel. A blob of data,
described in :ref:`BTF_Type_String`,
can be directly loaded into the kernel.
A ``btf_fd`` is returned to userspace.
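For illustration, loading a blob with the raw bpf(2) syscall could look
roughly like the sketch below (in practice libbpf wraps this; error handling
is trimmed and ``btf_load()`` is a hypothetical helper)::

    #include <string.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/bpf.h>

    static int btf_load(const void *btf_data, __u32 btf_size,
                        char *log_buf, __u32 log_size)
    {
        union bpf_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.btf = (__u64)(unsigned long)btf_data;
        attr.btf_size = btf_size;
        attr.btf_log_buf = (__u64)(unsigned long)log_buf;
        attr.btf_log_size = log_size;
        attr.btf_log_level = log_buf ? 1 : 0;

        /* Returns the new btf_fd on success, < 0 on failure. */
        return syscall(__NR_bpf, BPF_BTF_LOAD, &attr, sizeof(attr));
    }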
3.2 BPF_MAP_CREATE
==================
A map can be created with ``btf_fd`` and specified key/value type id.::
__u32 btf_fd; /* fd pointing to a BTF type data */
__u32 btf_key_type_id; /* BTF type_id of the key */
__u32 btf_value_type_id; /* BTF type_id of the value */
In libbpf, the map can be defined with extra annotation like below:
::
struct bpf_map_def SEC("maps") btf_map = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(struct ipv_counts),
.max_entries = 4,
};
BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
Here, the parameters for the macro BPF_ANNOTATE_KV_PAIR are the map name and the
key and value types for the map.
During ELF parsing, libbpf is able to extract key/value type_id's
and assign them to BPF_MAP_CREATE attributes automatically.
.. _BPF_Prog_Load:
3.3 BPF_PROG_LOAD
=================
During prog_load, func_info and line_info can be passed to the kernel with
proper values for the following attributes:
::
__u32 insn_cnt;
__aligned_u64 insns;
......
__u32 prog_btf_fd; /* fd pointing to BTF type data */
__u32 func_info_rec_size; /* userspace bpf_func_info size */
__aligned_u64 func_info; /* func info */
__u32 func_info_cnt; /* number of bpf_func_info records */
__u32 line_info_rec_size; /* userspace bpf_line_info size */
__aligned_u64 line_info; /* line info */
__u32 line_info_cnt; /* number of bpf_line_info records */
The func_info and line_info are arrays of the records below, respectively.::
struct bpf_func_info {
__u32 insn_off; /* [0, insn_cnt - 1] */
__u32 type_id; /* pointing to a BTF_KIND_FUNC type */
};
struct bpf_line_info {
__u32 insn_off; /* [0, insn_cnt - 1] */
__u32 file_name_off; /* offset to string table for the filename */
__u32 line_off; /* offset to string table for the source line */
__u32 line_col; /* line number and column number */
};
func_info_rec_size is the size of each func_info record, and line_info_rec_size
is the size of each line_info record. Passing the record size to the kernel makes
it possible to extend the record itself in the future.
Below are requirements for func_info:
* func_info[0].insn_off must be 0.
* the func_info insn_off is in strictly increasing order and matches
bpf func boundaries.
Below are requirements for line_info:
* the first insn in each func must point to a line_info record.
* the line_info insn_off is in strictly increasing order.
For line_info, the line number and column number are defined as below:
::
#define BPF_LINE_INFO_LINE_NUM(line_col) ((line_col) >> 10)
#define BPF_LINE_INFO_LINE_COL(line_col) ((line_col) & 0x3ff)
3.4 BPF_{PROG,MAP}_GET_NEXT_ID
==============================
In the kernel, every loaded program, map or btf has a unique id.
The id won't change during the lifetime of the program, map or btf.
The bpf syscall command BPF_{PROG,MAP}_GET_NEXT_ID
returns all id's, one for each command, to user space, for bpf
programs or maps respectively,
so an inspection tool can inspect all programs and maps.
3.5 BPF_{PROG,MAP}_GET_FD_BY_ID
===============================
An introspection tool cannot use the id to get details about a program or map.
A file descriptor needs to be obtained first, for reference counting purposes.
3.6 BPF_OBJ_GET_INFO_BY_FD
==========================
Once a program/map fd is acquired, an introspection tool can
get the detailed information from the kernel about this fd,
some of which is btf related. For example,
``bpf_map_info`` returns ``btf_id`` and key/value type ids.
``bpf_prog_info`` returns ``btf_id``, func_info, and line info
for translated bpf byte codes, and jited_line_info.
3.7 BPF_BTF_GET_FD_BY_ID
========================
With the ``btf_id`` obtained in ``bpf_map_info`` and ``bpf_prog_info``, the
bpf syscall command BPF_BTF_GET_FD_BY_ID can retrieve a btf fd.
Then, with the command BPF_OBJ_GET_INFO_BY_FD, the btf blob, originally
loaded into the kernel with BPF_BTF_LOAD, can be retrieved.
With the btf blob, ``bpf_map_info``, and ``bpf_prog_info``, the introspection
tool has full btf knowledge and is able to pretty print map key/values,
dump func signatures and line info, along with byte/jit codes.
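A rough userspace sketch of this last step, going from a ``btf_id`` to the
raw blob (``btf_get_blob()`` is a hypothetical helper; real tools normally go
through libbpf rather than raw syscalls)::

    #include <string.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/bpf.h>

    static int btf_get_blob(__u32 btf_id, void *buf, __u32 buf_size)
    {
        struct bpf_btf_info info;
        union bpf_attr attr;
        int btf_fd, err;

        memset(&attr, 0, sizeof(attr));
        attr.btf_id = btf_id;
        btf_fd = syscall(__NR_bpf, BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
        if (btf_fd < 0)
            return btf_fd;

        memset(&info, 0, sizeof(info));
        info.btf = (__u64)(unsigned long)buf;
        info.btf_size = buf_size;

        memset(&attr, 0, sizeof(attr));
        attr.info.bpf_fd = btf_fd;
        attr.info.info_len = sizeof(info);
        attr.info.info = (__u64)(unsigned long)&info;
        err = syscall(__NR_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));

        close(btf_fd);
        return err;     /* on success, info.btf_size holds the blob size */
    }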
4. ELF File Format Interface
****************************
4.1 .BTF section
================
The .BTF section contains type and string data. The format of this section
is the same as the one described in :ref:`BTF_Type_String`.
.. _BTF_Ext_Section:
4.2 .BTF.ext section
====================
The .BTF.ext section encodes func_info and line_info which
need loader manipulation before loading into the kernel.
The specification for the .BTF.ext section is defined in
``tools/lib/bpf/btf.h`` and ``tools/lib/bpf/btf.c``.
The current header of the .BTF.ext section::
struct btf_ext_header {
__u16 magic;
__u8 version;
__u8 flags;
__u32 hdr_len;
/* All offsets are in bytes relative to the end of this header */
__u32 func_info_off;
__u32 func_info_len;
__u32 line_info_off;
__u32 line_info_len;
};
It is very similar to the .BTF section. Instead of type and string sections,
it contains func_info and line_info sections. See :ref:`BPF_Prog_Load`
for details about func_info and line_info record format.
The func_info is organized as below.::
func_info_rec_size
btf_ext_info_sec for section #1 /* func_info for section #1 */
btf_ext_info_sec for section #2 /* func_info for section #2 */
...
``func_info_rec_size`` specifies the size of ``bpf_func_info`` structure
when .BTF.ext is generated. btf_ext_info_sec, defined below, is
the func_info for each specific ELF section.::
struct btf_ext_info_sec {
__u32 sec_name_off; /* offset to section name */
__u32 num_info;
/* Followed by num_info * record_size number of bytes */
__u8 data[0];
};
Here, num_info must be greater than 0.
The line_info is organized as below.::
line_info_rec_size
btf_ext_info_sec for section #1 /* line_info for section #1 */
btf_ext_info_sec for section #2 /* line_info for section #2 */
...
``line_info_rec_size`` specifies the size of ``bpf_line_info`` structure
when .BTF.ext is generated.
The interpretation of ``bpf_func_info->insn_off`` and
``bpf_line_info->insn_off`` is different between the kernel API and the ELF API.
For the kernel API, the ``insn_off`` is the instruction offset in units
of ``struct bpf_insn``. For the ELF API, the ``insn_off`` is the byte offset
from the beginning of the section (``btf_ext_info_sec->sec_name_off``).
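A loader therefore has to convert the byte offsets found in .BTF.ext into
instruction indices before issuing BPF_PROG_LOAD, conceptually something like
the sketch below (this is roughly what libbpf does internally; the helper
name is made up)::

    #include <linux/bpf.h>          /* struct bpf_insn, struct bpf_func_info */

    static void elf_to_kernel_insn_off(struct bpf_func_info *rec,
                                       __u32 prog_byte_off_in_sec)
    {
        /* ELF API: byte offset from the start of the ELF section.
         * Kernel API: offset in units of struct bpf_insn (8 bytes),
         * relative to the start of the program. */
        rec->insn_off = (rec->insn_off - prog_byte_off_in_sec) /
                        sizeof(struct bpf_insn);
    }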
5. Using BTF
************
5.1 bpftool map pretty print
============================
With BTF, the map key/value can be printed based on fields rather than
simply raw bytes. This is especially
valuable for large structures or if your data structure
has bitfields. For example, for the following map,::
enum A { A1, A2, A3, A4, A5 };
typedef enum A ___A;
struct tmp_t {
char a1:4;
int a2:4;
int :4;
__u32 a3:4;
int b;
___A b1:4;
enum A b2:4;
};
struct bpf_map_def SEC("maps") tmpmap = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(struct tmp_t),
.max_entries = 1,
};
BPF_ANNOTATE_KV_PAIR(tmpmap, int, struct tmp_t);
bpftool is able to pretty print like below:
::
[{
"key": 0,
"value": {
"a1": 0x2,
"a2": 0x4,
"a3": 0x6,
"b": 7,
"b1": 0x8,
"b2": 0xa
}
}
]
5.2 bpftool prog dump
=====================
The following is an example showing how func_info and line_info
can help prog dump with better kernel symbol names, function prototypes
and line information.::
$ bpftool prog dump jited pinned /sys/fs/bpf/test_btf_haskv
[...]
int test_long_fname_2(struct dummy_tracepoint_args * arg):
bpf_prog_44a040bf25481309_test_long_fname_2:
; static int test_long_fname_2(struct dummy_tracepoint_args *arg)
0: push %rbp
1: mov %rsp,%rbp
4: sub $0x30,%rsp
b: sub $0x28,%rbp
f: mov %rbx,0x0(%rbp)
13: mov %r13,0x8(%rbp)
17: mov %r14,0x10(%rbp)
1b: mov %r15,0x18(%rbp)
1f: xor %eax,%eax
21: mov %rax,0x20(%rbp)
25: xor %esi,%esi
; int key = 0;
27: mov %esi,-0x4(%rbp)
; if (!arg->sock)
2a: mov 0x8(%rdi),%rdi
; if (!arg->sock)
2e: cmp $0x0,%rdi
32: je 0x0000000000000070
34: mov %rbp,%rsi
; counts = bpf_map_lookup_elem(&btf_map, &key);
[...]
5.3 verifier log
================
The following is an example of how line_info can help debug verifier failures.::
/* The code at tools/testing/selftests/bpf/test_xdp_noinline.c
* is modified as below.
*/
data = (void *)(long)xdp->data;
data_end = (void *)(long)xdp->data_end;
/*
if (data + 4 > data_end)
return XDP_DROP;
*/
*(u32 *)data = dst->dst;
$ bpftool prog load ./test_xdp_noinline.o /sys/fs/bpf/test_xdp_noinline type xdp
; data = (void *)(long)xdp->data;
224: (79) r2 = *(u64 *)(r10 -112)
225: (61) r2 = *(u32 *)(r2 +0)
; *(u32 *)data = dst->dst;
226: (63) *(u32 *)(r2 +0) = r1
invalid access to packet, off=0 size=4, R2(id=0,off=0,r=0)
R2 offset is outside of the packet
6. BTF Generation
*****************
You need the latest pahole
https://git.kernel.org/pub/scm/devel/pahole/pahole.git/
or llvm (8.0 or later). pahole acts as a dwarf2btf converter. It doesn't support .BTF.ext
and the BTF BTF_KIND_FUNC type yet. For example,::
-bash-4.4$ cat t.c
struct t {
int a:2;
int b:3;
int c:2;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
-bash-4.4$ pahole -JV t.o
File t.o:
[1] STRUCT t kind_flag=1 size=4 vlen=3
a type_id=2 bitfield_size=2 bits_offset=0
b type_id=2 bitfield_size=3 bits_offset=2
c type_id=2 bitfield_size=2 bits_offset=5
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
llvm is able to generate .BTF and .BTF.ext directly with -g for the bpf target only.
The assembly code (-S) is able to show the BTF encoding in assembly format.::
-bash-4.4$ cat t2.c
typedef int __int32;
struct t2 {
int a2;
int (*f2)(char q1, __int32 q2, ...);
int (*f3)();
} g2;
int main() { return 0; }
int test() { return 0; }
-bash-4.4$ clang -c -g -O2 -target bpf t2.c
-bash-4.4$ readelf -S t2.o
......
[ 8] .BTF PROGBITS 0000000000000000 00000247
000000000000016e 0000000000000000 0 0 1
[ 9] .BTF.ext PROGBITS 0000000000000000 000003b5
0000000000000060 0000000000000000 0 0 1
[10] .rel.BTF.ext REL 0000000000000000 000007e0
0000000000000040 0000000000000010 16 9 8
......
-bash-4.4$ clang -S -g -O2 -target bpf t2.c
-bash-4.4$ cat t2.s
......
.section .BTF,"",@progbits
.short 60319 # 0xeb9f
.byte 1
.byte 0
.long 24
.long 0
.long 220
.long 220
.long 122
.long 0 # BTF_KIND_FUNC_PROTO(id = 1)
.long 218103808 # 0xd000000
.long 2
.long 83 # BTF_KIND_INT(id = 2)
.long 16777216 # 0x1000000
.long 4
.long 16777248 # 0x1000020
......
.byte 0 # string offset=0
.ascii ".text" # string offset=1
.byte 0
.ascii "/home/yhs/tmp-pahole/t2.c" # string offset=7
.byte 0
.ascii "int main() { return 0; }" # string offset=33
.byte 0
.ascii "int test() { return 0; }" # string offset=58
.byte 0
.ascii "int" # string offset=83
......
.section .BTF.ext,"",@progbits
.short 60319 # 0xeb9f
.byte 1
.byte 0
.long 24
.long 0
.long 28
.long 28
.long 44
.long 8 # FuncInfo
.long 1 # FuncInfo section string offset=1
.long 2
.long .Lfunc_begin0
.long 3
.long .Lfunc_begin1
.long 5
.long 16 # LineInfo
.long 1 # LineInfo section string offset=1
.long 2
.long .Ltmp0
.long 7
.long 33
.long 7182 # Line 7 Col 14
.long .Ltmp3
.long 7
.long 58
.long 8206 # Line 8 Col 14
7. Testing
**********
The kernel bpf selftest `test_btf.c` provides an extensive set of BTF-related tests.


@ -15,6 +15,13 @@ that goes into great technical depth about the BPF Architecture.
The primary info for the bpf syscall is available in the `man-pages`_
for `bpf(2)`_.
BPF Type Format (BTF)
=====================
.. toctree::
:maxdepth: 1
btf
Frequently asked questions (FAQ)


@ -108,12 +108,13 @@ some, but not all of the other indices changing.
Sometimes you need to ensure that a subsequent call to :c:func:`xa_store`
will not need to allocate memory. The :c:func:`xa_reserve` function
will store a reserved entry at the indicated index. Users of the normal
API will see this entry as containing ``NULL``. If you do not need to
use the reserved entry, you can call :c:func:`xa_release` to remove the
unused entry. If another user has stored to the entry in the meantime,
:c:func:`xa_release` will do nothing; if instead you want the entry to
become ``NULL``, you should use :c:func:`xa_erase`.
will store a reserved entry at the indicated index. Users of the
normal API will see this entry as containing ``NULL``. If you do
not need to use the reserved entry, you can call :c:func:`xa_release`
to remove the unused entry. If another user has stored to the entry
in the meantime, :c:func:`xa_release` will do nothing; if instead you
want the entry to become ``NULL``, you should use :c:func:`xa_erase`.
Using :c:func:`xa_insert` on a reserved entry will fail.
If all entries in the array are ``NULL``, the :c:func:`xa_empty` function
will return ``true``.
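A minimal kernel-side sketch of the reserve/release pattern described above
(the array and function names are illustrative only)::

    #include <linux/xarray.h>

    static DEFINE_XARRAY(my_array);

    static int reserve_then_maybe_store(unsigned long index, void *entry)
    {
        int err;

        /* Guarantee that a later store at this index cannot fail on
         * memory allocation. */
        err = xa_reserve(&my_array, index, GFP_KERNEL);
        if (err)
            return err;

        if (!entry) {
            /* Turned out we did not need the slot after all. */
            xa_release(&my_array, index);
            return 0;
        }

        /* Readers saw NULL at this index until this store. */
        xa_store(&my_array, index, entry, GFP_KERNEL);
        return 0;
    }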
@ -183,6 +184,8 @@ Takes xa_lock internally:
* :c:func:`xa_store_bh`
* :c:func:`xa_store_irq`
* :c:func:`xa_insert`
* :c:func:`xa_insert_bh`
* :c:func:`xa_insert_irq`
* :c:func:`xa_erase`
* :c:func:`xa_erase_bh`
* :c:func:`xa_erase_irq`


@ -235,4 +235,4 @@ cpus {
===========================================
[1] ARM Linux Kernel documentation - CPUs bindings
Documentation/devicetree/bindings/arm/cpus.txt
Documentation/devicetree/bindings/arm/cpus.yaml


@ -684,7 +684,7 @@ cpus {
===========================================
[1] ARM Linux Kernel documentation - CPUs bindings
Documentation/devicetree/bindings/arm/cpus.txt
Documentation/devicetree/bindings/arm/cpus.yaml
[2] ARM Linux Kernel documentation - PSCI bindings
Documentation/devicetree/bindings/arm/psci.txt


@ -4,7 +4,7 @@ SP810 System Controller
Required properties:
- compatible: standard compatible string for a Primecell peripheral,
see Documentation/devicetree/bindings/arm/primecell.txt
see Documentation/devicetree/bindings/arm/primecell.yaml
for more details
should be: "arm,sp810", "arm,primecell"


@ -472,4 +472,4 @@ cpus {
===============================================================================
[1] ARM Linux kernel documentation
Documentation/devicetree/bindings/arm/cpus.txt
Documentation/devicetree/bindings/arm/cpus.yaml


@ -18,4 +18,4 @@ Required Properties:
Each clock is assigned an identifier and client nodes use this identifier
to specify the clock which they consume.
All these identifier could be found in <dt-bindings/clock/marvell-mmp2.h>.
All these identifiers could be found in <dt-bindings/clock/marvell,mmp2.h>.


@ -1,6 +1,6 @@
* ARM PrimeCell Color LCD Controller PL110/PL111
See also Documentation/devicetree/bindings/arm/primecell.txt
See also Documentation/devicetree/bindings/arm/primecell.yaml
Required properties:


@ -27,7 +27,6 @@ Example:
reg = <0x04300000 0x20000>;
reg-names = "kgsl_3d0_reg_memory";
interrupts = <GIC_SPI 80 0>;
interrupt-names = "kgsl_3d0_irq";
clock-names =
"core",
"iface",

View File

@ -14,8 +14,6 @@ Required properties:
"marvell,armada-8k-gpio" should be used for the Armada 7K and 8K
SoCs (either from AP or CP), see
Documentation/devicetree/bindings/arm/marvell/cp110-system-controller0.txt
and
Documentation/devicetree/bindings/arm/marvell/ap806-system-controller.txt
for specific details about the offset property.


@ -78,7 +78,7 @@ Sub-nodes:
PPI affinity can be expressed as a single "ppi-partitions" node,
containing a set of sub-nodes, each with the following property:
- affinity: Should be a list of phandles to CPU nodes (as described in
Documentation/devicetree/bindings/arm/cpus.txt).
Documentation/devicetree/bindings/arm/cpus.yaml).
GICv3 has one or more Interrupt Translation Services (ITS) that are
used to route Message Signalled Interrupts (MSI) to the CPUs.


@ -3,12 +3,16 @@ Mediatek MT7530 Ethernet switch
Required properties:
- compatible: Must be compatible = "mediatek,mt7530";
- compatible: may be compatible = "mediatek,mt7530"
or compatible = "mediatek,mt7621"
- #address-cells: Must be 1.
- #size-cells: Must be 0.
- mediatek,mcm: Boolean; if defined, indicates that MT7530 is the part on the
multi-chip module belonging to the MT7623A, rather than the standalone
chip as found on the MT7623N reference board.
If compatible mediatek,mt7530 is set then the following properties are required
- core-supply: Phandle to the regulator node necessary for the core power.
- io-supply: Phandle to the regulator node necessary for the I/O power.
See Documentation/devicetree/bindings/regulator/mt6323-regulator.txt


@ -0,0 +1,64 @@
Qualcomm Ethernet ETHQOS device
This documents the dwmac based Ethernet device which supports Gigabit
Ethernet for version v2.3.0 onwards.
This device has following properties:
Required properties:
- compatible: Should be "qcom,qcs404-ethqos"
- reg: Address and length of the register set for the device
- reg-names: Should contain register names "stmmaceth", "rgmii"
- clocks: Should contain phandle to clocks
- clock-names: Should contain clock names "stmmaceth", "pclk",
"ptp_ref", "rgmii"
- interrupts: Should contain phandle to interrupts
- interrupt-names: Should contain interrupt names "macirq", "eth_lpi"
The rest of the properties are defined in the stmmac.txt file in the same directory.
Example:
ethernet: ethernet@7a80000 {
compatible = "qcom,qcs404-ethqos";
reg = <0x07a80000 0x10000>,
<0x07a96000 0x100>;
reg-names = "stmmaceth", "rgmii";
clock-names = "stmmaceth", "pclk", "ptp_ref", "rgmii";
clocks = <&gcc GCC_ETH_AXI_CLK>,
<&gcc GCC_ETH_SLAVE_AHB_CLK>,
<&gcc GCC_ETH_PTP_CLK>,
<&gcc GCC_ETH_RGMII_CLK>;
interrupts = <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "macirq", "eth_lpi";
snps,reset-gpio = <&tlmm 60 GPIO_ACTIVE_LOW>;
snps,reset-active-low;
snps,txpbl = <8>;
snps,rxpbl = <2>;
snps,aal;
snps,tso;
phy-handle = <&phy1>;
phy-mode = "rgmii";
mdio {
#address-cells = <0x1>;
#size-cells = <0x0>;
compatible = "snps,dwmac-mdio";
phy1: phy@4 {
device_type = "ethernet-phy";
reg = <0x4>;
};
};
};


@ -17,6 +17,8 @@ Clock Properties:
- fsl,tmr-fiper1 Fixed interval period pulse generator.
- fsl,tmr-fiper2 Fixed interval period pulse generator.
- fsl,max-adj Maximum frequency adjustment in parts per billion.
- fsl,extts-fifo The presence of this property indicates hardware
support for the external trigger stamp FIFO.
These properties set the operational parameters for the PTP
clock. You must choose these carefully for the clock to work right.


@ -55,7 +55,7 @@ of these nodes are defined by the individual bindings for the specific function
= EXAMPLE
The following example represents the GLINK RPM node on a MSM8996 device, with
the function for the "rpm_request" channel defined, which is used for
regualtors and root clocks.
regulators and root clocks.
apcs_glb: mailbox@9820000 {
compatible = "qcom,msm8996-apcs-hmss-global";


@ -41,12 +41,12 @@ processor ID) and a string identifier.
- qcom,local-pid:
Usage: required
Value type: <u32>
Definition: specifies the identfier of the local endpoint of this edge
Definition: specifies the identifier of the local endpoint of this edge
- qcom,remote-pid:
Usage: required
Value type: <u32>
Definition: specifies the identfier of the remote endpoint of this edge
Definition: specifies the identifier of the remote endpoint of this edge
= SUBNODES
Each SMP2P pair contain a set of inbound and outbound entries, these are


@ -163,6 +163,14 @@ C. Boot options
be preserved until there actually is some text output to the console.
This option causes fbcon to bind immediately to the fbdev device.
7. fbcon=logo-pos:<location>
The only possible 'location' is 'center' (without quotes), and when
given, the bootup logo is moved from the default top-left corner
location to the center of the framebuffer. If more than one logo is
displayed due to multiple CPUs, the collected line of logos is moved
as a whole.
C. Attaching, Detaching and Unloading
Before going on to how to attach, detach and unload the framebuffer console, an


@ -1,86 +0,0 @@
The health mechanism is targeted for Real Time Alerting, in order to know when
something bad has happened to a PCI device
- Provide alert debug information
- Self healing
- If problem needs vendor support, provide a way to gather all needed debugging
information.
The main idea is to unify and centralize driver health reports in the
generic devlink instance and allow the user to set different
attributes of the health reporting and recovery procedures.
The devlink health reporter:
The device driver creates a "health reporter" for each error/health type.
An error/health type can be known/generic (e.g. pci error, fw error, rx/tx error)
or unknown (driver specific).
For each registered health reporter a driver can issue error/health reports
asynchronously. All health reports handling is done by devlink.
Device driver can provide specific callbacks for each "health reporter", e.g.
- Recovery procedures
- Diagnostics and object dump procedures
- OOB initial parameters
Different parts of the driver can register different types of health reporters
with different handlers.
Once an error is reported, devlink health will do the following actions:
* A log is sent to the kernel trace events buffer
* Health status and statistics are updated for the reporter instance
* An object dump is taken and saved at the reporter instance (as long as
there is no other dump which is already stored)
* An auto recovery attempt is made. This depends on:
- Auto-recovery configuration
- Grace period vs. time passed since last recovery
The user interface:
The user can access/change each reporter's parameters and driver-specific callbacks
via devlink, e.g. per error type (per health reporter):
- Configure reporter's generic parameters (like: disable/enable auto recovery)
- Invoke recovery procedure
- Run diagnostics
- Object dump
The devlink health interface (via netlink):
DEVLINK_CMD_HEALTH_REPORTER_GET
Retrieves status and configuration info per DEV and reporter.
DEVLINK_CMD_HEALTH_REPORTER_SET
Allows reporter-related configuration setting.
DEVLINK_CMD_HEALTH_REPORTER_RECOVER
Triggers a reporter's recovery procedure.
DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE
Retrieves diagnostics data from a reporter on a device.
DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET
Retrieves the last stored dump. Devlink health
saves a single dump. If a dump is not already stored by devlink
for this reporter, devlink generates a new dump.
The dump output is defined by the reporter.
DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR
Clears the last saved dump file for the specified reporter.
netlink
+--------------------------+
| |
| + |
| | |
+--------------------------+
|request for ops
|(diagnose,
mlx5_core devlink |recover,
|dump)
+--------+ +--------------------------+
| | | reporter| |
| | | +---------v----------+ |
| | ops execution | | | |
| <----------------------------------+ | |
| | | | | |
| | | + ^------------------+ |
| | | | request for ops |
| | | | (recover, dump) |
| | | | |
| | | +-+------------------+ |
| | health report | | health handler | |
| +-------------------------------> | |
| | | +--------------------+ |
| | health reporter create | |
| +----------------------------> |
+--------+ +--------------------------+


@ -0,0 +1,2 @@
fw_load_policy [DEVICE, GENERIC]
Configuration mode: driverinit


@ -865,7 +865,7 @@ Three LSB bits store instruction class which is one of:
BPF_STX 0x03 BPF_STX 0x03
BPF_ALU 0x04 BPF_ALU 0x04
BPF_JMP 0x05 BPF_JMP 0x05
BPF_RET 0x06 [ class 6 unused, for future if needed ]
BPF_RET 0x06 BPF_JMP32 0x06
BPF_MISC 0x07 BPF_ALU64 0x07
When BPF_CLASS(code) == BPF_ALU or BPF_JMP, 4th bit encodes source operand ...
@ -902,9 +902,9 @@ If BPF_CLASS(code) == BPF_ALU or BPF_ALU64 [ in eBPF ], BPF_OP(code) is one of:
BPF_ARSH 0xc0 /* eBPF only: sign extending shift right */
BPF_END 0xd0 /* eBPF only: endianness conversion */
If BPF_CLASS(code) == BPF_JMP, BPF_OP(code) is one of:
If BPF_CLASS(code) == BPF_JMP or BPF_JMP32 [ in eBPF ], BPF_OP(code) is one of:
BPF_JA 0x00
BPF_JA 0x00 /* BPF_JMP only */
BPF_JEQ 0x10
BPF_JGT 0x20
BPF_JGE 0x30
@ -912,8 +912,8 @@ If BPF_CLASS(code) == BPF_JMP, BPF_OP(code) is one of:
BPF_JNE 0x50 /* eBPF only: jump != */
BPF_JSGT 0x60 /* eBPF only: signed '>' */
BPF_JSGE 0x70 /* eBPF only: signed '>=' */
BPF_CALL 0x80 /* eBPF only: function call */
BPF_EXIT 0x90 /* eBPF only: function return */
BPF_CALL 0x80 /* eBPF BPF_JMP only: function call */
BPF_EXIT 0x90 /* eBPF BPF_JMP only: function return */
BPF_JLT 0xa0 /* eBPF only: unsigned '<' */
BPF_JLE 0xb0 /* eBPF only: unsigned '<=' */
BPF_JSLT 0xc0 /* eBPF only: signed '<' */
@ -936,8 +936,9 @@ Classic BPF wastes the whole BPF_RET class to represent a single 'ret'
operation. Classic BPF_RET | BPF_K means copy imm32 into return register
and perform function exit. eBPF is modeled to match CPU, so BPF_JMP | BPF_EXIT
in eBPF means function exit only. The eBPF program needs to store return
value into register R0 before doing a BPF_EXIT. Class 6 in eBPF is currently
unused and reserved for future use.
value into register R0 before doing a BPF_EXIT. Class 6 in eBPF is used as
BPF_JMP32 to mean exactly the same operations as BPF_JMP, but with 32-bit wide
operands for the comparisons instead.
For load and store instructions the 8-bit 'code' field is divided as:


@ -11,24 +11,25 @@ Contents:
batman-adv
can
can_ucan_protocol
dpaa2/index
e100
e1000
e1000e
fm10k
igb
igbvf
ixgb
ixgbe
ixgbevf
i40e
iavf
ice
device_drivers/freescale/dpaa2/index
device_drivers/intel/e100
device_drivers/intel/e1000
device_drivers/intel/e1000e
device_drivers/intel/fm10k
device_drivers/intel/igb
device_drivers/intel/igbvf
device_drivers/intel/ixgb
device_drivers/intel/ixgbe
device_drivers/intel/ixgbevf
device_drivers/intel/i40e
device_drivers/intel/iavf
device_drivers/intel/ice
kapi
z8530book
msg_zerocopy
failover
net_failover
phy
alias
bridge
snmp_counter


@ -0,0 +1,447 @@
=====================
PHY Abstraction Layer
=====================
Purpose
=======
Most network devices consist of a set of registers which provide an interface
to a MAC layer, which communicates with the physical connection through a
PHY. The PHY concerns itself with negotiating link parameters with the link
partner on the other side of the network connection (typically, an ethernet
cable), and provides a register interface to allow drivers to determine what
settings were chosen, and to configure what settings are allowed.
While these devices are distinct from the network devices, and conform to a
standard layout for the registers, it has been common practice to integrate
the PHY management code with the network driver. This has resulted in large
amounts of redundant code. Also, on embedded systems with multiple (and
sometimes quite different) ethernet controllers connected to the same
management bus, it is difficult to ensure safe use of the bus.
Since the PHYs are devices, and the management busses through which they are
accessed are, in fact, busses, the PHY Abstraction Layer treats them as such.
In doing so, it has these goals:
#. Increase code-reuse
#. Increase overall code-maintainability
#. Speed development time for new network drivers, and for new systems
Basically, this layer is meant to provide an interface to PHY devices which
allows network driver writers to write as little code as possible, while
still providing a full feature set.
The MDIO bus
============
Most network devices are connected to a PHY by means of a management bus.
Different devices use different busses (though some share common interfaces).
In order to take advantage of the PAL, each bus interface needs to be
registered as a distinct device.
#. read and write functions must be implemented. Their prototypes are::
int write(struct mii_bus *bus, int mii_id, int regnum, u16 value);
int read(struct mii_bus *bus, int mii_id, int regnum);
mii_id is the address on the bus for the PHY, and regnum is the register
number. These functions are guaranteed not to be called from interrupt
time, so it is safe for them to block, waiting for an interrupt to signal
the operation is complete
#. A reset function is optional. This is used to return the bus to an
initialized state.
#. A probe function is needed. This function should set up anything the bus
driver needs, set up the mii_bus structure, and register with the PAL using
mdiobus_register. Similarly, there's a remove function to undo all of
that (use mdiobus_unregister).
#. Like any driver, the device_driver structure must be configured, and init/exit
functions are used to register the driver.
#. The bus must also be declared somewhere as a device, and registered.
As an example of how one driver implemented an mdio bus driver, see
drivers/net/ethernet/freescale/fsl_pq_mdio.c and an associated DTS file
for one of the users. (e.g. "git grep fsl,.*-mdio arch/powerpc/boot/dts/")
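A condensed sketch of the pieces listed above (the my_mdio_* names are
placeholders; a real driver would also parse firmware/DT data, manage clocks,
and provide a remove path that calls mdiobus_unregister())::

    #include <linux/phy.h>
    #include <linux/platform_device.h>

    static int my_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
    {
        /* Talk to the hardware; may sleep waiting for completion. */
        return 0;
    }

    static int my_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
                             u16 value)
    {
        return 0;
    }

    static int my_mdio_probe(struct platform_device *pdev)
    {
        struct mii_bus *bus;

        bus = devm_mdiobus_alloc(&pdev->dev);
        if (!bus)
            return -ENOMEM;

        bus->name = "my_mdio";
        snprintf(bus->id, MII_BUS_ID_SIZE, "my-mdio-0");
        bus->read = my_mdio_read;
        bus->write = my_mdio_write;
        bus->parent = &pdev->dev;

        /* Scans the bus and creates phy_device instances for PHYs found. */
        return mdiobus_register(bus);
    }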
(RG)MII/electrical interface considerations
===========================================
The Reduced Gigabit Medium Independent Interface (RGMII) is a 12-pin
electrical signal interface using a synchronous 125MHz clock signal and several
data lines. Due to this design decision, a 1.5ns to 2ns delay must be added
between the clock line (RXC or TXC) and the data lines to let the PHY (clock
sink) have enough setup and hold times to sample the data lines correctly. The
PHY library offers different types of PHY_INTERFACE_MODE_RGMII* values to let
the PHY driver and optionally the MAC driver, implement the required delay. The
values of phy_interface_t must be understood from the perspective of the PHY
device itself, leading to the following:
* PHY_INTERFACE_MODE_RGMII: the PHY is not responsible for inserting any
internal delay by itself, it assumes that either the Ethernet MAC (if capable)
or the PCB traces insert the correct 1.5-2ns delay
* PHY_INTERFACE_MODE_RGMII_TXID: the PHY should insert an internal delay
for the transmit data lines (TXD[3:0]) processed by the PHY device
* PHY_INTERFACE_MODE_RGMII_RXID: the PHY should insert an internal delay
for the receive data lines (RXD[3:0]) processed by the PHY device
* PHY_INTERFACE_MODE_RGMII_ID: the PHY should insert internal delays for
both transmit AND receive data lines from/to the PHY device
Whenever possible, use the PHY side RGMII delay for these reasons:
* PHY devices may offer sub-nanosecond granularity in how they allow a
receiver/transmitter side delay (e.g: 0.5, 1.0, 1.5ns) to be specified. Such
precision may be required to account for differences in PCB trace lengths
* PHY devices are typically qualified for a large range of applications
(industrial, medical, automotive...), and they provide a constant and
reliable delay across temperature/pressure/voltage ranges
* PHY device drivers in PHYLIB being reusable by nature, being able to
configure correctly a specified delay enables more designs with similar delay
requirements to operate correctly
For cases where the PHY is not capable of providing this delay, but the
Ethernet MAC driver is capable of doing so, the correct phy_interface_t value
should be PHY_INTERFACE_MODE_RGMII, and the Ethernet MAC driver should be
configured correctly in order to provide the required transmit and/or receive
side delay from the perspective of the PHY device. Conversely, if the Ethernet
MAC driver looks at the phy_interface_t value, for any other mode but
PHY_INTERFACE_MODE_RGMII, it should make sure that the MAC-level delays are
disabled.
In case neither the Ethernet MAC, nor the PHY are capable of providing the
required delays, as defined per the RGMII standard, several options may be
available:
* Some SoCs may offer a pin pad/mux/controller capable of configuring a given
set of pins' strength, delays, and voltage; and it may be a suitable
option to insert the expected 2ns RGMII delay.
* Modifying the PCB design to include a fixed delay (e.g: using a specifically
designed serpentine), which may not require software configuration at all.
Common problems with RGMII delay mismatch
-----------------------------------------
When there is an RGMII delay mismatch between the Ethernet MAC and the PHY, this
will most likely result in the clock and data line signals being unstable when
the PHY or MAC takes a snapshot of these signals to translate them into logical
1 or 0 states and reconstruct the data being transmitted/received. Typical
symptoms include:
* Transmission/reception partially works, and there is frequent or occasional
packet loss observed
* Ethernet MAC may report some or all packets ingressing with a FCS/CRC error,
or just discard them all
* Switching to lower speeds such as 10/100Mbits/sec makes the problem go away
(since there is enough setup/hold time in that case)
Connecting to a PHY
===================
Sometime during startup, the network driver needs to establish a connection
between the PHY device and the network device. At this time, the PHY's bus
and drivers need to all have been loaded, so it is ready for the connection.
At this point, there are several ways to connect to the PHY:
#. The PAL handles everything, and only calls the network driver when
the link state changes, so it can react.
#. The PAL handles everything except interrupts (usually because the
controller has the interrupt registers).
#. The PAL handles everything, but checks in with the driver every second,
allowing the network driver to react first to any changes before the PAL
does.
#. The PAL serves only as a library of functions, with the network device
manually calling functions to update status, and configure the PHY
Letting the PHY Abstraction Layer do Everything
===============================================
If you choose option 1 (The hope is that every driver can, but to still be
useful to drivers that can't), connecting to the PHY is simple:
First, you need a function to react to changes in the link state. This
function follows this protocol::
static void adjust_link(struct net_device *dev);
Next, you need to know the device name of the PHY connected to this device.
The name will look something like, "0:00", where the first number is the
bus id, and the second is the PHY's address on that bus. Typically,
the bus is responsible for making its ID unique.
Now, to connect, just call this function::
phydev = phy_connect(dev, phy_name, &adjust_link, interface);
*phydev* is a pointer to the phy_device structure which represents the PHY.
If phy_connect is successful, it will return the pointer. dev, here, is the
pointer to your net_device. Once done, this function will have started the
PHY's software state machine, and registered for the PHY's interrupt, if it
has one. The phydev structure will be populated with information about the
current state, though the PHY will not yet be truly operational at this
point.
PHY-specific flags should be set in phydev->dev_flags prior to the call
to phy_connect() such that the underlying PHY driver can check for flags
and perform specific operations based on them.
This is useful if the system has put hardware restrictions on
the PHY/controller, of which the PHY needs to be aware.
*interface* is a u32 which specifies the connection type used
between the controller and the PHY. Examples are GMII, MII,
RGMII, and SGMII. For a full list, see include/linux/phy.h
Now just make sure that phydev->supported and phydev->advertising have any
values pruned from them which don't make sense for your controller (a 10/100
controller may be connected to a gigabit capable PHY, so you would need to
mask off SUPPORTED_1000baseT*). See include/linux/ethtool.h for definitions
for these bitfields. Note that you should not SET any bits, except the
SUPPORTED_Pause and SUPPORTED_AsymPause bits (see below), or the PHY may get
put into an unsupported state.
Lastly, once the controller is ready to handle network traffic, you call
phy_start(phydev). This tells the PAL that you are ready, and configures the
PHY to connect to the network. If the MAC interrupt of your network driver
also handles PHY status changes, just set phydev->irq to PHY_IGNORE_INTERRUPT
before you call phy_start and use phy_mac_interrupt() from the network
driver. If you don't want to use interrupts, set phydev->irq to PHY_POLL.
phy_start() enables the PHY interrupts (if applicable) and starts the
phylib state machine.
When you want to disconnect from the network (even if just briefly), you call
phy_stop(phydev). This function also stops the phylib state machine and
disables PHY interrupts.
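Putting the above together, the open path of a driver using option 1 could be
sketched as follows (my_open() and the "0:00" bus id are placeholders;
phy_set_max_speed() is shown as one convenient way to prune modes a 10/100
MAC cannot handle)::

    #include <linux/netdevice.h>
    #include <linux/phy.h>

    static void adjust_link(struct net_device *dev)
    {
        /* Called by phylib whenever the link state changes; reprogram the
         * MAC here based on dev->phydev->speed, ->duplex, ->link, etc. */
        phy_print_status(dev->phydev);
    }

    static int my_open(struct net_device *dev)
    {
        struct phy_device *phydev;

        phydev = phy_connect(dev, "0:00", &adjust_link,
                             PHY_INTERFACE_MODE_RGMII_ID);
        if (IS_ERR(phydev))
            return PTR_ERR(phydev);

        /* Prune modes the MAC cannot handle before starting the PHY. */
        phy_set_max_speed(phydev, SPEED_100);

        /* Ready for traffic: start the phylib state machine. */
        phy_start(phydev);
        return 0;
    }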
Pause frames / flow control
===========================
The PHY does not participate directly in flow control/pause frames except by
making sure that the SUPPORTED_Pause and SUPPORTED_AsymPause bits are set in
MII_ADVERTISE to indicate towards the link partner that the Ethernet MAC
controller supports such a thing. Since flow control/pause frames generation
involves the Ethernet MAC driver, it is recommended that this driver takes care
of properly indicating advertisement and support for such features by setting
the SUPPORTED_Pause and SUPPORTED_AsymPause bits accordingly. This can be done
either before or after phy_connect() and/or as a result of implementing the
ethtool::set_pauseparam feature.
Keeping Close Tabs on the PAL
=============================
It is possible that the PAL's built-in state machine needs a little help to
keep your network device and the PHY properly in sync. If so, you can
register a helper function when connecting to the PHY, which will be called
every second before the state machine reacts to any changes. To do this, you
need to manually call phy_attach() and phy_prepare_link(), and then call
phy_start_machine() with the second argument set to point to your special
handler.
Currently there are no examples of how to use this functionality, and testing
on it has been limited because the author does not have any drivers which use
it (they all use option 1). So Caveat Emptor.
Doing it all yourself
=====================
There's a remote chance that the PAL's built-in state machine cannot track
the complex interactions between the PHY and your network device. If this is
so, you can simply call phy_attach(), and not call phy_start_machine or
phy_prepare_link(). This will mean that phydev->state is entirely yours to
handle (phy_start and phy_stop toggle between some of the states, so you
might need to avoid them).
An effort has been made to make sure that useful functionality can be
accessed without the state-machine running, and most of these functions are
descended from functions which did not interact with a complex state-machine.
However, again, no effort has been made so far to test running without the
state machine, so proceed with care.
Here is a brief rundown of the functions::
int phy_read(struct phy_device *phydev, u16 regnum);
int phy_write(struct phy_device *phydev, u16 regnum, u16 val);
Simple read/write primitives. They invoke the bus's read/write function
pointers.
::
void phy_print_status(struct phy_device *phydev);
A convenience function to print out the PHY status neatly.
::
void phy_request_interrupt(struct phy_device *phydev);
Requests the IRQ for the PHY interrupts.
::
struct phy_device * phy_attach(struct net_device *dev, const char *phy_id,
phy_interface_t interface);
Attaches a network device to a particular PHY, binding the PHY to a generic
driver if none was found during bus initialization.
::
int phy_start_aneg(struct phy_device *phydev);
Using variables inside the phydev structure, either configures advertising
and resets autonegotiation, or disables autonegotiation, and configures
forced settings.
::
static inline int phy_read_status(struct phy_device *phydev);
Fills the phydev structure with up-to-date information about the current
settings in the PHY.
::
int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd);
Ethtool convenience functions.
::
int phy_mii_ioctl(struct phy_device *phydev,
struct mii_ioctl_data *mii_data, int cmd);
The MII ioctl. Note that this function will completely screw up the state
machine if you write registers like BMCR, BMSR, ADVERTISE, etc. Best to
use this only to write registers which are not standard, and don't set off
a renegotiation.
PHY Device Drivers
==================
With the PHY Abstraction Layer, adding support for new PHYs is
quite easy. In some cases, no work is required at all! However,
many PHYs require a little hand-holding to get up-and-running.
Generic PHY driver
------------------
If the desired PHY doesn't have any errata, quirks, or special
features you want to support, then it may be best to not add
support, and let the PHY Abstraction Layer's Generic PHY Driver
do all of the work.
Writing a PHY driver
--------------------
If you do need to write a PHY driver, the first thing to do is
make sure it can be matched with an appropriate PHY device.
This is done during bus initialization by reading the device's
UID (stored in registers 2 and 3), then comparing it to each
driver's phy_id field by ANDing it with each driver's
phy_id_mask field. Also, it needs a name. Here's an example::
static struct phy_driver dm9161_driver = {
.phy_id = 0x0181b880,
.name = "Davicom DM9161E",
.phy_id_mask = 0x0ffffff0,
...
};
Next, you need to specify what features (speed, duplex, autoneg,
etc) your PHY device and driver support. Most PHYs support
PHY_BASIC_FEATURES, but you can look in include/mii.h for other
features.
Each driver consists of a number of function pointers, documented
in include/linux/phy.h under the phy_driver structure.
Of these, only config_aneg and read_status are required to be
assigned by the driver code. The rest are optional. Also, it is
preferred to use the generic phy driver's versions of these two
functions if at all possible: genphy_read_status and
genphy_config_aneg. If this is not possible, it is likely that
you only need to perform some actions before and after invoking
these functions, and so your functions will wrap the generic
ones.
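As a hedged sketch of such a wrapper (the UID, name and the empty vendor
fix-up below are invented for illustration, and the exact .features
initialisation differs between kernel versions)::

  #include <linux/module.h>
  #include <linux/phy.h>

  static int foo_phy_read_status(struct phy_device *phydev)
  {
          int err;

          /* Let the generic code fill in link/speed/duplex first... */
          err = genphy_read_status(phydev);
          if (err < 0)
                  return err;

          /* ...then apply vendor specific corrections here, if any. */
          return 0;
  }

  static struct phy_driver foo_phy_drivers[] = {
          {
                  .phy_id         = 0x01234560,   /* hypothetical UID */
                  .phy_id_mask    = 0x0ffffff0,
                  .name           = "Foo Example PHY",
                  .features       = PHY_BASIC_FEATURES,
                  .config_aneg    = genphy_config_aneg,
                  .read_status    = foo_phy_read_status,
          },
  };

  module_phy_driver(foo_phy_drivers);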
Feel free to look at the Marvell, Cicada, and Davicom drivers in
drivers/net/phy/ for examples (the lxt and qsemi drivers have
not been tested as of this writing).
The PHY's MMD register accesses are handled by the PAL framework
by default, but can be overridden by a specific PHY driver if
required. This could be the case if a PHY was released for
manufacturing before the MMD PHY register definitions were
standardized by the IEEE. Most modern PHYs will be able to use
the generic PAL framework for accessing the PHY's MMD registers.
An example of such usage is for Energy Efficient Ethernet support,
implemented in the PAL. This support uses the PAL to access MMD
registers for EEE query and configuration if the PHY supports
the IEEE standard access mechanisms, or can use the PHY's specific
access interfaces if overridden by the specific PHY driver. See
the Micrel driver in drivers/net/phy/ for an example of how this
can be implemented.
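As a sketch of the access pattern only (not the actual EEE implementation), a
driver could read the Clause 45 EEE ability register through the generic MMD
accessors like this; the helper name is hypothetical::

  #include <linux/mdio.h>
  #include <linux/phy.h>

  static int foo_phy_eee_capable(struct phy_device *phydev)
  {
          int val;

          /* MMD 3 (PCS), register 20: EEE ability */
          val = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_PCS_EEE_ABLE);
          if (val < 0)
                  return val;

          return !!(val & (MDIO_EEE_100TX | MDIO_EEE_1000T));
  }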
Board Fixups
============
Sometimes the specific interaction between the platform and the PHY requires
special handling. For instance, to change where the PHY's clock input is,
or to add a delay to account for latency issues in the data path. In order
to support such contingencies, the PHY Layer allows platform code to register
fixups to be run when the PHY is brought up (or subsequently reset).
When the PHY Layer brings up a PHY it checks to see if there are any fixups
registered for it, matching based on UID (contained in the PHY device's phy_id
field) and the bus identifier (contained in phydev->dev.bus_id). Both must
match, however two constants, PHY_ANY_ID and PHY_ANY_UID, are provided as
wildcards for the bus ID and UID, respectively.
When a match is found, the PHY layer will invoke the run function associated
with the fixup. This function is passed a pointer to the phy_device of
interest. It should therefore only operate on that PHY.
The platform code can either register the fixup using phy_register_fixup()::
int phy_register_fixup(const char *phy_id,
u32 phy_uid, u32 phy_uid_mask,
int (*run)(struct phy_device *));
Or using one of the two stubs, phy_register_fixup_for_uid() and
phy_register_fixup_for_id()::
int phy_register_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask,
int (*run)(struct phy_device *));
int phy_register_fixup_for_id(const char *phy_id,
int (*run)(struct phy_device *));
The stubs set one of the two matching criteria, and set the other one to
match anything.
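As a hedged example (the fixup body, register number and UID below are
invented for illustration; only the registration call follows the prototypes
above), platform code might do::

  static int foo_board_phy_fixup(struct phy_device *phydev)
  {
          /* e.g. enable a board specific delay via a vendor register */
          return phy_write(phydev, 0x1e, 0x0100);
  }

  static int __init foo_board_init(void)
  {
          /* match any PHY, on any bus, whose UID fits the mask */
          return phy_register_fixup_for_uid(0x01234500, 0xfffffff0,
                                            foo_board_phy_fixup);
  }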
When phy_register_fixup() or one of the \*_for_uid()/\*_for_id() variants is
called from a module, the fixup must be unregistered and its memory freed when
the module is unloaded.
Call one of the following functions before unloading the module::
int phy_unregister_fixup(const char *phy_id, u32 phy_uid, u32 phy_uid_mask);
int phy_unregister_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask);
int phy_unregister_fixup_for_id(const char *phy_id);
Standards
=========
IEEE Standard 802.3: CSMA/CD Access Method and Physical Layer Specifications, Section Two:
http://standards.ieee.org/getieee802/download/802.3-2008_section2.pdf
RGMII v1.3:
http://web.archive.org/web/20160303212629/http://www.hp.com/rnd/pdfs/RGMIIv1_3.pdf
RGMII v2.0:
http://web.archive.org/web/20160303171328/http://www.hp.com/rnd/pdfs/RGMIIv2_0_final_hp.pdf

View File

@ -1,427 +0,0 @@
-------
PHY Abstraction Layer
(Updated 2008-04-08)
Purpose
Most network devices consist of set of registers which provide an interface
to a MAC layer, which communicates with the physical connection through a
PHY. The PHY concerns itself with negotiating link parameters with the link
partner on the other side of the network connection (typically, an ethernet
cable), and provides a register interface to allow drivers to determine what
settings were chosen, and to configure what settings are allowed.
While these devices are distinct from the network devices, and conform to a
standard layout for the registers, it has been common practice to integrate
the PHY management code with the network driver. This has resulted in large
amounts of redundant code. Also, on embedded systems with multiple (and
sometimes quite different) ethernet controllers connected to the same
management bus, it is difficult to ensure safe use of the bus.
Since the PHYs are devices, and the management busses through which they are
accessed are, in fact, busses, the PHY Abstraction Layer treats them as such.
In doing so, it has these goals:
1) Increase code-reuse
2) Increase overall code-maintainability
3) Speed development time for new network drivers, and for new systems
Basically, this layer is meant to provide an interface to PHY devices which
allows network driver writers to write as little code as possible, while
still providing a full feature set.
The MDIO bus
Most network devices are connected to a PHY by means of a management bus.
Different devices use different busses (though some share common interfaces).
In order to take advantage of the PAL, each bus interface needs to be
registered as a distinct device.
1) read and write functions must be implemented. Their prototypes are:
int write(struct mii_bus *bus, int mii_id, int regnum, u16 value);
int read(struct mii_bus *bus, int mii_id, int regnum);
mii_id is the address on the bus for the PHY, and regnum is the register
number. These functions are guaranteed not to be called from interrupt
time, so it is safe for them to block, waiting for an interrupt to signal
the operation is complete
2) A reset function is optional. This is used to return the bus to an
initialized state.
3) A probe function is needed. This function should set up anything the bus
driver needs, setup the mii_bus structure, and register with the PAL using
mdiobus_register. Similarly, there's a remove function to undo all of
that (use mdiobus_unregister).
4) Like any driver, the device_driver structure must be configured, and init
exit functions are used to register the driver.
5) The bus must also be declared somewhere as a device, and registered.
As an example for how one driver implemented an mdio bus driver, see
drivers/net/ethernet/freescale/fsl_pq_mdio.c and an associated DTS file
for one of the users. (e.g. "git grep fsl,.*-mdio arch/powerpc/boot/dts/")
(RG)MII/electrical interface considerations
The Reduced Gigabit Medium Independent Interface (RGMII) is a 12-pin
electrical signal interface using a synchronous 125Mhz clock signal and several
data lines. Due to this design decision, a 1.5ns to 2ns delay must be added
between the clock line (RXC or TXC) and the data lines to let the PHY (clock
sink) have enough setup and hold times to sample the data lines correctly. The
PHY library offers different types of PHY_INTERFACE_MODE_RGMII* values to let
the PHY driver and optionally the MAC driver, implement the required delay. The
values of phy_interface_t must be understood from the perspective of the PHY
device itself, leading to the following:
* PHY_INTERFACE_MODE_RGMII: the PHY is not responsible for inserting any
internal delay by itself, it assumes that either the Ethernet MAC (if capable
or the PCB traces) insert the correct 1.5-2ns delay
* PHY_INTERFACE_MODE_RGMII_TXID: the PHY should insert an internal delay
for the transmit data lines (TXD[3:0]) processed by the PHY device
* PHY_INTERFACE_MODE_RGMII_RXID: the PHY should insert an internal delay
for the receive data lines (RXD[3:0]) processed by the PHY device
* PHY_INTERFACE_MODE_RGMII_ID: the PHY should insert internal delays for
both transmit AND receive data lines from/to the PHY device
Whenever possible, use the PHY side RGMII delay for these reasons:
* PHY devices may offer sub-nanosecond granularity in how they allow a
receiver/transmitter side delay (e.g: 0.5, 1.0, 1.5ns) to be specified. Such
precision may be required to account for differences in PCB trace lengths
* PHY devices are typically qualified for a large range of applications
(industrial, medical, automotive...), and they provide a constant and
reliable delay across temperature/pressure/voltage ranges
* PHY device drivers in PHYLIB being reusable by nature, being able to
configure correctly a specified delay enables more designs with similar delay
requirements to be operate correctly
For cases where the PHY is not capable of providing this delay, but the
Ethernet MAC driver is capable of doing so, the correct phy_interface_t value
should be PHY_INTERFACE_MODE_RGMII, and the Ethernet MAC driver should be
configured correctly in order to provide the required transmit and/or receive
side delay from the perspective of the PHY device. Conversely, if the Ethernet
MAC driver looks at the phy_interface_t value, for any other mode but
PHY_INTERFACE_MODE_RGMII, it should make sure that the MAC-level delays are
disabled.
In case neither the Ethernet MAC, nor the PHY are capable of providing the
required delays, as defined per the RGMII standard, several options may be
available:
* Some SoCs may offer a pin pad/mux/controller capable of configuring a given
set of pins'strength, delays, and voltage; and it may be a suitable
option to insert the expected 2ns RGMII delay.
* Modifying the PCB design to include a fixed delay (e.g: using a specifically
designed serpentine), which may not require software configuration at all.
Common problems with RGMII delay mismatch
When there is a RGMII delay mismatch between the Ethernet MAC and the PHY, this
will most likely result in the clock and data line signals to be unstable when
the PHY or MAC take a snapshot of these signals to translate them into logical
1 or 0 states and reconstruct the data being transmitted/received. Typical
symptoms include:
* Transmission/reception partially works, and there is frequent or occasional
packet loss observed
* Ethernet MAC may report some or all packets ingressing with a FCS/CRC error,
or just discard them all
* Switching to lower speeds such as 10/100Mbits/sec makes the problem go away
(since there is enough setup/hold time in that case)
Connecting to a PHY
Sometime during startup, the network driver needs to establish a connection
between the PHY device, and the network device. At this time, the PHY's bus
and drivers need to all have been loaded, so it is ready for the connection.
At this point, there are several ways to connect to the PHY:
1) The PAL handles everything, and only calls the network driver when
the link state changes, so it can react.
2) The PAL handles everything except interrupts (usually because the
controller has the interrupt registers).
3) The PAL handles everything, but checks in with the driver every second,
allowing the network driver to react first to any changes before the PAL
does.
4) The PAL serves only as a library of functions, with the network device
manually calling functions to update status, and configure the PHY
Letting the PHY Abstraction Layer do Everything
If you choose option 1 (The hope is that every driver can, but to still be
useful to drivers that can't), connecting to the PHY is simple:
First, you need a function to react to changes in the link state. This
function follows this protocol:
static void adjust_link(struct net_device *dev);
Next, you need to know the device name of the PHY connected to this device.
The name will look something like, "0:00", where the first number is the
bus id, and the second is the PHY's address on that bus. Typically,
the bus is responsible for making its ID unique.
Now, to connect, just call this function:
phydev = phy_connect(dev, phy_name, &adjust_link, interface);
phydev is a pointer to the phy_device structure which represents the PHY. If
phy_connect is successful, it will return the pointer. dev, here, is the
pointer to your net_device. Once done, this function will have started the
PHY's software state machine, and registered for the PHY's interrupt, if it
has one. The phydev structure will be populated with information about the
current state, though the PHY will not yet be truly operational at this
point.
PHY-specific flags should be set in phydev->dev_flags prior to the call
to phy_connect() such that the underlying PHY driver can check for flags
and perform specific operations based on them.
This is useful if the system has put hardware restrictions on
the PHY/controller, of which the PHY needs to be aware.
interface is a u32 which specifies the connection type used
between the controller and the PHY. Examples are GMII, MII,
RGMII, and SGMII. For a full list, see include/linux/phy.h
Now just make sure that phydev->supported and phydev->advertising have any
values pruned from them which don't make sense for your controller (a 10/100
controller may be connected to a gigabit capable PHY, so you would need to
mask off SUPPORTED_1000baseT*). See include/linux/ethtool.h for definitions
for these bitfields. Note that you should not SET any bits, except the
SUPPORTED_Pause and SUPPORTED_AsymPause bits (see below), or the PHY may get
put into an unsupported state.
Lastly, once the controller is ready to handle network traffic, you call
phy_start(phydev). This tells the PAL that you are ready, and configures the
PHY to connect to the network. If you want to handle your own interrupts,
just set phydev->irq to PHY_IGNORE_INTERRUPT before you call phy_start.
Similarly, if you don't want to use interrupts, set phydev->irq to PHY_POLL.
When you want to disconnect from the network (even if just briefly), you call
phy_stop(phydev).
Pause frames / flow control
The PHY does not participate directly in flow control/pause frames except by
making sure that the SUPPORTED_Pause and SUPPORTED_AsymPause bits are set in
MII_ADVERTISE to indicate towards the link partner that the Ethernet MAC
controller supports such a thing. Since flow control/pause frames generation
involves the Ethernet MAC driver, it is recommended that this driver takes care
of properly indicating advertisement and support for such features by setting
the SUPPORTED_Pause and SUPPORTED_AsymPause bits accordingly. This can be done
either before or after phy_connect() and/or as a result of implementing the
ethtool::set_pauseparam feature.
Keeping Close Tabs on the PAL
It is possible that the PAL's built-in state machine needs a little help to
keep your network device and the PHY properly in sync. If so, you can
register a helper function when connecting to the PHY, which will be called
every second before the state machine reacts to any changes. To do this, you
need to manually call phy_attach() and phy_prepare_link(), and then call
phy_start_machine() with the second argument set to point to your special
handler.
Currently there are no examples of how to use this functionality, and testing
on it has been limited because the author does not have any drivers which use
it (they all use option 1). So Caveat Emptor.
Doing it all yourself
There's a remote chance that the PAL's built-in state machine cannot track
the complex interactions between the PHY and your network device. If this is
so, you can simply call phy_attach(), and not call phy_start_machine or
phy_prepare_link(). This will mean that phydev->state is entirely yours to
handle (phy_start and phy_stop toggle between some of the states, so you
might need to avoid them).
An effort has been made to make sure that useful functionality can be
accessed without the state-machine running, and most of these functions are
descended from functions which did not interact with a complex state-machine.
However, again, no effort has been made so far to test running without the
state machine, so tryer beware.
Here is a brief rundown of the functions:
int phy_read(struct phy_device *phydev, u16 regnum);
int phy_write(struct phy_device *phydev, u16 regnum, u16 val);
Simple read/write primitives. They invoke the bus's read/write function
pointers.
void phy_print_status(struct phy_device *phydev);
A convenience function to print out the PHY status neatly.
int phy_start_interrupts(struct phy_device *phydev);
int phy_stop_interrupts(struct phy_device *phydev);
Requests the IRQ for the PHY interrupts, then enables them for
start, or disables then frees them for stop.
struct phy_device * phy_attach(struct net_device *dev, const char *phy_id,
phy_interface_t interface);
Attaches a network device to a particular PHY, binding the PHY to a generic
driver if none was found during bus initialization.
int phy_start_aneg(struct phy_device *phydev);
Using variables inside the phydev structure, either configures advertising
and resets autonegotiation, or disables autonegotiation, and configures
forced settings.
static inline int phy_read_status(struct phy_device *phydev);
Fills the phydev structure with up-to-date information about the current
settings in the PHY.
int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd);
Ethtool convenience functions.
int phy_mii_ioctl(struct phy_device *phydev,
struct mii_ioctl_data *mii_data, int cmd);
The MII ioctl. Note that this function will completely screw up the state
machine if you write registers like BMCR, BMSR, ADVERTISE, etc. Best to
use this only to write registers which are not standard, and don't set off
a renegotiation.
PHY Device Drivers
With the PHY Abstraction Layer, adding support for new PHYs is
quite easy. In some cases, no work is required at all! However,
many PHYs require a little hand-holding to get up-and-running.
Generic PHY driver
If the desired PHY doesn't have any errata, quirks, or special
features you want to support, then it may be best to not add
support, and let the PHY Abstraction Layer's Generic PHY Driver
do all of the work.
Writing a PHY driver
If you do need to write a PHY driver, the first thing to do is
make sure it can be matched with an appropriate PHY device.
This is done during bus initialization by reading the device's
UID (stored in registers 2 and 3), then comparing it to each
driver's phy_id field by ANDing it with each driver's
phy_id_mask field. Also, it needs a name. Here's an example:
static struct phy_driver dm9161_driver = {
.phy_id = 0x0181b880,
.name = "Davicom DM9161E",
.phy_id_mask = 0x0ffffff0,
...
}
Next, you need to specify what features (speed, duplex, autoneg,
etc) your PHY device and driver support. Most PHYs support
PHY_BASIC_FEATURES, but you can look in include/mii.h for other
features.
Each driver consists of a number of function pointers, documented
in include/linux/phy.h under the phy_driver structure.
Of these, only config_aneg and read_status are required to be
assigned by the driver code. The rest are optional. Also, it is
preferred to use the generic phy driver's versions of these two
functions if at all possible: genphy_read_status and
genphy_config_aneg. If this is not possible, it is likely that
you only need to perform some actions before and after invoking
these functions, and so your functions will wrap the generic
ones.
Feel free to look at the Marvell, Cicada, and Davicom drivers in
drivers/net/phy/ for examples (the lxt and qsemi drivers have
not been tested as of this writing).
The PHY's MMD register accesses are handled by the PAL framework
by default, but can be overridden by a specific PHY driver if
required. This could be the case if a PHY was released for
manufacturing before the MMD PHY register definitions were
standardized by the IEEE. Most modern PHYs will be able to use
the generic PAL framework for accessing the PHY's MMD registers.
An example of such usage is for Energy Efficient Ethernet support,
implemented in the PAL. This support uses the PAL to access MMD
registers for EEE query and configuration if the PHY supports
the IEEE standard access mechanisms, or can use the PHY's specific
access interfaces if overridden by the specific PHY driver. See
the Micrel driver in drivers/net/phy/ for an example of how this
can be implemented.
Board Fixups
Sometimes the specific interaction between the platform and the PHY requires
special handling. For instance, to change where the PHY's clock input is,
or to add a delay to account for latency issues in the data path. In order
to support such contingencies, the PHY Layer allows platform code to register
fixups to be run when the PHY is brought up (or subsequently reset).
When the PHY Layer brings up a PHY it checks to see if there are any fixups
registered for it, matching based on UID (contained in the PHY device's phy_id
field) and the bus identifier (contained in phydev->dev.bus_id). Both must
match, however two constants, PHY_ANY_ID and PHY_ANY_UID, are provided as
wildcards for the bus ID and UID, respectively.
When a match is found, the PHY layer will invoke the run function associated
with the fixup. This function is passed a pointer to the phy_device of
interest. It should therefore only operate on that PHY.
The platform code can either register the fixup using phy_register_fixup():
int phy_register_fixup(const char *phy_id,
u32 phy_uid, u32 phy_uid_mask,
int (*run)(struct phy_device *));
Or using one of the two stubs, phy_register_fixup_for_uid() and
phy_register_fixup_for_id():
int phy_register_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask,
int (*run)(struct phy_device *));
int phy_register_fixup_for_id(const char *phy_id,
int (*run)(struct phy_device *));
The stubs set one of the two matching criteria, and set the other one to
match anything.
When phy_register_fixup() or *_for_uid()/*_for_id() is called at module,
unregister fixup and free allocate memory are required.
Call one of following function before unloading module.
int phy_unregister_fixup(const char *phy_id, u32 phy_uid, u32 phy_uid_mask);
int phy_unregister_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask);
int phy_register_fixup_for_id(const char *phy_id);
Standards
IEEE Standard 802.3: CSMA/CD Access Method and Physical Layer Specifications, Section Two:
http://standards.ieee.org/getieee802/download/802.3-2008_section2.pdf
RGMII v1.3:
http://web.archive.org/web/20160303212629/http://www.hp.com/rnd/pdfs/RGMIIv1_3.pdf
RGMII v2.0:
http://web.archive.org/web/20160303171328/http://www.hp.com/rnd/pdfs/RGMIIv2_0_final_hp.pdf

View File

@ -1000,51 +1000,6 @@ The kernel interface functions are as follows:
size should be set when the call is begun. tx_total_len may not be less
than zero.
(*) Check to see the completion state of a call so that the caller can assess
whether it needs to be retried.
enum rxrpc_call_completion {
RXRPC_CALL_SUCCEEDED,
RXRPC_CALL_REMOTELY_ABORTED,
RXRPC_CALL_LOCALLY_ABORTED,
RXRPC_CALL_LOCAL_ERROR,
RXRPC_CALL_NETWORK_ERROR,
};
int rxrpc_kernel_check_call(struct socket *sock, struct rxrpc_call *call,
enum rxrpc_call_completion *_compl,
u32 *_abort_code);
On return, -EINPROGRESS will be returned if the call is still ongoing; if
it is finished, *_compl will be set to indicate the manner of completion,
*_abort_code will be set to any abort code that occurred. 0 will be
returned on a successful completion, -ECONNABORTED will be returned if the
client failed due to a remote abort and anything else will return an
appropriate error code.
The caller should look at this information to decide if it's worth
retrying the call.
(*) Retry a client call.
int rxrpc_kernel_retry_call(struct socket *sock,
struct rxrpc_call *call,
struct sockaddr_rxrpc *srx,
struct key *key);
This attempts to partially reinitialise a call and submit it again while
reusing the original call's Tx queue to avoid the need to repackage and
re-encrypt the data to be sent. call indicates the call to retry, srx the
new address to send it to and key the encryption key to use for signing or
encrypting the packets.
For this to work, the first Tx data packet must still be in the transmit
queue, and currently this is only permitted for local and network errors
and the call must not have been aborted. Any partially constructed Tx
packet is left as is and can continue being filled afterwards.
It returns 0 if the call was requeued and an error otherwise.
(*) Get call RTT.
u64 rxrpc_kernel_get_rtt(struct socket *sock, struct rxrpc_call *call);

View File

@ -366,6 +366,27 @@ to the accept queue.
TCP Fast Open
=============
* TcpEstabResets
Defined in `RFC1213 tcpEstabResets`_.
.. _RFC1213 tcpEstabResets: https://tools.ietf.org/html/rfc1213#page-48
* TcpAttemptFails
Defined in `RFC1213 tcpAttemptFails`_.
.. _RFC1213 tcpAttemptFails: https://tools.ietf.org/html/rfc1213#page-48
* TcpOutRsts
Defined in `RFC1213 tcpOutRsts`_. The RFC says this counter indicates
the 'segments sent containing the RST flag', but in the Linux kernel, this
counter indicates the segments the kernel tried to send. The sending
might fail due to some errors (e.g. memory allocation failure).
.. _RFC1213 tcpOutRsts: https://tools.ietf.org/html/rfc1213#page-52
TCP Fast Path
=============
When the kernel receives a TCP packet, it has two paths to handle the
packet: the fast path and the slow path. The comment in the kernel
code provides a good explanation of them; it is quoted below::
@ -413,7 +434,6 @@ increase 1.
TCP abort
=========
* TcpExtTCPAbortOnData
It means TCP layer has data in flight, but need to close the
@ -589,7 +609,6 @@ packet yet, the sender would know packet 4 is out of order. The TCP
stack of kernel will increase TcpExtTCPSACKReorder for both of the
above scenarios.
DSACK
=====
The DSACK is defined in `RFC2883`_. The receiver uses DSACK to report
@ -612,8 +631,7 @@ The TCP stack receives an out of order duplicate packet, so it sends a
DSACK to the sender.
* TcpExtTCPDSACKRecv
The TCP stack receives a DSACK, which indicate an acknowledged
The TCP stack receives a DSACK, which indicates an acknowledged
duplicate packet is received.
* TcpExtTCPDSACKOfoRecv
@ -621,6 +639,56 @@ duplicate packet is received.
The TCP stack receives a DSACK, which indicate an out of order
duplicate packet is received.
invalid SACK and DSACK
======================
When a SACK (or DSACK) block is invalid, a corresponding counter would
be updated. The validation method is based on the start/end sequence
numbers of the SACK block. For more details, please refer to the comment
of the function tcp_is_sackblock_valid in the kernel source code. A
SACK option could have up to 4 blocks; they are checked
individually. E.g., if 3 blocks of a SACK are invalid, the
corresponding counter would be updated 3 times. The comment of the
`Add counters for discarded SACK blocks`_ patch has additional
explanation:
.. _Add counters for discarded SACK blocks: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=18f02545a9a16c9a89778b91a162ad16d510bb32
* TcpExtTCPSACKDiscard
This counter indicates how many SACK blocks are invalid. If the invalid
SACK block is caused by ACK recording, the TCP stack will only ignore
it and won't update this counter.
* TcpExtTCPDSACKIgnoredOld and TcpExtTCPDSACKIgnoredNoUndo
When a DSACK block is invalid, one of these two counters would be
updated. Which counter will be updated depends on the undo_marker flag
of the TCP socket. If the undo_marker is not set, the TCP stack isn't
likely to have re-transmitted any packets; if an invalid DSACK block is
still received, the reason might be that the packet was duplicated in the
middle of the network. In such a scenario, TcpExtTCPDSACKIgnoredNoUndo
will be updated. If the undo_marker is set, TcpExtTCPDSACKIgnoredOld
will be updated. As implied in its name, it might be an old packet.
SACK shift
==========
The Linux networking stack stores data in the sk_buff struct (skb for
short). If a SACK block spans multiple skbs, the TCP stack will try
to re-arrange data in these skbs. E.g. if a SACK block acknowledges seq
10 to 15, skb1 has seq 10 to 13, and skb2 has seq 14 to 20, the seq 14 and
15 in skb2 would be moved to skb1. This operation is a 'shift'. If a
SACK block acknowledges seq 10 to 20, skb1 has seq 10 to 13, and skb2 has
seq 14 to 20, all data in skb2 will be moved to skb1, and skb2 will be
discarded; this operation is a 'merge'.
* TcpExtTCPSackShifted
A skb is shifted
* TcpExtTCPSackMerged
A skb is merged
* TcpExtTCPSackShiftFallback
A skb should be shifted or merged, but the TCP stack doesn't do it for
some reason.
TCP out of order
================
* TcpExtTCPOFOQueue
@ -721,6 +789,60 @@ unacknowledged number (more strict than `RFC 5961 section 5.2`_).
.. _RFC 5961 section 4.2: https://tools.ietf.org/html/rfc5961#page-9
.. _RFC 5961 section 5.2: https://tools.ietf.org/html/rfc5961#page-11
TCP receive window
==================
* TcpExtTCPWantZeroWindowAdv
Depending on current memory usage, the TCP stack tries to set the receive
window to zero. But the receive window might still be a non-zero
value. For example, if the previous window size is 10, and the TCP
stack receives 3 bytes, the current window size would be 7 even if the
window size calculated from the memory usage is zero.
* TcpExtTCPToZeroWindowAdv
The TCP receive window is set to zero from a non-zero value.
* TcpExtTCPFromZeroWindowAdv
The TCP receive window is set to a non-zero value from zero.
Delayed ACK
===========
The TCP Delayed ACK is a technique which is used for reducing the
packet count in the network. For more details, please refer to the
`Delayed ACK wiki`_.
.. _Delayed ACK wiki: https://en.wikipedia.org/wiki/TCP_delayed_acknowledgment
* TcpExtDelayedACKs
A delayed ACK timer expires. The TCP stack will send a pure ACK packet
and exit the delayed ACK mode.
* TcpExtDelayedACKLocked
A delayed ACK timer expires, but the TCP stack can't send an ACK
immediately because the socket is locked by a userspace program. The
TCP stack will send a pure ACK later (after the userspace program
unlocks the socket). When the TCP stack sends the pure ACK later, the
TCP stack will also update TcpExtDelayedACKs and exit the delayed ACK
mode.
* TcpExtDelayedACKLost
It will be updated when the TCP stack receives a packet which has
already been ACKed. A lost delayed ACK might cause this issue, but it
could also be triggered by other reasons, such as a packet being
duplicated in the network.
Tail Loss Probe (TLP)
=====================
TLP is an algorithm which is used to detect TCP packet loss. For more
details, please refer to the `TLP paper`_.
.. _TLP paper: https://tools.ietf.org/html/draft-dukkipati-tcpm-tcp-loss-probe-01
* TcpExtTCPLossProbes
A TLP probe packet is sent.
* TcpExtTCPLossProbeRecovery
A packet loss is detected and recovered by TLP.
examples
========

View File

@ -417,7 +417,7 @@ is again deprecated and ts[2] holds a hardware timestamp if set.
Hardware time stamping must also be initialized for each device driver
that is expected to do hardware time stamping. The parameter is defined in
/include/linux/net_tstamp.h as:
include/uapi/linux/net_tstamp.h as:
struct hwtstamp_config {
int flags; /* no flags defined right now, must be zero */
@ -487,7 +487,7 @@ enum {
HWTSTAMP_FILTER_PTP_V1_L4_EVENT,
/* for the complete list of values, please check
* the include file /include/linux/net_tstamp.h
* the include file include/uapi/linux/net_tstamp.h
*/
};
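A minimal userspace sketch of passing this structure to a driver with the
SIOCSHWTSTAMP ioctl (the interface name and filter choice are placeholders,
and error handling is trimmed):

#include <string.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <linux/net_tstamp.h>
#include <linux/sockios.h>

static int enable_hw_timestamping(int sock)
{
        struct hwtstamp_config cfg = {
                .tx_type   = HWTSTAMP_TX_ON,
                .rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT,
        };
        struct ifreq ifr;

        memset(&ifr, 0, sizeof(ifr));
        strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
        ifr.ifr_data = (char *)&cfg;

        return ioctl(sock, SIOCSHWTSTAMP, &ifr);
}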

View File

@ -291,6 +291,20 @@ user space is responsible for creating them if needed.
Default : 0 (for compatibility reasons)
devconf_inherit_init_net
----------------------------
Controls if a new network namespace should inherit all current
settings under /proc/sys/net/{ipv4,ipv6}/conf/{all,default}/. By
default, we keep the current behavior: for IPv4 we inherit all current
settings from init_net and for IPv6 we reset all settings to default.
If set to 1, both IPv4 and IPv6 settings are forced to inherit from
current ones in init_net. If set to 2, both IPv4 and IPv6 settings are
forced to reset to their default values.
Default : 0 (for compatibility reasons)
2. /proc/sys/net/unix - Parameters for Unix domain sockets
-------------------------------------------------------

View File

@ -3052,8 +3052,8 @@ F: include/linux/bcm963xx_nvram.h
F: include/linux/bcm963xx_tag.h
BROADCOM BNX2 GIGABIT ETHERNET DRIVER
M: Rasesh Mody <rasesh.mody@cavium.com>
M: Dept-GELinuxNICDev@cavium.com
M: Rasesh Mody <rmody@marvell.com>
M: GR-Linux-NIC-Dev@marvell.com
L: netdev@vger.kernel.org
S: Supported
F: drivers/net/ethernet/broadcom/bnx2.*
@ -3072,9 +3072,9 @@ S: Supported
F: drivers/scsi/bnx2i/
BROADCOM BNX2X 10 GIGABIT ETHERNET DRIVER
M: Ariel Elior <ariel.elior@cavium.com>
M: Sudarsana Kalluru <sudarsana.kalluru@cavium.com>
M: everest-linux-l2@cavium.com
M: Ariel Elior <aelior@marvell.com>
M: Sudarsana Kalluru <skalluru@marvell.com>
M: GR-everest-linux-l2@marvell.com
L: netdev@vger.kernel.org
S: Supported
F: drivers/net/ethernet/broadcom/bnx2x/
@ -3249,9 +3249,9 @@ S: Supported
F: drivers/scsi/bfa/
BROCADE BNA 10 GIGABIT ETHERNET DRIVER
M: Rasesh Mody <rasesh.mody@cavium.com>
M: Sudarsana Kalluru <sudarsana.kalluru@cavium.com>
M: Dept-GELinuxNICDev@cavium.com
M: Rasesh Mody <rmody@marvell.com>
M: Sudarsana Kalluru <skalluru@marvell.com>
M: GR-Linux-NIC-Dev@marvell.com
L: netdev@vger.kernel.org
S: Supported
F: drivers/net/ethernet/brocade/bna/
@ -3471,10 +3471,9 @@ F: drivers/i2c/busses/i2c-octeon*
F: drivers/i2c/busses/i2c-thunderx*
CAVIUM LIQUIDIO NETWORK DRIVER
M: Derek Chickles <derek.chickles@caviumnetworks.com>
M: Satanand Burla <satananda.burla@caviumnetworks.com>
M: Felix Manlunas <felix.manlunas@caviumnetworks.com>
M: Raghu Vatsavayi <raghu.vatsavayi@caviumnetworks.com>
M: Derek Chickles <dchickles@marvell.com>
M: Satanand Burla <sburla@marvell.com>
M: Felix Manlunas <fmanlunas@marvell.com>
L: netdev@vger.kernel.org
W: http://www.cavium.com
S: Supported
@ -3979,6 +3978,7 @@ F: drivers/cpufreq/arm_big_little.c
CPU POWER MONITORING SUBSYSTEM
M: Thomas Renninger <trenn@suse.com>
M: Shuah Khan <shuah@kernel.org>
M: Shuah Khan <skhan@linuxfoundation.org>
L: linux-pm@vger.kernel.org
S: Maintained
F: tools/power/cpupower/
@ -4123,7 +4123,7 @@ S: Maintained
F: drivers/media/dvb-frontends/cxd2820r*
CXGB3 ETHERNET DRIVER (CXGB3)
M: Arjun Vynipadath <arjun@chelsio.com>
M: Vishal Kulkarni <vishal@chelsio.com>
L: netdev@vger.kernel.org
W: http://www.chelsio.com
S: Supported
@ -4152,7 +4152,7 @@ S: Supported
F: drivers/crypto/chelsio
CXGB4 ETHERNET DRIVER (CXGB4)
M: Arjun Vynipadath <arjun@chelsio.com>
M: Vishal Kulkarni <vishal@chelsio.com>
L: netdev@vger.kernel.org
W: http://www.chelsio.com
S: Supported
@ -6024,6 +6024,12 @@ L: linuxppc-dev@lists.ozlabs.org
S: Maintained
F: drivers/dma/fsldma.*
FREESCALE ENETC ETHERNET DRIVERS
M: Claudiu Manoil <claudiu.manoil@nxp.com>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/ethernet/freescale/enetc/
FREESCALE eTSEC ETHERNET DRIVER (GIANFAR)
M: Claudiu Manoil <claudiu.manoil@nxp.com>
L: netdev@vger.kernel.org
@ -6088,6 +6094,7 @@ M: Yangbo Lu <yangbo.lu@nxp.com>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/ptp/ptp_qoriq.c
F: drivers/ptp/ptp_qoriq_debugfs.c
F: include/linux/fsl/ptp_qoriq.h
F: Documentation/devicetree/bindings/ptp/ptp-qoriq.txt
@ -8259,6 +8266,7 @@ F: include/uapi/linux/sunrpc/
KERNEL SELFTEST FRAMEWORK
M: Shuah Khan <shuah@kernel.org>
M: Shuah Khan <skhan@linuxfoundation.org>
L: linux-kselftest@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git
Q: https://patchwork.kernel.org/project/linux-kselftest/list/
@ -10690,9 +10698,9 @@ S: Maintained
F: drivers/net/netdevsim/*
NETXEN (1/10) GbE SUPPORT
M: Manish Chopra <manish.chopra@cavium.com>
M: Rahul Verma <rahul.verma@cavium.com>
M: Dept-GELinuxNICDev@cavium.com
M: Manish Chopra <manishc@marvell.com>
M: Rahul Verma <rahulv@marvell.com>
M: GR-Linux-NIC-Dev@marvell.com
L: netdev@vger.kernel.org
S: Supported
F: drivers/net/ethernet/qlogic/netxen/
@ -12476,8 +12484,8 @@ S: Supported
F: drivers/scsi/qedi/
QLOGIC QL4xxx ETHERNET DRIVER
M: Ariel Elior <Ariel.Elior@cavium.com>
M: everest-linux-l2@cavium.com
M: Ariel Elior <aelior@marvell.com>
M: GR-everest-linux-l2@marvell.com
L: netdev@vger.kernel.org
S: Supported
F: drivers/net/ethernet/qlogic/qed/
@ -12485,8 +12493,8 @@ F: include/linux/qed/
F: drivers/net/ethernet/qlogic/qede/
QLOGIC QL4xxx RDMA DRIVER
M: Michal Kalderon <Michal.Kalderon@cavium.com>
M: Ariel Elior <Ariel.Elior@cavium.com>
M: Michal Kalderon <mkalderon@marvell.com>
M: Ariel Elior <aelior@marvell.com>
L: linux-rdma@vger.kernel.org
S: Supported
F: drivers/infiniband/hw/qedr/
@ -12506,7 +12514,7 @@ F: Documentation/scsi/LICENSE.qla2xxx
F: drivers/scsi/qla2xxx/
QLOGIC QLA3XXX NETWORK DRIVER
M: Dept-GELinuxNICDev@cavium.com
M: GR-Linux-NIC-Dev@marvell.com
L: netdev@vger.kernel.org
S: Supported
F: Documentation/networking/device_drivers/qlogic/LICENSE.qla3xxx
@ -12520,16 +12528,16 @@ F: Documentation/scsi/LICENSE.qla4xxx
F: drivers/scsi/qla4xxx/
QLOGIC QLCNIC (1/10)Gb ETHERNET DRIVER
M: Shahed Shaikh <Shahed.Shaikh@cavium.com>
M: Manish Chopra <manish.chopra@cavium.com>
M: Dept-GELinuxNICDev@cavium.com
M: Shahed Shaikh <shshaikh@marvell.com>
M: Manish Chopra <manishc@marvell.com>
M: GR-Linux-NIC-Dev@marvell.com
L: netdev@vger.kernel.org
S: Supported
F: drivers/net/ethernet/qlogic/qlcnic/
QLOGIC QLGE 10Gb ETHERNET DRIVER
M: Manish Chopra <manish.chopra@cavium.com>
M: Dept-GELinuxNICDev@cavium.com
M: Manish Chopra <manishc@marvell.com>
M: GR-Linux-NIC-Dev@marvell.com
L: netdev@vger.kernel.org
S: Supported
F: drivers/net/ethernet/qlogic/qlge/
@ -12609,6 +12617,14 @@ L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/ethernet/qualcomm/emac/
QUALCOMM ETHQOS ETHERNET DRIVER
M: Vinod Koul <vkoul@kernel.org>
M: Niklas Cassel <niklas.cassel@linaro.org>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
F: Documentation/devicetree/bindings/net/qcom,dwmac.txt
QUALCOMM GENERIC INTERFACE I2C DRIVER
M: Alok Chauhan <alokc@codeaurora.org>
M: Karthikeyan Ramasubramanian <kramasub@codeaurora.org>
@ -15843,6 +15859,7 @@ F: drivers/usb/common/usb-otg-fsm.c
USB OVER IP DRIVER
M: Valentina Manea <valentina.manea.m@gmail.com>
M: Shuah Khan <shuah@kernel.org>
M: Shuah Khan <skhan@linuxfoundation.org>
L: linux-usb@vger.kernel.org
S: Maintained
F: Documentation/usb/usbip_protocol.txt
@ -16672,6 +16689,24 @@ T: git git://linuxtv.org/media_tree.git
S: Maintained
F: drivers/media/tuners/tuner-xc2028.*
XDP (eXpress Data Path)
M: Alexei Starovoitov <ast@kernel.org>
M: Daniel Borkmann <daniel@iogearbox.net>
M: David S. Miller <davem@davemloft.net>
M: Jakub Kicinski <jakub.kicinski@netronome.com>
M: Jesper Dangaard Brouer <hawk@kernel.org>
M: John Fastabend <john.fastabend@gmail.com>
L: netdev@vger.kernel.org
L: xdp-newbies@vger.kernel.org
S: Supported
F: net/core/xdp.c
F: include/net/xdp.h
F: kernel/bpf/devmap.c
F: kernel/bpf/cpumap.c
F: include/trace/events/xdp.h
K: xdp
N: xdp
XDP SOCKETS (AF_XDP)
M: Björn Töpel <bjorn.topel@intel.com>
M: Magnus Karlsson <magnus.karlsson@intel.com>

View File

@ -2,7 +2,7 @@
VERSION = 5
PATCHLEVEL = 0
SUBLEVEL = 0
EXTRAVERSION = -rc2
EXTRAVERSION = -rc4
NAME = Shy Crocodile
# *DOCUMENTATION*
@ -955,6 +955,7 @@ ifdef CONFIG_STACK_VALIDATION
endif
endif
PHONY += prepare0
ifeq ($(KBUILD_EXTMOD),)
core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/
@ -1061,8 +1062,7 @@ scripts: scripts_basic scripts_dtc
# archprepare is used in arch Makefiles and when processed asm symlink,
# version.h and scripts_basic is processed / created.
# Listed in dependency order
PHONY += prepare archprepare prepare0 prepare1 prepare2 prepare3
PHONY += prepare archprepare prepare1 prepare2 prepare3
# prepare3 is used to check if we are building in a separate output directory,
# and if so do:
@ -1360,11 +1360,11 @@ mrproper: rm-dirs := $(wildcard $(MRPROPER_DIRS))
mrproper: rm-files := $(wildcard $(MRPROPER_FILES))
mrproper-dirs := $(addprefix _mrproper_,scripts)
PHONY += $(mrproper-dirs) mrproper archmrproper
PHONY += $(mrproper-dirs) mrproper
$(mrproper-dirs):
$(Q)$(MAKE) $(clean)=$(patsubst _mrproper_%,%,$@)
mrproper: clean archmrproper $(mrproper-dirs)
mrproper: clean $(mrproper-dirs)
$(call cmd,rmdirs)
$(call cmd,rmfiles)

View File

@ -3,23 +3,19 @@ generic-y += bugs.h
generic-y += compat.h
generic-y += device.h
generic-y += div64.h
generic-y += dma-mapping.h
generic-y += emergency-restart.h
generic-y += extable.h
generic-y += fb.h
generic-y += ftrace.h
generic-y += hardirq.h
generic-y += hw_irq.h
generic-y += irq_regs.h
generic-y += irq_work.h
generic-y += kmap_types.h
generic-y += local.h
generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
generic-y += msi.h
generic-y += parport.h
generic-y += pci.h
generic-y += percpu.h
generic-y += preempt.h
generic-y += topology.h

View File

@ -216,6 +216,14 @@ struct bcr_fp_arcv2 {
#endif
};
struct bcr_actionpoint {
#ifdef CONFIG_CPU_BIG_ENDIAN
unsigned int pad:21, min:1, num:2, ver:8;
#else
unsigned int ver:8, num:2, min:1, pad:21;
#endif
};
#include <soc/arc/timers.h>
struct bcr_bpu_arcompact {
@ -283,7 +291,7 @@ struct cpuinfo_arc_cache {
};
struct cpuinfo_arc_bpu {
unsigned int ver, full, num_cache, num_pred;
unsigned int ver, full, num_cache, num_pred, ret_stk;
};
struct cpuinfo_arc_ccm {
@ -302,7 +310,7 @@ struct cpuinfo_arc {
struct {
unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, swape:1, pad1:2,
fpu_sp:1, fpu_dp:1, dual:1, dual_enb:1, pad2:4,
debug:1, ap:1, smart:1, rtt:1, pad3:4,
ap_num:4, ap_full:1, smart:1, rtt:1, pad3:1,
timer0:1, timer1:1, rtc:1, gfrc:1, pad4:4;
} extn;
struct bcr_mpy extn_mpy;

View File

@ -340,7 +340,7 @@ static inline __attribute__ ((const)) int __fls(unsigned long x)
/*
* __ffs: Similar to ffs, but zero based (0-31)
*/
static inline __attribute__ ((const)) int __ffs(unsigned long word)
static inline __attribute__ ((const)) unsigned long __ffs(unsigned long word)
{
if (!word)
return word;
@ -400,9 +400,9 @@ static inline __attribute__ ((const)) int ffs(unsigned long x)
/*
* __ffs: Similar to ffs, but zero based (0-31)
*/
static inline __attribute__ ((const)) int __ffs(unsigned long x)
static inline __attribute__ ((const)) unsigned long __ffs(unsigned long x)
{
int n;
unsigned long n;
asm volatile(
" ffs.f %0, %1 \n" /* 0:31; 31(Z) if src 0 */

View File

@ -103,7 +103,8 @@ static const char * const arc_pmu_ev_hw_map[] = {
/* counts condition */
[PERF_COUNT_HW_INSTRUCTIONS] = "iall",
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp", /* Excludes ZOL jumps */
/* All jump instructions that are taken */
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmptak",
[PERF_COUNT_ARC_BPOK] = "bpok", /* NP-NT, PT-T, PNT-NT */
#ifdef CONFIG_ISA_ARCV2
[PERF_COUNT_HW_BRANCH_MISSES] = "bpmp",

View File

@ -1,15 +1,10 @@
/*
* Linux performance counter support for ARC700 series
*
* Copyright (C) 2013-2015 Synopsys, Inc. (www.synopsys.com)
*
* This code is inspired by the perf support of various other architectures.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
*/
// SPDX-License-Identifier: GPL-2.0+
//
// Linux performance counter support for ARC CPUs.
// This code is inspired by the perf support of various other architectures.
//
// Copyright (C) 2013-2018 Synopsys, Inc. (www.synopsys.com)
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/module.h>
@ -19,12 +14,31 @@
#include <asm/arcregs.h>
#include <asm/stacktrace.h>
/* HW holds 8 symbols + one for null terminator */
#define ARCPMU_EVENT_NAME_LEN 9
enum arc_pmu_attr_groups {
ARCPMU_ATTR_GR_EVENTS,
ARCPMU_ATTR_GR_FORMATS,
ARCPMU_NR_ATTR_GR
};
struct arc_pmu_raw_event_entry {
char name[ARCPMU_EVENT_NAME_LEN];
};
struct arc_pmu {
struct pmu pmu;
unsigned int irq;
int n_counters;
int n_events;
u64 max_period;
int ev_hw_idx[PERF_COUNT_ARC_HW_MAX];
struct arc_pmu_raw_event_entry *raw_entry;
struct attribute **attrs;
struct perf_pmu_events_attr *attr;
const struct attribute_group *attr_groups[ARCPMU_NR_ATTR_GR + 1];
};
struct arc_pmu_cpu {
@ -49,6 +63,7 @@ static int callchain_trace(unsigned int addr, void *data)
{
struct arc_callchain_trace *ctrl = data;
struct perf_callchain_entry_ctx *entry = ctrl->perf_stuff;
perf_callchain_store(entry, addr);
if (ctrl->depth++ < 3)
@ -57,8 +72,8 @@ static int callchain_trace(unsigned int addr, void *data)
return -1;
}
void
perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
struct arc_callchain_trace ctrl = {
.depth = 0,
@ -68,8 +83,8 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
arc_unwind_core(NULL, regs, callchain_trace, &ctrl);
}
void
perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
/*
* User stack can't be unwound trivially with kernel dwarf unwinder
@ -82,10 +97,10 @@ static struct arc_pmu *arc_pmu;
static DEFINE_PER_CPU(struct arc_pmu_cpu, arc_pmu_cpu);
/* read counter #idx; note that counter# != event# on ARC! */
static uint64_t arc_pmu_read_counter(int idx)
static u64 arc_pmu_read_counter(int idx)
{
uint32_t tmp;
uint64_t result;
u32 tmp;
u64 result;
/*
* ARC supports making 'snapshots' of the counters, so we don't
@ -94,7 +109,7 @@ static uint64_t arc_pmu_read_counter(int idx)
write_aux_reg(ARC_REG_PCT_INDEX, idx);
tmp = read_aux_reg(ARC_REG_PCT_CONTROL);
write_aux_reg(ARC_REG_PCT_CONTROL, tmp | ARC_REG_PCT_CONTROL_SN);
result = (uint64_t) (read_aux_reg(ARC_REG_PCT_SNAPH)) << 32;
result = (u64) (read_aux_reg(ARC_REG_PCT_SNAPH)) << 32;
result |= read_aux_reg(ARC_REG_PCT_SNAPL);
return result;
@ -103,9 +118,9 @@ static uint64_t arc_pmu_read_counter(int idx)
static void arc_perf_event_update(struct perf_event *event,
struct hw_perf_event *hwc, int idx)
{
uint64_t prev_raw_count = local64_read(&hwc->prev_count);
uint64_t new_raw_count = arc_pmu_read_counter(idx);
int64_t delta = new_raw_count - prev_raw_count;
u64 prev_raw_count = local64_read(&hwc->prev_count);
u64 new_raw_count = arc_pmu_read_counter(idx);
s64 delta = new_raw_count - prev_raw_count;
/*
* We aren't afraid of hwc->prev_count changing beneath our feet
@ -155,7 +170,7 @@ static int arc_pmu_event_init(struct perf_event *event)
int ret;
if (!is_sampling_event(event)) {
hwc->sample_period = arc_pmu->max_period;
hwc->sample_period = arc_pmu->max_period;
hwc->last_period = hwc->sample_period;
local64_set(&hwc->period_left, hwc->sample_period);
}
@ -192,6 +207,18 @@ static int arc_pmu_event_init(struct perf_event *event)
pr_debug("init cache event with h/w %08x \'%s\'\n",
(int)hwc->config, arc_pmu_ev_hw_map[ret]);
return 0;
case PERF_TYPE_RAW:
if (event->attr.config >= arc_pmu->n_events)
return -ENOENT;
hwc->config |= event->attr.config;
pr_debug("init raw event with idx %lld \'%s\'\n",
event->attr.config,
arc_pmu->raw_entry[event->attr.config].name);
return 0;
default:
return -ENOENT;
}
@ -200,7 +227,7 @@ static int arc_pmu_event_init(struct perf_event *event)
/* starts all counters */
static void arc_pmu_enable(struct pmu *pmu)
{
uint32_t tmp;
u32 tmp;
tmp = read_aux_reg(ARC_REG_PCT_CONTROL);
write_aux_reg(ARC_REG_PCT_CONTROL, (tmp & 0xffff0000) | 0x1);
}
@ -208,7 +235,7 @@ static void arc_pmu_enable(struct pmu *pmu)
/* stops all counters */
static void arc_pmu_disable(struct pmu *pmu)
{
uint32_t tmp;
u32 tmp;
tmp = read_aux_reg(ARC_REG_PCT_CONTROL);
write_aux_reg(ARC_REG_PCT_CONTROL, (tmp & 0xffff0000) | 0x0);
}
@ -228,7 +255,7 @@ static int arc_pmu_event_set_period(struct perf_event *event)
local64_set(&hwc->period_left, left);
hwc->last_period = period;
overflow = 1;
} else if (unlikely(left <= 0)) {
} else if (unlikely(left <= 0)) {
/* left underflowed by less than period. */
left += period;
local64_set(&hwc->period_left, left);
@ -246,8 +273,8 @@ static int arc_pmu_event_set_period(struct perf_event *event)
write_aux_reg(ARC_REG_PCT_INDEX, idx);
/* Write value */
write_aux_reg(ARC_REG_PCT_COUNTL, (u32)value);
write_aux_reg(ARC_REG_PCT_COUNTH, (value >> 32));
write_aux_reg(ARC_REG_PCT_COUNTL, lower_32_bits(value));
write_aux_reg(ARC_REG_PCT_COUNTH, upper_32_bits(value));
perf_event_update_userpage(event);
@ -277,7 +304,7 @@ static void arc_pmu_start(struct perf_event *event, int flags)
/* Enable interrupt for this counter */
if (is_sampling_event(event))
write_aux_reg(ARC_REG_PCT_INT_CTRL,
read_aux_reg(ARC_REG_PCT_INT_CTRL) | (1 << idx));
read_aux_reg(ARC_REG_PCT_INT_CTRL) | BIT(idx));
/* enable ARC pmu here */
write_aux_reg(ARC_REG_PCT_INDEX, idx); /* counter # */
@ -295,9 +322,9 @@ static void arc_pmu_stop(struct perf_event *event, int flags)
* Reset interrupt flag by writing of 1. This is required
* to make sure pending interrupt was not left.
*/
write_aux_reg(ARC_REG_PCT_INT_ACT, 1 << idx);
write_aux_reg(ARC_REG_PCT_INT_ACT, BIT(idx));
write_aux_reg(ARC_REG_PCT_INT_CTRL,
read_aux_reg(ARC_REG_PCT_INT_CTRL) & ~(1 << idx));
read_aux_reg(ARC_REG_PCT_INT_CTRL) & ~BIT(idx));
}
if (!(event->hw.state & PERF_HES_STOPPED)) {
@ -349,9 +376,10 @@ static int arc_pmu_add(struct perf_event *event, int flags)
if (is_sampling_event(event)) {
/* Mimic full counter overflow as other arches do */
write_aux_reg(ARC_REG_PCT_INT_CNTL, (u32)arc_pmu->max_period);
write_aux_reg(ARC_REG_PCT_INT_CNTL,
lower_32_bits(arc_pmu->max_period));
write_aux_reg(ARC_REG_PCT_INT_CNTH,
(arc_pmu->max_period >> 32));
upper_32_bits(arc_pmu->max_period));
}
write_aux_reg(ARC_REG_PCT_CONFIG, 0);
@ -392,7 +420,7 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev)
idx = __ffs(active_ints);
/* Reset interrupt flag by writing of 1 */
write_aux_reg(ARC_REG_PCT_INT_ACT, 1 << idx);
write_aux_reg(ARC_REG_PCT_INT_ACT, BIT(idx));
/*
* On reset of "interrupt active" bit corresponding
@ -400,7 +428,7 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev)
* Now we need to re-enable interrupt for the counter.
*/
write_aux_reg(ARC_REG_PCT_INT_CTRL,
read_aux_reg(ARC_REG_PCT_INT_CTRL) | (1 << idx));
read_aux_reg(ARC_REG_PCT_INT_CTRL) | BIT(idx));
event = pmu_cpu->act_counter[idx];
hwc = &event->hw;
@ -414,7 +442,7 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev)
arc_pmu_stop(event, 0);
}
active_ints &= ~(1U << idx);
active_ints &= ~BIT(idx);
} while (active_ints);
done:
@ -441,19 +469,108 @@ static void arc_cpu_pmu_irq_init(void *data)
write_aux_reg(ARC_REG_PCT_INT_ACT, 0xffffffff);
}
/* Event field occupies the bottom 15 bits of our config field */
PMU_FORMAT_ATTR(event, "config:0-14");
static struct attribute *arc_pmu_format_attrs[] = {
&format_attr_event.attr,
NULL,
};
static struct attribute_group arc_pmu_format_attr_gr = {
.name = "format",
.attrs = arc_pmu_format_attrs,
};
static ssize_t arc_pmu_events_sysfs_show(struct device *dev,
struct device_attribute *attr,
char *page)
{
struct perf_pmu_events_attr *pmu_attr;
pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
return sprintf(page, "event=0x%04llx\n", pmu_attr->id);
}
/*
* We don't add attrs here as we don't have pre-defined list of perf events.
* We will generate and add attrs dynamically in probe() after we read HW
* configuration.
*/
static struct attribute_group arc_pmu_events_attr_gr = {
.name = "events",
};
static void arc_pmu_add_raw_event_attr(int j, char *str)
{
memmove(arc_pmu->raw_entry[j].name, str, ARCPMU_EVENT_NAME_LEN - 1);
arc_pmu->attr[j].attr.attr.name = arc_pmu->raw_entry[j].name;
arc_pmu->attr[j].attr.attr.mode = VERIFY_OCTAL_PERMISSIONS(0444);
arc_pmu->attr[j].attr.show = arc_pmu_events_sysfs_show;
arc_pmu->attr[j].id = j;
arc_pmu->attrs[j] = &(arc_pmu->attr[j].attr.attr);
}
static int arc_pmu_raw_alloc(struct device *dev)
{
arc_pmu->attr = devm_kmalloc_array(dev, arc_pmu->n_events + 1,
sizeof(*arc_pmu->attr), GFP_KERNEL | __GFP_ZERO);
if (!arc_pmu->attr)
return -ENOMEM;
arc_pmu->attrs = devm_kmalloc_array(dev, arc_pmu->n_events + 1,
sizeof(*arc_pmu->attrs), GFP_KERNEL | __GFP_ZERO);
if (!arc_pmu->attrs)
return -ENOMEM;
arc_pmu->raw_entry = devm_kmalloc_array(dev, arc_pmu->n_events,
sizeof(*arc_pmu->raw_entry), GFP_KERNEL | __GFP_ZERO);
if (!arc_pmu->raw_entry)
return -ENOMEM;
return 0;
}
static inline bool event_in_hw_event_map(int i, char *name)
{
if (!arc_pmu_ev_hw_map[i])
return false;
if (!strlen(arc_pmu_ev_hw_map[i]))
return false;
if (strcmp(arc_pmu_ev_hw_map[i], name))
return false;
return true;
}
static void arc_pmu_map_hw_event(int j, char *str)
{
int i;
/* See if HW condition has been mapped to a perf event_id */
for (i = 0; i < ARRAY_SIZE(arc_pmu_ev_hw_map); i++) {
if (event_in_hw_event_map(i, str)) {
pr_debug("mapping perf event %2d to h/w event \'%8s\' (idx %d)\n",
i, str, j);
arc_pmu->ev_hw_idx[i] = j;
}
}
}
static int arc_pmu_device_probe(struct platform_device *pdev)
{
struct arc_reg_pct_build pct_bcr;
struct arc_reg_cc_build cc_bcr;
int i, j, has_interrupts;
int i, has_interrupts;
int counter_size; /* in bits */
union cc_name {
struct {
uint32_t word0, word1;
u32 word0, word1;
char sentinel;
} indiv;
char str[9];
char str[ARCPMU_EVENT_NAME_LEN];
} cc_name;
@ -463,15 +580,22 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
return -ENODEV;
}
BUILD_BUG_ON(ARC_PERF_MAX_COUNTERS > 32);
BUG_ON(pct_bcr.c > ARC_PERF_MAX_COUNTERS);
if (WARN_ON(pct_bcr.c > ARC_PERF_MAX_COUNTERS))
return -EINVAL;
READ_BCR(ARC_REG_CC_BUILD, cc_bcr);
BUG_ON(!cc_bcr.v); /* Counters exist but No countable conditions ? */
if (WARN(!cc_bcr.v, "Counters exist but No countable conditions?"))
return -EINVAL;
arc_pmu = devm_kzalloc(&pdev->dev, sizeof(struct arc_pmu), GFP_KERNEL);
if (!arc_pmu)
return -ENOMEM;
arc_pmu->n_events = cc_bcr.c;
if (arc_pmu_raw_alloc(&pdev->dev))
return -ENOMEM;
has_interrupts = is_isa_arcv2() ? pct_bcr.i : 0;
arc_pmu->n_counters = pct_bcr.c;
@ -481,30 +605,26 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
pr_info("ARC perf\t: %d counters (%d bits), %d conditions%s\n",
arc_pmu->n_counters, counter_size, cc_bcr.c,
has_interrupts ? ", [overflow IRQ support]":"");
has_interrupts ? ", [overflow IRQ support]" : "");
cc_name.str[8] = 0;
cc_name.str[ARCPMU_EVENT_NAME_LEN - 1] = 0;
for (i = 0; i < PERF_COUNT_ARC_HW_MAX; i++)
arc_pmu->ev_hw_idx[i] = -1;
/* loop thru all available h/w condition indexes */
for (j = 0; j < cc_bcr.c; j++) {
write_aux_reg(ARC_REG_CC_INDEX, j);
for (i = 0; i < cc_bcr.c; i++) {
write_aux_reg(ARC_REG_CC_INDEX, i);
cc_name.indiv.word0 = read_aux_reg(ARC_REG_CC_NAME0);
cc_name.indiv.word1 = read_aux_reg(ARC_REG_CC_NAME1);
/* See if it has been mapped to a perf event_id */
for (i = 0; i < ARRAY_SIZE(arc_pmu_ev_hw_map); i++) {
if (arc_pmu_ev_hw_map[i] &&
!strcmp(arc_pmu_ev_hw_map[i], cc_name.str) &&
strlen(arc_pmu_ev_hw_map[i])) {
pr_debug("mapping perf event %2d to h/w event \'%8s\' (idx %d)\n",
i, cc_name.str, j);
arc_pmu->ev_hw_idx[i] = j;
}
}
arc_pmu_map_hw_event(i, cc_name.str);
arc_pmu_add_raw_event_attr(i, cc_name.str);
}
arc_pmu_events_attr_gr.attrs = arc_pmu->attrs;
arc_pmu->attr_groups[ARCPMU_ATTR_GR_EVENTS] = &arc_pmu_events_attr_gr;
arc_pmu->attr_groups[ARCPMU_ATTR_GR_FORMATS] = &arc_pmu_format_attr_gr;
arc_pmu->pmu = (struct pmu) {
.pmu_enable = arc_pmu_enable,
.pmu_disable = arc_pmu_disable,
@ -514,6 +634,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
.start = arc_pmu_start,
.stop = arc_pmu_stop,
.read = arc_pmu_read,
.attr_groups = arc_pmu->attr_groups,
};
if (has_interrupts) {
@ -535,17 +656,19 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
} else
arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
return perf_pmu_register(&arc_pmu->pmu, pdev->name, PERF_TYPE_RAW);
/*
* The perf parser doesn't really like the '-' symbol in event names, so let's
* use '_' in the arc pct name as it goes into the kernel PMU event prefix.
*/
return perf_pmu_register(&arc_pmu->pmu, "arc_pct", PERF_TYPE_RAW);
}
#ifdef CONFIG_OF
static const struct of_device_id arc_pmu_match[] = {
{ .compatible = "snps,arc700-pct" },
{ .compatible = "snps,archs-pct" },
{},
};
MODULE_DEVICE_TABLE(of, arc_pmu_match);
#endif
static struct platform_driver arc_pmu_driver = {
.driver = {

View File

@ -123,6 +123,7 @@ static void read_arc_build_cfg_regs(void)
struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
const struct id_to_str *tbl;
struct bcr_isa_arcv2 isa;
struct bcr_actionpoint ap;
FIX_PTR(cpu);
@ -195,6 +196,7 @@ static void read_arc_build_cfg_regs(void)
cpu->bpu.full = bpu.ft;
cpu->bpu.num_cache = 256 << bpu.bce;
cpu->bpu.num_pred = 2048 << bpu.pte;
cpu->bpu.ret_stk = 4 << bpu.rse;
if (cpu->core.family >= 0x54) {
unsigned int exec_ctrl;
@ -207,8 +209,11 @@ static void read_arc_build_cfg_regs(void)
}
}
READ_BCR(ARC_REG_AP_BCR, bcr);
cpu->extn.ap = bcr.ver ? 1 : 0;
READ_BCR(ARC_REG_AP_BCR, ap);
if (ap.ver) {
cpu->extn.ap_num = 2 << ap.num;
cpu->extn.ap_full = !!ap.min;
}
READ_BCR(ARC_REG_SMART_BCR, bcr);
cpu->extn.smart = bcr.ver ? 1 : 0;
@ -216,8 +221,6 @@ static void read_arc_build_cfg_regs(void)
READ_BCR(ARC_REG_RTT_BCR, bcr);
cpu->extn.rtt = bcr.ver ? 1 : 0;
cpu->extn.debug = cpu->extn.ap | cpu->extn.smart | cpu->extn.rtt;
READ_BCR(ARC_REG_ISA_CFG_BCR, isa);
/* some hacks for lack of feature BCR info in old ARC700 cores */
@ -299,10 +302,10 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
if (cpu->bpu.ver)
n += scnprintf(buf + n, len - n,
"BPU\t\t: %s%s match, cache:%d, Predict Table:%d",
"BPU\t\t: %s%s match, cache:%d, Predict Table:%d Return stk: %d",
IS_AVAIL1(cpu->bpu.full, "full"),
IS_AVAIL1(!cpu->bpu.full, "partial"),
cpu->bpu.num_cache, cpu->bpu.num_pred);
cpu->bpu.num_cache, cpu->bpu.num_pred, cpu->bpu.ret_stk);
if (is_isa_arcv2()) {
struct bcr_lpb lpb;
@ -336,11 +339,17 @@ static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
IS_AVAIL1(cpu->extn.fpu_sp, "SP "),
IS_AVAIL1(cpu->extn.fpu_dp, "DP "));
if (cpu->extn.debug)
n += scnprintf(buf + n, len - n, "DEBUG\t\t: %s%s%s\n",
IS_AVAIL1(cpu->extn.ap, "ActionPoint "),
if (cpu->extn.ap_num | cpu->extn.smart | cpu->extn.rtt) {
n += scnprintf(buf + n, len - n, "DEBUG\t\t: %s%s",
IS_AVAIL1(cpu->extn.smart, "smaRT "),
IS_AVAIL1(cpu->extn.rtt, "RTT "));
if (cpu->extn.ap_num) {
n += scnprintf(buf + n, len - n, "ActionPoint %d/%s",
cpu->extn.ap_num,
cpu->extn.ap_full ? "full":"min");
}
n += scnprintf(buf + n, len - n, "\n");
}
if (cpu->dccm.sz || cpu->iccm.sz)
n += scnprintf(buf + n, len - n, "Extn [CCM]\t: DCCM @ %x, %d KB / ICCM: @ %x, %d KB\n",

View File

@ -18,6 +18,8 @@
#include <asm/arcregs.h>
#include <asm/irqflags.h>
#define ARC_PATH_MAX 256
/*
* Common routine to print scratch regs (r0-r12) or callee regs (r13-r25)
* -Prints 3 regs per line and a CR.
@ -58,11 +60,12 @@ static void show_callee_regs(struct callee_regs *cregs)
print_reg_file(&(cregs->r13), 13);
}
static void print_task_path_n_nm(struct task_struct *tsk, char *buf)
static void print_task_path_n_nm(struct task_struct *tsk)
{
char *path_nm = NULL;
struct mm_struct *mm;
struct file *exe_file;
char buf[ARC_PATH_MAX];
mm = get_task_mm(tsk);
if (!mm)
@ -72,7 +75,7 @@ static void print_task_path_n_nm(struct task_struct *tsk, char *buf)
mmput(mm);
if (exe_file) {
path_nm = file_path(exe_file, buf, 255);
path_nm = file_path(exe_file, buf, ARC_PATH_MAX-1);
fput(exe_file);
}
@ -80,10 +83,9 @@ done:
pr_info("Path: %s\n", !IS_ERR(path_nm) ? path_nm : "?");
}
static void show_faulting_vma(unsigned long address, char *buf)
static void show_faulting_vma(unsigned long address)
{
struct vm_area_struct *vma;
char *nm = buf;
struct mm_struct *active_mm = current->active_mm;
/* can't use print_vma_addr() yet as it doesn't check for
@ -96,8 +98,11 @@ static void show_faulting_vma(unsigned long address, char *buf)
* if the container VMA is not found
*/
if (vma && (vma->vm_start <= address)) {
char buf[ARC_PATH_MAX];
char *nm = "?";
if (vma->vm_file) {
nm = file_path(vma->vm_file, buf, PAGE_SIZE - 1);
nm = file_path(vma->vm_file, buf, ARC_PATH_MAX-1);
if (IS_ERR(nm))
nm = "?";
}
@ -173,13 +178,14 @@ void show_regs(struct pt_regs *regs)
{
struct task_struct *tsk = current;
struct callee_regs *cregs;
char *buf;
buf = (char *)__get_free_page(GFP_KERNEL);
if (!buf)
return;
/*
* generic code calls us with preemption disabled, but some calls
* here could sleep, so re-enable to avoid lockdep splat
*/
preempt_enable();
print_task_path_n_nm(tsk, buf);
print_task_path_n_nm(tsk);
show_regs_print_info(KERN_INFO);
show_ecr_verbose(regs);
@ -189,7 +195,7 @@ void show_regs(struct pt_regs *regs)
(void *)regs->blink, (void *)regs->ret);
if (user_mode(regs))
show_faulting_vma(regs->ret, buf); /* faulting code, not data */
show_faulting_vma(regs->ret); /* faulting code, not data */
pr_info("[STAT32]: 0x%08lx", regs->status32);
@ -222,7 +228,7 @@ void show_regs(struct pt_regs *regs)
if (cregs)
show_callee_regs(cregs);
free_page((unsigned long)buf);
preempt_disable();
}
void show_kernel_fault_diag(const char *str, struct pt_regs *regs,

View File

@ -7,11 +7,39 @@
*/
#include <linux/linkage.h>
#include <asm/cache.h>
#undef PREALLOC_NOT_AVAIL
/*
* The memset implementation below is optimized to use prefetchw and prealloc
* instructions in the case of a CPU with a 64B L1 data cache line (L1_CACHE_SHIFT == 6).
* If you want to implement an optimized memset for other possible L1 data cache
* line lengths (32B and 128B), you should rewrite the code, carefully checking
* that we don't call any prefetchw/prealloc instruction for L1 cache lines which
* don't belong to the memset area.
*/
#if L1_CACHE_SHIFT == 6
.macro PREALLOC_INSTR reg, off
prealloc [\reg, \off]
.endm
.macro PREFETCHW_INSTR reg, off
prefetchw [\reg, \off]
.endm
#else
.macro PREALLOC_INSTR
.endm
.macro PREFETCHW_INSTR
.endm
#endif
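To make the constraint described in the comment above concrete (a C-level sketch under my own naming, not part of this file): prealloc establishes a whole cache line without fetching it, so it is only safe when the entire line lies inside the region being set. A model of that safety condition, with a hypothetical line_size parameter:

/* Sketch only: preallocating a cache line is safe iff the whole line
 * [line, line + line_size) lies inside the memset destination
 * [dst, dst + n). line_size would be 64 when L1_CACHE_SHIFT == 6. */
static inline int prealloc_is_safe(const char *dst, unsigned long n,
				   const char *line, unsigned long line_size)
{
	return line >= dst && line + line_size <= dst + n;
}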
ENTRY_CFI(memset)
prefetchw [r0] ; Prefetch the write location
PREFETCHW_INSTR r0, 0 ; Prefetch the first write location
mov.f 0, r2
;;; if size is zero
jz.d [blink]
@ -48,11 +76,8 @@ ENTRY_CFI(memset)
lpnz @.Lset64bytes
;; LOOP START
#ifdef PREALLOC_NOT_AVAIL
prefetchw [r3, 64] ;Prefetch the next write location
#else
prealloc [r3, 64]
#endif
PREALLOC_INSTR r3, 64 ; alloc next line w/o fetching
#ifdef CONFIG_ARC_HAS_LL64
std.ab r4, [r3, 8]
std.ab r4, [r3, 8]
@ -85,7 +110,6 @@ ENTRY_CFI(memset)
lsr.f lp_count, r2, 5 ;Last remaining max 124 bytes
lpnz .Lset32bytes
;; LOOP START
prefetchw [r3, 32] ;Prefetch the next write location
#ifdef CONFIG_ARC_HAS_LL64
std.ab r4, [r3, 8]
std.ab r4, [r3, 8]

View File

@ -141,12 +141,17 @@ good_area:
*/
fault = handle_mm_fault(vma, address, flags);
/* If Pagefault was interrupted by SIGKILL, exit page fault "early" */
if (fatal_signal_pending(current)) {
if ((fault & VM_FAULT_ERROR) && !(fault & VM_FAULT_RETRY))
up_read(&mm->mmap_sem);
if (user_mode(regs))
/*
* if fault retry, mmap_sem already relinquished by core mm
* so OK to return to user mode (with signal handled first)
*/
if (fault & VM_FAULT_RETRY) {
if (!user_mode(regs))
goto no_context;
return;
}
}
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);

View File

@ -119,7 +119,8 @@ void __init setup_arch_memory(void)
*/
memblock_add_node(low_mem_start, low_mem_sz, 0);
memblock_reserve(low_mem_start, __pa(_end) - low_mem_start);
memblock_reserve(CONFIG_LINUX_LINK_BASE,
__pa(_end) - CONFIG_LINUX_LINK_BASE);
#ifdef CONFIG_BLK_DEV_INITRD
if (phys_initrd_size) {

View File

@ -706,6 +706,7 @@
fsl,tmr-fiper1 = <999999995>;
fsl,tmr-fiper2 = <99990>;
fsl,max-adj = <499999999>;
fsl,extts-fifo;
};
enet0: ethernet@2d10000 {

View File

@ -1 +1,95 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_ARM_XEN_PAGE_COHERENT_H
#define _ASM_ARM_XEN_PAGE_COHERENT_H
#include <linux/dma-mapping.h>
#include <asm/page.h>
#include <xen/arm/page-coherent.h>
static inline const struct dma_map_ops *xen_get_dma_ops(struct device *dev)
{
if (dev && dev->archdata.dev_dma_ops)
return dev->archdata.dev_dma_ops;
return get_arch_dma_ops(NULL);
}
static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs)
{
return xen_get_dma_ops(hwdev)->alloc(hwdev, size, dma_handle, flags, attrs);
}
static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs)
{
xen_get_dma_ops(hwdev)->free(hwdev, size, cpu_addr, dma_handle, attrs);
}
static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
dma_addr_t dev_addr, unsigned long offset, size_t size,
enum dma_data_direction dir, unsigned long attrs)
{
unsigned long page_pfn = page_to_xen_pfn(page);
unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr);
unsigned long compound_pages =
(1<<compound_order(page)) * XEN_PFN_PER_PAGE;
bool local = (page_pfn <= dev_pfn) &&
(dev_pfn - page_pfn < compound_pages);
/*
* Dom0 is mapped 1:1, while the Linux page can span across
* multiple Xen pages, it's not possible for it to contain a
* mix of local and foreign Xen pages. So if the first xen_pfn
* == mfn the page is local otherwise it's a foreign page
* grant-mapped in dom0. If the page is local we can safely
* call the native dma_ops function, otherwise we call the xen
* specific function.
*/
if (local)
xen_get_dma_ops(hwdev)->map_page(hwdev, page, offset, size, dir, attrs);
else
__xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs);
}
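A worked example of the locality test above (numbers made up for illustration): with a compound page spanning 4 Xen pages starting at Xen pfn 1000, a device address is treated as local exactly when its pfn falls in [1000, 1004). A self-contained sketch of the same predicate:

/* Sketch (not kernel code) of the containment check used above. */
#include <stdbool.h>

static bool xen_page_is_local(unsigned long page_pfn, unsigned long dev_pfn,
			      unsigned long compound_pages)
{
	return page_pfn <= dev_pfn && dev_pfn - page_pfn < compound_pages;
}
/* xen_page_is_local(1000, 1002, 4) -> true
 * xen_page_is_local(1000, 1004, 4) -> false */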
static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir, unsigned long attrs)
{
unsigned long pfn = PFN_DOWN(handle);
/*
* Dom0 is mapped 1:1, and while a Linux page can span across
* multiple Xen pages, it's not possible to have a mix of local and
* foreign Xen pages. Dom0 is mapped 1:1, so calling pfn_valid on a
* foreign mfn will always return false. If the page is local we can
* safely call the native dma_ops function, otherwise we call the xen
* specific function.
*/
if (pfn_valid(pfn)) {
if (xen_get_dma_ops(hwdev)->unmap_page)
xen_get_dma_ops(hwdev)->unmap_page(hwdev, handle, size, dir, attrs);
} else
__xen_dma_unmap_page(hwdev, handle, size, dir, attrs);
}
static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
unsigned long pfn = PFN_DOWN(handle);
if (pfn_valid(pfn)) {
if (xen_get_dma_ops(hwdev)->sync_single_for_cpu)
xen_get_dma_ops(hwdev)->sync_single_for_cpu(hwdev, handle, size, dir);
} else
__xen_dma_sync_single_for_cpu(hwdev, handle, size, dir);
}
static inline void xen_dma_sync_single_for_device(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
unsigned long pfn = PFN_DOWN(handle);
if (pfn_valid(pfn)) {
if (xen_get_dma_ops(hwdev)->sync_single_for_device)
xen_get_dma_ops(hwdev)->sync_single_for_device(hwdev, handle, size, dir);
} else
__xen_dma_sync_single_for_device(hwdev, handle, size, dir);
}
#endif /* _ASM_ARM_XEN_PAGE_COHERENT_H */

View File

@ -1083,12 +1083,17 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src,
/* Arithmetic Operation */
static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm,
const u8 rn, struct jit_ctx *ctx, u8 op) {
const u8 rn, struct jit_ctx *ctx, u8 op,
bool is_jmp64) {
switch (op) {
case BPF_JSET:
emit(ARM_AND_R(ARM_IP, rt, rn), ctx);
emit(ARM_AND_R(ARM_LR, rd, rm), ctx);
emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx);
if (is_jmp64) {
emit(ARM_AND_R(ARM_IP, rt, rn), ctx);
emit(ARM_AND_R(ARM_LR, rd, rm), ctx);
emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx);
} else {
emit(ARM_ANDS_R(ARM_IP, rt, rn), ctx);
}
break;
case BPF_JEQ:
case BPF_JNE:
@ -1096,18 +1101,25 @@ static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm,
case BPF_JGE:
case BPF_JLE:
case BPF_JLT:
emit(ARM_CMP_R(rd, rm), ctx);
_emit(ARM_COND_EQ, ARM_CMP_R(rt, rn), ctx);
if (is_jmp64) {
emit(ARM_CMP_R(rd, rm), ctx);
/* Only compare the low halves if the high halves are equal. */
_emit(ARM_COND_EQ, ARM_CMP_R(rt, rn), ctx);
} else {
emit(ARM_CMP_R(rt, rn), ctx);
}
break;
case BPF_JSLE:
case BPF_JSGT:
emit(ARM_CMP_R(rn, rt), ctx);
emit(ARM_SBCS_R(ARM_IP, rm, rd), ctx);
if (is_jmp64)
emit(ARM_SBCS_R(ARM_IP, rm, rd), ctx);
break;
case BPF_JSLT:
case BPF_JSGE:
emit(ARM_CMP_R(rt, rn), ctx);
emit(ARM_SBCS_R(ARM_IP, rd, rm), ctx);
if (is_jmp64)
emit(ARM_SBCS_R(ARM_IP, rd, rm), ctx);
break;
}
}
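As a rough C model of what the emitted comparisons compute (an illustration, not the JIT itself): for BPF_JMP the two 32-bit halves represent one 64-bit value, so the low halves only decide the outcome when the high halves are equal, while BPF_JMP32 ignores the high halves entirely. Sketch for an unsigned "less than":

/* Sketch only: unsigned "dst < src" as the JITed sequence evaluates it. */
#include <stdint.h>

static int jit_cmp_lt(uint32_t dst_hi, uint32_t dst_lo,
		      uint32_t src_hi, uint32_t src_lo, int is_jmp64)
{
	if (!is_jmp64)
		return dst_lo < src_lo;		/* BPF_JMP32: 32-bit halves only */
	if (dst_hi != src_hi)
		return dst_hi < src_hi;		/* high halves decide */
	return dst_lo < src_lo;			/* equal highs: low halves decide */
}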
@ -1615,6 +1627,17 @@ exit:
case BPF_JMP | BPF_JLT | BPF_X:
case BPF_JMP | BPF_JSLT | BPF_X:
case BPF_JMP | BPF_JSLE | BPF_X:
case BPF_JMP32 | BPF_JEQ | BPF_X:
case BPF_JMP32 | BPF_JGT | BPF_X:
case BPF_JMP32 | BPF_JGE | BPF_X:
case BPF_JMP32 | BPF_JNE | BPF_X:
case BPF_JMP32 | BPF_JSGT | BPF_X:
case BPF_JMP32 | BPF_JSGE | BPF_X:
case BPF_JMP32 | BPF_JSET | BPF_X:
case BPF_JMP32 | BPF_JLE | BPF_X:
case BPF_JMP32 | BPF_JLT | BPF_X:
case BPF_JMP32 | BPF_JSLT | BPF_X:
case BPF_JMP32 | BPF_JSLE | BPF_X:
/* Setup source registers */
rm = arm_bpf_get_reg32(src_hi, tmp2[0], ctx);
rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
@ -1641,6 +1664,17 @@ exit:
case BPF_JMP | BPF_JLE | BPF_K:
case BPF_JMP | BPF_JSLT | BPF_K:
case BPF_JMP | BPF_JSLE | BPF_K:
case BPF_JMP32 | BPF_JEQ | BPF_K:
case BPF_JMP32 | BPF_JGT | BPF_K:
case BPF_JMP32 | BPF_JGE | BPF_K:
case BPF_JMP32 | BPF_JNE | BPF_K:
case BPF_JMP32 | BPF_JSGT | BPF_K:
case BPF_JMP32 | BPF_JSGE | BPF_K:
case BPF_JMP32 | BPF_JSET | BPF_K:
case BPF_JMP32 | BPF_JLT | BPF_K:
case BPF_JMP32 | BPF_JLE | BPF_K:
case BPF_JMP32 | BPF_JSLT | BPF_K:
case BPF_JMP32 | BPF_JSLE | BPF_K:
if (off == 0)
break;
rm = tmp2[0];
@ -1652,7 +1686,8 @@ go_jmp:
rd = arm_bpf_get_reg64(dst, tmp, ctx);
/* Check for the condition */
emit_ar_r(rd[0], rd[1], rm, rn, ctx, BPF_OP(code));
emit_ar_r(rd[0], rd[1], rm, rn, ctx, BPF_OP(code),
BPF_CLASS(code) == BPF_JMP);
/* Setup JUMP instruction */
jmp_offset = bpf2a32_offset(i+off, i, ctx);

View File

@ -62,6 +62,7 @@
#define ARM_INST_ADDS_I 0x02900000
#define ARM_INST_AND_R 0x00000000
#define ARM_INST_ANDS_R 0x00100000
#define ARM_INST_AND_I 0x02000000
#define ARM_INST_BIC_R 0x01c00000
@ -172,6 +173,7 @@
#define ARM_ADC_I(rd, rn, imm) _AL3_I(ARM_INST_ADC, rd, rn, imm)
#define ARM_AND_R(rd, rn, rm) _AL3_R(ARM_INST_AND, rd, rn, rm)
#define ARM_ANDS_R(rd, rn, rm) _AL3_R(ARM_INST_ANDS, rd, rn, rm)
#define ARM_AND_I(rd, rn, imm) _AL3_I(ARM_INST_AND, rd, rn, imm)
#define ARM_BIC_R(rd, rn, rm) _AL3_R(ARM_INST_BIC, rd, rn, rm)

View File

@ -60,8 +60,6 @@
#ifdef CONFIG_KASAN_SW_TAGS
#define ARCH_SLAB_MINALIGN (1ULL << KASAN_SHADOW_SCALE_SHIFT)
#else
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
#endif
#ifndef __ASSEMBLY__

View File

@ -20,9 +20,6 @@ struct dev_archdata {
#ifdef CONFIG_IOMMU_API
void *iommu; /* private IOMMU data */
#endif
#ifdef CONFIG_XEN
const struct dma_map_ops *dev_dma_ops;
#endif
};
struct pdev_archdata {

View File

@ -60,8 +60,11 @@ static inline bool arm64_kernel_use_ng_mappings(void)
* later determine that kpti is required, then
* kpti_install_ng_mappings() will make them non-global.
*/
if (arm64_kernel_unmapped_at_el0())
return true;
if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
return arm64_kernel_unmapped_at_el0();
return false;
/*
* KASLR is enabled so we're going to be enabling kpti on non-broken

View File

@ -1 +1,77 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_ARM64_XEN_PAGE_COHERENT_H
#define _ASM_ARM64_XEN_PAGE_COHERENT_H
#include <linux/dma-mapping.h>
#include <asm/page.h>
#include <xen/arm/page-coherent.h>
static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs)
{
return dma_direct_alloc(hwdev, size, dma_handle, flags, attrs);
}
static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs)
{
dma_direct_free(hwdev, size, cpu_addr, dma_handle, attrs);
}
static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
unsigned long pfn = PFN_DOWN(handle);
if (pfn_valid(pfn))
dma_direct_sync_single_for_cpu(hwdev, handle, size, dir);
else
__xen_dma_sync_single_for_cpu(hwdev, handle, size, dir);
}
static inline void xen_dma_sync_single_for_device(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
unsigned long pfn = PFN_DOWN(handle);
if (pfn_valid(pfn))
dma_direct_sync_single_for_device(hwdev, handle, size, dir);
else
__xen_dma_sync_single_for_device(hwdev, handle, size, dir);
}
static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
dma_addr_t dev_addr, unsigned long offset, size_t size,
enum dma_data_direction dir, unsigned long attrs)
{
unsigned long page_pfn = page_to_xen_pfn(page);
unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr);
unsigned long compound_pages =
(1<<compound_order(page)) * XEN_PFN_PER_PAGE;
bool local = (page_pfn <= dev_pfn) &&
(dev_pfn - page_pfn < compound_pages);
if (local)
dma_direct_map_page(hwdev, page, offset, size, dir, attrs);
else
__xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs);
}
static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir, unsigned long attrs)
{
unsigned long pfn = PFN_DOWN(handle);
/*
* Dom0 is mapped 1:1, and while a Linux page can span across
* multiple Xen pages, it's not possible to have a mix of local and
* foreign Xen pages. Dom0 is mapped 1:1, so calling pfn_valid on a
* foreign mfn will always return false. If the page is local we can
* safely call the native dma_ops function, otherwise we call the xen
* specific function.
*/
if (pfn_valid(pfn))
dma_direct_unmap_page(hwdev, handle, size, dir, attrs);
else
__xen_dma_unmap_page(hwdev, handle, size, dir, attrs);
}
#endif /* _ASM_ARM64_XEN_PAGE_COHERENT_H */

View File

@ -14,6 +14,7 @@
#include <linux/sched.h>
#include <linux/types.h>
#include <asm/cacheflush.h>
#include <asm/fixmap.h>
#include <asm/kernel-pgtable.h>
#include <asm/memory.h>
@ -43,7 +44,7 @@ static __init u64 get_kaslr_seed(void *fdt)
return ret;
}
static __init const u8 *get_cmdline(void *fdt)
static __init const u8 *kaslr_get_cmdline(void *fdt)
{
static __initconst const u8 default_cmdline[] = CONFIG_CMDLINE;
@ -109,7 +110,7 @@ u64 __init kaslr_early_init(u64 dt_phys)
* Check if 'nokaslr' appears on the command line, and
* return 0 if that is the case.
*/
cmdline = get_cmdline(fdt);
cmdline = kaslr_get_cmdline(fdt);
str = strstr(cmdline, "nokaslr");
if (str == cmdline || (str > cmdline && *(str - 1) == ' '))
return 0;
@ -169,5 +170,8 @@ u64 __init kaslr_early_init(u64 dt_phys)
module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21;
module_alloc_base &= PAGE_MASK;
__flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base));
__flush_dcache_area(&memstart_offset_seed, sizeof(memstart_offset_seed));
return offset;
}

View File

@ -466,9 +466,7 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
__iommu_setup_dma_ops(dev, dma_base, size, iommu);
#ifdef CONFIG_XEN
if (xen_initial_domain()) {
dev->archdata.dev_dma_ops = dev->dma_ops;
if (xen_initial_domain())
dev->dma_ops = xen_dma_ops;
}
#endif
}

View File

@ -362,7 +362,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
const s16 off = insn->off;
const s32 imm = insn->imm;
const int i = insn - ctx->prog->insnsi;
const bool is64 = BPF_CLASS(code) == BPF_ALU64;
const bool is64 = BPF_CLASS(code) == BPF_ALU64 ||
BPF_CLASS(code) == BPF_JMP;
const bool isdw = BPF_SIZE(code) == BPF_DW;
u8 jmp_cond;
s32 jmp_offset;
@ -559,7 +560,17 @@ emit_bswap_uxt:
case BPF_JMP | BPF_JSLT | BPF_X:
case BPF_JMP | BPF_JSGE | BPF_X:
case BPF_JMP | BPF_JSLE | BPF_X:
emit(A64_CMP(1, dst, src), ctx);
case BPF_JMP32 | BPF_JEQ | BPF_X:
case BPF_JMP32 | BPF_JGT | BPF_X:
case BPF_JMP32 | BPF_JLT | BPF_X:
case BPF_JMP32 | BPF_JGE | BPF_X:
case BPF_JMP32 | BPF_JLE | BPF_X:
case BPF_JMP32 | BPF_JNE | BPF_X:
case BPF_JMP32 | BPF_JSGT | BPF_X:
case BPF_JMP32 | BPF_JSLT | BPF_X:
case BPF_JMP32 | BPF_JSGE | BPF_X:
case BPF_JMP32 | BPF_JSLE | BPF_X:
emit(A64_CMP(is64, dst, src), ctx);
emit_cond_jmp:
jmp_offset = bpf2a64_offset(i + off, i, ctx);
check_imm19(jmp_offset);
@ -601,7 +612,8 @@ emit_cond_jmp:
emit(A64_B_(jmp_cond, jmp_offset), ctx);
break;
case BPF_JMP | BPF_JSET | BPF_X:
emit(A64_TST(1, dst, src), ctx);
case BPF_JMP32 | BPF_JSET | BPF_X:
emit(A64_TST(is64, dst, src), ctx);
goto emit_cond_jmp;
/* IF (dst COND imm) JUMP off */
case BPF_JMP | BPF_JEQ | BPF_K:
@ -614,12 +626,23 @@ emit_cond_jmp:
case BPF_JMP | BPF_JSLT | BPF_K:
case BPF_JMP | BPF_JSGE | BPF_K:
case BPF_JMP | BPF_JSLE | BPF_K:
emit_a64_mov_i(1, tmp, imm, ctx);
emit(A64_CMP(1, dst, tmp), ctx);
case BPF_JMP32 | BPF_JEQ | BPF_K:
case BPF_JMP32 | BPF_JGT | BPF_K:
case BPF_JMP32 | BPF_JLT | BPF_K:
case BPF_JMP32 | BPF_JGE | BPF_K:
case BPF_JMP32 | BPF_JLE | BPF_K:
case BPF_JMP32 | BPF_JNE | BPF_K:
case BPF_JMP32 | BPF_JSGT | BPF_K:
case BPF_JMP32 | BPF_JSLT | BPF_K:
case BPF_JMP32 | BPF_JSGE | BPF_K:
case BPF_JMP32 | BPF_JSLE | BPF_K:
emit_a64_mov_i(is64, tmp, imm, ctx);
emit(A64_CMP(is64, dst, tmp), ctx);
goto emit_cond_jmp;
case BPF_JMP | BPF_JSET | BPF_K:
emit_a64_mov_i(1, tmp, imm, ctx);
emit(A64_TST(1, dst, tmp), ctx);
case BPF_JMP32 | BPF_JSET | BPF_K:
emit_a64_mov_i(is64, tmp, imm, ctx);
emit(A64_TST(is64, dst, tmp), ctx);
goto emit_cond_jmp;
/* function call */
case BPF_JMP | BPF_CALL:

View File

@ -37,8 +37,6 @@ libs-y += arch/$(ARCH)/lib/
boot := arch/h8300/boot
archmrproper:
archclean:
$(Q)$(MAKE) $(clean)=$(boot)

View File

@ -16,8 +16,6 @@ KBUILD_DEFCONFIG := generic_defconfig
NM := $(CROSS_COMPILE)nm -B
READELF := $(CROSS_COMPILE)readelf
export AWK
CHECKFLAGS += -D__ia64=1 -D__ia64__=1 -D_LP64 -D__LP64__
OBJCOPYFLAGS := --strip-all

View File

@ -3155,6 +3155,7 @@ config MIPS32_O32
config MIPS32_N32
bool "Kernel support for n32 binaries"
depends on 64BIT
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
select COMPAT
select MIPS32_COMPAT
select SYSVIPC_COMPAT if SYSVIPC

View File

@ -173,6 +173,31 @@ void __init plat_mem_setup(void)
pm_power_off = bcm47xx_machine_halt;
}
#ifdef CONFIG_BCM47XX_BCMA
static struct device * __init bcm47xx_setup_device(void)
{
struct device *dev;
int err;
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
if (!dev)
return NULL;
err = dev_set_name(dev, "bcm47xx_soc");
if (err) {
pr_err("Failed to set SoC device name: %d\n", err);
kfree(dev);
return NULL;
}
err = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(32));
if (err)
pr_err("Failed to set SoC DMA mask: %d\n", err);
return dev;
}
#endif
/*
* This finishes bus initialization doing things that were not possible without
* kmalloc. Make sure to call it late enough (after mm_init).
@ -183,6 +208,10 @@ void __init bcm47xx_bus_setup(void)
if (bcm47xx_bus_type == BCM47XX_BUS_TYPE_BCMA) {
int err;
bcm47xx_bus.bcma.dev = bcm47xx_setup_device();
if (!bcm47xx_bus.bcma.dev)
panic("Failed to setup SoC device\n");
err = bcma_host_soc_init(&bcm47xx_bus.bcma);
if (err)
panic("Failed to initialize BCMA bus (err %d)", err);
@ -235,6 +264,8 @@ static int __init bcm47xx_register_bus_complete(void)
#endif
#ifdef CONFIG_BCM47XX_BCMA
case BCM47XX_BUS_TYPE_BCMA:
if (device_register(bcm47xx_bus.bcma.dev))
pr_err("Failed to register SoC device\n");
bcma_bus_register(&bcm47xx_bus.bcma.bus);
break;
#endif

View File

@ -98,7 +98,7 @@ static void octeon_kexec_smp_down(void *ignored)
" sync \n"
" synci ($0) \n");
relocated_kexec_smp_wait(NULL);
kexec_reboot();
}
#endif

View File

@ -66,6 +66,7 @@ CONFIG_SERIAL_8250_CONSOLE=y
# CONFIG_SERIAL_8250_PCI is not set
CONFIG_SERIAL_8250_NR_UARTS=1
CONFIG_SERIAL_8250_RUNTIME_UARTS=1
CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_SERIAL_AR933X=y
CONFIG_SERIAL_AR933X_CONSOLE=y
# CONFIG_HW_RANDOM is not set

View File

@ -18,8 +18,6 @@
#define INT_NUM_EXTRA_START (INT_NUM_IM4_IRL0 + 32)
#define INT_NUM_IM_OFFSET (INT_NUM_IM1_IRL0 - INT_NUM_IM0_IRL0)
#define MIPS_CPU_TIMER_IRQ 7
#define MAX_IM 5
#endif /* _FALCON_IRQ__ */

View File

@ -19,8 +19,6 @@
#define LTQ_DMA_CH0_INT (INT_NUM_IM2_IRL0)
#define MIPS_CPU_TIMER_IRQ 7
#define MAX_IM 5
#endif

View File

@ -74,14 +74,15 @@ static int __init vdma_init(void)
get_order(VDMA_PGTBL_SIZE));
BUG_ON(!pgtbl);
dma_cache_wback_inv((unsigned long)pgtbl, VDMA_PGTBL_SIZE);
pgtbl = (VDMA_PGTBL_ENTRY *)KSEG1ADDR(pgtbl);
pgtbl = (VDMA_PGTBL_ENTRY *)CKSEG1ADDR((unsigned long)pgtbl);
/*
* Clear the R4030 translation table
*/
vdma_pgtbl_init();
r4030_write_reg32(JAZZ_R4030_TRSTBL_BASE, CPHYSADDR(pgtbl));
r4030_write_reg32(JAZZ_R4030_TRSTBL_BASE,
CPHYSADDR((unsigned long)pgtbl));
r4030_write_reg32(JAZZ_R4030_TRSTBL_LIM, VDMA_PGTBL_SIZE);
r4030_write_reg32(JAZZ_R4030_TRSTBL_INV, 0);

View File

@ -224,9 +224,11 @@ static struct irq_chip ltq_eiu_type = {
.irq_set_type = ltq_eiu_settype,
};
static void ltq_hw_irqdispatch(int module)
static void ltq_hw_irq_handler(struct irq_desc *desc)
{
int module = irq_desc_get_irq(desc) - 2;
u32 irq;
int hwirq;
irq = ltq_icu_r32(module, LTQ_ICU_IM0_IOSR);
if (irq == 0)
@ -237,7 +239,8 @@ static void ltq_hw_irqdispatch(int module)
* other bits might be bogus
*/
irq = __fls(irq);
do_IRQ((int)irq + MIPS_CPU_IRQ_CASCADE + (INT_NUM_IM_OFFSET * module));
hwirq = irq + MIPS_CPU_IRQ_CASCADE + (INT_NUM_IM_OFFSET * module);
generic_handle_irq(irq_linear_revmap(ltq_domain, hwirq));
/* if this is a EBU irq, we need to ack it or get a deadlock */
if ((irq == LTQ_ICU_EBU_IRQ) && (module == 0) && LTQ_EBU_PCC_ISTAT)
@ -245,49 +248,6 @@ static void ltq_hw_irqdispatch(int module)
LTQ_EBU_PCC_ISTAT);
}
#define DEFINE_HWx_IRQDISPATCH(x) \
static void ltq_hw ## x ## _irqdispatch(void) \
{ \
ltq_hw_irqdispatch(x); \
}
DEFINE_HWx_IRQDISPATCH(0)
DEFINE_HWx_IRQDISPATCH(1)
DEFINE_HWx_IRQDISPATCH(2)
DEFINE_HWx_IRQDISPATCH(3)
DEFINE_HWx_IRQDISPATCH(4)
#if MIPS_CPU_TIMER_IRQ == 7
static void ltq_hw5_irqdispatch(void)
{
do_IRQ(MIPS_CPU_TIMER_IRQ);
}
#else
DEFINE_HWx_IRQDISPATCH(5)
#endif
static void ltq_hw_irq_handler(struct irq_desc *desc)
{
ltq_hw_irqdispatch(irq_desc_get_irq(desc) - 2);
}
asmlinkage void plat_irq_dispatch(void)
{
unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM;
int irq;
if (!pending) {
spurious_interrupt();
return;
}
pending >>= CAUSEB_IP;
while (pending) {
irq = fls(pending) - 1;
do_IRQ(MIPS_CPU_IRQ_BASE + irq);
pending &= ~BIT(irq);
}
}
static int icu_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
{
struct irq_chip *chip = &ltq_irq_type;
@ -343,38 +303,13 @@ int __init icu_of_init(struct device_node *node, struct device_node *parent)
for (i = 0; i < MAX_IM; i++)
irq_set_chained_handler(i + 2, ltq_hw_irq_handler);
if (cpu_has_vint) {
pr_info("Setting up vectored interrupts\n");
set_vi_handler(2, ltq_hw0_irqdispatch);
set_vi_handler(3, ltq_hw1_irqdispatch);
set_vi_handler(4, ltq_hw2_irqdispatch);
set_vi_handler(5, ltq_hw3_irqdispatch);
set_vi_handler(6, ltq_hw4_irqdispatch);
set_vi_handler(7, ltq_hw5_irqdispatch);
}
ltq_domain = irq_domain_add_linear(node,
(MAX_IM * INT_NUM_IM_OFFSET) + MIPS_CPU_IRQ_CASCADE,
&irq_domain_ops, 0);
#ifndef CONFIG_MIPS_MT_SMP
set_c0_status(IE_IRQ0 | IE_IRQ1 | IE_IRQ2 |
IE_IRQ3 | IE_IRQ4 | IE_IRQ5);
#else
set_c0_status(IE_SW0 | IE_SW1 | IE_IRQ0 | IE_IRQ1 |
IE_IRQ2 | IE_IRQ3 | IE_IRQ4 | IE_IRQ5);
#endif
/* tell oprofile which irq to use */
ltq_perfcount_irq = irq_create_mapping(ltq_domain, LTQ_PERF_IRQ);
/*
* if the timer irq is not one of the mips irqs we need to
* create a mapping
*/
if (MIPS_CPU_TIMER_IRQ != 7)
irq_create_mapping(ltq_domain, MIPS_CPU_TIMER_IRQ);
/* the external interrupts are optional and xway only */
eiu_node = of_find_compatible_node(NULL, NULL, "lantiq,eiu-xway");
if (eiu_node && !of_address_to_resource(eiu_node, 0, &res)) {
@ -411,7 +346,7 @@ EXPORT_SYMBOL_GPL(get_c0_perfcount_int);
unsigned int get_c0_compare_int(void)
{
return MIPS_CPU_TIMER_IRQ;
return CP0_LEGACY_COMPARE_IRQ;
}
static struct of_device_id __initdata of_irq_ids[] = {

View File

@ -369,7 +369,9 @@ int __init octeon_msi_initialize(void)
int irq;
struct irq_chip *msi;
if (octeon_dma_bar_type == OCTEON_DMA_BAR_TYPE_PCIE) {
if (octeon_dma_bar_type == OCTEON_DMA_BAR_TYPE_INVALID) {
return 0;
} else if (octeon_dma_bar_type == OCTEON_DMA_BAR_TYPE_PCIE) {
msi_rcv_reg[0] = CVMX_PEXP_NPEI_MSI_RCV0;
msi_rcv_reg[1] = CVMX_PEXP_NPEI_MSI_RCV1;
msi_rcv_reg[2] = CVMX_PEXP_NPEI_MSI_RCV2;

View File

@ -3,9 +3,6 @@ OBJCOPYFLAGS := -O binary -R .note -R .note.gnu.build-id -R .comment -S
KBUILD_DEFCONFIG := defconfig
comma = ,
ifdef CONFIG_FUNCTION_TRACER
arch-y += -malways-save-lp -mno-relax
endif
@ -54,8 +51,6 @@ endif
boot := arch/nds32/boot
core-y += $(boot)/dts/
.PHONY: FORCE
Image: vmlinux
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
@ -68,9 +63,6 @@ prepare: vdso_prepare
vdso_prepare: prepare0
$(Q)$(MAKE) $(build)=arch/nds32/kernel/vdso include/generated/vdso-offsets.h
CLEAN_FILES += include/asm-nds32/constants.h*
# We use MRPROPER_FILES and CLEAN_FILES now
archclean:
$(Q)$(MAKE) $(clean)=$(boot)

View File

@ -20,7 +20,6 @@
KBUILD_DEFCONFIG := or1ksim_defconfig
OBJCOPYFLAGS := -O binary -R .note -R .comment -S
LDFLAGS_vmlinux :=
LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
KBUILD_CFLAGS += -pipe -ffixed-r10 -D__linux__
@ -50,5 +49,3 @@ else
BUILTIN_DTB := n
endif
core-$(BUILTIN_DTB) += arch/openrisc/boot/dts/
all: vmlinux

View File

@ -337,6 +337,7 @@
#define PPC_INST_DIVWU 0x7c000396
#define PPC_INST_DIVD 0x7c0003d2
#define PPC_INST_RLWINM 0x54000000
#define PPC_INST_RLWINM_DOT 0x54000001
#define PPC_INST_RLWIMI 0x50000000
#define PPC_INST_RLDICL 0x78000000
#define PPC_INST_RLDICR 0x78000004

View File

@ -47,6 +47,7 @@ enum perf_event_powerpc_regs {
PERF_REG_POWERPC_DAR,
PERF_REG_POWERPC_DSISR,
PERF_REG_POWERPC_SIER,
PERF_REG_POWERPC_MMCRA,
PERF_REG_POWERPC_MAX,
};
#endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */

View File

@ -852,11 +852,12 @@ start_here:
/* set up the PTE pointers for the Abatron bdiGDB.
*/
tovirt(r6,r6)
lis r5, abatron_pteptrs@h
ori r5, r5, abatron_pteptrs@l
stw r5, 0xf0(0) /* Must match your Abatron config file */
tophys(r5,r5)
lis r6, swapper_pg_dir@h
ori r6, r6, swapper_pg_dir@l
stw r6, 0(r5)
/* Now turn on the MMU for real! */

View File

@ -755,11 +755,12 @@ SYSCALL_DEFINE0(rt_sigreturn)
if (restore_tm_sigcontexts(current, &uc->uc_mcontext,
&uc_transact->uc_mcontext))
goto badframe;
}
} else
#endif
/* Fall through, for non-TM restore */
if (!MSR_TM_ACTIVE(msr)) {
{
/*
* Fall through, for non-TM restore
*
* Unset MSR[TS] on the thread regs since MSR from user
* context does not have MSR active, and recheckpoint was
* not called since restore_tm_sigcontexts() was not called

View File

@ -967,13 +967,6 @@ out:
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
#if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PPC64)
unsigned long __init arch_syscall_addr(int nr)
{
return sys_call_table[nr*2];
}
#endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_PPC64 */
#ifdef PPC64_ELF_ABI_v1
char *arch_ftrace_match_adjust(char *str, const char *search)
{

View File

@ -165,6 +165,10 @@
#define PPC_RLWINM(d, a, i, mb, me) EMIT(PPC_INST_RLWINM | ___PPC_RA(d) | \
___PPC_RS(a) | __PPC_SH(i) | \
__PPC_MB(mb) | __PPC_ME(me))
#define PPC_RLWINM_DOT(d, a, i, mb, me) EMIT(PPC_INST_RLWINM_DOT | \
___PPC_RA(d) | ___PPC_RS(a) | \
__PPC_SH(i) | __PPC_MB(mb) | \
__PPC_ME(me))
#define PPC_RLWIMI(d, a, i, mb, me) EMIT(PPC_INST_RLWIMI | ___PPC_RA(d) | \
___PPC_RS(a) | __PPC_SH(i) | \
__PPC_MB(mb) | __PPC_ME(me))

View File

@ -768,36 +768,58 @@ emit_clear:
case BPF_JMP | BPF_JGT | BPF_X:
case BPF_JMP | BPF_JSGT | BPF_K:
case BPF_JMP | BPF_JSGT | BPF_X:
case BPF_JMP32 | BPF_JGT | BPF_K:
case BPF_JMP32 | BPF_JGT | BPF_X:
case BPF_JMP32 | BPF_JSGT | BPF_K:
case BPF_JMP32 | BPF_JSGT | BPF_X:
true_cond = COND_GT;
goto cond_branch;
case BPF_JMP | BPF_JLT | BPF_K:
case BPF_JMP | BPF_JLT | BPF_X:
case BPF_JMP | BPF_JSLT | BPF_K:
case BPF_JMP | BPF_JSLT | BPF_X:
case BPF_JMP32 | BPF_JLT | BPF_K:
case BPF_JMP32 | BPF_JLT | BPF_X:
case BPF_JMP32 | BPF_JSLT | BPF_K:
case BPF_JMP32 | BPF_JSLT | BPF_X:
true_cond = COND_LT;
goto cond_branch;
case BPF_JMP | BPF_JGE | BPF_K:
case BPF_JMP | BPF_JGE | BPF_X:
case BPF_JMP | BPF_JSGE | BPF_K:
case BPF_JMP | BPF_JSGE | BPF_X:
case BPF_JMP32 | BPF_JGE | BPF_K:
case BPF_JMP32 | BPF_JGE | BPF_X:
case BPF_JMP32 | BPF_JSGE | BPF_K:
case BPF_JMP32 | BPF_JSGE | BPF_X:
true_cond = COND_GE;
goto cond_branch;
case BPF_JMP | BPF_JLE | BPF_K:
case BPF_JMP | BPF_JLE | BPF_X:
case BPF_JMP | BPF_JSLE | BPF_K:
case BPF_JMP | BPF_JSLE | BPF_X:
case BPF_JMP32 | BPF_JLE | BPF_K:
case BPF_JMP32 | BPF_JLE | BPF_X:
case BPF_JMP32 | BPF_JSLE | BPF_K:
case BPF_JMP32 | BPF_JSLE | BPF_X:
true_cond = COND_LE;
goto cond_branch;
case BPF_JMP | BPF_JEQ | BPF_K:
case BPF_JMP | BPF_JEQ | BPF_X:
case BPF_JMP32 | BPF_JEQ | BPF_K:
case BPF_JMP32 | BPF_JEQ | BPF_X:
true_cond = COND_EQ;
goto cond_branch;
case BPF_JMP | BPF_JNE | BPF_K:
case BPF_JMP | BPF_JNE | BPF_X:
case BPF_JMP32 | BPF_JNE | BPF_K:
case BPF_JMP32 | BPF_JNE | BPF_X:
true_cond = COND_NE;
goto cond_branch;
case BPF_JMP | BPF_JSET | BPF_K:
case BPF_JMP | BPF_JSET | BPF_X:
case BPF_JMP32 | BPF_JSET | BPF_K:
case BPF_JMP32 | BPF_JSET | BPF_X:
true_cond = COND_NE;
/* Fall through */
@ -809,18 +831,44 @@ cond_branch:
case BPF_JMP | BPF_JLE | BPF_X:
case BPF_JMP | BPF_JEQ | BPF_X:
case BPF_JMP | BPF_JNE | BPF_X:
case BPF_JMP32 | BPF_JGT | BPF_X:
case BPF_JMP32 | BPF_JLT | BPF_X:
case BPF_JMP32 | BPF_JGE | BPF_X:
case BPF_JMP32 | BPF_JLE | BPF_X:
case BPF_JMP32 | BPF_JEQ | BPF_X:
case BPF_JMP32 | BPF_JNE | BPF_X:
/* unsigned comparison */
PPC_CMPLD(dst_reg, src_reg);
if (BPF_CLASS(code) == BPF_JMP32)
PPC_CMPLW(dst_reg, src_reg);
else
PPC_CMPLD(dst_reg, src_reg);
break;
case BPF_JMP | BPF_JSGT | BPF_X:
case BPF_JMP | BPF_JSLT | BPF_X:
case BPF_JMP | BPF_JSGE | BPF_X:
case BPF_JMP | BPF_JSLE | BPF_X:
case BPF_JMP32 | BPF_JSGT | BPF_X:
case BPF_JMP32 | BPF_JSLT | BPF_X:
case BPF_JMP32 | BPF_JSGE | BPF_X:
case BPF_JMP32 | BPF_JSLE | BPF_X:
/* signed comparison */
PPC_CMPD(dst_reg, src_reg);
if (BPF_CLASS(code) == BPF_JMP32)
PPC_CMPW(dst_reg, src_reg);
else
PPC_CMPD(dst_reg, src_reg);
break;
case BPF_JMP | BPF_JSET | BPF_X:
PPC_AND_DOT(b2p[TMP_REG_1], dst_reg, src_reg);
case BPF_JMP32 | BPF_JSET | BPF_X:
if (BPF_CLASS(code) == BPF_JMP) {
PPC_AND_DOT(b2p[TMP_REG_1], dst_reg,
src_reg);
} else {
int tmp_reg = b2p[TMP_REG_1];
PPC_AND(tmp_reg, dst_reg, src_reg);
PPC_RLWINM_DOT(tmp_reg, tmp_reg, 0, 0,
31);
}
break;
case BPF_JMP | BPF_JNE | BPF_K:
case BPF_JMP | BPF_JEQ | BPF_K:
@ -828,43 +876,87 @@ cond_branch:
case BPF_JMP | BPF_JLT | BPF_K:
case BPF_JMP | BPF_JGE | BPF_K:
case BPF_JMP | BPF_JLE | BPF_K:
case BPF_JMP32 | BPF_JNE | BPF_K:
case BPF_JMP32 | BPF_JEQ | BPF_K:
case BPF_JMP32 | BPF_JGT | BPF_K:
case BPF_JMP32 | BPF_JLT | BPF_K:
case BPF_JMP32 | BPF_JGE | BPF_K:
case BPF_JMP32 | BPF_JLE | BPF_K:
{
bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;
/*
* Need sign-extended load, so only positive
* values can be used as imm in cmpldi
*/
if (imm >= 0 && imm < 32768)
PPC_CMPLDI(dst_reg, imm);
else {
if (imm >= 0 && imm < 32768) {
if (is_jmp32)
PPC_CMPLWI(dst_reg, imm);
else
PPC_CMPLDI(dst_reg, imm);
} else {
/* sign-extending load */
PPC_LI32(b2p[TMP_REG_1], imm);
/* ... but unsigned comparison */
PPC_CMPLD(dst_reg, b2p[TMP_REG_1]);
if (is_jmp32)
PPC_CMPLW(dst_reg,
b2p[TMP_REG_1]);
else
PPC_CMPLD(dst_reg,
b2p[TMP_REG_1]);
}
break;
}
case BPF_JMP | BPF_JSGT | BPF_K:
case BPF_JMP | BPF_JSLT | BPF_K:
case BPF_JMP | BPF_JSGE | BPF_K:
case BPF_JMP | BPF_JSLE | BPF_K:
case BPF_JMP32 | BPF_JSGT | BPF_K:
case BPF_JMP32 | BPF_JSLT | BPF_K:
case BPF_JMP32 | BPF_JSGE | BPF_K:
case BPF_JMP32 | BPF_JSLE | BPF_K:
{
bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;
/*
* signed comparison, so any 16-bit value
* can be used in cmpdi
*/
if (imm >= -32768 && imm < 32768)
PPC_CMPDI(dst_reg, imm);
else {
if (imm >= -32768 && imm < 32768) {
if (is_jmp32)
PPC_CMPWI(dst_reg, imm);
else
PPC_CMPDI(dst_reg, imm);
} else {
PPC_LI32(b2p[TMP_REG_1], imm);
PPC_CMPD(dst_reg, b2p[TMP_REG_1]);
if (is_jmp32)
PPC_CMPW(dst_reg,
b2p[TMP_REG_1]);
else
PPC_CMPD(dst_reg,
b2p[TMP_REG_1]);
}
break;
}
case BPF_JMP | BPF_JSET | BPF_K:
case BPF_JMP32 | BPF_JSET | BPF_K:
/* andi does not sign-extend the immediate */
if (imm >= 0 && imm < 32768)
/* PPC_ANDI is _only/always_ dot-form */
PPC_ANDI(b2p[TMP_REG_1], dst_reg, imm);
else {
PPC_LI32(b2p[TMP_REG_1], imm);
PPC_AND_DOT(b2p[TMP_REG_1], dst_reg,
b2p[TMP_REG_1]);
int tmp_reg = b2p[TMP_REG_1];
PPC_LI32(tmp_reg, imm);
if (BPF_CLASS(code) == BPF_JMP) {
PPC_AND_DOT(tmp_reg, dst_reg,
tmp_reg);
} else {
PPC_AND(tmp_reg, dst_reg,
tmp_reg);
PPC_RLWINM_DOT(tmp_reg, tmp_reg,
0, 0, 31);
}
}
break;
}

View File

@ -70,6 +70,7 @@ static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
PT_REGS_OFFSET(PERF_REG_POWERPC_DAR, dar),
PT_REGS_OFFSET(PERF_REG_POWERPC_DSISR, dsisr),
PT_REGS_OFFSET(PERF_REG_POWERPC_SIER, dar),
PT_REGS_OFFSET(PERF_REG_POWERPC_MMCRA, dsisr),
};
u64 perf_reg_value(struct pt_regs *regs, int idx)
@ -83,6 +84,11 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
!is_sier_available()))
return 0;
if (idx == PERF_REG_POWERPC_MMCRA &&
(IS_ENABLED(CONFIG_FSL_EMB_PERF_EVENT) ||
IS_ENABLED(CONFIG_PPC32)))
return 0;
return regs_get_register(regs, pt_regs_offset[idx]);
}

View File

@ -237,12 +237,12 @@ static int ocm_debugfs_show(struct seq_file *m, void *v)
continue;
seq_printf(m, "PPC4XX OCM : %d\n", ocm->index);
seq_printf(m, "PhysAddr : %pa[p]\n", &(ocm->phys));
seq_printf(m, "PhysAddr : %pa\n", &(ocm->phys));
seq_printf(m, "MemTotal : %d Bytes\n", ocm->memtotal);
seq_printf(m, "MemTotal(NC) : %d Bytes\n", ocm->nc.memtotal);
seq_printf(m, "MemTotal(C) : %d Bytes\n\n", ocm->c.memtotal);
seq_printf(m, "NC.PhysAddr : %pa[p]\n", &(ocm->nc.phys));
seq_printf(m, "NC.PhysAddr : %pa\n", &(ocm->nc.phys));
seq_printf(m, "NC.VirtAddr : 0x%p\n", ocm->nc.virt);
seq_printf(m, "NC.MemTotal : %d Bytes\n", ocm->nc.memtotal);
seq_printf(m, "NC.MemFree : %d Bytes\n", ocm->nc.memfree);
@ -252,7 +252,7 @@ static int ocm_debugfs_show(struct seq_file *m, void *v)
blk->size, blk->owner);
}
seq_printf(m, "\nC.PhysAddr : %pa[p]\n", &(ocm->c.phys));
seq_printf(m, "\nC.PhysAddr : %pa\n", &(ocm->c.phys));
seq_printf(m, "C.VirtAddr : 0x%p\n", ocm->c.virt);
seq_printf(m, "C.MemTotal : %d Bytes\n", ocm->c.memtotal);
seq_printf(m, "C.MemFree : %d Bytes\n", ocm->c.memfree);

View File

@ -538,8 +538,7 @@ static void __init chrp_init_IRQ(void)
/* see if there is a keyboard in the device tree
with a parent of type "adb" */
for_each_node_by_name(kbd, "keyboard")
if (kbd->parent && kbd->parent->type
&& strcmp(kbd->parent->type, "adb") == 0)
if (of_node_is_type(kbd->parent, "adb"))
break;
of_node_put(kbd);
if (kbd)

View File

@ -564,7 +564,7 @@ struct iommu_table_group *pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe)
}
} else {
/* Create a group for 1 GPU and attached NPUs for POWER8 */
pe->npucomp = kzalloc(sizeof(pe->npucomp), GFP_KERNEL);
pe->npucomp = kzalloc(sizeof(*pe->npucomp), GFP_KERNEL);
table_group = &pe->npucomp->table_group;
table_group->ops = &pnv_npu_peers_ops;
iommu_register_group(table_group, hose->global_number,

View File

@ -2681,7 +2681,8 @@ static void pnv_pci_ioda_setup_iommu_api(void)
list_for_each_entry(hose, &hose_list, list_node) {
phb = hose->private_data;
if (phb->type == PNV_PHB_NPU_NVLINK)
if (phb->type == PNV_PHB_NPU_NVLINK ||
phb->type == PNV_PHB_NPU_OCAPI)
continue;
list_for_each_entry(pe, &phb->ioda.pe_list, list) {

View File

@ -264,7 +264,9 @@ void __init pSeries_final_fixup(void)
if (!of_device_is_compatible(nvdn->parent,
"ibm,power9-npu"))
continue;
#ifdef CONFIG_PPC_POWERNV
WARN_ON_ONCE(pnv_npu2_init(hose));
#endif
break;
}
}

View File

@ -25,7 +25,7 @@ static inline int init_new_context(struct task_struct *tsk,
atomic_set(&mm->context.flush_count, 0);
mm->context.gmap_asce = 0;
mm->context.flush_mm = 0;
mm->context.compat_mm = 0;
mm->context.compat_mm = test_thread_flag(TIF_31BIT);
#ifdef CONFIG_PGSTE
mm->context.alloc_pgste = page_table_allocate_pgste ||
test_thread_flag(TIF_PGSTE) ||
@ -90,8 +90,6 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
{
int cpu = smp_processor_id();
if (prev == next)
return;
S390_lowcore.user_asce = next->context.asce;
cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
/* Clear previous user-ASCE from CR1 and CR7 */
@ -103,7 +101,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
__ctl_load(S390_lowcore.vdso_asce, 7, 7);
clear_cpu_flag(CIF_ASCE_SECONDARY);
}
cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
if (prev != next)
cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
}
#define finish_arch_post_lock_switch finish_arch_post_lock_switch

View File

@ -63,10 +63,10 @@ static noinline __init void detect_machine_type(void)
if (stsi(vmms, 3, 2, 2) || !vmms->count)
return;
/* Running under KVM? If not we assume z/VM */
/* Detect known hypervisors */
if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3))
S390_lowcore.machine_flags |= MACHINE_FLAG_KVM;
else
else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4))
S390_lowcore.machine_flags |= MACHINE_FLAG_VM;
}
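For readability (my own annotation, not part of the patch): the magic byte strings compared above appear to be the hypervisor names encoded in EBCDIC, which is how the STSI control-program identification is stored:

/* Assumption: EBCDIC encodings of the CPI names matched above. */
static const char kvm_cpi[] = "\xd2\xe5\xd4";		/* "KVM"  in EBCDIC */
static const char zvm_cpi[] = "\xa9\x61\xe5\xd4";	/* "z/VM" in EBCDIC */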

View File

@ -1006,6 +1006,8 @@ void __init setup_arch(char **cmdline_p)
pr_info("Linux is running under KVM in 64-bit mode\n");
else if (MACHINE_IS_LPAR)
pr_info("Linux is running natively in 64-bit mode\n");
else
pr_info("Linux is running as a guest in 64-bit mode\n");
/* Have one command line that is parsed and saved in /proc/cmdline */
/* boot_command_line has been already set up in early.c */

View File

@ -381,8 +381,13 @@ void smp_call_online_cpu(void (*func)(void *), void *data)
*/
void smp_call_ipl_cpu(void (*func)(void *), void *data)
{
struct lowcore *lc = pcpu_devices->lowcore;
if (pcpu_devices[0].address == stap())
lc = &S390_lowcore;
pcpu_delegate(&pcpu_devices[0], func, data,
pcpu_devices->lowcore->nodat_stack);
lc->nodat_stack);
}
int smp_find_processor_id(u16 address)
@ -1166,7 +1171,11 @@ static ssize_t __ref rescan_store(struct device *dev,
{
int rc;
rc = lock_device_hotplug_sysfs();
if (rc)
return rc;
rc = smp_rescan_cpus();
unlock_device_hotplug();
return rc ? rc : count;
}
static DEVICE_ATTR_WO(rescan);

View File

@ -224,10 +224,9 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
vdso_pages = vdso64_pages;
#ifdef CONFIG_COMPAT
if (is_compat_task()) {
mm->context.compat_mm = is_compat_task();
if (mm->context.compat_mm)
vdso_pages = vdso32_pages;
mm->context.compat_mm = 1;
}
#endif
/*
* vDSO has a problem and was disabled, just don't "enable" it for

View File

@ -1110,103 +1110,141 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
mask = 0xf000; /* j */
goto branch_oc;
case BPF_JMP | BPF_JSGT | BPF_K: /* ((s64) dst > (s64) imm) */
case BPF_JMP32 | BPF_JSGT | BPF_K: /* ((s32) dst > (s32) imm) */
mask = 0x2000; /* jh */
goto branch_ks;
case BPF_JMP | BPF_JSLT | BPF_K: /* ((s64) dst < (s64) imm) */
case BPF_JMP32 | BPF_JSLT | BPF_K: /* ((s32) dst < (s32) imm) */
mask = 0x4000; /* jl */
goto branch_ks;
case BPF_JMP | BPF_JSGE | BPF_K: /* ((s64) dst >= (s64) imm) */
case BPF_JMP32 | BPF_JSGE | BPF_K: /* ((s32) dst >= (s32) imm) */
mask = 0xa000; /* jhe */
goto branch_ks;
case BPF_JMP | BPF_JSLE | BPF_K: /* ((s64) dst <= (s64) imm) */
case BPF_JMP32 | BPF_JSLE | BPF_K: /* ((s32) dst <= (s32) imm) */
mask = 0xc000; /* jle */
goto branch_ks;
case BPF_JMP | BPF_JGT | BPF_K: /* (dst_reg > imm) */
case BPF_JMP32 | BPF_JGT | BPF_K: /* ((u32) dst_reg > (u32) imm) */
mask = 0x2000; /* jh */
goto branch_ku;
case BPF_JMP | BPF_JLT | BPF_K: /* (dst_reg < imm) */
case BPF_JMP32 | BPF_JLT | BPF_K: /* ((u32) dst_reg < (u32) imm) */
mask = 0x4000; /* jl */
goto branch_ku;
case BPF_JMP | BPF_JGE | BPF_K: /* (dst_reg >= imm) */
case BPF_JMP32 | BPF_JGE | BPF_K: /* ((u32) dst_reg >= (u32) imm) */
mask = 0xa000; /* jhe */
goto branch_ku;
case BPF_JMP | BPF_JLE | BPF_K: /* (dst_reg <= imm) */
case BPF_JMP32 | BPF_JLE | BPF_K: /* ((u32) dst_reg <= (u32) imm) */
mask = 0xc000; /* jle */
goto branch_ku;
case BPF_JMP | BPF_JNE | BPF_K: /* (dst_reg != imm) */
case BPF_JMP32 | BPF_JNE | BPF_K: /* ((u32) dst_reg != (u32) imm) */
mask = 0x7000; /* jne */
goto branch_ku;
case BPF_JMP | BPF_JEQ | BPF_K: /* (dst_reg == imm) */
case BPF_JMP32 | BPF_JEQ | BPF_K: /* ((u32) dst_reg == (u32) imm) */
mask = 0x8000; /* je */
goto branch_ku;
case BPF_JMP | BPF_JSET | BPF_K: /* (dst_reg & imm) */
case BPF_JMP32 | BPF_JSET | BPF_K: /* ((u32) dst_reg & (u32) imm) */
mask = 0x7000; /* jnz */
/* lgfi %w1,imm (load sign extend imm) */
EMIT6_IMM(0xc0010000, REG_W1, imm);
/* ngr %w1,%dst */
EMIT4(0xb9800000, REG_W1, dst_reg);
if (BPF_CLASS(insn->code) == BPF_JMP32) {
/* llilf %w1,imm (load zero extend imm) */
EMIT6_IMM(0xc0010000, REG_W1, imm);
/* nr %w1,%dst */
EMIT2(0x1400, REG_W1, dst_reg);
} else {
/* lgfi %w1,imm (load sign extend imm) */
EMIT6_IMM(0xc0010000, REG_W1, imm);
/* ngr %w1,%dst */
EMIT4(0xb9800000, REG_W1, dst_reg);
}
goto branch_oc;
case BPF_JMP | BPF_JSGT | BPF_X: /* ((s64) dst > (s64) src) */
case BPF_JMP32 | BPF_JSGT | BPF_X: /* ((s32) dst > (s32) src) */
mask = 0x2000; /* jh */
goto branch_xs;
case BPF_JMP | BPF_JSLT | BPF_X: /* ((s64) dst < (s64) src) */
case BPF_JMP32 | BPF_JSLT | BPF_X: /* ((s32) dst < (s32) src) */
mask = 0x4000; /* jl */
goto branch_xs;
case BPF_JMP | BPF_JSGE | BPF_X: /* ((s64) dst >= (s64) src) */
case BPF_JMP32 | BPF_JSGE | BPF_X: /* ((s32) dst >= (s32) src) */
mask = 0xa000; /* jhe */
goto branch_xs;
case BPF_JMP | BPF_JSLE | BPF_X: /* ((s64) dst <= (s64) src) */
case BPF_JMP32 | BPF_JSLE | BPF_X: /* ((s32) dst <= (s32) src) */
mask = 0xc000; /* jle */
goto branch_xs;
case BPF_JMP | BPF_JGT | BPF_X: /* (dst > src) */
case BPF_JMP32 | BPF_JGT | BPF_X: /* ((u32) dst > (u32) src) */
mask = 0x2000; /* jh */
goto branch_xu;
case BPF_JMP | BPF_JLT | BPF_X: /* (dst < src) */
case BPF_JMP32 | BPF_JLT | BPF_X: /* ((u32) dst < (u32) src) */
mask = 0x4000; /* jl */
goto branch_xu;
case BPF_JMP | BPF_JGE | BPF_X: /* (dst >= src) */
case BPF_JMP32 | BPF_JGE | BPF_X: /* ((u32) dst >= (u32) src) */
mask = 0xa000; /* jhe */
goto branch_xu;
case BPF_JMP | BPF_JLE | BPF_X: /* (dst <= src) */
case BPF_JMP32 | BPF_JLE | BPF_X: /* ((u32) dst <= (u32) src) */
mask = 0xc000; /* jle */
goto branch_xu;
case BPF_JMP | BPF_JNE | BPF_X: /* (dst != src) */
case BPF_JMP32 | BPF_JNE | BPF_X: /* ((u32) dst != (u32) src) */
mask = 0x7000; /* jne */
goto branch_xu;
case BPF_JMP | BPF_JEQ | BPF_X: /* (dst == src) */
case BPF_JMP32 | BPF_JEQ | BPF_X: /* ((u32) dst == (u32) src) */
mask = 0x8000; /* je */
goto branch_xu;
case BPF_JMP | BPF_JSET | BPF_X: /* (dst & src) */
case BPF_JMP32 | BPF_JSET | BPF_X: /* ((u32) dst & (u32) src) */
{
bool is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
mask = 0x7000; /* jnz */
/* ngrk %w1,%dst,%src */
EMIT4_RRF(0xb9e40000, REG_W1, dst_reg, src_reg);
/* nrk or ngrk %w1,%dst,%src */
EMIT4_RRF((is_jmp32 ? 0xb9f40000 : 0xb9e40000),
REG_W1, dst_reg, src_reg);
goto branch_oc;
branch_ks:
/* lgfi %w1,imm (load sign extend imm) */
EMIT6_IMM(0xc0010000, REG_W1, imm);
/* cgrj %dst,%w1,mask,off */
EMIT6_PCREL(0xec000000, 0x0064, dst_reg, REG_W1, i, off, mask);
/* crj or cgrj %dst,%w1,mask,off */
EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
dst_reg, REG_W1, i, off, mask);
break;
branch_ku:
/* lgfi %w1,imm (load sign extend imm) */
EMIT6_IMM(0xc0010000, REG_W1, imm);
/* clgrj %dst,%w1,mask,off */
EMIT6_PCREL(0xec000000, 0x0065, dst_reg, REG_W1, i, off, mask);
/* clrj or clgrj %dst,%w1,mask,off */
EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
dst_reg, REG_W1, i, off, mask);
break;
branch_xs:
/* cgrj %dst,%src,mask,off */
EMIT6_PCREL(0xec000000, 0x0064, dst_reg, src_reg, i, off, mask);
/* crj or cgrj %dst,%src,mask,off */
EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
dst_reg, src_reg, i, off, mask);
break;
branch_xu:
/* clgrj %dst,%src,mask,off */
EMIT6_PCREL(0xec000000, 0x0065, dst_reg, src_reg, i, off, mask);
/* clrj or clgrj %dst,%src,mask,off */
EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
dst_reg, src_reg, i, off, mask);
break;
branch_oc:
/* brc mask,jmp_off (branch instruction needs 4 bytes) */
jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4);
EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off);
break;
}
default: /* too complex, give up */
pr_err("Unknown opcode %02x\n", insn->code);
return -1;
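For orientation (a sketch with made-up helper names, not kernel code): the is_jmp32 checks added in this hunk select the 32-bit forms of the s390 compare-and-branch instructions for BPF_JMP32 and keep the 64-bit forms for BPF_JMP:

/* Sketch: mnemonic chosen by the JIT for a register-register compare+branch. */
static const char *s390_jit_cmp_branch(int is_jmp32, int is_signed)
{
	if (is_signed)
		return is_jmp32 ? "crj" : "cgrj";	/* compare and branch relative */
	return is_jmp32 ? "clrj" : "clgrj";		/* compare logical and branch relative */
}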

View File

@ -198,7 +198,7 @@ config X86
select IRQ_FORCED_THREADING
select NEED_SG_DMA_LENGTH
select PCI_DOMAINS if PCI
select PCI_LOCKLESS_CONFIG
select PCI_LOCKLESS_CONFIG if PCI
select PERF_EVENTS
select RTC_LIB
select RTC_MC146818_LIB
@ -617,7 +617,7 @@ config X86_INTEL_QUARK
config X86_INTEL_LPSS
bool "Intel Low Power Subsystem Support"
depends on X86 && ACPI
depends on X86 && ACPI && PCI
select COMMON_CLK
select PINCTRL
select IOSF_MBI

View File

@ -361,7 +361,8 @@ ENTRY(entry_INT80_compat)
/* Need to switch before accessing the thread stack. */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
movq %rsp, %rdi
/* In the Xen PV case we already run on the thread stack. */
ALTERNATIVE "movq %rsp, %rdi", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
pushq 6*8(%rdi) /* regs->ss */
@ -370,8 +371,9 @@ ENTRY(entry_INT80_compat)
pushq 3*8(%rdi) /* regs->cs */
pushq 2*8(%rdi) /* regs->ip */
pushq 1*8(%rdi) /* regs->orig_ax */
pushq (%rdi) /* pt_regs->di */
.Lint80_keep_stack:
pushq %rsi /* pt_regs->si */
xorl %esi, %esi /* nospec si */
pushq %rdx /* pt_regs->dx */

View File

@ -178,6 +178,10 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
/*
* Init a new mm. Used on mm copies, like at fork()
* and on mm's that are brand-new, like at execve().
*/
static inline int init_new_context(struct task_struct *tsk,
struct mm_struct *mm)
{
@ -228,8 +232,22 @@ do { \
} while (0)
#endif
static inline void arch_dup_pkeys(struct mm_struct *oldmm,
struct mm_struct *mm)
{
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
return;
/* Duplicate the oldmm pkey state in mm: */
mm->context.pkey_allocation_map = oldmm->context.pkey_allocation_map;
mm->context.execute_only_pkey = oldmm->context.execute_only_pkey;
#endif
}
static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
{
arch_dup_pkeys(oldmm, mm);
paravirt_arch_dup_mmap(oldmm, mm);
return ldt_dup_context(oldmm, mm);
}

View File

@ -711,7 +711,7 @@ static __must_check inline bool user_access_begin(const void __user *ptr, size_t
{
if (unlikely(!access_ok(ptr,len)))
return 0;
__uaccess_begin();
__uaccess_begin_nospec();
return 1;
}
#define user_access_begin(a,b) user_access_begin(a,b)

View File

@ -470,6 +470,7 @@ int crash_load_segments(struct kimage *image)
kbuf.memsz = kbuf.bufsz;
kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
ret = kexec_add_buffer(&kbuf);
if (ret) {
vfree((void *)image->arch.elf_headers);

View File

@ -21,10 +21,6 @@
#define HPET_MASK CLOCKSOURCE_MASK(32)
/* FSEC = 10^-15
NSEC = 10^-9 */
#define FSEC_PER_NSEC 1000000L
#define HPET_DEV_USED_BIT 2
#define HPET_DEV_USED (1 << HPET_DEV_USED_BIT)
#define HPET_DEV_VALID 0x8

View File

@ -434,6 +434,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
kbuf.memsz = PAGE_ALIGN(header->init_size);
kbuf.buf_align = header->kernel_alignment;
kbuf.buf_min = MIN_KERNEL_LOAD_ADDR;
kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
ret = kexec_add_buffer(&kbuf);
if (ret)
goto out_free_params;
@ -448,6 +449,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
kbuf.bufsz = kbuf.memsz = initrd_len;
kbuf.buf_align = PAGE_SIZE;
kbuf.buf_min = MIN_INITRD_LOAD_ADDR;
kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
ret = kexec_add_buffer(&kbuf);
if (ret)
goto out_free_params;

View File

@ -457,6 +457,7 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector)
#else
u64 ipi_bitmap = 0;
#endif
long ret;
if (cpumask_empty(mask))
return;
@ -482,8 +483,9 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector)
} else if (apic_id < min + KVM_IPI_CLUSTER_SIZE) {
max = apic_id < max ? max : apic_id;
} else {
kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
WARN_ONCE(ret < 0, "KVM: failed to send PV IPI: %ld", ret);
min = max = apic_id;
ipi_bitmap = 0;
}
@ -491,8 +493,9 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector)
}
if (ipi_bitmap) {
kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
WARN_ONCE(ret < 0, "KVM: failed to send PV IPI: %ld", ret);
}
local_irq_restore(flags);

View File

@ -297,15 +297,16 @@ static int __init tsc_setup(char *str)
__setup("tsc=", tsc_setup);
#define MAX_RETRIES 5
#define SMI_TRESHOLD 50000
#define MAX_RETRIES 5
#define TSC_DEFAULT_THRESHOLD 0x20000
/*
* Read TSC and the reference counters. Take care of SMI disturbance
* Read TSC and the reference counters. Take care of any disturbances
*/
static u64 tsc_read_refs(u64 *p, int hpet)
{
u64 t1, t2;
u64 thresh = tsc_khz ? tsc_khz >> 5 : TSC_DEFAULT_THRESHOLD;
int i;
for (i = 0; i < MAX_RETRIES; i++) {
@ -315,7 +316,7 @@ static u64 tsc_read_refs(u64 *p, int hpet)
else
*p = acpi_pm_read_early();
t2 = get_cycles();
if ((t2 - t1) < SMI_TRESHOLD)
if ((t2 - t1) < thresh)
return t2;
}
return ULLONG_MAX;
@ -703,15 +704,15 @@ static unsigned long pit_hpet_ptimer_calibrate_cpu(void)
* zero. In each wait loop iteration we read the TSC and check
* the delta to the previous read. We keep track of the min
* and max values of that delta. The delta is mostly defined
* by the IO time of the PIT access, so we can detect when a
* SMI/SMM disturbance happened between the two reads. If the
* by the IO time of the PIT access, so we can detect when
* any disturbance happened between the two reads. If the
* maximum time is significantly larger than the minimum time,
* then we discard the result and have another try.
*
* 2) Reference counter. If available we use the HPET or the
* PMTIMER as a reference to check the sanity of that value.
* We use separate TSC readouts and check inside of the
* reference read for a SMI/SMM disturbance. We discard
* reference read for any possible disturbance. We discard
* disturbed values here as well. We do that around the PIT
* calibration delay loop as we have to wait for a certain
* amount of time anyway.
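To illustrate the first method described above (a rough sketch, not the kernel's actual thresholds): every wait-loop iteration records the TSC delta since the previous read, and the run is only accepted when the largest observed delta is not wildly bigger than the smallest one:

/* Sketch only: reject a calibration run whose read-to-read jitter is too
 * large. The factor of 10 is an illustrative threshold, not the value
 * the kernel uses. */
static int pit_run_is_clean(unsigned long long delta_min,
			    unsigned long long delta_max)
{
	return delta_max < 10 * delta_min;
}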
@ -744,7 +745,7 @@ static unsigned long pit_hpet_ptimer_calibrate_cpu(void)
if (ref1 == ref2)
continue;
/* Check, whether the sampling was disturbed by an SMI */
/* Check, whether the sampling was disturbed */
if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX)
continue;
@ -1268,7 +1269,7 @@ static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
*/
static void tsc_refine_calibration_work(struct work_struct *work)
{
static u64 tsc_start = -1, ref_start;
static u64 tsc_start = ULLONG_MAX, ref_start;
static int hpet;
u64 tsc_stop, ref_stop, delta;
unsigned long freq;
@ -1283,14 +1284,15 @@ static void tsc_refine_calibration_work(struct work_struct *work)
* delayed the first time we expire. So set the workqueue
* again once we know timers are working.
*/
if (tsc_start == -1) {
if (tsc_start == ULLONG_MAX) {
restart:
/*
* Only set hpet once, to avoid mixing hardware
* if the hpet becomes enabled later.
*/
hpet = is_hpet_enabled();
schedule_delayed_work(&tsc_irqwork, HZ);
tsc_start = tsc_read_refs(&ref_start, hpet);
schedule_delayed_work(&tsc_irqwork, HZ);
return;
}
@ -1300,9 +1302,9 @@ static void tsc_refine_calibration_work(struct work_struct *work)
if (ref_start == ref_stop)
goto out;
/* Check, whether the sampling was disturbed by an SMI */
if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX)
goto out;
/* Check, whether the sampling was disturbed */
if (tsc_stop == ULLONG_MAX)
goto restart;
delta = tsc_stop - tsc_start;
delta *= 1000000LL;
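In tsc_refine_calibration_work() the -1 sentinel becomes ULLONG_MAX (the value tsc_read_refs() already returns for a disturbed sample), the delayed work is rescheduled only after the start sample has been taken, and a disturbed stop sample now restarts the measurement instead of abandoning it. A hedged, userspace-only sketch of that control flow with the hardware reads stubbed out (refine_once() and read_sample() are inventions for illustration, not kernel code):

#include <stdio.h>
#include <limits.h>
#include <stdbool.h>

/* Stubbed sample: pretend the second read is disturbed by something like an SMI. */
static unsigned long long read_sample(void)
{
    static int calls;
    return ++calls == 2 ? ULLONG_MAX : 1000ULL * calls;
}

/* One invocation of the (hypothetical) refinement work; returns true when done. */
static bool refine_once(void)
{
    static unsigned long long start = ULLONG_MAX;    /* sentinel: no start sample yet */
    unsigned long long stop;

    if (start == ULLONG_MAX) {
restart:
        start = read_sample();    /* take (or retake) the start sample */
        return false;             /* "schedule_delayed_work": try again later */
    }

    stop = read_sample();
    if (stop == ULLONG_MAX)       /* stop sample disturbed: start over */
        goto restart;

    printf("refined delta = %llu\n", stop - start);
    return true;
}

int main(void)
{
    while (!refine_once())
        ;
    return 0;
}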

View File

@ -2,10 +2,6 @@
ccflags-y += -Iarch/x86/kvm
CFLAGS_x86.o := -I.
CFLAGS_svm.o := -I.
CFLAGS_vmx.o := -I.
KVM := ../../../virt/kvm
kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \

View File

@ -1636,7 +1636,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
ret = kvm_hvcall_signal_event(vcpu, fast, ingpa);
if (ret != HV_STATUS_INVALID_PORT_ID)
break;
/* maybe userspace knows this conn_id: fall through */
/* fall through - maybe userspace knows this conn_id. */
case HVCALL_POST_MESSAGE:
/* don't bother userspace if it has no way to handle it */
if (unlikely(rep || !vcpu_to_synic(vcpu)->active)) {
@ -1832,7 +1832,6 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
ent->eax |= HV_X64_MSR_VP_INDEX_AVAILABLE;
ent->eax |= HV_X64_MSR_RESET_AVAILABLE;
ent->eax |= HV_MSR_REFERENCE_TSC_AVAILABLE;
ent->eax |= HV_X64_MSR_GUEST_IDLE_AVAILABLE;
ent->eax |= HV_X64_ACCESS_FREQUENCY_MSRS;
ent->eax |= HV_X64_ACCESS_REENLIGHTENMENT;
@ -1848,11 +1847,11 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
case HYPERV_CPUID_ENLIGHTMENT_INFO:
ent->eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
ent->eax |= HV_X64_APIC_ACCESS_RECOMMENDED;
ent->eax |= HV_X64_SYSTEM_RESET_RECOMMENDED;
ent->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED;
ent->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED;
ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED;
ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
if (evmcs_ver)
ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
/*
* Default number of spinlock retry attempts, matches

View File

@ -1035,6 +1035,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
switch (delivery_mode) {
case APIC_DM_LOWEST:
vcpu->arch.apic_arb_prio++;
/* fall through */
case APIC_DM_FIXED:
if (unlikely(trig_mode && !level))
break;
@ -1874,6 +1875,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
case APIC_LVT0:
apic_manage_nmi_watchdog(apic, val);
/* fall through */
case APIC_LVTTHMR:
case APIC_LVTPC:
case APIC_LVT1:
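Several hunks in this merge only add '/* fall through */' comments or move them directly in front of the next case label; that is the placement recent GCC checks for under -Wimplicit-fallthrough before concluding that a missing break is intentional. A trivial standalone example (not kernel code):

#include <stdio.h>

/* Build with: cc -Wimplicit-fallthrough fallthrough.c
 * Without the comment right before the next label, GCC warns about the fall through. */
static int classify(int mode)
{
    int score = 0;

    switch (mode) {
    case 0:
        score++;
        /* fall through */
    case 1:
        score += 10;
        break;
    default:
        score = -1;
    }
    return score;
}

int main(void)
{
    printf("%d %d %d\n", classify(0), classify(1), classify(2));    /* 11 10 -1 */
    return 0;
}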

View File

@ -4371,6 +4371,7 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
rsvd_bits(maxphyaddr, 51);
rsvd_check->rsvd_bits_mask[1][4] =
rsvd_check->rsvd_bits_mask[0][4];
/* fall through */
case PT64_ROOT_4LEVEL:
rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd |
nonleaf_bit8_rsvd | rsvd_bits(7, 7) |

View File

@ -3414,6 +3414,14 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
kvm_mmu_reset_context(&svm->vcpu);
kvm_mmu_load(&svm->vcpu);
/*
* Drop what we picked up for L2 via svm_complete_interrupts() so it
* doesn't end up in L1.
*/
svm->vcpu.arch.nmi_injected = false;
kvm_clear_exception_queue(&svm->vcpu);
kvm_clear_interrupt_queue(&svm->vcpu);
return 0;
}
@ -4395,7 +4403,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
case MSR_IA32_APICBASE:
if (kvm_vcpu_apicv_active(vcpu))
avic_update_vapic_bar(to_svm(vcpu), data);
/* Follow through */
/* Fall through */
default:
return kvm_set_msr_common(vcpu, msr);
}
@ -4504,28 +4512,19 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
kvm_lapic_reg_write(apic, APIC_ICR, icrl);
break;
case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: {
int i;
struct kvm_vcpu *vcpu;
struct kvm *kvm = svm->vcpu.kvm;
struct kvm_lapic *apic = svm->vcpu.arch.apic;
/*
* At this point, we expect that the AVIC HW has already
* set the appropriate IRR bits on the valid target
* vcpus. So, we just need to kick the appropriate vcpu.
* Update ICR high and low, then emulate sending IPI,
* which is handled when writing APIC_ICR.
*/
kvm_for_each_vcpu(i, vcpu, kvm) {
bool m = kvm_apic_match_dest(vcpu, apic,
icrl & KVM_APIC_SHORT_MASK,
GET_APIC_DEST_FIELD(icrh),
icrl & KVM_APIC_DEST_MASK);
if (m && !avic_vcpu_is_running(vcpu))
kvm_vcpu_wake_up(vcpu);
}
kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
kvm_lapic_reg_write(apic, APIC_ICR, icrl);
break;
}
case AVIC_IPI_FAILURE_INVALID_TARGET:
WARN_ONCE(1, "Invalid IPI target: index=%u, vcpu=%d, icr=%#0x:%#0x\n",
index, svm->vcpu.vcpu_id, icrh, icrl);
break;
case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
WARN_ONCE(1, "Invalid backing page\n");

View File

@ -1465,7 +1465,7 @@ TRACE_EVENT(kvm_hv_send_ipi_ex,
#endif /* _TRACE_KVM_H */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH arch/x86/kvm
#define TRACE_INCLUDE_PATH ../../arch/x86/kvm
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE trace

View File

@ -332,16 +332,17 @@ int nested_enable_evmcs(struct kvm_vcpu *vcpu,
uint16_t *vmcs_version)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
bool evmcs_already_enabled = vmx->nested.enlightened_vmcs_enabled;
vmx->nested.enlightened_vmcs_enabled = true;
if (vmcs_version)
*vmcs_version = nested_get_evmcs_version(vcpu);
/* We don't support disabling the feature for simplicity. */
if (vmx->nested.enlightened_vmcs_enabled)
if (evmcs_already_enabled)
return 0;
vmx->nested.enlightened_vmcs_enabled = true;
vmx->nested.msrs.pinbased_ctls_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
vmx->nested.msrs.entry_ctls_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
vmx->nested.msrs.exit_ctls_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
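The nested_enable_evmcs() fix saves the old enlightened_vmcs_enabled value, sets the flag before the version is computed, and keeps the filtering of unsupported control bits to the first call only. A minimal sketch of that ordering; struct vcpu_state, get_evmcs_version() and the control mask are illustrative stand-ins, not the KVM implementation:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

struct vcpu_state {
    bool evmcs_enabled;
    uint32_t pinbased_ctls;
};

/* Illustrative: the reported version is zero unless the feature is enabled. */
static uint16_t get_evmcs_version(const struct vcpu_state *v)
{
    return v->evmcs_enabled ? 1 : 0;
}

static int enable_evmcs(struct vcpu_state *v, uint16_t *version)
{
    bool already_enabled = v->evmcs_enabled;    /* capture the old state first */

    v->evmcs_enabled = true;                    /* set it before computing the version */

    if (version)
        *version = get_evmcs_version(v);

    if (already_enabled)                        /* disabling is not supported */
        return 0;

    v->pinbased_ctls &= ~0x1u;                  /* filter unsupported controls only once */
    return 0;
}

int main(void)
{
    struct vcpu_state v = { .pinbased_ctls = 0xff };
    uint16_t ver = 0;

    enable_evmcs(&v, &ver);
    printf("version=%u ctls=%#x\n", ver, v.pinbased_ctls);    /* version=1 ctls=0xfe */
    return 0;
}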

Some files were not shown because too many files have changed in this diff.